Skip to content

Commit

Permalink
kbs: report numbers update
Browse files Browse the repository at this point in the history
* Add new often-cited report numbers, found with
invenio-scripts/unrecognized_report_numbers.py

* Remove broken pattern escaping

* Relax patterns to allow dropping a leading 0

* Replace all ' ' patterns with 's' to allow additional spaces

* Cosmetic improvement: align second column

Signed-off-by: Micha Moskovic <michamos@gmail.com>
  • Loading branch information
michamos committed Nov 18, 2016
1 parent e426e7d commit 921a64e
Show file tree
Hide file tree
Showing 3 changed files with 182 additions and 56 deletions.
4 changes: 1 addition & 3 deletions refextract/references/kbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
from .regexs import (
re_kb_line,
re_regexp_character_class,
re_report_num_chars_to_escape,
re_extract_quoted_text,
re_extract_char_class,
re_punctuation,
Expand Down Expand Up @@ -181,6 +180,7 @@ def institute_num_pattern_to_regex(pattern):
simple_replacements = [
('9', r'\d'),
('9+', r'\d+'),
('9?', r'\d?'),
('w+', r'\w+'),
('a', r'[A-Za-z]'),
('v', r'[Vv]'),
Expand All @@ -189,8 +189,6 @@ def institute_num_pattern_to_regex(pattern):
('yy', r'\d\d'),
('s', r'\s*'),
(r'/', r'\/')]
# first, escape certain characters that could be sensitive to a regexp:
pattern = re_report_num_chars_to_escape.sub(r'\\\g<1>', pattern)

# now loop through and carry out the simple replacements:
for repl in simple_replacements:
Expand Down
Loading

0 comments on commit 921a64e

Please sign in to comment.