Skip to content

Commit

Permalink
Add support for APS style arXiv identifiers
Browse files Browse the repository at this point in the history
* Adds support for new-style arXiv identifiers with a class, e.g.
  arXiv:hep-th/1601.07616. These identifiers are technically invalid
  according to the arXiv identifier specification; however, they do occur
  frequently in the literature, as seen e.g. on INSPIRE.
  • Loading branch information
szymonlopaciuk authored and lnielsen committed Nov 15, 2017
1 parent f159a41 commit a47f820
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 1 deletion.
16 changes: 15 additions & 1 deletion idutils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,13 @@
"""See http://arxiv.org/help/arxiv_identifier and
http://arxiv.org/help/arxiv_identifier_for_services."""

# Raw string so that ``\-``, ``\.`` and ``\d`` reach the regex engine
# verbatim instead of being parsed as (invalid) string escapes, which emit
# a SyntaxWarning on recent Python versions.
#
# Capture groups:
#   1 - optional "arxiv:" prefix (any case, because of re.I)
#   2 - four-digit YYMM part
#   3 - four- or five-digit sequence number
#   4 - optional version suffix such as "v2"
# The archive/subject class (e.g. "hep-th" or "math.AG") is matched by
# non-capturing groups and is therefore not available from the match.
arxiv_post_2007_with_class_regexp = re.compile(
    r"(arxiv:)?(?:[a-z\-]+)(?:\.[a-z]{2})?/(\d{4})\.(\d{4,5})(v\d+)?$",
    flags=re.I
)
"""Matches new style arXiv ID, with an old-style class specification;
technically malformed, however appears in real data."""

# Raw string so that ``\d`` and ``\S`` are passed to the regex engine as
# written rather than being treated as (invalid) string escapes.
#
# Capture groups:
#   1 - optional "ads:" / "ADS:" prefix
#   2 - the 19-character identifier: four digits, an uppercase letter,
#       thirteen non-whitespace characters, and a final uppercase letter,
#       "." or ":"
ads_regexp = re.compile(r"(ads:|ADS:)?(\d{4}[A-Z]\S{13}[A-Z.:])$")
"""See http://adsabs.harvard.edu/abs_doc/help_pages/data.html"""

Expand Down Expand Up @@ -286,7 +293,8 @@ def is_ads(val):

def is_arxiv_post_2007(val):
    """Test if argument is a post-2007 arXiv ID.

    The plain post-2007 pattern is tried first; if it does not match, the
    variant carrying an old-style class prefix (e.g. ``hep-th/1601.07616``)
    is tried as a fallback.

    :param val: String to test.
    :returns: The regular expression match object of whichever pattern
        matched, or ``None`` when neither does.
    """
    # A single parenthesised expression: an early ``return`` of the first
    # match alone would make the with-class fallback unreachable.
    return (arxiv_post_2007_regexp.match(val)
            or arxiv_post_2007_with_class_regexp.match(val))


def is_arxiv_pre_2007(val):
Expand Down Expand Up @@ -441,6 +449,12 @@ def normalize_arxiv(val):
val = "".join(m.group(1, 2, 4, 5))
if m.group(6):
val += m.group(6)

m = is_arxiv_post_2007(val)
if m:
val = 'arXiv:' + '.'.join(m.group(2, 3))
if m.group(4):
val += m.group(4)
return val


Expand Down
4 changes: 4 additions & 0 deletions tests/test_idutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@
'http://arxiv.org/abs/arXiv:hep-th/9901001v27'),
('9912.12345v2', ['arxiv', ], 'arXiv:9912.12345v2',
'http://arxiv.org/abs/arXiv:9912.12345v2'),
('arXiv:hep-th/1601.07616', ['arxiv', ], 'arXiv:1601.07616',
'http://arxiv.org/abs/arXiv:1601.07616'),
('hep-th/1601.07616', ['arxiv', ], 'arXiv:1601.07616',
'http://arxiv.org/abs/arXiv:1601.07616'),
('http://d-nb.info/gnd/1055864695', ['gnd', 'url'], 'gnd:1055864695',
'http://d-nb.info/gnd/1055864695'),
('GND:4079154-3', ['gnd', ], 'gnd:4079154-3',
Expand Down

0 comments on commit a47f820

Please sign in to comment.