Skip to content

Commit

Permalink
global: add support for HAL identifiers
Browse files Browse the repository at this point in the history
  • Loading branch information
marmol authored and lnielsen committed Feb 12, 2019
1 parent 8c6f3e3 commit bee1d76
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 0 deletions.
1 change: 1 addition & 0 deletions AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Authors
- Adrian Pawel Baran
- Alan Rubin
- Alexander Ioannidis
- Bruno Marmol
- Jiri Kuncar
- Lars Holm Nielsen
- Pedro Gaudencio
Expand Down
23 changes: 23 additions & 0 deletions idutils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,11 @@
"""Matches new style arXiv ID, with an old-style class specification;
technically malformed, however appears in real data."""

# Raw string literal: "\d" inside a normal string is an invalid escape
# sequence and triggers a DeprecationWarning/SyntaxWarning on recent Pythons.
hal_regexp = re.compile(
    r"(hal:|HAL:)?([a-z]{3}[a-z]*-|(sic|mem|ijn)_)\d{8}(v\d+)?$"
)
"""Matches HAL identifiers.

Accepted form: an optional ``hal:``/``HAL:`` prefix, then either three or
more lowercase letters followed by ``-`` or one of the old-style prefixes
``sic_``, ``mem_`` or ``ijn_``, then eight digits and an optional version
suffix (``v`` followed by digits).
"""

# Raw string literal so that "\d" and "\S" reach the regex engine verbatim
# instead of being treated as (invalid) string escape sequences.
ads_regexp = re.compile(r"(ads:|ADS:)?(\d{4}[A-Za-z]\S{13}[A-Z.:])$")
"""Matches ADS identifiers with an optional ``ads:``/``ADS:`` prefix.

See http://adsabs.harvard.edu/abs_doc/help_pages/data.html
"""

Expand Down Expand Up @@ -454,6 +459,14 @@ def is_arxiv(val):
return is_arxiv_post_2007(val) or is_arxiv_pre_2007(val)


def is_hal(val):
    """Check whether ``val`` looks like a HAL identifier.

    See https://hal.archives-ouvertes.fr

    :param val: candidate identifier string.
    :returns: the match object produced by ``hal_regexp`` when ``val``
        matches, otherwise ``None``.
    """
    found = hal_regexp.match(val)
    return found


def is_pmid(val):
"""Test if argument is a PubMed ID.
Expand Down Expand Up @@ -520,6 +533,7 @@ def is_genome(val):
('urn', is_urn),
('ads', is_ads),
('arxiv', is_arxiv),
('hal', is_hal),
('pmcid', is_pmcid),
('isbn', is_isbn),
('issn', is_issn),
Expand Down Expand Up @@ -644,6 +658,12 @@ def normalize_arxiv(val):
return val


def normalize_hal(val):
    """Return the canonical form of a HAL identifier.

    Space characters are removed, the value is lowercased, and every
    ``hal:`` occurrence (i.e. the scheme prefix) is dropped.

    :param val: HAL identifier string, with or without the ``hal:`` prefix.
    :returns: the normalized identifier string.
    """
    without_spaces = val.replace(' ', '')
    lowered = without_spaces.lower()
    # The prefix is stripped after lowercasing so "HAL:" is handled as well.
    return lowered.replace('hal:', '')


def normalize_isbn(val):
"""Normalize an ISBN identifier."""
val = val.replace(' ', '').replace('-', '').strip().upper()
Expand Down Expand Up @@ -683,6 +703,8 @@ def normalize_pid(val, scheme):
return normalize_isbn(val)
elif scheme == 'issn':
return normalize_issn(val)
elif scheme == 'hal':
return normalize_hal(val)
return val


Expand All @@ -703,6 +725,7 @@ def normalize_pid(val, scheme):
'uniprot': u'{scheme}://purl.uniprot.org/uniprot/{pid}',
'refseq': u'{scheme}://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val={pid}',
'genome': u'{scheme}://www.ncbi.nlm.nih.gov/assembly/{pid}',
'hal': u'{scheme}://hal.archives-ouvertes.fr/{pid}',
}
"""URL generation configuration for the supported PID providers."""

Expand Down
6 changes: 6 additions & 0 deletions tests/test_idutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,12 @@
'http://www.ncbi.nlm.nih.gov/assembly/GCA_000002275.2'),
('GCF_000001405.38', ['genome', ], '',
'http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.38'),
('hal:inserm-13102590', ['hal', ], 'inserm-13102590',
'http://hal.archives-ouvertes.fr/inserm-13102590'),
('inserm-13102590', ['hal', ], 'inserm-13102590',
'http://hal.archives-ouvertes.fr/inserm-13102590'),
('mem_13102590', ['hal', ], 'mem_13102590',
'http://hal.archives-ouvertes.fr/mem_13102590'),
]


Expand Down

0 comments on commit bee1d76

Please sign in to comment.