Skip to content

Commit

Permalink
fixed entrez
Browse files Browse the repository at this point in the history
  • Loading branch information
mygene_hub committed Feb 22, 2022
1 parent d7f4fe6 commit 7678a55
Showing 1 changed file with 14 additions and 9 deletions.
23 changes: 14 additions & 9 deletions src/hub/dataload/sources/entrez/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,16 +227,21 @@ def _ff(d):
for x in dbxrefs.split('|'):
if x == '-':
continue
xd = x.split(':')
if len(xd) == 3 and xd[0] == xd[1] and \
xd[0] in ['VGNC', 'HGNC', 'MGI']:
# xd = x.split(':')
# if len(xd) == 3 and xd[0] == xd[1] and \
# xd[0] in ['VGNC', 'HGNC', 'MGI']:
# workaround for an NCBI bug that duplicates the xref prefix, e.g. 'HGNC:HGNC:36328'
xd = xd[1:]
try:
_db, _id = xd
except:
print(repr(x))
raise
# xd = xd[1:]
# try:
# _db, _id = xd
# except:
# print(repr(x))
# raise
_db, _id = x.split(':', maxsplit=1)
for prefix in ['VGNC', 'HGNC', 'MGI', 'WB']:
prefix_len = len(prefix) + 1 # add ":" to the prefix
if _id[:prefix_len] == prefix + ':':
_id = _id[prefix_len:]
# we don't need the Ensembl xref from here; we will get it from
# Ensembl directly
if _db.lower() in ['ensembl', 'imgt/gene-db']:
Expand Down

0 comments on commit 7678a55

Please sign in to comment.