Skip to content

Commit

Permalink
GeneInfoParser: add "Symbol_from_nomenclature_authority" to "other_names"
Browse files Browse the repository at this point in the history

Close #119
  • Loading branch information
zcqian committed Nov 16, 2021
1 parent 3f7d787 commit 33ab26d
Showing 1 changed file with 18 additions and 5 deletions.
23 changes: 18 additions & 5 deletions src/hub/dataload/sources/entrez/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,14 +180,21 @@ def load(self, aslist=False):
te (tab is used as a separator, pound sign - start of a comment)
'''
gene_d = tab2dict_iter(self.datafile, (0, 1, 2, 3, 4, 5, 7, 8, 9, 13, 14), key=1,
alwayslist=0, includefn=self.species_filter)
gene_d = tab2dict_iter(
self.datafile,
(0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 13, 14),
key=1,
alwayslist=0,
includefn=self.species_filter
)

def _ff(d):
(
taxid, symbol, locus_tag, synonyms,
dbxrefs, map_location,
description, type_of_gene, other_designations,
description, type_of_gene,
symbol_from_nomenclature_authority,
other_designations,
modification_date
) = d
out = dict(taxid=int(taxid),
Expand All @@ -201,9 +208,15 @@ def _ff(d):
out['alias'] = normalized_value(synonyms.split('|'))
if locus_tag != '-':
out['locus_tag'] = locus_tag
other_names = []
if other_designations != "-":
out['other_names'] = normalized_value(other_designations.split('|'))

out['other_names'] = other_names.extend(other_designations.split('|'))
if symbol_from_nomenclature_authority != "-" \
and symbol_from_nomenclature_authority != symbol \
and symbol_from_nomenclature_authority not in other_names:
other_names.append(symbol_from_nomenclature_authority)
if other_names:
out['other_names'] = normalized_value(other_names)
### when merged, this will become the default timestamp
### as of 2017/12/10, some timestamps can have different formats
##if len(modification_date) > 8:
Expand Down

0 comments on commit 33ab26d

Please sign in to comment.