Skip to content

Commit

Permalink
fix missing ref, alt issue due to data structure change in clinvar 2019-11 release
Browse files Browse the repository at this point in the history
  • Loading branch information
kevinxin90 committed Nov 12, 2019
1 parent f7537cf commit 67a0788
Showing 1 changed file with 13 additions and 2 deletions.
15 changes: 13 additions & 2 deletions src/hub/dataload/sources/clinvar/clinvar_xml_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,15 +65,23 @@ def parse_measure(Measure, hg19=True):
chromEnd_19 = SequenceLocation.stop
if not ref:
ref = SequenceLocation.referenceAllele
if not ref:
ref = SequenceLocation.referenceAlleleVCF
if not alt:
alt = SequenceLocation.alternateAllele
if not alt:
alt = SequenceLocation.alternateAlleleVCF
if 'GRCh38' in SequenceLocation.Assembly:
chromStart_38 = SequenceLocation.start
chromEnd_38 = SequenceLocation.stop
if not ref:
ref = SequenceLocation.referenceAllele
if not ref:
ref = SequenceLocation.referenceAlleleVCF
if not alt:
alt = SequenceLocation.alternateAllele
if not alt:
alt = SequenceLocation.alternateAlleleVCF
if Measure.MeasureRelationship:
try:
symbol = Measure.MeasureRelationship[0].\
Expand Down Expand Up @@ -148,8 +156,11 @@ def parse_measure(Measure, hg19=True):
break
if chrom and chromStart and chromEnd:
# if it's a SNP, make sure chrom, chromStart, chromEnd, ref, alt are all provided
if variation_type == 'single nucleotide variant' and ref and alt:
hgvs_id = "chr%s:g.%s%s>%s" % (chrom, chromStart, ref, alt)
if variation_type == 'single nucleotide variant':
if ref and alt:
hgvs_id = "chr%s:g.%s%s>%s" % (chrom, chromStart, ref, alt)
else:
print('hgvs not found chr {}, chromStart {}, chromEnd {}, ref {}, alt {}, allele id {}'.format(chrom, chromStart, chromEnd, ref, alt, allele_id))
# items whose type belongs to 'Indel, Insertion,
# Duplication' might not have explicit alt information,
# so we will parse from hgvs_genome
Expand Down

0 comments on commit 67a0788

Please sign in to comment.