Skip to content

Commit

Permalink
upstream changed directory structure/symlink
Browse files Browse the repository at this point in the history
UCSC changed what Homo_sapiens points to: it used to point to hg19 and now points to hg38.
Also added a test asserting that exons and exons_hg19 differ for one test sample.
  • Loading branch information
zcqian committed Nov 2, 2021
1 parent 907f87b commit 99c132c
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 2 deletions.
4 changes: 4 additions & 0 deletions src/hub/dataload/sources/ucsc/dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,12 @@ def get_file_list(self):
genome_li = [x for x in self.client.nlst() if not x.endswith('.')]
fli = []
fixed_files = ['../hg38/database/refFlat.txt.gz',
'../hg19/database/refFlat.txt.gz',
'../mm9/database/refFlat.txt.gz',
'../hgFixed/database/refLink.txt.gz']
# hg38 is downloaded twice, but keeping the 'Homo_sapiens' directory
# is essential for the parser/uploader to trigger the upload for hg19 and hg38;
# the tradeoff is less than 10MB of wasted disk space
for file_path in [os.path.join(genome, "database/refFlat.txt.gz") for genome in genome_li] + fixed_files:
lastmodified = self.get_ftpfile_lastmodified(file_path)
if lastmodified:
Expand Down
4 changes: 2 additions & 2 deletions src/hub/dataload/sources/ucsc/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ def load_exons_for_human(data_folder):
exons --> hg38
exons_hg19 --> hg19
'''
gene2exons_hg19 = load_exons_for_species(data_folder, 'Homo_sapiens', exons_key='exons_hg19')
gene2exons = load_exons_for_species(data_folder, '../hg38')
gene2exons_hg19 = load_exons_for_species(data_folder, '../hg19', exons_key='exons_hg19')
gene2exons = load_exons_for_species(data_folder, '../hg38', exons_key='exons')
for gid in gene2exons_hg19:
if gid in gene2exons:
gene2exons[gid].update(gene2exons_hg19[gid])
Expand Down
4 changes: 4 additions & 0 deletions src/tests/data_tests/test_6_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,10 @@ def test_670_pharos(self):
res = self.request("gene/56141?fields=pharos").json()
assert res["pharos"]["target_id"] == 4745

def test_680_exons_hg19_hg38(self):
    ''' Check that gene 9150 returns different values for the two exon fields.

    Requests only the ``exons`` and ``exons_hg19`` fields and fails if
    both come back equal.
    '''
    payload = self.request('gene/9150?fields=exons,exons_hg19').json()
    default_exons = payload['exons']
    hg19_exons = payload['exons_hg19']
    # Identical values would mean both fields were built from the same data.
    assert default_exons != hg19_exons


def filter_hits(dic, field=None):
''' Filter hits by removing specified fields or by default meta fields '''
Expand Down

0 comments on commit 99c132c

Please sign in to comment.