Skip to content

Commit

Permalink
Finished first draft of plugin; may need changes later.
Browse files Browse the repository at this point in the history
  • Loading branch information
jal347 committed Oct 7, 2022
1 parent 0677641 commit 23868c4
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
6 changes: 3 additions & 3 deletions src/hub/dataload/sources/ncbi_gene/dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class NcbiGeneDumper(FTPDumper):
FTP_HOST = 'ftp.ncbi.nih.gov'
CWD_DIR = '/gene/DATA/ASN_BINARY/Mammalia'

#SCHEDULE = "0 22 * * 6"
SCHEDULE = "0 6 * * 6"

def get_newest_info(self):
res = self.client.sendcmd("MDTM All_Mammalia.ags.gz")
Expand All @@ -36,13 +36,13 @@ def new_release_available(self):

def create_todump_list(self, force=False, **kwargs):
self.get_newest_info()
for fn in ['Sus_scrofa.ags.gz']: #TODO change to all, using sus_scrofa for testing
for fn in ['All_Mammalia.ags.gz']:
local_file = os.path.join(self.new_data_folder,fn)
if force or not os.path.exists(local_file) or self.remote_is_better(fn,local_file) or self.new_release_available():
self.to_dump.append({"remote": fn, "local":local_file})

def post_dump(self, *args, **kwargs):
self.logger.info("Extracting Gene Summary Data in %s", self.new_data_folder)
os.chdir(self.new_data_folder)
os.system('time gunzip -c Sus_scrofa.ags.gz |../gene2xml -i stdin -b T | ../xtract -pattern Entrezgene -element Gene-track_geneid,Entrezgene_summary | awk -F "\t" \'length($2)\' | xz -9 --stdout > gene2summary_sus.txt.xz')
os.system('time gunzip -c All_Mammalia.ags.gz |../gene2xml -i stdin -b T | ../xtract -pattern Entrezgene -element Gene-track_geneid,Entrezgene_summary | awk -F "\t" \'length($2)\' | xz -9 --stdout > gene2summary_all.txt.xz')

2 changes: 1 addition & 1 deletion src/hub/dataload/sources/ncbi_gene/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class GeneSummaryParser(EntrezParserBase):
'''Parser for gene2summary_all.txt.xz, adding "summary" field in gene doc'''

# TODO: for testing only; the file name needs to be changed
DATAFILE = 'gene2summary_sus.txt.xz'
DATAFILE = 'gene2summary_all.txt.xz'

def load(self, aslist=False):

Expand Down

0 comments on commit 23868c4

Please sign in to comment.