Permalink
Browse files

geneinfo dumper

  • Loading branch information...
sirloon committed Jan 25, 2017
1 parent ee674c5 commit d3b3486f71e865235efd673d2f371b53eaa0bc5b
Showing with 36 additions and 0 deletions.
  1. +1 −0 src/dataload/__init__.py
  2. +1 −0 src/dataload/sources/geneinfo/__init__.py
  3. +34 −0 src/dataload/sources/geneinfo/dumper.py
@@ -1,4 +1,5 @@
# Data source packages, listed by dotted module path.
# NOTE(review): presumably read elsewhere to discover and load each source's
# dumper — confirm against the code that consumes __sources__.
__sources__ = [
"dataload.sources.taxonomy",
"dataload.sources.species",
"dataload.sources.geneinfo",
]
@@ -0,0 +1 @@
from .dumper import GeneInfoDumper
@@ -0,0 +1,34 @@
import os
import biothings, config
biothings.config_for_app(config)
from config import DATA_ARCHIVE_ROOT
from biothings.dataload.dumper import FTPDumper
from biothings.utils.common import gunzipall
class GeneInfoDumper(FTPDumper):
    """FTP dumper for the ``gene_info.gz`` file under ``/gene/DATA`` on
    ``ftp.ncbi.nih.gov``.

    The class-level constants configure the ``FTPDumper`` base class; only
    the list of files to fetch and the post-download step are defined here.
    """

    SRC_NAME = "geneinfo"
    # Local archive location for this source: <DATA_ARCHIVE_ROOT>/geneinfo
    SRC_ROOT_FOLDER = os.path.join(DATA_ARCHIVE_ROOT, SRC_NAME)
    FTP_HOST = 'ftp.ncbi.nih.gov'
    CWD_DIR = '/gene/DATA'
    # NOTE(review): presumably selects what suffix is used for release
    # folders — confirm against the base dumper.
    SUFFIX_ATTR = "timestamp"
    # NOTE(review): looks like a cron expression (every day at 09:00) —
    # confirm how the scheduler interprets it.
    SCHEDULE = "0 9 * * *"

    def create_todump_list(self, force=False):
        """Register ``gene_info.gz`` for download when a fetch is needed.

        A fetch is needed when ``force`` is true, when no local copy exists
        in the current data folder, or when ``remote_is_better`` reports a
        true value for the remote file compared to the local copy.

        :param force: when true, schedule the download unconditionally.
        """
        filename = "gene_info.gz"
        destination = os.path.join(self.new_data_folder, filename)
        try:
            existing_copy = os.path.join(self.current_data_folder, filename)
        except TypeError:
            # os.path.join rejects a non-path value (e.g. None): there is no
            # current data folder yet, so compare against the destination.
            existing_copy = destination

        # Same evaluation order as a chained "or": the remote comparison is
        # only performed when not forced and a local copy is present.
        if force:
            must_fetch = True
        elif not os.path.exists(existing_copy):
            must_fetch = True
        else:
            must_fetch = self.remote_is_better(filename, existing_copy)

        if must_fetch:
            # Queue the transfer: remote file name -> local destination path.
            self.to_dump.append({"remote": filename, "local": destination})

    def post_dump(self):
        """Run ``gunzipall`` over the new data folder."""
        gunzipall(self.new_data_folder)

0 comments on commit d3b3486

Please sign in to comment.