Skip to content

Commit

Permalink
Merge pull request #106 from erikyao/master
Browse files Browse the repository at this point in the history
Fix to Issue#83
  • Loading branch information
erikyao committed Aug 3, 2021
2 parents a7a0a9d + beed5ac commit 1f1d79a
Show file tree
Hide file tree
Showing 5 changed files with 578 additions and 250 deletions.
2 changes: 2 additions & 0 deletions requirements_hub.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,5 @@ xmltodict==0.11.0 # drugbank parsing
beautifulsoup4==4.5.1 # drugbank dumper
lxml # bs4 html parsing (note: left unpinned; no specific version available to pin to)
pandas==1.0.1 # sider parser
obonet==0.3.0 # chebi parser
networkx==2.5 # chebi parser (dependency of obonet)
25 changes: 13 additions & 12 deletions src/hub/dataload/sources/chebi/chebi_dump.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import os
import os.path
import sys
import time

from config import DATA_ARCHIVE_ROOT
from biothings.hub.dataload.dumper import FTPDumper, DumperException
Expand All @@ -13,7 +11,7 @@ class ChebiDumper(FTPDumper):
SRC_NAME = "chebi"
SRC_ROOT_FOLDER = os.path.join(DATA_ARCHIVE_ROOT, SRC_NAME)
FTP_HOST = 'ftp.ebi.ac.uk'
CWD_DIR = '/pub/databases/chebi/archive' # contains all releases
CWD_DIR = '/pub/databases/chebi/archive' # contains all releases

SCHEDULE = "0 12 * * *"

Expand All @@ -25,7 +23,7 @@ def get_release(self):
self.release = releases[-1]

def new_release_available(self):
current_release = self.src_doc.get("download",{}).get("release")
current_release = self.src_doc.get("download", {}).get("release")
if not current_release or self.release > current_release:
self.logger.info("New release '%s' found" % self.release)
return True
Expand All @@ -34,17 +32,20 @@ def new_release_available(self):
return False

def create_todump_list(self, force=False):
    """Queue the files of the selected ChEBI release for download.

    Calls ``self.get_release()`` first, then, when ``force`` is true or
    ``self.new_release_available()`` returns true, appends one
    ``{"remote": ..., "local": ...}`` entry per file to ``self.to_dump``.
    When neither condition holds, ``self.to_dump`` is left untouched.

    :param force: queue the files even if no new release is detected.
    """

    def append_todump(sub_dir, filename):
        # Directory holding ``filename`` for this release, built from the
        # class-level archive root, the release name and the sub-directory.
        work_dir = os.path.join(self.__class__.CWD_DIR, self.release, sub_dir)
        # NOTE(review): presumably an ftplib-style change of directory on
        # the remote side -- confirm against the client implementation.
        self.client.cwd(work_dir)

        remote = os.path.join(work_dir, filename)
        local = os.path.join(self.new_data_folder, filename)

        self.to_dump.append({"remote": remote, "local": local})

    self.get_release()
    if force or self.new_release_available():
        # Every file goes through the helper so that each one is queued
        # exactly once.
        append_todump("SDF", "ChEBI_complete.sdf.gz")
        append_todump("ontology", "chebi_lite.obo.gz")

def post_dump(self, *args, **kwargs):
    """Run ``gunzipall`` over ``self.new_data_folder``.

    Any positional or keyword arguments are accepted and ignored.
    """
    # NOTE(review): gunzipall is defined outside this view; presumably it
    # decompresses the .gz archives found in the folder -- confirm.
    download_dir = self.new_data_folder
    gunzipall(download_dir)


0 comments on commit 1f1d79a

Please sign in to comment.