Permalink
Browse files

"species" dumper

  • Loading branch information...
sirloon committed Jan 25, 2017
1 parent 82b1507 commit ee674c55bad849b43c8514fcc6b7139423c70074
Showing with 47 additions and 67 deletions.
  1. +14 −67 .gitignore
  2. +1 −0 src/dataload/__init__.py
  3. +1 −0 src/dataload/sources/species/__init__.py
  4. +31 −0 src/dataload/sources/species/dumper.py
@@ -1,67 +1,14 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# IPython Notebook
.ipynb_checkpoints
# ansible stuff
.gitmodules
src/ansible_deploy/
src/ansible_deploy
*.pyc
*.swp
src/config.py
__pycache__
.DS_Store
logs
pyenv
docs/_build
docs/doc_html
src/www/static/docs
src/config_prod.py
src/run/*.pickle
src/run/done/*.pickle
src/bin/ssh_host_key*
@@ -1,3 +1,4 @@
__sources__ = [
"dataload.sources.taxonomy",
"dataload.sources.species",
]
@@ -0,0 +1 @@
from .dumper import SpeciesDumper
@@ -0,0 +1,31 @@
import os
import biothings, config
# NOTE(review): config_for_app(config) is invoked before the
# biothings.dataload import below; presumably that ordering is required by
# biothings -- confirm before regrouping these imports.
biothings.config_for_app(config)
from config import DATA_ARCHIVE_ROOT
from biothings.dataload.dumper import FTPDumper
class SpeciesDumper(FTPDumper):
    """Dumper configuration for the "species" source.

    Declares the FTP host, the remote directory and the local archive
    folder, and decides whether ``speclist.txt`` has to be queued for
    download. The transfer itself is not implemented here (presumably it
    is handled by ``FTPDumper`` -- confirm against the base class).
    """

    SRC_NAME = "species"
    # Local archive location: <DATA_ARCHIVE_ROOT>/species
    SRC_ROOT_FOLDER = os.path.join(DATA_ARCHIVE_ROOT, SRC_NAME)
    FTP_HOST = "ftp.uniprot.org"
    CWD_DIR = "/pub/databases/uniprot/current_release/knowledgebase/complete/docs"
    SUFFIX_ATTR = "timestamp"
    # Cron-style expression; presumably "every day at 09:00" -- confirm how
    # the scheduler interprets it.
    SCHEDULE = "0 9 * * *"

    def create_todump_list(self, force=False):
        """Queue ``speclist.txt`` for download when a refresh is needed.

        An entry ``{"remote": ..., "local": ...}`` is appended to
        ``self.to_dump`` when any of the following holds, checked in this
        order:

        * ``force`` is true;
        * no copy of the file exists at the current location;
        * ``self.remote_is_better(remote, local)`` returns a true value.

        Otherwise ``self.to_dump`` is left untouched.

        Args:
            force: When true, queue the file without checking the local copy.
        """
        remote_name = "speclist.txt"
        target_path = os.path.join(self.new_data_folder, remote_name)

        try:
            existing_path = os.path.join(self.current_data_folder, remote_name)
        except TypeError:
            # No usable current data folder yet (os.path.join rejected it),
            # so compare against the location the new file will be written to.
            existing_path = target_path

        # Guard clause: nothing to do when not forced, a local copy exists and
        # the remote file is not considered better. The short-circuit order
        # keeps the remote check as the last resort.
        already_current = (
            not force
            and os.path.exists(existing_path)
            and not self.remote_is_better(remote_name, existing_path)
        )
        if already_current:
            return

        # Register the new release (will be stored in backend).
        self.to_dump.append({"remote": remote_name, "local": target_path})

0 comments on commit ee674c5

Please sign in to comment.