Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
"species" dumper
  • Loading branch information
sirloon committed Jan 25, 2017
1 parent 82b1507 commit ee674c5
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 67 deletions.
81 changes: 14 additions & 67 deletions .gitignore
@@ -1,67 +1,14 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

# ansible stuff
.gitmodules
src/ansible_deploy/
src/ansible_deploy
*.pyc
*.swp
src/config.py
__pycache__
.DS_Store
logs
pyenv
docs/_build
docs/doc_html
src/www/static/docs
src/config_prod.py
src/run/*.pickle
src/run/done/*.pickle
src/bin/ssh_host_key*
1 change: 1 addition & 0 deletions src/dataload/__init__.py
@@ -1,3 +1,4 @@
# Dotted module paths of the data sources declared by this package.
# NOTE(review): presumably consumed by the biothings source loader to
# discover dumpers/uploaders -- confirm against the caller.
__sources__ = [
    "dataload.sources.taxonomy",
    "dataload.sources.species",
]
1 change: 1 addition & 0 deletions src/dataload/sources/species/__init__.py
@@ -0,0 +1 @@
from .dumper import SpeciesDumper
31 changes: 31 additions & 0 deletions src/dataload/sources/species/dumper.py
@@ -0,0 +1,31 @@
import os

# The project configuration module is registered with biothings *before*
# anything from biothings.dataload is imported below; keep this ordering.
# NOTE(review): presumably biothings.dataload reads the registered config at
# import time -- confirm before reordering or merging these imports.
import biothings, config
biothings.config_for_app(config)

# Root directory under which each source's archive folder is created.
from config import DATA_ARCHIVE_ROOT
from biothings.dataload.dumper import FTPDumper


class SpeciesDumper(FTPDumper):
    """FTP dumper for the "species" source.

    Targets the file ``speclist.txt`` in the UniProt ``docs`` directory on
    ``ftp.uniprot.org`` and stores it under
    ``<DATA_ARCHIVE_ROOT>/species``.
    """

    SRC_NAME = "species"
    SRC_ROOT_FOLDER = os.path.join(DATA_ARCHIVE_ROOT, SRC_NAME)
    FTP_HOST = 'ftp.uniprot.org'
    CWD_DIR = '/pub/databases/uniprot/current_release/knowledgebase/complete/docs'
    # NOTE(review): presumably selects how release folders are suffixed
    # by the base dumper -- confirm against FTPDumper.
    SUFFIX_ATTR = "timestamp"

    # Cron-style expression; presumably "every day at 09:00" -- confirm
    # how the scheduler interprets it.
    SCHEDULE = "0 9 * * *"

    def create_todump_list(self, force=False):
        """Register ``speclist.txt`` in ``self.to_dump`` when a download is needed.

        The file is queued when ``force`` is truthy, when no local copy
        exists at the current location, or when
        ``self.remote_is_better(...)`` returns a truthy value for it.
        The checks short-circuit in that order, so the remote comparison
        is skipped whenever an earlier condition already holds.

        Args:
            force: queue the file unconditionally when truthy.
        """
        remote_name = "speclist.txt"
        target_path = os.path.join(self.new_data_folder, remote_name)
        try:
            existing_path = os.path.join(self.current_data_folder, remote_name)
        except TypeError:
            # No current data folder is set (os.path.join rejects the
            # non-path value), so fall back to the new file's location.
            existing_path = target_path

        needs_download = (
            force
            or not os.path.exists(existing_path)
            or self.remote_is_better(remote_name, existing_path)
        )
        if not needs_download:
            return

        # Queue the file for the new release (the original note says the
        # release will be stored in the backend).
        self.to_dump.append({"remote": remote_name, "local": target_path})

0 comments on commit ee674c5

Please sign in to comment.