Skip to content

Commit

Permalink
Removed bel library dependency
Browse files Browse the repository at this point in the history
  • Loading branch information
wshayes committed Dec 20, 2019
1 parent fb5c786 commit 5e1485c
Show file tree
Hide file tree
Showing 61 changed files with 4,563 additions and 3,219 deletions.
23 changes: 23 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"workbench.colorCustomizations": {
"activityBar.background": "#60e75a",
"activityBar.activeBorder": "#7479eb",
"activityBar.foreground": "#15202b",
"activityBar.inactiveForeground": "#15202b99",
"activityBarBadge.background": "#7479eb",
"activityBarBadge.foreground": "#15202b",
"titleBar.activeBackground": "#35e12d",
"titleBar.inactiveBackground": "#35e12d99",
"titleBar.activeForeground": "#15202b",
"titleBar.inactiveForeground": "#15202b99",
"statusBar.background": "#35e12d",
"statusBarItem.hoverBackground": "#23c01b",
"statusBar.foreground": "#15202b"
},
"peacock.remoteColor": "#35e12d",
"python.pythonPath": ".venv/bin/python",
"python.testing.promptToConfigure": false,
"python.testing.pytestEnabled": false,
"python.testing.unittestEnabled": false,
"python.testing.nosetestsEnabled": false
}
9 changes: 7 additions & 2 deletions README.rst → README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
BEL Resource tools
==============
# BEL Resource tools

This repository contains scripts to create standardized load files for BEL Resources for use with the BEL.bio API.

[Documentation](http://bel-resources.readthedocs.io/en/latest/)

## Install/setup

Run

bin/
File renamed without changes.
File renamed without changes.
14 changes: 5 additions & 9 deletions tools/backbone/gene2protein.py → app/backbone/gene2protein.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,17 @@
m() and r() translatedTo p()
"""

import json
import gzip
import json

from tools.utils.Config import config

import tools.setup_logging
import app.settings as settings
import structlog

log = structlog.getLogger(__name__)

eg_datafile = f'{config["bel_resources"]["file_locations"]["data"]}/namespaces/eg.jsonl.gz'
backbone_fn = (
f'{config["bel_resources"]["file_locations"]["data"]}/backbone/eg_backbone_nanopubs.jsonl.gz'
)
backbone_hmrz_fn = f'{config["bel_resources"]["file_locations"]["data"]}/backbone/eg_backbone_nanopubs_hmrz.jsonl.gz'
eg_datafile = f"{settings.DATA_DIR}/namespaces/eg.jsonl.gz"
backbone_fn = f"{settings.DATA_DIR}/backbone/eg_backbone_nanopubs.jsonl.gz"
backbone_hmrz_fn = f"{settings.DATA_DIR}/backbone/eg_backbone_nanopubs_hmrz.jsonl.gz"

hmrz_species = ["TAX:9606", "TAX:10090", "TAX:10116", "TAX:7955"]

Expand Down
File renamed without changes.
File renamed without changes.
96 changes: 49 additions & 47 deletions tools/namespaces/TEMPLATE.py → app/namespaces/TEMPLATE.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,23 @@
"""

import sys
import re
import os
import tempfile
import json
import yaml
import datetime
import copy
import datetime
import gzip
from typing import List, Mapping, Any, Iterable
import json
import os
import re
import sys
import tempfile
from typing import Any, Iterable, List, Mapping

import tools.utils.utils as utils
from tools.utils.Config import config
import yaml

import tools.setup_logging
import app.settings as settings
import app.setup_logging
import app.utils as utils
import structlog

log = structlog.getLogger(__name__)

"""
Expand All @@ -31,42 +32,44 @@
3. Dataset preprocessing - e.g. double check term names for duplicates if you
plan on using them for IDs, pre-collect information needed to build the term record
4. Process terms and write them to terms_fp file
filter out species not in config['bel_resources']['species_list'] unless empty list
filter out species not in settings.SPECIES_LIST unless empty list
"""

# Globals ###################################################################
namespace_key = 'REPLACEME' # namespace key into namespace definitions file
namespace_def = utils.get_namespace(namespace_key, config)
ns_prefix = namespace_def['namespace']
namespace_key = "REPLACEME"  # namespace key into namespace definitions file
# namespace_def is the single definition entry selected by namespace_key, so its
# fields are read directly (get_metadata() reads namespace_def["namespace"] the
# same way); indexing it by namespace_key a second time would raise KeyError.
namespace_def = settings.NAMESPACE_DEFINITIONS[namespace_key]
ns_prefix = namespace_def["namespace"]

# FTP options
server = 'REPLACEME'
source_data_fp = 'REPLACEME' # may have multiple files to be downloaded
server = "REPLACEME"
source_data_fp = "REPLACEME" # may have multiple files to be downloaded
# Web file options
url = 'REPLACEME' # may have multiple files to be downloaded
url = "REPLACEME" # may have multiple files to be downloaded

# Local data filepath setup
basename = os.path.basename(source_data_fp)

if not re.search('.gz$', basename): # we basically gzip everything retrieved that isn't already gzipped
basename = f'{basename}.gz'
# We basically gzip everything retrieved that isn't already gzipped, so make
# sure the local filename carries a literal ".gz" suffix. A plain suffix test is
# used because the unescaped "." in the pattern ".gz$" matches any character
# (e.g. "foo_gz" would have been treated as already gzipped).
if not basename.endswith(".gz"):
    basename = f"{basename}.gz"

# Pick one of the two following options
local_data_fp = f'{config["bel_resources"]["file_locations"]["downloads"]}/{namespace_key}_{basename}'
# local_data_fp = f'{config["bel_resources"]["file_locations"]["downloads"]}/{basename}' # if namespace_key already is prefixed to basename
local_data_fp = f"{settings.DOWNLOAD_DIR}/{namespace_key}_{basename}"
# local_data_fp = f'{settings.DOWNLOAD_DIR}/{basename}' # if namespace_key already is prefixed to basename


def get_metadata():
# Setup metadata info - mostly captured from namespace definition file which
# can be overridden in belbio_conf.yml file
dt = datetime.datetime.now().replace(microsecond=0).isoformat()
metadata = {
"name": namespace_def['namespace'],
"namespace": namespace_def['namespace'],
"description": namespace_def['description'],
"name": namespace_def["namespace"],
"namespace": namespace_def["namespace"],
"description": namespace_def["description"],
"version": dt,
"src_url": namespace_def['src_url'],
"url_template": namespace_def['template_url'],
"src_url": namespace_def["src_url"],
"url_template": namespace_def["template_url"],
}

return metadata
Expand All @@ -81,59 +84,58 @@ def update_data_files() -> bool:
bool: files updated = True, False if not
"""

update_cycle_days = config['bel_resources']['update_cycle_days']
# Can override/hard-code update_cycle_days in each term collection file if desired
# Can override/hard-code settings.UPDATE_CYCLE_DAYS in each term collection file if desired

# This is all customizable - but here are some of the most common options

# Get ftp file - but not if local downloaded file is newer
# result = utils.get_ftp_file(server, source_data_fp, local_data_fp, days_old=update_cycle_days)
# result = utils.get_ftp_file(server, source_data_fp, local_data_fp, days_old=settings.UPDATE_CYCLE_DAYS)

# Get web file - but not if local downloaded file is newer
# (changed_flag, msg) = utils.get_web_file(url, local_data_fp, days_old=update_cycle_days)
# (changed_flag, msg) = utils.get_web_file(url, local_data_fp, days_old=settings.UPDATE_CYCLE_DAYS)
# log.info(msg)


def build_json(force: bool = False):
"""Build term JSONL file"""

# Terminology JSONL output filename
data_fp = config["bel_resources"]["file_locations"]["data"]
terms_fp = f'{data_fp}/namespaces/{namespace_key}.jsonl.gz'
data_fp = settings.DATA_DIR
terms_fp = f"{data_fp}/namespaces/{namespace_key}.jsonl.gz"

# used if you need a tmp dir to do some processing
# tmpdir = tempfile.TemporaryDirectory()

# Don't rebuild file if it's newer than downloaded source file
if not force:
if utils.file_newer(terms_fp, local_data_fp):
log.info('Will not rebuild data file as it is newer than downloaded source files')
log.info("Will not rebuild data file as it is newer than downloaded source files")
return False

with gzip.open(local_data_fp, 'rt') as fi, gzip.open(terms_fp, 'wt') as fo:
with gzip.open(local_data_fp, "rt") as fi, gzip.open(terms_fp, "wt") as fo:

# Header JSONL record for terminology
metadata = get_metadata()
fo.write("{}\n".format(json.dumps({'metadata': metadata})))
fo.write("{}\n".format(json.dumps({"metadata": metadata})))

for row in fi:

# review https://github.com/belbio/schemas/blob/master/schemas/terminology-0.1.0.yaml
# for what should go in here
term = {
'namespace': ns_prefix,
'src_id': '',
'id': '',
'alt_ids': [],
'label': '',
'name': '',
'synonyms': copy.copy(list(set([]))),
'entity_types': [],
'equivalences': [],
"namespace": ns_prefix,
"src_id": "",
"id": "",
"alt_ids": [],
"label": "",
"name": "",
"synonyms": copy.copy(list(set([]))),
"entity_types": [],
"equivalences": [],
}

# Add term to JSONLines file
fo.write("{}\n".format(json.dumps({'term': term})))
fo.write("{}\n".format(json.dumps({"term": term})))


def main():
Expand All @@ -142,5 +144,5 @@ def main():
build_json()


if __name__ == '__main__':
if __name__ == "__main__":
main()
File renamed without changes.

0 comments on commit 5e1485c

Please sign in to comment.