Skip to content

Commit

Permalink
Update data acquisition
Browse files Browse the repository at this point in the history
Probably needs a rewrite, since this was written for DrugBank 4
  • Loading branch information
cthoyt committed Dec 14, 2020
1 parent ccab91a commit 18c7ffd
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 69 deletions.
2 changes: 1 addition & 1 deletion .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ max-complexity = 10
import-order-style = pycharm
application-import-names =
bio2bel_drugbank
bio2bel_uniprot
tests
bio2bel
pybel
pyobo
format = ${cyan}%(path)s${reset}:${yellow_bold}%(row)d${reset}:${green_bold}%(col)d${reset}: ${red_bold}%(code)s${reset} %(text)s
7 changes: 3 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,15 @@
'License :: OSI Approved :: MIT License',
]
INSTALL_REQUIRES = [
'pybel>=0.13.0,<0.14.0',
'bio2bel>=0.2.0,<0.3.0',
'bio2bel_hgnc>=0.1.0',
'pybel>=0.15.0',
'bio2bel',
'click',
'pandas',
'sqlalchemy',
'tqdm',
'beautifulsoup4',
'requests',
'bio2bel_uniprot',
'drugbank_downloader',
]
EXTRAS_REQUIRE = {
'web': [
Expand Down
3 changes: 0 additions & 3 deletions src/bio2bel_drugbank/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,3 @@

MODULE_NAME = 'drugbank'
DATA_DIR = get_data_dir(MODULE_NAME)

DRUGBANK_URL = 'https://www.drugbank.ca/releases/5-1-4/downloads/all-full-database'
DRUGBANK_PATH = os.path.join(DATA_DIR, 'drugbank_all_full_database.xml.zip')
16 changes: 3 additions & 13 deletions src/bio2bel_drugbank/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,12 @@
from collections import Counter, defaultdict
from typing import Dict, Iterable, List, Optional, TextIO, Tuple


import click
import networkx as nx
import pyobo
from sqlalchemy import func
from tqdm import tqdm

import bio2bel_hgnc
from bio2bel import AbstractManager
from bio2bel.manager.bel_manager import BELManagerMixin
from bio2bel.manager.flask_manager import FlaskMixin
Expand All @@ -31,6 +30,7 @@
)
from .parser import extract_drug_info, get_xml_root


__all__ = ['Manager']

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -579,14 +579,10 @@ def to_bel(self, drug_namespace: Optional[str] = None, target_namespace: Optiona
"""Export DrugBank as BEL."""
graph = BELGraph(
name='DrugBank',
version='5.1.4',
)

self.add_namespace_to_graph(graph)

hgnc_manager = bio2bel_hgnc.Manager(engine=self.engine, session=self.session)
hgnc_manager.add_namespace_to_graph(graph)

dpis = self.list_drug_protein_interactions()
dpis: Iterable[DrugProteinInteraction] = tqdm(
dpis,
Expand Down Expand Up @@ -651,18 +647,12 @@ def get_drug_to_hgnc_symbols(self, cache=True, recalculate=False) -> Dict[str, L
with open(_dti_symbols_cache_path) as file:
return json.load(file)

hgnc_manager = bio2bel_hgnc.Manager(engine=self.engine, session=self.session)
if not hgnc_manager.is_populated():
hgnc_manager.populate()

hgnc_id_symbol_mapping = hgnc_manager.build_hgnc_id_symbol_mapping()
drug_to_hgnc_ids = self.get_drug_to_hgnc_ids()

rv = defaultdict(list)

for drug, hgnc_ids in drug_to_hgnc_ids.items():
for hgnc_id in hgnc_ids:
hgnc_symbol = hgnc_id_symbol_mapping.get(hgnc_id)
hgnc_symbol = pyobo.get_name('hgnc', hgnc_id)

if hgnc_symbol is None:
log.warning('could not map HGNC identifier: %s', hgnc_id)
Expand Down
62 changes: 14 additions & 48 deletions src/bio2bel_drugbank/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,16 @@

import itertools as itt
import logging
import os
import re
import sys
import time
import zipfile
from datetime import datetime
from typing import Mapping, Optional
from xml.etree import ElementTree

from drugbank_downloader import parse_drugbank
from tqdm import tqdm

from bio2bel_drugbank.constants import DRUGBANK_PATH
import pyobo.config


log = logging.getLogger(__name__)

Expand All @@ -27,52 +25,19 @@
pubmed_re = re.compile('pubmed/([0-9]+)')


def get_path(path: Optional[str] = None) -> str:
    """Get the path to the DrugBank data.

    :param path: An explicit path to the DrugBank data. If given (anything other than ``None``),
     it is returned unchanged and is not checked for existence.
    :return: ``path`` if it was given, otherwise ``DRUGBANK_PATH`` if a file exists there.
    :raises SystemExit: With exit status 1 if no path was given and nothing exists at
     ``DRUGBANK_PATH``.
    """
    if path is not None:
        return path

    if os.path.exists(DRUGBANK_PATH):
        return DRUGBANK_PATH

    # NOTE(review): the steps below tell the user to unzip the download and copy
    # "full database.xml", but the check above looks for DRUGBANK_PATH - confirm that
    # the instructions and the expected file name actually agree.
    log.critical("""DrugBank data not found.
Unfortunately, the data for DrugBank is not available via an open HTTP(S) or FTP endpoint, so please follow these
steps to get it yourself:
1. Make an account at: https://www.drugbank.ca
2. Navigate to: https://www.drugbank.ca/releases/5-1-1/downloads/all-full-database
3. Unzip the file that gets downloaded. There should be an XML called "full database.xml"
4. Run the following command on the command line: `mkdir -p ~/.bio2bel/drugbank`. If you're specifying the Bio2BEL
directory through a config file or an environment variable, just make a "drugbank" folder there.
5. Copy "full database.xml" into this folder.
6. Resume business as usual (try `bio2bel_drugbank populate` now)""")
    # Missing data is a failure: exit with a non-zero status so shells, CI jobs and
    # calling scripts do not mistake it for a successful run.
    sys.exit(1)


def get_xml_root(path: Optional[str] = None) -> ElementTree.Element:
def get_xml_root(path: Optional[str] = None):
"""Get the XML parser root.
Takes between 35-60 seconds.
:param path: A custom URL for DrugBank XML file
"""
path = get_path(path=path)
log.info('parsing drugbank at %s', path)
t = time.time()

if path.endswith('.xml'):
tree = ElementTree.parse(path)
elif path.endswith('.zip'):
with zipfile.ZipFile(path) as z:
with z.open('full database.xml') as f:
tree = ElementTree.parse(f)
else:
raise ValueError

log.info('parsed drugbank in %.2f seconds', time.time() - t)

return tree.getroot()
if path:
return ElementTree.parse(path).getroot()
return parse_drugbank(
username=pyobo.config.get_config('drugbank_username'),
password=pyobo.config.get_config('drugbank_password'),
)


def extract_drug_info(drug_xml: ElementTree.Element):
Expand Down Expand Up @@ -232,11 +197,12 @@ def get_pubchem_to_drugbank(path=None) -> Mapping[str, str]:


def main():
x = get_pubchem_to_drugbank('/Users/cthoyt/.bio2bel/drugbank/test.xml')
import json
import os

with open('/Users/cthoyt/Desktop/pubchem_to_drugbank.json', 'w') as f:
json.dump(x, f)
x = get_pubchem_to_drugbank()
with open(os.path.expanduser('~/Desktop/pubchem_to_drugbank.json'), 'w') as f:
json.dump(x, f, indent=2)


if __name__ == '__main__':
Expand Down

0 comments on commit 18c7ffd

Please sign in to comment.