## Instructions for Notebook
### Drugbank Integration using Cardiac Drug List

1: Put this notebook in an empty directory (the data file it downloads is saved next to it)

2: Edit the username, password, and uri in the connection cell to match your local Neo4j server

3: Run all!


In [65]:
from neo4j import GraphDatabase
import pandas as pd
from tqdm import tqdm
from pathlib import Path
import os.path
import wget 
import json
import requests

In [66]:
# Downloads the integration data file (first run only) and loads it into `file`,
# the list of drug records every later cell iterates over.

url = "https://drive.google.com/uc?export=download&id=15esDF2aHkpJI7xvycUyhprpV41bNau3s"
data_path = Path("cvdrug_ent_drugpw.json")

if not data_path.exists():
    # Name the output explicitly so the file always lands where it is read from
    # below, whatever filename the server suggests.
    wget.download(url, out = str(data_path))

# The context manager closes the handle once parsing is done; utf-8 is given
# explicitly so loading does not depend on the platform's default encoding.
with open(data_path, encoding = "utf-8") as handle:
    file = json.load(handle)
print(file[0])

{'drugbank_id': 'DB00009', 'name': 'Alteplase', 'synonyms': ['Alteplasa', 'Alteplase (genetical recombination)', 'Alteplase, recombinant', 'Alteplase,recombinant', 'Plasminogen activator (human tissue-type protein moiety)', 'rt-PA', 't-PA', 't-plasminogen activator', 'Tissue plasminogen activator', 'Tissue plasminogen activator alteplase', 'Tissue plasminogen activator, recombinant', 'tPA'], 'descriptions': 'Human tissue plasminogen activator, purified, glycosylated, 527 residues purified from CHO cells', 'categories': ['Agents causing angioedema', 'Amino Acids, Peptides, and Proteins', 'Anticoagulants', 'Biological Factors', 'Blood and Blood Forming Organs', 'Blood Proteins', 'Cardiovascular Agents', 'Endopeptidases', 'Enzymes', 'Enzymes and Coenzymes', 'Fibrin Modulating Agents', 'Fibrinolytic Agents', 'Hematologic Agents', 'Hydrolases', 'Ophthalmologicals', 'Peptide Hydrolases', 'Plasminogen Activators', 'Proteins', 'Sensory Organs', 'Serine Endopeptidases', 'Serine Proteases', 'Tis

In [67]:
# Connection settings: change these to match your local Neo4j server.
uri = "bolt://localhost:7687"
username = "neo4j"
password = "heart"

# Driver object the deployment cells below open their sessions from.
driver = GraphDatabase.driver(uri, auth=(username, password))

#### Deploying Drug Nodes:

In [68]:
def create_drugs(tx, drug, name, descr, cat):
    """
    merges a single Drug node keyed on its id; name, description and cat
    are only written when the node is created by this call
    @param tx is the transaction whose run() executes the query
    @param drug is the id of the Drug node
    @param name is stored as d.name
    @param descr is stored as d.description
    @param cat is stored as d.cat
    @return is whatever tx.run() returns
    """
    params = {"drug": drug, "name": name, "descr": descr, "cat": cat}
    cypher = """
            MERGE (d:Drug{id: $drug})
            ON CREATE SET d.name = $name, d.description = $descr , d.cat = $cat
            """
    return tx.run(cypher, **params)

In [69]:
# Deploy every drug record as a Drug node, one write transaction per record.
with driver.session() as session:
    for datum in tqdm(file, desc = "Deploying Drugs: "):
        drug, name, descr = datum["drugbank_id"], datum["name"], datum["descriptions"]
        # categories is a list; it is stored as one comma-separated string
        cat = ", ".join(datum["categories"])
        session.write_transaction(create_drugs, drug, name, descr, cat)
print("----Done----")

Deploying Drugs: 100%|██████████| 322/322 [00:00<00:00, 451.08it/s]

----Done----





#### Deploying Protein Nodes:

In [70]:
def create_proteins(tx, unid, name):
    """
    merges a single Protein node keyed on its id; a new node gets its name
    and t_type = 'target', an existing node only has its name overwritten
    @param tx is the transaction whose run() executes the query
    @param unid is the id of the Protein node
    @param name is stored as p.name
    @return is whatever tx.run() returns
    """
    params = {"unid": unid, "name": name}
    cypher = """
            MERGE (p:Protein{id: $unid})
            ON CREATE SET p.name = $name, p.t_type = 'target'
            ON MATCH SET p.name = $name
            """
    return tx.run(cypher, **params)

In [71]:
# Deploy a Protein node for every target listed under every drug record.
with driver.session() as session:
    for datum in tqdm(file, desc = "Deploying Target Proteins: "):
        for targets in datum["targets"]:
            unid, name = targets["uniprot_id"], targets["name"]
            session.write_transaction(create_proteins, unid, name)
print("----Done----")

Deploying Target Proteins: 100%|██████████| 322/322 [00:02<00:00, 124.51it/s]

----Done----





#### Matching Drugs with Drug Targets:

In [72]:
def link_drug_target(tx, drug, unid):
    """
    merges a TARGET relationship pointing from a Drug node to a Protein
    node, both looked up by their id
    @param tx is the transaction whose run() executes the query
    @param drug is the id of the Drug node
    @param unid is the id of the Protein node
    @return is whatever tx.run() returns
    """
    params = {"drug": drug, "unid": unid}
    cypher = """
            MATCH (p:Protein) WHERE p.id = $unid
            MATCH (d:Drug) WHERE d.id = $drug
            MERGE (p)<-[:TARGET]-(d)
            """
    return tx.run(cypher, **params)

In [73]:
# Link each drug to every one of its targets, one write transaction per pair.
with driver.session() as session:
    for datum in tqdm(file, desc = "Creating Relationships "):
        drug, target_list = datum["drugbank_id"], datum["targets"]
        for targets in target_list:
            unid = targets["uniprot_id"]
            session.write_transaction(link_drug_target, drug, unid)
print("----Done----")

Creating Relationships : 100%|██████████| 322/322 [00:02<00:00, 108.00it/s]

----Done----





#### Matching Drug Targets with Pathways (NOTE: ~10 minute runtime)

In [74]:
def extract_pathway(unid, timeout = 30) -> list:
    """
    grabs all reactome pathways associated with protein by downloading the
    protein's UniProt text entry and scanning it for Reactome references
    @param unid is the uniprot id that is going to be grabbed
    @param timeout is the number of seconds handed to requests.get so a
           stalled connection cannot hang the notebook forever
    @return is a list with all of the reactome pathways; it is empty when
            the entry lists none or the server did not answer with 200
    """
    #get the url
    url = 'https://www.uniprot.org/uniprot/' + unid + '.txt'

    #check the response
    response = requests.get(url=url, timeout=timeout)

    #if not successful, report it and give the caller an empty list so it
    #can keep iterating instead of receiving None
    if response.status_code != 200:
        print('Error, Status Code: %s' % response.status_code)
        return []

    pathway_list = []
    #search for pathways, one line of the entry at a time
    for line in response.text.splitlines():
        if 'Reactome;' not in line:
            continue
        fields = line.split()
        #the pathway id is the third whitespace-separated field; skip lines
        #too short to carry one rather than fail with an IndexError
        if len(fields) > 2:
            #drop the trailing separator character from the id
            pathway_list.append(fields[2][:-1])
    return pathway_list

In [75]:
def deploy_pathways(tx, pathway):
    """
    merges a single Pathway node keyed on its id
    @param tx is the transaction whose run() executes the query
    @param pathway is the id of the Pathway node
    @return is whatever tx.run() returns
    """
    cypher = """
            MERGE (r:Pathway{id: $pathway})
            """
    return tx.run(cypher, **{"pathway": pathway})

def link_pathways(tx, unid, pathway):
    """
    merges a CANDIDATE relationship pointing from a Pathway node to a
    Protein node, both looked up by their id
    @param tx is the transaction whose run() executes the query
    @param unid is the id of the Protein node
    @param pathway is the id of the Pathway node
    @return is whatever tx.run() returns
    """
    params = {"unid": unid, "pathway": pathway}
    cypher = """
            MATCH (p:Protein) WHERE p.id = $unid
            MATCH (r:Pathway) WHERE r.id = $pathway
            MERGE (p)<-[:CANDIDATE]-(r)
            """
    return tx.run(cypher, **params)

In [None]:
print("NOTE: ~10 minute runtime")
# Uniprot ids already handled. Several drugs can list the same target, and
# every lookup is a web request, so each protein is fetched only once; the
# MERGE queries would create nothing new on a repeat anyway.
processed = set()
with driver.session() as session:
    for datum in tqdm(file, desc = "Linking Pathways: "):
        for targets in datum["targets"]:
            unid = targets["uniprot_id"]
            if unid in processed:
                continue
            processed.add(unid)
            # Treat a missing (None) lookup result as "no pathways" so one
            # failed lookup does not abort the whole run.
            pathways = extract_pathway(unid) or []
            for p in pathways:
                session.write_transaction(deploy_pathways, p)
                session.write_transaction(link_pathways, unid, p)
print("----Done----")