#### Oxidative Stress - Ion Channel Knowledge Graph (Network)
Author: Irsyad Adam

03/1/022

In [None]:
import json 
import pandas as pd
from neo4j import GraphDatabase
from tqdm import tqdm

driver = GraphDatabase.driver(uri = "bolt://localhost:7687",\
                              auth = ("neo4j","heart"))

##### Create Constraints for Uniqueness (Run Only Once)

In [None]:
query = "CREATE CONSTRAINT ON (p:Protein) ASSERT p.id  IS UNIQUE"
query1 = "CREATE CONSTRAINT ON (pw:Pathway) ASSERT pw.id  IS UNIQUE"
query2 = "CREATE CONSTRAINT ON (d:Document) ASSERT d.id  IS UNIQUE"
query3 = "CREATE CONSTRAINT ON (m:MeSH) ASSERT m.id  IS UNIQUE"
query4 = "CREATE CONSTRAINT ON (dg:Drug) ASSERT dg.id  IS UNIQUE"
        

'''UNCOMMENT AND RUN THIS CELL ONLY ONCE'''

with driver.session() as session:
    info = session.run(query)
    info = session.run(query1)
    info = session.run(query2)
    info = session.run(query3)
    info = session.run(query4)

##### Import Ion Channel Proteins

In [3]:
proteins = pd.read_csv("data/importantProteins.csv")
proteins[0:10]

Unnamed: 0,name,id
0,Amyloid beta A4 protein,P05067
1,Matrix metalloproteinase-9,P14780
2,Voltage-dependent P/Q-type calcium channel sub...,O00555
3,Extracellular calcium-sensing receptor,P41180
4,"Nitric oxide synthase, brain",P29475
5,Dystrophin,P11532
6,Gap junction alpha-1 protein,P17302
7,Caveolin-1,Q03135
8,Dopamine D2 receptor,P14416
9,P2X purinoceptor 3,P56373


In [4]:
def create_protein_nodes(tx, uid, name, type):
    """
    :param uid: is the protein id in uniprot
    :param name: is the name of the protein in uniprot
    """
    query = "MERGE (p:Protein{id: $uid, name: $name, type: $type})"
    tx.run(query, uid = uid, name = name, type = type)

In [5]:
print("Length of Data: ", len(proteins), "iterations", flush = True)
type = "Ion Channel"
with driver.session() as session:
    for uid, name in tqdm(zip(proteins["id"], proteins['name']), desc="Importing Ion Channel Proteins"):
        session.write_transaction(create_protein_nodes, uid, name, type)

print("Success")

Length of Data:  133 iterations


Importing Ion Channel Proteins: 133it [00:00, 258.98it/s]

Success





##### Importing PMIDs (PubMed Docs)

In [12]:
df2 = pd.read_csv("data/CVDpmid.csv")
df2

Unnamed: 0.1,Unnamed: 0,pmid,title,abstract,mesh,date
0,0,20091048,Cardiac sodium channelopathies.,cardiac sodium channel are protein complexes t...,"['Animals', 'Arrhythmias, Cardiac', 'genetics'...","{'Year': '2010', 'Month': 'Jul', 'Day': '', 'S..."
1,1,30517097,The administration of high-mobility group box ...,we hypothesized that systemic administration o...,"['Animals', 'Animals, Genetically Modified', '...","{'Year': '2018', 'Month': '', 'Day': '', 'Seas..."
2,2,27853260,The synthetic antimicrobial peptide 19-2.5 att...,an impairment of cardiac function is a key fea...,"['Animals', 'Antimicrobial Cationic Peptides',...","{'Year': '2016', 'Month': '11', 'Day': '17', '..."
3,3,19432907,Current understanding and management of dilate...,to review the current understanding of the pat...,"['Adrenergic beta-Antagonists', 'therapeutic u...","{'Year': '2009', 'Month': 'May', 'Day': '', 'S..."
4,4,24898986,Comorbidity profiles and inpatient outcomes du...,treatment of heart failure (hf) is particularl...,"['Aged', 'Comorbidity', 'Female', 'Heart Failu...","{'Year': '2014', 'Month': 'Jun', 'Day': '05', ..."
...,...,...,...,...,...,...
16733,16770,27796324,Myocyte-specific enhancer factor 2C: a novel t...,the role of microrna 214 3p (mir 214 3p) in ca...,"['Angiotensin II', 'toxicity', 'Animals', 'Ant...","{'Year': '2016', 'Month': '10', 'Day': '31', '..."
16734,16771,2963469,123-Iodine heptadecanoic acid (HDA) cardiac me...,in 8 sportsmen and 8 healthy subjects of simil...,"['Adult', 'Cardiomegaly', 'diagnostic imaging'...","{'Year': '1987', 'Month': '', 'Day': '', 'Seas..."
16735,16772,15522283,Inhibition of phenylephrine induced hypertroph...,the effect of the putative mitochondrial k(atp...,"['Adrenergic alpha-1 Receptor Agonists', 'Anim...","{'Year': '2004', 'Month': 'Nov', 'Day': '', 'S..."
16736,16773,30573811,Association between epicardial adipose tissue ...,"epicardial adipose tissue (eat), metabolically...","['Adipose Tissue', 'diagnostic imaging', 'Aged...","{'Year': '2019', 'Month': '04', 'Day': '', 'Se..."


In [13]:
def create_document_nodes(tx, pmid, title, date):
    query = "MERGE (d:Document{id:$pmid, title:$title, date:$date})"
    tx.run(query, pmid=pmid, title=title, date=date)

In [14]:
print("Length of Data: ", len(df2), "iterations", flush = True)
with driver.session() as session:
    for pmid, title, date in tqdm(zip(df2["pmid"],df2["title"],df2["date"]), desc = "Importing Documents"):
        session.write_transaction(create_document_nodes, pmid, title, date)

print("Success")

Length of Data:  16738 iterations


Importing Documents: 16738it [02:21, 118.25it/s]

Success





##### Importing CVD MeSH

In [19]:
df3 = pd.read_csv("data/CVD_mesh_nodes.csv")
df3

Unnamed: 0.1,Unnamed: 0,Name,ID,CVD
0,0,cardiomyopathies,C14.280.238,CM
1,1,arrhythmogenic right ventricular dysplasia,C16.131.240.400.145,CM
2,2,"cardiomyopathy, alcoholic",C25.775.100.087.250,CM
3,3,"cardiomyopathy, dilated",C16.320.488.750,CM
4,4,"cardiomyopathy, hypertrophic",C14.280.484.048.750.070.160,CM
...,...,...,...,...
221,221,cardiac papillary fibroelastoma,C14.280.459.250,OHD
222,222,carney complex,C16.131.831.108,OHD
223,223,pericarditis,C14.280.720,OHD
224,224,"pericarditis, constrictive",C14.280.720.595,OHD


In [20]:
def create_mesh_nodes(tx, mid, name):
    query = "MERGE (m:MeSH{id:$mid, name:$name})"
    tx.run(query, mid=mid, name=name)

In [25]:
print("Length of Data: ", len(df3), "iterations", flush = True)
with driver.session() as session:
    for mid,name in tqdm(zip(df3["ID"],df3["Name"]), desc = "Importing MeSH"):
        session.write_transaction(create_mesh_nodes, mid,name)

print("Success")

Length of Data:  226 iterations


Importing MeSH: 226it [00:01, 201.11it/s]

Success





##### Linking CVD MeSH with PubMed Documents

In [27]:
dfe1 = pd.read_csv("data/PMIDtoCVDMesh.csv")
dfe1

Unnamed: 0.1,Unnamed: 0,pmid,name,mid
0,0,20091048,"arrhythmias, cardiac",C23.550.073
1,1,20091048,brugada syndrome,C16.320.100
2,2,20091048,"cardiomyopathy, dilated",C16.320.488.750
3,3,20091048,long qt syndrome,C23.550.073.547
4,4,20091048,sick sinus syndrome,C23.550.073.425.440
...,...,...,...,...
23118,25233,27796324,cardiomegaly,C23.300.775.250
23119,25234,2963469,cardiomegaly,C23.300.775.250
23120,25235,15522283,cardiomegaly,C23.300.775.250
23121,25236,30573811,"ventricular dysfunction, left",C14.280.945.900


In [29]:
def create_document2mesh_edge(tx, pmid, mid):
    query = '''
    MATCH (d:Document{id:$pmid})
    MATCH (m:MeSH{id:$mid})
    MERGE (d)-[:ASSIGNS]->(m)
    '''
    tx.run(query, pmid=pmid, mid=mid)

In [32]:
print("Length of Data: ", len(dfe1), "iterations", flush = True)
with driver.session() as session:
    for pmid, mid in tqdm(zip(dfe1["pmid"], dfe1["mid"]), desc = "Creating Relationships"):
        session.write_transaction(create_document2mesh_edge, pmid, mid)

print("Success")

Length of Data:  23123 iterations


Creating Relationships: 23123it [01:27, 265.28it/s]

Success





##### Linking Ion Channel Proteins with CVD MeSH

In [33]:
dfe2 = pd.read_csv("data/pmidtoProtein.csv")
dfe2

Unnamed: 0.1,Unnamed: 0,pmid,protein,edge,keep
0,0,20091048,P56539,MENTIONS,True
1,1,30517097,P11532,MENTIONS,True
2,2,27853260,P16615,MENTIONS,True
3,3,19432907,P11532,MENTIONS,True
4,4,24898986,O00555,MENTIONS,True
...,...,...,...,...,...
16733,16770,27796324,P32418,MENTIONS,True
16734,16771,2963469,O00555,MENTIONS,True
16735,16772,15522283,P19634,MENTIONS,True
16736,16773,30573811,P05067,MENTIONS,True


In [35]:
def create_document2protein_edge(tx, pmid, uid):
    query = '''
    MATCH (d:Document{id:$pmid})
    MATCH (p:Protein{id:$uid})
    MERGE (d)-[:MENTIONS]->(p)
    '''
    tx.run(query, pmid=pmid, uid=uid)

In [37]:
print("Length of Data: ", len(dfe2), "iterations", flush = True)
with driver.session() as session:
    for pmid, uid in tqdm(zip(dfe2["pmid"], dfe2["protein"]), desc = "Creating Relationships"):
        session.write_transaction(create_document2protein_edge, pmid, uid)

print("Success")

Length of Data:  16738 iterations


Creating Relationships: 16738it [02:36, 106.63it/s]

Success





##### Importing CVD Drugs

In [43]:
df4 = pd.read_csv("data/drug-nodes.csv")
df4

Unnamed: 0,name,ID,category,desc,syn,pathways,targets
0,heparin,DB01109,Anticoagulants,Unfractionated heparin (UH) is a heterogenous ...,"['Calciparine', 'Eparina', 'heparina', 'Hepari...","['\n ', '\n ']","['\n ', '\n ', '\n ', '\n ..."
1,warfarin,DB00682,Anticoagulants,Warfarin is an anticoagulant drug normally use...,"['4-Hydroxy-3-(3-oxo-1-phenylbutyl)coumarin', ...",['\n '],"['\n ', '\n ']"
2,streptokinase,DB00086,Thrombolytics,"Streptokinase, is a sterile, purified preparat...","['Streptokinase C precursor', 'Streptokinase']",['\n '],"['\n ', '\n ']"
3,urokinase,DB00013,Thrombolytics,"Low molecular weight form of human urokinase, ...","['U-plasminogen activator', 'uPA', 'Urokinase-...",['\n '],"['\n ', '\n ', '\n ', '\n ..."
4,tpa,DB00009,Thrombolytics,"Human tissue plasminogen activator, purified, ...","['Alteplasa', 'Alteplase (genetical recombinat...",['\n '],"['\n ', '\n ', '\n ', '\n ']"
...,...,...,...,...,...,...,...
137,dronedarone,DB04855,Potassium Channel Blockers,Dronedarone is a sinus rhythm controller for m...,"['Dronedarona', 'Dronedarone', 'N-(2-Butyl-3-(...",[],"['\n ', '\n ', '\n ', '\n ..."
138,vernakalant,DB06217,Potassium Channel Blockers,Vernakalant was developed by Cardiome Pharma a...,"['(3R)-1-((1R,2R)-2-(2-(3,4-dimethoxyphenyl)et...",[],"['\n ', '\n ', '\n ', '\n ']"
139,adenosine,DB00640,Other Anti Arrhythmics,A nucleoside that is composed of adenine and d...,"['(2R,3R,4S,5R)-2-(6-aminopurin-9-yl)-5-(hydro...","['\n ', '\n ', '\n ', '\n ...","['\n ', '\n ', '\n ', '\n ']"
140,magnesium sulfate,DB00653,Other Anti Arrhythmics,A small colorless crystal used as an anticonvu...,"['Magnesium sulfate (1:1)', 'Magnesium sulfate...",[],"['\n ', '\n ', '\n ', '\n ..."


In [49]:
def create_drug_nodes(tx, dgid, cat, name):
    query = "MERGE (dg:Drug{id: $dgid, cat: $cat, name: $name})"
    tx.run(query, dgid=dgid, cat=cat, name=name)

In [51]:
print("Length of Data: ", len(df4), "iterations", flush = True)
with driver.session() as session:
    for dgid, name, cat in tqdm(zip(df4["ID"], df4["name"], df4["category"]), desc= "Importing Drugs"):
        session.write_transaction(create_drug_nodes, dgid, cat, name)

print("Success")

Length of Data:  142 iterations


Importing Drugs: 142it [00:01, 109.48it/s]

Success





##### Importing Drug Targets

In [67]:
df5 = pd.read_csv("data/drug-target-nodes.csv")
df5

Unnamed: 0.1,Unnamed: 0,Protein
0,0,Q01064
1,1,P19634
2,2,Q9BXT2
3,3,P53805
4,4,Q13698
...,...,...
311,312,Q16515
312,313,P63252
313,314,Q9UF02
314,315,Q9H244


In [68]:
def create_drug_target_nodes(tx, uid):
    query = "MERGE (p:Protein{id:$uid})"
    tx.run(query, uid=uid)

In [69]:
print("Length of Data: ", len(df5), "iterations", flush = True)
with driver.session() as session:
    for uid in tqdm(df5["Protein"], desc = "Importing Drug Targets"):
        session.write_transaction(create_drug_target_nodes, uid)

print("Success")

Length of Data:  316 iterations


Importing Drug Targets: 100%|██████████| 316/316 [00:02<00:00, 138.90it/s]

Success





In [71]:
def update_drug_target_nodes(tx, uid,typ):
    query = "MERGE (p:Protein{id:$uid}) ON MATCH SET p.t_type=$typ"
    tx.run(query, uid=uid,typ=typ)

In [72]:
print("Length of Data: ", len(df5), "iterations", flush = True)
with driver.session() as session:
    typ = "target"
    for uid in df5["Protein"]:
        session.write_transaction(update_drug_target_nodes, uid,typ)
        
print("Success")

Length of Data:  316 iterations
Success


##### Linking CVD Drugs to Drug Targets
- if drug target is ion channel protein, cool
- if not, then import protein in graph in hopes of shared biological pathway

In [74]:
dfe3 = pd.read_csv("data/drug2targets-edge.csv")
dfe3

Unnamed: 0.1,Unnamed: 0,name,cat,protein
0,0,heparin,Anticoagulants,P01008
1,1,heparin,Anticoagulants,P00742
2,2,heparin,Anticoagulants,P16109
3,3,heparin,Anticoagulants,P22455
4,4,heparin,Anticoagulants,P08620
...,...,...,...,...
693,693,atropine,Other Anti Arrhythmics,P08173
694,694,atropine,Other Anti Arrhythmics,P08912
695,695,atropine,Other Anti Arrhythmics,P23415
696,696,atropine,Other Anti Arrhythmics,P43681


In [75]:
def create_dg2t_edge(tx, name, uid):
    query = '''
    MATCH (dg:Drug{name:$name})
    MATCH (p:Protein{id:$uid})
    MERGE (dg)-[:TARGET]->(p)
    '''
    tx.run(query, name=name, uid=uid)

In [77]:
print("Length of Data: ", len(dfe3), "iterations", flush = True)
with driver.session() as session:
    for dgid, uid in tqdm(zip(dfe3["name"], dfe3["protein"]), desc = "Creating Relationships"):
        session.write_transaction(create_dg2t_edge, dgid, uid)

print("Success")

Length of Data:  698 iterations


Creating Relationships: 698it [00:06, 102.69it/s]

Success





##### Deploying Pathways

In [88]:
df6 = pd.read_csv("data/pw2protein-edge.csv")
df6

Unnamed: 0.1,Unnamed: 0,ID,Pathway,Protein,Relation
0,0,R-HSA-5578775,Ion homeostasis,Q96D31,INVOLVED_IN
1,1,R-HSA-5578775,Ion homeostasis,Q14643,INVOLVED_IN
2,2,R-HSA-5578775,Ion homeostasis,Q92736,INVOLVED_IN
3,3,R-HSA-5578775,Ion homeostasis,Q13586,INVOLVED_IN
4,4,R-HSA-5578775,Ion homeostasis,P30626,INVOLVED_IN
...,...,...,...,...,...
1424,1424,R-HSA-74160,Gene expression (Transcription),P0DP23,INVOLVED_IN
1425,1425,R-HSA-74160,Gene expression (Transcription),O00141,INVOLVED_IN
1426,1426,R-HSA-74160,Gene expression (Transcription),Q13557,INVOLVED_IN
1427,1427,R-HSA-74160,Gene expression (Transcription),Q03135,INVOLVED_IN


In [93]:
def create_pathway_nodes(tx, pwid, name):
    query = "MERGE (pw:Pathway{id:$pwid,name:$name})"
    tx.run(query, pwid=pwid, name=name)

In [94]:
print("Length of Data: ", len(df6), "iterations", flush = True)
with driver.session() as session:
    for pwid,name in tqdm(zip(df6["ID"],df6["Pathway"]), "Importing Pathways"):
        session.write_transaction(create_pathway_nodes, pwid, name)
    
print("Success")

Length of Data:  1429 iterations


Importing Pathways: 1429it [00:08, 160.69it/s]

Success





In [95]:
def create_pw2p_edge(tx, pwid, uid):
    query = '''
    MATCH (pw:Pathway{id:$pwid})
    MATCH (p:Protein{id:$uid})
    MERGE (pw)-[:CANDIDATE]->(p)
    '''
    tx.run(query, pwid=pwid, uid=uid)

In [96]:
print("Length of Data: ", len(df6), "iterations", flush = True)
with driver.session() as session:
    for pwid, uid in tqdm(zip(df6["ID"], df6["Protein"]), desc = "Creating Relationships"):
        session.write_transaction(create_pw2p_edge, pwid, uid)

print("Success")

Length of Data:  1429 iterations


Creating Relationships: 1429it [00:13, 104.68it/s]

Success



