In [2]:
import json
import string
import pandas as pd
import nltk
from Bio import Entrez
import requests

## Read data

In [3]:
%%time
# confirmed genes
#with open("cancer_category_rna_prostate.json") as f: genes = json.load(f)
#with open("rheumatoid.json") as f: genes = json.load(f)
with open("Atherosclerosis.json") as f: genes = json.load(f)

# all gene-drug interactions from TTD
all_gene_drug = pd.read_excel("P1-07-Drug-TargetMapping.xlsx", dtype="string")

Wall time: 7.62 s


In [4]:
all_drugs_TTD = list(all_gene_drug.Drug_Name)
len(all_drugs_TTD)

43874

In [5]:
all_drugs_dgidb = pd.read_csv("drugs_dgidb.tsv", dtype="string", delimiter="\t", encoding='latin1')
all_drugs_dgidb = all_drugs_dgidb.fillna('')
all_drugs_dgidb["name"] = all_drugs_dgidb.drug_claim_name + "|" + all_drugs_dgidb.drug_name
all_drugs_dgidb = list(all_drugs_dgidb.name)
for i in range(len(all_drugs_dgidb)):
    if all_drugs_dgidb[i].startswith("|"): all_drugs_dgidb[i] = all_drugs_dgidb[i][1:]
    if all_drugs_dgidb[i].startswith("chembl:"): all_drugs_dgidb[i] = all_drugs_dgidb[i][7:]
    if all_drugs_dgidb[i].endswith("|")  : all_drugs_dgidb[i] = all_drugs_dgidb[i][:-1]
len(all_drugs_dgidb)

57497

In [6]:
from tqdm import tqdm
for item in tqdm(all_drugs_dgidb):
    if not(item in all_drugs_TTD or item.lower() in all_drugs_TTD):
        all_drugs_TTD.append(item)

100%|███████████████████████████████████████████████████████████████████████████| 57497/57497 [02:57<00:00, 324.15it/s]


In [43]:
with open('all_drugs.txt', 'w') as outfile:
    cnt = 0
    for drug in all_drugs_TTD:
        if len(drug) > 3:
            try:
                outfile.write("DRUG_" +  str(cnt) + "\t" + drug + "\n")
            except:
                print(drug)
            cnt+=1

Luspaterceptâaamt
Primaquine ÃÂ 
LuspaterceptÃ¢ÂÂaamt
Erdafitinib ÃÂ 


## Extract relevant data and write to file

In [7]:
gene_names = [ [item["Gene"]] + item["Gene synonym"] for item in genes]
# remove duplicates
gene_names = [list(set(i)) for i in gene_names]

In [10]:
ttdgenes = []
with open("P1-06-Target_disease.txt") as f:
    a = f.readline()
    while a:
        if len(a) > 0 and "TARGETID" in a:
            gene = f.readline()
            gene = gene.split("(")[-1].split(")")[0].split(" ")[0]
            interaction = f.readline()
            while len(interaction) > 0 and interaction[0] == "T":
                if "Atherosclerosis" in interaction or "Arteriosclerosis" in interaction:
                    ttdgenes.append(gene)
                interaction = f.readline()
            a = f.readline()
        else: a = f.readline()
    print(len(ttdgenes))

print(len(gene_names))
for g in ttdgenes:
    #print(g, any([g in gene for gene in gene_names]))
    if not any([g in gene for gene in gene_names]):
        gene_names.append([g])
print(len(gene_names))

52
27
76


In [None]:
# make a dictionary of gene-drug interactions
gene_drug = {}
for name in gene_names:
    searchString = ""
    stname = "|".join(name)
    for i in range(len(name)):
        searchString += "(" + name[i] + "[Title/Abstract])" # for entrez search
        if i < len(name)-1: searchString += " OR "
    x = all_gene_drug.loc[all_gene_drug["Target_Name"].str.contains(stname)]
    gene_drug[name[-1]] = {}
    gene_drug[name[-1]]["synonyms"] = name[:-1]
    gene_drug[name[-1]]["DrugID"] = x["DrugID"].values
    gene_drug[name[-1]]["Drug_Name"] = x["Drug_Name"].values
    gene_drug[name[-1]]["Highest_status"] = x["Highest_status"].values
    gene_drug[name[-1]]["MOA"] = x["MOA"].values
    gene_drug[name[-1]]["entrezQuery"] = searchString

In [12]:
# make a list of drugs for disease genes
drugs = set()
for gene in gene_drug.keys():
    for drug in gene_drug[gene]["Drug_Name"]:
        drugs.add(drug)

In [13]:
#with open('prostateCancer_genes.txt', 'w') as outfile:
#with open('rheumatoidArthritis_genes.txt', 'w') as outfile:
with open('atherosclerosis_genes.txt', 'w') as outfile:
    cnt = 0
    for genes in gene_names:
        outfile.write("GENE_" +  str(cnt) + "\t" + "|".join(genes) + "\n")
        cnt+=1

In [14]:
#with open('prostateCancer_drugs_TTD.txt', 'w') as outfile:
# with open('rheumatoidArthritis_drugs_TTD.txt', 'w') as outfile:
with open('atherosclerosis_drugs_TTD.txt', 'w') as outfile:
    cnt = 0
    for drug in drugs:
        outfile.write("DRUG_" +  str(cnt) + "\t" + drug + "\n")
        cnt+=1

In [15]:
all_genes = [",".join(genelist) for genelist in gene_names]
all_genes = ",".join(all_genes)

In [None]:
URL = "https://www.dgidb.org/api/v2/interactions.json"
r = requests.get(url = URL + "?genes=" + all_genes)
print(r.url)
data = r.json()

In [17]:
drug_genes_dgidb = {}
for key in ['matchedTerms', 'ambiguousTerms']:
    for hit in data[key]:
        drug_genes_dgidb[hit["searchTerm"]] = {"drugs": [], "MOA": []}
        for interaction in hit["interactions"]:
            drug_genes_dgidb[hit["searchTerm"]]["drugs"].append(interaction["drugName"])
            drug_genes_dgidb[hit["searchTerm"]]["MOA"].append(",".join(interaction["interactionTypes"]))

In [18]:
drugs_dgidb = {}
for key in ['matchedTerms', 'ambiguousTerms']:
    for hit in data[key]:
        for interaction in hit["interactions"]:
            drugs_dgidb[interaction["drugName"]] = interaction["interactionTypes"]
print(len(drugs_dgidb))

1724


In [None]:
data["unmatchedTerms"]

In [19]:
all_drugs = [i for i in drugs]
for item in drugs_dgidb.keys():
    if item not in all_drugs or item.lower() not in all_drugs:
        all_drugs.append(item)

In [20]:
#with open('prostateCancer_drugs_dgidb.txt', 'w') as outfile:
# with open('rheumatoidArthritis_drugs_dgidb.txt', 'w') as outfile:
with open('atherosclerosis_drugs_dgidb.txt', 'w') as outfile:
    cnt = 0
    for drug in drugs_dgidb.keys():
        outfile.write("DRUG_" +  str(cnt) + "\t" + drug + "\n")
        cnt+=1
#with open('prostateCancer_drugs_TTDandDgidb.txt', 'w') as outfile:
# with open('rheumatoidArthritis_drugs_TTDandDgidb.txt', 'w') as outfile:
with open('atherosclerosis_drugs_TTDandDgidb.txt', 'w') as outfile:
    cnt = 0
    for drug in all_drugs:
        outfile.write("DRUG_" +  str(cnt) + "\t" + drug + "\n")
        cnt+=1

### Graph vizualization

In [None]:
# MOA field for mechanism, gene has list of drugs and list of mechanisms
#gene_drug["KLK3"]
# gene: list of drugs and list of mechanisms
# drug_genes_dgidb

# n, geneString, list of drugs and list of mechanisms
gene_drug_dict = {}
ind = 0
for gene in gene_drug.keys():
    lookup = [gene] + gene_drug[gene]["synonyms"]
    combinedDrugs = list(gene_drug[gene]["Drug_Name"])
    combinedMechanisms = list(gene_drug[gene]["MOA"])
    for g in lookup:
        if g in drug_genes_dgidb.keys():
            otherDrugs = drug_genes_dgidb[g]["drugs"]
            otherMOA = drug_genes_dgidb[g]["MOA"]
            for i in range(len(otherMOA)):
                if otherDrugs[i].lower() in combinedDrugs or otherDrugs[i] in combinedDrugs: 
                    # combine mechanisms
                    try: j = combinedDrugs.index(otherDrugs[i].lower())
                    except: j = combinedDrugs.index(otherDrugs[i])
                    if otherMOA[i] != "": combinedMechanisms[j] = combinedMechanisms[j] + ", " + otherMOA[i]
                else:
                    combinedDrugs.append(otherDrugs[i])
                    if otherMOA[i] == "": combinedMechanisms.append(".")
                    combinedMechanisms.append(otherMOA[i])
    gene_drug_dict[ind] = {}    
    gene_drug_dict[ind]["genes"] = "|".join(lookup)
    gene_drug_dict[ind]["drugs"] = combinedDrugs
    gene_drug_dict[ind]["MOA"] = combinedMechanisms
    ind+=1
# want to make: drugs and genes as nodes, mechanism as link

In [None]:
gene_drug_dict

## Visualize as network

In [None]:
import networkx as nx
G = nx.Graph()
for gene in gene_drug.keys():
    for drug in gene_drug[gene]["Drug_Name"]:
        G.add_edge(gene, drug)

nx.write_gexf(G, "test.gexf")

In [None]:
import networkx as nx
import matplotlib.pyplot as plt
def visualizegraph(G):
    pos = nx.spring_layout(G)
    nx.draw_networkx(G, pos)
    labels = nx.get_edge_attributes(G, 'moa')
    #print(labels)
    nx.draw_networkx_edge_labels(G, pos, edge_labels=labels)
    plt.savefig("graph.jpg", dpi=10)
    #plt.show()

G2 = nx.Graph()
for node in gene_drug_dict.keys():
    for i in range(len(gene_drug_dict[node]["drugs"])):
        G2.add_edge(gene_drug_dict[node]["genes"], gene_drug_dict[node]["drugs"][i])
        G2[gene_drug_dict[node]["genes"]][gene_drug_dict[node]["drugs"][i]]['moa'] = gene_drug_dict[node]["MOA"][i]

nx.write_gexf(G2, "GRAPH_gene_drug_MOA.gexf")

In [None]:
a = list(G2['Anti-PSA mabs'])
print(G2['Anti-PSA mabs'])
b = G2.subgraph(a)
print(b.edges(data=True))
visualizegraph(G2)