In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the screened-compound table; the first line of the CSV supplies the column names.
compounds_csv = "../data/screened_compounds_rel_8.4.csv"
info = pd.read_csv(compounds_csv)

In [3]:
# Show the column names of the loaded compound table.
info.columns

Index(['DRUG_ID', 'SCREENING_SITE', 'DRUG_NAME', 'SYNONYMS', 'TARGET',
       'TARGET_PATHWAY'],
      dtype='object')

In [4]:
# Preview the first five rows of the compound table.
info.head()

Unnamed: 0,DRUG_ID,SCREENING_SITE,DRUG_NAME,SYNONYMS,TARGET,TARGET_PATHWAY
0,1,MGH,Erlotinib,"Tarceva, RG-1415, CP-358774, OSI-774, Ro-50823...",EGFR,EGFR signaling
1,3,MGH,Rapamycin,"AY-22989, Sirolimus, WY-090217, Torisel, Rapamune",MTORC1,PI3K/MTOR signaling
2,5,MGH,Sunitinib,"Sutent, Sunitinib Malate, SU-11248","PDGFR, KIT, VEGFR, FLT3, RET, CSF1R",RTK signaling
3,6,MGH,PHA-665752,"PHA665752, PHA 665752",MET,RTK signaling
4,9,MGH,MG-132,"LLL cpd, MG 132, MG132","Proteasome, CAPN1",Protein stability and degradation


In [5]:
# Read the drug panel: each line of the file becomes one entry, with
# surrounding whitespace (including the newline) stripped.
with open("../data/drugs_jaaks.txt") as handle:
    drugs = [line.strip() for line in handle]

In [6]:
# Gather the rows of `info` for every drug in the panel, in panel order.
# A drug may match several rows of `info`, or none at all.
# The matches are collected first and concatenated once: concatenating inside
# the loop would re-copy the growing frame on every iteration.
matches = [info[info["DRUG_NAME"] == name] for name in drugs]
# The empty, correctly-typed slice keeps the columns/dtypes of `info` and lets
# the concat succeed even when `drugs` is empty.
drug_targets = pd.concat([info.iloc[:0], *matches])

In [7]:
# (rows, columns) of the gathered rows, before de-duplication by drug name.
drug_targets.shape

(100, 6)

In [8]:
# Keep one row per drug name (the first occurrence is retained).
# reset_index returns a new frame rather than modifying in place, so its result
# has to be assigned for the 0..n-1 renumbering to take effect.
drug_targets = (
    drug_targets
    .drop_duplicates(subset="DRUG_NAME")
    .reset_index(drop=True)
)
drug_targets.shape

(63, 6)

In [9]:
# Square drug-by-drug matrix: ones on the diagonal, zeros everywhere else.
n_drugs = len(drugs)
tar_coeff = np.eye(n_drugs)

In [10]:
# Inspect the freshly initialised matrix.
tar_coeff

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [11]:
# Map each drug name to the pathway recorded in its row of `drug_targets`.
dt = dict(zip(drug_targets['DRUG_NAME'], drug_targets['TARGET_PATHWAY']))

In [12]:
# Inspect the drug -> pathway mapping.
dt

{'Venetoclax': 'Apoptosis regulation',
 'Erlotinib': 'EGFR signaling',
 'Vorinostat': 'Chromatin histone acetylation',
 'Pictilisib': 'PI3K/MTOR signaling',
 'Alpelisib': 'PI3K/MTOR signaling',
 'Olaparib': 'Genome integrity',
 'AZD4547': 'RTK signaling',
 'Nilotinib': 'ABL signaling',
 'AZD6482': 'PI3K/MTOR signaling',
 'Sapitinib': 'EGFR signaling',
 'Linsitinib': 'IGF1R signaling',
 'Dactolisib': 'PI3K/MTOR signaling',
 'Wee1 Inhibitor': 'Cell cycle',
 'Dabrafenib': 'ERK MAPK signaling',
 'Taselisib': 'PI3K/MTOR signaling',
 '5-Fluorouracil': 'Other',
 'Bortezomib': 'Protein stability and degradation',
 'LGK974': 'WNT signaling',
 'JQ1': 'Chromatin other',
 'AZD8055': 'PI3K/MTOR signaling',
 'Crizotinib': 'RTK signaling',
 'BMS-754807': 'RTK signaling',
 'OSI-027': 'PI3K/MTOR signaling',
 'Dasatinib': 'RTK signaling',
 'PF-4708671': 'PI3K/MTOR signaling',
 'Palbociclib': 'Cell cycle',
 'Nutlin-3a (-)': 'p53 pathway',
 'PD173074': 'RTK signaling',
 'Trametinib': 'ERK MAPK signaling',

In [13]:
# Drug names in mapping order; the same order is used below to index the
# rows and columns of the matrix.
dt.keys()

dict_keys(['Venetoclax', 'Erlotinib', 'Vorinostat', 'Pictilisib', 'Alpelisib', 'Olaparib', 'AZD4547', 'Nilotinib', 'AZD6482', 'Sapitinib', 'Linsitinib', 'Dactolisib', 'Wee1 Inhibitor', 'Dabrafenib', 'Taselisib', '5-Fluorouracil', 'Bortezomib', 'LGK974', 'JQ1', 'AZD8055', 'Crizotinib', 'BMS-754807', 'OSI-027', 'Dasatinib', 'PF-4708671', 'Palbociclib', 'Nutlin-3a (-)', 'PD173074', 'Trametinib', 'KU-55933', 'Camptothecin', 'Uprosertib', 'Lapatinib', 'Doramapimod', 'Oxaliplatin', 'Luminespib', 'Temozolomide', 'SB505124', 'Vinorelbine', 'RO-3306', 'SB216763', 'ZM447439', 'AZD7762', 'Navitoclax', 'Entinostat', 'Irinotecan', 'MK-1775', 'Paclitaxel', 'Gemcitabine', 'Tozasertib', 'BI-2536', 'Afatinib', 'Ruxolitinib', 'Cisplatin', 'NU7441', 'Axitinib', 'SCH772984', 'Ribociclib', 'GSK269962A', 'MK-2206', 'Sorafenib', 'Alisertib', 'Docetaxel'])

In [14]:
# Flag every pair of distinct drugs whose pathway entries in `dt` are equal.
# Each unordered pair is visited once and both mirrored cells are written,
# so the matrix stays symmetric.
names = list(dt.keys())
for row, first in enumerate(names):
    for col in range(row + 1, len(names)):
        if dt[first] == dt[names[col]]:
            tar_coeff[row, col] = 1
            tar_coeff[col, row] = 1

In [15]:
# Panel drugs that have no entry in the drug -> pathway mapping.
no_target = set(drugs).difference(dt)
no_target

{'Galunisertib'}

In [16]:
# Row/column labels for the matrix: drugs with a pathway entry first (in `dt`
# order), then any panel drugs that have none.
# The tail is derived from `drugs` instead of naming a drug literally, so the
# labels stay in step with the panel and re-running the cell cannot add the
# same name twice. dict.fromkeys keeps panel order while dropping repeats.
labels = list(dt.keys())
labels.extend(name for name in dict.fromkeys(drugs) if name not in dt)

In [17]:
# Wrap the matrix in a DataFrame so rows and columns are addressable by drug name.
tar_codf = pd.DataFrame(data = tar_coeff, index = labels, columns = labels)

In [18]:
# Persist the labelled matrix; to_csv writes the header row and the row labels by default.
tar_codf.to_csv("../data/pathway_shared_jaaks.csv")

In [19]:
# Preview the top-left corner of the matrix: first five rows, first ten columns.
tar_codf.iloc[:5, :10]

Unnamed: 0,Venetoclax,Erlotinib,Vorinostat,Pictilisib,Alpelisib,Olaparib,AZD4547,Nilotinib,AZD6482,Sapitinib
Venetoclax,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Erlotinib,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
Vorinostat,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Pictilisib,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0
Alpelisib,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0


In [20]:
# Expect a square frame: one row and one column per label.
tar_codf.shape

(64, 64)

In [22]:
# Export every non-zero entry of the upper triangle (self-pairs included) as a
# tab-separated edge list: source drug, target drug, coefficient, and the
# pathway recorded for the source drug.

# Any labelled drug without a pathway entry gets the placeholder "other", so
# the dt[s] lookup below cannot fail for it. Existing entries are left as is.
for name in tar_codf.index:
    dt.setdefault(name, "other")

shared = []
for i, s in enumerate(tar_codf.index):
    # Slicing the columns from position i restricts the scan to the upper triangle.
    for j in tar_codf.columns[i:]:
        if tar_codf.loc[s, j] != 0:
            shared.append(f'{s}\t{j}\t{tar_codf.loc[s,j]}\t{dt[s]}')

with open("../data/source_target_pathway.txt", 'w') as file:
    # Four header fields, matching the four fields written for every row;
    # a three-field header makes tabular readers misalign the columns.
    file.write("source\ttarget\tcoeff\tpathway\n")
    file.writelines(f"{row}\n" for row in shared)

In [22]:
# Edge list of distinct drug pairs only: non-zero upper-triangle entries with
# the diagonal (self-pairs) left out. Fields are tab-separated: source drug,
# target drug, coefficient, pathway recorded for the source drug.
shared = [
    f'{src}\t{dst}\t{tar_codf.loc[src, dst]}\t{dt[src]}'
    for pos, src in enumerate(tar_codf.index)
    for dst in tar_codf.columns[pos:]
    if src != dst and tar_codf.loc[src, dst] != 0
]
with open("../data/source_target_pathway0self.txt", 'w') as out:
    out.write("source\ttarget\tcoeff\tpathway\n")
    for row in shared:
        out.write(row + "\n")

In [24]:
# Number of edge-list rows currently held in `shared`.
len(shared)

145