In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the screened-compounds annotation table; read_csv takes the first line
# of the file as the column header by default.
info = pd.read_csv("../data/screened_compounds_rel_8.4.csv")

In [3]:
# Display the column labels of the loaded compound table.
info.columns

Index(['DRUG_ID', 'SCREENING_SITE', 'DRUG_NAME', 'SYNONYMS', 'TARGET',
       'TARGET_PATHWAY'],
      dtype='object')

In [4]:
# Preview the first five rows of the compound table.
info.head(5)

Unnamed: 0,DRUG_ID,SCREENING_SITE,DRUG_NAME,SYNONYMS,TARGET,TARGET_PATHWAY
0,1,MGH,Erlotinib,"Tarceva, RG-1415, CP-358774, OSI-774, Ro-50823...",EGFR,EGFR signaling
1,3,MGH,Rapamycin,"AY-22989, Sirolimus, WY-090217, Torisel, Rapamune",MTORC1,PI3K/MTOR signaling
2,5,MGH,Sunitinib,"Sutent, Sunitinib Malate, SU-11248","PDGFR, KIT, VEGFR, FLT3, RET, CSF1R",RTK signaling
3,6,MGH,PHA-665752,"PHA665752, PHA 665752",MET,RTK signaling
4,9,MGH,MG-132,"LLL cpd, MG 132, MG132","Proteasome, CAPN1",Protein stability and degradation


In [5]:
# Read the drug list: one name per line, surrounding whitespace removed.
with open("../data/drugs_jaaks.txt") as file:
    drugs = [line.strip() for line in file]

In [6]:
# Collect the rows of `info` whose DRUG_NAME equals each entry of `drugs`, in the
# order of the drug list. A name can match several rows; a name that is absent
# from `info` simply contributes no rows.
matches = [info[info["DRUG_NAME"] == name] for name in drugs]
# Concatenate once instead of re-copying a growing frame on every iteration.
# pd.concat rejects an empty list, so an empty drug list falls back to an empty
# frame that still carries the columns of `info`.
drug_targets = pd.concat(matches) if matches else pd.DataFrame(columns = info.columns)

In [7]:
# Display (rows, columns) of the matched rows before duplicates are removed.
drug_targets.shape

(100, 6)

In [12]:
# Keep a single row per drug name (drop_duplicates retains the first occurrence).
drug_targets = drug_targets.drop_duplicates(subset = "DRUG_NAME")
# reset_index returns a new frame rather than modifying in place, so the result
# must be assigned for the rows to actually be renumbered 0..n-1.
drug_targets = drug_targets.reset_index(drop = True)
drug_targets.shape

(63, 6)

In [13]:
# Start from a float identity matrix with one row/column per entry of `drugs`:
# ones on the diagonal, zeros everywhere else.
tar_coeff = np.eye(len(drugs))

In [14]:
# Display the matrix as initialised (ones on the diagonal, zeros elsewhere).
tar_coeff

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [16]:
# Map each drug name to its TARGET string, pairing the two columns row by row.
dt = dict(zip(drug_targets['DRUG_NAME'], drug_targets['TARGET']))

In [17]:
# Display the drug name -> TARGET string mapping.
dt

{'Venetoclax': 'BCL2',
 'Erlotinib': 'EGFR',
 'Vorinostat': 'HDAC inhibitor Class I, IIa, IIb, IV',
 'Pictilisib': 'PI3K (class 1)',
 'Alpelisib': 'PI3Kalpha',
 'Olaparib': 'PARP1, PARP2',
 'AZD4547': 'FGRF1, FGFR2, FGFR3',
 'Nilotinib': 'ABL',
 'AZD6482': 'PI3Kbeta',
 'Sapitinib': 'EGFR, ERBB2, ERBB3',
 'Linsitinib': 'IGF1R',
 'Dactolisib': 'PI3K (class 1), MTORC1, MTORC2',
 'Wee1 Inhibitor': 'WEE1, CHEK1',
 'Dabrafenib': 'BRAF',
 'Taselisib': 'PI3K (beta sparing)',
 '5-Fluorouracil': 'Antimetabolite (DNA & RNA)',
 'Bortezomib': 'Proteasome',
 'LGK974': 'PORCN',
 'JQ1': 'BRD2, BRD3, BRD4, BRDT',
 'AZD8055': 'MTORC1, MTORC2',
 'Crizotinib': 'MET, ALK, ROS1',
 'BMS-754807': 'IGF1R, IR',
 'OSI-027': 'MTORC1, MTORC2',
 'Dasatinib': 'ABL, SRC, Ephrins, PDGFR, KIT',
 'PF-4708671': 'S6K1',
 'Palbociclib': 'CDK4, CDK6',
 'Nutlin-3a (-)': 'MDM2',
 'PD173074': 'FGFR1, FGFR2, FGFR3',
 'Trametinib': 'MEK1, MEK2',
 'KU-55933': 'ATM',
 'Camptothecin': 'TOP1',
 'Uprosertib': 'AKT1, AKT2, AKT3',
 'La

In [18]:
# Display the drug names that have an entry in dt, in insertion order.
dt.keys()

dict_keys(['Venetoclax', 'Erlotinib', 'Vorinostat', 'Pictilisib', 'Alpelisib', 'Olaparib', 'AZD4547', 'Nilotinib', 'AZD6482', 'Sapitinib', 'Linsitinib', 'Dactolisib', 'Wee1 Inhibitor', 'Dabrafenib', 'Taselisib', '5-Fluorouracil', 'Bortezomib', 'LGK974', 'JQ1', 'AZD8055', 'Crizotinib', 'BMS-754807', 'OSI-027', 'Dasatinib', 'PF-4708671', 'Palbociclib', 'Nutlin-3a (-)', 'PD173074', 'Trametinib', 'KU-55933', 'Camptothecin', 'Uprosertib', 'Lapatinib', 'Doramapimod', 'Oxaliplatin', 'Luminespib', 'Temozolomide', 'SB505124', 'Vinorelbine', 'RO-3306', 'SB216763', 'ZM447439', 'AZD7762', 'Navitoclax', 'Entinostat', 'Irinotecan', 'MK-1775', 'Paclitaxel', 'Gemcitabine', 'Tozasertib', 'BI-2536', 'Afatinib', 'Ruxolitinib', 'Cisplatin', 'NU7441', 'Axitinib', 'SCH772984', 'Ribociclib', 'GSK269962A', 'MK-2206', 'Sorafenib', 'Alisertib', 'Docetaxel'])

In [28]:
# Parse every drug's comma-separated TARGET string into a set of target names,
# once per drug. Each token is stripped because the strings are written as
# "A, B, C": splitting on ',' alone leaves a leading space on every token after
# the first, so " B" would never match a "B" listed first for another drug and
# shared targets would be undercounted. Empty tokens are discarded.
target_sets = [
    {token.strip() for token in targets.split(',') if token.strip()}
    for targets in dt.values()
]

# Fill the off-diagonal entries with the number of targets two drugs share.
# Rows/columns follow the key order of dt; the count is symmetric, so each
# unordered pair is computed once and written to both positions. The diagonal
# is left untouched.
for i, targets_i in enumerate(target_sets):
    for j in range(i + 1, len(target_sets)):
        n_shared = len(targets_i & target_sets[j])
        tar_coeff[i, j] = n_shared
        tar_coeff[j, i] = n_shared

In [29]:
# Names from the drug list that have no entry in dt.
no_target = set(drugs).difference(dt)
no_target

{'Galunisertib'}

In [45]:
# Axis labels for tar_coeff: the drugs with a dt entry come first, in dt's key
# order (the order used when the matrix was filled), followed by every drug from
# the input list that has no dt entry. The missing drugs are derived from the
# data rather than hard-coded, and sorted so the order is deterministic.
labels = list(dt) + sorted(set(drugs) - set(dt))

In [46]:
# Wrap the coefficient matrix in a DataFrame labelled by drug name on both axes.
tar_codf = pd.DataFrame(tar_coeff, index=labels, columns=labels)

In [47]:
# Persist the labelled matrix; to_csv writes the column header and the row
# index by default.
tar_codf.to_csv("../data/target_shared_jaaks.csv")

In [48]:
# Display the labelled shared-target matrix.
tar_codf

Unnamed: 0,Venetoclax,Erlotinib,Vorinostat,Pictilisib,Alpelisib,Olaparib,AZD4547,Nilotinib,AZD6482,Sapitinib,...,NU7441,Axitinib,SCH772984,Ribociclib,GSK269962A,MK-2206,Sorafenib,Alisertib,Docetaxel,Galunisertib
Venetoclax,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Erlotinib,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Vorinostat,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Pictilisib,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Alpelisib,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
MK-2206,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
Sorafenib,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,3.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
Alisertib,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
Docetaxel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [49]:
# Display (rows, columns) of the labelled matrix.
tar_codf.shape

(64, 64)

In [58]:
# Build one tab-separated "source<TAB>target<TAB>coeff" record for every non-zero
# entry on or above the diagonal of tar_codf. Slicing the columns from position
# i onward visits each unordered pair once; the diagonal (a drug paired with
# itself) is included in this file.
shared = []
for i, s in enumerate(tar_codf.index):
    for j in tar_codf.columns[i:]:
        coeff = tar_codf.loc[s, j]  # look the value up once per pair
        if coeff != 0:
            shared.append(f'{s}\t{j}\t{coeff}')
with open("../data/source_target_co.txt", 'w') as file:
    file.write("source\ttarget\tcoeff\n")
    # Every record, the last one included, is written with a trailing newline.
    file.writelines(f"{record}\n" for record in shared)

In [59]:
# Build one tab-separated "source<TAB>target<TAB>coeff" record for every non-zero
# entry above the diagonal of tar_codf. Slicing the columns from position i
# onward visits each unordered pair once, and the `s != j` check drops the
# self-pairs, so only links between two different drugs are written.
shared = []
for i, s in enumerate(tar_codf.index):
    for j in tar_codf.columns[i:]:
        if s == j:
            continue
        coeff = tar_codf.loc[s, j]  # look the value up once per pair
        if coeff != 0:
            shared.append(f'{s}\t{j}\t{coeff}')
with open("../data/source_target_co0self.txt", 'w') as file:
    file.write("source\ttarget\tcoeff\n")
    # Every record, the last one included, is written with a trailing newline.
    file.writelines(f"{record}\n" for record in shared)