# DRUGBANK VERSION #
Want to check if all of the drug combinations with toxicity that we're using have drug target and pathway intersections

In [2]:
# Import everything needed
from scipy import stats
from toxicity_ranking import *
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scikit_posthocs as sp

In [3]:
drug_syntox_df = pd.read_csv('data_processed/drugbank_syntox_known.csv')

# File names for reactome pathways and drugbank target csv
drug_target_fp = 'data_processed/drugbank_drug_targets.csv'
reactome_lowest_pw_fp = 'data_processed/reactome_lowest_pathways_homo_sapiens.csv'
reactome_all_pw_fp = 'data_processed/reactome_all_pathways_homo_sapiens.csv'

drug_target_df = pd.read_csv(drug_target_fp)
lowest_pw_df = pd.read_csv(reactome_lowest_pw_fp)
all_pw_df = pd.read_csv(reactome_all_pw_fp)

drug_target_df['drug_name'] = drug_target_df['drug_name'].str.lower()

drug_target_df

Unnamed: 0,drug_name,target_name,target_DrugBank_ID,GenBank_Protein_ID,GenBank_Gene_ID,UniProtKB_ID,GenAtlas_ID,HGNC_ID,SMILES
0,lepirudin,Prothrombin,BE0000048,339641.0,M17262,P00734,F2,HGNC:3535,
1,cetuximab,Epidermal growth factor receptor,BE0000767,757924.0,X00588,P00533,EGFR,HGNC:3236,
2,cetuximab,Low affinity immunoglobulin gamma Fc region re...,BE0000901,31322.0,X16863,O75015,FCGR3B,HGNC:3620,
3,cetuximab,Complement C1q subcomponent subunit A,BE0002094,4894854.0,AF135157,P02745,C1QA,HGNC:1241,
4,cetuximab,Complement C1q subcomponent subunit B,BE0002095,573114.0,X03084,P02746,C1QB,HGNC:1242,
...,...,...,...,...,...,...,...,...,...
19430,lotilaner,"Gaba-gated chloride channel, putative",BE0010256,,,E0W492,,,CC1=C(SC(=C1)C1=NO[C@@](C1)(C1=CC(Cl)=C(Cl)C(C...
19431,(r)-9b,Activated CDC42 kinase 1,BE0000772,8850245.0,L13738,Q07912,TNK2,HGNC:19297,
19432,lovotibeglogene autotemcel,,BE0010962,,,,,,
19433,efbemalenograstim alfa,Granulocyte colony-stimulating factor receptor,BE0000793,31697.0,X55721,Q99062,CSF3R,HGNC:2439,


Check:
- If all drugs in drugcomb and drugbank intersection with known toxicity have drug target information

In [5]:
# all drugs in syntox
all_syntox_drugs = set(drug_syntox_df['drug_row'].values).union(set(drug_syntox_df['drug_col'].values))
print("Num of drugs in the syntox intersection with KNOWN toxicity: " + str(len(all_syntox_drugs)))

drugs_with_no_drug_target_info = []

# Do all these drugs in syntox have drug target information
for drug in all_syntox_drugs:
    drug_target_subset = drug_target_df[drug_target_df['drug_name'] == drug]
    if drug_target_subset.shape[0] == 0:
        drugs_with_no_drug_target_info.append(drug)

print("Num of drugs in syntox intersection WITHOUT target info: " + str(len(drugs_with_no_drug_target_info)))
print(sorted(drugs_with_no_drug_target_info))

Num of drugs in the syntox intersection with KNOWN toxicity: 701
Num of drugs in syntox intersection WITHOUT target info: 47
['aminophenazone', 'artemether', 'bendamustine', 'buflomedil', 'canrenone', 'carboxyamidotriazole', 'caroverine', 'chlorphenesin', 'cudc-101', 'cycloguanil', 'dabigatran', 'dexchlorpheniramine', 'doxifluridine', 'formestane', 'gadodiamide', 'idebenone', 'indisulam', 'ivermectin', 'ixazomib', 'lumefantrine', 'mebutamate', 'meprednisone', 'metaxalone', 'meticrane', 'mizoribine', 'naftopidil', 'nifenazone', 'otilonium', 'phensuximide', 'pidotimod', 'pinacidil', 'piperaquine', 'piracetam', 'pirarubicin', 'pirfenidone', 'proscillaridin', 'prulifloxacin', 'saracatinib', 'tiopronin', 'trimethobenzamide', 'triptolide', 'troxerutin', 'valspodar', 'vinpocetine', 'vorasidenib', 'win 55212-2', 'zaltoprofen']


FOUND SOME DRUG TARGETS MANUALLY THROUGH DRUGBANK DATABASE; ONES THAT DO NOT HAVE TARGETS ARE LISTED HERE
- Aminophenazone
- Buflomedil
- Canrenone
- Caroverine
- Chlorphenesin
- Dexchlorpheniramine
- Doxifluridine
- Gadodiamide
- Idebenone
- Meticrane
- Nifenazone
- Otilonium
- Phensuximide
- Pidotimod
- Pinacidil
- Piperaquine
- Piracetam
- Pirarubicin
- Proscillaridin
- Tiopronin
- Troxerutin
- Vinpocetine
- Zaltoprofen

In [8]:
# Create a list of dictionaries containing the manually added drug-target data
drug_target_data = [
    {
        'drug_name': 'artemether',
        'target_name': 'Sodium/potassium-transporting ATPase subunit alpha-1',
        'target_DrugBank_ID': 'BE0000732',
        'GenBank_Protein_ID': '219942',
        'GenBank_Gene_ID': 'D00099',
        'UniProtKB_ID': 'P05023',
        'GenAtlas_ID': 'ATP1A1',
        'HGNC_ID': 'HGNC:799',
    },
    {
        'drug_name': 'bendamustine',
        'target_name': 'DNA',
        'target_DrugBank_ID': np.nan,
        'GenBank_Protein_ID': np.nan,
        'GenBank_Gene_ID': np.nan,
        'UniProtKB_ID': np.nan,
        'GenAtlas_ID': np.nan,
        'HGNC_ID': np.nan,
    },
    {
        'drug_name': 'carboxyamidotriazole',
        'target_name': 'Muscarinic acetylcholine receptor M5',
        'target_DrugBank_ID': 'BE0000247',
        'GenBank_Protein_ID': '177988',
        'GenBank_Gene_ID': 'M80333',
        'UniProtKB_ID': 'P08912',
        'GenAtlas_ID': 'CHRM5',
        'HGNC_ID': 'HGNC:1954',
    },
    {
        'drug_name': 'carboxyamidotriazole',
        'target_name': 'Interleukin-2',
        'target_DrugBank_ID': 'BE0001029',
        'GenBank_Protein_ID': '5729676',
        'GenBank_Gene_ID': 'J00264',
        'UniProtKB_ID': 'P60568',
        'GenAtlas_ID': 'IL2',
        'HGNC_ID': 'HGNC:6001',
    },
    {
        'drug_name': 'cudc-101',
        'target_name': 'Receptor tyrosine-protein kinase erbB-2',
        'target_DrugBank_ID': 'BE0000511',
        'GenBank_Protein_ID': '553282',
        'GenBank_Gene_ID': 'M11767',
        'UniProtKB_ID': 'P04626',
        'GenAtlas_ID': 'ERBB2',
        'HGNC_ID': 'HGNC:3430',
    },
    {
        'drug_name': 'cudc-101',
        'target_name': 'Epidermal growth factor receptor',
        'target_DrugBank_ID': 'BE0000767',
        'GenBank_Protein_ID': '757924',
        'GenBank_Gene_ID': 'X00588',
        'UniProtKB_ID': 'P00533',
        'GenAtlas_ID': 'EGFR',
        'HGNC_ID': 'HGNC:3236',
    },
    {
        'drug_name': 'cycloguanil',
        'target_name': 'Peptide deformylase, mitochondrial',
        'target_DrugBank_ID': 'BE0002201',
        'GenBank_Protein_ID': '11320944',
        'GenBank_Gene_ID': 'AF239156',
        'UniProtKB_ID': 'Q9HBH1',
        'GenAtlas_ID': 'PDF',
        'HGNC_ID': 'HGNC:30012',
    },
    {
        'drug_name': 'dabigatran',
        'target_name': 'Prothrombin',
        'target_DrugBank_ID': 'BE0000048',
        'GenBank_Protein_ID': '339641',
        'GenBank_Gene_ID': 'M17262',
        'UniProtKB_ID': 'P00734',
        'GenAtlas_ID': 'F2',
        'HGNC_ID': 'HGNC:3535',
    },
    {
        'drug_name': 'formestane',
        'target_name': 'Aromatase',
        'target_DrugBank_ID': 'BE0002090',
        'GenBank_Protein_ID': '179002',
        'GenBank_Gene_ID': 'M22246',
        'UniProtKB_ID': 'P11511',
        'GenAtlas_ID': 'CYP19A1',
        'HGNC_ID': 'HGNC:2594',
    },
    {
        'drug_name': 'indisulam',
        'target_name': 'Carbonic anhydrase 9',
        'target_DrugBank_ID': 'BE0002437',
        'GenBank_Protein_ID': np.nan,
        'GenBank_Gene_ID': 'X66839',
        'UniProtKB_ID': 'Q16790',
        'GenAtlas_ID': 'CA9',
        'HGNC_ID': 'HGNC:1383',
    },
    {
        'drug_name': 'ivermectin',
        'target_name': 'Glutamate-gated chloride channel subunit beta',
        'target_DrugBank_ID': 'BE0027480',
        'GenBank_Protein_ID': np.nan,
        'GenBank_Gene_ID': np.nan,
        'UniProtKB_ID': 'Q17328',
        'GenAtlas_ID': np.nan,
        'HGNC_ID': np.nan,
    },
    {
        'drug_name': 'ivermectin',
        'target_name': 'Glutamate-gated chloride channel alpha',
        'target_DrugBank_ID': 'BE0027481',
        'GenBank_Protein_ID': np.nan,
        'GenBank_Gene_ID': np.nan,
        'UniProtKB_ID': 'G5EBR3',
        'GenAtlas_ID': np.nan,
        'HGNC_ID': np.nan,
    },
    {
        'drug_name': 'ixazomib', 
        'target_name': 'Proteasome subunit beta type-5',
        'target_DrugBank_ID': 'BE0002348',
        'GenBank_Protein_ID': '558526',
        'GenBank_Gene_ID': 'D29011',
        'UniProtKB_ID': 'P28074',
        'GenAtlas_ID': 'PSMB5',
        'HGNC_ID': 'HGNC:9542',
    },
    {
        'drug_name': 'lumefantrine',
        'target_name': 'Sodium/potassium-transporting ATPase subunit alpha-1',
        'target_DrugBank_ID': 'BE0000732',
        'GenBank_Protein_ID': '219942',
        'GenBank_Gene_ID': 'D00099',
        'UniProtKB_ID': 'P05023',
        'GenAtlas_ID': 'ATP1A1',
        'HGNC_ID': 'HGNC:799',
    },
    {
        'drug_name': 'mebutamate',
        'target_name': 'Gamma-aminobutyric acid receptor subunit gamma-3',
        'target_DrugBank_ID': 'BE0003594',
        'GenBank_Protein_ID': '1754749',
        'GenBank_Gene_ID': 'S82769',
        'UniProtKB_ID': 'Q99928',
        'GenAtlas_ID': np.nan,
        'HGNC_ID': 'HGNC:4088',
    },
    {
        'drug_name': 'meprednisone',
        'target_name': 'Glucocorticoid receptor',
        'target_DrugBank_ID': 'BE0000794',
        'GenBank_Protein_ID': '31680',
        'GenBank_Gene_ID': 'X03225',
        'UniProtKB_ID': 'P04150',
        'GenAtlas_ID': 'NR3C1',
        'HGNC_ID': 'HGNC:7978',
    },
    {
        'drug_name': 'metaxalone', 
        'target_name': 'Amine oxidase [flavin-containing] A',
        'target_DrugBank_ID': 'BE0002198',
        'GenBank_Protein_ID': '187353',
        'GenBank_Gene_ID': 'M68840',
        'UniProtKB_ID': 'P21397',
        'GenAtlas_ID': 'MAOA',
        'HGNC_ID': 'HGNC:6833',
    },
    {
        'drug_name': 'mizoribine', 
        'target_name': 'Inosine-5\'-monophosphate dehydrogenase 1',
        'target_DrugBank_ID': 'BE0000761',
        'GenBank_Protein_ID': '307067',
        'GenBank_Gene_ID': 'J05272',
        'UniProtKB_ID': 'P20839',
        'GenAtlas_ID': 'IMPDH1',
        'HGNC_ID': 'HGNC:6052',
    },
    {
        'drug_name': 'naftopidil', 
        'target_name': 'Alpha-1A adrenergic receptor',
        'target_DrugBank_ID': 'BE0000501',
        'GenBank_Protein_ID': '433201',
        'GenBank_Gene_ID': 'D25235',
        'UniProtKB_ID': 'P35348',
        'GenAtlas_ID': 'ADRA1A',
        'HGNC_ID': 'HGNC:277',
    },
    {
        'drug_name': 'pirfenidone', 
        'target_name': 'Transforming growth factor beta-1 proprotein',
        'target_DrugBank_ID': 'BE0000984',
        'GenBank_Protein_ID': '1212989',
        'GenBank_Gene_ID': 'X05839',
        'UniProtKB_ID': 'P01137',
        'GenAtlas_ID': 'TGFB1',
        'HGNC_ID': 'HGNC:11766',
    },
    {
        'drug_name': 'prulifloxacin', 
        'target_name': 'DNA gyrase subunit B',
        'target_DrugBank_ID': 'BE0002388',
        'GenBank_Protein_ID': np.nan,
        'GenBank_Gene_ID': 'X71437',
        'UniProtKB_ID': 'P0A0K8',
        'GenAtlas_ID': np.nan,
        'HGNC_ID': np.nan,
    },
    {
        'drug_name': 'prulifloxacin', 
        'target_name': 'DNA gyrase subunit A',
        'target_DrugBank_ID': 'BE0008385',
        'GenBank_Protein_ID': np.nan,
        'GenBank_Gene_ID': np.nan,
        'UniProtKB_ID': 'P20831',
        'GenAtlas_ID': np.nan,
        'HGNC_ID': np.nan,
    },
    {
        'drug_name': 'prulifloxacin', 
        'target_name': 'DNA topoisomerase 4 subunit A',
        'target_DrugBank_ID': 'BE0000798',
        'GenBank_Protein_ID': '147106',
        'GenBank_Gene_ID': 'M58408',
        'UniProtKB_ID': 'P0AFI2',
        'GenAtlas_ID': np.nan,
        'HGNC_ID': np.nan,
    },
    {
        'drug_name': 'saracatinib', 
        'target_name': 'Tyrosine-protein kinase ABL1',
        'target_DrugBank_ID': 'BE0000014',
        'GenBank_Protein_ID': '28237',
        'GenBank_Gene_ID': 'X16416',
        'UniProtKB_ID': 'P00519',
        'GenAtlas_ID': 'ABL1',
        'HGNC_ID': 'HGNC:76',
    },
    {
        'drug_name': 'saracatinib', 
        'target_name': 'Proto-oncogene tyrosine-protein kinase Src',
        'target_DrugBank_ID': 'BE0000838',
        'GenBank_Protein_ID': '10635153',
        'GenBank_Gene_ID': 'AL133293',
        'UniProtKB_ID': 'P12931',
        'GenAtlas_ID': 'SRC',
        'HGNC_ID': 'HGNC:11283',
    },
    {
        'drug_name': 'trimethobenzamide', 
        'target_name': 'D(2) dopamine receptor',
        'target_DrugBank_ID': 'DB00662',
        'GenBank_Protein_ID': '181432',
        'GenBank_Gene_ID': 'M30625',
        'UniProtKB_ID': 'P14416',
        'GenAtlas_ID': 'DRD2',
        'HGNC_ID': 'HGNC:3023',
    },
    {
        'drug_name': 'triptolide', 
        'target_name': 'Peroxiredoxin-5, mitochondrial',
        'target_DrugBank_ID': 'BE0000907',
        'GenBank_Protein_ID': '6523289',
        'GenBank_Gene_ID': 'AJ249483',
        'UniProtKB_ID': 'P30044',
        'GenAtlas_ID': 'PRDX5',
        'HGNC_ID': 'HGNC:9355',
    },
    {
        'drug_name': 'triptolide', 
        'target_name': 'Proto-oncogene c-Rel',
        'target_DrugBank_ID': 'BE0010782',
        'GenBank_Protein_ID': np.nan,
        'GenBank_Gene_ID': np.nan,
        'UniProtKB_ID': 'Q04864',
        'GenAtlas_ID': np.nan,
        'HGNC_ID': 'HGNC:9954',
    },
    {
        'drug_name': 'triptolide', 
        'target_name': 'Transcription factor RelB',
        'target_DrugBank_ID': 'BE0013795',
        'GenBank_Protein_ID': np.nan,
        'GenBank_Gene_ID': np.nan,
        'UniProtKB_ID': 'Q01201',
        'GenAtlas_ID': np.nan,
        'HGNC_ID': 'HGNC:9956',
    },
    {
        'drug_name': 'triptolide', 
        'target_name': 'Transcription factor p65',
        'target_DrugBank_ID': 'BE0005551',
        'GenBank_Protein_ID': np.nan,
        'GenBank_Gene_ID': np.nan,
        'UniProtKB_ID': 'Q04206',
        'GenAtlas_ID': np.nan,
        'HGNC_ID': 'HGNC:9955',
    },
    {
        'drug_name': 'triptolide', 
        'target_name': 'Nuclear factor NF-kappa-B p100 subunit',
        'target_DrugBank_ID': 'BE0002387',
        'GenBank_Protein_ID': np.nan,
        'GenBank_Gene_ID': 'X61498',
        'UniProtKB_ID': 'Q00653',
        'GenAtlas_ID': 'NFKB2',
        'HGNC_ID': 'HGNC:7795',
    },
    {
        'drug_name': 'triptolide', 
        'target_name': 'Nuclear factor NF-kappa-B p105 subunit',
        'target_DrugBank_ID': 'BE0001086',
        'GenBank_Protein_ID': '189180',
        'GenBank_Gene_ID': 'M55643',
        'UniProtKB_ID': 'P19838',
        'GenAtlas_ID': 'NFKB1',
        'HGNC_ID': 'HGNC:7794',
    },
    {
        'drug_name': 'valspodar', 
        'target_name': 'Coagulation factor X',
        'target_DrugBank_ID': 'BE0000216',
        'GenBank_Protein_ID': '182841',
        'GenBank_Gene_ID': 'K03194',
        'UniProtKB_ID': 'P00742',
        'GenAtlas_ID': 'F10',
        'HGNC_ID': 'HGNC:3528',
    },
    {
        'drug_name': 'vorasidenib', 
        'target_name': 'Isocitrate dehydrogenase [NADP] cytoplasmic',
        'target_DrugBank_ID': 'BE0001251',
        'GenBank_Protein_ID': '3641398',
        'GenBank_Gene_ID': 'AF020038',
        'UniProtKB_ID': 'O75874',
        'GenAtlas_ID': 'IDH1',
        'HGNC_ID': 'HGNC:5382',
    },
    {
        'drug_name': 'vorasidenib', 
        'target_name': 'Isocitrate dehydrogenase [NADP], mitochondrial',
        'target_DrugBank_ID': 'BE0004527',
        'GenBank_Protein_ID': np.nan,
        'GenBank_Gene_ID': np.nan,
        'UniProtKB_ID': 'P48735',
        'GenAtlas_ID': np.nan,
        'HGNC_ID': 'HGNC:5383',
    },
    {
        'drug_name': 'vorasidenib', 
        'target_name': 'Isocitrate dehydrogenase [NAD] subunit gamma, mitochondrial',
        'target_DrugBank_ID': 'BE0000771',
        'GenBank_Protein_ID': '1167849',
        'GenBank_Gene_ID': 'Z68907',
        'UniProtKB_ID': 'P51553',
        'GenAtlas_ID': 'IDH3G',
        'HGNC_ID': 'HGNC:5386',
    },
    {
        'drug_name': 'vorasidenib', 
        'target_name': 'Isocitrate dehydrogenase [NAD] subunit beta, mitochondrial',
        'target_DrugBank_ID': 'BE0000559',
        'GenBank_Protein_ID': '2737886',
        'GenBank_Gene_ID': 'U49283',
        'UniProtKB_ID': 'O43837',
        'GenAtlas_ID': 'IDH3B',
        'HGNC_ID': 'HGNC:5385',
    },
    {
        'drug_name': 'vorasidenib', 
        'target_name': 'Isocitrate dehydrogenase [NAD] subunit alpha, mitochondrial',
        'target_DrugBank_ID': 'BE0000011',
        'GenBank_Protein_ID': '706839',
        'GenBank_Gene_ID': 'U07681',
        'UniProtKB_ID': 'P50213',
        'GenAtlas_ID': 'IDH3A',
        'HGNC_ID': 'HGNC:5384',
    },
    {
        'drug_name': 'win 55212-2', 
        'target_name': 'Cannabinoid receptor 2',
        'target_DrugBank_ID': 'BE0000095',
        'GenBank_Protein_ID': '407807',
        'GenBank_Gene_ID': 'X74328',
        'UniProtKB_ID': 'P34972',
        'GenAtlas_ID': 'CNR2',
        'HGNC_ID': 'HGNC:2160',
    },
    {
        'drug_name': 'win 55212-2', 
        'target_name': 'Cannabinoid receptor 1',
        'target_DrugBank_ID': 'BE0000061',
        'GenBank_Protein_ID': '29915',
        'GenBank_Gene_ID': 'X54937',
        'UniProtKB_ID': 'P50213',
        'GenAtlas_ID': 'CNR1',
        'HGNC_ID': 'HGNC:2159',
    },
]

manually_added_drug_target_df = pd.DataFrame(drug_target_data)

# If you need to append this to an existing DataFrame, use concat:
drug_target_df = pd.concat([drug_target_df, manually_added_drug_target_df], ignore_index=True)

drugs_with_no_drug_target_info = []

# Do all these drugs in syntox have drug target information
for drug in all_syntox_drugs:
    drug_target_subset = drug_target_df[drug_target_df['drug_name'] == drug]
    if drug_target_subset.shape[0] == 0:
        drugs_with_no_drug_target_info.append(drug)

print("Num of drugs in syntox intersection WITHOUT target info: " + str(len(drugs_with_no_drug_target_info)))
print(drugs_with_no_drug_target_info)

# Also want to know how many of the intersection drugs with targets have UniProt IDs
drugs_with_no_uniprot_info = set()
for drug in all_syntox_drugs:
    drug_target_subset = drug_target_df[drug_target_df['drug_name'] == drug]
    if drug_target_subset['UniProtKB_ID'].isnull().all():
        drugs_with_no_uniprot_info.add(drug)

print("Num of drugs in syntox intersection WITHOUT UniProt info: " + str(len(drugs_with_no_uniprot_info)))
print(drugs_with_no_uniprot_info) # Should include the 2 drugs without target info

# Slim down the drug_target_df to only include drugs in the intersection with known toxicity
syntoxtarg_drugs = all_syntox_drugs - set(drugs_with_no_drug_target_info)
syntoxtarg_df = drug_target_df[drug_target_df['drug_name'].isin(syntoxtarg_drugs)]
syntoxtarg_df.to_csv('data_processed/drugbank_syntoxtarg.csv', index=False)

Num of drugs in syntox intersection WITHOUT target info: 23
['piracetam', 'troxerutin', 'proscillaridin', 'dexchlorpheniramine', 'idebenone', 'chlorphenesin', 'caroverine', 'meticrane', 'aminophenazone', 'otilonium', 'pirarubicin', 'vinpocetine', 'canrenone', 'phensuximide', 'nifenazone', 'tiopronin', 'pinacidil', 'gadodiamide', 'zaltoprofen', 'doxifluridine', 'pidotimod', 'piperaquine', 'buflomedil']
Num of drugs in syntox intersection WITHOUT UniProt info: 46
{'gadodiamide', 'methoxsalen', 'otilonium', 'zaltoprofen', 'mechlorethamine', 'niclosamide', 'trioxsalen', 'meticrane', 'tiopronin', 'nifenazone', 'buflomedil', 'proscillaridin', 'telithromycin', 'busulfan', 'dexchlorpheniramine', 'temozolomide', 'deferasirox', 'pirarubicin', 'doxifluridine', 'chlorphenesin', 'vinpocetine', 'melphalan', 'bendamustine', 'furazolidone', 'doxycycline', 'idebenone', 'canrenone', 'daptomycin', 'caroverine', 'amphotericin b', 'altretamine', 'octreotide', 'praziquantel', 'chlorambucil', 'nedaplatin', '

Check if
- If all drugs in drugcomb and drugbank intersection with known toxicity have pathway information

In [9]:
drugs_with_no_lowest_pathway_info = []
drugs_with_no_all_pathway_info = []

# Read in the UniProt2Reactome.tsv file
lowest_pw_df = pd.read_csv('data_processed/reactome_lowest_pathways_homo_sapiens.csv')
all_pw_df = pd.read_csv('data_processed/reactome_all_pathways_homo_sapiens.csv')

syntox_drugs_with_uniprot_targets = all_syntox_drugs - drugs_with_no_uniprot_info
print(len(syntox_drugs_with_uniprot_targets))

syntoxtarg_lowestpw_df = pd.DataFrame(columns=['drug_name', 'target_name', 'target_DrugBank_ID', 'UniProtKB_ID', 'Reactome_ID', 'Pathway_Name'])
syntoxtarg_allpw_df = pd.DataFrame(columns=['drug_name', 'target_name', 'target_DrugBank_ID', 'UniProtKB_ID', 'Reactome_ID', 'Pathway_Name'])

for drug in syntox_drugs_with_uniprot_targets:
    drug_target_subset = drug_target_df[drug_target_df['drug_name'] == drug]
    for iter, targ_row in drug_target_subset.iterrows():
        targ_uniprot = targ_row['UniProtKB_ID']
        # lowestpw
        lowest_pw_subset = lowest_pw_df[lowest_pw_df['UniProtKB_ID'] == targ_uniprot]
        for iter, lowpw_row in lowest_pw_subset.iterrows():
            syntoxtarg_lowestpw_df.loc[len(syntoxtarg_lowestpw_df)] = [targ_row['drug_name'], \
                targ_row['target_name'], targ_row['target_DrugBank_ID'], targ_uniprot, \
                lowpw_row['Reactome_ID'], lowpw_row['Pathway_Name']]
        # allpw
        all_pw_subset = all_pw_df[all_pw_df['UniProtKB_ID'] == targ_uniprot]
        for iter, allpw_row in all_pw_subset.iterrows():
            syntoxtarg_allpw_df.loc[len(syntoxtarg_allpw_df)] = [targ_row['drug_name'], \
                targ_row['target_name'], targ_row['target_DrugBank_ID'], targ_uniprot, \
                allpw_row['Reactome_ID'], allpw_row['Pathway_Name']]

syntoxtarg_lowestpw_df.to_csv('data_processed/drugbank_syntoxtarg_lowestpw.csv', index=False)
syntoxtarg_allpw_df.to_csv('data_processed/drugbank_syntoxtarg_allpw.csv', index=False)

# Now we have the syntox-target-pathway data for the intersection of drugs with known toxicity
# How many unique drugs are in this syntoxtarg intersection?
print("Lowest pw unique drugs in syntoxtarg: " + str(len(set(syntoxtarg_lowestpw_df['drug_name'].values))))
print("All pw unique drugs in syntoxtarg: " + str(len(set(syntoxtarg_allpw_df['drug_name'].values))))

655
Lowest pw unique drugs in syntoxtarg: 596
All pw unique drugs in syntoxtarg: 596


In [10]:
# Filter out drug combinations that do not have target or all pathway information
filtered_combos_syntoxtargallpw = drug_syntox_df[drug_syntox_df['drug_row'].isin(syntox_drugs_with_uniprot_targets) & drug_syntox_df['drug_col'].isin(syntox_drugs_with_uniprot_targets)]
print("Shape of filtered_combos_syntoxtargallpw: " + str(filtered_combos_syntoxtargallpw.shape))

# Save the filtered drug_syntox_df
filtered_combos_syntoxtargallpw.to_csv('data_processed/drugbank_filtered_combos_syntox_known_targallpw.csv', index=False)

Shape of filtered_combos_syntoxtargallpw: (73769, 8)


For the whole syntoxtarg both low and all pathway intersections, what's the range of targets (1 min and max per drug) and pathways (1 min and max per drug)

In [11]:
# What's the max number of targets per drug?
print("Lowest pw max targets per drug: " + str(syntoxtarg_lowestpw_df.groupby('drug_name')['target_name'].nunique().max()))
print("All pw max targets per drug: " + str(syntoxtarg_allpw_df.groupby('drug_name')['target_name'].nunique().max()))

# What's the max number of pathways per drug?
print("Lowest pw max pathways per drug: " + str(syntoxtarg_lowestpw_df.groupby('drug_name')['Pathway_Name'].nunique().max()))
print("All pw max pathways per drug: " + str(syntoxtarg_allpw_df.groupby('drug_name')['Pathway_Name'].nunique().max()))

# For targets
print("Drugs with most targets (lowest pw):")
print(syntoxtarg_lowestpw_df.groupby('drug_name')['target_name'].nunique().nlargest(3))

print("\nDrugs with most targets (all pw):")
print(syntoxtarg_allpw_df.groupby('drug_name')['target_name'].nunique().nlargest(3))

# For pathways
print("\nDrugs with most pathways (lowest pw):")
print(syntoxtarg_lowestpw_df.groupby('drug_name')['Pathway_Name'].nunique().nlargest(3))

print("\nDrugs with most pathways (all pw):")
print(syntoxtarg_allpw_df.groupby('drug_name')['Pathway_Name'].nunique().nlargest(3))

Lowest pw max targets per drug: 216
All pw max targets per drug: 216
Lowest pw max pathways per drug: 700
All pw max pathways per drug: 1055
Drugs with most targets (lowest pw):
drug_name
fostamatinib    216
cannabidiol      41
aripiprazole     38
Name: target_name, dtype: int64

Drugs with most targets (all pw):
drug_name
fostamatinib    216
cannabidiol      41
aripiprazole     38
Name: target_name, dtype: int64

Drugs with most pathways (lowest pw):
drug_name
fostamatinib    700
dasatinib       188
ponatinib       181
Name: Pathway_Name, dtype: int64

Drugs with most pathways (all pw):
drug_name
fostamatinib    1055
dasatinib        349
quercetin        343
Name: Pathway_Name, dtype: int64
