# DeepTraSynergy Toxicity Score Analysis #
- The DeepTraSynergy paper can be found at: https://academic.oup.com/bioinformatics/article/39/8/btad438/7226508

In [2]:
# Import everything needed
from matplotlib.patches import Patch
from rdkit import Chem
from scipy import stats
from toxicity_ranking import *
import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scikit_posthocs as sp
import seaborn as sns

DeepTraSynergy creates toxicity score embeddings by:
1. PPIN representation
2. SMILES representations
3. Creating a similarity score by computing inner product of drug embeddings

So let's now examine how well SMILES representation similarity scores would work as a potential toxicity score. How well does SMILES similarity differentiate between toxicity category distributions (Kruskal-Wallis test), and is there a relationship between SMILES similarity and toxicity (Jonckheere-Terpstra test)?

In [3]:
# Load the filtered drug-combination table; each row pairs two drugs on a cell
# line with its synergy scores and a toxicity category.
combos_csv_path = 'data_processed/filtered_combos_syntox_known_targallpw.csv'
drug_combos_df = pd.read_csv(combos_csv_path)
drug_combos_df

Unnamed: 0,drug_row,drug_col,cell_line_name,synergy_zip,synergy_loewe,synergy_bliss,synergy_hsa,toxicity_category
0,mefloquine,tamoxifen,TC-32,19.667559,2.263512,25.542924,2.896978,Moderate
1,chloroquine,tamoxifen,TC-32,8.836970,-0.671750,11.944238,-0.517536,Major
2,prochlorperazine,tamoxifen,TC-32,12.411371,1.358891,14.291624,0.880952,Moderate
3,fingolimod,tamoxifen,TC-32,8.007900,-0.764699,5.141360,-0.060902,Major
4,dasatinib,tamoxifen,TC-32,-9.601561,0.314544,-5.351887,0.710809,Moderate
...,...,...,...,...,...,...,...,...
32054,sunitinib,nilutamide,CBRC058,10.012172,7.477252,-6.487555,-8.811142,Moderate
32055,sunitinib,midostaurin,CBRC058,-15.618223,-46.106561,-80.053050,-73.693958,Moderate
32056,vincristine,midostaurin,CBRC058,10.768074,5.982307,8.609542,3.406801,Moderate
32057,ceritinib,ponatinib,RD,11.951861,-2.135860,11.637520,-1.042661,Major


There are SMILES strings in the DrugBank csv file. Collect all of the SMILES and then convert to Morgan Fingerprints, then compute the Tanimoto coefficient for drug combinations.

In [15]:
# Collect every distinct drug name that appears on either side of a combination.
unique_drugs = (
    set(drug_combos_df['drug_row'].unique())
    | set(drug_combos_df['drug_col'].unique())
)
print(len(unique_drugs))

# Load the DrugBank table, which carries a SMILES column.
drugbank_df = pd.read_csv('data_processed/drugbank_drug_targets.csv')

# Number of DrugBank rows whose SMILES entry is missing (NaN), followed by the
# overall (rows, columns) shape for comparison.
missing_smiles_count = drugbank_df['SMILES'].isna().sum()
print(missing_smiles_count)
print(drugbank_df.shape)


362
1857
(19435, 9)
