# DrugBank Drug Target and Pathway Analysis #

Notebook for examining how drug target/pathway overlap (Jaccard similarity) relates to toxicity categories and synergy scores

In [1]:
# Import everything needed
from matplotlib.patches import Patch
from scipy import stats
from sklearn.metrics import r2_score
from statsmodels.stats.multitest import multipletests
from preprocessing_functions import *
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scikit_posthocs as sp
import seaborn as sns

In [2]:
# Load the processed drug-combination table followed by the two target/pathway
# annotation tables ("allpw" and "lowestpw" variants, going by the file names).
drug_syntox_df, syntoxtarg_allpw_df, syntoxtarg_lowestpw_df = [
    pd.read_csv(csv_path)
    for csv_path in (
        "data_processed/drugbank_processed_combos_syntoxtargallpw_string.csv",
        "data_processed/drugbank_syntoxtarg_allpw.csv",
        "data_processed/drugbank_syntoxtarg_lowestpw.csv",
    )
]

In [3]:
# Function to get the jaccard similarity between two sets of drug targets or pathways
def targ_pw_js(drug1, drug2, target_or_pathway_df, type='target'):
    """Jaccard similarity between the target (or pathway) sets of two drugs.

    Parameters
    ----------
    drug1, drug2 : str
        Drug names, matched exactly against the ``drug_name`` column.
    target_or_pathway_df : pandas.DataFrame
        Annotation table with a ``drug_name`` column plus ``target_DrugBank_ID``
        (read when ``type='target'``) or ``Reactome_ID`` (read when
        ``type='pathway'``).
    type : {'target', 'pathway'}, default 'target'
        Which identifier column to compare. The name shadows the builtin but is
        kept because callers pass it by keyword.

    Returns
    -------
    The value returned by ``jaccard_similarity`` for the two identifier sets, or
    ``None`` when either drug has no non-missing identifier in the table.

    Raises
    ------
    ValueError
        If ``type`` is neither ``'target'`` nor ``'pathway'``.
    """
    if type not in ('target', 'pathway'):
        raise ValueError('type must be either target or pathway')
    id_column = 'target_DrugBank_ID' if type == 'target' else 'Reactome_ID'

    def _identifier_set(drug):
        drug_rows = target_or_pathway_df['drug_name'] == drug
        # Missing identifiers (NaN) are not real targets/pathways: drop them so they
        # neither make an unannotated drug look annotated nor enter the
        # intersection/union of the similarity.
        return set(target_or_pathway_df.loc[drug_rows, id_column].dropna())

    drug1_set = _identifier_set(drug1)
    drug2_set = _identifier_set(drug2)

    # Similarity is undefined when either drug has nothing to compare
    if not drug1_set or not drug2_set:
        return None

    return jaccard_similarity(drug1_set, drug2_set)

# Smoke-test targ_pw_js on drug pairs with a known expected outcome
smoke_cases = [
    ('ceritinib', 'levonorgestrel', syntoxtarg_allpw_df, 'target'),       # Should be zero
    ('testosterone', 'levonorgestrel', syntoxtarg_allpw_df, 'target'),    # Should be positive
    ('methotrexate', 'chlorphenesin', syntoxtarg_allpw_df, 'target'),     # Should be None
    ('mycophenolic acid', 'rucaparib', syntoxtarg_lowestpw_df, 'pathway'),  # Should be zero
    ('testosterone', 'levonorgestrel', syntoxtarg_allpw_df, 'pathway'),   # Should be positive
    ('methotrexate', 'chlorphenesin', syntoxtarg_allpw_df, 'pathway'),    # Should be None
]
for first_drug, second_drug, annotation_df, level in smoke_cases:
    print(targ_pw_js(first_drug, second_drug, annotation_df, type=level))


0.0
0.3333333333333333
None
0.0
0.71875
None


In [4]:
# Add one Jaccard-similarity column per annotation level to drug_syntox_df.
# Each entry compares the target (or pathway) sets of the pair's two drugs;
# targ_pw_js returns None when a drug has no annotation, which shows up as a
# missing value in the column.
jaccard_specs = [
    ('target_jaccard', syntoxtarg_allpw_df, 'target'),
    ('all_pw_jaccard', syntoxtarg_allpw_df, 'pathway'),
    ('lowest_pw_jaccard', syntoxtarg_lowestpw_df, 'pathway'),
]
for jaccard_column, annotation_df, annotation_type in jaccard_specs:
    drug_syntox_df[jaccard_column] = drug_syntox_df.apply(
        lambda row: targ_pw_js(row['drug_row'], row['drug_col'], annotation_df, annotation_type),
        axis=1,
    )

# Test if any of these jaccard similarity distributions are normal.
# Missing similarities are excluded first: normaltest propagates NaN by default,
# so a single unannotated pair would otherwise turn the whole result into nan.
normality_messages = [
    ('target_jaccard', "Is the target jaccard similarity distribution normal? Normal test p-value: "),
    ('all_pw_jaccard', "Is the all pathway jaccard similarity distribution normal? Normal test p-value: "),
    ('lowest_pw_jaccard', "Is the lowest pathway jaccard similarity distribution normal? Normal test p-value: "),
]
for jaccard_column, message in normality_messages:
    print(message, stats.normaltest(drug_syntox_df[jaccard_column].dropna()))

Is the target jaccard similarity distribution normal? Normal test p-value:  NormaltestResult(statistic=77954.50301086123, pvalue=0.0)
Is the all pathway jaccard similarity distribution normal? Normal test p-value:  NormaltestResult(statistic=55254.58142408516, pvalue=0.0)
Is the lowest pathway jaccard similarity distribution normal? Normal test p-value:  NormaltestResult(statistic=63641.36195402004, pvalue=0.0)


In [5]:
# Keep only the drug pairs for which all three Jaccard similarities exist
jaccard_columns = ['target_jaccard', 'all_pw_jaccard', 'lowest_pw_jaccard']
print("Original Drug Combination DataFrame Shape: ", drug_syntox_df.shape)
drug_syntox_df = drug_syntox_df.dropna(subset=jaccard_columns)
print("Drug Combination DataFrame Shape After Dropping NaNs for Target/Pathway Analysis: ", drug_syntox_df.shape)

# One 20-bin histogram per similarity measure: (column, plot title, output file)
histogram_specs = [
    (
        'target_jaccard',
        'Histogram of Drug Target Jaccard Similarities',
        'results/jaccard_similarity_analysis/exploratory_distrib_stats/target_jaccard_hist_drugbank.png',
    ),
    (
        'all_pw_jaccard',
        'Histogram of All PWs Jaccard Similarities',
        'results/jaccard_similarity_analysis/exploratory_distrib_stats/all_pw_jaccard_hist_drugbank.png',
    ),
    (
        'lowest_pw_jaccard',
        'Histogram of Lowest PWs Jaccard Similarities',
        'results/jaccard_similarity_analysis/exploratory_distrib_stats/lowest_pw_jaccard_hist_drugbank.png',
    ),
]
for hist_column, hist_title, hist_path in histogram_specs:
    plt.hist(drug_syntox_df[hist_column], bins=20)
    plt.xlabel('Jaccard Similarity')
    plt.ylabel('Frequency')
    plt.title(hist_title)
    plt.savefig(hist_path)
    plt.close()

# Summary statistics (describe(): count, mean, std, min, quartiles incl. median, max)
# of the three similarity distributions, written to a CSV file
jaccard_stats = drug_syntox_df[jaccard_columns].describe()
jaccard_stats.to_csv('results/jaccard_similarity_analysis/exploratory_distrib_stats/target_pathway_stats_drugbank.csv')

Original Drug Combination DataFrame Shape:  (62728, 15)
Drug Combination DataFrame Shape After Dropping NaNs for Target/Pathway Analysis:  (62728, 15)


- Kruskal Wallis Test
- Dunn Posthoc Test with Bonferroni Correction
- Jonckheere-Terpstra Test
- ANOVA
- T Test with Bonferroni Correction

In [6]:
# Compare Jaccard similarity across the Major, Moderate, and Minor toxicity categories.
# For each overlap measure (target, lowest-level pathway, all-level pathway) this cell runs:
#   - Kruskal-Wallis test across the three categories
#   - Dunn post-hoc test with Bonferroni correction
#   - Jonckheere-Terpstra trend test in both directions (project helper)
#   - one-way ANOVA
#   - pairwise t-tests with Bonferroni correction
# Results are printed as they are computed and written to one TSV file at the end.
# NOTE(review): stats.ttest_ind is called with its defaults while the group sizes
# differ a lot -- consider whether equal_var=False (Welch) is more appropriate.

###################################
# Target jaccard
major_target = drug_syntox_df[drug_syntox_df['toxicity_category'] == 'Major']['target_jaccard'].dropna()
moderate_target = drug_syntox_df[drug_syntox_df['toxicity_category'] == 'Moderate']['target_jaccard'].dropna()
minor_target = drug_syntox_df[drug_syntox_df['toxicity_category'] == 'Minor']['target_jaccard'].dropna()
print(f'Major target jaccard: {len(major_target)}')
print(f'Moderate target jaccard: {len(moderate_target)}')
print(f'Minor target jaccard: {len(minor_target)}')

h_statistic_target_tox, p_value_target_tox = stats.kruskal(major_target, moderate_target, minor_target)
print(f'Kruskal-Wallis H statistic for target jaccard: {h_statistic_target_tox}')
print(f'P-value for target jaccard: {p_value_target_tox}')

# Filter drug_syntox_df for only Major, Moderate, and Minor toxicity categories and remove pairs with target jaccard similarity of None
drug_syntox_df_tox_target = drug_syntox_df[drug_syntox_df['toxicity_category'].isin(['Major', 'Moderate', 'Minor'])]
drug_syntox_df_tox_target = drug_syntox_df_tox_target[~drug_syntox_df_tox_target['target_jaccard'].isnull()]
dunn_target_tox = sp.posthoc_dunn(drug_syntox_df_tox_target, val_col='target_jaccard', group_col='toxicity_category', p_adjust='bonferroni')
print('Dunn post-hoc test for target jaccard:' + str(dunn_target_tox))

# Jonckheere-Terpstra test -- is there a trend in the overlap (jaccard similarity) as you increase toxicity from minor to major?
# The groups are passed minor -> major for the "increasing" test and major -> minor for the "decreasing" test.
jaccard_major_samples = major_target.values.tolist()
jaccard_moderate_samples = moderate_target.values.tolist()
jaccard_minor_samples = minor_target.values.tolist()
jt_incr_target = jonckheere_terpestra_test([jaccard_minor_samples, jaccard_moderate_samples, jaccard_major_samples])
print("Increasing toxicity for Target overlap: ", jt_incr_target )
jt_decr_target = jonckheere_terpestra_test([jaccard_major_samples, jaccard_moderate_samples, jaccard_minor_samples])
print("Decreasing toxicity for Target overlap: ", jt_decr_target)

# Run an ANOVA on jaccard similarity for Major, Moderate, and Minor toxicity categories
f_statistic_anova_jaccard_tox_targ, p_value_anova_jaccard_tox_targ = stats.f_oneway(major_target, moderate_target, minor_target)
print(f'ANOVA F-statistic for target jaccard: {f_statistic_anova_jaccard_tox_targ}')
print(f'P-value for target jaccard: {p_value_anova_jaccard_tox_targ}')

# Run a T test on jaccard similarity between Major/Minor, Major/Moderate, and Moderate/Minor toxicity categories
major_minor_target = stats.ttest_ind(major_target, minor_target)
major_moderate_target = stats.ttest_ind(major_target, moderate_target)
moderate_minor_target = stats.ttest_ind(moderate_target, minor_target)

# Run bonferroni correction on the p-values (element [1] of the multipletests result holds the corrected p-values)
ttest_p_values_targ = [major_minor_target[1], major_moderate_target[1], moderate_minor_target[1]]
ttest_p_values_corrected_targ = multipletests(ttest_p_values_targ, method='bonferroni')
print(f'Major/Minor T-test: {major_minor_target[1]} Corrected: {ttest_p_values_corrected_targ[1][0]}')
print(f'Major/Moderate T-test: {major_moderate_target[1]} Corrected: {ttest_p_values_corrected_targ[1][1]}')
print(f'Moderate/Minor T-test: {moderate_minor_target[1]} Corrected: {ttest_p_values_corrected_targ[1][2]}')

###################################
# Pathway (lowest levels) jaccard
major_lowestpw = drug_syntox_df[drug_syntox_df['toxicity_category'] == 'Major']['lowest_pw_jaccard'].dropna()
moderate_lowestpw = drug_syntox_df[drug_syntox_df['toxicity_category'] == 'Moderate']['lowest_pw_jaccard'].dropna()
minor_lowestpw = drug_syntox_df[drug_syntox_df['toxicity_category'] == 'Minor']['lowest_pw_jaccard'].dropna()
print(f'Major lowest pathway jaccard: {len(major_lowestpw)}')
print(f'Moderate lowest pathway jaccard: {len(moderate_lowestpw)}')
print(f'Minor lowest pathway jaccard: {len(minor_lowestpw)}')

h_statistic_lowestpw_tox, p_value_lowestpw_tox = stats.kruskal(major_lowestpw, moderate_lowestpw, minor_lowestpw)
print(f'Kruskal-Wallis H statistic for lowest pathway jaccard: {h_statistic_lowestpw_tox}')
print(f'P-value for lowest pathway jaccard: {p_value_lowestpw_tox}')

# Filter drug_syntox_df for only Major, Moderate, and Minor toxicity categories and remove pairs with lowest pathway levels jaccard similarity of None
drug_syntox_df_tox_lowestpw = drug_syntox_df[drug_syntox_df['toxicity_category'].isin(['Major', 'Moderate', 'Minor'])]
drug_syntox_df_tox_lowestpw = drug_syntox_df_tox_lowestpw[~drug_syntox_df_tox_lowestpw['lowest_pw_jaccard'].isnull()]
dunn_lowestpw_tox = sp.posthoc_dunn(drug_syntox_df_tox_lowestpw, val_col='lowest_pw_jaccard', group_col='toxicity_category', p_adjust='bonferroni')
print('Dunn post-hoc test for lowest pathway jaccard:' + str(dunn_lowestpw_tox))

# Jonckheere-Terpstra test -- is there a trend in the overlap (jaccard similarity) as you increase toxicity from minor to major?
# (the jaccard_*_samples names are reused from the target section and overwritten here)
jaccard_major_samples = major_lowestpw.values.tolist()
jaccard_moderate_samples = moderate_lowestpw.values.tolist()
jaccard_minor_samples = minor_lowestpw.values.tolist()
jt_incr_lowestpw = jonckheere_terpestra_test([jaccard_minor_samples, jaccard_moderate_samples, jaccard_major_samples])
print("Increasing toxicity for Lowest Pathway overlap: ", jt_incr_lowestpw )
jt_decr_lowestpw = jonckheere_terpestra_test([jaccard_major_samples, jaccard_moderate_samples, jaccard_minor_samples])
print("Decreasing toxicity for Lowest Pathway overlap: ", jt_decr_lowestpw)

# Run an ANOVA on jaccard similarity for Major, Moderate, and Minor toxicity categories
f_statistic_anova_jaccard_tox_lowestpw, p_value_anova_jaccard_tox_lowestpw = stats.f_oneway(major_lowestpw, moderate_lowestpw, minor_lowestpw)
print(f'ANOVA F-statistic for lowest pathway jaccard: {f_statistic_anova_jaccard_tox_lowestpw}')
print(f'P-value for lowest pathway jaccard: {p_value_anova_jaccard_tox_lowestpw}')

# Run a T test on jaccard similarity between Major/Minor, Major/Moderate, and Moderate/Minor toxicity categories
major_minor_lowestpw = stats.ttest_ind(major_lowestpw, minor_lowestpw)
major_moderate_lowestpw = stats.ttest_ind(major_lowestpw, moderate_lowestpw)
moderate_minor_lowestpw = stats.ttest_ind(moderate_lowestpw, minor_lowestpw)

# Run bonferroni correction on the p-values
ttest_p_values_lowestpw = [major_minor_lowestpw[1], major_moderate_lowestpw[1], moderate_minor_lowestpw[1]]
ttest_p_values_corrected_lowestpw = multipletests(ttest_p_values_lowestpw, method='bonferroni')
print(f'Major/Minor T-test: {major_minor_lowestpw[1]} Corrected: {ttest_p_values_corrected_lowestpw[1][0]}')
print(f'Major/Moderate T-test: {major_moderate_lowestpw[1]} Corrected: {ttest_p_values_corrected_lowestpw[1][1]}')
print(f'Moderate/Minor T-test: {moderate_minor_lowestpw[1]} Corrected: {ttest_p_values_corrected_lowestpw[1][2]}')

###################################
# Pathway (all levels) jaccard
major_allpw = drug_syntox_df[drug_syntox_df['toxicity_category'] == 'Major']['all_pw_jaccard'].dropna()
moderate_allpw = drug_syntox_df[drug_syntox_df['toxicity_category'] == 'Moderate']['all_pw_jaccard'].dropna()
minor_allpw = drug_syntox_df[drug_syntox_df['toxicity_category'] == 'Minor']['all_pw_jaccard'].dropna()
print(f'Major all pathway jaccard: {len(major_allpw)}')
print(f'Moderate all pathway jaccard: {len(moderate_allpw)}')
print(f'Minor all pathway jaccard: {len(minor_allpw)}')

h_statistic_allpw_tox, p_value_allpw_tox = stats.kruskal(major_allpw, moderate_allpw, minor_allpw)
print(f'Kruskal-Wallis H statistic for all pathway jaccard: {h_statistic_allpw_tox}')
print(f'P-value for all pathway jaccard: {p_value_allpw_tox}')

# Filter drug_syntox_df for only Major, Moderate, and Minor toxicity categories and remove pairs with all pathway levels jaccard similarity of None
drug_syntox_df_tox_allpw = drug_syntox_df[drug_syntox_df['toxicity_category'].isin(['Major', 'Moderate', 'Minor'])]
drug_syntox_df_tox_allpw = drug_syntox_df_tox_allpw[~drug_syntox_df_tox_allpw['all_pw_jaccard'].isnull()]
dunn_allpw_tox = sp.posthoc_dunn(drug_syntox_df_tox_allpw, val_col='all_pw_jaccard', group_col='toxicity_category', p_adjust='bonferroni')
print('Dunn post-hoc test for all pathway jaccard:' + str(dunn_allpw_tox))

# Jonckheere-Terpstra test -- is there a trend in the overlap (jaccard similarity) as you increase toxicity from minor to major?
jaccard_major_samples = major_allpw.values.tolist()
jaccard_moderate_samples = moderate_allpw.values.tolist()
jaccard_minor_samples = minor_allpw.values.tolist()
jt_incr_allpw = jonckheere_terpestra_test([jaccard_minor_samples, jaccard_moderate_samples, jaccard_major_samples])
print("Increasing toxicity for All Pathway overlap: ", jt_incr_allpw )
jt_decr_allpw = jonckheere_terpestra_test([jaccard_major_samples, jaccard_moderate_samples, jaccard_minor_samples])
print("Decreasing toxicity for All Pathway overlap: ", jt_decr_allpw)

# Run an ANOVA on jaccard similarity for Major, Moderate, and Minor toxicity categories
f_statistic_anova_jaccard_tox_allpw, p_value_anova_jaccard_tox_allpw = stats.f_oneway(major_allpw, moderate_allpw, minor_allpw)
print(f'ANOVA F-statistic for all pathway jaccard: {f_statistic_anova_jaccard_tox_allpw}')
print(f'P-value for all pathway jaccard: {p_value_anova_jaccard_tox_allpw}')

# Run a T test on jaccard similarity between Major/Minor, Major/Moderate, and Moderate/Minor toxicity categories
major_minor_allpw = stats.ttest_ind(major_allpw, minor_allpw)
major_moderate_allpw = stats.ttest_ind(major_allpw, moderate_allpw)
moderate_minor_allpw = stats.ttest_ind(moderate_allpw, minor_allpw)

# Run bonferroni correction on the p-values
ttest_p_values_allpw = [major_minor_allpw[1], major_moderate_allpw[1], moderate_minor_allpw[1]]
ttest_p_values_corrected_allpw = multipletests(ttest_p_values_allpw, method='bonferroni')
print(f'Major/Minor T-test: {major_minor_allpw[1]} Corrected: {ttest_p_values_corrected_allpw[1][0]}')
print(f'Major/Moderate T-test: {major_moderate_allpw[1]} Corrected: {ttest_p_values_corrected_allpw[1][1]}')
print(f'Moderate/Minor T-test: {moderate_minor_allpw[1]} Corrected: {ttest_p_values_corrected_allpw[1][2]}')


###################################
# Write every result to a single TSV (columns: Level, Test, Test statistic, P-value)
def _tox_result_rows(level, kruskal, dunn, jt_incr, jt_decr, anova, ttest_corrected_p):
    """Format one overlap level's results as TSV lines.

    ``kruskal``, ``jt_incr``, ``jt_decr`` and ``anova`` are (statistic, p-value) pairs,
    ``dunn`` is the Dunn post-hoc p-value matrix indexed by toxicity category, and
    ``ttest_corrected_p`` holds the corrected p-values in the order
    Major/Minor, Major/Moderate, Moderate/Minor.
    """
    return [
        '{}\tKruskal-Wallis\t{:.4e}\t{:.4e}\n'.format(level, kruskal[0], kruskal[1]),
        # Dunn p-values are looked up by category label rather than by position so
        # the reported pair stays correct whatever order the matrix uses.
        '{}\tDunn post-hoc\tMajor/Minor\t{:.4e}\n'.format(level, dunn.loc['Major', 'Minor']),
        '{}\tDunn post-hoc\tMajor/Moderate\t{:.4e}\n'.format(level, dunn.loc['Major', 'Moderate']),
        '{}\tDunn post-hoc\tModerate/Minor\t{:.4e}\n'.format(level, dunn.loc['Minor', 'Moderate']),
        '{}\tJonckheere-Terpstra Increasing toxicity\t{:.4e}\t{:.4e}\n'.format(level, jt_incr[0], jt_incr[1]),
        '{}\tJonckheere-Terpstra Decreasing toxicity\t{:.4e}\t{:.4e}\n'.format(level, jt_decr[0], jt_decr[1]),
        '{}\tANOVA\t{:.4e}\t{:.4e}\n'.format(level, anova[0], anova[1]),
        '{}\tT-test (bonf cor)\tMajor/Minor\t{:.4e}\n'.format(level, ttest_corrected_p[0]),
        '{}\tT-test (bonf cor)\tMajor/Moderate\t{:.4e}\n'.format(level, ttest_corrected_p[1]),
        '{}\tT-test (bonf cor)\tModerate/Minor\t{:.4e}\n'.format(level, ttest_corrected_p[2]),
    ]


with open('results/jaccard_similarity_analysis/targpw_tox_jaccard_drugbank.tsv', 'w') as f:
    f.write('Level\tTest\tTest statistic\tP-value\n')
    f.writelines(_tox_result_rows(
        'Target',
        (h_statistic_target_tox, p_value_target_tox),
        dunn_target_tox,
        jt_incr_target,
        jt_decr_target,
        (f_statistic_anova_jaccard_tox_targ, p_value_anova_jaccard_tox_targ),
        ttest_p_values_corrected_targ[1],
    ))
    f.writelines(_tox_result_rows(
        'Lowest pathway',
        (h_statistic_lowestpw_tox, p_value_lowestpw_tox),
        dunn_lowestpw_tox,
        jt_incr_lowestpw,
        jt_decr_lowestpw,
        (f_statistic_anova_jaccard_tox_lowestpw, p_value_anova_jaccard_tox_lowestpw),
        ttest_p_values_corrected_lowestpw[1],
    ))
    f.writelines(_tox_result_rows(
        'All pathway',
        (h_statistic_allpw_tox, p_value_allpw_tox),
        dunn_allpw_tox,
        jt_incr_allpw,
        jt_decr_allpw,
        (f_statistic_anova_jaccard_tox_allpw, p_value_anova_jaccard_tox_allpw),
        ttest_p_values_corrected_allpw[1],
    ))


Major target jaccard: 36864
Moderate target jaccard: 19074
Minor target jaccard: 6790
Kruskal-Wallis H statistic for target jaccard: 153.2579767633176
P-value for target jaccard: 5.253553895891229e-34
Dunn post-hoc test for target jaccard:                 Major         Minor      Moderate
Major     1.000000e+00  9.491539e-31  2.636158e-11
Minor     9.491539e-31  1.000000e+00  1.683787e-10
Moderate  2.636158e-11  1.683787e-10  1.000000e+00
Increasing toxicity for Target overlap:  (4.781792675743791, 8.686940817792532e-07)
Decreasing toxicity for Target overlap:  (-4.781792675743791, 0.9999991313059182)
ANOVA F-statistic for target jaccard: 103.13025237159775
P-value for target jaccard: 1.9256607654186592e-45
Major/Minor T-test: 2.6300651889591527e-24 Corrected: 7.890195566877457e-24
Major/Moderate T-test: 1.3518950787541079e-28 Corrected: 4.055685236262324e-28
Moderate/Minor T-test: 3.4102097428364726e-05 Corrected: 0.00010230629228509418
Major lowest pathway jaccard: 36864
Moderate low

- Violin Plots

In [7]:
# Colour per toxicity category, in display order Minor -> Moderate -> Major
tox_order = ['Minor', 'Moderate', 'Major']
colors = ['#20965D', '#FFBC42', '#D81159']
color_dict = {category: shade for category, shade in zip(tox_order, colors)}
legend_elements = [Patch(facecolor=color_dict[category], label=category) for category in tox_order]

# One violin plot per similarity measure: (column, y-axis label, output file)
violin_specs = [
    (
        'target_jaccard',
        'Target Jaccard Similarity',
        'results/jaccard_similarity_analysis/drugtargjs_v_toxcats_drugbank_violin.png',
    ),
    (
        'lowest_pw_jaccard',
        'Lowest PWs Jaccard Similarity',
        'results/jaccard_similarity_analysis/lowestpwjs_v_toxcats_drugbank_violin.png',
    ),
    (
        'all_pw_jaccard',
        'All PWs Jaccard Similarity',
        'results/jaccard_similarity_analysis/allpwjs_v_toxcats_drugbank_violin.png',
    ),
]
for violin_column, violin_ylabel, violin_path in violin_specs:
    ax = sns.violinplot(
        data=drug_syntox_df,
        x='toxicity_category',
        y=violin_column,
        hue='toxicity_category',
        order=tox_order,
        palette=color_dict,
    )
    ax.set(xlabel='', ylabel=violin_ylabel)
    plt.xticks(fontsize=20)
    ax.yaxis.label.set_size(20)
    plt.tight_layout()
    plt.savefig(violin_path, dpi=700)
    plt.close()


- Strip Plots

In [8]:
# Strip (jittered scatter) plots of each similarity measure per toxicity category,
# with a horizontal line marking each category's mean.
# Relies on tox_order and color_dict defined in the violin-plot cell.

# (column, y-axis label, output file)
strip_specs = [
    (
        'target_jaccard',
        'Target Jaccard Similarity',
        'results/jaccard_similarity_analysis/drugtargjs_v_toxcats_drugbank_strip.png',
    ),
    (
        'lowest_pw_jaccard',
        'Lowest PWs Jaccard Similarity',
        'results/jaccard_similarity_analysis/lowestpwjs_v_toxcats_drugbank_strip.png',
    ),
    (
        'all_pw_jaccard',
        'All PWs Jaccard Similarity',
        'results/jaccard_similarity_analysis/allpwjs_v_toxcats_drugbank_strip.png',
    ),
]
for strip_column, strip_ylabel, strip_path in strip_specs:
    ax = sns.stripplot(
        data=drug_syntox_df,
        x='toxicity_category',
        y=strip_column,
        palette=color_dict,
        hue='toxicity_category',
        order=tox_order,
    )
    # Overlay a box plot with everything hidden except the mean line.
    # order=tox_order keeps the mean lines on the same categories as the strips;
    # without it the box plot falls back to its own category ordering.
    sns.boxplot(
        showmeans=True,
        meanline=True,
        meanprops={'color': 'k', 'ls': '-', 'lw': 1},
        medianprops={'visible': False},
        whiskerprops={'visible': False},
        zorder=10,
        x="toxicity_category",
        y=strip_column,
        data=drug_syntox_df,
        order=tox_order,
        showfliers=False,
        showbox=False,
        showcaps=False,
        ax=ax
    )
    ax.set(xlabel='', ylabel=strip_ylabel)
    plt.xticks(fontsize=20)
    ax.yaxis.label.set_size(20)
    plt.tight_layout()
    plt.savefig(strip_path, dpi=700)
    plt.close()

- Correlation Scatter Plots (target jaccard similarity v Synergy Scores)
- R^2 value
- Best fit line
- Pearson correlation coefficient
- Spearman correlation coefficient

In [9]:
################### TARGET ###################
##############################################

def _scatter_with_linear_fit(x, y, x_label, y_label, out_path):
    """Scatter ``y`` against ``x`` with a degree-1 best-fit line and save the figure.

    Returns ``(coefficients, fitted polynomial, predicted y, R^2, Pearson r,
    Spearman rho)``; both correlations are computed with ``x.corr(y)``.
    """
    plt.scatter(x, y)

    # Best fit line; its R^2 is shown in the legend
    coefficients = np.polyfit(x, y, 1)
    fitted_line = np.poly1d(coefficients)
    y_pred = fitted_line(x)
    r_squared = r2_score(y, y_pred)
    plt.plot(x, y_pred, "r-", alpha=0.8, label=f'R² = {r_squared:.3f}')
    plt.xlabel(x_label, fontsize=20)
    plt.ylabel(y_label, fontsize=20)
    plt.legend()
    plt.tight_layout()
    plt.savefig(out_path, dpi=700)
    plt.close()

    # Pearson (default) and Spearman correlation coefficients
    pearson_corr = x.corr(y)
    spearman_corr = x.corr(y, method='spearman')
    return coefficients, fitted_line, y_pred, r_squared, pearson_corr, spearman_corr


# Target Jaccard similarity (x) and each synergy score (y), restricted to pairs with a similarity value
has_target_js = drug_syntox_df['target_jaccard'].notna()
x_targjs = drug_syntox_df['target_jaccard'].dropna()
y_targjs_bliss_true = drug_syntox_df.loc[has_target_js, 'synergy_bliss']
y_targjs_loewe_true = drug_syntox_df.loc[has_target_js, 'synergy_loewe']
y_targjs_hsa_true = drug_syntox_df.loc[has_target_js, 'synergy_hsa']
y_targjs_zip_true = drug_syntox_df.loc[has_target_js, 'synergy_zip']
y_targjs_smax_true = drug_syntox_df.loc[has_target_js, 'S_max']
y_targjs_smean_true = drug_syntox_df.loc[has_target_js, 'S_mean']
y_targjs_ssum_true = drug_syntox_df.loc[has_target_js, 'S_sum']

####### BLISS ########
(z_targjs_bliss, p_targjs_bliss, y_targjs_bliss_pred, r_squared_targjs_bliss,
 targjs_bliss_corr, targjs_bliss_spearman_corr) = _scatter_with_linear_fit(
    x_targjs,
    y_targjs_bliss_true,
    'Target Jaccard Similarity',
    'Bliss Synergy Score',
    'results/jaccard_similarity_analysis/bliss_v_targjs_drugbank_scatter.png',
)

####### HSA ########
(z_targjs_hsa, p_targjs_hsa, y_targjs_hsa_pred, r_squared_targjs_hsa,
 targjs_hsa_corr, targjs_hsa_spearman_corr) = _scatter_with_linear_fit(
    x_targjs,
    y_targjs_hsa_true,
    'Target Jaccard Similarity',
    'HSA Synergy Score',
    'results/jaccard_similarity_analysis/hsa_v_targjs_drugbank_scatter.png',
)

####### LOEWE ########
(z_targjs_loewe, p_targjs_loewe, y_targjs_loewe_pred, r_squared_targjs_loewe,
 targjs_loewe_corr, targjs_loewe_spearman_corr) = _scatter_with_linear_fit(
    x_targjs,
    y_targjs_loewe_true,
    'Target Jaccard Similarity',
    'Loewe Synergy Score',
    'results/jaccard_similarity_analysis/loewe_v_targjs_drugbank_scatter.png',
)

####### ZIP ########
(z_targjs_zip, p_targjs_zip, y_targjs_zip_pred, r_squared_targjs_zip,
 targjs_zip_corr, targjs_zip_spearman_corr) = _scatter_with_linear_fit(
    x_targjs,
    y_targjs_zip_true,
    'Target Jaccard Similarity',
    'ZIP Synergy Score',
    'results/jaccard_similarity_analysis/zip_v_targjs_drugbank_scatter.png',
)

####### S_MAX ########
(z_targjs_smax, p_targjs_smax, y_targjs_smax_pred, r_squared_targjs_smax,
 targjs_smax_corr, targjs_smax_spearman_corr) = _scatter_with_linear_fit(
    x_targjs,
    y_targjs_smax_true,
    'Target Jaccard Similarity',
    'S_max Synergy Score',
    'results/jaccard_similarity_analysis/smax_v_targjs_drugbank_scatter.png',
)

####### S_MEAN ########
(z_targjs_smean, p_targjs_smean, y_targjs_smean_pred, r_squared_targjs_smean,
 targjs_smean_corr, targjs_smean_spearman_corr) = _scatter_with_linear_fit(
    x_targjs,
    y_targjs_smean_true,
    'Target Jaccard Similarity',
    'S_mean Synergy Score',
    'results/jaccard_similarity_analysis/smean_v_targjs_drugbank_scatter.png',
)

####### S_SUM ########
(z_targjs_ssum, p_targjs_ssum, y_targjs_ssum_pred, r_squared_targjs_ssum,
 targjs_ssum_corr, targjs_ssum_spearman_corr) = _scatter_with_linear_fit(
    x_targjs,
    y_targjs_ssum_true,
    'Target Jaccard Similarity',
    'S_sum Synergy Score',
    'results/jaccard_similarity_analysis/ssum_v_targjs_drugbank_scatter.png',
)


In [10]:
################### ALL PATHWAY ###################
###################################################

def _scatter_fit_and_correlate(x, y_true, x_label, y_label, out_path):
    """Fit a line to (x, y_true), save a scatter plot with that line, and correlate the series.

    Defined inside this cell so the cell can be run on its own.

    Parameters
    ----------
    x : pandas.Series
        Values for the horizontal axis (here: a Jaccard similarity column).
    y_true : pandas.Series
        Values for the vertical axis (here: a synergy score column), covering the same rows as ``x``.
    x_label, y_label : str
        Axis labels for the figure.
    out_path : str
        Path of the PNG file to write (saved at dpi=700).

    Returns
    -------
    tuple
        ``(coeffs, poly, y_pred, r_squared, pearson, spearman)`` where ``coeffs`` is the
        ``np.polyfit`` result for degree 1, ``poly`` the matching ``np.poly1d``, ``y_pred`` the
        fitted values at ``x``, ``r_squared`` the ``r2_score`` of the fit, and ``pearson`` /
        ``spearman`` the two ``Series.corr`` coefficients.
    """
    # Fit before drawing so a failing fit never leaves a half-built figure open.
    coeffs = np.polyfit(x, y_true, 1)
    poly = np.poly1d(coeffs)
    y_pred = poly(x)
    r_squared = r2_score(y_true, y_pred)

    # An explicit figure/axes pair guarantees a fresh canvas instead of whatever pyplot has current.
    fig, ax = plt.subplots()
    ax.scatter(x, y_true)
    ax.plot(x, y_pred, "r-", alpha=0.8, label=f'R² = {r_squared:.3f}')
    ax.set_xlabel(x_label, fontsize=20)
    ax.set_ylabel(y_label, fontsize=20)
    ax.legend()
    fig.tight_layout()
    fig.savefig(out_path, dpi=700)
    plt.close(fig)

    pearson = x.corr(y_true)
    spearman = x.corr(y_true, method='spearman')
    return coeffs, poly, y_pred, r_squared, pearson, spearman


# Keep only the rows that have an all-pathway Jaccard similarity. x and every y are selected with
# the same mask, so they share row labels.
_has_allpwjs = drug_syntox_df['all_pw_jaccard'].notna()
x_allpwjs = drug_syntox_df.loc[_has_allpwjs, 'all_pw_jaccard']
y_allpwjs_bliss_true = drug_syntox_df.loc[_has_allpwjs, 'synergy_bliss']
y_allpwjs_loewe_true = drug_syntox_df.loc[_has_allpwjs, 'synergy_loewe']
y_allpwjs_hsa_true = drug_syntox_df.loc[_has_allpwjs, 'synergy_hsa']
y_allpwjs_zip_true = drug_syntox_df.loc[_has_allpwjs, 'synergy_zip']
y_allpwjs_smax_true = drug_syntox_df.loc[_has_allpwjs, 'S_max']
y_allpwjs_smean_true = drug_syntox_df.loc[_has_allpwjs, 'S_mean']
y_allpwjs_ssum_true = drug_syntox_df.loc[_has_allpwjs, 'S_sum']

_ALLPWJS_XLABEL = 'All PWs Jaccard Similarity'
_ALLPWJS_PNG = 'results/jaccard_similarity_analysis/{}_v_allpwjs_drugbank_scatter.png'

# Each call below saves one figure and yields, in order:
# fit coefficients, fitted polynomial, predictions, R², Pearson r, Spearman rho.

####### BLISS ########
(z_allpwjs_bliss, p_allpwjs_bliss, y_allpwjs_bliss_pred, r_squared_allpwjs_bliss,
 allpwjs_bliss_corr, allpwjs_bliss_spearman_corr) = _scatter_fit_and_correlate(
    x_allpwjs, y_allpwjs_bliss_true, _ALLPWJS_XLABEL, 'Bliss Synergy Score',
    _ALLPWJS_PNG.format('bliss'))

####### HSA ########
(z_allpwjs_hsa, p_allpwjs_hsa, y_allpwjs_hsa_pred, r_squared_allpwjs_hsa,
 allpwjs_hsa_corr, allpwjs_hsa_spearman_corr) = _scatter_fit_and_correlate(
    x_allpwjs, y_allpwjs_hsa_true, _ALLPWJS_XLABEL, 'HSA Synergy Score',
    _ALLPWJS_PNG.format('hsa'))

####### LOEWE ########
(z_allpwjs_loewe, p_allpwjs_loewe, y_allpwjs_loewe_pred, r_squared_allpwjs_loewe,
 allpwjs_loewe_corr, allpwjs_loewe_spearman_corr) = _scatter_fit_and_correlate(
    x_allpwjs, y_allpwjs_loewe_true, _ALLPWJS_XLABEL, 'Loewe Synergy Score',
    _ALLPWJS_PNG.format('loewe'))

####### ZIP ########
(z_allpwjs_zip, p_allpwjs_zip, y_allpwjs_zip_pred, r_squared_allpwjs_zip,
 allpwjs_zip_corr, allpwjs_zip_spearman_corr) = _scatter_fit_and_correlate(
    x_allpwjs, y_allpwjs_zip_true, _ALLPWJS_XLABEL, 'ZIP Synergy Score',
    _ALLPWJS_PNG.format('zip'))

###### S_max #######
(z_allpwjs_smax, p_allpwjs_smax, y_allpwjs_smax_pred, r_squared_allpwjs_smax,
 allpwjs_smax_corr, allpwjs_smax_spearman_corr) = _scatter_fit_and_correlate(
    x_allpwjs, y_allpwjs_smax_true, _ALLPWJS_XLABEL, 'S_max Synergy Score',
    _ALLPWJS_PNG.format('smax'))

###### S_mean #######
(z_allpwjs_smean, p_allpwjs_smean, y_allpwjs_smean_pred, r_squared_allpwjs_smean,
 allpwjs_smean_corr, allpwjs_smean_spearman_corr) = _scatter_fit_and_correlate(
    x_allpwjs, y_allpwjs_smean_true, _ALLPWJS_XLABEL, 'S_mean Synergy Score',
    _ALLPWJS_PNG.format('smean'))

###### S_sum #######
(z_allpwjs_ssum, p_allpwjs_ssum, y_allpwjs_ssum_pred, r_squared_allpwjs_ssum,
 allpwjs_ssum_corr, allpwjs_ssum_spearman_corr) = _scatter_fit_and_correlate(
    x_allpwjs, y_allpwjs_ssum_true, _ALLPWJS_XLABEL, 'S_sum Synergy Score',
    _ALLPWJS_PNG.format('ssum'))


In [11]:
################### LOWEST PATHWAY ###################
######################################################

def _scatter_fit_and_correlate(x, y_true, x_label, y_label, out_path):
    """Fit a line to (x, y_true), save a scatter plot with that line, and correlate the series.

    Defined inside this cell so the cell can be run on its own.

    Parameters
    ----------
    x : pandas.Series
        Values for the horizontal axis (here: a Jaccard similarity column).
    y_true : pandas.Series
        Values for the vertical axis (here: a synergy score column), covering the same rows as ``x``.
    x_label, y_label : str
        Axis labels for the figure.
    out_path : str
        Path of the PNG file to write (saved at dpi=700).

    Returns
    -------
    tuple
        ``(coeffs, poly, y_pred, r_squared, pearson, spearman)`` where ``coeffs`` is the
        ``np.polyfit`` result for degree 1, ``poly`` the matching ``np.poly1d``, ``y_pred`` the
        fitted values at ``x``, ``r_squared`` the ``r2_score`` of the fit, and ``pearson`` /
        ``spearman`` the two ``Series.corr`` coefficients.
    """
    # Fit before drawing so a failing fit never leaves a half-built figure open.
    coeffs = np.polyfit(x, y_true, 1)
    poly = np.poly1d(coeffs)
    y_pred = poly(x)
    r_squared = r2_score(y_true, y_pred)

    # An explicit figure/axes pair guarantees a fresh canvas instead of whatever pyplot has current.
    fig, ax = plt.subplots()
    ax.scatter(x, y_true)
    ax.plot(x, y_pred, "r-", alpha=0.8, label=f'R² = {r_squared:.3f}')
    ax.set_xlabel(x_label, fontsize=20)
    ax.set_ylabel(y_label, fontsize=20)
    ax.legend()
    fig.tight_layout()
    fig.savefig(out_path, dpi=700)
    plt.close(fig)

    pearson = x.corr(y_true)
    spearman = x.corr(y_true, method='spearman')
    return coeffs, poly, y_pred, r_squared, pearson, spearman


# Keep only the rows that have a lowest-pathway Jaccard similarity. x and every y are selected
# with the same mask, so they share row labels.
_has_lowestpwjs = drug_syntox_df['lowest_pw_jaccard'].notna()
x_lowestpwjs = drug_syntox_df.loc[_has_lowestpwjs, 'lowest_pw_jaccard']
y_lowestpwjs_bliss_true = drug_syntox_df.loc[_has_lowestpwjs, 'synergy_bliss']
y_lowestpwjs_loewe_true = drug_syntox_df.loc[_has_lowestpwjs, 'synergy_loewe']
y_lowestpwjs_hsa_true = drug_syntox_df.loc[_has_lowestpwjs, 'synergy_hsa']
y_lowestpwjs_zip_true = drug_syntox_df.loc[_has_lowestpwjs, 'synergy_zip']
y_lowestpwjs_smax_true = drug_syntox_df.loc[_has_lowestpwjs, 'S_max']
y_lowestpwjs_smean_true = drug_syntox_df.loc[_has_lowestpwjs, 'S_mean']
y_lowestpwjs_ssum_true = drug_syntox_df.loc[_has_lowestpwjs, 'S_sum']

_LOWESTPWJS_XLABEL = 'Lowest PWs Jaccard Similarity'
_LOWESTPWJS_PNG = 'results/jaccard_similarity_analysis/{}_v_lowestpwjs_drugbank_scatter.png'

# Each call below saves one figure and yields, in order:
# fit coefficients, fitted polynomial, predictions, R², Pearson r, Spearman rho.

####### BLISS ########
(z_lowestpwjs_bliss, p_lowestpwjs_bliss, y_lowestpwjs_bliss_pred, r_squared_lowestpwjs_bliss,
 lowestpwjs_bliss_corr, lowestpwjs_bliss_spearman_corr) = _scatter_fit_and_correlate(
    x_lowestpwjs, y_lowestpwjs_bliss_true, _LOWESTPWJS_XLABEL, 'Bliss Synergy Score',
    _LOWESTPWJS_PNG.format('bliss'))

####### HSA ########
(z_lowestpwjs_hsa, p_lowestpwjs_hsa, y_lowestpwjs_hsa_pred, r_squared_lowestpwjs_hsa,
 lowestpwjs_hsa_corr, lowestpwjs_hsa_spearman_corr) = _scatter_fit_and_correlate(
    x_lowestpwjs, y_lowestpwjs_hsa_true, _LOWESTPWJS_XLABEL, 'HSA Synergy Score',
    _LOWESTPWJS_PNG.format('hsa'))

####### LOEWE ########
(z_lowestpwjs_loewe, p_lowestpwjs_loewe, y_lowestpwjs_loewe_pred, r_squared_lowestpwjs_loewe,
 lowestpwjs_loewe_corr, lowestpwjs_loewe_spearman_corr) = _scatter_fit_and_correlate(
    x_lowestpwjs, y_lowestpwjs_loewe_true, _LOWESTPWJS_XLABEL, 'Loewe Synergy Score',
    _LOWESTPWJS_PNG.format('loewe'))

####### ZIP ########
(z_lowestpwjs_zip, p_lowestpwjs_zip, y_lowestpwjs_zip_pred, r_squared_lowestpwjs_zip,
 lowestpwjs_zip_corr, lowestpwjs_zip_spearman_corr) = _scatter_fit_and_correlate(
    x_lowestpwjs, y_lowestpwjs_zip_true, _LOWESTPWJS_XLABEL, 'ZIP Synergy Score',
    _LOWESTPWJS_PNG.format('zip'))

###### S_max #######
(z_lowestpwjs_smax, p_lowestpwjs_smax, y_lowestpwjs_smax_pred, r_squared_lowestpwjs_smax,
 lowestpwjs_smax_corr, lowestpwjs_smax_spearman_corr) = _scatter_fit_and_correlate(
    x_lowestpwjs, y_lowestpwjs_smax_true, _LOWESTPWJS_XLABEL, 'S_max Synergy Score',
    _LOWESTPWJS_PNG.format('smax'))

###### S_mean #######
(z_lowestpwjs_smean, p_lowestpwjs_smean, y_lowestpwjs_smean_pred, r_squared_lowestpwjs_smean,
 lowestpwjs_smean_corr, lowestpwjs_smean_spearman_corr) = _scatter_fit_and_correlate(
    x_lowestpwjs, y_lowestpwjs_smean_true, _LOWESTPWJS_XLABEL, 'S_mean Synergy Score',
    _LOWESTPWJS_PNG.format('smean'))

###### S_sum #######
(z_lowestpwjs_ssum, p_lowestpwjs_ssum, y_lowestpwjs_ssum_pred, r_squared_lowestpwjs_ssum,
 lowestpwjs_ssum_corr, lowestpwjs_ssum_spearman_corr) = _scatter_fit_and_correlate(
    x_lowestpwjs, y_lowestpwjs_ssum_true, _LOWESTPWJS_XLABEL, 'S_sum Synergy Score',
    _LOWESTPWJS_PNG.format('ssum'))

In [12]:
# Write all the Pearson/Spearman correlations and R² values to a TSV file.
# One tuple per output row: (synergy score, similarity level, Pearson r, Spearman rho, R² of linear fit).
_correlation_rows = [
    ('Bliss', 'target jaccard similarity', targjs_bliss_corr, targjs_bliss_spearman_corr, r_squared_targjs_bliss),
    ('HSA', 'target jaccard similarity', targjs_hsa_corr, targjs_hsa_spearman_corr, r_squared_targjs_hsa),
    ('Loewe', 'target jaccard similarity', targjs_loewe_corr, targjs_loewe_spearman_corr, r_squared_targjs_loewe),
    ('ZIP', 'target jaccard similarity', targjs_zip_corr, targjs_zip_spearman_corr, r_squared_targjs_zip),
    ('S_max', 'target jaccard similarity', targjs_smax_corr, targjs_smax_spearman_corr, r_squared_targjs_smax),
    ('S_mean', 'target jaccard similarity', targjs_smean_corr, targjs_smean_spearman_corr, r_squared_targjs_smean),
    ('S_sum', 'target jaccard similarity', targjs_ssum_corr, targjs_ssum_spearman_corr, r_squared_targjs_ssum),
    ('Bliss', 'all pathway jaccard similarity', allpwjs_bliss_corr, allpwjs_bliss_spearman_corr, r_squared_allpwjs_bliss),
    ('HSA', 'all pathway jaccard similarity', allpwjs_hsa_corr, allpwjs_hsa_spearman_corr, r_squared_allpwjs_hsa),
    ('Loewe', 'all pathway jaccard similarity', allpwjs_loewe_corr, allpwjs_loewe_spearman_corr, r_squared_allpwjs_loewe),
    ('ZIP', 'all pathway jaccard similarity', allpwjs_zip_corr, allpwjs_zip_spearman_corr, r_squared_allpwjs_zip),
    ('S_max', 'all pathway jaccard similarity', allpwjs_smax_corr, allpwjs_smax_spearman_corr, r_squared_allpwjs_smax),
    ('S_mean', 'all pathway jaccard similarity', allpwjs_smean_corr, allpwjs_smean_spearman_corr, r_squared_allpwjs_smean),
    ('S_sum', 'all pathway jaccard similarity', allpwjs_ssum_corr, allpwjs_ssum_spearman_corr, r_squared_allpwjs_ssum),
    ('Bliss', 'lowest pathway jaccard similarity', lowestpwjs_bliss_corr, lowestpwjs_bliss_spearman_corr, r_squared_lowestpwjs_bliss),
    ('HSA', 'lowest pathway jaccard similarity', lowestpwjs_hsa_corr, lowestpwjs_hsa_spearman_corr, r_squared_lowestpwjs_hsa),
    ('Loewe', 'lowest pathway jaccard similarity', lowestpwjs_loewe_corr, lowestpwjs_loewe_spearman_corr, r_squared_lowestpwjs_loewe),
    ('ZIP', 'lowest pathway jaccard similarity', lowestpwjs_zip_corr, lowestpwjs_zip_spearman_corr, r_squared_lowestpwjs_zip),
    ('S_max', 'lowest pathway jaccard similarity', lowestpwjs_smax_corr, lowestpwjs_smax_spearman_corr, r_squared_lowestpwjs_smax),
    ('S_mean', 'lowest pathway jaccard similarity', lowestpwjs_smean_corr, lowestpwjs_smean_spearman_corr, r_squared_lowestpwjs_smean),
    ('S_sum', 'lowest pathway jaccard similarity', lowestpwjs_ssum_corr, lowestpwjs_ssum_spearman_corr, r_squared_lowestpwjs_ssum),
]

# Format every line before opening the file: opening with 'w' truncates it, so a missing or
# malformed value must fail here rather than after the previous results have been wiped.
_correlation_lines = ['Synergy score\tLevel\tPearson correlation coefficient\tSpearman correlation\tR squared value\n']
_correlation_lines.extend(
    '{}\t{}\t{:.4e}\t{:.4e}\t{:.4e}\n'.format(score, level, pearson, spearman, r_squared)
    for score, level, pearson, spearman, r_squared in _correlation_rows
)

with open('results/jaccard_similarity_analysis/targpw_v_synergy_correlations_drugbank.tsv', 'w') as f:
    f.writelines(_correlation_lines)
