# DDINTER SYNTOX ANALYSIS #
Notebook that analyzes the relationship between synergy scores and toxicity levels.

In [2]:
# Import everything needed
from matplotlib.patches import Patch
from scipy import stats
from sklearn.metrics import r2_score
from statsmodels.stats.multitest import multipletests
from preprocessing_functions import *
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scikit_posthocs as sp
import json
import os

In [3]:
# Load the drug_syntox dataframe that was created by preprocessing all databases
drug_syntox_df = pd.read_csv("data_processed/ddinter_processed_combos_syntoxtargallpw_string.csv")

# Total number of entries (rows)
total_entries = drug_syntox_df.shape[0]
print('Ddinter fully processed dataframe shape: ', drug_syntox_df.shape[0])

# Count rows per toxicity category once. Series.get() yields 0 for a category
# that has no rows, where indexing with [...] would raise KeyError.
toxicity_counts = drug_syntox_df['toxicity_category'].value_counts()
num_entries_major = toxicity_counts.get('Major', 0)
num_entries_moderate = toxicity_counts.get('Moderate', 0)
num_entries_minor = toxicity_counts.get('Minor', 0)
pct_major = (num_entries_major / total_entries) * 100
pct_moderate = (num_entries_moderate / total_entries) * 100
pct_minor = (num_entries_minor / total_entries) * 100
print('Number of Major entries: ', f"{num_entries_major} ({pct_major:.1f}%)")
print('Number of Moderate entries: ', f"{num_entries_moderate} ({pct_moderate:.1f}%)")
print('Number of Minor entries: ', f"{num_entries_minor} ({pct_minor:.1f}%)")

# Every drug that appears in either position of a combination.
# Despite its name, num_unique_drugs holds the set of drugs; its length is the count.
num_unique_drugs = set(drug_syntox_df['drug_row'].unique()).union(set(drug_syntox_df['drug_col'].unique()))
print('Number of unique drugs: ', len(num_unique_drugs))

# Collect the unique drug pairs per toxicity category. A pair is order-insensitive:
# (A, B) and (B, A) count once, and the orientation seen first is the one stored.
unique_major_pairs = set()
unique_moderate_pairs = set()
unique_minor_pairs = set()
pairs_by_category = {
    'Major': unique_major_pairs,
    'Moderate': unique_moderate_pairs,
    'Minor': unique_minor_pairs,
}
# Iterating the three columns directly avoids building a Series per row (iterrows)
for drug_a, drug_b, category in zip(drug_syntox_df['drug_row'],
                                    drug_syntox_df['drug_col'],
                                    drug_syntox_df['toxicity_category']):
    seen_pairs = pairs_by_category.get(category)
    if seen_pairs is None:
        # Rows with any other category are not counted
        continue
    if (drug_a, drug_b) not in seen_pairs and (drug_b, drug_a) not in seen_pairs:
        seen_pairs.add((drug_a, drug_b))
print('Number of unique Major pairs: ', len(unique_major_pairs))
print('Number of unique Moderate pairs: ', len(unique_moderate_pairs))
print('Number of unique Minor pairs: ', len(unique_minor_pairs))

Ddinter fully processed dataframe shape:  29064
Number of Major entries:  5776 (19.9%)
Number of Moderate entries:  22457 (77.3%)
Number of Minor entries:  831 (2.9%)
Number of unique drugs:  331
Number of unique Major pairs:  679
Number of unique Moderate pairs:  1920
Number of unique Minor pairs:  116


In [4]:
# Columns summarised with min / max / mean / median / standard deviation,
# keyed by the label used in the output key names. Dict order is the order
# in which the statistics appear in the summary.
summary_columns = {
    'Bliss': 'synergy_bliss',
    'HSA': 'synergy_hsa',
    'Loewe': 'synergy_loewe',
    'ZIP': 'synergy_zip',
    'S_max': 'S_max',
    'S_mean': 'S_mean',
    'S_sum': 'S_sum',
    'average shortest path between targets': 'avg_short_path_btwn_targets',
}

# Headline counts of the dataset
summary_stats_dddinter_dic = {
    'Total Entries': total_entries,
    'Major Toxicity Entries': f"{num_entries_major} ({pct_major:.1f}%)",
    'Moderate Toxicity Entries': f"{num_entries_moderate} ({pct_moderate:.1f}%)",
    'Minor Toxicity Entries': f"{num_entries_minor} ({pct_minor:.1f}%)",
    'Unique Drugs': len(num_unique_drugs),
    # Unique pairs come from the de-duplicated pair sets, not from the
    # per-category row counts (which count every row of a pair)
    'Unique Major Toxicity Pairs': len(unique_major_pairs),
    'Unique Moderate Toxicity Pairs': len(unique_moderate_pairs),
    'Unique Minor Toxicity Pairs': len(unique_minor_pairs),
    'Number of Cell Lines': len(drug_syntox_df['cell_line_name'].unique()),
}

# Descriptive statistics for every summarised column
for stat_label, stat_column in summary_columns.items():
    column_values = drug_syntox_df[stat_column]
    summary_stats_dddinter_dic[f'Min of {stat_label}'] = column_values.min()
    summary_stats_dddinter_dic[f'Max of {stat_label}'] = column_values.max()
    summary_stats_dddinter_dic[f'Mean of {stat_label}'] = column_values.mean()
    summary_stats_dddinter_dic[f'Median of {stat_label}'] = column_values.median()
    summary_stats_dddinter_dic[f'Standard Deviation of {stat_label}'] = column_values.std()

def to_py(o):
    """Convert a NumPy value into a plain, JSON-serialisable Python object.

    NumPy scalars are unwrapped with ``.item()``, NumPy arrays are turned into
    lists, and float NaN / infinity become ``None``. Any other object is
    returned unchanged.
    """
    if isinstance(o, np.generic):
        o = o.item()
    if isinstance(o, np.ndarray):
        return o.tolist()
    # np.isfinite is False for NaN and +/-inf. It stands in for math.isnan /
    # math.isinf because `math` is not among the imports at the top of the notebook.
    if isinstance(o, float) and not np.isfinite(o):
        return None
    return o

# Unwrap every value that exposes ``.item()`` so that the json module
# receives plain Python objects; everything else is passed through as is.
summary_stats_clean = {}
for stat_name, stat_value in summary_stats_dddinter_dic.items():
    if hasattr(stat_value, "item"):
        stat_value = stat_value.item()
    summary_stats_clean[stat_name] = stat_value

# Persist the summary as indented JSON
with open("results/summary_stats_database/ddinter_summary_stats.json", "w") as f:
    json.dump(summary_stats_clean, f, indent=4)

In [5]:
# Plot the overall distribution of each synergy score as a 100-bin histogram.
# Each entry: (dataframe column, x-axis label / title prefix, output file stem)
overall_hist_specs = [
    ('synergy_bliss', 'Bliss synergy score', 'bliss'),
    ('synergy_loewe', 'Loewe synergy score', 'loewe'),
    ('synergy_hsa', 'HSA synergy score', 'hsa'),
    ('synergy_zip', 'ZIP synergy score', 'zip'),
    ('S_max', 'S_max', 'smax'),
    ('S_mean', 'S_mean', 'smean'),
    ('S_sum', 'S_sum', 'ssum'),
]
for score_column, axis_label, file_stem in overall_hist_specs:
    drug_syntox_df.hist(column=score_column, bins=100)
    plt.xlabel(axis_label)
    plt.ylabel('Frequency')
    plt.title(f'{axis_label} distribution in DrugComb-DDInter intersection')
    plt.savefig(f'results/synergy_score_distrib/{file_stem}_hist_ddinter.png')
    plt.close()

# Write the max, the min and the normality-test p-value of each score to a text
# file, in scientific notation with four digits after the decimal point.
# A p-value below 0.05 is read as "not normally distributed".
# Each entry: (dataframe column, label used in the report lines)
overall_stat_specs = [
    ('synergy_bliss', 'Bliss synergy'),
    ('synergy_hsa', 'HSA synergy'),
    ('synergy_loewe', 'Loewe synergy'),
    ('synergy_zip', 'ZIP synergy'),
    ('S_max', 'S_max'),
    ('S_mean', 'S_mean'),
    ('S_sum', 'S_sum'),
]
with open('results/synergy_score_distrib/synergy_stats_ddinter.txt', 'w') as f:
    for score_column, report_label in overall_stat_specs:
        score_values = drug_syntox_df[score_column]
        f.write(f'Max {report_label}: {score_values.max():.4e}\n')
        f.write(f'Min {report_label}: {score_values.min():.4e}\n')
        # Element [1] of the normaltest result is the p-value
        f.write(f'{report_label} normality test: {stats.normaltest(score_values)[1]:.4e}\n')

In [6]:
# Split the dataframe into one subset per toxicity category
drug_syntox_major = drug_syntox_df[drug_syntox_df['toxicity_category'] == 'Major']
drug_syntox_moderate = drug_syntox_df[drug_syntox_df['toxicity_category'] == 'Moderate']
drug_syntox_minor = drug_syntox_df[drug_syntox_df['toxicity_category'] == 'Minor']

# Each entry: (category name used in the title, subset of drug_syntox_df)
toxicity_subsets = [
    ('Major', drug_syntox_major),
    ('Moderate', drug_syntox_moderate),
    ('Minor', drug_syntox_minor),
]

# Each entry: (dataframe column, x-axis label / title prefix, output file stem)
category_hist_specs = [
    ('synergy_bliss', 'Synergy Bliss', 'bliss'),
    ('synergy_hsa', 'Synergy HSA', 'hsa'),
    ('synergy_loewe', 'Synergy Loewe', 'loewe'),
    ('synergy_zip', 'Synergy ZIP', 'zip'),
    ('S_max', 'S_max', 'smax'),
    ('S_mean', 'S_mean', 'smean'),
    ('S_sum', 'S_sum', 'ssum'),
]

# One 100-bin histogram per (synergy score, toxicity category) combination
for score_column, axis_label, file_stem in category_hist_specs:
    for category_name, category_frame in toxicity_subsets:
        category_frame[score_column].hist(bins=100)
        plt.xlabel(axis_label)
        plt.ylabel('Frequency')
        plt.title(f'{axis_label} Distribution for {category_name} Toxicity')
        plt.savefig(
            'results/synergy_score_tox_categ_distrib/'
            f'{file_stem}_{category_name.lower()}_hist_ddinter.png'
        )
        plt.close()

# Write the mean, standard deviation, median and variance of every synergy
# score to a CSV file, for the full dataframe and for each toxicity subset.
# Values are in scientific notation with four digits after the decimal point.
# Each entry: (label written in the first CSV column, dataframe column)
csv_score_specs = [
    ('Bliss', 'synergy_bliss'),
    ('HSA', 'synergy_hsa'),
    ('Loewe', 'synergy_loewe'),
    ('ZIP', 'synergy_zip'),
    ('S_max', 'S_max'),
    ('S_mean', 'S_mean'),
    ('S_sum', 'S_sum'),
]
# Each entry: (label written in the second CSV column, dataframe to summarise)
csv_group_specs = [
    ('All', drug_syntox_df),
    ('Major', drug_syntox_major),
    ('Moderate', drug_syntox_moderate),
    ('Minor', drug_syntox_minor),
]
with open('results/synergy_score_distrib/ddinter_summary_stats.csv', 'w') as f:
    f.write('Synergy Score, Toxicity Category, Mean, Standard Deviation, Median, Variance\n')
    for score_label, score_column in csv_score_specs:
        for group_label, group_frame in csv_group_specs:
            group_scores = group_frame[score_column]
            f.write(
                f'{score_label}, {group_label}, '
                f'{group_scores.mean():.4e}, {group_scores.std():.4e}, '
                f'{group_scores.median():.4e}, {group_scores.var():.4e}\n'
            )
    
    

- Kruskal-Wallis Test
- Dunn Post-hoc Test with Bonferroni Correction
- Jonckheere-Terpstra Test
- ANOVA
- T-test with Bonferroni Correction

In [7]:
# Question addressed below: do the synergy score distributions differ
# significantly between the Major, Moderate, and Minor toxicity categories?

# Keep only the rows whose toxicity category is Major, Moderate, or Minor
is_known_category = drug_syntox_df['toxicity_category'].isin(['Major', 'Moderate', 'Minor'])
drug_syntox_df = drug_syntox_df[is_known_category]

# Color dictionary for plotting: one color per category, in plotting order
colors = ['#20965D', '#FFBC42', '#D81159']
tox_order = ['Minor', 'Moderate', 'Major']
color_dict = {category: color for category, color in zip(tox_order, colors)}
legend_elements = [
    Patch(facecolor=color_dict[category], label=category)
    for category in tox_order
]

# Total sample size (N), used for the effect size calculations
N_total = len(drug_syntox_df)
print(f"Total Sample Size (N) for analysis: {N_total}")

# Dictionary mapping the abbreviated synergy score name to its dataframe column
synergy_labels = dict([
    ('Bliss', 'synergy_bliss'),
    ('HSA', 'synergy_hsa'),
    ('ZIP', 'synergy_zip'),
    ('Loewe', 'synergy_loewe'),
    ('S_max', 'S_max'),
    ('S_mean', 'S_mean'),
    ('S_sum', 'S_sum'),
])

# Collects one result record per statistical test and synergy score
final_results_database = list()

for key, value in synergy_labels.items():
    print(60*'-')
    print(f"Key: {key}, Value: {value}")
    print(60*'-')

    # Filter for Major, Moderate, and Minor toxicity categories
    major = drug_syntox_df[drug_syntox_df['toxicity_category'] == 'Major'][value]
    moderate = drug_syntox_df[drug_syntox_df['toxicity_category'] == 'Moderate'][value]
    minor = drug_syntox_df[drug_syntox_df['toxicity_category'] == 'Minor'][value]

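    # Run Kruskal-Wallis test on the synergy scores for Major, Moderate, and Minor toxicity categories
    # NOTE(review): H / (N - 1) is the formula usually cited as epsilon-squared; eta-squared H is
    # (H - k + 1) / (N - k) for k groups -- confirm which effect size the 'Eta-Squared H' label should report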
    h_statistic, p_value = stats.kruskal(major, moderate, minor)
    eta_sq_h = h_statistic / (N_total - 1)
    print(f'Kruskal-Wallis H statistic for {key} synergy: {h_statistic:.4f}')
    print(f'P-value for {key} synergy: {p_value:.4f}')
    print(f'Effect Size (Eta-Squared H): {eta_sq_h:.4f}')

    # Run Dunn post-hoc test on the synergy scores for Major, Moderate, and Minor toxicity categories with bonferroni correction
    dunn = sp.posthoc_dunn(drug_syntox_df, val_col=value, group_col='toxicity_category', p_adjust='bonferroni')
    # Cliff's Delta: Delta is calculated as P(X > Y) - P(Y > X).
    delta_major_minor = cliff_delta(major, minor)
    delta_major_moderate = cliff_delta(major, moderate)
    delta_moderate_minor = cliff_delta(moderate, minor)
    print(f'Dunn post-hoc test for {key} synergy: {dunn}')
    print(f"Cliff's Delta (Major vs Minor): {delta_major_minor:.4f}")
    print(f"Cliff's Delta (Major vs Moderate): {delta_major_moderate:.4f}")
    print(f"Cliff's Delta (Moderate vs Minor): {delta_moderate_minor:.4f}")

    # Run Jonckheere-Terpstra test to check whether there is a monotonic trend in the synergy scores as toxicity increases or decreases
    major_samples = major.values.tolist()
    moderate_samples = moderate.values.tolist()
    minor_samples = minor.values.tolist()
    # Increasing toxicity: Minor < Moderate < Major
    increasing_tox_samples = [minor_samples, moderate_samples, major_samples]
    jt_incr = jonckheere_terpestra_test(increasing_tox_samples)
    r_incr = jt_incr[0] / np.sqrt(N_total)
    print(f"Jonckheere-Terpstra Increasing toxicity: Z={jt_incr[0]:.4f}, P={jt_incr[1]:.4e}, r={r_incr:.4f}")
    # Decreasing toxicity: Major > Moderate > Minor
    decreasing_tox_samples = [ major_samples, moderate_samples, minor_samples]
    jt_decr = jonckheere_terpestra_test(decreasing_tox_samples)
    r_decr = jt_decr[0] / np.sqrt(N_total)
    print(f"Jonckheere-Terpstra Decreasing toxicity: Z={jt_decr[0]:.4f}, P={jt_decr[1]:.4e}, r={r_decr:.4f}")

    # Run an ANOVA on the synergy scores for Major, Moderate, and Minor toxicity categories
    f_statistic_anova_tox, p_value_anova_tox = stats.f_oneway(major, moderate, minor)
    print(f'ANOVA F-statistic for {key}: {f_statistic_anova_tox}')
    print(f'P-value for {key}: {p_value_anova_tox}')

    # Run a T test between Major/Minor, Major/Moderate, and Moderate/Minor toxicity categories
    major_minor = stats.ttest_ind(major, minor)
    major_moderate = stats.ttest_ind(major, moderate)
    moderate_minor = stats.ttest_ind(moderate, minor)

    # Run bonferroni correction on the p-values
    ttest_p_values = [major_minor[1], major_moderate[1], moderate_minor[1]]
    ttest_p_values_corrected = multipletests(ttest_p_values, method='bonferroni')
    print(f'Major/Minor T-test: {major_minor[1]} Corrected: {ttest_p_values_corrected[1][0]}')
    print(f'Major/Moderate T-test: {major_moderate[1]} Corrected: {ttest_p_values_corrected[1][1]}')
    print(f'Moderate/Minor T-test: {moderate_minor[1]} Corrected: {ttest_p_values_corrected[1][2]}')

    # Structured data output
    # 1. Kruskal-Wallis Result
    final_results_database.append({
        'Synergy_Score': key,
        'Test_Type': 'Kruskal-Wallis',
        'Comparison': 'All Groups',
        'Test_Statistic': f'H={h_statistic:.4e}',
        'P_value': p_value,
        'P_value_Corrected': np.nan,
        'Effect_Size_Type': 'Eta-Squared H',
        'Effect_Size_Value': eta_sq_h
    })
    # 2. Dunn Post-Hoc Results (Major vs Minor, Major vs Moderate, Moderate vs Minor)
    final_results_database.append({
        'Synergy_Score': key,
        'Test_Type': 'Dunn Post-Hoc',
        'Comparison': 'Major vs Minor',
        'Test_Statistic': 'Dunn Z',
        'P_value': np.nan,
        'P_value_Corrected': dunn.loc['Major', 'Minor'],
        'Effect_Size_Type': "Cliff's Delta",
        'Effect_Size_Value': delta_major_minor
    })
    final_results_database.append({
        'Synergy_Score': key,
        'Test_Type': 'Dunn Post-Hoc',
        'Comparison': 'Major vs Moderate',
        'Test_Statistic': 'Dunn Z',
        'P_value': np.nan,
        'P_value_Corrected': dunn.loc['Major', 'Moderate'],
        'Effect_Size_Type': "Cliff's Delta",
        'Effect_Size_Value': delta_major_moderate
    })
    final_results_database.append({
        'Synergy_Score': key,
        'Test_Type': 'Dunn Post-Hoc',
        'Comparison': 'Moderate vs Minor',
        'Test_Statistic': 'Dunn Z',
        'P_value': np.nan,
        'P_value_Corrected': dunn.loc['Moderate', 'Minor'],
        'Effect_Size_Type': "Cliff's Delta",
        'Effect_Size_Value': delta_moderate_minor
    })
    # 3. Jonckheere-Terpstra Results
    final_results_database.append({
        'Synergy_Score': key,
        'Test_Type': 'Jonckheere-Terpstra',
        'Comparison': 'Increasing Trend (Minor->Major)',
        'Test_Statistic': f'Z={jt_incr[0]:.4e}',
        'P_value': jt_incr[1],
        'P_value_Corrected': np.nan,
        'Effect_Size_Type': 'r',
        'Effect_Size_Value': r_incr
    })
    final_results_database.append({
        'Synergy_Score': key,
        'Test_Type': 'Jonckheere-Terpstra',
        'Comparison': 'Decreasing Trend (Major->Minor)',
        'Test_Statistic': f'Z={jt_decr[0]:.4e}',
        'P_value': jt_decr[1],
        'P_value_Corrected': np.nan,
        'Effect_Size_Type': 'r',
        'Effect_Size_Value': r_decr
    })
    
    # Plot violin, strip, and box plots for synergy scores vs toxicity categories
    plt.figure()
    ax = sns.violinplot(data=drug_syntox_df, x='toxicity_category', y=value, palette=color_dict, hue='toxicity_category', order=tox_order)
    ax.set(xlabel='', ylabel=f'{key} Synergy Score')
    plt.xticks(fontsize=20)
    ax.yaxis.label.set_size(20)
    plt.tight_layout()
    plt.savefig(f'results/figure2/{key}_v_toxcats_ddinter_violin.png', dpi=700)
    plt.close()

    # Plot strip plot with boxplot overlay of synergy scores vs toxicity categories
    plt.figure()
    ax = sns.stripplot(data=drug_syntox_df, x='toxicity_category', y=value, palette=color_dict, hue='toxicity_category', order=tox_order)
    sns.boxplot( # plot the mean line
        showmeans=True,
        meanline=True,
        meanprops={'color': 'k', 'ls': '-', 'lw': 1},
        medianprops={'visible': False},
        whiskerprops={'visible': False},
        zorder=10,
        x="toxicity_category",
        y=value,
        data=drug_syntox_df,
        showfliers=False,
        showbox=False,
        showcaps=False,
        ax=ax
    )
    ax.set(xlabel='', ylabel=f'{key} Synergy Score')
    plt.xticks(fontsize=20)
    ax.yaxis.label.set_size(20)
    plt.tight_layout()
    plt.savefig(f'results/figure2/{key}_v_toxcats_ddinter_strip.png', dpi=700)
    plt.close()

    # Plot box plot of synergy scores vs toxicity categories
    plt.figure()
    ax = sns.boxplot(data=drug_syntox_df, x='toxicity_category', y=value, palette=color_dict, hue='toxicity_category', order=tox_order)
    ax.set(xlabel='', ylabel=f'{key} Synergy Score')
    plt.xticks(fontsize=20)
    ax.yaxis.label.set_size(20)
    plt.tight_layout()
    plt.savefig(f'results/figure2/{key}_v_toxcats_ddinter_box.png', dpi=700)
    plt.close()

# Collect every per-score result row gathered above into a single table
results_df = pd.DataFrame(final_results_database)

# Display the first few rows of the database
print("\n--- Final Results Database (Head) ---")
print(results_df.head(20))

# Save the final database to a file
output_path = 'results/database_syntox_analysis/ddinter_synergy_toxicity_statistical_summary.csv'
# to_csv does not create missing folders, so make sure the target folder exists
os.makedirs(os.path.dirname(output_path), exist_ok=True)
results_df.to_csv(output_path, index=False)
print(f"\nFinal statistical summary saved to: {output_path}")

Total Sample Size (N) for analysis: 29064
------------------------------------------------------------
Key: Bliss, Value: synergy_bliss
------------------------------------------------------------
Kruskal-Wallis H statistic for Bliss synergy: 4.4914
P-value for Bliss synergy: 0.1059
Effect Size (Eta-Squared H): 0.0002
Dunn post-hoc test for Bliss synergy:              Major     Minor  Moderate
Major     1.000000  0.108875  0.990113
Minor     0.108875  1.000000  0.219358
Moderate  0.990113  0.219358  1.000000
Cliff's Delta (Major vs Minor): 0.0437
Cliff's Delta (Major vs Moderate): 0.0083
Cliff's Delta (Moderate vs Minor): 0.0368
Jonckheere-Terpstra Increasing toxicity: Z=1.6444, P=5.0042e-02, r=0.0096
Jonckheere-Terpstra Decreasing toxicity: Z=-1.6444, P=9.4996e-01, r=-0.0096
ANOVA F-statistic for Bliss: 0.23459134927728337
P-value for Bliss: 0.7908954807100184
Major/Minor T-test: 0.5478961116275101 Corrected: 1.0
Major/Moderate T-test: 0.7206489682688426 Corrected: 1.0
Moderate/Minor 

Plot the relationships between the synergy scores: how do they relate to one another?

In [8]:
# Pairwise comparison of the synergy scores: one scatter plot per pair of
# scores, followed by a TSV table of the Pearson correlation for each pair.
synergy_scores = ['synergy_bliss', 'synergy_hsa', 'synergy_loewe', 'synergy_zip', 'S_max', 'S_mean', 'S_sum']

# Column name -> (label used on axes/titles, short tag used in file names)
score_display = {
    'synergy_bliss': ('Synergy Bliss', 'bliss'),
    'synergy_hsa': ('Synergy HSA', 'hsa'),
    'synergy_loewe': ('Synergy Loewe', 'loewe'),
    'synergy_zip': ('Synergy ZIP', 'zip'),
    'S_max': ('S_max', 'smax'),
    'S_mean': ('S_mean', 'smean'),
    'S_sum': ('S_sum', 'ssum'),
}

# Every unordered pair, in the order the scores are listed above
score_pairs = list(itertools.combinations(synergy_scores, 2))

# savefig/open do not create missing folders, so make sure the target exists
scatter_dir = 'results/synergy_score_scatter_plots'
os.makedirs(scatter_dir, exist_ok=True)

# Scatter plot for each pair: first score on the x-axis, second on the y-axis.
# Titles and file names are written as "<y> vs. <x>" / "<y>_v_<x>".
for x_score, y_score in score_pairs:
    x_label, x_tag = score_display[x_score]
    y_label, y_tag = score_display[y_score]
    drug_syntox_df.plot.scatter(x=x_score, y=y_score)
    plt.xlabel(x_label, fontsize=20)
    plt.ylabel(y_label, fontsize=20)
    plt.title(f'{y_label} vs. {x_label}', fontsize=20)
    plt.savefig(f'{scatter_dir}/{y_tag}_v_{x_tag}_ddinter.png')
    plt.close()

# for each pair of synergy scores, calculate the pearson correlation coefficient and write all to a file
# NOTE(review): pearsonr does not drop missing values; this assumes the score
# columns contain no NaNs -- confirm against the preprocessing step.
with open(f'{scatter_dir}/synergy_score_correlation_ddinter.tsv', 'w') as f:
    f.write('Synergy Score 1\tSynergy Score 2\tCorrelation Coefficient\tP-value\n')
    for score1, score2 in score_pairs:
        correlation = stats.pearsonr(drug_syntox_df[score1], drug_syntox_df[score2])
        f.write(f'{score1}\t{score2}\t{correlation[0]}\t{correlation[1]}\n')

In [9]:
# Get percentages for all toxicity categories by synergy score.
# A combination counts as "synergistic" for a score when that score is > 0.
synergistic_bliss = drug_syntox_df[drug_syntox_df['synergy_bliss'] > 0]
synergistic_hsa = drug_syntox_df[drug_syntox_df['synergy_hsa'] > 0]
synergistic_loewe = drug_syntox_df[drug_syntox_df['synergy_loewe'] > 0]
synergistic_zip = drug_syntox_df[drug_syntox_df['synergy_zip'] > 0]
synergistic_smax = drug_syntox_df[drug_syntox_df['S_max'] > 0]
synergistic_smean = drug_syntox_df[drug_syntox_df['S_mean'] > 0]
synergistic_ssum = drug_syntox_df[drug_syntox_df['S_sum'] > 0]

# Display names of the scores and the toxicity categories, in output row order
synergy_scores = ['Bliss', 'HSA', 'Loewe', 'ZIP', 'S_max', 'S_mean', 'S_sum']
toxicity_categories = ['Major', 'Moderate', 'Minor']

# Display name -> synergistic subset for that score
synergistic_subsets = dict(zip(synergy_scores, [
    synergistic_bliss,
    synergistic_hsa,
    synergistic_loewe,
    synergistic_zip,
    synergistic_smax,
    synergistic_smean,
    synergistic_ssum,
]))

# Long-format table: one row per (synergy score, toxicity category) holding the
# share of that score's synergistic combinations that fall in the category.
synergy_toxicity_data = {
    'Synergy Score': [],
    'Toxicity Category': [],
    'Percentage': []
}
for score_name, subset in synergistic_subsets.items():
    n_synergistic = len(subset)
    for category in toxicity_categories:
        n_in_category = len(subset[subset['toxicity_category'] == category])
        synergy_toxicity_data['Synergy Score'].append(score_name)
        synergy_toxicity_data['Toxicity Category'].append(category)
        synergy_toxicity_data['Percentage'].append(n_in_category / n_synergistic * 100)

# Convert to DataFrame for easier plotting
df = pd.DataFrame(synergy_toxicity_data)

# We're not using the grouped bar chart anymore
# Skipping directly to the stacked chart

# Create a stacked percentage bar chart with specified order and colors
# Specify the order and color scheme
tox_order = ['Major', 'Moderate', 'Minor']
colors = ['#D81159', '#FFBC42', '#20965D']
color_dict = dict(zip(tox_order, colors))

# Create patch elements for the legend (Patch is imported at the top of the file)
legend_elements = [
    Patch(facecolor=color_dict[cat], label=cat) for cat in tox_order
]

# Pivot to one row per synergy score and one column per toxicity category,
# then reorder the columns to match the desired stacking order
df_pivot = df.pivot(index='Synergy Score', columns='Toxicity Category', values='Percentage')
df_pivot = df_pivot[tox_order]

# Draw on a single, explicitly created figure. Calling plt.figure() and then
# letting DataFrame.plot open its own figure leaves an empty figure behind
# that is never closed.
fig, ax = plt.subplots(figsize=(14, 8))
df_pivot.plot(kind='bar', stacked=True, color=colors, ax=ax)
plt.title('DDInter Toxicity Breakdown for Synergistic Combinations', fontsize=30)
plt.xlabel('Synergy Score', fontsize=20)
plt.ylabel('Percentage (%)', fontsize=20)
plt.xticks(rotation=45, fontsize=20)
plt.yticks(fontsize=20)

# Adjust the bottom margin to make room for the legend
plt.subplots_adjust(bottom=0.3)

# Replace the default legend with our custom one and place it further below the x-axis, centered
plt.legend(handles=legend_elements, title='Toxicity Category', fontsize=20, title_fontsize=20,
           loc='upper center', bbox_to_anchor=(0.5, -0.3), ncol=3)

# Apply tight layout with adjusted margins
plt.tight_layout()

# Add percentage labels inside each segment. prev_heights tracks the running
# top of each bar's stack so every label sits at the middle of its segment.
prev_heights = np.zeros(len(df_pivot))
for category, values in df_pivot.items():
    # White text on the 'Major' segments, black on the others, for contrast
    text_color = 'white' if category == 'Major' else 'black'
    for j, v in enumerate(values):
        ax.text(j, prev_heights[j] + v / 2, f'{v:.1f}%',
                ha='center', va='center', fontsize=12, fontweight='bold',
                color=text_color)
    prev_heights += values.to_numpy()

plt.tight_layout()
fig.savefig('results/figure2/synergy_score_toxicity_category_percentage_ddinter.png')
plt.close(fig)

<Figure size 1400x800 with 0 Axes>