# DDInter GraphSynergy Toxicity Score Analysis #

- GraphSynergy paper can be found here: https://academic.oup.com/jamia/article/28/11/2336/6362567?login=true#305111898

In [10]:
# Import everything needed
from matplotlib.patches import Patch
from scipy import stats
from sklearn.metrics import r2_score
from statsmodels.stats.multitest import multipletests
from preprocessing_functions import *
import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scikit_posthocs as sp
import seaborn as sns
import os

In [11]:
os.makedirs('results/two_hop_analysis/', exist_ok=True)

GraphSynergy creates toxicity score embeddings by:
1. Aggregating the all neighbors within 2 degrees of drugs' two_hop proteins on a PPIN
2. Creating a similarity score by computing the inner product of drug embeddings

So let's examine each of these options in how well toxicity category distributions are (Kruskal Wallis) and if there is a relationship in increasing order with the toxicity categories (Jonckheere Terpestra Test)

In [12]:
STRING_G = get_STRING_graph()

# Get all the drug combinations
drug_combos_df = pd.read_csv('data_processed/ddinter_processed_combos_syntoxtargallpw_string.csv')
targets_df = pd.read_csv('data_processed/ddinter_syntoxtarg_allpw_string.csv')

# Create a dataframe of drug combinations and their targets as well as the targets' neighbors within 2 hops
drug_combos_within_2_hops = {}
for index, row in drug_combos_df.iterrows():
    drugA = row['drug_row']
    drugB = row['drug_col']
    key = (drugA, drugB)
    if key not in drug_combos_within_2_hops:
        A_targets = set(targets_df[targets_df['drug_name'] == drugA]['STRING_ID'].dropna().values)
        B_targets = set(targets_df[targets_df['drug_name'] == drugB]['STRING_ID'].dropna().values)
        A_neighbors = set()
        B_neighbors = set()
        for target in A_targets:
            if target in STRING_G.nodes:
                A_neighbors.update(STRING_G.neighbors(target))
        for target in B_targets:
            if target in STRING_G.nodes:
                B_neighbors.update(STRING_G.neighbors(target))
        A_2nd_neighbors = set()
        B_2nd_neighbors = set()
        for neighbor in A_neighbors:
            if neighbor in STRING_G.nodes:
                A_2nd_neighbors.update(STRING_G.neighbors(neighbor))
        for neighbor in B_neighbors:
            if neighbor in STRING_G.nodes:
                B_2nd_neighbors.update(STRING_G.neighbors(neighbor))
        
        drug_combos_within_2_hops[(drugA, drugB)] = {
            'A_targets': A_targets,
            'B_targets': B_targets,
            'A_neighbors': A_neighbors,
            'B_neighbors': B_neighbors,
            'A_2nd_neighbors': A_2nd_neighbors,
            'B_2nd_neighbors': B_2nd_neighbors
        } 


Original shape of STRING edge list, physical detailed: (1477610, 6)


In [13]:
# For each drug combination, compute the jaccard similarity between the sets of neighborhood proteins of drug A and drug B
for index, row in drug_combos_df.iterrows():
    key = (row['drug_row'], row['drug_col'])
    # Get the full set of proteins within 2 hops of drug A and drug B
    A_full_within_2_hops = drug_combos_within_2_hops[key]['A_targets'].union(drug_combos_within_2_hops[key]['A_neighbors']).union(drug_combos_within_2_hops[key]['A_2nd_neighbors'])
    B_full_within_2_hops = drug_combos_within_2_hops[key]['B_targets'].union(drug_combos_within_2_hops[key]['B_neighbors']).union(drug_combos_within_2_hops[key]['B_2nd_neighbors'])
    # Compute the Jaccard similarity
    if len(A_full_within_2_hops.union(B_full_within_2_hops)) == 0:
        drug_combos_df.at[index, 'two_hop_jaccard'] = -1
        print('No targets found for drug combination', key)
        continue
    
    two_hop_jaccard = jaccard_similarity(A_full_within_2_hops, B_full_within_2_hops)
    drug_combos_df.at[index, 'two_hop_jaccard'] = two_hop_jaccard

In [14]:
# Test if jaccard similarity distribution is normal
print("Is the two hop JS distribution normal? Normal test p-value: ", stats.normaltest(drug_combos_df['two_hop_jaccard']))

# Look at histogram of two hop jaccard similarities
plt.hist(drug_combos_df['two_hop_jaccard'], bins=20)
plt.xlabel('2-Hop Neighbors Jaccard Similarity', fontsize=20)
plt.ylabel('Frequency')
plt.title('Histogram of T2-Hop Neighbors Jaccard Similarity')
plt.savefig('results/two_hop_analysis/twohopneighborjs_hist_ddinter.png')
plt.close()

# Save description of two hop jaccard similarities
drug_combos_df['two_hop_jaccard'].describe().to_csv('results/two_hop_analysis/twohopneighborjs_stats_ddinter.csv')

Is the two hop JS distribution normal? Normal test p-value:  NormaltestResult(statistic=6942.760534919494, pvalue=0.0)


- Kruskal Wallis Test
- Dunn Posthoc Test with Bonferroni Correction
- Jonckheere Terpestra Test
- ANOVA
- T Test with Bonferroni Correction

In [15]:
# Run Kruskal Wallis test on jaccard similarities with targeting Major, Moderate, and Minor toxicity

# Total sample size (N) for effect size calculation
N_total = len(drug_combos_df)
print(f"Total Sample Size (N) for analysis: {N_total}")

# Initialize an empty list to store the results from each iteration
final_results_database = []

# two_hop jaccard
major_twohop = drug_combos_df[drug_combos_df['toxicity_category'] == 'Major']['two_hop_jaccard'].dropna()
moderate_twohop = drug_combos_df[drug_combos_df['toxicity_category'] == 'Moderate']['two_hop_jaccard'].dropna()
minor_twohop = drug_combos_df[drug_combos_df['toxicity_category'] == 'Minor']['two_hop_jaccard'].dropna()
print(f'Major two_hop jaccard: {len(major_twohop)}')
print(f'Moderate two_hop jaccard: {len(moderate_twohop)}')
print(f'Minor two_hop jaccard: {len(minor_twohop)}')

h_statistic_twohop_tox, p_value_twohop_tox = stats.kruskal(major_twohop, moderate_twohop, minor_twohop)
eta_sq_h_twohop_tox = h_statistic_twohop_tox / (N_total - 1)
print(f'Kruskal-Wallis H statistic for target toxicity: {h_statistic_twohop_tox:.4f}')
print(f'P-value for target toxicity: {p_value_twohop_tox:.4f}')
print(f'Effect Size (Eta-Squared H): {eta_sq_h_twohop_tox:.4f}')

# Filter drug_combos_df for only Major, Moderate, and Minor toxicity categories and remove pairs with two_hop jaccard similarity of None
drug_combos_df_tox_twohop = drug_combos_df[drug_combos_df['toxicity_category'].isin(['Major', 'Moderate', 'Minor'])]
drug_combos_df_tox_twohop = drug_combos_df[~drug_combos_df_tox_twohop['two_hop_jaccard'].isnull()]
dunn_twohop_tox = sp.posthoc_dunn(drug_combos_df_tox_twohop, val_col='two_hop_jaccard', group_col='toxicity_category', p_adjust='bonferroni')
# Cliff's Delta: Delta is calculated as P(X > Y) - P(Y > X).
delta_major_minor = cliff_delta(major_twohop, minor_twohop)
delta_major_moderate = cliff_delta(major_twohop, moderate_twohop)
delta_moderate_minor = cliff_delta(moderate_twohop, minor_twohop)
print(f'Dunn post-hoc test: {dunn_twohop_tox}')
print(f"Cliff's Delta (Major vs Minor): {delta_major_minor:.4f}")
print(f"Cliff's Delta (Major vs Moderate): {delta_major_moderate:.4f}")
print(f"Cliff's Delta (Moderate vs Minor): {delta_moderate_minor:.4f}")

# jonkcheere terpestra test -- is there a trend in the overlap (jaccard similarity) as you increase toxicity from minor to major?
jaccard_major_samples = major_twohop.values.tolist()
jaccard_moderate_samples = moderate_twohop.values.tolist()
jaccard_minor_samples = minor_twohop.values.tolist()
# Increasing toxicity: Minor < Moderate < Major
jt_incr_twohop = jonckheere_terpestra_test([jaccard_minor_samples, jaccard_moderate_samples, jaccard_major_samples])
r_incr_twohop = jt_incr_twohop[0] / np.sqrt(N_total)
print(f"Jonckheere-Terpstra Increasing toxicity: Z={jt_incr_twohop[0]:.4f}, P={jt_incr_twohop[1]:.4e}, r={r_incr_twohop:.4f}")
# Decreasing toxicity: Minor < Moderate < Major
jt_decr_twohop = jonckheere_terpestra_test([jaccard_major_samples, jaccard_moderate_samples, jaccard_minor_samples])
r_decr_twohop = jt_decr_twohop[0] / np.sqrt(N_total)
print(f"Jonckheere-Terpstra Decreasing toxicity: Z={jt_decr_twohop[0]:.4f}, P={jt_decr_twohop[1]:.4e}, r={r_decr_twohop:.4f}")

# Run an ANOVA on two_hop jaccard similarity for Major, Moderate, and Minor toxicity categories
f_statistic_anova_twohop_tox, p_value_anova_twohop_tox = stats.f_oneway(major_twohop, moderate_twohop, minor_twohop)
print(f'ANOVA F-statistic for two_hop jaccard similarity: {f_statistic_anova_twohop_tox}')
print(f'P-value for two_hop jaccard similarity: {p_value_anova_twohop_tox}')

# Run a T test on two_hop jaccard similarity between Major/Minor, Major/Moderate, and Moderate/Minor toxicity categories
major_minor_twohop = stats.ttest_ind(major_twohop, minor_twohop)
major_moderate_twohop = stats.ttest_ind(major_twohop, moderate_twohop)
moderate_minor_twohop = stats.ttest_ind(moderate_twohop, minor_twohop)

# Run bonferroni correction on the p-values
ttest_p_values = [major_minor_twohop[1], major_moderate_twohop[1], moderate_minor_twohop[1]]
ttest_p_values_corrected = multipletests(ttest_p_values, method='bonferroni')
print(f'Major/Minor T-test: {major_minor_twohop[1]} Corrected: {ttest_p_values_corrected[1][0]}')
print(f'Major/Moderate T-test: {major_moderate_twohop[1]} Corrected: {ttest_p_values_corrected[1][1]}')
print(f'Moderate/Minor T-test: {moderate_minor_twohop[1]} Corrected: {ttest_p_values_corrected[1][2]}')

# Structured data output
# 1. Kruskal-Wallis Result
final_results_database.append({
    'Toxicity Metric': '2-Hop Neighboring Proteins Jaccard',
    'Test_Type': 'Kruskal-Wallis',
    'Comparison': 'All Groups',
    'Test_Statistic': f'H={h_statistic_twohop_tox:.4e}',
    'P_value': p_value_twohop_tox,
    'P_value_Corrected': np.nan,
    'Effect_Size_Type': 'Eta-Squared H',
    'Effect_Size_Value': eta_sq_h_twohop_tox
})
# 2. Dunn Post-Hoc Results (Major vs Minor, Major vs Moderate, Moderate vs Minor)
final_results_database.append({
    'Toxicity Metric': '2-Hop Neighboring Proteins Jaccard',
    'Test_Type': 'Dunn Post-Hoc',
    'Comparison': 'Major vs Minor',
    'Test_Statistic': 'Dunn Z',
    'P_value': np.nan,
    'P_value_Corrected': dunn_twohop_tox.loc['Major', 'Minor'],
    'Effect_Size_Type': "Cliff's Delta",
    'Effect_Size_Value': delta_major_minor
})
final_results_database.append({
    'Toxicity Metric': '2-Hop Neighboring Proteins Jaccard',
    'Test_Type': 'Dunn Post-Hoc',
    'Comparison': 'Major vs Moderate',
    'Test_Statistic': 'Dunn Z',
    'P_value': np.nan,
    'P_value_Corrected': dunn_twohop_tox.loc['Major', 'Moderate'],
    'Effect_Size_Type': "Cliff's Delta",
    'Effect_Size_Value': delta_major_moderate
})
final_results_database.append({
    'Toxicity Metric': '2-Hop Neighboring Proteins Jaccard',
    'Test_Type': 'Dunn Post-Hoc',
    'Comparison': 'Moderate vs Minor',
    'Test_Statistic': 'Dunn Z',
    'P_value': np.nan,
    'P_value_Corrected': dunn_twohop_tox.loc['Moderate', 'Minor'],
    'Effect_Size_Type': "Cliff's Delta",
    'Effect_Size_Value': delta_moderate_minor
})
# 3. Jonckheere-Terpstra Results
final_results_database.append({
    'Toxicity Metric': '2-Hop Neighboring Proteins Jaccard',
    'Test_Type': 'Jonckheere-Terpstra',
    'Comparison': 'Increasing Trend (Minor->Major)',
    'Test_Statistic': f'Z={jt_incr_twohop[0]:.4e}',
    'P_value': jt_incr_twohop[1],
    'P_value_Corrected': np.nan,
    'Effect_Size_Type': 'r',
    'Effect_Size_Value': r_incr_twohop
})
final_results_database.append({
    'Toxicity Metric': '2-Hop Neighboring Proteins Jaccard',
    'Test_Type': 'Jonckheere-Terpstra',
    'Comparison': 'Decreasing Trend (Major->Minor)',
    'Test_Statistic': f'Z={jt_decr_twohop[0]:.4e}',
    'P_value': jt_decr_twohop[1],
    'P_value_Corrected': np.nan,
    'Effect_Size_Type': 'r',
    'Effect_Size_Value': r_decr_twohop
})

results_df = pd.DataFrame(final_results_database)

# Display the first few rows of the database
print("\n--- Final Results Database (Head) ---")
print(results_df.head(20))

# Save the final database to a file
output_path = 'results/two_hop_analysis/two_hop_targ_jaccard_ddinter.csv'
results_df.to_csv(output_path, index=False)
print(f"\nFinal statistical summary saved to: {output_path}")

Total Sample Size (N) for analysis: 29064
Major two_hop jaccard: 5776
Moderate two_hop jaccard: 22457
Minor two_hop jaccard: 831
Kruskal-Wallis H statistic for target toxicity: 101.8579
P-value for target toxicity: 0.0000
Effect Size (Eta-Squared H): 0.0035
Dunn post-hoc test:                  Major     Minor      Moderate
Major     1.000000e+00  0.830098  1.813763e-22
Minor     8.300979e-01  1.000000  8.745228e-03
Moderate  1.813763e-22  0.008745  1.000000e+00
Cliff's Delta (Major vs Minor): -0.0367
Cliff's Delta (Major vs Moderate): -0.0835
Cliff's Delta (Moderate vs Minor): 0.0641
Jonckheere-Terpstra Increasing toxicity: Z=-8.1536, P=1.0000e+00, r=-0.0478
Jonckheere-Terpstra Decreasing toxicity: Z=8.1536, P=2.2204e-16, r=0.0478
ANOVA F-statistic for two_hop jaccard similarity: 35.000919347800014
P-value for two_hop jaccard similarity: 6.570102903395633e-16
Major/Minor T-test: 0.00025984970445992755 Corrected: 0.0007795491133797826
Major/Moderate T-test: 1.6710762658054505e-16 Correc

- Violin Plot

In [16]:
colors = ['#20965D', '#FFBC42', '#D81159']
tox_order = ['Minor', 'Moderate', 'Major']
color_dict = dict(zip(tox_order, colors))
legend_elements = [
    Patch(facecolor=color_dict[cat], label = cat) for cat in tox_order
]

ax = sns.violinplot(data=drug_combos_df, x='toxicity_category', y='two_hop_jaccard', palette=color_dict, hue='toxicity_category', order=tox_order)
ax.set(xlabel='', ylabel='Neighbor Jaccard Similarity')
plt.xticks(fontsize=20)
ax.yaxis.label.set_size(20)
plt.tight_layout()
plt.savefig('results/two_hop_analysis/twohop_js_v_toxcatwohop_ddinter_violin.png', dpi=700)
plt.close()

- Strip Plot

In [17]:
ax = sns.stripplot(data=drug_combos_df, x='toxicity_category', y='two_hop_jaccard', palette=color_dict, hue='toxicity_category', order=tox_order)
sns.boxplot( # plot the mean line
    showmeans=True,
    meanline=True,
    meanprops={'color': 'k', 'ls': '-', 'lw': 1},
    medianprops={'visible': False},
    whiskerprops={'visible': False},
    zorder=10,
    x="toxicity_category",
    y="two_hop_jaccard",
    data=drug_combos_df,
    showfliers=False,
    showbox=False,
    showcaps=False,
    ax=ax
)
ax.set(xlabel='', ylabel='Neighbor Jaccard Similarity')
plt.xticks(fontsize=20)
ax.yaxis.label.set_size(20)
plt.tight_layout()
plt.savefig('results/two_hop_analysis/twohop_js_v_toxcatwohop_ddinter_strip.png', dpi=700)
plt.close()

- Correlation Scatter Plots (Neighboring Proteins withing 2 Hops Jaccard Similarity v Synergy Scores)
- R^2 value
- Best fit line
- Pearson correlation coefficient
- Spearman correlation coefficient

In [18]:
x_twohop = drug_combos_df['two_hop_jaccard']
y_bliss_true = drug_combos_df['synergy_bliss']
y_loewe_true = drug_combos_df['synergy_loewe']
y_hsa_true = drug_combos_df['synergy_hsa']
y_zip_true = drug_combos_df['synergy_zip']
y_smax_true = drug_combos_df['S_max']
y_smean_true = drug_combos_df['S_mean']
y_ssum_true = drug_combos_df['S_sum']

####### BLISS ########
# Let's plot two hop jaccard similarity against bliss synergy scores
plt.scatter(x_twohop, y_bliss_true)

# Best fit line
z_twohop_bliss = np.polyfit(x_twohop, y_bliss_true, 1)
p_twohop_bliss = np.poly1d(z_twohop_bliss)
y_bliss_pred = p_twohop_bliss(x_twohop)
r_squared_twohop_bliss = r2_score(y_bliss_true, y_bliss_pred)
plt.plot(x_twohop, y_bliss_pred, "r-", alpha=0.8, label=f'R² = {r_squared_twohop_bliss:.3f}')
plt.xlabel('Neighborhood Jaccard Similarity', fontsize=20)
plt.ylabel('Bliss Synergy Score', fontsize=20)
plt.legend()
plt.tight_layout()
plt.savefig('results/two_hop_analysis/bliss_v_twohop_ddinter_scatter.png', dpi=700)
plt.close()

# Calculate Pearson/Spearman correlation coefficient between two hop jaccard similarity and bliss synergy score
two_hop_bliss_corr = x_twohop.corr(y_bliss_true)
two_hop_bliss_spearman_corr = x_twohop.corr(y_bliss_true, method='spearman')

####### HSA ########
# Let's plot two hop jaccard similarity against hsa synergy scores
plt.scatter(x_twohop, y_hsa_true)

# Best fit line
z_twohop_hsa = np.polyfit(x_twohop, y_hsa_true, 1)
p_twohop_hsa = np.poly1d(z_twohop_hsa)
y_hsa_pred = p_twohop_hsa(x_twohop)
r_squared_twohop_hsa = r2_score(y_hsa_true, y_hsa_pred)
plt.plot(x_twohop, y_hsa_pred, "r-", alpha=0.8, label=f'R² = {r_squared_twohop_hsa:.3f}')
plt.xlabel('Neighborhood Jaccard Similarity', fontsize=20)
plt.ylabel('HSA Synergy Score', fontsize=20)
plt.legend()
plt.tight_layout()
plt.savefig('results/two_hop_analysis/hsa_v_twohop_ddinter_scatter.png', dpi=700)
plt.close()

# Calculate Pearson/Spearman correlation coefficient between two hop jaccard similarity and hsa synergy score
two_hop_hsa_corr = x_twohop.corr(y_hsa_true)
two_hop_hsa_spearman_corr = x_twohop.corr(y_hsa_true, method='spearman')

####### LOEWE ########
# Let's plot two hop jaccard similarity against loewe synergy scores
plt.scatter(x_twohop, y_loewe_true)

# Best fit line
z_twohop_loewe = np.polyfit(x_twohop, y_loewe_true, 1)
p_twohop_loewe = np.poly1d(z_twohop_loewe)
y_loewe_pred = p_twohop_loewe(x_twohop)
r_squared_twohop_loewe = r2_score(y_loewe_true, y_loewe_pred)
plt.plot(x_twohop, y_loewe_pred, "r-", alpha=0.8, label=f'R² = {r_squared_twohop_loewe:.3f}')

plt.xlabel('Neighborhood Jaccard Similarity', fontsize=20)
plt.ylabel('Loewe Synergy Score', fontsize=20)
plt.legend()
plt.tight_layout()
plt.savefig('results/two_hop_analysis/loewe_v_twohop_ddinter_scatter.png', dpi=700)
plt.close()

# Calculate Pearson/Spearman correlation coefficient between two hop jaccard similarity and loewe synergy score
two_hop_loewe_corr = x_twohop.corr(y_loewe_true)
two_hop_loewe_spearman_corr = x_twohop.corr(y_loewe_true, method='spearman')

####### ZIP ########
# Let's plot two hop jaccard similarity against zip synergy scores
plt.scatter(x_twohop, y_zip_true)

# Best fit line
z_twohop_zip = np.polyfit(x_twohop, y_zip_true, 1)
p_twohop_zip = np.poly1d(z_twohop_zip)
y_zip_pred = p_twohop_zip(x_twohop)
r_squared_twohop_zip = r2_score(y_zip_true, y_zip_pred)
plt.plot(x_twohop, y_zip_pred, "r-", alpha=0.8, label=f'R² = {r_squared_twohop_zip:.3f}')

plt.xlabel('Neighborhood Jaccard Similarity', fontsize=20)
plt.ylabel('ZIP Synergy Score', fontsize=20)
plt.legend()
plt.tight_layout()
plt.savefig('results/two_hop_analysis/zip_v_twohop_ddinter_scatter.png', dpi=700)
plt.close()

# Calculate Pearson/Spearman correlation coefficient between two hop jaccard similarity and zip synergy score
two_hop_zip_corr = x_twohop.corr(y_zip_true)
two_hop_zip_spearman_corr = x_twohop.corr(y_zip_true, method='spearman')

###### S_MAX ########
# Let's plot two hop jaccard similarity against S_max synergy scores
plt.scatter(x_twohop, y_smax_true)

# Best fit line
z_twohop_smax = np.polyfit(x_twohop, y_smax_true, 1)
p_twohop_smax = np.poly1d(z_twohop_smax)
y_smax_pred = p_twohop_smax(x_twohop)
r_squared_twohop_smax = r2_score(y_smax_true, y_smax_pred)
plt.plot(x_twohop, y_smax_pred, "r-", alpha=0.8, label=f'R² = {r_squared_twohop_smax:.3f}')

plt.xlabel('Neighborhood Jaccard Similarity', fontsize=20)
plt.ylabel('S_max Synergy Score', fontsize=20)
plt.legend()
plt.tight_layout()
plt.savefig('results/two_hop_analysis/smax_v_twohop_ddinter_scatter.png', dpi=700)
plt.close()

# Calculate Pearson/Spearman correlation
two_hop_smax_corr = x_twohop.corr(y_smax_true)
two_hop_smax_spearman_corr = x_twohop.corr(y_smax_true, method='spearman')

###### S_MEAN ########
# Let's plot two hop jaccard similarity against S_mean synergy scores
plt.scatter(x_twohop, y_smean_true)

# Best fit line
z_twohop_smean = np.polyfit(x_twohop, y_smean_true, 1)
p_twohop_smean = np.poly1d(z_twohop_smean)
y_smean_pred = p_twohop_smean(x_twohop)
r_squared_twohop_smean = r2_score(y_smean_true, y_smean_pred)
plt.plot(x_twohop, y_smean_pred, "r-", alpha=0.8, label=f'R² = {r_squared_twohop_smean:.3f}')

plt.xlabel('Neighborhood Jaccard Similarity', fontsize=20)
plt.ylabel('S_mean Synergy Score', fontsize=20)
plt.legend()
plt.tight_layout()
plt.savefig('results/two_hop_analysis/smean_v_twohop_ddinter_scatter.png', dpi=700)
plt.close()

# Calculate Pearson/Spearman correlation
two_hop_smean_corr = x_twohop.corr(y_smean_true)
two_hop_smean_spearman_corr = x_twohop.corr(y_smean_true, method='spearman')

###### S_SUM ########
# Let's plot two hop jaccard similarity against S_sum synergy scores
plt.scatter(x_twohop, y_ssum_true)

# Best fit line
z_twohop_ssum = np.polyfit(x_twohop, y_ssum_true, 1)
p_twohop_ssum = np.poly1d(z_twohop_ssum)
y_ssum_pred = p_twohop_ssum(x_twohop)
r_squared_twohop_ssum = r2_score(y_ssum_true, y_ssum_pred)
plt.plot(x_twohop, y_ssum_pred, "r-", alpha=0.8, label=f'R² = {r_squared_twohop_ssum:.3f}')

plt.xlabel('Neighborhood Jaccard Similarity', fontsize=20)
plt.ylabel('S_sum Synergy Score', fontsize=20)
plt.legend()
plt.tight_layout()
plt.savefig('results/two_hop_analysis/ssum_v_twohop_ddinter_scatter.png', dpi=700)
plt.close()

# Calculate Pearson/Spearman correlation
two_hop_ssum_corr = x_twohop.corr(y_ssum_true)
two_hop_ssum_spearman_corr = x_twohop.corr(y_ssum_true, method='spearman')

# Write all the correlations and spearman correlations to a file
with open('results/two_hop_analysis/two_hop_v_synergy_correlations_ddinter.tsv', 'w') as f:
    f.write('Synergy score\tLevel\tPearson correlation coefficient\tSpearman correlation\tR squared value\n')
    f.write('Bliss\t2-Hop Neighborhood Jaccard Similarity\t{:.4e}\t{:.4e}\t{:.4e}\n'.format(two_hop_bliss_corr, two_hop_bliss_spearman_corr,r_squared_twohop_bliss))
    f.write('HSA\t2-Hop Neighborhood Jaccard Similarity\t{:.4e}\t{:.4e}\t{:.4e}\n'.format(two_hop_hsa_corr, two_hop_hsa_spearman_corr, r_squared_twohop_hsa))
    f.write('Loewe\t2-Hop Neighborhood Jaccard Similarity\t{:.4e}\t{:.4e}\t{:.4e}\n'.format(two_hop_loewe_corr, two_hop_loewe_spearman_corr, r_squared_twohop_loewe))
    f.write('ZIP\t2-Hop Neighborhood Jaccard Similarity\t{:.4e}\t{:.4e}\t{:.4e}\n'.format(two_hop_zip_corr, two_hop_zip_spearman_corr, r_squared_twohop_zip))
    f.write('S_max\t2-Hop Neighborhood Jaccard Similarity\t{:.4e}\t{:.4e}\t{:.4e}\n'.format(two_hop_smax_corr, two_hop_smax_spearman_corr, r_squared_twohop_smax))
    f.write('S_mean\t2-Hop Neighborhood Jaccard Similarity\t{:.4e}\t{:.4e}\t{:.4e}\n'.format(two_hop_smean_corr, two_hop_smean_spearman_corr, r_squared_twohop_smean))
    f.write('S_sum\t2-Hop Neighborhood Jaccard Similarity\t{:.4e}\t{:.4e}\t{:.4e}\n'.format(two_hop_ssum_corr, two_hop_ssum_spearman_corr, r_squared_twohop_ssum))