In [1]:
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
#from scipy.stats import pearsonr
#from scipy.stats import ttest_ind
#from scipy.stats import ttest_rel

Load PRODIGY Dataset

In [2]:
# Load the PRODIGY dataset (path is relative to the notebook's working directory).
prodigy = pd.read_csv('data/PRODIGY_dataset.csv')

# Convert the deltaG values to Kd via Kd = exp(deltaG / (R*T)).
T = 310 #Kelvin (37 C)
R = 0.001987 #kcal/mol*K
kd_linear = np.exp(prodigy["DG"].to_numpy()/(T*R))

# KD holds the log10-scaled Kd values; the random-baseline cell reads its range.
KD = np.log10(kd_linear)

# Extremes of log10(Kd) across the dataset. Deliberately NOT named `max`/`min`:
# those names would shadow the Python builtins for every later cell.
kd_log10_max = np.max(KD)
kd_log10_min = np.min(KD)

Load Test Case KD Predictions

In [3]:
# Root of the project tree that holds the per-test-case prediction tables.
# Defaults to the original author's location; set the
# STRUCTURE_INFORMED_CELL_SIGNALING_ROOT environment variable to run this
# notebook from a different machine or checkout without editing the cell.
PROJECT_ROOT = Path(os.environ.get(
    "STRUCTURE_INFORMED_CELL_SIGNALING_ROOT",
    "/Users/hollyhuber/Documents/structure_informed_cell_signaling2",
))

# Predicted-Kd tables for the two test cases (EGFR and GPCR).
df_egfr = pd.read_csv(PROJECT_ROOT / "egfr" / "data" / "predicted_kd.csv")
df_gpcr = pd.read_csv(PROJECT_ROOT / "gpcr" / "data" / "predicted_kd.csv")

Compute Absolute Log Errors

In [4]:
def abs_log10_kd_error(predictions):
    """Return |log10(predicted Kd) - log10(reported Kd)| for every row.

    Parameters
    ----------
    predictions : pandas.DataFrame
        Table with 'predicted_Kd(M)' and 'reported_Kd(M)' columns.

    Returns
    -------
    numpy.ndarray
        One absolute log10 error per row, in row order.
    """
    predicted = predictions['predicted_Kd(M)'].to_numpy()
    reported = predictions['reported_Kd(M)'].to_numpy()
    return np.abs(np.log10(predicted) - np.log10(reported))


errors = abs_log10_kd_error(df_egfr)    # EGFR test case
errors_2 = abs_log10_kd_error(df_gpcr)  # GPCR test case
# EGFR rows first, then GPCR rows.
all_errors = np.concatenate((errors, errors_2))

Compute Absolute Log Errors for Random (Uninformed Prior) Predictions

In [5]:
# Bounds of the log10(Kd) values held in KD.
database_min, database_max = KD.min(), KD.max()

# Fixed seed so the random baseline is reproducible.
rng = np.random.default_rng(125083)

# One draw per pipeline error, uniform over [database_min, database_max).
random_kd = database_min + (database_max - database_min) * rng.random(len(all_errors))

# Reported log10(Kd) values: EGFR rows first, then GPCR rows.
ground_truth = np.concatenate([
    np.log10(frame['reported_Kd(M)'].to_numpy())
    for frame in (df_egfr, df_gpcr)
])

# Absolute log10 error of the random draws against the reported values.
errors_random = np.absolute(ground_truth - random_kd)

Collect Errors into a DataFrame and Save as CSV

In [6]:
# Pair the random-baseline error with the pipeline error, row for row.
d = {"Uninformed Prior Error": errors_random, "ML Pipeline Error": all_errors}
df = pd.DataFrame(data=d)

# to_csv does not create missing directories, so make sure outputs/ exists;
# otherwise a fresh checkout fails here with an OSError.
output_path = Path("outputs") / "000_absolute_errors.csv"
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path)

AlphaFold Confidence vs. Prediction Error - Build DataFrames and Save as CSV for Correlation Analysis

In [7]:
# note, we only save EGFR data, as GPCR has no AF3 confidence, since it's not a predicted structure
AF_confidence, AF_confidence2, AF_confidence3 = (
    df_egfr[column].to_numpy()
    for column in ("AF_ranking_score", "AF_ipTM", "AF_fraction_disordered")
)

# One CSV per confidence metric, each pairing the metric with the EGFR errors.
# Entries are (column header, metric values, destination file), written in order:
# ranking score, ipTM, fraction disordered.
for header, metric, csv_path in (
    ("AlphaFold3 Ranking Score", AF_confidence,
     "outputs/000_kd_predictor_error_and_af3_ranking_score.csv"),
    ("AlphaFold3 ipTM", AF_confidence2,
     "outputs/000_kd_predictor_error_and_af3_iptm.csv"),
    ("AlphaFold3 fraction disordered", AF_confidence3,
     "outputs/000_kd_predictor_error_and_af3_fraction_disordered.csv"),
):
    d = {header: metric, "ml pipeline error": errors}
    df = pd.DataFrame(data=d)
    df.to_csv(csv_path)