# Validation neutralization assays versus polyclonal fits

Compare actual measured neutralization values for specific mutants to the polyclonal fits.

In [None]:
# Imports
import warnings
import numpy as np
import pandas as pd
import scipy as sp
import seaborn as sns
import matplotlib.pyplot as plt

# Plotting colors: the ordered list of hex codes later installed as the
# default seaborn palette (one color per plotted category, in this order).
tol_muted_adjusted = [
    "#000000",  # black
    "#CC6677",  # rose
    "#1f78b4",  # blue
    "#DDCC77",  # sand
    "#117733",  # green
    "#882255",  # wine
    "#88CCEE",  # light blue
    "#44AA99",  # teal
    "#999933",  # olive
    "#AA4499",  # purple
    "#DDDDDD",  # light grey
]

# Seaborn style settings
# Render and save figures at 300 dpi. `sns.set` runs first; the two calls after
# it then override the axes style and the default color palette.
sns.set(rc={"figure.dpi":300, "savefig.dpi":300})
sns.set_style("ticks")
# Use the custom `tol_muted_adjusted` list as the default color cycle.
sns.set_palette(tol_muted_adjusted)

# Suppress warnings
# NOTE(review): this silences every warning category for the rest of the
# session (including deprecation and numerical warnings) — confirm intended.
warnings.simplefilter("ignore")

File paths for data:

In [None]:
# Averaged per-mutation effect predictions from the model fits (CSV).
model_predictions_path = "results/antibody_escape/averages/89F_mut_effect.csv"

# Measured validation neutralization values (CSV).
validation_neuts_89F_path = "data/validation_neuts_89F.csv"

Read validation assay measurements and calculate log2 fold change IC50 values

In [None]:
# Read neutralization data.
# `na_filter=False` is the documented boolean spelling of the original
# `na_filter=None` (both are falsy): NA detection is turned off, so no cell is
# converted to NaN while reading.
validation_neuts_89F = pd.read_csv(validation_neuts_89F_path, na_filter=False)

# Calculate log2 ic50 fold change relative to the unmutated virus.
# The reference IC50 is selected by value and then by position, not by index
# label 0, so the "Unmutated" row does not have to be the first row of the CSV.
unmutated_ic50 = validation_neuts_89F.loc[
    validation_neuts_89F["aa_substitutions"] == "Unmutated", "ic50"
]
if unmutated_ic50.empty:
    raise ValueError(
        f"No 'Unmutated' row found in {validation_neuts_89F_path}; "
        "cannot compute fold change in IC50."
    )
WT_IC50 = unmutated_ic50.iloc[0]
validation_neuts_89F["log2 fold change ic50"] = np.log2(
    validation_neuts_89F["ic50"] / WT_IC50
)

validation_neuts_89F

Now get the predictions by the averaged **polyclonal** model fits:

In [None]:
# Read model predictions and collapse the wildtype / site / mutant columns into
# a single "aa_substitutions" key (e.g. wildtype + site + mutant) so the rows
# can be matched against the single-mutant viruses in the validation data.
mutation_columns = ["wildtype", "site", "mutant"]
model_prediction_df = (
    pd.read_csv(model_predictions_path)
    .assign(
        aa_substitutions=lambda df: (
            df["wildtype"] + df["site"].astype(str) + df["mutant"]
        )
    )
    .drop(columns=mutation_columns)
)

# Merge model predictions with measured ICs.
# Left join: every validation row is kept; `validate` makes pandas raise if a
# substitution appears more than once on either side.
merged = validation_neuts_89F.merge(
    model_prediction_df,
    on="aa_substitutions",
    how="left",
    validate="one_to_one",
)

# Rename columns for display, then replace all remaining NaN values (e.g. rows
# without a matching prediction) with 0.
display_names = {
    "aa_substitutions" : "Amino acid substitutions",
    "ic50_bound" : "Lower bound",
}
validation_vs_prediction = merged.rename(columns=display_names).fillna(0)

validation_vs_prediction

Calculate the Pearson correlation coefficient $r$ and $R^2$ between the escape scores predicted by our models and the log2 fold changes in IC50 measured in validation assays for the antibody 8.9F.

In [None]:
# Pearson correlation between the measured log2 fold change in IC50 (x) and the
# predicted escape score (y); `p` is the p-value returned alongside r.
r, p = sp.stats.pearsonr(
    validation_vs_prediction["log2 fold change ic50"],
    validation_vs_prediction["escape_median"],
)
print(f"R={r}", f"R^2={r**2}", sep="\n")

# Plot predicted vs measured: one point per row, colored by substitution and
# with the marker shape chosen by the "Lower bound" column.
corr_chart = sns.relplot(
    data=validation_vs_prediction,
    x="log2 fold change ic50",
    y="escape_median",
    hue="Amino acid substitutions",
    style="Lower bound",
    markers=["o", "s"],
    aspect=1.25,
    s=150,
    alpha=0.9,
    edgecolor="black",
)

# relplot draws a single facet here, so all axis-level styling targets axes[0, 0].
ax = corr_chart.axes[0, 0]
ax.set_xlabel(
    "log2 fold change IC50\nmeasured by pseudovirus neutralization assay",
    weight="bold",
)
ax.set_ylabel(
    "escape score\npredicted by DMS (arbitrary units)",
    weight="bold",
)
ax.set_xlim(-3.5, 9)
ax.set_ylim(-1.5, 4.5)

# Emphasize the legend section headings (the hue and style variable names).
# They are matched by their text rather than by fixed positions, so the
# styling stays correct when the number of plotted substitutions changes.
legend_headings = {"Amino acid substitutions", "Lower bound"}
for legend_text in corr_chart.legend.get_texts():
    if legend_text.get_text() in legend_headings:
        legend_text.set_weight("bold")
        legend_text.set_fontsize(12)

# Annotate the plot with the correlation statistics computed earlier.
ax.text(
    -2.25,
    3.5,
    f"r={r:.2f}\nR^2={r**2:.2f}",
    horizontalalignment="left",
    weight="bold",
    fontsize=12,
)

# Label points
# (dx, dy) offsets, in data units, from each point to its text label. Only the
# substitutions that need a hand-tuned position are listed; every other one
# uses `default_label_offset`.
label_offsets = {
    "Q149H": (-0.1, -0.275),
    "N119S": (0.3, 0.125),
    "Y129L": (0.3, -0.15),
    "S138N": (0.3, 0.0),
}
default_label_offset = (0.15, 0.125)

ax = corr_chart.axes[0, 0]

# Iterate over the column values directly instead of indexing by row label, so
# the loop does not depend on the frame having a default 0..n-1 index.
for x_pos, y_pos, name in zip(
    validation_vs_prediction["log2 fold change ic50"],
    validation_vs_prediction["escape_median"],
    validation_vs_prediction["Amino acid substitutions"],
):
    # The unmutated reference point is left unlabeled.
    if name == "Unmutated":
        continue
    dx, dy = label_offsets.get(name, default_label_offset)
    ax.text(
        x_pos + dx,
        y_pos + dy,
        f"{name}",
        horizontalalignment="left",
        weight="bold",
        fontsize=8,
    )

# Set square ratio
ax.set_box_aspect(1)