# Single mutant pseudovirus titers versus functional scores

Compare the pseudovirus titers measured for individual single mutants with the functional scores measured for the same mutations by deep mutational scanning (DMS).

In [None]:
# Imports
import warnings
import numpy as np
import pandas as pd
import scipy as sp
import seaborn as sns
import matplotlib.pyplot as plt

# Colour cycle shared by the figures in this notebook (hex codes, in the
# order in which they are assigned to plotted categories)
tol_muted_adjusted = [
    "#000000", "#CC6677", "#1f78b4", "#DDCC77",
    "#117733", "#882255", "#88CCEE", "#44AA99",
    "#999933", "#AA4499", "#DDDDDD",
]

# Seaborn theme: 300 dpi for both displayed and saved figures, then the
# "ticks" style, then the palette above as the default colour cycle.
# The calls are kept in this order so the style and palette are applied
# after the base theme has been set.
sns.set(rc={key: 300 for key in ("figure.dpi", "savefig.dpi")})
sns.set_style("ticks")
sns.set_palette(tol_muted_adjusted)

# Silence all Python warnings from this point on
warnings.simplefilter("ignore")

File paths for data:

In [None]:
# Input files, given relative to the working directory of the notebook
functional_scores_path = "results/func_effects/averages/293T_entry_func_effects.csv"  # DMS functional effects
validation_titers_path = "data/single_mutant_functional_validations.csv"  # titers of individually tested mutants

In [None]:
# Read the titers measured for the individually tested mutants.
# NA detection is switched off so no cell is converted to NaN while loading;
# `na_filter` expects a boolean, so pass False explicitly.
validation_titers = pd.read_csv(validation_titers_path, na_filter=False)

# log2 is undefined for non-positive values, and warnings are silenced in
# this notebook, so a bad fold change would otherwise turn into -inf/NaN
# without any message. Fail loudly instead.
fold_change_col = "Fold change relative to unmutated median"
assert (validation_titers[fold_change_col] > 0).all(), (
    f"'{fold_change_col}' must be strictly positive to take log2"
)

# Calculate log2 fold change
validation_titers["log2 fold change"] = np.log2(validation_titers[fold_change_col])

validation_titers

Now get the functional scores measured by DMS:

In [None]:
# Load the DMS functional effects and give every row a substitution label
# (wildtype residue + site number + mutant residue) so it can be matched
# against the `aa_substitutions` column of the titer table. The three
# source columns are dropped once the label has been built.
functional_scores_df = (
    pd.read_csv(functional_scores_path)
    .assign(
        aa_substitutions=lambda scores: (
            scores["wildtype"]
            + scores["site"].astype(str)
            + scores["mutant"]
        )
    )
    .drop(columns=["wildtype", "site", "mutant"])
)

# Attach the DMS measurements to every titer row:
#   * the left join keeps titer rows that have no DMS entry;
#   * `validate` raises if a substitution label appears more than once in
#     the DMS table.
# NOTE(review): fillna(0) zero-fills every missing value in every column, so
# a titer row without a DMS match is plotted with an effect of 0 -
# presumably intended for the unmutated reference; confirm.
validation_vs_prediction = (
    validation_titers
    .merge(
        functional_scores_df,
        on=["aa_substitutions"],
        how="left",
        validate="many_to_one",
    )
    .rename(columns={"aa_substitutions": "Amino acid substitutions"})
    .fillna(0)
)

validation_vs_prediction

In [None]:
# Pearson correlation between the DMS functional score and the measured
# log2 titer fold change (the p-value is computed but not reported)
r, p = sp.stats.pearsonr(
    x=validation_vs_prediction["effect"],
    y=validation_vs_prediction["log2 fold change"],
)
print(f"R={r}")
print(f"R^2={r**2}")

# Scatter plot of DMS score vs. measured titer change, one colour per
# amino acid substitution
corr_chart = sns.relplot(
    data=validation_vs_prediction,
    x="effect",
    y="log2 fold change",
    hue="Amino acid substitutions",
    aspect=1.25,
    s=150,
    alpha=1,
)

# relplot returns a grid of axes; this figure has a single panel
ax = corr_chart.axes[0, 0]

ax.set_xlabel(
    "functional score\nmeasured by DMS",
    weight="bold",
)
ax.set_ylabel(
    "log2 pseudovirus titer fold change\ncompared to unmutated LASV GP",
    weight="bold",
)

# Ticks are set before the limits on purpose: set_xticks()/set_yticks()
# widen the view so that every listed tick is visible, so applying the
# limits last keeps the x-axis at the requested (-2.5, 1.5). The tick at 2
# lies outside that range and is therefore not drawn.
ax.set_xticks([-2, -1, 0, 1, 2])
ax.set_yticks([-8, -6, -4, -2, 0, 2])
ax.set_xlim(-2.5, 1.5)
ax.set_ylim(-8.5, 2.5)

corr_chart.legend.set_alignment("left")
corr_chart.legend.set_title("Amino acid substitutions", prop={"weight" : "bold"})

# Annotate the correlation in the upper-left corner (data coordinates)
ax.text(
    -2,
    1,
    f"r={r:.2f}\nR^2={r**2:.2f}",
    horizontalalignment="left",
    weight="bold",
    fontsize=12,
)

# Set square ratio
ax.set_box_aspect(1)