# Single mutant pseudovirus titers versus functional scores

Compare experimentally measured pseudovirus titers for specific single mutants to the functional scores measured by deep mutational scanning (DMS).

In [None]:
# Imports
import os
import warnings
import numpy as np
import pandas as pd
import scipy as sp
import seaborn as sns
import matplotlib.pyplot as plt

# Color cycle used for every plot in this notebook (hex codes, in hue order)
tol_muted_adjusted = [
    "#000000", "#CC6677", "#1f78b4", "#DDCC77", "#117733",
    "#882255", "#88CCEE", "#44AA99", "#999933", "#AA4499",
    "#EE7733", "#CC3311", "#DDDDDD",
]

# Seaborn theme: 300 dpi for both displayed and saved figures, the "ticks"
# style, and the custom palette as the default color cycle
figure_rc = {"figure.dpi": 300, "savefig.dpi": 300}
sns.set(rc=figure_rc)
sns.set_style("ticks")
sns.set_palette(tol_muted_adjusted)

# Silence all warnings for the rest of the notebook
warnings.simplefilter("ignore")

File paths for data:

In [None]:
# this cell is tagged as `parameters` for papermill parameterization
# The `None` defaults are placeholders only: every value below must be
# supplied (presumably injected by papermill -- confirm against the pipeline)
# before the remaining cells can run.
validation_titers_path = None  # CSV of measured single-mutant titers, read with `pd.read_csv`
functional_scores_path = None  # CSV of DMS functional scores, read with `pd.read_csv`
out_dir = None  # output directory, created if it does not exist
saved_image_path = None  # file path the correlation figure is saved to

In [None]:
# # Uncomment to run this notebook interactively
# validation_titers_path = "../data/single_mutant_functional_validations.csv"
# functional_scores_path = "../results/func_effects/averages/293T_entry_func_effects.csv"
# out_dir = "../results/validation_plots/"
# saved_image_path="../results/validation_plots/functional_validation_correlation.svg"

In [None]:
# Load the measured titers.
# NOTE(review): `na_filter` is given a falsy value, which presumably disables
# missing-value detection so cells are kept as written -- confirm.
validation_titers = pd.read_csv(validation_titers_path, na_filter=None)

# log2-transform the fold change relative to the unmutated median
fold_change = validation_titers["Fold change relative to unmutated median"]
validation_titers["log2 fold change"] = np.log2(fold_change)

Now get the functional scores measured by DMS:

In [None]:
# Read the DMS functional scores and add an `aa_substitutions` key built as
# wildtype + site + mutant (e.g. "N89D") so rows can be matched to the
# single-mutant viruses; the three component columns are dropped afterwards.
functional_scores_df = (
    pd.read_csv(functional_scores_path)
    .assign(
        aa_substitutions=lambda scores: (
            scores["wildtype"] + scores["site"].astype(str) + scores["mutant"]
        )
    )
    .drop(columns=["wildtype", "site", "mutant"])
)

# Attach the DMS functional scores to every measured titer.
# - how="left" keeps every titer row, matched or not
# - validate="many_to_one" raises if a substitution appears more than once
#   in the functional scores
merged_titers = validation_titers.merge(
    functional_scores_df,
    on="aa_substitutions",
    how="left",
    validate="many_to_one",
)

# Use a display-friendly column name and replace every missing value with 0
# (this includes the scores of titer rows that had no match in the DMS data)
validation_vs_prediction = merged_titers.rename(
    columns={"aa_substitutions": "Amino acid substitutions"}
).fillna(0)

# Display labels for the legend: substitution name plus number of measurements.
# NOTE: the replicate counts are hard-coded and must be kept in sync with the
# validation titer file by hand.
rename = {
    "Unmutated": "Unmutated (n=12)",
    "N89D": "N89D (n=1)",
    "N119S": "N119S (n=4)",
    "K125L": "K125L (n=8)",
    "K126L": "K126L (n=6)",
    "Y129L": "Y129L (n=4)",
    "S135L": "S135L (n=1)",
    "S138N": "S138N (n=1)",
    "N148R": "N148R (n=1)",
    "Q149H": "Q149H (n=6)",
    "R257S": "R257S (n=3)",
    "D401I": "D401I (n=3)",
    "E404L": "E404L (n=3)",
}

# Relabel the viruses. `Series.map` yields NaN for any value missing from the
# dictionary, so fall back to the original name: a substitution that has no
# entry above keeps its label instead of silently losing it, and re-running
# this cell leaves already-relabelled rows untouched.
substitution_names = validation_vs_prediction["Amino acid substitutions"]
validation_vs_prediction["Amino acid substitutions"] = (
    substitution_names.map(rename).fillna(substitution_names)
)

In [None]:
# Pearson correlation between the DMS functional effect and the measured
# log2 titer fold change
dms_effect = validation_vs_prediction["effect"]
measured_log2_fc = validation_vs_prediction["log2 fold change"]
r, p = sp.stats.pearsonr(x=dms_effect, y=measured_log2_fc)

print(f"R={r}")
print(f"R^2={r**2}")

# Scatter plot of the DMS functional score (x) against the measured log2
# titer fold change (y), colored by amino acid substitution
fig, ax = plt.subplots(figsize=(3, 2.5))
corr_chart = sns.scatterplot(
    data=validation_vs_prediction,
    x="effect",
    y="log2 fold change",
    hue="Amino acid substitutions",
    ax=ax,
    alpha=0.8,
    edgecolor="black",
    s=100,
)

# Uniform spine and tick widths; `despine` then removes the top/right spines
for axis in ["top", "bottom", "left", "right"]:
    corr_chart.spines[axis].set_linewidth(1)
corr_chart.tick_params(axis="both", length=4, width=1)
sns.despine()

# Axis labels
corr_chart.set_xlabel(
    "functional score measured\nby DMS (arbitrary units)",
    fontsize=8,
)
corr_chart.set_ylabel(
    "log2 virus titer fold change\ncompared to unmutated LASV GP",
    fontsize=8,
)

# Fixed axis limits and tick positions
corr_chart.set_xlim(-2.25, 1.25)
corr_chart.set_ylim(-9, 2.5)
corr_chart.set_xticks([-2, -1, 0, 1])
corr_chart.set_xticklabels(corr_chart.get_xticks(), size=8)
corr_chart.set_yticks([-8, -6, -4, -2, 0, 2])
corr_chart.set_yticklabels(corr_chart.get_yticks(), size=8)

# Place the legend outside the axes, anchored to the upper-right corner
sns.move_legend(
    corr_chart,
    "upper left",
    bbox_to_anchor=(1, 1),
    fontsize=8,
    markerscale=1,
    handletextpad=0.1,
    title="Amino acid\nsubstitutions",
    title_fontproperties={
        "size": 8,
        "weight": "bold",
    },
    frameon=False,
    borderaxespad=0.1,
)

# Annotate the plot (data coordinates) with the Pearson r computed earlier
corr_chart.text(
    -2,
    1,
    f"r={r:.2f}",
    horizontalalignment="left",
    weight="bold",
    fontsize=8,
)

# Add edges to legend markers to match scatter plot.
# `Legend.legendHandles` was renamed to `legend_handles` in Matplotlib 3.7 and
# the old name was later removed, so prefer the new attribute and fall back to
# the old one only on Matplotlib versions that predate the rename.
legend = corr_chart.legend_
legend_handles = getattr(legend, "legend_handles", None)
if legend_handles is None:
    legend_handles = legend.legendHandles
for handle in legend_handles:
    handle.set_edgecolor("black")
    handle.set_linewidths(0.5)

# Set square ratio
corr_chart.set_box_aspect(1)

# Create the output directory, including any missing parent directories.
# `exist_ok=True` makes this a no-op when the directory is already there and
# avoids the race between a separate existence check and the creation.
os.makedirs(out_dir, exist_ok=True)

# Save fig
plt.savefig(saved_image_path)