# Top escape neutralization curves

This notebook reads fraction infectivity data from the top-escape neutralization assays, fits neutralization curves, and plots them. It then merges the IC50 estimates with the DMS escape data, calculates the fold change in IC50 of each mutant relative to the unmutated virus, and saves the merged table to the results directory.

In [None]:
import pandas as pd
import numpy as np
import neutcurve
import altair as alt
import scipy.stats
import re
import httpimport

In [None]:
# Import custom altair theme from remote github using httpimport module
def import_theme_new():
    """Load the shared Altair theme from GitHub and make it the active theme.

    The ``main_theme`` module is imported through ``httpimport`` from the
    ``bblarsen-sci/altair_themes`` GitHub repository (third argument
    ``"main"``, presumably the branch/ref -- confirm against httpimport).
    Its ``main_theme()`` result is then registered with Altair under the
    name ``custom_theme`` and enabled right away.
    """
    with httpimport.github_repo("bblarsen-sci", "altair_themes", "main"):
        import main_theme

        def custom_theme():
            return main_theme.main_theme()

        # Explicit call form of the ``@alt.theme.register(...)`` decorator.
        alt.theme.register("custom_theme", enable=True)(custom_theme)


import_theme_new()

In [None]:
# Antibodies in the left-to-right order their panels appear in the figure.
antibody_order = ["12B2", "2D3", "4H3", "1A9", "1F2", "2B12"]

# Fraction infectivity measurements used to fit the neutralization curves.
neut_data_path = "../../data/experimental_data/neuts_data/250904_top_escape_neuts.csv"
neut_df = pd.read_csv(neut_data_path)
display(neut_df)

In [None]:
def fit_neutcurve(df):
    """Fit neutralization curves to a table of fraction infectivity data.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with the columns ``serum``, ``virus``, ``replicate``,
        ``concentration`` and ``fraction infectivity``.

    Returns
    -------
    neutcurve.CurveFits
        The fitted curves for every serum/virus combination in ``df``.
    """
    column_names = {
        "serum_col": "serum",
        "virus_col": "virus",
        "replicate_col": "replicate",
        "conc_col": "concentration",
        "fracinf_col": "fraction infectivity",
    }
    # NOTE(review): fixbottom is given as a (0.0, 0.4) pair -- presumably
    # lower/upper bounds on the fitted bottom plateau; confirm in neutcurve docs.
    return neutcurve.CurveFits(data=df, fixbottom=(0.0, 0.4), **column_names)


fit = fit_neutcurve(neut_df)

In [None]:
# Pull the replicate-averaged curve for every antibody/virus pair present in
# the raw data and label each curve table with the pair it belongs to.
curves = [
    fit.getCurve(serum=serum, virus=virus, replicate="average")
    .dataframe()
    .assign(serum=serum, virus=virus)
    for (serum, virus), _ in neut_df.groupby(["serum", "virus"])
]

neutcurve_df = pd.concat(curves)

# Error-bar limits: measurement plus/minus its standard error.
neutcurve_df["upper"] = neutcurve_df["measurement"] + neutcurve_df["stderr"]
neutcurve_df["lower"] = neutcurve_df["measurement"] - neutcurve_df["stderr"]

display(neutcurve_df.head(5))

In [None]:
def custom_sort_order(array):
    """Return the virus names sorted by site number, with "Unmutated" first.

    Each name is ordered by the first run of digits it contains (names
    without digits count as 0).  If "Unmutated" is present, its first
    occurrence is moved to the front.  The input is not modified; a new
    list is returned.
    """

    def site_number(name):
        digits = re.search(r"\d+", name)
        return int(digits.group()) if digits else 0

    ordered = sorted(array, key=site_number)
    if "Unmutated" not in ordered:
        return ordered

    # Rebuild the list with the first "Unmutated" entry pulled to the front.
    position = ordered.index("Unmutated")
    return ["Unmutated"] + ordered[:position] + ordered[position + 1 :]

# Colors for the mutant viruses in each panel; plot_neut_curve puts "black"
# in front of these for the "Unmutated" virus.
colors_for_plot = ["#F57F20", "#5778a4"]

def custom_sort_key(item):
    """Return the integer directly preceding "min" in ``item``.

    Strings that contain no ``<digits>min`` pattern get the key 0.
    """
    found = re.search(r"(\d+)min", item)
    return int(found.group(1)) if found else 0

In [None]:
def plot_neut_curve(df):
    """Plot the neutralization curves of all viruses for a single antibody.

    Parameters
    ----------
    df : pandas.DataFrame
        Curve table with the columns ``concentration``, ``fit``,
        ``measurement``, ``lower``, ``upper`` and ``virus``.

    Returns
    -------
    altair.LayerChart
        Error bars, fitted lines and measured points layered into one
        150 x 100 chart with a log-scaled concentration axis.

    Notes
    -----
    Viruses are colored in the order given by ``custom_sort_order``.
    "Unmutated", when present, is drawn in black and the remaining viruses
    take their colors from ``colors_for_plot``.  A frame without an
    "Unmutated" virus previously raised ``UnboundLocalError`` because the
    color scale was only defined in that branch; it is now always defined.
    """
    color_variable = "virus"
    x_axis = alt.Axis(format=".0e", tickCount=2)
    scale = alt.Scale(type="log")

    ### Define color scale
    # Fix the domain explicitly so the colors follow the sorted virus order.
    virus_order = custom_sort_order(df["virus"].unique())
    if "Unmutated" in virus_order:
        # 'Unmutated' is sorted first, so it receives the leading black.
        colors = ["black"] + colors_for_plot[: len(virus_order) - 1]
    else:
        colors = colors_for_plot[: len(virus_order)]

    color_scale = alt.Color(
        color_variable,
        scale=alt.Scale(domain=virus_order, range=colors),
    )

    # make the line component of the plot which represents the inferred fraction infectivity
    line = (
        alt.Chart(df)
        .mark_line(size=2)
        .encode(
            x=(alt.X("concentration:Q").scale(scale).axis(x_axis).title(None)),
            y=(
                alt.Y("fit:Q")
                .title(None)
                .scale(alt.Scale(domain=[0, 1]))
                .axis(alt.Axis(values=[0, 0.5, 1]))
            ),
            color=color_scale,
        )
    )

    # make the circle component of the plot which represent the measured fraction infectivity
    circle = (
        alt.Chart(df)
        .mark_circle(size=40, opacity=1)
        .encode(
            x=(alt.X("concentration").scale(scale).axis(x_axis).title(None)),
            y=(alt.Y("measurement:Q").title(None)),
            color=color_scale,
        )
    )

    # make the error bar component of the plot which represents the error in the measured fraction infectivity
    error = (
        alt.Chart(df)
        .mark_errorbar(opacity=1)
        .encode(
            x="concentration",
            y=alt.Y("lower").title(None),
            y2="upper",
            color=color_scale,
        )
    )

    # combine the line, circle, and error components into one plot and set dimensions
    plot = error + line + circle
    plot = plot.properties(width=150, height=100)
    return plot

# One titled panel per antibody, in the order given by antibody_order.
empty = [
    plot_neut_curve(neutcurve_df[neutcurve_df["serum"] == antibody]).properties(
        title=antibody
    )
    for antibody in antibody_order
]

# Place the panels side by side; axes are shared, while each panel keeps its
# own color scale.
all_charts = alt.hconcat(*empty).resolve_scale(
    color="independent", y="shared", x="shared"
)
all_charts.display()


In [None]:
# Write the combined figure as a 300 ppi PNG and as an SVG.
png_path = "../../results/figures/validations/top_escape_neutcurves_all.png"
svg_path = "../../results/figures/validations/top_escape_neutcurves_all.svg"
all_charts.save(png_path, ppi=300)
all_charts.save(svg_path)

In [None]:
# Merge the fitted IC50 values with the escape data from DMS.

# Build a 'mutation' label (wildtype + site + mutant) so the escape table can
# be joined to the virus names used in the neutralization assays.
escape_df = pd.read_csv(
    "../../results/filtered_data/antibody_escape/combined/all_antibodies_escape_filtered.csv"
).assign(mutation=lambda x: x["wildtype"] + x["site"].astype(str) + x["mutant"])


fit_params = fit.fitParams(ics=[50, 90])

fit_params = fit_params[["serum", "virus", "ic50"]].copy()

# Pair each mutant with the unmutated virus of the same antibody by virus
# NAME.  Pairing by row position (cumcount + pivot) only works when
# 'Unmutated' is the first row of every antibody and silently discards any
# second or later mutant of an antibody.
is_unmutated = fit_params["virus"] == "Unmutated"
unmutated_ic50 = fit_params.loc[is_unmutated, ["serum", "ic50"]].rename(
    columns={"ic50": "ic50_unmutated"}
)
mutant_ic50 = fit_params.loc[~is_unmutated].rename(
    columns={"virus": "mutation", "ic50": "ic50_mutant"}
)

df_wide = (
    # validate raises if an antibody has more than one 'Unmutated' row; an
    # antibody without one gets NaN for ic50_unmutated and ic50_fold.
    mutant_ic50.merge(unmutated_ic50, on="serum", how="left", validate="many_to_one")
    .rename(columns={"serum": "antibody"})
    .sort_values("antibody", kind="stable")
    .reset_index(drop=True)
    .loc[:, ["antibody", "mutation", "ic50_unmutated", "ic50_mutant"]]
    .assign(ic50_fold=lambda x: x.ic50_mutant / x.ic50_unmutated)
)

# Left join keeps every validated mutant even if it has no DMS escape entry.
merged_df = pd.merge(df_wide, escape_df, on=['antibody','mutation'], how='left')
merged_df["ic50_fold"] = pd.to_numeric(merged_df["ic50_fold"], errors="coerce")
merged_df['log2_ic50'] = np.log2(merged_df['ic50_fold'])

# NOTE(review): flags mutants whose IC50 is exactly 10 -- presumably the
# highest concentration tested, i.e. the IC50 is a lower bound; confirm.
merged_df['at_threshold'] = np.where(merged_df['ic50_mutant'] == 10, 'yes', 'no')

display(merged_df)
merged_df.to_csv('../../results/ic50_data/top_escape_ic50_estimates_merged_with_escape.csv')
