### Hendra and Nipah F neutralization curves

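This notebook reads fraction infectivity data for Hendra and Nipah F, fits neutralization curves, reports IC50/IC80/IC90 values and the Hendra-versus-Nipah IC50 fold change for each antibody, and plots the neutralization curves.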

In [None]:
import pandas as pd
import neutcurve
import altair as alt
import re
import httpimport

In [None]:
# Import custom altair theme from remote github using httpimport module
def import_theme_new() -> None:
    """Load the remote ``main_theme`` module and register it as an Altair theme.

    The ``import main_theme`` statement runs while the
    ``httpimport.github_repo(...)`` context is active, and the theme is
    registered under the name ``"custom_theme"`` with ``enable=True``.

    NOTE(review): presumably this fetches from the GitHub repository
    ``bblarsen-sci/altair_themes`` at ref ``main`` and therefore needs network
    access every time the notebook runs — confirm.
    """
    with httpimport.github_repo("bblarsen-sci", "altair_themes", "main"):
        # Must stay inside the context manager: this is the import that the
        # httpimport context is opened for.
        import main_theme

        # Register a theme callable named "custom_theme" and enable it at once.
        @alt.theme.register("custom_theme", enable=True)
        def custom_theme():
            # Delegate to the theme definition provided by the remote module.
            return main_theme.main_theme()


# Run at notebook start-up so the theme registration happens before any chart is built.
import_theme_new()

In [None]:
# Order in which antibodies are reported and laid out in the figure below.
antibody_order = ["12B2", "2D3", "4H3", "1A9", "1F2", "2B12"]

# Raw neutralization measurements. Later cells read the columns "serum",
# "virus", "replicate", "concentration" and "fraction infectivity".
neut_csv_path = (
    "../../data/experimental_data/neuts_data/250904_Hendra_vs_Nipah_neuts.csv"
)
neut_df = pd.read_csv(neut_csv_path)

# Quick visual check of the first rows.
display(neut_df.head())

In [None]:
def fit_neutcurve(df):
    """Wrap a tidy measurement table in a ``neutcurve`` ``CurveFits`` object.

    Args:
        df: DataFrame with the columns ``serum``, ``virus``, ``replicate``,
            ``concentration`` and ``fraction infectivity``.

    Returns:
        The ``neutcurve.curvefits.CurveFits`` instance built from ``df``.
    """
    # Map each CurveFits column argument onto the column name used in the CSV.
    column_names = {
        "serum_col": "serum",
        "virus_col": "virus",
        "replicate_col": "replicate",
        "conc_col": "concentration",
        "fracinf_col": "fraction infectivity",
    }
    return neutcurve.curvefits.CurveFits(data=df, **column_names)


# Curve fits for every serum/virus pair in the raw data.
fit = fit_neutcurve(neut_df)



In [None]:
def get_ic_values(fits, ics=(50, 80, 90)):
    """Return the fitted-parameter table including the requested IC values.

    Args:
        fits: Object exposing ``fitParams(ics=...)``; in this notebook, the
            ``CurveFits`` instance held in ``fit``.
        ics: Inhibitory-concentration percentiles to request. Defaults to
            ``(50, 80, 90)``, the values that used to be hard-coded here.

    Returns:
        Whatever ``fits.fitParams`` returns. Later cells use its ``serum``,
        ``virus`` and ``ic50`` columns.
    """
    # Always hand over a list, as the original call did, whatever iterable
    # the caller supplied.
    return fits.fitParams(ics=list(ics))


fit_params = get_ic_values(fit)
display(fit_params)

# For each antibody, print the Nipah IC50 and the Hendra IC50 expressed as a
# fold change relative to Nipah.
for antibody in antibody_order:
    tmp_df = fit_params[fit_params["serum"] == antibody]
    # .values[0] takes the first matching row; it raises IndexError if this
    # antibody/virus combination is missing from the fit table.
    ic50_nipah = tmp_df[tmp_df["virus"] == "Nipah"]["ic50"].values[0]
    print(f"{antibody}: Nipah IC50: {ic50_nipah:.2f}")
    ic50_hendra = tmp_df[tmp_df["virus"] == "Hendra"]["ic50"].values[0]
    # Label the ratio so the printed number is not mistaken for an IC50.
    print(
        f"{antibody}: Hendra/Nipah IC50 fold change: "
        f"{ic50_hendra / ic50_nipah:.1f}"
    )

In [None]:
# Keep only the IC50 per (serum, virus) and number the rows within each serum
# in their existing order: 0 for the first row of a serum, 1 for the next.
# NOTE(review): the renames below label row 0 "unmutated" and row 1 "mutant",
# so the result depends on the row order of fit_params — confirm that the
# reference virus always comes first.
fit_params_hendra = fit_params[["serum", "virus", "ic50"]].assign(
    seq=lambda d: d.groupby("serum").cumcount()
)

# One row per serum, with the virus name and IC50 of each numbered row side by side.
df_wide = fit_params_hendra.pivot(
    index="serum", columns="seq", values=["virus", "ic50"]
)

# Flatten the (value, seq) column pairs into single names such as "ic50_0".
df_wide.columns = ["_".join(map(str, pair)) for pair in df_wide.columns]

column_renames = {
    "serum": "antibody",
    "virus_1": "mutation",
    "ic50_0": "ic50_unmutated",
    "ic50_1": "ic50_mutant",
}
df_wide = (
    df_wide.reset_index()
    .rename(columns=column_renames)
    .drop(columns=["virus_0"])
)
# Fold change of the second row's IC50 relative to the first row's.
df_wide["ic50_fold"] = df_wide["ic50_mutant"] / df_wide["ic50_unmutated"]

display(df_wide)
# Uncomment to write the fold-change table to disk:
#df_wide.to_csv('../../results/ic50_data/ic50_fold_changes_hendra.csv', index=False)


In [None]:
def make_neutcurve_df(fit, serums=None, viruses=None):
    """Collect the replicate-averaged curve of every serum/virus pair.

    Args:
        fit: Object exposing ``getCurve(serum=..., virus=..., replicate=...)``
            whose result has a ``dataframe()`` method; in this notebook, the
            ``CurveFits`` instance held in ``fit``.
        serums: Sequence of serum names to include. When omitted, the
            module-level ``serum_list`` is used, as the function always did.
        viruses: Sequence of virus names to include for every serum. When
            omitted, the module-level ``virus_list`` is used.

    Returns:
        One DataFrame stacking the per-curve tables in serum-major order,
        with added ``serum`` and ``virus`` columns plus ``upper`` and
        ``lower`` (``measurement`` plus/minus ``stderr``). The per-curve row
        index is kept as-is, so index values repeat across curves.
    """
    # Fall back to the notebook-level lists so existing one-argument calls
    # keep working; explicit arguments make the function usable without them.
    if serums is None:
        serums = serum_list
    if viruses is None:
        viruses = virus_list

    # .assign returns a new frame, so the table handed back by dataframe()
    # is never modified in place.
    curves = [
        fit.getCurve(serum=serum, virus=virus, replicate="average")
        .dataframe()
        .assign(serum=serum, virus=virus)
        for serum in serums
        for virus in viruses
    ]

    # Concatenate all the dataframes into one
    combined_curve = pd.concat(curves, axis=0)
    # Bounds drawn as error bars: one standard error either side of the mean.
    combined_curve["upper"] = combined_curve["measurement"] + combined_curve["stderr"]
    combined_curve["lower"] = combined_curve["measurement"] - combined_curve["stderr"]
    return combined_curve

# Distinct sera and viruses in the raw data. make_neutcurve_df reads these two
# module-level lists, so they must exist before it is called.
serum_list = [*neut_df["serum"].unique()]
virus_list = [*neut_df["virus"].unique()]

# Long-format table of averaged curves for every serum/virus combination.
neutcurve_df = make_neutcurve_df(fit)
display(neutcurve_df)


In [None]:
def custom_sort_order(array):
    """Return the labels sorted by their first embedded number, ``"WT"`` first.

    Args:
        array: Iterable of string labels (for example mutation names).

    Returns:
        A new list ordered by the integer value of the first run of digits in
        each label (labels without digits count as 0, ties keep their input
        order). If ``"WT"`` is present, its first occurrence is moved to the
        front. The input is not modified.
    """
    digits = re.compile(r"\d+")

    def first_number(label):
        # Integer value of the first digit run, or 0 when there is none.
        found = digits.search(label)
        if found is None:
            return 0
        return int(found.group())

    ordered = sorted(array, key=first_number)

    # Pull the wild type to the front, if there is one.
    try:
        wt_position = ordered.index("WT")
    except ValueError:
        return ordered
    ordered.insert(0, ordered.pop(wt_position))
    return ordered

# Ten-color categorical palette (hex codes).
# NOTE(review): not referenced anywhere else in the visible notebook — confirm
# whether it is still needed.
colors_for_plot2 = [
    "#5778a4",
    "#e49444",
    "#d1615d",
    "#85b6b2",
    "#6a9f58",
    "#e7ca60",
    "#a87c9f",
    "#f1a2a9",
    "#967662",
    "#b8b0ac",
]

# Two-color palette that plot_neut_curve uses for the range of its color scale.
colors_for_plot = ["#1C507B", "#F57F20"]

def custom_sort_key(item):
    """Sort key: the integer directly preceding ``min`` in ``item``.

    Args:
        item: String label, for example ``"30min"``.

    Returns:
        The digits immediately before the first ``min`` as an int, or 0 when
        the label contains no such time value.
    """
    found = re.search(r"(\d+)min", item)
    return int(found.group(1)) if found else 0


In [None]:
def plot_neut_curve(df):
    """Build a layered neutralization-curve chart colored by virus.

    Args:
        df: DataFrame with the columns ``concentration``, ``fit``,
            ``measurement``, ``lower``, ``upper`` and ``virus``.

    Returns:
        An Altair layered chart (error bars, fitted line, measured points)
        with width 150 and height 100.
    """
    color_variable = "virus"
    legend_title = "Virus"
    viruses = df[color_variable].unique()

    # Color scale. With a "WT" entry, the domain is ordered by
    # custom_sort_order and the first color is black; otherwise the domain is
    # ordered by custom_sort_key and the palette is used as-is.
    if "WT" in viruses:
        print("WT is present")
        color_options = {
            "scale": alt.Scale(
                domain=custom_sort_order(viruses),
                # Black first, then one palette color per remaining virus.
                range=["black"] + colors_for_plot[: len(viruses) - 1],
            ),
        }
    else:
        ordered_viruses = sorted(viruses, key=custom_sort_key)
        color_options = {
            "scale": alt.Scale(domain=ordered_viruses, range=colors_for_plot),
            "sort": ordered_viruses,
        }
    color_scale = alt.Color(
        color_variable,
        legend=alt.Legend(title=legend_title),
        **color_options,
    )

    # Concentration axis shared by the fitted line and the measured points:
    # log scale, scientific-notation tick labels, three ticks requested.
    log_scale = alt.Scale(type="log")
    concentration_axis = alt.Axis(format=".0e", tickCount=3)

    # All three layers draw from the same data.
    base = alt.Chart(df)

    # Fitted fraction infectivity, with the y axis fixed to [0, 1].
    fitted_line = base.mark_line(size=2.5).encode(
        x=alt.X(
            "concentration:Q",
            scale=log_scale,
            axis=concentration_axis,
            title=None,
        ),
        y=alt.Y(
            "fit:Q",
            title=None,
            scale=alt.Scale(domain=[0, 1]),
            axis=alt.Axis(values=[0, 0.5, 1]),
        ),
        color=color_scale,
    )

    # Measured fraction infectivity at each concentration.
    measured_points = base.mark_circle(size=40, opacity=1).encode(
        x=alt.X(
            "concentration",
            scale=log_scale,
            axis=concentration_axis,
            title=None,
        ),
        y=alt.Y("measurement:Q", title=None),
        color=color_scale,
    )

    # Error bars spanning the "lower" to "upper" columns.
    error_bars = base.mark_errorbar(opacity=1).encode(
        x="concentration",
        y=alt.Y("lower", title=None),
        y2="upper",
        color=color_scale,
    )

    # Layer order: error bars at the bottom, then the line, points on top.
    layered = error_bars + fitted_line + measured_points
    return layered.properties(width=150, height=100)

# One panel per antibody, in antibody_order, each titled with the antibody name.
empty = [
    plot_neut_curve(neutcurve_df[neutcurve_df["serum"] == antibody]).properties(
        title=antibody
    )
    for antibody in antibody_order
]

# Put the panels side by side and render the combined figure.
all_charts = alt.hconcat(*empty)

all_charts.display()


In [None]:
# Output file -> extra keyword arguments for Chart.save, written in this order.
figure_outputs = {
    "../../results/figures/validations/NiV_vs_HeV_neutcurves.png": {"ppi": 300},
    "../../results/figures/validations/NiV_vs_HeV_neutcurves.svg": {},
}
for figure_path, save_options in figure_outputs.items():
    all_charts.save(figure_path, **save_options)
