# Analyze HA stability validations
Analyze validation experiments in which cells were infected with the same amount of each virus at different pHs.

In [1]:
import altair as alt

import numpy

import pandas as pd

Read the validation data.
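We drop the first experiment since it has a very high percent positive, too high to accurately estimate MOI (the three later experiments have a more reasonable percent positive, to which the Poisson formula can be applied). MOI is estimated from the percent positive $p$ as $\mathrm{MOI} = -\ln(1 - p/100)$.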

In [2]:
# Columns of the processed sheet that the rest of the notebook uses.
validation_columns = ["HA variant", "experiment", "percent positive", "pH"]

# Load the processed sheet and keep only the rows that name an HA variant.
validation_raw = pd.read_excel(
    "H5_fluPB1flank_HAstability_validations.xlsx", sheet_name="processed"
)
validation_named = validation_raw.loc[
    validation_raw["HA variant"].notnull(), validation_columns
]

# Add the Poisson MOI estimate (-ln of the fraction of uninfected cells),
# turn the experiment identifier into a readable label, and rename "WT"
# to "unmutated" in the variant names.
validation_labeled = validation_named.assign(
    MOI=-numpy.log(1 - validation_named["percent positive"] / 100),
    experiment=validation_named["experiment"].astype(str) + " experiment",
    **{"HA variant": validation_named["HA variant"].str.replace("WT", "unmutated")},
)

# Exclude the 2024-04-10 experiment from all downstream analysis.
validation_data = validation_labeled[
    validation_labeled["experiment"] != "2024-04-10 experiment"
]

Read the DMS stability measurements and add to validation data:

In [3]:
dms_data = pd.read_csv("../../results/pH_stability/averages/pH_stability_mut_effect.csv")

# Two-column lookup table giving the DMS effect for each HA variant.
dms_effects = dms_data.rename(
    columns={"mutation": "HA variant", "pH stability_median": "DMS effect"}
)[["HA variant", "DMS effect"]]

# Remove any "DMS effect" column left by a previous run of this cell, then
# attach the DMS effect; `validate` checks each variant matches at most one row.
validation_merged = validation_data.drop(columns="DMS effect", errors="ignore").merge(
    dms_effects,
    how="left",
    on="HA variant",
    validate="many_to_one",
)

# The unmutated variant is assigned an effect of zero.
is_unmutated = validation_merged["HA variant"] == "unmutated"
validation_merged = validation_merged.assign(
    **{"DMS effect": validation_merged["DMS effect"].mask(is_unmutated, 0)}
)

# Legend label combining the variant name with its (zero-filled) DMS effect.
validation_data = validation_merged.assign(
    labeled_variant=(
        validation_merged["HA variant"]
        + " (DMS effect: "
        + validation_merged["DMS effect"].map("{:.1f}".format)
        + ")"
    )
)

# Every cell must be populated, including the DMS effect of each variant.
assert validation_data.notnull().all().all()

Now plot the validation experiments with the full data:

In [4]:
# Color palette for the HA variants.
color_range = ["#E69F00", "#999999", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7"]

# Styling shared by the row and column facet headers.
facet_header_style = dict(labelFontStyle="bold", labelPadding=1, labelFontSize=11)

# Rename the two measurements to the labels shown on the facet rows.
validation_plot_data = validation_data.rename(
    columns={"MOI": "viral titer per cell", "percent positive": "percent cells infected"}
)

validation_chart = (
    alt.Chart(validation_plot_data)
    # Fold the two measurement columns into long form: one facet row each.
    .transform_fold(["percent cells infected", "viral titer per cell"], ["measurement", "value"])
    .mark_line(point=True, strokeWidth=1, opacity=0.7)
    .encode(
        x=alt.X("pH", sort="descending"),
        y=alt.Y("value:Q", title=None),
        color=alt.Color(
            "labeled_variant",
            title="HA variant",
            sort=alt.SortField("DMS effect"),
            scale=alt.Scale(range=color_range),
        ),
        column=alt.Column(
            "experiment",
            title=None,
            header=alt.Header(**facet_header_style),
            spacing=5,
        ),
        row=alt.Row(
            "measurement:N",
            title=None,
            header=alt.Header(**facet_header_style),
            spacing=5,
        ),
    )
    # The two measurements have different ranges, so give each its own y-scale.
    .resolve_scale(y="independent")
    .properties(height=110, width=166)
    .configure_axis(grid=False)
    .configure_point(size=50)
)

validation_chart.save("validation_chart.html")

validation_chart

Now compute viral titer per cell at the two lowest pHs (5.3 and 5.5) versus 6.9.
We do not use the pH 5.7 data as that is not low enough pH to strongly inactivate HA:

In [5]:
# Distinct pH values sorted from lowest to highest; the highest is the
# reference against which retained infectivity is computed.
pHs = sorted(validation_data["pH"].unique().tolist())
top_pH = pHs[-1]
# All pHs except the two highest (the reference and the one just below it).
lower_pHs = pHs[: -2]

titer_data = (
    validation_data
    # Wide table with one MOI column per pH. `pivot_table` aggregates with its
    # default (mean), so repeated measurements of a variant/experiment/pH
    # would be averaged.
    .pivot_table(
        index=["HA variant", "experiment", "DMS effect"],
        values="MOI",
        columns="pH",
    )
    .reset_index()
    # Back to long form for the lower pHs only, carrying the reference-pH MOI
    # along on each row. The melted variable column is named "pH" (taken from
    # the name of the pivoted columns index).
    .melt(
        id_vars=["HA variant", "experiment", "DMS effect", top_pH],
        value_vars=lower_pHs,
        value_name="MOI_at_pH",
    )
    .assign(
        frac_infectivity_retained=lambda x: x["MOI_at_pH"] / x[top_pH],
        pH=lambda x: "infectivity retained at pH " + x["pH"].astype(str),
    )
    # Drop the reference column by its computed label, not a hard-coded 6.9,
    # so this stays consistent with `top_pH` used above.
    .drop(columns=top_pH)
)

Now plot these titer data:

In [6]:
# Both axes use the same non-"nice", padded scale settings.
titer_x_scale = alt.Scale(nice=False, padding=14)
titer_y_scale = alt.Scale(type="log", nice=False, padding=14)

# Encodings shared by every layer: DMS effect vs. retained infectivity (log y).
titer_base = alt.Chart(titer_data).encode(
    x=alt.X("DMS effect", scale=titer_x_scale, title="stability effect in DMS"),
    y=alt.Y("frac_infectivity_retained", scale=titer_y_scale, title=None),
    color=alt.Color(
        "HA variant",
        sort=alt.SortField("DMS effect"),
        legend=None,
        scale=alt.Scale(range=color_range),
    ),
)

# Layer 1: one filled point per variant.
titer_points = titer_base.mark_point(filled=True, size=70, opacity=1)

# Layer 2: variant name drawn just below/right of each point.
titer_text = titer_base.mark_text(size=9, dy=8, dx=2).encode(text="HA variant")

# Layer 3: correlation label. The regression returns rSquared and the
# coefficients; r is the square root of rSquared signed by the slope.
slope_is_nonnegative = alt.datum["coef"][1] >= 0
r_magnitude = alt.expr.sqrt(alt.datum["rSquared"])
titer_r = (
    titer_base
    .transform_regression("DMS effect", "frac_infectivity_retained", params=True)
    .transform_calculate(
        r=alt.expr.if_(slope_is_nonnegative, r_magnitude, -r_magnitude),
        label='"r = " + format(datum.r, ".2f")',
    )
    .mark_text(align="left", fontSize=12, opacity=1)
    # Fixed pixel position in the upper-left corner of each panel.
    .encode(
        x=alt.value(5),
        y=alt.value(10),
        text=alt.Text("label:N"),
        color=alt.value("black"),
    )
)

# Facet headers: experiments across columns, pH comparisons down rows.
experiment_header = alt.Header(labelFontStyle="bold", labelPadding=3, labelFontSize=11)
pH_header = alt.Header(labelFontStyle="bold", labelPadding=1, labelFontSize=11)

titer_chart = (
    (titer_points + titer_text + titer_r)
    .properties(width=165, height=165)
    .facet(
        column=alt.Column("experiment", title=None, header=experiment_header),
        row=alt.Row("pH", title=None, header=pH_header),
        spacing=5,
    )
    .resolve_scale(y="independent")
    .configure_axis(grid=False)
)

titer_chart.save("titer_chart.html")

titer_chart