# Average mutational escape for an antibody/serum

Import Python modules.
We use `polyclonal` for the averaging and plotting:

In [1]:
import pickle

import altair as alt

import pandas as pd

import polyclonal

# Lift Altair's cap on the number of data rows a chart may embed; binding the
# return value to `_` keeps it from being echoed as this cell's output.
_ = alt.data_transformers.disable_max_rows()

This notebook is parameterized by `papermill`.
The next cell is tagged as `parameters` to get the passed parameters.

In [2]:
# this cell is tagged parameters for `papermill` parameterization
# Each name is a placeholder (`None`) that is expected to be overridden by the
# parameters passed in through `papermill`.
site_numbering_map_csv = None  # input CSV; its `reference_site` column is renamed to `site` when read
avg_pickle_file = None  # output path for the pickled average model
escape_csv = None  # output path for the CSV of `avg_model.mut_escape_df`
icXX_csv = None  # output path for the CSV of `avg_model.mut_icXX_df(...)`
escape_html = None  # output path for the saved mutation-escape chart
icXX_html = None  # output path for the saved ICXX chart
params = None  # dict; keys read by this notebook: selections, escape_plot_kwargs, plot_hide_stats, icXX

In [3]:
# Parameters
# Concrete values for this run; they replace the `None` placeholders assigned in
# the cell tagged `parameters`.
# Of the keys in `params`, the code in this notebook reads `selections`,
# `escape_plot_kwargs`, `plot_hide_stats`, and `icXX`; `alphabet`, `legend`, and
# `title` are not read by any code visible here.
params = {
    "alphabet": [
        "R",
        "K",
        "H",
        "D",
        "E",
        "Q",
        "N",
        "S",
        "T",
        "Y",
        "W",
        "F",
        "A",
        "I",
        "L",
        "M",
        "V",
        "G",
        "P",
        "C",
        "*",
    ],
    # passed (after additions made later in the notebook) as keyword arguments to
    # `avg_model.mut_escape_plot` and `avg_model.mut_icXX_plot`
    "escape_plot_kwargs": {
        "addtl_slider_stats": {"times_seen": 3},
        "addtl_tooltip_stats": ["sequential_site"],
        "avg_type": "median",
        "heatmap_max_at_least": 2,
        "heatmap_min_at_least": -2,
        "init_floor_at_zero": False,
        "init_site_statistic": "sum",
        "per_model_tooltip": True,
        "rename_stat_col": "Ephrin affinity",
        "scale_stat_col": -1,
        "site_zoom_bar_color_col": "region",
        "sites_to_show": {"include_range": [71, 602]},
    },
    # divided by 100 before being passed as `x` to the ICXX methods
    "icXX": 90,
    "legend": "Interactive plot of how mutations affect antibody escape.\n\nUse the site zoom bar at the top to zoom in on specific sites. The line plot shows a summary statistic indicating escape at each site. The heat map shows escape for individual mutations, with parental amino-acid identities indicated by x and gray indicating non-measured mutations. Mouse over points for details.\n\nOptions at the bottom of the plot let you modify the display, such as by selecting how many different variants a mutation must be seen in to be shown (*minimum times_seen*), how many different experimental selections the mutation was measured in (*minimum n_models*), what site summary statistic to show, etc.\n\nYou can filter by the functional effects of mutations. Mutations removed by this filter are shown as dark gray squares in the heat map to distinguish unmeasured mutations from ones measured to be deleterious.\n\nThe minimum max of escape at site is useful to select the sites where mutations confer the most escape.\n",
    # each entry names a statistic: `csv` is read, `csv_col` is renamed to the
    # statistic name, and `init` becomes its initial slider value
    # NOTE(review): this CSV path names an EFNB3 selection while `title` below says
    # EFNB2 -- confirm that this pairing is intended
    "plot_hide_stats": {
        "functional effect": {
            "csv": "results/func_effects/by_selection/LibB-230704-CHO-EFNB3_func_effects.csv",
            "csv_col": "functional_effect",
            "init": -3,
        }
    },
    # one pickled model and one prob-escape-mean CSV are read per selection
    "selections": ["LibA-230331-293T_E2-Bat_E2", "LibA-230331-293T-Bat_E2"],
    "title": "Receptor Affinity for Dimeric Bat EFNB2",
}
site_numbering_map_csv = "data/site_numbering_map.csv"
avg_pickle_file = (
    "results/antibody_escape/averages/Bat_EFNB2_Dimeric_polyclonal_model.pickle"
)
escape_csv = "results/antibody_escape/averages/Bat_EFNB2_Dimeric_mut_escape.csv"
icXX_csv = "results/antibody_escape/averages/Bat_EFNB2_Dimeric_mut_icXX.csv"
escape_html = (
    "results/antibody_escape/averages/Bat_EFNB2_Dimeric_mut_escape_nolegend.html"
)
icXX_html = "results/antibody_escape/averages/Bat_EFNB2_Dimeric_mut_icXX_nolegend.html"


Read the input data and parameters:

In [4]:
# site numbering map, keyed on `site` (renamed from `reference_site`)
site_numbering_map = pd.read_csv(site_numbering_map_csv).rename(
    columns={"reference_site": "site"}
)

# every selection must be listed only once
assert len(params["selections"]) == len(set(params["selections"])), (
    f"duplicate selections in {params['selections']}"
)


def _load_pickled_model(selection):
    """Return the unpickled model stored for `selection`.

    The pickle is read inside a ``with`` block so the file handle is closed as
    soon as the model is loaded, rather than being left for garbage collection.

    NOTE: unpickling can execute arbitrary code, so these files must come from a
    trusted source (here they are read from the `results/` directory).
    """
    model_pickle = f"results/antibody_escape/by_selection/{selection}_polyclonal_model.pickle"
    with open(model_pickle, "rb") as f:
        return pickle.load(f)


# read Polyclonal models into a data frame that can be passed to PolyclonalAverage
models_df = pd.DataFrame(
    [(s, _load_pickled_model(s)) for s in params["selections"]],
    columns=["selection", "model"],
)

# read prob_escape means all into one data frame, labeling rows by selection
prob_escape_means = pd.concat(
    [
        pd.read_csv(
            f"results/antibody_escape/by_selection/{s}_prob_escape_mean.csv"
        ).assign(selection=s)
        for s in params["selections"]
    ],
    ignore_index=True,
)

# get the plot kwargs
escape_plot_kwargs = params["escape_plot_kwargs"]

## Neutralization at concentrations used for each selection
For each selection going into the average, plot the average probability of escape (the fraction of variants that are **not** neutralized) for variants with different numbers of mutations, both for the censored values used to fit the models and the uncensored values.
Note the concentrations **not** used in the model fits are shown fainter and in a different shape:

In [5]:
# header styling shared by the row and column facets
facet_header_kwargs = {"labelFontWeight": "bold", "labelFontSize": 10}

# tooltip shows every column, with only the escape probability given a number format
prob_escape_tooltips = []
for col in prob_escape_means.columns:
    if col == "probability escape":
        prob_escape_tooltips.append(alt.Tooltip(col, format=".3g"))
    else:
        prob_escape_tooltips.append(col)

# encodings: concentration vs probability escape, faceted by censoring (columns)
# and selection (rows), with `use_in_fit` driving both point shape and opacity
mean_prob_escape_encoded = alt.Chart(prob_escape_means).encode(
    x=alt.X("concentration", scale=alt.Scale(type="log")),
    y=alt.Y(
        "probability escape",
        scale=alt.Scale(type="symlog", constant=0.04),
    ),
    column=alt.Column(
        "censored",
        title=None,
        header=alt.Header(**facet_header_kwargs),
    ),
    row=alt.Row(
        "selection",
        title=None,
        header=alt.Header(**facet_header_kwargs),
    ),
    color=alt.Color("n_substitutions"),
    tooltip=prob_escape_tooltips,
    shape=alt.Shape("use_in_fit", scale=alt.Scale(domain=[True, False])),
    opacity=alt.Opacity(
        "use_in_fit", scale=alt.Scale(domain=[True, False], range=[0.9, 0.3])
    ),
)

# add the mark, panel size, and chart-wide configuration
mean_prob_escape_chart = (
    mean_prob_escape_encoded.mark_line(point=True, size=0.75, opacity=0.8)
    .properties(width=230, height=145)
    .configure_axis(grid=False)
    .configure_point(size=50)
)

# last expression of the cell, so the chart is displayed
mean_prob_escape_chart

## Average escape
First build a `PolyclonalAverage`:

In [6]:
# build the average from the per-selection models held in `models_df`
avg_model = polyclonal.PolyclonalAverage(models_df)

# announce the destination, then pickle the average model to it
print(f"Saving the average model to {avg_pickle_file}")
with open(avg_pickle_file, mode="wb") as pickle_out:
    pickle.dump(avg_model, pickle_out)

Saving the average model to results/antibody_escape/averages/Bat_EFNB2_Dimeric_polyclonal_model.pickle


Correlation of escape across different selections:

In [7]:
# sole expression in the cell, so the returned heatmap is shown as the cell output
avg_model.mut_escape_corr_heatmap()

Neutralization curves against unmutated protein (which reflect the wildtype activities, Hill coefficients, and non-neutralizable fractions):

In [8]:
# sole expression in the cell, so the returned curves plot is shown as the cell output
avg_model.curves_plot()

Site line plots for the site escape for each individual selection (model) in the average.
This makes it easier to tell if one selection is an outlier before we plot the full averages below, and how correlated the selections are.
Note the plot is interactive: you can mouse over points and change the site metric shown.

In [9]:
# minimum `times_seen` for these plots: use the slider value from the plot kwargs
# when one is given, otherwise fall back to 1
times_seen = escape_plot_kwargs.get("addtl_slider_stats", {}).get("times_seen", 1)

print(f"Making plots for {times_seen=}")

# sequential-site lookup, with `site` cast to the same type as the model's sites
# so that the merge keys below are comparable
model_site_type = type(avg_model.sites[0])
sequential_site_lookup = site_numbering_map[["site", "sequential_site"]].assign(
    site=lambda df: df["site"].astype(model_site_type)
)

# per-selection site summaries, reshaped to one row per (selection, site, statistic)
site_summary_wide = avg_model.mut_escape_site_summary_df_replicates(
    min_times_seen=times_seen
)
site_summary_long = site_summary_wide.melt(
    id_vars=["selection", "site", "wildtype", "epitope"],
    value_vars=["mean", "total positive", "total negative"],
    var_name="site statistic",
    value_name="site escape",
)
per_selection_site_escape = site_summary_long.merge(
    sequential_site_lookup,
    validate="many_to_one",
)

# dropdown that chooses which site statistic is plotted (initially "mean")
site_statistic_dropdown = alt.binding_select(
    name="site statistic",
    options=per_selection_site_escape["site statistic"].unique(),
)
site_statistic_selection = alt.selection_point(
    fields=["site statistic"],
    bind=site_statistic_dropdown,
    value="mean",
)

# mouse-over selection used to highlight the point for a site
site_selection = alt.selection_point(fields=["site"], on="mouseover", empty=False)

# x-axis: sites ordered by their sequential numbering
site_x = alt.X(
    "site",
    sort=alt.SortField("sequential_site"),
    axis=alt.Axis(labelOverlap=True),
    scale=alt.Scale(nice=False, zero=False),
)
site_tooltips = ["site", alt.Tooltip("site escape", format=".2f")]

# base chart shared by the line and point layers, filtered to the chosen statistic
per_selection_site_escape_chart_base = (
    alt.Chart(per_selection_site_escape)
    .encode(
        x=site_x,
        y="site escape",
        color="epitope",
        tooltip=site_tooltips,
    )
    .properties(width=800, height=85)
    .add_params(site_statistic_selection, site_selection)
    .transform_filter(site_statistic_selection)
)

per_selection_site_escape_chart_lines = per_selection_site_escape_chart_base.mark_line(
    size=0.75
)

# points grow and gain an orange outline when their site is moused over
per_selection_site_escape_chart_points = per_selection_site_escape_chart_base.mark_circle(
    filled=True, stroke="orange"
).encode(
    size=alt.condition(site_selection, alt.value(75), alt.value(30)),
    strokeWidth=alt.condition(site_selection, alt.value(2), alt.value(0)),
)

# layer lines and points, then stack one facet per selection in a single column
per_selection_layers = alt.layer(
    per_selection_site_escape_chart_lines,
    per_selection_site_escape_chart_points,
)
per_selection_escape_chart = per_selection_layers.facet(
    facet=alt.Facet(
        "selection",
        title=None,
        header=alt.Header(labelPadding=0),
    ),
    columns=1,
    spacing=5,
).configure_axis(grid=False)

# last expression of the cell, so the chart is displayed
per_selection_escape_chart

Making plots for times_seen=3


Plot and save the mutation-escape values for the average model:

In [10]:
# first build up arguments used to format plot
# Work on copies rather than on `params["escape_plot_kwargs"]` itself: mutating
# the parameter dict in place meant that re-running this cell appended the same
# statistic to `addtl_slider_stats_hide_not_filter` again, and left data frames
# stored inside `params`.
escape_plot_kwargs = dict(params["escape_plot_kwargs"])
plot_hide_stats = params["plot_hide_stats"]

# data frames to merge into the plotted data: site numbering and region first
escape_plot_kwargs["df_to_merge"] = [
    site_numbering_map[["site", "sequential_site", "region"]]
]

# fresh containers (empty if the keys are absent) so the originals are untouched
escape_plot_kwargs["addtl_slider_stats"] = dict(
    escape_plot_kwargs.get("addtl_slider_stats", {})
)
escape_plot_kwargs["addtl_slider_stats_hide_not_filter"] = list(
    escape_plot_kwargs.get("addtl_slider_stats_hide_not_filter", [])
)

# each hide-stat gets a slider with its initial value, is marked as
# "hide, not filter", and has its values read from CSV for merging
for stat, stat_d in plot_hide_stats.items():
    escape_plot_kwargs["addtl_slider_stats"][stat] = stat_d["init"]
    if stat not in escape_plot_kwargs["addtl_slider_stats_hide_not_filter"]:
        escape_plot_kwargs["addtl_slider_stats_hide_not_filter"].append(stat)
    escape_plot_kwargs["df_to_merge"].append(
        pd.read_csv(stat_d["csv"]).rename(columns={stat_d["csv_col"]: stat})[
            ["site", "mutant", stat]
        ]
    )

print(f"Writing escape values to {escape_csv}")
avg_model.mut_escape_df.to_csv(escape_csv, index=False, float_format="%.4g")

escape_chart = avg_model.mut_escape_plot(**escape_plot_kwargs)
print(f"Writing escape chart to {escape_html}")
escape_chart.save(escape_html)

display(escape_chart)

Writing escape values to results/antibody_escape/averages/Bat_EFNB2_Dimeric_mut_escape.csv


Writing escape chart to results/antibody_escape/averages/Bat_EFNB2_Dimeric_mut_escape_nolegend.html


Plot and save the predicted change in neutralization induced by each mutation:

In [11]:
# percent inhibition level (e.g., 90 for IC90) requested in the parameters
icXX = params["icXX"]
print(f"Getting predicted changes in IC{icXX}")

# column names for the ICXX values and their log2 fold changes
icXX_col = f"IC{icXX}"
log_fold_change_icXX_col = f"log2 fold change {icXX_col}"

# keyword arguments shared by the data-frame and plot calls; the percent is
# converted to a fraction for `x`
icXX_shared_kwargs = {
    "x": icXX / 100.0,
    "icXX_col": icXX_col,
    "log_fold_change_icXX_col": log_fold_change_icXX_col,
}

print(f"Writing changes in ICXX to {icXX_csv}")
icXX_values = avg_model.mut_icXX_df(**icXX_shared_kwargs)
icXX_values.to_csv(icXX_csv, index=False, float_format="%.4g")

# the plot also receives the formatting kwargs held in `escape_plot_kwargs`
icXX_chart = avg_model.mut_icXX_plot(**icXX_shared_kwargs, **escape_plot_kwargs)
print(f"Writing ICXX chart to {icXX_html}")
icXX_chart.save(icXX_html)

display(icXX_chart)

Getting predicted changes in IC90
Writing changes in ICXX to results/antibody_escape/averages/Bat_EFNB2_Dimeric_mut_icXX.csv


Writing ICXX chart to results/antibody_escape/averages/Bat_EFNB2_Dimeric_mut_icXX_nolegend.html
