### Antibody interface
Reads the antibody escape, residue distance, and antibody interface data, then generates and saves plots relating them.

In [None]:
import pandas as pd
import altair as alt
import httpimport

# Turn off Altair's maximum-row check on chart data; the return value is
# assigned to `_` because it is not used.
_ = alt.data_transformers.disable_max_rows()

In [None]:
# Load the shared Altair theme from a remote GitHub repo (via httpimport) and enable it
def import_theme_new():
    """Import ``main_theme`` remotely and register it as the active Altair theme.

    The module is imported inside an ``httpimport.github_repo`` context for
    ``bblarsen-sci/altair_themes`` (third argument ``"main"``). The value
    returned by ``main_theme.main_theme()`` is exposed under the theme name
    ``"custom_theme"``, which is registered with ``enable=True``.
    """
    with httpimport.github_repo("bblarsen-sci", "altair_themes", "main"):
        import main_theme

        def custom_theme():
            return main_theme.main_theme()

        # Explicit-call form of the ``alt.theme.register`` decorator.
        alt.theme.register("custom_theme", enable=True)(custom_theme)


import_theme_new()

In [None]:
# Read every CSV input supplied by the snakemake rule, in the order listed.
entry_df, escape_df_mean, escape_df, distances_df, interface_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        snakemake.input.entry_df,
        snakemake.input.escape_df_mean,
        snakemake.input.escape_df,
        snakemake.input.distances_df,
        snakemake.input.interface_df,
    )
)

# Rule parameters: the antibody ordering used to sort chart axes/facets, and
# the cutoff later compared against the `min_distance` column.
antibody_order, min_dist_cutoff = (
    snakemake.params.antibody_order,
    snakemake.params.min_distance,
)

In [None]:
# Attach the mean-escape table to the entry table by site and wildtype.
# A left join keeps every entry row, matched or not.
merged_tmp = entry_df.merge(escape_df_mean, how="left", on=["site", "wildtype"])

# Add the distance table by site and antibody. `wildtype` is not a join key
# here, so the overlapping columns come out suffixed `_x` (left) and `_y`
# (right); the right-hand copy is dropped and the left-hand one gets its
# plain name back.
merged_all = (
    merged_tmp.merge(distances_df, how="left", on=["site", "antibody"])
    .drop(columns=["wildtype_y"])
    .rename(columns={"wildtype_x": "wildtype"})
)

In [None]:
# Scatter plot of escape against distance to the closest antibody residue,
# faceted into one column per antibody. Only rows with min_distance < 15 and
# positive mean escape are drawn.
# NOTE(review): no color channel is encoded here; add one (e.g. for cell
# entry) if the points are meant to be colored.
escape_vs_dist_chart = (
    alt.Chart(merged_all.query("min_distance < 15 and mean_escape > 0"))
    .mark_circle(opacity=1, size=60, stroke="black", strokeWidth=0.75)
    .encode(
        alt.X("min_distance")
        .title(["Distance to closest", "antibody residue (Å)"])
        .axis(grid=True),
        alt.Y("mean_escape").title("Escape").axis(grid=True),
        alt.Column("antibody:N").title(None).sort(antibody_order),
    )
    .properties(height=200, width=200)
)

escape_vs_dist_chart.display()

In [None]:
# Save the escape-vs-distance chart to both snakemake output paths.
# The first call passes ppi=300; the second writes to the output named
# `escape_vs_dist_svg` with default save settings.
escape_vs_dist_chart.save(snakemake.output.escape_vs_dist, ppi=300)
escape_vs_dist_chart.save(snakemake.output.escape_vs_dist_svg)

In [None]:
# Now examine the effects of mutations that are directly in the antibody footprint.

# Antibodies present in the escape data, in order of first appearance.
unique_antibodies_list = escape_df_mean["antibody"].unique().tolist()
print(f"Unique antibodies in escape data: {unique_antibodies_list}")

# For each antibody, find the sites whose min_distance is below the cutoff and
# collect every entry-data row at those sites, tagged with that antibody.
# Boolean masks are used instead of interpolating values into a
# `DataFrame.query` string, so an antibody name containing a quote (or any
# other character special to the query parser) cannot break or change the filter.
distance_cutoff = float(min_dist_cutoff)
per_antibody_effects = []
for antibody in unique_antibodies_list:
    is_close = (distances_df["antibody"] == antibody) & (
        distances_df["min_distance"] < distance_cutoff
    )
    close_sites = distances_df.loc[is_close, "site"].unique().tolist()
    per_antibody_effects.append(
        entry_df[entry_df["site"].isin(close_sites)].assign(antibody=antibody)
    )
close_sites_effects_df = pd.concat(per_antibody_effects, ignore_index=True)
display(close_sites_effects_df)

# Left-join the mean-escape table onto the footprint-site rows so each row
# also carries the escape values for the same site/antibody/wildtype.
# Rows without a match are kept.
merged_close_sites = pd.merge(
    close_sites_effects_df,
    escape_df_mean,
    on=["site", "antibody", "wildtype"],
    how="left",
)
display(merged_close_sites)


In [None]:
# Box plot of the `effect` values at footprint sites, one box per antibody.
# Whiskers span the full min-max range; antibodies are ordered by
# `antibody_order` and colored per antibody with the legend suppressed.
chart_boxplot = (
    alt.Chart(merged_close_sites)
    .mark_boxplot(color="#B8B0AC", opacity=1, extent="min-max")
    .encode(
        alt.X("antibody:N").title(None).sort(antibody_order),
        alt.Y("effect:Q").title("cell entry in mAb footprint"),
        alt.Color("antibody:N").legend(None).sort(antibody_order),
    )
    .properties(height=150, width=alt.Step(25))
)
chart_boxplot.display()


In [None]:
# Save the footprint box plot to both snakemake output paths.
# The first call passes ppi=300; the second writes to the output named
# `interface_mean_entry_svg` with default save settings.
chart_boxplot.save(snakemake.output.interface_mean_entry, ppi=300)
chart_boxplot.save(snakemake.output.interface_mean_entry_svg)

In [None]:
### Compare buried surface area (BSA) to mean escape

# Average `escape_mean` and `effect` per antibody/site/wildtype. Numeric
# columns are rounded to 2 decimals here, i.e. *before* the `mean_escape > 0`
# filter below is applied.
df_mean_escape = (
    escape_df.groupby(["antibody", "site", "wildtype"], as_index=False)
    .agg(
        mean_escape=pd.NamedAgg(column="escape_mean", aggfunc="mean"),
        mean_entry=pd.NamedAgg(column="effect", aggfunc="mean"),
    )
    .round(2)
)
display(df_mean_escape.head(3))

# Outer-join the interface table with the per-site means, keep only rows with
# positive mean escape, and give rows lacking a BSA value a placeholder of 0.01.
df_merged = interface_df.merge(df_mean_escape, how="outer", on=["antibody", "site"])
df_filter = df_merged.query("mean_escape > 0").assign(
    BSA=lambda frame: frame["BSA"].fillna(0.01)
)

# Collapse multiple interface rows for the same antibody/site/wildtype
# (per the original note, the heavy- and light-chain entries): BSA is summed,
# while mean_escape, mean_entry, and hydrogen_bond take the group's first value.
combined_df = df_filter.groupby(["antibody", "site", "wildtype"], as_index=False).agg(
    mean_escape=pd.NamedAgg(column="mean_escape", aggfunc="first"),
    mean_entry=pd.NamedAgg(column="mean_entry", aggfunc="first"),
    sum_BSA=pd.NamedAgg(column="BSA", aggfunc="sum"),
    hydrogen_bond=pd.NamedAgg(column="hydrogen_bond", aggfunc="first"),
)
display(combined_df.head(3))


In [None]:
# Scatter plots of summed BSA (x) against mean escape (y), one panel per
# antibody, colored by the `hydrogen_bond` category.

panels = []

for antibody in unique_antibodies_list:
    # Rows for this antibody only; `@antibody` refers to the loop variable.
    antibody_rows = combined_df.query("antibody == @antibody")
    panel = (
        alt.Chart(antibody_rows)
        .mark_point(filled=True, size=125, opacity=1, stroke="black", strokeWidth=1)
        .encode(
            alt.X("sum_BSA").axis(tickCount=2, labels=True),
            alt.Y("mean_escape:Q").axis(tickCount=2, labels=True),
            alt.Color("hydrogen_bond:N").title("Bond type w/ antibody"),
            tooltip=["antibody","site"],
        )
        .properties(width=150, height=150, title=antibody)
    )
    panels.append(panel)

# Lay the panels out side by side with x and y scales shared across panels.
chart_bsa_vs_escape = alt.hconcat(*panels).resolve_scale(x="shared", y="shared")
chart_bsa_vs_escape.display()

In [None]:
# Save the BSA-vs-escape panels to both snakemake output paths.
# The first call passes ppi=300; the second writes to the output named
# `interface_bsa_bonds_svg` with default save settings.
chart_bsa_vs_escape.save(snakemake.output.interface_bsa_bonds, ppi=300)
chart_bsa_vs_escape.save(snakemake.output.interface_bsa_bonds_svg)