### Plot effect of polymorphisms on entry and escape
Read in the CSV files that contain all variable sites calculated by `find_variable_sites_from_alignments`, and plot how each of those mutations affects cell entry and antibody escape.

In [None]:
import pandas as pd
import altair as alt
import httpimport
import numpy as np

# Turn off Altair's limit on the number of rows a chart's data may contain
# (presumably the default 5000-row cap -- confirm for the installed Altair version).
_ = alt.data_transformers.disable_max_rows()

In [None]:
# Import custom altair theme from remote github using httpimport module
def import_theme_new():
    """Import the remote ``main_theme`` module and register it as an Altair theme.

    The import runs inside the ``httpimport.github_repo`` context opened for
    ("bblarsen-sci", "altair_themes", "main"). The theme function is then
    registered with Altair under the name "custom_theme" with ``enable=True``.

    NOTE(review): this presumably needs network access to GitHub at run
    time -- confirm before running the notebook offline.
    """
    with httpimport.github_repo("bblarsen-sci", "altair_themes", "main"):
        # Only resolvable while the httpimport context is active.
        import main_theme

        # Defined and registered inside the ``with`` block; ``enable=True``
        # presumably makes it the active theme immediately -- confirm in the Altair docs.
        @alt.theme.register("custom_theme", enable=True)
        def custom_theme():
            return main_theme.main_theme()

# Run once at notebook start so the charts below are built with the theme registered.
import_theme_new()

In [None]:
# Input tables, read from the paths supplied by the Snakemake rule, in order:
# DMS cell-entry effects, Hendra polymorphisms, Nipah polymorphisms, antibody escape.
DMS_data, hendra_poly_df, nipah_poly_df, escape_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        snakemake.input.entry_df,
        snakemake.input.hendra_poly_df,
        snakemake.input.nipah_poly_df,
        snakemake.input.escape_df,
    )
)

# Rule parameter: antibody ordering, used to sort the facets of the escape plot
antibody_order = snakemake.params.antibody_order

In [None]:
# Merge polymorphism data with DMS functional effects
def merge_seq_polymorphisms_with_functional_effects(
    DMS_data, seq_polymorphism_data, *, min_site=29, max_site=481
):
    """Attach DMS functional effects to observed sequence polymorphisms.

    Polymorphisms are first restricted to sites within
    ``[min_site, max_site]`` (both ends inclusive) and then left-merged with
    ``DMS_data`` on ``site``, ``wildtype`` and ``mutant``. Because this is a
    left join, every retained polymorphism is kept; polymorphisms with no
    matching DMS row get NaN in the DMS columns.

    Args:
        DMS_data: DataFrame with ``site``, ``wildtype`` and ``mutant`` columns
            plus the DMS measurement columns to attach.
        seq_polymorphism_data: DataFrame with ``site``, ``wildtype`` and
            ``mutant`` columns describing the polymorphisms. Not modified.
        min_site: Lowest site number to keep. The default reproduces the
            previously hard-coded window.
        max_site: Highest site number to keep. The default reproduces the
            previously hard-coded window.

    Returns:
        A new DataFrame with one row per retained polymorphism/DMS match.
    """
    # Series.between is inclusive on both ends, matching "site >= lo and site <= hi".
    in_window = seq_polymorphism_data["site"].between(min_site, max_site)
    return pd.merge(
        seq_polymorphism_data[in_window],
        DMS_data,
        how="left",
        on=["site", "wildtype", "mutant"],
    )


# Per-virus tables: polymorphisms annotated with their DMS cell-entry effects
hendra_merged = merge_seq_polymorphisms_with_functional_effects(DMS_data, hendra_poly_df)
nipah_merged = merge_seq_polymorphisms_with_functional_effects(DMS_data, nipah_poly_df)

# One long table of the raw polymorphisms, each row tagged with its source
# virus and limited to the same site window used for the merged tables
virus_tagged = [
    poly_df.assign(virus=virus_name)
    for virus_name, poly_df in (("Hendra", hendra_poly_df), ("Nipah", nipah_poly_df))
]
combined = pd.concat(virus_tagged, ignore_index=True)
combined = combined.query("site >= 29 and site <= 481")
display(combined)

In [None]:
# Report how many polymorphisms have no measured entry effect (NaN in the
# effect column after the left merge), then show each table sorted by effect.
# sort_values() puts NaN rows last by default, so the unmeasured mutations
# appear at the bottom of each displayed table.
for _virus_name, _merged in (("Nipah", nipah_merged), ("Hendra", hendra_merged)):
    _n_missing = int(_merged["effect"].isna().sum())
    print(f"{_virus_name}: {_n_missing} of {len(_merged)} polymorphisms have no entry effect")
    display(_merged.sort_values("effect"))

In [None]:
# Histogram of cell-entry effects for the polymorphisms of one virus
def make_cell_entry_bar_chart(df):
    """Build a histogram of the ``effect`` column of ``df``.

    The ``effect`` values are binned in steps of 0.1 into ``x1``/``x2`` bin
    edges, and each bin is drawn as a grey rectangle whose height is the
    number of rows in that bin.

    Args:
        df: Data for the chart; must provide an ``effect`` field.

    Returns:
        The Altair chart object (not displayed or saved here).
    """
    effect_axis = alt.X(
        "x1:Q",
        title=["Effects of mutations", "on Nipah F cell entry"],
        scale=alt.Scale(domain=[-4, 1]),
        axis=alt.Axis(tickCount=4),
    )
    count_axis = alt.Y(
        "count()", title="Mutation count", axis=alt.Axis(tickCount=4)
    )

    # Bin first so that the x1/x2 bin edges exist for the encodings below.
    binned = alt.Chart(df).transform_bin(
        ["x1", "x2"], field="effect", bin=alt.Bin(step=0.1)
    )
    rects = binned.mark_rect(fill="#b8b0ac", stroke="black", strokeWidth=1)
    return rects.encode(x=effect_axis, x2="x2:Q", y=count_axis)


# One histogram per virus, built with the same helper
nipah_mut_histogram = make_cell_entry_bar_chart(nipah_merged)
hendra_mut_histogram = make_cell_entry_bar_chart(hendra_merged)

# Lay the panels out side by side (Nipah left, Hendra right) and share both
# the x and y scales across them
histogram_panels = [nipah_mut_histogram, hendra_mut_histogram]
combined_histogram = alt.hconcat(*histogram_panels)
combined_histogram = combined_histogram.resolve_scale(x="shared", y="shared")
combined_histogram.display()


In [None]:
# Write the combined histogram to the rule's SVG output and its 300-ppi PNG output
for entry_plot_path, entry_save_options in (
    (snakemake.output.cell_entry_plot_svg, {}),
    (snakemake.output.cell_entry_plot_png, {"ppi": 300}),
):
    combined_histogram.save(entry_plot_path, **entry_save_options)


### Antibody Escape
Now repeat the analysis above, but using the antibody escape data instead of the functional (cell entry) effects data.

In [None]:
# Label every escape measurement with its mutation string: wildtype + site + mutant
wildtype_text = escape_df["wildtype"].astype(str)
site_text = escape_df["site"].astype(str)
escape_df["mutation"] = wildtype_text + site_text + escape_df["mutant"]

# Outer join, so rows found on only one side are kept: polymorphisms with no
# escape measurement, and escape measurements for mutations that are not
# among the polymorphisms.
merged_escape = combined.merge(
    escape_df,
    how="outer",
    on=["site", "wildtype", "mutant"],
)

# Rows that came only from the escape data have a missing virus; label them explicitly
merged_escape["virus"] = merged_escape["virus"].fillna("Not observed")
display(merged_escape)

In [None]:
# Plot escape for polymorphisms observed in Nipah or Hendra sequences.
# Only rows with positive mean escape that belong to an observed virus are
# drawn; "Not observed" mutations are filtered out by the query.
nipah_chart = (
    alt.Chart(merged_escape.query("escape_mean > 0 & virus != 'Not observed'"))
    .mark_point(filled=True, strokeWidth=0.5, stroke="black", size=75, opacity=1)
    .encode(
        x=alt.X(
            "virus:O",
            title=None,
            sort=["Not observed", "Nipah", "Hendra"],
            # The virus is conveyed by colour, so the x labels are hidden.
            axis=alt.Axis(title=None, labels=False, ticks=True),
        ),
        y=alt.Y("escape_mean", title="Escape", axis=alt.Axis(tickCount=4, grid=True)),
        # Horizontal jitter within each virus band so points do not overlap.
        xOffset="jitter:Q",
        color=alt.Color(
            "virus:N",
            title=None,
            sort=["Not observed", "Nipah", "Hendra"],
            # Two colours for the two viruses left after filtering
            # (presumably Nipah then Hendra, following the sort order -- confirm in the legend).
            scale=alt.Scale(range=["#1C507B", "#F57F20"]),
        ),
        # One narrow facet per antibody, ordered by the rule's antibody_order
        # parameter, with the facet labels placed underneath.
        column=alt.Column(
            "antibody",
            title="Antibody",
            spacing=0,
            sort=antibody_order,
            header=alt.Header(
                titleOrient="bottom",
                labelOrient="bottom",
                titlePadding=0,
                labelPadding=5,
            ),
        ),
        tooltip=["antibody", "mutation", "escape_mean"],
    )
    # Box-Muller transform: turns two uniform random() draws into a
    # normally distributed jitter value. It is unseeded, so the point
    # positions differ from run to run.
    .transform_calculate(jitter="sqrt(-2*log(random()))*cos(2*PI*random())")
    .properties(width=40, height=75)
)

# The chart is written to disk only through the declared Snakemake outputs
# (see the save cell below), not to hard-coded relative paths.
display(nipah_chart)

In [None]:
# Write the escape chart to the rule's SVG output and its 300-ppi PNG output
for escape_plot_path, escape_save_options in (
    (snakemake.output.escape_plot_svg, {}),
    (snakemake.output.escape_plot_png, {"ppi": 300}),
):
    nipah_chart.save(escape_plot_path, **escape_save_options)