### Plot mean effects of mutations by region
Plot the mean effects of mutations on cell entry across the different regions of the F protein.

In [None]:
import pandas as pd
import altair as alt
import httpimport

_ = alt.data_transformers.disable_max_rows()

In [None]:
# Import custom altair theme from remote github using httpimport module
def import_theme_new():
    """Register and enable the altair theme named ``custom_theme``.

    The ``main_theme`` module is imported through ``httpimport.github_repo``
    rather than from a local package, so this needs network access when it
    runs. NOTE(review): the third argument ("main") is presumably the git
    ref; if so the theme is not pinned to a fixed commit and the remote code
    is executed as-is -- confirm and consider pinning.
    """
    with httpimport.github_repo("bblarsen-sci", "altair_themes", "main"):
        # must happen inside the context manager: the remote importer is only
        # active within this block
        import main_theme

        # register under the name "custom_theme" and enable it immediately
        @alt.theme.register("custom_theme", enable=True)
        def custom_theme():
            return main_theme.main_theme()


import_theme_new()

In [None]:
# read in data
# (hard-coded path kept for running the notebook by hand, outside snakemake)
#mean_effects = pd.read_csv("../../results/filtered_data/cell_entry/Nipah_F_func_effects_filtered_mean.csv")

# for snakemake
# `snakemake` is not defined in this notebook; it is expected to be injected
# by the workflow, with the CSV path supplied as the `entry_df` input.
# Later cells read the columns "site", "wildtype" and "effect" from this table.
mean_effects = pd.read_csv(snakemake.input.entry_df)

In [None]:
# classify a one-letter amino acid code into a coarse residue class
def assign_aa_type(site_num):
    """Return the residue class for a one-letter amino acid code.

    Despite its name, ``site_num`` is the amino acid letter (this function is
    applied to the ``wildtype`` column), not a site number. Any value that is
    not one of the uppercase letters listed below is classed as "Other".
    """
    residue_classes = (
        (("D", "E"), "Negative"),
        (("K", "R", "H"), "Positive"),
        (("Q", "N", "S", "T"), "Hydrophilic"),
        (("A", "I", "L", "M", "V"), "Hydrophobic"),
        (("Y", "W", "F"), "Aromatic"),
        (("C", "G", "P"), "Special"),
    )
    for members, label in residue_classes:
        if site_num in members:
            return label
    return "Other"

In [None]:
# Map a site number to the name of the protein region that contains it
def assign_region(site_num):
    """Return the region label for ``site_num``.

    Each region is an inclusive [first, last] site interval; a site that
    falls in none of the intervals (including the gaps between them) is
    labelled "Other".
    """
    region_bounds = (
        (75, 99, "HRC"),
        (100, 109, "Cleavage"),
        (110, 129, "Fusion Peptide"),
        (130, 181, "HRA"),
        (182, 215, "Central helix"),
        (452, 481, "HRB"),
    )
    for first_site, last_site, label in region_bounds:
        if first_site <= site_num <= last_site:
            return label
    return "Other"

# Apply the region assignment to the mean_effects DataFrame
# ("region" is derived from the numeric "site" column)
mean_effects["region"] = mean_effects["site"].apply(assign_region)
# classify the wildtype residue at each row into a residue class
mean_effects["wildtype_type"] = mean_effects["wildtype"].apply(assign_aa_type)

# show the annotated table in the notebook output
display(mean_effects)


### Plotting

In [None]:
# Fixed region -> color mapping; the key order is also the plotting order
region_palette = {
    "Cleavage": "#5778a4",
    "Fusion Peptide": "#e49444",
    "HRA": "#d1615d",
    "HRB": "#85b6b2",
    "HRC": "#6a9f58",
    "Central helix": "#a87c9f",
    "Other": "#b8b0ac",
}
# region names in display order, and the matching altair color scale
region_order = list(region_palette)
custom_scale = alt.Scale(
    domain=list(region_palette),
    range=list(region_palette.values()),
)

In [None]:
# Box plot of the mean effects grouped by protein region (one box per region,
# whiskers spanning the full min-max range)
region_axis = alt.X("region", title=None, sort=region_order)
effect_axis = alt.Y("effect:Q", title=None)
region_fill = alt.Color("region:N", legend=None, scale=custom_scale)

boxplot_base = alt.Chart(mean_effects).mark_boxplot(extent="min-max", opacity=1)
boxplot_chart = boxplot_base.encode(
    x=region_axis,
    y=effect_axis,
    color=region_fill,
).properties(width=alt.Step(25), height=200)

display(boxplot_chart)
# Write the figure as a 300 ppi PNG and as an SVG
boxplot_chart.save(snakemake.output.mean_effects_boxplot_png, ppi=300)
boxplot_chart.save(snakemake.output.mean_effects_boxplot_svg)

### Make a chart showing the mean effects of mutations by site

In [None]:
# Bar chart of the mean effect at every site along the protein, one thin bar
# per site, colored by the region the site belongs to
site_axis = alt.X(
    "site:N",
    title="Site",
    # only label every 100th site so the axis stays readable
    axis=alt.Axis(labelAngle=0, values=[100, 200, 300, 400]),
)
site_effect_axis = alt.Y(
    "effect:Q",
    title=["Mean Effect of", "Mutations on Cell Entry"],
)
domain_color = alt.Color("region", title="Protein Domain", scale=custom_scale)

site_bars = alt.Chart(mean_effects).mark_bar()
mean_protein_effects_by_domain = site_bars.encode(
    x=site_axis,
    y=site_effect_axis,
    color=domain_color,
    tooltip=["site", "effect", "region"],
).properties(width=alt.Step(2), height=150)

display(mean_protein_effects_by_domain)


# Write the figure as a 300 ppi PNG and as an SVG
mean_protein_effects_by_domain.save(snakemake.output.mean_effects_site_bar_png, ppi=300)
mean_protein_effects_by_domain.save(snakemake.output.mean_effects_site_bar_svg)


### Now plot ranked effects of mutations by region

In [None]:
# add column to make labels that contain wildtype residue for plotting
# (wildtype letter followed by the site number converted to text; used as the
# y-axis category in the ranked bar plots)
mean_effects["wildtype_site"] = mean_effects["wildtype"] + mean_effects["site"].astype(
    str
)

In [None]:
# shared chart builder for the ranked bar plots below
def _ranked_effect_barchart(subset_df):
    """Build a horizontal bar chart of ``effect`` per ``wildtype_site``.

    ``subset_df`` must contain the columns ``wildtype_site``, ``effect`` and
    ``wildtype_type``. Bars are sorted by descending effect and colored by the
    residue class of the unmutated amino acid.
    """
    return (
        alt.Chart(subset_df)
        .mark_bar()
        .encode(
            # sort="-x" ranks the sites by descending value of the x channel
            y=alt.Y("wildtype_site:N", sort="-x", title="Site"),
            x=alt.X("effect", title="Mean entry score of mutations"),
            color=alt.Color("wildtype_type:N", title="Unmutated AA type"),
        )
        .properties(width=100, height=alt.Step(10))
    )


# function to make barplots of mean entry effects for specified regions
def mean_entry_ranked_barplots(df, region_name):
    """Return a ranked bar chart for the rows of ``df`` in the given region(s).

    ``region_name`` is a list (or other list-like) of values from the
    ``region`` column. A single region name given as a plain string is also
    accepted and treated as a one-element list, because ``Series.isin``
    rejects a bare string.
    """
    if isinstance(region_name, str):
        region_name = [region_name]
    return _ranked_effect_barchart(df[df["region"].isin(region_name)])


# function to make barplots of mean entry effects for specified sites
def mean_entry_ranked_barplots_by_site(df, site_list):
    """Return a ranked bar chart for the rows of ``df`` at the given site(s).

    ``site_list`` is a list (or other list-like) of values from the ``site``
    column. A single scalar site is also accepted and treated as a
    one-element list.
    """
    if pd.api.types.is_scalar(site_list):
        site_list = [site_list]
    return _ranked_effect_barchart(df[df["site"].isin(site_list)])


In [None]:
# make barplot for cleavage site mutations
# (rows whose "region" is "Cleavage"; this chart is only saved, it is not
# displayed in the notebook)
cleavage_bar = mean_entry_ranked_barplots(mean_effects, ["Cleavage"])

# write a 300 ppi PNG and an SVG to the paths supplied by snakemake
cleavage_bar.save(snakemake.output.cleavage_bar_png, ppi=300)
cleavage_bar.save(snakemake.output.cleavage_bar_svg)


In [None]:
# make barplot for fusion peptide mutations
# (rows whose "region" is "Fusion Peptide")
fusion_bar = mean_entry_ranked_barplots(mean_effects, ["Fusion Peptide"])
fusion_bar.display()
# write a 300 ppi PNG and an SVG to the paths supplied by snakemake
fusion_bar.save(snakemake.output.fusion_bar_png, ppi=300)
fusion_bar.save(snakemake.output.fusion_bar_svg)

In [None]:
# make barplots for HRA region split into two plots for better visualization
# top plot: sites 130-181, the interval assign_region labels "HRA". The range
# starts at 130 (not 129) because assign_region labels site 129 as
# "Fusion Peptide".
HRA_top_bar = mean_entry_ranked_barplots_by_site(mean_effects, list(range(130, 182)))
HRA_top_bar.save(snakemake.output.HRA_top_bar_png, ppi=300)
HRA_top_bar.save(snakemake.output.HRA_top_bar_svg)

# bottom plot: sites 182-215, the interval assign_region labels "Central helix"
HRA_bot_bar = mean_entry_ranked_barplots_by_site(mean_effects, list(range(182, 216)))
HRA_bot_bar.save(snakemake.output.HRA_bot_bar_png, ppi=300)
HRA_bot_bar.save(snakemake.output.HRA_bot_bar_svg)

In [None]:
# make barplot for HRB mutations
# (rows whose "region" is "HRB")
HRB_bar = mean_entry_ranked_barplots(mean_effects, ["HRB"])
HRB_bar.display()
# write a 300 ppi PNG and an SVG to the paths supplied by snakemake
HRB_bar.save(snakemake.output.HRB_bar_png, ppi=300)
HRB_bar.save(snakemake.output.HRB_bar_svg)

In [None]:
# make barplot for HRC mutations
# (rows whose "region" is "HRC")
HRC_bar = mean_entry_ranked_barplots(mean_effects, ["HRC"])
HRC_bar.display()
# write a 300 ppi PNG and an SVG to the paths supplied by snakemake
HRC_bar.save(snakemake.output.HRC_bar_png, ppi=300)
HRC_bar.save(snakemake.output.HRC_bar_svg)