# Visualize receptor binding regions and distribution of scores for different GPC regions

In [None]:
# Imports
import os
import polyclonal
import pandas as pd
import altair as alt

# Plotting colors: categorical palette, re-arranged so that the first
# entries are the ones assigned to the plotted regions. Every entry must be
# unique, otherwise two categories become indistinguishable.
tol_muted_adjusted = [
    "#AA4499",
    "#88CCEE",
    "#EE7733",
    "#117733",
    "#999933",
    "#1f78b4",
    "#000000",
    "#DDDDDD",
    "#CC6677",
    "#44AA99",  # previously listed twice in a row; duplicate removed
    "#DDCC77",
    "#882255",
    "#CC3311",
]

# Lift Altair's limit on the number of data rows a chart may embed, so the
# full functional-score table can be passed to the plots below.
# The return value is bound to `_` (presumably so it is not echoed as the
# cell's output).
_ = alt.data_transformers.disable_max_rows()

In [None]:
# this cell is tagged as `parameters` for papermill parameterization
# All values are placeholders that must be overridden before the cells
# below are run.

# Path of the CSV of functional effects that is read with `pd.read_csv`
func_scores = None

# Minimum `times_seen` and `n_selections` a mutation needs to be kept when
# computing the per-site distributions
min_times_seen = None
n_selections = None

# Directory that is created if missing, and path the combined region plot
# is saved to
html_dir = None
html_output = None

In [None]:
# # Uncomment the lines below to run this notebook interactively
# func_scores = "../results/func_effects/averages/293T_entry_func_effects.csv"

# min_times_seen = 2
# n_selections = 8

# html_dir = "../results/func_scores_distributions/"
# html_output = "../results/func_scores_distributions/func_scores_distributions.html"

In [None]:
# Load the per-mutation functional effects. Nothing is filtered here: the
# table is only given a constant `phenotype` column (used as the plotting
# category) and the `effect` column is renamed to its display name.
functional_scores = (
    pd.read_csv(func_scores)
    .assign(phenotype="functional_effect")
    .rename(columns={"effect": "effect on cell entry"})
)

In [None]:
# Plotting settings

# Amino-acid ordering passed as `alphabet` to the heatmaps; "*" is the
# stop codon
alphabet = [
    "R", "K", "H", "D", "E", "Q", "N", "S", "T", "Y", "W",
    "F", "A", "I", "L", "M", "V", "G", "P", "C", "*",
]

# Extra per-mutation statistics shown in the heatmap tooltips
addtl_tooltip_stats = [
    "n_selections",
    "times_seen",
]

# Extra statistics that get a slider, mapped to the slider's initial value.
# The initial values come from the notebook parameters (rather than being
# hard-coded) so the heatmaps start at the same thresholds that are used to
# filter the scores for the distribution plots.
addtl_slider_stats = {
    "times_seen": min_times_seen,
    "n_selections": n_selections,
}

## Heatmap of **alpha-dystroglycan** binding residues

In [None]:
# Sites of GPC that bind alpha-dystroglycan
DG_sites = [
    120, 121, 125,
    150,  # supported by 151 and 125 interactions
    151,
    256, 257, 258,
]

# Heatmap restricted to the alpha-DG sites; the line plot and zoom bar are
# switched off so only the heatmap panel is drawn.
DG_heatmap_options = dict(
    data_df=functional_scores,
    stat_col="effect on cell entry",
    category_col="phenotype",
    alphabet=alphabet,
    addtl_tooltip_stats=addtl_tooltip_stats,
    addtl_slider_stats=addtl_slider_stats,
    init_floor_at_zero=False,
    init_site_statistic="mean",
    show_zoombar=False,
    show_lineplot=False,
    sites=DG_sites,
    plot_title="\u03B1-DG binding residues",
)
DG_chart = polyclonal.plot.lineplot_and_heatmap(**DG_heatmap_options)

DG_chart

## Heatmap of **LAMP1** binding residues

In [None]:
# Sites of GPC that bind LAMP1
LAMP1_sites = [
    92,  # histidine triad
    93,  # histidine triad
    172, 173,
    188, 192, 195, 197, 198,
    200, 201, 202, 204, 206, 207,
    211, 216,
    230,  # histidine triad
]

# Heatmap restricted to the LAMP1 sites; the line plot and zoom bar are
# switched off so only the heatmap panel is drawn.
LAMP1_heatmap_options = dict(
    data_df=functional_scores,
    stat_col="effect on cell entry",
    category_col="phenotype",
    alphabet=alphabet,
    addtl_tooltip_stats=addtl_tooltip_stats,
    addtl_slider_stats=addtl_slider_stats,
    init_floor_at_zero=False,
    init_site_statistic="mean",
    show_zoombar=False,
    show_lineplot=False,
    sites=LAMP1_sites,
    plot_title="LAMP1 binding residues",
)
LAMP1_chart = polyclonal.plot.lineplot_and_heatmap(**LAMP1_heatmap_options)

LAMP1_chart

## Distributions of functional scores for different regions

Plot the distribution of per-site mean functional scores (excluding stop codons), stratified by GPC region, to show which regions are more or less mutationally tolerant.

In [None]:
# Keep only well-supported mutations (seen at least `min_times_seen` times
# in at least `n_selections` selections) and drop stop codons
is_supported = (
    (functional_scores["times_seen"] >= min_times_seen)
    & (functional_scores["n_selections"] >= n_selections)
    & (functional_scores["mutant"] != "*")
)

# Average the effects of the remaining mutations at each site
site_scores = (
    functional_scores.loc[is_supported]
    .groupby("site")
    .aggregate({
        "wildtype": "first",
        "effect on cell entry": "mean",
    })
    .reset_index()
)

# Label each site with its GPC region: SSP up to site 58, GP1 up to
# site 259, GP2 afterwards
site_scores["region"] = site_scores["site"].map(
    lambda site: "SSP" if site <= 58 else ("GP1" if site <= 259 else "GP2")
)

# Glycosylation sites
glycans = [79, 89, 99, 106, 119, 167, 224, 365, 373, 390, 395]

# Functional annotations are plotted as extra pseudo-regions. Each subset is
# taken from the base per-site table (not from a table that already holds
# other annotations), so a site that belongs to several annotations appears
# exactly once in each of them.
DG_binding_sites = (
    site_scores.loc[site_scores["site"].isin(DG_sites)]
    .assign(region="\u03B1-DG binding sites")
)
LAMP1_binding_sites = (
    site_scores.loc[site_scores["site"].isin(LAMP1_sites)]
    .assign(region="LAMP1 binding sites")
)
glycan_sites = (
    site_scores.loc[site_scores["site"].isin(glycans)]
    .assign(region="N-glycosylation sites")
)

# Concatenation order fixes the left-to-right order of regions in the plots
functional_scores = pd.concat(
    [
        site_scores,
        DG_binding_sites,
        LAMP1_binding_sites,
        glycan_sites,
    ],
    ignore_index=True,
)

In [None]:
# Strip plot of the per-site scores for each region
distribution_plot = alt.Chart(
        functional_scores, title="Effect on cell entry for different GPC regions"
    ).mark_circle(opacity=0.35, size=75).encode(
    x=alt.X(
        "region:N",
        title="GPC region",
        sort=None,  # keep regions in the order they appear in the data
        axis=alt.Axis(
            domainWidth=1,
        ),
    ),
    y=alt.Y(
        "effect on cell entry:Q",
        title="effect on cell entry",
        axis=alt.Axis(
            values=[-5,-4,-3,-2,-1,0,1],
            domainWidth=1,
        ),
    ),
    # Spread points horizontally within each region
    xOffset="jitter:Q",
    color=alt.Color(
        "region:N",
        scale=alt.Scale(
            domain=functional_scores["region"].unique().tolist(),
            range=tol_muted_adjusted
        ),
    ).legend(None),
    tooltip=[
        "site",
        "wildtype",
        alt.Tooltip(
            "effect on cell entry", format=".2f", title="mean effect on cell entry"
        ),
    ],
).transform_calculate(
    # Generate Gaussian jitter with a Box-Muller transform
    jitter="sqrt(-2*log(random()))*cos(2*PI*random())"
)

# Tick mark at the median score of each region
median_plot = alt.Chart(functional_scores).mark_tick(size=50, thickness=3).encode(
    x=alt.X(
        "region:N",
        title="GPC region",
        sort=None,
        axis=alt.Axis(
            domainWidth=1,
        ),
    ),
    y=alt.Y(
        "median(effect on cell entry):Q",
        axis=alt.Axis(
            values=[-5,-4,-3,-2,-1,0,1],
            domainWidth=1,
        ),
    ),
    tooltip=[
        "region:N",
        alt.Tooltip(
            "median(effect on cell entry):Q", format=".2f", title="median effect on cell entry",
        ),
    ],
    color=alt.Color("region:N", legend=None),
)

# Combine strip plot and median ticks
combined_plot = (
    (distribution_plot + median_plot)
    .configure_axis(
        grid=False,
        labelFontSize=16,
        titleFontSize=16,
        labelFontWeight="normal",
        titleFontWeight="normal",
    )
    .properties(
        width=400,
        height=500,
    )
    .configure_title(
        fontSize=24,
    )
)

# Make the output dir (including any missing parents); `exist_ok` avoids
# the check-then-create race of testing for existence first
os.makedirs(html_dir, exist_ok=True)

print(f"Saving to {html_output}")
combined_plot.save(html_output)

combined_plot

Create the **same** plot as above, but with smaller fonts, marks, and dimensions for use as a manuscript figure.

In [None]:
# Manuscript-sized variant of the region plot: same data and encodings as
# the full-size figure, with smaller marks, fonts and canvas, and no title.


def _region_x():
    """Return the x-encoding: one column per region, in data order."""
    return alt.X(
        "region:N",
        title="GPC region",
        sort=None,
        axis=alt.Axis(domainWidth=1),
    )


def _effect_axis():
    """Return the y-axis with fixed integer ticks from -5 to 1."""
    return alt.Axis(values=[-5, -4, -3, -2, -1, 0, 1], domainWidth=1)


# Regions in order of first appearance; fixes the color assignment
region_order = functional_scores["region"].unique().tolist()

# Strip plot of the per-site scores for each region
distribution_plot = (
    alt.Chart(functional_scores)
    .transform_calculate(
        # Generate Gaussian jitter with a Box-Muller transform
        jitter="sqrt(-2*log(random()))*cos(2*PI*random())"
    )
    .mark_circle(opacity=0.35, size=11.25)
    .encode(
        x=_region_x(),
        y=alt.Y(
            "effect on cell entry:Q",
            title="effect on cell entry",
            axis=_effect_axis(),
        ),
        xOffset="jitter:Q",
        color=alt.Color(
            "region:N",
            scale=alt.Scale(domain=region_order, range=tol_muted_adjusted),
            legend=None,
        ),
        tooltip=[
            "site",
            "wildtype",
            alt.Tooltip(
                "effect on cell entry",
                format=".2f",
                title="mean effect on cell entry",
            ),
        ],
    )
)

# Tick mark at the median score of each region
median_plot = (
    alt.Chart(functional_scores)
    .mark_tick(size=7.5, thickness=1)
    .encode(
        x=_region_x(),
        y=alt.Y("median(effect on cell entry):Q", axis=_effect_axis()),
        tooltip=[
            "region:N",
            alt.Tooltip(
                "median(effect on cell entry):Q",
                format=".2f",
                title="median effect on cell entry",
            ),
        ],
        color=alt.Color("region:N", legend=None),
    )
)

# Layer the median ticks over the strip plot and shrink fonts and canvas
combined_plot = (
    alt.layer(distribution_plot, median_plot)
    .properties(width=60, height=75)
    .configure_axis(
        grid=False,
        labelFontSize=8,
        titleFontSize=8,
        labelFontWeight="normal",
        titleFontWeight="normal",
    )
)

combined_plot