# Average mutation functional effects for an experiment

Import Python modules.
We use `polyclonal` for the plotting:

In [19]:
import altair as alt

import dms_variants.utils

import pandas as pd

import polyclonal.plot

This notebook is parameterized by `papermill`.
The next cell is tagged as `parameters` to get the passed parameters.

In [2]:
# this cell is tagged parameters for `papermill` parameterization
# Every name below is a placeholder initialized to `None`; the concrete values
# used for this run are assigned in the "# Parameters" cell that follows.
condition = None
site_numbering_map_csv = None  # path passed to `pd.read_csv` when reading inputs
func_effects_csv = None
html = None
params = None  # dict; keys read later include "selections" and "plot_kwargs"

In [3]:
# Parameters
# Concrete values for the placeholders declared in the `parameters` cell.
# NOTE(review): presumably injected by `papermill` rather than hand-written —
# confirm against the pipeline before editing these values here.
params = {
    "avg_method": "median",
    "legend": "Some text\n",
    "plot_kwargs": {
        # `times_seen` is read later as the initial times-seen threshold for
        # the correlation analysis
        "addtl_slider_stats": {"times_seen": 3},
        "heatmap_max_at_least": 1,
        "heatmap_min_at_least": -1,
        "init_floor_at_zero": False,
        "init_site_statistic": "mean",
        "site_zoom_bar_color_col": "region",
    },
    # one per-selection functional-effects CSV is read for each name listed here
    "selections": [
        "LibA-220210-293T_ACE2-1",
        "LibA-220210-293T_ACE2-2",
        "LibA-220302-293T_ACE2-1",
        "LibA-220302-293T_ACE2-2",
        "LibB-220302-293T_ACE2-1",
    ],
    "title": "Mutation effects on entry into 293T-ACE2 cells",
}
condition = "293T_ACE2_entry"
# relative paths; they resolve against the current working directory
site_numbering_map_csv = "data/site_numbering_map.csv"
func_effects_csv = "results/func_effects/averages/293T_ACE2_entry_func_effects.csv"
html = "results/func_effects/averages/293T_ACE2_entry_func_effects_unformatted.html"

In [4]:
# Switch the working directory so the relative paths used in this notebook
# (e.g. `site_numbering_map_csv` and the per-selection CSVs) resolve under
# `../test_example`.
# NOTE(review): a mid-notebook import plus a hard-coded relative `chdir` looks
# like a development leftover — confirm whether this cell should remain.
import os
os.chdir("../test_example")

Read the input data:

In [5]:
# Site numbering map, read from the CSV path supplied as a parameter.
site_numbering_map = pd.read_csv(site_numbering_map_csv)

# Read the functional effects of each selection, tagging every row with the
# name of the selection it came from.
per_selection_effects = []
for selection_name in params["selections"]:
    selection_effects = pd.read_csv(
        f"results/func_effects/by_selection/{selection_name}_func_effects.csv"
    )
    per_selection_effects.append(selection_effects.assign(selection=selection_name))

# Stack all selections into a single frame with a fresh 0..n-1 index.
func_effects = pd.concat(per_selection_effects, ignore_index=True)

func_effects

Unnamed: 0,wildtype,site,mutant,times_seen,latent_phenotype_effect,functional_effect,selection
0,M,1,I,1.0,-0.3161,-0.2729,LibA-220210-293T_ACE2-1
1,M,1,K,1.0,-5.5970,-7.7060,LibA-220210-293T_ACE2-1
2,M,1,L,1.0,-2.8040,-4.7680,LibA-220210-293T_ACE2-1
3,M,1,M,,0.0000,0.0000,LibA-220210-293T_ACE2-1
4,M,1,V,3.0,-6.3400,-7.8620,LibA-220210-293T_ACE2-1
...,...,...,...,...,...,...,...
30898,T,998,S,2.0,-0.7729,-0.5625,LibB-220302-293T_ACE2-1
30899,T,998,T,,0.0000,0.0000,LibB-220302-293T_ACE2-1
30900,G,999,C,3.0,-0.2492,-0.1535,LibB-220302-293T_ACE2-1
30901,G,999,G,,0.0000,0.0000,LibB-220302-293T_ACE2-1


## Correlations among selections
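Compute and plot the correlations ($R^2$) of mutation effects between each pair of selections, for several minimum `times_seen` thresholds: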

In [30]:
# Correlate mutation effects between selections at several minimum
# `times_seen` thresholds, then draw one R^2 heatmap per phenotype.

# The initial threshold comes from the plot settings when they provide one.
try:
    init_times_seen = params["plot_kwargs"]["addtl_slider_stats"]["times_seen"]
except KeyError:
    print("No times seen in params, using a value of 3")
    init_times_seen = 3

# Thresholds to analyze. De-duplicate them: when `init_times_seen` is 1 the
# plain list would contain 1 twice, and every qualifying row would then be
# concatenated twice under the same `min_times_seen`.
min_times_seen_values = sorted({1, init_times_seen, 2 * init_times_seen})

# Long format: one row per selection, mutation and phenotype.
func_effects_tidy = (
    func_effects
    .assign(mutation=lambda x: x["wildtype"] + x["site"].astype(str) + x["mutant"])
    .melt(
        id_vars=["selection", "mutation", "times_seen"],
        value_vars=["latent_phenotype_effect", "functional_effect"],
        var_name="phenotype",
        value_name="effect",
    )
)

# do analysis for each "times_seen" threshold; rows with a missing
# `times_seen` never satisfy the comparison and so are dropped here
func_effects_tidy = pd.concat(
    [
        func_effects_tidy.query("times_seen >= @t").assign(min_times_seen=t)
        for t in min_times_seen_values
    ]
)

corrs = (
    dms_variants.utils.tidy_to_corr(
        df=func_effects_tidy,
        sample_col="selection",
        label_col="mutation",
        value_col="effect",
        group_cols=["phenotype", "min_times_seen"],
    )
    .assign(r2=lambda x: x["correlation"] ** 2)
    .drop(columns="correlation")
)

# Facet labels in numeric threshold order. They are built with the same
# string conversion as the column below so they match it exactly; without an
# explicit order the string labels would not sort numerically (for example
# "min times seen 10" would come before "min times seen 5").
min_times_seen_labels = (
    "min times seen "
    + corrs["min_times_seen"].drop_duplicates().sort_values().astype(str)
).tolist()

corrs = corrs.assign(
    min_times_seen=lambda x: "min times seen " + x["min_times_seen"].astype(str)
)

for phenotype, phenotype_corr in corrs.groupby("phenotype"):
    corr_chart = (
        alt.Chart(phenotype_corr)
        .encode(
            alt.X("selection_1", title=None),
            alt.Y("selection_2", title=None),
            column=alt.Column(
                "min_times_seen", title=None, sort=min_times_seen_labels
            ),
            color=alt.Color("r2", scale=alt.Scale(zero=True)),
            tooltip=[
                alt.Tooltip(c, format=".3g") if c == "r2" else c
                for c in ["phenotype", "selection_1", "selection_2", "r2"]
            ],
        )
        .mark_rect(stroke="black")
        .properties(width=alt.Step(15), height=alt.Step(15), title=phenotype)
        .configure_axis(labelLimit=500)
    )

    display(corr_chart)

Unnamed: 0,phenotype,min_times_seen,selection_1,selection_2,r2
0,functional_effect,min times seen 1,LibA-220210-293T_ACE2-1,LibA-220210-293T_ACE2-1,1.0
1,functional_effect,min times seen 1,LibA-220210-293T_ACE2-2,LibA-220210-293T_ACE2-1,0.433534
2,functional_effect,min times seen 1,LibA-220302-293T_ACE2-1,LibA-220210-293T_ACE2-1,0.639929
3,functional_effect,min times seen 1,LibA-220302-293T_ACE2-2,LibA-220210-293T_ACE2-1,0.420645
4,functional_effect,min times seen 1,LibB-220302-293T_ACE2-1,LibA-220210-293T_ACE2-1,0.043495
