# Compare mutation effects on binding to human versus mouse Mxra8

In [1]:
import altair as alt

import pandas as pd

# Turn off Altair's row-count limit for embedded data; the table plotted in this
# notebook has more than ten thousand rows. The return value is discarded so the
# cell produces no output.
_ = alt.data_transformers.disable_max_rows()

In [2]:
# this cell is tagged parameters for `papermill` parameterization
#
# Every name the notebook expects to receive is declared here with a `None`
# placeholder, so the full parameter set is visible in one place.

# input CSVs
entry_293T_human_Mxra8 = None  # cell-entry (functional) effects
binding_human_Mxra8 = None  # mutation effects on human Mxra8 binding
binding_mouse_Mxra8 = None  # mutation effects on mouse Mxra8 binding
addtl_annotations = None  # per-site annotations merged onto the data

# output HTML file for the correlation chart
corr_chart_html = None

# initial value of the chart's minimum-entry slider
min_entry_293T_human_Mxra8 = None

# NOTE: despite the `min_` prefix, these three are used as *upper* bounds on the
# corresponding standard-deviation columns (rows are kept when std <= value)
min_entry_293T_human_Mxra8_std = None
min_mouse_Mxra8_binding_std = None
min_human_Mxra8_binding_std = None

# rows are kept when `times_seen` >= this value
min_times_seen = None

In [3]:
# Parameters
# Concrete values that override the `None` placeholders of the papermill
# parameters cell (presumably the cell papermill injects at run time).

# Initial value of the chart's minimum-entry slider.
min_entry_293T_human_Mxra8 = -3
# Despite the `min_` prefix, the three `*_std` values below are applied as upper
# bounds: rows are kept when the matching std column is <= the value.
min_entry_293T_human_Mxra8_std = 2.25
min_mouse_Mxra8_binding_std = 2.25
min_human_Mxra8_binding_std = 2.5
# Rows are kept when `times_seen` >= this value.
min_times_seen = 2
# Input CSV of cell-entry effects.
entry_293T_human_Mxra8 = (
    "results/func_effects/averages/293T-Mxra8_entry_func_effects.csv"
)
# Input CSVs of mutation effects on binding to human and mouse Mxra8.
binding_human_Mxra8 = "results/receptor_affinity/averages/human_Mxra8_mut_effect.csv"
binding_mouse_Mxra8 = "results/receptor_affinity/averages/mouse_Mxra8_mut_effect.csv"
# Input CSV of per-site annotations.
addtl_annotations = "data/addtl_site_annotations.csv"
# Output path the correlation chart is saved to.
corr_chart_html = "results/compare_human_mouse_mxra8_binding.html"

import os
# Change the working directory to its parent, presumably so that the relative
# "results/..." and "data/..." paths resolve from the project root.
# NOTE(review): the path is relative to the *current* directory, so running this
# line a second time in the same kernel moves up one more level and breaks
# those paths; run it exactly once per kernel.
os.chdir("../")

## Read the data

In [20]:
# read the data
#
# Builds `data_df`, one row per mutation, holding:
#   - the cell-entry effect (`entry_293T_human_Mxra8`)
#   - the binding effect and a text label for human and for mouse Mxra8
#   - per-site annotations (`sequential_site`, `region`, `domain`, `Mxra8_contact`)
# Only mutations with at least one binding measurement are kept.

# Cell-entry effects: filter on how often the mutation was seen and on the
# variability of its effect, then build the `mutation` string used as merge key.
print(f"Reading cell entry from {entry_293T_human_Mxra8=}")
data_df = (
    pd.read_csv(entry_293T_human_Mxra8)
    .query("times_seen >= @min_times_seen")
    .query("effect_std <= @min_entry_293T_human_Mxra8_std")
    .assign(mutation=lambda x: x["wildtype"] + x["site"].astype(str) + x["mutant"])
    [["site", "wildtype", "mutant", "mutation", "effect"]]
    .rename(columns={"effect": "entry_293T_human_Mxra8"})
)

# Binding effects: the same steps are applied to the human and the mouse file,
# each with its own std cutoff.
for name, f, std in [
    ("binding_human_Mxra8", binding_human_Mxra8, min_human_Mxra8_binding_std),
    ("binding_mouse_Mxra8", binding_mouse_Mxra8, min_mouse_Mxra8_binding_std),
]:
    print(f"Reading {name=} from {f=}")
    bind_df = (
        pd.read_csv(f)
        .query("times_seen >= @min_times_seen")
        .query("frac_models == 1")
        .query("`Mxra8 binding_std` <= @std")
        .rename(columns={"Mxra8 binding_median": name})
    )
    # NOTE(review): every column from position 11 onward is treated as a
    # per-replicate value; this depends on the CSV column order -- confirm.
    bind_rep_cols = bind_df.columns[11:].tolist()
    bind_df = (
        bind_df
        .assign(
            # Tooltip text "median (rep1, rep2, ...)". All numbers are shown with
            # two decimals so the replicate values match the median's formatting
            # (previously trailing zeros were dropped, e.g. "0.1" next to "0.33").
            label=lambda x: x.apply(
                lambda r: "{:.2f} ({})".format(
                    r[name],
                    ", ".join(format(r[c], ".2f") for c in bind_rep_cols),
                ),
                axis=1,
            )
        )
        .rename(columns={"label": f"{name}_label"})
        [["mutation", name, f"{name}_label"]]
    )
    # Left merge keeps every entry-filtered mutation; ones lacking a binding
    # value get NaN here and are handled by the trimming step further down.
    data_df = data_df.merge(
        bind_df, how="left", on=["mutation"], validate="1:1"
    )

# Site annotations: the merge key is rebuilt as "<protein_site>(<region>)" to
# match the `site` strings in `data_df`; unmatched sites keep NaN annotations.
print(f"Adding site annotations from {addtl_annotations=}")
data_df = data_df.merge(
    (
        pd.read_csv(addtl_annotations)
        .assign(site=lambda x: x["protein_site"].astype(str) + "(" + x["region"] + ")")
        [["site", "sequential_site", "region", "domain", "contacts"]]
        .rename(columns={"contacts": "Mxra8_contact"})
    ),
    on=["site"],
    validate="many_to_one",
    how="left",
)

# Drop mutations that have neither a mouse nor a human binding measurement.
print(f"Trimming {len(data_df)=} to only mutations with a binding measurement")
data_df = (
    data_df
    .query("binding_mouse_Mxra8.notnull() or binding_human_Mxra8.notnull()")
    .sort_values(["sequential_site", "mutation"])
    .reset_index(drop=True)
)
print(f"Trimmed to {len(data_df)=} mutations with a binding measurement")

Reading cell entry from entry_293T_human_Mxra8='results/func_effects/averages/293T-Mxra8_entry_func_effects.csv'
Reading name='binding_human_Mxra8' from f='results/receptor_affinity/averages/human_Mxra8_mut_effect.csv'
Reading name='binding_mouse_Mxra8' from f='results/receptor_affinity/averages/mouse_Mxra8_mut_effect.csv'
Adding site annotations from addtl_annotations='data/addtl_site_annotations.csv'
Trimming len(data_df)=18962 to only mutations with a binding measurement
Trimmed to len(data_df)=12731 mutations with a binding measurement


## Simple correlation of binding to human versus mouse Mxra8

In [21]:
# plot the data
#
# Interactive scatter of human-Mxra8 binding (x) against mouse-Mxra8 binding (y),
# one point per mutation, saved to `corr_chart_html` and displayed inline.

# Hover selections: one keyed on the site, one keyed on the individual mutation.
site_selection = alt.selection_point(fields=["site"], on="mouseover", empty=False)

mut_selection = alt.selection_point(fields=["mutation"], on="mouseover", empty=False)

# Slider running from the lowest observed entry effect up to 0; points whose
# entry effect is below the slider value are filtered out of the chart.
lowest_entry = data_df["entry_293T_human_Mxra8"].min()
min_entry_slider = alt.param(
    name="min_entry_slider",
    value=min_entry_293T_human_Mxra8,
    bind=alt.binding_range(
        name="minimum entry in 293T-human-Mxra8 cells",
        min=lowest_entry,
        max=0,
    ),
)

# Fields shown when hovering over a point.
point_tooltips = [
    "mutation",
    alt.Tooltip("entry_293T_human_Mxra8", format=".2f", title="entry 293T-human-Mxra8"),
    alt.Tooltip("binding_human_Mxra8_label", title="binding human-Mxra8"),
    alt.Tooltip("binding_mouse_Mxra8_label", title="binding mouse-Mxra8"),
]

# Points at the hovered site turn red, more opaque and slightly larger; the
# hovered mutation itself gets a thicker outline.
hover_styling = dict(
    color=alt.condition(site_selection, alt.value("red"), alt.value("gray")),
    opacity=alt.condition(site_selection, alt.value(0.9), alt.value(0.15)),
    size=alt.condition(site_selection, alt.value(75), alt.value(55)),
    strokeWidth=alt.condition(mut_selection, alt.value(3), alt.value(0.7)),
)

corr_chart = (
    alt.Chart(data_df)
    .mark_circle(stroke="black")
    .add_params(site_selection, mut_selection, min_entry_slider)
    .transform_filter(alt.datum["entry_293T_human_Mxra8"] >= min_entry_slider)
    .encode(
        x=alt.X("binding_human_Mxra8", title="binding human Mxra8"),
        y=alt.Y("binding_mouse_Mxra8", title="binding mouse Mxra8"),
        tooltip=point_tooltips,
        **hover_styling,
    )
    .properties(title="binding to human vs mouse Mxra8", width=300, height=300)
    .configure_title(fontSize=18)
    .configure_axis(grid=False, titleFontSize=16, labelFontSize=12)
)

print(f"Saving to {corr_chart_html=}")
corr_chart.save(corr_chart_html)

corr_chart

Saving to corr_chart_html='results/compare_human_mouse_mxra8_binding.html'


## Plot sites where mutations have greatest effect

In [22]:
# Display the merged per-mutation table (the rendered output shows only the
# first and last rows).
data_df

Unnamed: 0,site,wildtype,mutant,mutation,entry_293T_human_Mxra8,binding_human_Mxra8,binding_human_Mxra8_label,binding_mouse_Mxra8,binding_mouse_Mxra8_label,sequential_site,region,domain,Mxra8_contact
0,1(E3),S,A,S1(E3)A,-1.0250,0.04762,"0.05 (0.06, 0.03)",-0.11910,"-0.12 (-0.06, -0.18)",2.0,E3,E3,no
1,1(E3),S,C,S1(E3)C,-0.7132,-0.73310,"-0.73 (-0.61, -0.85)",-0.21170,"-0.21 (-0.44, 0.01)",2.0,E3,E3,no
2,1(E3),S,D,S1(E3)D,0.1852,-0.21540,"-0.22 (-0.21, -0.22)",0.02613,"0.03 (0.02, 0.04)",2.0,E3,E3,no
3,1(E3),S,E,S1(E3)E,0.3038,0.11340,"0.11 (-0.15, 0.37)",-0.33940,"-0.34 (-0.63, -0.05)",2.0,E3,E3,no
4,1(E3),S,F,S1(E3)F,0.2265,-0.45480,"-0.45 (-0.54, -0.37)",0.19810,"0.20 (0.12, 0.27)",2.0,E3,E3,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...
12726,439(E1),H,T,H439(E1)T,-0.6526,0.32660,"0.33 (0.31, 0.34)",-0.02852,"-0.03 (-0.16, 0.1)",988.0,E1,E1-cytoplasmic,no
12727,439(E1),H,W,H439(E1)W,-0.2051,-0.28620,"-0.29 (-0.64, 0.07)",0.23070,"0.23 (-0.03, 0.49)",988.0,E1,E1-cytoplasmic,no
12728,439(E1),H,Y,H439(E1)Y,-0.2293,-0.24560,"-0.25 (-0.29, -0.2)",-0.01344,"-0.01 (-0.12, 0.1)",988.0,E1,E1-cytoplasmic,no
12729,440(E1),*,Q,*440(E1)Q,-3.3990,-1.51300,"-1.51 (-2.55, -0.48)",0.13000,"0.13 (-0.02, 0.28)",,,,
