### bEFNB2 vs bEFNB3 correlations for F (this paper) and RBP (previous paper)
Read in DMS entry data, calculate correlations, and plot correlations with Altair

In [None]:
import pandas as pd
import altair as alt
import httpimport
import numpy as np
import scipy.stats

_ = alt.data_transformers.disable_max_rows()

In [None]:
# Fetch the shared Altair theme from GitHub (through httpimport) and activate it
def import_theme_new():
    """Import `main_theme` remotely and register it as the active Altair theme.

    The `main_theme` module is imported inside an `httpimport.github_repo`
    context for the `bblarsen-sci/altair_themes` repository (third argument
    "main" -- presumably the branch/ref; confirm against httpimport docs).
    A thin wrapper around `main_theme.main_theme()` is then registered with
    Altair under the name "custom_theme" with `enable=True`.
    """
    remote_repo = httpimport.github_repo("bblarsen-sci", "altair_themes", "main")
    with remote_repo:
        import main_theme

        def custom_theme():
            return main_theme.main_theme()

        # Same effect as decorating custom_theme with @alt.theme.register(...)
        alt.theme.register("custom_theme", enable=True)(custom_theme)


import_theme_new()

In [None]:
# Load the F tables for the two cell lines (bEFNB2 first, then bEFNB3)
# from the paths supplied by Snakemake.
df_2, df_3 = (
    pd.read_csv(path) for path in (snakemake.input.F_b2, snakemake.input.F_b3)
)

# Filtering thresholds configured in the Snakemake rule
min_times_seen, max_effect_std = (
    snakemake.params.min_times_seen,
    snakemake.params.max_effect_std,
)

In [None]:
#df_2 = pd.read_csv('../../../results/func_effects/averages/Nipah-F-CHO-bEFNB2_func_effects.csv')
#df_3 = pd.read_csv('../../../results/func_effects/averages/Nipah-F-CHO-bEFNB3_func_effects.csv')

In [None]:
# Combine the bEFNB2 and bEFNB3 cell entry DMS tables so that each row is one
# (site, wildtype, mutant) with the columns of both data sets side by side.
merged_effects = df_2.merge(
    df_3,
    how="outer",
    on=["site", "wildtype", "mutant"],
    suffixes=("_bEFNB2", "_bEFNB3"),
)

# Keep only rows that
#   - are not the '*' mutant (presumably stop codons -- confirm),
#   - were seen at least min_times_seen times in BOTH data sets, and
#   - have an effect std of at most max_effect_std in BOTH data sets.
# Rows missing from either data set have NaN in these columns and fail the
# comparisons, so they are dropped as well.
filtered_merged = merged_effects.loc[
    lambda d: (d["mutant"] != "*")
    & (d["times_seen_bEFNB2"] >= min_times_seen)
    & (d["times_seen_bEFNB3"] >= min_times_seen)
    & (d["effect_std_bEFNB2"] <= max_effect_std)
    & (d["effect_std_bEFNB3"] <= max_effect_std)
]

In [None]:
# function to plot correlation with pearson r value
def plot_corr_w_pearson_r(df, x, y, x_axis_title, y_axis_title, x_rvalue, y_rvalue, tooltip_list):
    """Scatter-plot column `x` against column `y` and annotate the Pearson r.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the `x` and `y` columns and every column named in
        `tooltip_list`. It is not modified.
    x, y : str
        Names of the columns drawn on the x and y axes and correlated.
    x_axis_title, y_axis_title : str or list of str
        Axis titles handed to Altair unchanged (callers in this notebook pass
        lists of strings; presumably rendered as multi-line titles).
    x_rvalue, y_rvalue : number
        Data coordinates at which the "r = ..." label is anchored.
    tooltip_list : list
        Column names shown in the point tooltips.

    Returns
    -------
    A 200x200 layered Altair chart: the circles plus the r-value label.

    Side effects
    ------------
    Prints the r value (two decimals) to stdout.
    """
    # Correlate the measurements as given, not the values rounded for display,
    # and use only rows where both columns are present: a single NaN would
    # otherwise make the reported r NaN.
    paired = df[[x, y]].dropna()
    r_value = float(scipy.stats.linregress(paired[x], paired[y]).rvalue)
    print(f"r_value: {r_value:.2f}")

    # Rounding is purely cosmetic (tidy tooltips); DataFrame.round returns a
    # new frame, so the caller's data is left untouched.
    plot_df = df.round(2)

    # Shared encoding for the scatter layer
    base = alt.Chart(plot_df).encode(
        x=alt.X(x, title=x_axis_title),
        y=alt.Y(y, title=y_axis_title),
        tooltip=tooltip_list,
    )

    # Draw the circles
    chart = base.mark_circle(
        size=30,
        opacity=1,
        stroke="black",
        strokeWidth=0.5,
    )

    # One-row inline dataset that places the r-value label at the
    # caller-supplied data coordinates (x_rvalue, y_rvalue)
    text = (
        alt.Chart(
            {
                "values": [
                    {
                        "x": x_rvalue,
                        "y": y_rvalue,
                        "text": f"r = {r_value:.2f}",
                    }
                ]
            }
        )
        .mark_text(
            dx=10,
            dy=0,
            align="left",
        )
        .encode(x=alt.X("x:Q"), y=alt.Y("y:Q"), text="text:N")
    )

    combined_chart = (chart + text).properties(width=200, height=200)
    return combined_chart
    
# Per-mutation comparison of the F entry scores in the two cell lines
x, y = "effect_bEFNB2", "effect_bEFNB3"
x_axis_title = ["Mutation entry score", "into CHO-bEFNB2 cells"]
y_axis_title = ["Mutation entry score", "into CHO-bEFNB3 cells"]
tooltip_list = [
    "site",
    "wildtype",
    "mutant",
    x,
    y,
    "times_seen_bEFNB2",
    "times_seen_bEFNB3",
]

# Data coordinates where the "r = ..." label is drawn
x_rvalue, y_rvalue = -4, -0.1

corr_chart = plot_corr_w_pearson_r(
    df=filtered_merged,
    x=x,
    y=y,
    x_axis_title=x_axis_title,
    y_axis_title=y_axis_title,
    x_rvalue=x_rvalue,
    y_rvalue=y_rvalue,
    tooltip_list=tooltip_list,
)
corr_chart.display()

In [None]:
# Site-level view: average the filtered per-mutation effects at each site
# (one "<column>_mean" per cell line), round to two decimals, then correlate.
filter_merged_agg = (
    filtered_merged.groupby(["site"])
    .agg(
        **{
            f"{column}_mean": (column, "mean")
            for column in ("effect_bEFNB2", "effect_bEFNB3")
        }
    )
    .reset_index()
    .round(2)
)

x, y = "effect_bEFNB2_mean", "effect_bEFNB3_mean"
x_axis_title = ["Entry into CHO-bEFNB2 cells"]
y_axis_title = ["Entry into CHO-bEFNB3 cells"]
tooltip_list = ["site", x, y]

# x_rvalue / y_rvalue (label position) are reused from the per-mutation cell
corr_chart_agg = plot_corr_w_pearson_r(
    df=filter_merged_agg,
    x=x,
    y=y,
    x_axis_title=x_axis_title,
    y_axis_title=y_axis_title,
    x_rvalue=x_rvalue,
    y_rvalue=y_rvalue,
    tooltip_list=tooltip_list,
)
corr_chart_agg.display()

In [None]:
# Place the per-mutation and per-site F charts next to each other and give
# both panels the same x and y scales.
combined_chart = alt.hconcat(corr_chart, corr_chart_agg)
combined_chart = combined_chart.resolve_scale(x="shared", y="shared")
display(combined_chart)

# Write the figure to the Snakemake outputs: SVG, plus PNG at 300 ppi
combined_chart.save(snakemake.output.F_corrs_plot_svg)
combined_chart.save(snakemake.output.F_corrs_plot_png, ppi=300)

### Now repeat the same analysis with the RBP DMS data

In [None]:
# Load the RBP tables for the two cell lines. Per the original note, these
# files are already filtered, so no thresholds are applied here.
RBP_b2, RBP_b3 = (
    pd.read_csv(path) for path in (snakemake.input.RBP_b2, snakemake.input.RBP_b3)
)

# Inner join: keep only (site, wildtype, mutant) rows present in both tables
merged_RBP = RBP_b2.merge(
    RBP_b3,
    how="inner",
    on=["site", "wildtype", "mutant"],
    suffixes=("_bEFNB2", "_bEFNB3"),
)
display(merged_RBP)

In [None]:
# Per-mutation comparison of the RBP entry scores in the two cell lines
x, y = "effect_bEFNB2", "effect_bEFNB3"
x_axis_title = ["RBP Mutation entry score", "into CHO-bEFNB2 cells"]
y_axis_title = ["RBP Mutation entry score", "into CHO-bEFNB3 cells"]
tooltip_list = ["site", "wildtype", "mutant", x, y]
tooltip_list += ["times_seen_bEFNB2", "times_seen_bEFNB3"]

# Data coordinates where the "r = ..." label is drawn
x_rvalue, y_rvalue = -4, -0.1

corr_chart_RBP = plot_corr_w_pearson_r(
    df=merged_RBP,
    x=x,
    y=y,
    x_axis_title=x_axis_title,
    y_axis_title=y_axis_title,
    x_rvalue=x_rvalue,
    y_rvalue=y_rvalue,
    tooltip_list=tooltip_list,
)
corr_chart_RBP.display()


In [None]:
# Site-level view of the RBP data: mean effect per site for each cell line,
# rounded to two decimals.
site_mean_columns = {
    f"{column}_mean": (column, "mean")
    for column in ("effect_bEFNB2", "effect_bEFNB3")
} if False else None  # placeholder removed below; see direct construction
filter_merged_agg_RBP = (
    merged_RBP.groupby(["site"])
    .agg(
        **{
            f"{column}_mean": (column, "mean")
            for column in ("effect_bEFNB2", "effect_bEFNB3")
        }
    )
    .reset_index()
    .round(2)
)
del site_mean_columns

x, y = "effect_bEFNB2_mean", "effect_bEFNB3_mean"
x_axis_title = ["Mean RBP mutation entry score", "into CHO-bEFNB2 cells"]
y_axis_title = ["Mean RBP mutation entry score", "into CHO-bEFNB3 cells"]
tooltip_list = ["site", x, y]

# x_rvalue / y_rvalue (label position) are reused from the previous cell
corr_chart_agg_RBP = plot_corr_w_pearson_r(
    df=filter_merged_agg_RBP,
    x=x,
    y=y,
    x_axis_title=x_axis_title,
    y_axis_title=y_axis_title,
    x_rvalue=x_rvalue,
    y_rvalue=y_rvalue,
    tooltip_list=tooltip_list,
)
corr_chart_agg_RBP.display()


In [None]:
# display side by side and save
# The RBP figure pairs the per-mutation RBP chart with the per-site RBP chart,
# mirroring how the F figure pairs corr_chart with corr_chart_agg. The left
# panel was previously corr_chart_agg (the F per-site chart), which put F data
# into the file saved as the RBP correlation plot.
combined_chart_RBP = alt.hconcat(corr_chart_RBP, corr_chart_agg_RBP).resolve_scale(
    x="shared", y="shared"
)
display(combined_chart_RBP)

# Write the figure to the Snakemake outputs: SVG, plus PNG at 300 ppi
combined_chart_RBP.save(snakemake.output.RBP_corrs_plot_svg)
combined_chart_RBP.save(snakemake.output.RBP_corrs_plot_png, ppi=300)