## Analyze stability effects

In [1]:
import pandas as pd
import numpy as np
import altair as alt
# Turn off Altair's max-rows check so charts built from large DataFrames
# are rendered instead of raising MaxRowsError.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [2]:
# Load the site-numbering map: one table relating sequential and reference
# site numbers, with the wildtype residue and region / RBS labels per site.
site_map_csv = '../data/site_numbering_map.csv'
site_map = pd.read_csv(site_map_csv)
site_map.head()

Unnamed: 0,sequential_site,reference_site,sequential_wt,region,rbs_region
0,1,1,Q,HA1,outside RBS
1,2,2,K,HA1,outside RBS
2,3,3,I,HA1,outside RBS
3,4,4,P,HA1,outside RBS
4,5,5,G,HA1,outside RBS


In [4]:
# Load the per-mutation stability effects (from the `averages` results folder)
# and report how many rows were read.
stability_csv = '../results/stability/averages/stability_mut_effect.csv'
stability_data = pd.read_csv(stability_csv)
print(f'There are {len(stability_data)} stability measurements.')

stability_data.head()

There are 7373 stability measurements.


Unnamed: 0,epitope,site,wildtype,mutant,mutation,stability_mean,stability_median,stability_std,n_models,times_seen,frac_models,LibA-240928-pH,LibB-240928-pH
0,1,1,Q,A,Q1A,0.004237,0.004237,0.04109,2,5.5,1.0,0.03329,-0.02481
1,1,1,Q,C,Q1C,-0.0143,-0.0143,0.01123,2,4.5,1.0,-0.006359,-0.02224
2,1,1,Q,D,Q1D,-0.0219,-0.0219,0.007839,2,5.0,1.0,-0.02744,-0.01636
3,1,1,Q,E,Q1E,0.00689,0.00689,0.01096,2,7.0,1.0,0.01464,-0.000862
4,1,1,Q,F,Q1F,-0.001402,-0.001402,0.006532,2,6.5,1.0,0.003217,-0.006021


In [5]:
# Load the per-mutation cell-entry (functional) effects and report how many
# rows were read.
func_csv = '../results/func_effects/averages/MDCKSIAT1_entry_func_effects.csv'
func_data = pd.read_csv(func_csv)
print(f'There are {len(func_data)} cell entry measurements.')

func_data.head()

There are 10401 cell entry measurements.


Unnamed: 0,site,wildtype,mutant,effect,effect_std,times_seen,n_selections
0,1,Q,*,-4.945,0.0,16.25,4
1,1,Q,A,-0.1226,0.2296,7.5,4
2,1,Q,C,-0.5732,0.5667,5.75,4
3,1,Q,D,0.255,0.3448,6.5,4
4,1,Q,E,0.2941,0.0502,9.0,4


In [8]:
# Thresholds a mutation must meet to be kept in the combined table.
# NOTE(review): values are carried over unchanged from the previously
# hard-coded filter string; confirm they match the cutoffs used elsewhere.
MIN_STABILITY_TIMES_SEEN = 2  # minimum `times_seen` in the stability data
MIN_FUNC_EFFECT = -3          # minimum cell-entry `effect`
MIN_STABILITY_N_MODELS = 2    # minimum `n_models` in the stability data

# Pair each stability measurement with the cell-entry effect of the same
# mutation. The inner join keeps only mutations present in both tables;
# columns that exist in both (e.g. `times_seen`) receive the `_stability` /
# `_func` suffixes, which is why the filter refers to `times_seen_stability`.
combined_data = pd.merge(
    stability_data,
    func_data,
    on=['site', 'wildtype', 'mutant'],
    how='inner',
    suffixes=('_stability', '_func'),
).query(
    'times_seen_stability >= @MIN_STABILITY_TIMES_SEEN'
    ' and effect >= @MIN_FUNC_EFFECT'
    ' and n_models >= @MIN_STABILITY_N_MODELS'
)
combined_data.head()

Unnamed: 0,epitope,site,wildtype,mutant,mutation,stability_mean,stability_median,stability_std,n_models,times_seen_stability,frac_models,LibA-240928-pH,LibB-240928-pH,effect,effect_std,times_seen_func,n_selections
0,1,1,Q,A,Q1A,0.004237,0.004237,0.04109,2,5.5,1.0,0.03329,-0.02481,-0.1226,0.2296,7.5,4
1,1,1,Q,C,Q1C,-0.0143,-0.0143,0.01123,2,4.5,1.0,-0.006359,-0.02224,-0.5732,0.5667,5.75,4
2,1,1,Q,D,Q1D,-0.0219,-0.0219,0.007839,2,5.0,1.0,-0.02744,-0.01636,0.255,0.3448,6.5,4
3,1,1,Q,E,Q1E,0.00689,0.00689,0.01096,2,7.0,1.0,0.01464,-0.000862,0.2941,0.0502,9.0,4
4,1,1,Q,F,Q1F,-0.001402,-0.001402,0.006532,2,6.5,1.0,0.003217,-0.006021,-0.7141,0.6042,7.0,4


In [9]:
# Attach the region / RBS annotations from the site map to every mutation.
# Rows are matched on (site, wildtype) == (reference_site, sequential_wt).
# No `how` is given, so this is pandas' default inner join: mutations without
# a matching site-map row are dropped. The site-map key and numbering columns
# are removed afterwards because they duplicate `site` / `wildtype`.
site_map_keys = ['reference_site', 'sequential_wt']
combined_data_ann = (
    combined_data
    .merge(site_map, left_on=['site', 'wildtype'], right_on=site_map_keys)
    .drop(columns=['sequential_site', *site_map_keys])
)
combined_data_ann.head()

Unnamed: 0,epitope,site,wildtype,mutant,mutation,stability_mean,stability_median,stability_std,n_models,times_seen_stability,frac_models,LibA-240928-pH,LibB-240928-pH,effect,effect_std,times_seen_func,n_selections,region,rbs_region
0,1,1,Q,A,Q1A,0.004237,0.004237,0.04109,2,5.5,1.0,0.03329,-0.02481,-0.1226,0.2296,7.5,4,HA1,outside RBS
1,1,1,Q,C,Q1C,-0.0143,-0.0143,0.01123,2,4.5,1.0,-0.006359,-0.02224,-0.5732,0.5667,5.75,4,HA1,outside RBS
2,1,1,Q,D,Q1D,-0.0219,-0.0219,0.007839,2,5.0,1.0,-0.02744,-0.01636,0.255,0.3448,6.5,4,HA1,outside RBS
3,1,1,Q,E,Q1E,0.00689,0.00689,0.01096,2,7.0,1.0,0.01464,-0.000862,0.2941,0.0502,9.0,4,HA1,outside RBS
4,1,1,Q,F,Q1F,-0.001402,-0.001402,0.006532,2,6.5,1.0,0.003217,-0.006021,-0.7141,0.6042,7.0,4,HA1,outside RBS


### Mean stability effects across HA

In [13]:
# Collapse the per-mutation table to one row per site: the mean of
# `stability_mean` over all retained mutations at that site, together with
# the site's wildtype residue and region annotations.
per_site_mean = combined_data_ann.groupby('site')['stability_mean'].transform('mean')
mean_df = (
    combined_data_ann
    .assign(mean_stability=per_site_mean)
    .loc[:, ['site', 'wildtype', 'mean_stability', 'region', 'rbs_region']]
    .drop_duplicates()
)

# Record the order in which sites first appear so the ordinal x-axis can be
# sorted by that rank rather than by the site label itself.
ordered_sites = mean_df['site'].unique()
site_to_i = dict(zip(ordered_sites, range(len(ordered_sites))))
mean_df = mean_df.assign(_stat_site_order=mean_df['site'].map(site_to_i))

# Font settings shared by both axes.
axis_text_style = dict(
    titleFontSize=16,
    titleFontWeight='normal',
    labelFontSize=16,
    labelFontWeight='normal',
)

# x: site as an ordinal field, ordered by first appearance, ticks every 100 sites.
site_axis = alt.X(
    'site:O',
    sort=alt.EncodingSortField(field='_stat_site_order', order='ascending'),
    title='Site',
    axis=alt.Axis(
        labelAngle=0,
        values=[100, 200, 300, 400, 500],
        tickCount=5,
        grid=True,
        **axis_text_style,
    ),
)

# y: per-site mean stability effect on a fixed [-1, 0.25] scale.
stability_axis = alt.Y(
    'mean_stability:Q',
    title=['Mean effect on', 'stability'],
    scale=alt.Scale(domain=[-1, 0.25]),
    axis=alt.Axis(
        grid=False,
        values=[-1, -0.5, 0],
        **axis_text_style,
    ),
)

chart = (
    alt.Chart(mean_df)
    .mark_line(opacity=1, stroke='#58593F', size=1.5)
    .encode(
        site_axis,
        stability_axis,
        tooltip=['wildtype', 'site', 'mean_stability', 'region'],
    )
    .properties(width=400, height=100)
)
chart.display()