In [1]:
import pandas as pd 
import altair as alt
import numpy as np
import sys

# Extend the import path first; the `theme` import below relies on it.
ANALYSIS_DIR = '../analysis/'
sys.path.append(ANALYSIS_DIR)
import theme

# Register the project theme under a name and make it the active one.
THEME_NAME = 'main_theme'
alt.themes.register(THEME_NAME, theme.main_theme)
alt.themes.enable(THEME_NAME)

# Turn off Altair's max-rows check for embedded data.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [2]:
# Read the validation measurements (one row per virus / replicate /
# pH treatment, per the preview below) and show the first rows.
VALIDATION_CSV = 'data/250430_stability.csv'
stability_validation = pd.read_csv(VALIDATION_CSV)
stability_validation.head()

Unnamed: 0,virus,replicate,pH_treatment,GFP_fraction_positive,GFP_fraction_retained
0,S205Y,1,neutral,28.1,1.0
1,S205Y,1,5.7,0.091,0.003
2,S205Y,1,5.5,0.026,0.001
3,S205Y,1,5.3,0.041,0.001
4,R220T,1,neutral,22.6,1.0


In [3]:
# Read the per-mutation stability effects, then set the `mutation` label as
# wildtype residue + site number + mutant residue (e.g. 'Q' + '1' + 'A' -> 'Q1A').
stability_effects = pd.read_csv('../results/stability/averages/stability_mut_effect.csv')
stability_data = stability_effects.assign(
    mutation=(
        stability_effects['wildtype']
        + stability_effects['site'].astype(str)
        + stability_effects['mutant']
    )
)
stability_data.head()

Unnamed: 0,epitope,site,wildtype,mutant,mutation,stability_mean,stability_median,stability_std,n_models,times_seen,frac_models,LibA-240928-pH,LibB-240928-pH
0,1,1,Q,A,Q1A,0.004237,0.004237,0.04109,2,5.5,1.0,0.03329,-0.02481
1,1,1,Q,C,Q1C,-0.0143,-0.0143,0.01123,2,4.5,1.0,-0.006359,-0.02224
2,1,1,Q,D,Q1D,-0.0219,-0.0219,0.007839,2,5.0,1.0,-0.02744,-0.01636
3,1,1,Q,E,Q1E,0.00689,0.00689,0.01096,2,7.0,1.0,0.01464,-0.000862
4,1,1,Q,F,Q1F,-0.001402,-0.001402,0.006532,2,6.5,1.0,0.003217,-0.006021


In [4]:
# Smallest value kept in the plotted columns; equals the 0.01 lower limit of
# the log-scaled y axes used by the charts further down.
PLOT_FLOOR = 1e-2


def standard_error(values):
    """Return the sample standard deviation (ddof=1) over sqrt(number of values)."""
    return np.std(values, ddof=1) / np.sqrt(len(values))


# Mean and standard error of the retained fraction per virus and pH treatment.
per_condition = stability_validation.groupby(['virus', 'pH_treatment']).agg(
    mean_infectivity_retained=('GFP_fraction_retained', 'mean'),
    se=('GFP_fraction_retained', standard_error),
).reset_index()

# Attach the DMS stability effect of each virus' mutation (left join, so
# viruses without a matching mutation are kept).
summary = per_condition.merge(
    stability_data[['mutation', 'stability_mean']],
    how='left',
    left_on='virus',
    right_on='mutation',
)

# Error-bar limits (mean ± 1SE), computed from the mean before it is floored.
summary['lower'] = summary['mean_infectivity_retained'] - summary['se']
summary['upper'] = summary['mean_infectivity_retained'] + summary['se']

summary['stability_mean'] = summary['stability_mean'].fillna(0)  # for wildtype

# Floor every plotted column at PLOT_FLOOR.
for column in ('mean_infectivity_retained', 'lower', 'upper'):
    summary[column] = summary[column].clip(lower=PLOT_FLOOR)

summary.head()

Unnamed: 0,virus,pH_treatment,mean_infectivity_retained,se,mutation,stability_mean,lower,upper
0,G404R,5.3,0.01,0.0,G404R,-0.3386,0.01,0.01
1,G404R,5.5,0.01,0.0015,G404R,-0.3386,0.01,0.01
2,G404R,5.7,1.375,0.077,G404R,-0.3386,1.298,1.452
3,G404R,neutral,1.0,0.0,G404R,-0.3386,1.0,1.0
4,K189E,5.3,0.03,0.002,K189E,-0.004046,0.028,0.032


In [5]:
# Faceted line chart: mean fraction of infectivity retained at each pH
# treatment, one panel per virus, with mean ± 1SE error bars.

# Encoding pieces shared by both layers.
ph_axis = alt.X(
    'pH_treatment',
    title='pH treatment',
    sort=['neutral', '5.7', '5.5', '5.3'],  # fixed category order on the x axis
)
retained_title = ['Fraction infectivity', 'retained']
retained_scale = alt.Scale(type='log', domain=[0.01, 2])

# The line/point colour is fixed on the mark itself, so no colour encoding is
# declared here: a colour channel without a field carries no information.
lineplot = alt.Chart(summary).mark_line(
    color='#80B1D3',
    size=2,
    point={"filled": True, "size": 80, "color": '#80B1D3', "stroke": 'black', "strokeWidth": 0.5}
).encode(
    x=ph_axis,
    y=alt.Y(
        'mean_infectivity_retained',
        title=retained_title,
        scale=retained_scale
    ),
).properties(
    # NOTE(review): this size looks superseded by the width/height set on the
    # layered chart below — confirm and drop if so.
    height=200,
    width=200
)

# error bars: mean ± 1SE
error_bars = alt.Chart(summary).mark_errorbar().encode(
    x=ph_axis,
    y=alt.Y(
        'lower:Q',
        title=retained_title,
        scale=retained_scale
    ),
    y2='upper:Q',
)

# Layer the two marks, then split into one panel per virus (4 per row) with
# independent x and y scales per panel.
(lineplot + error_bars).properties(
    width=90,
    height=120
).facet(
    facet=alt.Facet(
        'virus',
        sort=['N165H', 'S205Y', 'R220T', 'R229I', 'S107E', 'G404R', 'K189E', 'unmutated'],
        title=None
    ),
    columns=4
).resolve_scale(
    x='independent',
    y='independent'
).configure_header(
    labelFontSize=16,
    labelFontWeight='bold'
)

In [18]:
# Same line + error-bar figure, restricted to five viruses and coloured per
# virus, with x and y scales shared across panels.

# Display order of the selected viruses; reused for the row filter, the
# colour domain and the facet order.
featured_viruses = ["N165H", "S205Y", "R220T", "R229I", "unmutated"]
featured_colors = ['#F28E2B', '#4E79A7', '#E15759', '#76B7B2', '#BAB0AC']

subset_df = summary[summary['virus'].isin(featured_viruses)]

# Axis pieces common to both layers.
subset_x = alt.X(
    'pH_treatment',
    sort=['neutral', '5.7', '5.5', '5.3'],
    title='pH treatment',
)
subset_y_title = ['Fraction infectivity', 'retained']
subset_y_scale = alt.Scale(type='log', domain=[0.01, 2])

point_style = {"filled": True, "size": 80, "color": '#80B1D3', "stroke": 'black', "strokeWidth": 0.5}

lineplot = (
    alt.Chart(subset_df)
    .mark_line(color='#80B1D3', size=2, point=point_style)
    .encode(
        x=subset_x,
        y=alt.Y('mean_infectivity_retained', title=subset_y_title, scale=subset_y_scale),
        # One fixed colour per virus; the legend is hidden.
        color=alt.Color(
            'virus:O',
            scale=alt.Scale(domain=featured_viruses, range=featured_colors),
            legend=None,
        ),
    )
    .properties(height=200, width=200)
)

# error bars: mean ± 1SE
error_bars = (
    alt.Chart(subset_df)
    .mark_errorbar()
    .encode(
        x=subset_x,
        y=alt.Y('lower:Q', title=subset_y_title, scale=subset_y_scale),
        y2='upper:Q',
    )
)

# Layer, size each panel, then facet three panels per row.
layered = (lineplot + error_bars).properties(width=90, height=90)
faceted = layered.facet(
    facet=alt.Facet('virus', sort=featured_viruses, title=None),
    columns=3,
).resolve_scale(x='shared', y='shared')

faceted.configure_header(labelFontSize=16, labelFontWeight='bold')

In [6]:
# Replicate-level validation measurements joined to the DMS stability effect
# of each virus' mutation (left join keeps viruses with no matching mutation).
validation_merged = (
    stability_validation
    .merge(
        stability_data[['mutation', 'stability_mean']],
        left_on='virus',
        right_on='mutation',
        how='left',
    )
    .assign(
        # Only the DMS effect is defaulted to 0 (viruses with no matching
        # mutation, e.g. wildtype). Zero-filling every column would also turn
        # the missing `mutation` label into 0 and convert any missing
        # measurement into a value that is then floored to 0.01.
        stability_mean=lambda merged: merged['stability_mean'].fillna(0),
        # Floor at 0.01, the lower limit of the log-scaled axes used below.
        GFP_fraction_retained=lambda merged: merged['GFP_fraction_retained'].clip(lower=1e-2),
    )
)

validation_merged.head()

Unnamed: 0,virus,replicate,pH_treatment,GFP_fraction_positive,GFP_fraction_retained,mutation,stability_mean
0,S205Y,1,neutral,28.1,1.0,S205Y,-0.7182
1,S205Y,1,5.7,0.091,0.01,S205Y,-0.7182
2,S205Y,1,5.5,0.026,0.01,S205Y,-0.7182
3,S205Y,1,5.3,0.041,0.01,S205Y,-0.7182
4,R220T,1,neutral,22.6,1.0,R220T,-0.6935


In [14]:
# Single-panel overview: one line per virus in `subset_df`, on a log y axis.
overlay_x = alt.X(
    'pH_treatment',
    sort=['neutral', '5.7', '5.5', '5.3'],
    title='pH treatment',
)
overlay_y = alt.Y(
    'mean_infectivity_retained',
    scale=alt.Scale(type='log'),
    title='Fraction infectivity retained',
)

overlay_chart = alt.Chart(subset_df).mark_line(point=True).encode(
    x=overlay_x,
    y=overlay_y,
    color='virus',
)
overlay_chart.properties(width=200, height=200)

In [6]:
# Faceted bar chart of the retained fraction per pH treatment, one panel per
# virus (4 per row), coloured by virus with the legend hidden.
# NOTE(review): `validation_merged` appears to hold one row per replicate, so
# several bars may be drawn at each pH — confirm this is intended rather than
# an aggregate such as the mean.
bar_x = alt.X(
    'pH_treatment',
    sort=['neutral', '5.7', '5.5', '5.3'],
    title='pH treatment',
)
bar_y = alt.Y(
    'GFP_fraction_retained',
    scale=alt.Scale(type='log', domain=[0.01, 2]),
    title=['Fraction infectivity', 'retained'],
)
virus_panels = alt.Facet(
    'virus',
    sort=['N165H', 'S205Y', 'R220T', 'R229I', 'S107E', 'G404R', 'K189E', 'unmutated'],
    title=None,
)

bars = (
    alt.Chart(validation_merged)
    .mark_bar()
    .encode(x=bar_x, y=bar_y, color=alt.Color('virus', legend=None))
    .properties(width=90, height=125)
)

(
    bars
    .facet(facet=virus_panels, columns=4)
    .resolve_scale(x='independent', y='independent')
    .configure_header(labelFontSize=16, labelFontWeight='bold')
)

In [7]:
# Scatter of the DMS stability effect against the retained fraction measured
# at the pH 5.7 treatment, one point per row, coloured by virus.
ph57_rows = validation_merged[validation_merged['pH_treatment'] == "5.7"]

scatter = alt.Chart(ph57_rows).mark_circle(
    size=70, strokeWidth=1, stroke='black'
).encode(
    x=alt.X('stability_mean', title='Stability effect in DMS'),
    y=alt.Y(
        'GFP_fraction_retained',
        scale=alt.Scale(type='log'),
        title='Fraction infectivity retained',
    ),
    color='virus',
)
scatter.properties(width=200, height=200)