In [1]:
import pandas as pd 
import altair as alt
import numpy as np
import sys 

# Extend the import path with the sibling analysis directory. This has to run
# before `import theme` below; `theme` is presumably a project-local module that
# lives there (TODO confirm) and would not be importable otherwise.
sys.path.append('../analysis/')
import theme
# Register the project's chart theme under the name 'main_theme', then activate it
# so every Altair chart built later in the notebook picks it up.
alt.themes.register('main_theme', theme.main_theme)
alt.themes.enable('main_theme')

# Turn off Altair's max-rows check on chart data. As the last expression of the
# cell, the value returned by this call is what the notebook displays.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [2]:
# Read the titer measurements from CSV and preview the first five rows.
cell_entry_titers = pd.read_csv(
    'data/250410_titers.csv',
)
cell_entry_titers.head(n=5)

Unnamed: 0,virus,biological_rep,technical_rep,target_cell,media,row,num_cells_infect,percent_positive_green,ul_of_virus,TU_ul
0,F195Y,1,1,MDCK-SIAT1-PB1,NAM,A,46500,1.83,50.0,17.17665
1,F195Y,1,2,MDCK-SIAT1-PB1,NAM,A,46500,1.5,50.0,14.055683
2,D225N,1,1,MDCK-SIAT1-PB1,NAM,A,46500,0.2,50.0,1.861862
3,D225N,1,2,MDCK-SIAT1-PB1,NAM,A,46500,0.24,50.0,2.234683
4,S193F,1,1,MDCK-SIAT1-PB1,NAM,A,46500,0.16,50.0,1.489192


In [3]:
# Read the averaged functional-effect table.
func_data = pd.read_csv('../results/func_effects/averages/MDCKSIAT1_entry_func_effects.csv')

# Add a `mutation` label built as <wildtype><site><mutant> (e.g. "Q1A"); it is the
# key later used to match these effects to the virus names in the titer data.
func_data = func_data.assign(
    mutation=func_data['wildtype'] + func_data['site'].astype(str) + func_data['mutant']
)

func_data.head()

Unnamed: 0,site,wildtype,mutant,effect,effect_std,times_seen,n_selections,mutation
0,1,Q,*,-4.945,0.0,16.25,4,Q1*
1,1,Q,A,-0.1226,0.2296,7.5,4,Q1A
2,1,Q,C,-0.5732,0.5667,5.75,4,Q1C
3,1,Q,D,0.255,0.3448,6.5,4,Q1D
4,1,Q,E,0.2941,0.0502,9.0,4,Q1E


In [4]:
# Per-virus titer summary, built in a single chain:
#   1. mean of TU_ul and its standard error (sample std with ddof=1 over sqrt(n)),
#   2. left-join of the DMS effect on virus name == mutation label,
#   3. error-bar bounds (mean ± 1SE),
#   4. missing effects set to 0 (viruses with no matching mutation, e.g. wildtype).
summary = (
    cell_entry_titers
    .groupby('virus')
    .agg(
        mean_titer=('TU_ul', 'mean'),
        se=('TU_ul', lambda x: np.std(x, ddof=1) / np.sqrt(len(x))),
    )
    .reset_index()
    .merge(
        func_data[['effect', 'mutation']],
        left_on='virus',
        right_on='mutation',
        how='left',
    )
    .assign(
        lower=lambda d: d['mean_titer'] - d['se'],
        upper=lambda d: d['mean_titer'] + d['se'],
        effect=lambda d: d['effect'].fillna(0),
    )
)

summary

Unnamed: 0,virus,mean_titer,se,effect,mutation,lower,upper
0,D186G,29.515375,2.847018,-2.185,D186G,26.668357,32.362393
1,D225N,2.258153,0.31936,-3.314,D225N,1.938794,2.577513
2,F195Y,12.857637,1.757744,-4.207,F195Y,11.099893,14.61538
3,G404R,18833.535895,1318.13377,-0.1932,G404R,17515.402125,20151.669665
4,K131I,953.617051,64.354091,-1.944,K131I,889.262961,1017.971142
5,K140I,21102.198465,934.250229,0.2189,K140I,20167.948236,22036.448694
6,K189E,69974.772682,9245.163455,-0.2025,K189E,60729.609228,79219.936137
7,N165H,77435.55557,10103.667071,-1.057,N165H,67331.888499,87539.222641
8,PE09-I140K,31.79054,1.84745,0.0,,29.94309,33.63799
9,PE09-WT,1212.806943,365.35071,0.0,,847.456232,1578.157653


In [5]:
# Rows used for the comparison with DMS: everything in `summary` except the
# 'no HA', 'PE09-WT' and 'PE09-I140K' entries.
summary_ma22 = summary.query('virus not in ["no HA", "PE09-WT", "PE09-I140K"]')

# Correlation between log mean titer and DMS effect, formatted for the in-plot label.
r_value = np.log(summary_ma22['mean_titer']).corr(summary_ma22['effect'])
r_text = "r = {:.2f}".format(r_value)

# The same y-axis title is given to both data layers so the layered chart shows one title.
titer_axis_title = ['Titers for PB1flank-eGFP virus', '(transcription units per ul)']

# Layer 1: one grey, black-outlined circle per virus (DMS effect vs. mean titer, log y).
points = (
    alt.Chart(summary_ma22)
    .mark_circle(color='#D9D9D9', size=90, opacity=1, stroke='black', strokeWidth=1)
    .encode(
        x=alt.X(
            'effect:Q',
            title=['Cell entry effect', 'predicted by DMS'],
            axis=alt.Axis(grid=False, tickCount=3),
        ),
        y=alt.Y(
            'mean_titer:Q',
            title=titer_axis_title,
            scale=alt.Scale(type='log'),
            axis=alt.Axis(format="~e", grid=False, tickCount=3),
        ),
        tooltip=['virus', 'effect', 'mean_titer', 'se'],
    )
    .properties(width=200, height=200)
)

# Layer 2: error bars spanning mean ± 1SE (the precomputed `lower`/`upper` columns).
error_bars = (
    alt.Chart(summary_ma22)
    .mark_errorbar()
    .encode(
        x='effect:Q',
        y=alt.Y('lower:Q', title=titer_axis_title),
        y2='upper:Q',
    )
)

# Layer 3: the correlation label, pinned at pixel offset (5, 5) via value encodings.
r_label = (
    alt.Chart(pd.DataFrame({'text': [r_text]}))
    .mark_text(align='left', baseline='top', fontSize=16, fontWeight='normal', color='black')
    .encode(text='text:N', x=alt.value(5), y=alt.value(5))
)

scatter_plot = alt.layer(points, error_bars, r_label)
scatter_plot

In [6]:
# Rows plotted in this panel: every entry of `summary` except 'no HA', 'PE09-WT'
# and 'PE09-I140K'. Evaluated once and shared by both data layers.
ranked_source = summary.query('virus not in ["no HA", "PE09-WT", "PE09-I140K"]')

# Order of the x-axis categories: viruses by ascending mean titer.
ordered_viruses = ranked_source.sort_values('mean_titer')['virus'].tolist()

# Same y-axis title on both data layers so the layered chart shows a single title.
titer_axis_title = ['Titers for PB1flank-eGFP virus', '(transcription units per ul)']

# Grey, black-outlined circle per virus at its mean titer (log y-axis).
scatter = (
    alt.Chart(ranked_source)
    .mark_circle(size=90, color='#D9D9D9', opacity=1, stroke='black', strokeWidth=1)
    .encode(
        x=alt.X(
            "virus:O",
            sort=ordered_viruses,
            title='',
            axis=alt.Axis(grid=False),
        ),
        y=alt.Y(
            'mean_titer:Q',
            title=titer_axis_title,
            scale=alt.Scale(type='log'),
            axis=alt.Axis(format="~e", grid=False, tickCount=3),
        ),
    )
    .properties(height=200, width=400)
)

# Error bars: mean ± 1SE
error_bars = (
    alt.Chart(ranked_source)
    .mark_errorbar()
    .encode(
        x=alt.X('virus:O', sort=ordered_viruses),
        y=alt.Y('lower:Q', title=titer_axis_title),
        y2='upper:Q',
    )
)

# Dashed horizontal reference rules at titers of 1e2 and 1e4.
hline = (
    alt.Chart(pd.DataFrame({'y': [1e2, 1e4]}))
    .mark_rule(color='black', size=1.25, opacity=1, strokeDash=[5,5])
    .encode(y='y:Q')
)

hline + scatter + error_bars

In [7]:
# Rows plotted in this panel; the selection is evaluated once and shared by both
# data layers. Names absent from `summary` simply match nothing.
highlighted = summary.query('virus in ["D186G", "D225N", "F195Y", "S193F", "unmutated"]')

# Same y-axis title on both data layers so the layered chart shows a single title.
titer_axis_title = ['Titers for PB1flank-eGFP virus', '(transcription units per ul)']

# One black-outlined circle per virus: the four mutants share one colour and
# 'unmutated' gets grey. The legend is suppressed.
scatter = (
    alt.Chart(highlighted)
    .mark_circle(size=90, opacity=1, stroke='black', strokeWidth=1)
    .encode(
        x=alt.X(
            "virus:O",
            title='',
            axis=alt.Axis(grid=False, tickCount=3),
        ),
        y=alt.Y(
            'mean_titer:Q',
            title=titer_axis_title,
            scale=alt.Scale(type='log'),
            axis=alt.Axis(format="~e", grid=False, tickCount=3),
        ),
        color=alt.Color(
            'virus:O',
            scale=alt.Scale(
                domain=['D186G', 'D225N', 'F195Y', 'S193F', 'unmutated'],
                range=['#E78AC3', '#E78AC3', '#E78AC3', '#E78AC3', '#D9D9D9'],
            ),
            legend=None,
        ),
    )
    .properties(height=200, width=200)
)

# Error bars: mean ± 1SE
error_bars = (
    alt.Chart(highlighted)
    .mark_errorbar()
    .encode(
        x='virus:O',
        y=alt.Y('lower:Q', title=titer_axis_title),
        y2='upper:Q',
    )
)

# Dashed horizontal reference rules at titers of 1e2 and 1e4.
hline = (
    alt.Chart(pd.DataFrame({'y': [1e2, 1e4]}))
    .mark_rule(color='black', size=1.25, opacity=1, strokeDash=[5,5])
    .encode(y='y:Q')
)

hline + scatter + error_bars

In [8]:
# Amino acid letter assigned to each virus (I or K) -- presumably the residue at
# site 140, going by the K140I / I140K names (TODO confirm).
amino_acid_map = {
    'K140I': 'I',
    'PE09-I140K': 'K',
    'PE09-WT': 'I',
    'unmutated': 'K',
}

# Strain label assigned to each virus.
background = {
    'K140I': 'A/Massachusetts/18/2022',
    'PE09-I140K': 'A/Perth/16/2009',
    'PE09-WT': 'A/Perth/16/2009',
    'unmutated': 'A/Massachusetts/18/2022',
}

# Keep the four viruses of interest and annotate each row with its amino acid,
# its strain label, and a display label in which 'PE09-WT' becomes 'unmutated'
# and 'PE09-I140K' becomes 'I140K'. The original row index is kept.
epistasis_df = (
    summary
    .query('virus in ["K140I", "unmutated", "PE09-WT", "PE09-I140K"]')
    .assign(
        amino_acid=lambda d: d['virus'].map(amino_acid_map),
        background=lambda d: d['virus'].map(background),
        virus_label=lambda d: d['virus'].replace({
            'K140I': 'K140I',
            'unmutated': 'unmutated',
            'PE09-WT': 'unmutated',
            'PE09-I140K': 'I140K',
        }),
    )
)
epistasis_df

Unnamed: 0,virus,mean_titer,se,effect,mutation,lower,upper,amino_acid,background,virus_label
5,K140I,21102.198465,934.250229,0.2189,K140I,20167.948236,22036.448694,I,A/Massachusetts/18/2022,K140I
8,PE09-I140K,31.79054,1.84745,0.0,,29.94309,33.63799,K,A/Perth/16/2009,I140K
9,PE09-WT,1212.806943,365.35071,0.0,,847.456232,1578.157653,I,A/Perth/16/2009,unmutated
18,unmutated,18500.657713,1238.368143,0.0,,17262.289569,19739.025856,K,A/Massachusetts/18/2022,unmutated


In [9]:
# Fold difference in mean titer between PE09-WT and PE09-I140K. The ratio is
# computed from `summary` instead of from hard-coded numbers, so it stays
# correct if the underlying titer data change.
mean_titer_by_virus = summary.set_index('virus')['mean_titer']
float(mean_titer_by_virus['PE09-WT'] / mean_titer_by_virus['PE09-I140K'])

38.149932118171

In [24]:
# Line (with filled point markers) joining the mean titers of the two amino acids,
# coloured by strain. The y-axis is logarithmic with a fixed domain of 1 to 5e4.
lines = (
    alt.Chart(epistasis_df)
    .mark_line(size=2.5, point={"filled": True, "size": 60})
    .encode(
        x=alt.X(
            "amino_acid:O",
            sort=['I', 'K'],
            title='',
            axis=alt.Axis(grid=False, labelAngle=0, labelBaseline='top', labelPadding=3),
        ),
        y=alt.Y(
            'mean_titer:Q',
            title=['Mean titer (TU/ul)'],
            scale=alt.Scale(type='log', domain=[1, 5e4]),
            axis=alt.Axis(format="~e", grid=False, tickCount=4),
        ),
        color=alt.Color(
            'background:O',
            scale=alt.Scale(
                domain=['A/Perth/16/2009', 'A/Massachusetts/18/2022'],
                range=['#FF9D9A', '#86BCB6'],
            ),
            legend=None,
        ),
        detail='background:O',
    )
)

# Error bars: mean ± 1SE
error_bars = (
    alt.Chart(epistasis_df)
    .mark_errorbar()
    .encode(
        x=alt.X("amino_acid:O", sort=['I', 'K']),
        y=alt.Y('lower:Q', title=['Mean titer (TU/ul)']),
        y2='upper:Q',
    )
)

# Layer the two marks, then facet by strain with independent x and y scales per
# panel. Both layers are built on the same DataFrame object, which lets the
# layered chart be faceted.
(lines + error_bars).properties(
    width=80, height=175
).facet(
    facet=alt.Facet(
        'background',
        sort=['A/Perth/16/2009', 'A/Massachusetts/18/2022'],
        title=None,
    ),
    columns=4,
).resolve_scale(
    x='independent', y='independent'
).configure_header(
    labelFontSize=16, labelFontWeight='bold'
)

In [3]:
# Read the MOI test measurements from CSV and preview the first five rows.
moi_data = pd.read_csv(
    'data/250416_moi_test.csv',
)
moi_data.head(n=5)

Unnamed: 0,IP_well,MOI,virus,signal_to_bg
0,40000.0,1.0,MA22-WT,0.313
1,20000.0,0.5,MA22-WT,0.2186
2,10000.0,0.25,MA22-WT,0.0822
3,5000.0,0.125,MA22-WT,0.03682
4,2500.0,0.0625,MA22-WT,0.01996


In [10]:
# Per-virus x position of the vertical reference line drawn in each facet.
moi_lines = pd.DataFrame(
    [
        ('MA22-WT', 0.3),
        ('K140I', 0.3),
        ('K189E', 0.4),
        ('N165H', 0.6),
        ('R220T', 0.5),
        ('R229I', 0.6),
        ('R307T', 0.9),
        ('S145N', 0.5),
        ('S205Y', 0.55),
    ],
    columns=['virus', 'moi_line'],
)

# Re-read the measurements (keeps this cell re-runnable) and attach each virus's
# reference-line position; viruses without an entry get NaN.
moi_data = pd.read_csv('data/250416_moi_test.csv').merge(moi_lines, on='virus', how='left')

# Altair chart: signal-to-background against MOI (log x-axis), as line plus points.
base = (
    alt.Chart(moi_data)
    .encode(
        x=alt.X('MOI', title='MOI', scale=alt.Scale(type='log'), axis=alt.Axis(grid=False)),
        y=alt.Y('signal_to_bg', title='Signal to Background'),
    )
    .properties(width=150, height=150)
)

line = base.mark_line()
points = base.mark_point()

# Dashed red vertical rule at each virus's `moi_line` value.
vline = (
    alt.Chart(moi_data)
    .mark_rule(color='red', strokeDash=[4, 4])
    .encode(x='moi_line:Q')
)

# Dashed red horizontal rule at a signal-to-background of 0.1.
hline = (
    alt.Chart(pd.DataFrame({'signal_to_bg': [0.1]}))
    .mark_rule(color='red', strokeDash=[4, 4])  # Dotted line
    .encode(y='signal_to_bg:Q')
)

# Layering order is kept as line + points + vline + hline: the first three layers
# are built on `moi_data`, and `hline`, which carries its own data, is added last.
chart = (
    (line + points + vline + hline)
    .facet(facet=alt.Facet('virus:N'), columns=4)
    .resolve_scale(y='independent', x='independent')
)

chart