## Analyze functional effects on HA-mediated entry into MDCK-SIAT1 cells

In [15]:
import pandas as pd
import numpy as np
import altair as alt
import theme

# Register the project's Altair theme under a single name and make it active
theme_name = 'main_theme'
alt.themes.register(theme_name, theme.main_theme)
alt.themes.enable(theme_name)

# Remove Altair's cap on the number of rows a chart's dataset may contain
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [16]:
# Load the per-mutation functional effects on MDCK-SIAT1 cell entry
func_effects_csv = '../results/func_effects/averages/MDCKSIAT1_entry_func_effects.csv'
func_data = pd.read_csv(func_effects_csv)
func_data.head()

Unnamed: 0,site,wildtype,mutant,effect,effect_std,times_seen,n_selections
0,1,Q,*,-4.945,0.0,16.25,4
1,1,Q,A,-0.1226,0.2296,7.5,4
2,1,Q,C,-0.5732,0.5667,5.75,4
3,1,Q,D,0.255,0.3448,6.5,4
4,1,Q,E,0.2941,0.0502,9.0,4


In [17]:
# Per-site 60y entropy is stored in separate tables for HA1 and HA2
ha1_entropy = pd.read_csv(
    'data/nextstrain_groups_blab_flu_seasonal_h3n2_ha1_60y_diversity.tsv',
    sep='\t',
)
ha2_entropy = pd.read_csv(
    'data/nextstrain_groups_blab_flu_seasonal_h3n2_ha2_60y_diversity.tsv',
    sep='\t',
)

# Offset HA2 positions by 329 so they continue after the HA1 positions
# (presumably 329 is the number of HA1 sites -- confirm against the numbering map)
ha2_entropy_shifted = ha2_entropy.assign(position=ha2_entropy['position'] + 329)

# Stack both chains into one table keyed by `site`; the `gene` column is dropped
entropy_df = (
    pd.concat([ha1_entropy, ha2_entropy_shifted])
    .drop(columns=['gene'])
    .rename(columns={'position': 'site'})
)

entropy_df.head()

Unnamed: 0,site,entropy
0,1,0.034
1,2,0.271
2,3,0.719
3,4,0.032
4,5,0.169


In [18]:
# Load the site numbering map, which also carries the region / RBS-region labels
site_map_csv = '../data/site_numbering_map.csv'
site_map = pd.read_csv(site_map_csv)
site_map.head()

Unnamed: 0,sequential_site,reference_site,sequential_wt,region,rbs_region
0,1,1,Q,HA1,outside RBS
1,2,2,K,HA1,outside RBS
2,3,3,I,HA1,outside RBS
3,4,4,P,HA1,outside RBS
4,5,5,G,HA1,outside RBS


In [19]:
# Attach the region / RBS-region labels to every measurement (matched on the
# reference site number), then keep only measurements passing the quality filters
region_annotations = site_map[['reference_site', 'region', 'rbs_region']]

func_data_for_heatmap = (
    func_data
    .merge(
        region_annotations,
        left_on='site',
        right_on='reference_site',
        how='right',
    )
    .drop(columns=['reference_site'])
    .query('times_seen >= 2')
    .query('effect_std <= 2')
)

# Save the filtered table for the heatmaps; stop codons (*) and wildtype rows are kept
func_data_for_heatmap.to_csv('data/heatmap_data/filtered_cell_entry.csv')

# Build the table used for plotting against entropy: drop stop codons, gaps and
# wildtype rows so only amino-acid substitutions remain
substitutions_only = (
    func_data_for_heatmap
    .query('mutant not in ["*", "-"]')
    .query('mutant != wildtype')
)

# Sites with 0 entropy were not included in the entropy tables, so a missing
# value after the left join is filled with 0
func_data_ann = (
    substitutions_only
    .merge(entropy_df, on='site', how='left')
    .assign(entropy=lambda annotated: annotated['entropy'].fillna(0))
)
func_data_ann.head()

Unnamed: 0,site,wildtype,mutant,effect,effect_std,times_seen,n_selections,region,rbs_region,entropy
0,1,Q,A,-0.1226,0.2296,7.5,4,HA1,outside RBS,0.034
1,1,Q,C,-0.5732,0.5667,5.75,4,HA1,outside RBS,0.034
2,1,Q,D,0.255,0.3448,6.5,4,HA1,outside RBS,0.034
3,1,Q,E,0.2941,0.0502,9.0,4,HA1,outside RBS,0.034
4,1,Q,F,-0.7141,0.6042,7.0,4,HA1,outside RBS,0.034


### Distribution of functional effects across classic receptor binding pocket regions

In [20]:
# Colors per RBS category: all RBS sub-regions share one highlight color,
# sites outside the RBS are gray
rbs_highlight = '#725663'
colors = {
    'outside RBS': '#bdbebb',
    **dict.fromkeys(
        ['RBS 130-loop', 'RBS 150-loop', 'RBS 190-loop', 'RBS 220-loop', 'RBS base'],
        rbs_highlight,
    ),
}
color_scale = alt.Scale(domain=list(colors), range=list(colors.values()))

# Strip plot: one point per mutation, spread vertically inside each category band
# by Gaussian jitter (Box-Muller transform of two uniform random draws)
points = (
    alt.Chart(func_data_ann)
    .transform_calculate(jitter="sqrt(-2*log(random()))*cos(2*PI*random())")
    .mark_circle(size=40, opacity=0.3, stroke=None, strokeWidth=0)
    .encode(
        x=alt.X('effect').title(["Mutation effect on", "cell entry"]),
        y=alt.Y("rbs_region", title=None),
        yOffset="jitter:Q",
        color=alt.Color("rbs_region", scale=color_scale, legend=None),
        tooltip=['site', 'wildtype', 'mutant', 'effect'],
    )
)

# Black tick at the median effect of each category
median_line = (
    alt.Chart(func_data_ann)
    .mark_tick(color='black', thickness=3, size=25)
    .encode(
        x=alt.X('median(effect):Q'),
        y=alt.Y('rbs_region:N'),
    )
)

# Dashed vertical reference rule at effect = 0
vline = (
    alt.Chart()
    .mark_rule(color='black', size=1.25, opacity=1.0, strokeDash=[6, 6])
    .encode(x=alt.X(datum=0))
)

# Layer order is points, then median ticks, then the zero rule.
# Keep alt.layer(...) here: the rule layer deliberately carries no data.
chart = alt.layer(points, median_line, vline).properties(width=200, height=200)

# Mean entropy per category, de-duplicated to one row per site first so a site
# with many measured mutations is counted only once
site_entropy = func_data_ann[['site', 'rbs_region', 'entropy']].drop_duplicates()
mean_entropy_rbs = (
    site_entropy
    .groupby("rbs_region")["entropy"]
    .mean()
    .reset_index()
    .rename(columns={'entropy': 'mean_entropy'})
)

# Horizontal bars of mean entropy; the y axis is hidden because the strip plot
# placed to its left already labels the categories
bar_chart = (
    alt.Chart(mean_entropy_rbs)
    .mark_bar(size=20)
    .encode(
        x=alt.X(
            'mean_entropy:Q',
            axis=alt.Axis(tickCount=2),
        ).title(['Mean entropy', '(in natural sequences)']),
        y=alt.Y('rbs_region:N', axis=None, title=None),
        color=alt.Color("rbs_region", scale=color_scale, legend=None),
        tooltip=['rbs_region', 'mean_entropy'],
    )
    .properties(width=80, height=200)
)

# Place both panels side by side on a shared y scale so the rows line up
rbs_title = alt.TitleParams(
    text='Receptor binding pocket region',
    anchor='middle',
    fontSize=16,
    fontWeight='bold',
)
rbs_combined_chart = (
    alt.concat(chart, bar_chart, spacing=10)
    .resolve_scale(y='shared')
    .properties(title=rbs_title)
)

rbs_combined_chart.display()

### Distribution of functional effects across classic antigenic regions

In [21]:
# Colors per antigenic region: the five epitopes share one highlight color,
# the remaining HA1 and HA2 sites get two shades of gray
epitope_names = ['epitope-A', 'epitope-B', 'epitope-C', 'epitope-D', 'epitope-E']
colors = {
    **dict.fromkeys(epitope_names, '#FFB547'),
    'HA1': '#bdbebb',
    'HA2': '#767676',
}
color_scale = alt.Scale(domain=list(colors), range=list(colors.values()))

# Row order used by every panel: epitopes first, then the rest of HA1, then HA2
order = epitope_names + ['HA1', 'HA2']

# Strip plot: one point per mutation, spread vertically inside each region band
# by Gaussian jitter (Box-Muller transform of two uniform random draws)
points = (
    alt.Chart(func_data_ann)
    .transform_calculate(jitter="sqrt(-2*log(random()))*cos(2*PI*random())")
    .mark_circle(size=40, opacity=0.3, stroke=None, strokeWidth=0)
    .encode(
        x=alt.X('effect').title(["Mutation effect on", "cell entry"]),
        y=alt.Y("region", sort=order, title=None),
        yOffset="jitter:Q",
        color=alt.Color("region", scale=color_scale, legend=None),
        tooltip=['site', 'wildtype', 'mutant', 'effect'],
    )
)

# Black tick at the median effect of each region
median_line = (
    alt.Chart(func_data_ann)
    .mark_tick(color='black', thickness=3, size=25)
    .encode(
        x=alt.X('median(effect):Q'),
        y=alt.Y('region:N', sort=order),
    )
)

# Dashed vertical reference rule at effect = 0
vline = (
    alt.Chart()
    .mark_rule(color='black', size=1.25, opacity=1.0, strokeDash=[6, 6])
    .encode(x=alt.X(datum=0))
)

# Layer order is points, then median ticks, then the zero rule.
# Keep alt.layer(...) here: the rule layer deliberately carries no data.
chart = alt.layer(points, median_line, vline).properties(width=200, height=200)

# Mean entropy per region, de-duplicated to one row per site first so a site
# with many measured mutations is counted only once
site_entropy = func_data_ann[['site', 'region', 'entropy']].drop_duplicates()
mean_entropy_epitope = (
    site_entropy
    .groupby("region")["entropy"]
    .mean()
    .reset_index()
    .rename(columns={'entropy': 'mean_entropy'})
)

# Horizontal bars of mean entropy; the y axis is hidden because the strip plot
# placed to its left already labels the regions
bar_chart = (
    alt.Chart(mean_entropy_epitope)
    .mark_bar(size=20)
    .encode(
        x=alt.X(
            'mean_entropy:Q',
            axis=alt.Axis(tickCount=1),
        ).title(['Mean entropy', '(in natural sequences)']),
        y=alt.Y('region:N', sort=order, axis=None, title=None),
        color=alt.Color("region", scale=color_scale, legend=None),
        tooltip=['region', 'mean_entropy'],
    )
    .properties(width=80, height=200)
)

# Place both panels side by side on a shared y scale so the rows line up
epitope_title = alt.TitleParams(
    text='Antigenic region',
    anchor='middle',
    fontSize=16,
    fontWeight='bold',
)
epitope_combined_chart = (
    alt.concat(chart, bar_chart, spacing=10)
    .resolve_scale(y='shared')
    .properties(title=epitope_title)
)

epitope_combined_chart.display()

In [14]:
# Stack the RBS figure above the antigenic-region figure; each keeps its own
# color scale because the two figures color by different category sets
alt.vconcat(
    rbs_combined_chart,
    epitope_combined_chart,
).resolve_scale(color='independent')