# Plot yeast RBD DMS data

## Import modules and read data
Import Python modules:

In [1]:
import altair as alt

import pandas as pd

Disable Altair's limit on the number of rows in a plotted dataset (by default Altair raises an error above 5,000 rows):

In [2]:
# Turn off Altair's max-rows check so the larger frames plotted below (the tidy
# frame shown later has ~16,000 rows) can be passed to `alt.Chart`. Assigning
# the return value to `_` keeps it from being echoed as the cell output.
_ = alt.data_transformers.disable_max_rows()

Read the mutation-level deep mutational scanning data and reduce it to site-level data, computing for each condition and site the total (sum) and the maximum escape across the mutations at that site:

In [3]:
dms_mut_data = pd.read_csv('../results/merged_data/yeast_RBD_DMS_data.csv')

# columns that together identify one condition, and the site-level metrics
condition_cols = ['condition', 'condition_type', 'condition_subtype', 'study']
metric_cols = ['site_total_escape', 'site_max_escape']

# collapse mutation-level escape to one row per condition and site;
# `dropna=False` keeps conditions for which one of the labels is missing
site_metrics = (
    dms_mut_data
    .groupby(condition_cols + ['site'], as_index=False, dropna=False)
    .aggregate(site_total_escape=pd.NamedAgg('mut_escape', 'sum'),
               site_max_escape=pd.NamedAgg('mut_escape', 'max'),
               )
    )

# every site from the first to the last measured one, including gaps
sites = list(range(dms_mut_data['site'].min(), dms_mut_data['site'].max() + 1))

# Pair every condition with every site (cross join through a constant key, which
# works on any pandas version). Using *all* the condition columns here means that
# rows added for unmeasured sites keep their real `condition_subtype`.
all_condition_sites = (
    site_metrics[condition_cols]
    .drop_duplicates()
    .assign(_cross_key=0)
    .merge(pd.DataFrame({'site': sites, '_cross_key': 0}), on='_cross_key')
    .drop(columns='_cross_key')
    )

# Attach the measured metrics and fill missing sites as 0. Only the metric
# columns are filled: a blanket `fillna(0)` would also write 0 into label
# columns such as `condition_subtype`.
dms_data = (
    all_condition_sites
    .merge(site_metrics, on=condition_cols + ['site'], how='left')
    .fillna({metric: 0 for metric in metric_cols})
    )

# check no duplicated conditions: every condition name must map to a single study
studies_per_condition = (
    dms_data
    .groupby('condition', as_index=False)
    .aggregate(n_studies=pd.NamedAgg('study', 'nunique'))
    )
dup_conditions = studies_per_condition.query('n_studies > 1')
if not dup_conditions.empty:
    # include the offending conditions in the message so they can be tracked down
    raise ValueError(f"duplicate studies for some conditions:\n{dup_conditions!s}")

dms_data

Unnamed: 0,condition,condition_type,condition_subtype,study,site,site_total_escape,site_max_escape
0,AZD1061,antibody,clinical,Dong 2021,331,0.032706,0.005524
1,AZD1061,antibody,clinical,Dong 2021,332,0.034237,0.005990
2,AZD1061,antibody,clinical,Dong 2021,333,0.025487,0.002917
3,AZD1061,antibody,clinical,Dong 2021,334,0.030717,0.009528
4,AZD1061,antibody,clinical,Dong 2021,335,0.021808,0.003185
...,...,...,...,...,...,...,...
8035,subject K (day 29),serum,convalescent,Greaney 2021b,527,0.005310,0.002243
8036,subject K (day 29),serum,convalescent,Greaney 2021b,528,0.009810,0.002504
8037,subject K (day 29),serum,convalescent,Greaney 2021b,529,0.031831,0.009543
8038,subject K (day 29),serum,convalescent,Greaney 2021b,530,0.024201,0.007600


Make a tidy version of `dms_data` that is melted to have the two site metrics in one column:

In [4]:
# map each site-metric column to the human-readable label used in the plots
tidy_cols = {'site_total_escape': 'sum of all mutations at site',
             'site_max_escape': 'max of any mutation at site'}

# everything that is not a metric identifies the row and is carried through the melt
id_cols = [col for col in dms_data.columns if col not in tidy_cols]

# long format: one row per (condition, site, metric), with the value in 'antibody escape'
dms_data_tidy = dms_data.rename(columns=tidy_cols).melt(
    id_vars=id_cols,
    value_vars=list(tidy_cols.values()),
    var_name='metric',
    value_name='antibody escape',
    )

dms_data_tidy

Unnamed: 0,condition,condition_type,condition_subtype,study,site,metric,antibody escape
0,AZD1061,antibody,clinical,Dong 2021,331,sum of all mutations at site,0.032706
1,AZD1061,antibody,clinical,Dong 2021,332,sum of all mutations at site,0.034237
2,AZD1061,antibody,clinical,Dong 2021,333,sum of all mutations at site,0.025487
3,AZD1061,antibody,clinical,Dong 2021,334,sum of all mutations at site,0.030717
4,AZD1061,antibody,clinical,Dong 2021,335,sum of all mutations at site,0.021808
...,...,...,...,...,...,...,...
16075,subject K (day 29),serum,convalescent,Greaney 2021b,527,max of any mutation at site,0.002243
16076,subject K (day 29),serum,convalescent,Greaney 2021b,528,max of any mutation at site,0.002504
16077,subject K (day 29),serum,convalescent,Greaney 2021b,529,max of any mutation at site,0.009543
16078,subject K (day 29),serum,convalescent,Greaney 2021b,530,max of any mutation at site,0.007600


## Make interactive plots
Notes on `altair` color palettes are here:

In [13]:
# shared pixel width of the zoom bar and the escape plot
width = 800

# build zoom bar to zoom in on sites: an interval brush along x, drawn with a black outline
brush_style = alt.BrushConfig(stroke='black', strokeWidth=2)
zoom_brush = alt.selection_interval(encodings=['x'], mark=brush_style)

# thin gray strip spanning all sites; dragging on it sets `zoom_brush`
zoom_bar = (
    alt.Chart(dms_data_tidy)
    .mark_rect(color='lightgray')
    .encode(x=alt.X('site:O'))
    .add_selection(zoom_brush)
    .properties(title='zoom bar', width=width, height=15)
    )

# build drop down menu to select y-axis on escape plot
# https://github.com/altair-viz/altair/issues/965
metric_options = list(dms_data_tidy['metric'].unique())
y_axis_select_binding = alt.binding_select(options=metric_options)
# Start with the first metric selected. An empty selection lets every row through
# `transform_filter`, so without an initial value both metrics would be drawn in
# the same per-condition line until the user picks one from the menu.
y_axis_init = {'init': {'metric': metric_options[0]}} if metric_options else {}
y_axis_selection = alt.selection_single(name='y-axis antibody escape',
                                        fields=['metric'],
                                        bind=y_axis_select_binding,
                                        **y_axis_init)

# build drop down menu to select condition types on escape plot
# (no initial value: all condition types are shown until one is chosen)
condition_type_binding = alt.binding_select(options=dms_data_tidy['condition_type'].unique())
condition_type_selection = alt.selection_single(name='data to show',
                                                fields=['condition_type'],
                                                bind=condition_type_binding)

# selector to highlight specific conditions on escape plots: clicking picks the
# condition of the nearest point
highlight_escape = (  # https://altair-viz.github.io/gallery/multiline_highlight.html
    alt.selection(type='single',
                  on='click',
                  fields=['condition'],
                  nearest=True)
    )

# the escape plots
def _unless_highlighted(plain, highlighted):
    """Encoding value that is `plain` for conditions outside `highlight_escape`, else `highlighted`."""
    return alt.condition(~highlight_escape, alt.value(plain), alt.value(highlighted))


# shared encodings: one series per condition along the sites
escape_plot_base = alt.Chart(dms_data_tidy).encode(
    x=alt.X('site:O'),
    y=alt.Y('antibody escape:Q'),
    detail=alt.Detail('condition:N'),  # https://github.com/altair-viz/altair/issues/985
    )

# step lines, restyled according to the click-highlight selection
escape_lines = (
    escape_plot_base
    .mark_line(interpolate='step')
    .encode(size=_unless_highlighted(1, 2),
            color=_unless_highlighted('gray', 'black'),
            opacity=_unless_highlighted(0.4, 1),
            )
    )

# fully transparent circles that carry the highlight selection, following
# https://altair-viz.github.io/gallery/multiline_highlight.html
# (points never show, opacity = 0; why the example needs them is not understood)
highlight_targets = (
    escape_plot_base
    .mark_circle()
    .encode(opacity=alt.value(0))
    .add_selection(highlight_escape)
    )

escape_plot = (
    alt.layer(escape_lines, highlight_targets)
    .interactive(bind_y=False)  # https://github.com/altair-viz/altair/issues/1512#issuecomment-691720690
    # drop-down menus, then the filters they (and the zoom brush) drive
    .add_selection(y_axis_selection)
    .add_selection(condition_type_selection)
    .transform_filter(y_axis_selection)
    .transform_filter(condition_type_selection)
    .transform_filter(zoom_brush)
    .properties(width=width, height=200)
    )

# combine all the elements into a chart: zoom bar stacked above the escape plot
# (`&` is Altair's operator form of `alt.vconcat`), with grid lines and the
# view border turned off
stacked_panels = zoom_bar & escape_plot
chart = stacked_panels.configure_axis(grid=False).configure_view(strokeWidth=0)

# show the chart
chart