# Plot yeast RBD DMS escape maps

## Import modules and read data
Import Python modules:

In [1]:
import itertools
import os

import altair as alt

import numpy

import pandas as pd

import sklearn.manifold

Disable max rows specifier for Altair:

In [2]:
_ = alt.data_transformers.disable_max_rows()

Read the deep mutational scanning data and reduce it to site-level data, keeping the total and mean site-level escape metrics:

In [3]:
# site-level metric columns in the CSV, mapped to the labels used for them later
metric_cols = {'site_total_escape': 'sum of mutations at site',
               'site_mean_escape': 'mean of mutations at site'}

# get only site-level data: keep the identifying columns plus the site
# metrics, then drop the duplicate rows that this column subset leaves behind
site_level_cols = ['condition', 'condition_type', 'condition_subtype',
                   'study', 'site', *metric_cols]
dms_data = pd.read_csv('./processed_data/escape_data.csv')
dms_data = dms_data[site_level_cols].drop_duplicates()

# the zero-fill below must not hide values that were already missing
assert dms_data.notnull().all().all()

# fill missing sites as 0: merge each condition's rows against the full run
# of sites so absent sites show up as rows, then set their empty metrics to 0
sites = list(range(dms_data['site'].min(), dms_data['site'].max() + 1))
all_sites = pd.DataFrame({'site': sites})
dms_data = pd.merge_ordered(
    dms_data,
    all_sites,
    on='site',
    left_by=['condition', 'study', 'condition_type', 'condition_subtype'],
)
dms_data = dms_data.fillna(0)

# check no duplicated conditions: each condition name must map to one study
studies_per_condition = (
    dms_data
    .groupby('condition', as_index=False)
    .aggregate(n_studies=pd.NamedAgg('study', 'nunique'))
)
dup_conditions = studies_per_condition.query('n_studies > 1')
if len(dup_conditions) > 0:
    raise ValueError('duplicate studies for some conditions:\n' + str(dup_conditions))

dms_data

Unnamed: 0,condition,condition_type,condition_subtype,study,site,site_total_escape,site_mean_escape
0,COV2-2196,antibody,clinical antibody,2021_Dong_AZ,331,0.24570,0.015360
1,COV2-2196,antibody,clinical antibody,2021_Dong_AZ,332,0.29950,0.015760
2,COV2-2196,antibody,clinical antibody,2021_Dong_AZ,333,0.28330,0.015740
3,COV2-2196,antibody,clinical antibody,2021_Dong_AZ,334,0.29370,0.016310
4,COV2-2196,antibody,clinical antibody,2021_Dong_AZ,335,0.28870,0.015190
...,...,...,...,...,...,...,...
9844,REGN10987,antibody,clinical antibody,2021_Starr_REGN,527,0.03580,0.002106
9845,REGN10987,antibody,clinical antibody,2021_Starr_REGN,528,0.03736,0.002076
9846,REGN10987,antibody,clinical antibody,2021_Starr_REGN,529,0.03734,0.002075
9847,REGN10987,antibody,clinical antibody,2021_Starr_REGN,530,0.03944,0.002076


Make a tidy version of `dms_data` that is melted to have the two site metrics in one column, and gets rid of some columns we don't need for escape line plots:

In [4]:
# identifier columns are everything in `dms_data` that is not a site metric
id_cols = [col for col in dms_data.columns if col not in metric_cols]
metric_labels = list(metric_cols.values())

# relabel the metric columns, then stack them into `metric` / `escape` pairs
dms_data_labeled = dms_data.rename(columns=metric_cols)
dms_data_tidy = pd.melt(dms_data_labeled,
                        id_vars=id_cols,
                        value_vars=metric_labels,
                        var_name='metric',
                        value_name='escape')

# these two columns are not needed for the escape line plots
dms_data_tidy = dms_data_tidy.drop(columns=['condition_type', 'study'])

dms_data_tidy

Unnamed: 0,condition,condition_subtype,site,metric,escape
0,COV2-2196,clinical antibody,331,sum of mutations at site,0.245700
1,COV2-2196,clinical antibody,332,sum of mutations at site,0.299500
2,COV2-2196,clinical antibody,333,sum of mutations at site,0.283300
3,COV2-2196,clinical antibody,334,sum of mutations at site,0.293700
4,COV2-2196,clinical antibody,335,sum of mutations at site,0.288700
...,...,...,...,...,...
19693,REGN10987,clinical antibody,527,mean of mutations at site,0.002106
19694,REGN10987,clinical antibody,528,mean of mutations at site,0.002076
19695,REGN10987,clinical antibody,529,mean of mutations at site,0.002075
19696,REGN10987,clinical antibody,530,mean of mutations at site,0.002076


## Perform multidimensional scaling
Steps:
 1. Calculate similarities between escape maps for each antibody.
 2. Convert similarities to dissimilarities.
 3. Do multi-dimensional scaling on dissimilarities.


First, compute the dissimilarity between all pairs of escape profiles in a data frame.
We calculate similarity as the dot product of the escape profile site-level metric for each pair of conditions, normalizing each profile so its dot product with itself is one.
Then we compute the dissimilarity as just one minus the similarity:

In [5]:
def escape_similarity(df):
    """Compute similarity between all pairs of conditions in `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have the columns `condition`, `site` and `escape`; any other
        columns are ignored. None of these three columns may contain nulls.

    Returns
    -------
    pandas.DataFrame
        Square data frame whose index and columns are both the conditions, in
        order of first appearance in `df`. Each entry is the dot product of
        the two conditions' site-level escape profiles after each profile has
        been scaled to unit Euclidean norm, so the diagonal is one. A site
        with no row for a condition counts as zero escape for that condition.

    Raises
    ------
    AssertionError
        If the input has null values, or if normalizing yields a null (for
        example a condition whose escape is zero at every site, which has
        zero norm and so cannot be scaled).

    """
    df = df[['condition', 'site', 'escape']].drop_duplicates()
    assert not df.isnull().any().any()

    # order of first appearance; `pivot_table` sorts its columns, so this
    # order is re-imposed on the result below
    conditions = df['condition'].unique()

    # sites as rows, conditions as columns, zero where a condition lacks a site
    pivoted_df = df.pivot_table(index='site',
                                columns='condition',
                                values='escape',
                                fill_value=0)

    # scale every condition's column to unit length
    norms = numpy.linalg.norm(pivoted_df.to_numpy(dtype=float), axis=0)
    unit_profiles = pivoted_df.div(norms, axis='columns')
    assert unit_profiles.notnull().all().all()  # make sure no sites have null values

    # a single matrix product gives every pairwise dot product at once,
    # instead of building one small data frame per pair of conditions
    all_pairs = unit_profiles.T.dot(unit_profiles).loc[conditions, conditions]

    return pd.DataFrame(all_pairs.to_numpy(),
                        columns=conditions, index=conditions)

# one similarity matrix per metric, stacked so rows are keyed by
# (metric, condition) and columns by condition
similarities = dms_data_tidy.groupby('metric').apply(escape_similarity)

# dissimilarity is one minus similarity, floored at zero (presumably to keep
# floating-point round-off from producing a slightly negative value)
dissimilarities = similarities.rsub(1).clip(lower=0)

dissimilarities.round(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,COV2-2196,COV2-2130,CR3022,COV2-2677,COV2-2082,COV2-2094,COV2-2165,COV2-2832,COV2-2479,COV2-2050,...,COV-107,C002,C105,C110,C121,C135,C144,LY-CoV555,REGN10933,REGN10987
metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
mean of mutations at site,COV2-2196,0.000,0.966,0.870,0.967,0.931,0.951,0.518,0.231,0.878,0.834,...,0.567,0.775,0.893,0.963,0.879,0.966,0.777,0.835,0.207,0.959
mean of mutations at site,COV2-2130,0.966,0.000,0.951,0.991,0.964,0.977,0.928,0.973,0.953,0.978,...,0.697,0.988,0.995,0.409,0.979,0.278,0.989,0.968,0.976,0.375
mean of mutations at site,CR3022,0.870,0.951,0.000,0.607,0.604,0.548,0.803,0.878,0.872,0.924,...,0.639,0.941,0.958,0.951,0.952,0.944,0.955,0.947,0.924,0.943
mean of mutations at site,COV2-2677,0.967,0.991,0.607,0.000,0.428,0.291,0.934,0.973,0.941,0.979,...,0.868,0.990,0.992,0.991,0.991,0.990,0.992,0.990,0.987,0.988
mean of mutations at site,COV2-2082,0.931,0.964,0.604,0.428,0.000,0.098,0.883,0.943,0.919,0.955,...,0.774,0.956,0.667,0.962,0.966,0.965,0.964,0.963,0.865,0.958
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
sum of mutations at site,C135,0.967,0.260,0.935,0.991,0.968,0.976,0.922,0.976,0.970,0.981,...,0.830,0.991,0.993,0.698,0.992,0.000,0.993,0.992,0.988,0.690
sum of mutations at site,C144,0.862,0.990,0.930,0.991,0.965,0.975,0.790,0.837,0.317,0.261,...,0.455,0.072,0.744,0.840,0.167,0.993,0.000,0.276,0.582,0.994
sum of mutations at site,LY-CoV555,0.798,0.976,0.915,0.990,0.961,0.972,0.896,0.701,0.137,0.181,...,0.526,0.139,0.994,0.675,0.113,0.992,0.276,0.000,0.691,0.993
sum of mutations at site,REGN10933,0.169,0.990,0.899,0.987,0.816,0.926,0.597,0.189,0.842,0.790,...,0.482,0.501,0.630,0.988,0.776,0.988,0.582,0.691,0.000,0.988


Now do the multidimensional scaling [as described here](https://scikit-learn.org/stable/auto_examples/manifold/plot_mds.html#sphx-glr-auto-examples-manifold-plot-mds-py) to get the x and y coordinates for each antibody / serum.
For each metric, we do this for two different random number seeds (different seeds will give different MDS layouts):

In [6]:
# one layout per (seed, metric) pair, seeds in the outer loop
mds_frames = []
for seed in [1, 2]:
    for metric, mat in dissimilarities.groupby('metric'):
        # use multidimensional scaling to get locations of antibodies
        mds = sklearn.manifold.MDS(n_components=2,
                                   metric=True,
                                   max_iter=3000,
                                   eps=1e-6,
                                   random_state=seed,
                                   dissimilarity='precomputed',
                                   n_jobs=1)
        locs = mds.fit_transform(mat)

        # shift each axis so that its smallest coordinate sits at zero
        coords = pd.DataFrame(locs, columns=['x', 'y'])
        coords = coords - coords.min()

        # label the rows; the columns of `mat` are the conditions, in row order
        coords = coords.assign(metric=metric,
                               seed=seed,
                               condition=mat.columns)
        mds_frames.append(coords)

# attach the type / subtype / study of each condition to its coordinates
condition_info = (
    dms_data
    [['condition', 'condition_type', 'condition_subtype', 'study']]
    .drop_duplicates()
)
mds_coords = pd.merge(pd.concat(mds_frames, ignore_index=True),
                      condition_info,
                      on='condition',
                      how='left',
                      validate='many_to_one')
mds_coords

Unnamed: 0,x,y,metric,seed,condition,condition_type,condition_subtype,study
0,0.030044,0.740559,mean of mutations at site,1,COV2-2196,antibody,clinical antibody,2021_Dong_AZ
1,0.384229,1.301459,mean of mutations at site,1,COV2-2130,antibody,clinical antibody,2021_Dong_AZ
2,0.295008,0.147468,mean of mutations at site,1,CR3022,antibody,not clinical antibody,2021_Greaney_Crowe_Abs
3,0.051362,0.111741,mean of mutations at site,1,COV2-2677,antibody,not clinical antibody,2021_Greaney_Crowe_Abs
4,0.018703,0.262577,mean of mutations at site,1,COV2-2082,antibody,not clinical antibody,2021_Greaney_Crowe_Abs
...,...,...,...,...,...,...,...,...
191,1.150869,1.155043,sum of mutations at site,2,C135,antibody,not clinical antibody,2021_Greaney_Rockefeller
192,0.752817,0.112095,sum of mutations at site,2,C144,antibody,not clinical antibody,2021_Greaney_Rockefeller
193,0.962862,0.100889,sum of mutations at site,2,LY-CoV555,antibody,clinical antibody,2021_Starr_LY-CoV555
194,0.658834,1.010103,sum of mutations at site,2,REGN10933,antibody,clinical antibody,2021_Starr_REGN


Get a data frame with just the conditions and their citations:

In [7]:
# one row per condition, ordered by type, then subtype, then condition name
conditions_df = dms_data[['condition_type', 'condition_subtype', 'condition', 'study']]
conditions_df = conditions_df.sort_values(['condition_type', 'condition_subtype', 'condition'])
conditions_df = conditions_df.drop_duplicates()
conditions_df = conditions_df.reset_index(drop=True)

## Read information on studies and merge into conditions data frame

In [8]:
# per-study metadata; the preview below shows the columns `study`,
# `citation` and `url`, with `study` being the key merged on later
studies = pd.read_csv('processed_data/studies.csv')

studies

Unnamed: 0,study,citation,url
0,2021_Dong_AZ,Dong et al. bioRxiv (2021),https://www.biorxiv.org/content/10.1101/2021.0...
1,2021_Greaney_Crowe_Abs,Greaney et al. Cell Host Microbe (2021a),https://www.sciencedirect.com/science/article/...
2,2021_Greaney_HAARVI_sera,Greaney et al. Cell Host Microbe (2021b),https://www.sciencedirect.com/science/article/...
3,2021_Greaney_Rockefeller,Greaney et al. bioRxiv (2021),https://www.biorxiv.org/content/10.1101/2021.0...
4,2021_Starr_LY-CoV555,Starr et al. bioRxiv (2021),https://www.biorxiv.org/content/10.1101/2021.0...
5,2021_Starr_REGN,Starr et al. Science (2021),https://science.sciencemag.org/content/early/2...


In [9]:
# remove `citation` / `url` if they are already present (presumably so that
# re-running this cell does not produce suffixed duplicate columns)
conditions_without_refs = conditions_df.drop(columns=['citation', 'url'],
                                             errors='ignore')

# every condition row picks up the citation and url of its single study
conditions_df = pd.merge(conditions_without_refs,
                         studies,
                         how='left',
                         on='study',
                         validate='many_to_one')

Add `dms-view` links:

In [10]:
# URL template; `{condition}` is the only placeholder and is filled per row
dms_view_base_url = 'https://dms-view.github.io/?data-url=https%3A%2F%2Fraw.githubusercontent.com%2Fjbloomlab%2FSARS2_RBD_Ab_escape_maps%2Fmain%2Fprocessed_data%2Fescape_data.csv&condition={condition}&site_metric=site_total_escape&mutation_metric=mut_escape&selected_sites=&protein-data-color=&protein-other-color=pink&markdown-url=https%3A%2F%2Fraw.githubusercontent.com%2Fjbloomlab%2FSARS2_RBD_Ab_escape_maps%2Fmain%2Fdms-view%2Fmanifest.md&pdb-url=https%3A%2F%2Fraw.githubusercontent.com%2Fjbloomlab%2FSARS2_RBD_Ab_escape_maps%2Fmain%2Fdms-view%2F6M0J.pdb'

# one link per row, in row order
conditions_df['dms_view_url'] = [
    dms_view_base_url.format(condition=condition)
    for condition in conditions_df['condition']
]

conditions_df.head()

Unnamed: 0,condition_type,condition_subtype,condition,study,citation,url,dms_view_url
0,antibody,clinical antibody,COV2-2130,2021_Dong_AZ,Dong et al. bioRxiv (2021),https://www.biorxiv.org/content/10.1101/2021.0...,https://dms-view.github.io/?data-url=https%3A%...
1,antibody,clinical antibody,COV2-2196,2021_Dong_AZ,Dong et al. bioRxiv (2021),https://www.biorxiv.org/content/10.1101/2021.0...,https://dms-view.github.io/?data-url=https%3A%...
2,antibody,clinical antibody,LY-CoV555,2021_Starr_LY-CoV555,Starr et al. bioRxiv (2021),https://www.biorxiv.org/content/10.1101/2021.0...,https://dms-view.github.io/?data-url=https%3A%...
3,antibody,clinical antibody,REGN10933,2021_Starr_REGN,Starr et al. Science (2021),https://science.sciencemag.org/content/early/2...,https://dms-view.github.io/?data-url=https%3A%...
4,antibody,clinical antibody,REGN10987,2021_Starr_REGN,Starr et al. Science (2021),https://science.sciencemag.org/content/early/2...,https://dms-view.github.io/?data-url=https%3A%...


## Make interactive plots
First make the plot for selecting which type(s) of condition to show:

In [11]:
# subtypes in the order in which they first appear in `conditions_df`
condition_subtypes = conditions_df['condition_subtype'].unique().tolist()

# define colors from here: https://vega.github.io/vega/docs/schemes/
condition_subtype_colors = {'clinical antibody': '#0072B2',
                            'not clinical antibody': '#56B4E9',
                            'convalescent serum': '#FD5602',
                            'Moderna serum': '#FFAF42',
                            }
subtypes_without_color = set(condition_subtypes) - set(condition_subtype_colors)
if subtypes_without_color:
    raise ValueError('missing colors for some condition subtypes')

# initialize to show antibodies but not sera
antibody_subtypes = (
    conditions_df
    .query('condition_type == "antibody"')
    ['condition_subtype']
    .unique()
)
select_condition_subtype = alt.selection_multi(
    fields=['condition_subtype'],
    init=[{'condition_subtype': subtype} for subtype in antibody_subtypes],
    resolve='union',
    empty='none',
)

# selected subtypes get their assigned color, everything else is drawn white
subtype_color_scale = alt.Scale(
    domain=condition_subtypes,
    range=[condition_subtype_colors[c] for c in condition_subtypes],
)
condition_subtype_color = alt.condition(
    select_condition_subtype,
    alt.Color('condition_subtype:N',
              legend=None,
              scale=subtype_color_scale),
    alt.value('white'),
)

circle_size = 110

# the x-axis title is a multi-line list of usage instructions for the plots
instructions_axis = alt.Axis(
    title=['',
           'On each subplot, you can:',
           ' - click to select one antibody/serum',
           ' - shift-click to select additional antibodies/sera',
           ' - double-click to clear selected antibodies/sera',
           ' - mouseover to see antibody/serum name',
           ],
    titleAlign='left',
    titleFontSize=14,
    titleFontWeight='normal',
    titleFontStyle='italic',
    labelFontSize=12,
)
subtype_axis = alt.Axis(title=None, labelFontSize=12)

# one circle per (type, subtype) pair; this chart carries the subtype selection
type_subtype_pairs = conditions_df[['condition_type', 'condition_subtype']].drop_duplicates()
legend_condition_type = (
    alt.Chart(type_subtype_pairs)
    .mark_circle(size=0.7 * circle_size, stroke='black', strokeWidth=1)
    .encode(x=alt.X('condition_type:N', axis=instructions_axis),
            y=alt.Y('condition_subtype:N',
                    sort=condition_subtypes,
                    axis=subtype_axis),
            color=condition_subtype_color)
    .add_selection(select_condition_subtype)
    .properties(title={'text': ['choose antibody/serum',
                                'types to display'],
                       'align': 'left',
                       'anchor': 'start'})
)

legend_condition_type.configure_view(strokeOpacity=0)

In [12]:
# click-driven multi-selection keyed on the `condition` field; it is shared by
# the legend, the MDS plot and the escape plots to mark highlighted conditions
highlight_condition = (
    alt.selection(type='multi',
                  on='click',
                  fields=['condition'],
                  nearest=False,
                  empty='none',
                  toggle=True,
                  resolve='union',
                  )
    )

# build zoom bar to zoom in condition legend
# (an interval brush along y, drawn with a black outline)
legend_condition_zoom_brush = alt.selection_interval(
                encodings=['y'],
                mark=alt.BrushConfig(stroke='black', strokeWidth=2))
# narrow strip with one rectangle per condition of the displayed subtypes;
# brushing it selects the range of conditions that `condition_base` shows
legend_condition_zoom_bar = (
    alt.Chart(conditions_df)
    .mark_rect()
    .encode(y=alt.Y('condition:N',
                    title='antibody / sera zoom bar',
                    sort=conditions_df['condition'].unique(),
                    axis=alt.Axis(ticks=False,
                                  labels=False,
                                  titleFontSize=12)
                    ),
            color=condition_subtype_color,
            )
    .add_selection(legend_condition_zoom_brush)
    .transform_filter(select_condition_subtype)
    .properties(height=150,
                width=15)
    )

# shared base for the per-condition legend columns: filtered first by the
# selected subtypes and then by the zoom brush, with a fixed step per row
condition_base = (
    alt.Chart(conditions_df)
    .add_selection(select_condition_subtype,
                   highlight_condition)
    .transform_filter(select_condition_subtype)
    .transform_filter(legend_condition_zoom_brush)
    .properties(height={'step': 17},
                width=17,
                )
    )

# column of boxes, one per condition, colored by subtype; a highlighted
# condition gets a stroke width of 3 instead of 0.5
legend_condition_heatmap = (
    condition_base
    .encode(y=alt.Y('condition:N',
                    sort=conditions_df['condition'].unique(),
                    title=None,
                    axis=alt.Axis(orient='right',
                                  labelFontSize=11,
                                  ),
                    ),
            color=condition_subtype_color,
            strokeWidth=alt.condition(~highlight_condition,
                                      alt.value(0.5),
                                      alt.value(3)),
            # both branches are 'black', so the stroke color itself does not
            # depend on whether the condition is highlighted
            stroke=alt.condition(~highlight_condition,
                                 alt.value('black'),
                                 alt.value('black')),
            )
    .mark_rect()
    )

# column of citation text for each condition, linking to the study `url`;
# same y sort as the heatmap so the rows line up, but with no axis drawn
condition_citations = (
    condition_base
    .encode(y=alt.Y('condition:N',
                    sort=conditions_df['condition'].unique(),
                    title=None,
                    axis=None,
                    ),
            text='citation:N',
            href='url:N'
            )
    .mark_text(align='left',
               fontSize=11,
               fontStyle='normal',
               color='darkblue',
               )
    )

# column showing the fixed text 'dms-view' for each condition, linking to
# that condition's `dms_view_url`
condition_dms_view = (
    condition_base
    .encode(y=alt.Y('condition:N',
                    sort=conditions_df['condition'].unique(),
                    title=None,
                    axis=None,
                    ),
            href='dms_view_url:N'
            )
    .mark_text(text='dms-view',
               align='left',
               fontSize=11,
               fontStyle='normal',
               color='darkblue',
               )
    )

# full condition legend: the zoom bar on the left, then the box, citation and
# dms-view columns packed side by side with a small gap between them
legend_condition = (
    (legend_condition_zoom_bar | alt.hconcat(legend_condition_heatmap,
                                             condition_citations,
                                             condition_dms_view,
                                             spacing=2)
     )
    # title text fixed: the original repeated the word "by"
    .properties(title={'text': ['select antibody/serum by clicking box; shift-click',
                                'citation or dms-view text to open that information']})
    )

legend_condition.configure_view(strokeOpacity=0)

Next make MDS plot:

In [13]:
# build drop down menu to select metric and random seed
# each menu offers the distinct values found in `mds_coords` and is bound to
# a single-value selection that the plots are later filtered by
# NOTE(review): the `name` strings look like they double as the on-page
# widget labels, which would explain the spaces in them -- confirm
metric_select_binding = alt.binding_select(options=mds_coords['metric'].unique())
metric_selection = alt.selection_single(name='escape',
                                        fields=['metric'],
                                        bind=metric_select_binding,
                                        init={'metric': 'sum of mutations at site'})
seed_select_binding = alt.binding_select(options=mds_coords['seed'].unique())
seed_selection = alt.selection_single(name='multidimensional scaling random',
                                      fields=['seed'],
                                      bind=seed_select_binding,
                                      init={'seed': 1},
                                      )

# plot size per unit of MDS coordinate, so a unit on x and a unit on y span
# the same length; the axis domains are widened by `pad` (a fraction of the
# data extent) on every side
size = 180
pad = 0.04

# raw coordinate range over all metrics and seeds
x_lo, x_hi = mds_coords['x'].min(), mds_coords['x'].max()
y_lo, y_hi = mds_coords['y'].min(), mds_coords['y'].max()
x_extent = x_hi - x_lo
y_extent = y_hi - y_lo

# padded axis limits
y_min = y_lo - pad * y_extent
y_max = y_hi + pad * y_extent
x_min = x_lo - pad * x_extent
x_max = x_hi + pad * x_extent

# scatter of the MDS coordinates, one circle per condition; the axes carry no
# labels, ticks or grid and use the fixed, padded domains computed above
mds_plot = (
    alt.Chart(mds_coords)
    .encode(x=alt.X('x:Q',
                    scale=alt.Scale(padding=0,
                                    nice=False,
                                    domain=(x_min, x_max),
                                    ),
                    axis=alt.Axis(labels=False,
                                  title=None,
                                  ticks=False,
                                  grid=False,
                                  ),
                    ),
            y=alt.Y('y:Q',
                    scale=alt.Scale(padding=0,
                                    nice=False,
                                    domain=(y_min, y_max),
                                    ),
                    axis=alt.Axis(labels=False,
                                  title=None,
                                  ticks=False,
                                  grid=False,
                                  ),
                    ),
            # highlighted conditions are fully opaque with a black outline;
            # the others are slightly transparent with no outline
            opacity=alt.condition(~highlight_condition, alt.value(0.75), alt.value(1)),
            stroke=alt.condition(~highlight_condition, alt.value(None), alt.value('black')),
            color=condition_subtype_color,
            tooltip=['condition'])
    .mark_circle(size=circle_size)
    # width and height both scale with the data extent by the same factor
    .properties(width=size * x_extent,
                height=size * y_extent,
                title={'text': 'multidimensional scaling of antibodies/sera',
                       'subtitle': ['antibodies/sera with escape mutations at similar',
                                    'sites are positioned nearby in the plot below'],
                       'anchor': 'start',
                       'align': 'left',
                       }
                )
    .add_selection(seed_selection,
                   metric_selection,
                   highlight_condition,
                   select_condition_subtype,
                   )
    # show only the chosen metric and seed, and only the displayed subtypes
    .transform_filter(metric_selection)
    .transform_filter(seed_selection)
    .transform_filter(select_condition_subtype)
    )

# box around MDS plot: https://stackoverflow.com/a/62862229/4191652
# two thin black lines are drawn along the top and right edges of the padded
# domain, using the same scales and hidden axes as the MDS plot itself
dummy_lines = {}
for key, x, y in [('top', (x_min, x_max), (y_max, y_max)),
                  ('right', (x_max, x_max), (y_min, y_max)),
                  ]:
    dummy_lines[key] = (
        # two-row frame holding the end points of this edge
        alt.Chart(pd.DataFrame({'x': x,
                                'y': y})
                  )
        .mark_line(color='black',
                   strokeWidth=0.5)
        .encode(x=alt.X('x:Q',
                        scale=alt.Scale(padding=0,
                                        nice=False,
                                        domain=(x_min, x_max),
                                        ),
                        axis=alt.Axis(labels=False,
                                      title=None,
                                      ticks=False,
                                      grid=False,
                                      ),
                        ),
                y=alt.Y('y:Q',
                        scale=alt.Scale(padding=0,
                                        nice=False,
                                        domain=(y_min, y_max),
                                        ),
                        axis=alt.Axis(labels=False,
                                      title=None,
                                      ticks=False,
                                      grid=False,
                                      ),
                        )
                )
        )
# layer the two edge lines on top of the scatter plot
mds_plot = mds_plot + dummy_lines['top'] + dummy_lines['right']

# show the plot with legend
# NOTE(review): `configure_view` is called twice; the second call may replace
# the view config set by the first, dropping `stroke='black'` -- confirm
(legend_condition_type | mds_plot).configure_view(stroke='black').configure_view(strokeOpacity=0)

Next make line plots:

In [14]:
# shared width of the zoom bar and the escape plots
width = 800

# build zoom bar to zoom in on sites
# (an interval brush along x, drawn with a black outline)
zoom_brush = alt.selection_interval(
                encodings=['x'],
                mark=alt.BrushConfig(stroke='black', strokeWidth=2))
# thin gray strip with one rectangle per distinct site, carrying the brush
zoom_bar = (
    alt.Chart(dms_data_tidy[['site']].drop_duplicates())
    .mark_rect(color='lightgray')
    .encode(x=alt.X('site:O',
                    title=None,
                    ),
            )
    .add_selection(zoom_brush)
    .properties(width=width,
                height=15,
                title='site zoom bar')
    )

# build base for escape plots
# a constant `all_antibodies_sera_of_displayed_types=True` column is added to
# every row; it is the field that the radio-button selection for the mean
# plot is defined on
escape_base = (
    alt.Chart(dms_data_tidy.assign(all_antibodies_sera_of_displayed_types=True))
    .encode(x=alt.X('site:O',
                    axis=alt.Axis(grid=False),
                    ),
            )
    # keep only the chosen metric, the displayed subtypes and the brushed sites
    .transform_filter(metric_selection)
    .transform_filter(select_condition_subtype)
    .transform_filter(zoom_brush)
    .properties(width=width,
                height=200,
                )
    )

# the escape line plot
# highlighted conditions are drawn thicker (1.5 instead of 0.9) and fully
# opaque (1 instead of 0.4)
escape_lines = (
    escape_base
    .encode(size=alt.condition(~highlight_condition, alt.value(0.9), alt.value(1.5)),
            opacity=alt.condition(~highlight_condition, alt.value(0.4), alt.value(1)),
            )
    .add_selection(metric_selection,
                   select_condition_subtype,
                   zoom_brush,
                   )
    .mark_line()
    )

# escape point plot
# points are drawn only for highlighted conditions (see the filter below)
escape_points = (
    escape_base
    .encode(fill=condition_subtype_color,
            tooltip=['condition:N', 'site:O'],
            )
    .mark_point(size=40)
    .transform_filter(highlight_condition)
    # needs to be add_selection within chart: https://github.com/altair-viz/altair/issues/2368#issuecomment-742377146
    .add_selection(highlight_condition)
    )

# combine point and line plots
# the encodings shared by both layers (one series per condition, subtype
# color, escape on y) are applied to the layered chart
escape_lines_points = (
    (escape_lines + escape_points)
    .encode(detail='condition:N',  # https://github.com/altair-viz/altair/issues/985
            color=condition_subtype_color,
            y=alt.Y('escape:Q',
                    axis=alt.Axis(grid=False),
                    ),
            )
    .properties(title={'text': 'escape from individual antibodies/sera'})
    )

# radio buttons to specify if mean for only selected antibodies or all antibody/serum types
# the selection is on the constant `all_antibodies_sera_of_displayed_types`
# column (True on every row of the chart data), so choosing True matches every
# row and choosing False matches none; the initial choice is False
mean_radio = alt.binding_radio(options=[True, False])
mean_selection = alt.selection_single(fields=['all_antibodies_sera_of_displayed_types'],
                                               bind=mean_radio,
                                               name='mean_over',
                                               init={'all_antibodies_sera_of_displayed_types': False})
# plot of mean values
# gray line with point markers showing the per-site mean of `escape`
escape_mean = (
    escape_base
    .mark_line(color='darkgray',
               point={'color': 'darkgray',
                      'size': 60},
               )
    .encode(tooltip=['site:O'],
            y=alt.Y('mean(escape):Q',
                    axis=alt.Axis(grid=False,
                                  title='escape',
                                  ),
                    ),
            )
    # average over the highlighted conditions, or over every condition of the
    # displayed subtypes when the radio button is set to True
    .transform_filter(highlight_condition | (select_condition_subtype & mean_selection))
    .add_selection(highlight_condition,
                   mean_selection,
                   )
    .properties(title={'text': 'mean escape over selected antibodies/sera or ' +
                               'all antibodies/sera of displayed types (see ' +
                               'radio button below)',
                       })
    )

# combine zoom bar, lines, and points
# the individual and mean plots are stacked with a shared x scale, below the zoom bar
escape_plot = (zoom_bar & (escape_lines_points & escape_mean).resolve_scale(x='shared'))

escape_plot

Now combine the antibody MDS and escape plots:

In [15]:
# layout: MDS plot and type legend on top, escape plots underneath, and the
# per-condition legend to the right of both
top_row = mds_plot | legend_condition_type
left_column = top_row & escape_plot
chart_padding = {'left': 5,
                 'right': 60,
                 'top': 5,
                 'bottom': 5}
chart = (
    (left_column | legend_condition)
    .configure(padding=chart_padding)
    .configure_view(strokeOpacity=0)
)

# write the chart as a standalone HTML file, creating the directory if needed
chartfile = 'plots/chart.html'
chart_dir = os.path.dirname(chartfile)
os.makedirs(chart_dir, exist_ok=True)
print(f"Saving chart to {chartfile}")
chart.save(chartfile)

chart

Saving chart to plots/chart.html
