# Interactive heat maps of effects of mutations
Use [Altair](https://altair-viz.github.io/) to create interactive heat maps of mutational effects with relevant annotations.

## Imports and read data


In [2]:
import math
import os
import altair as alt
import pandas as pd
import numpy as np
import yaml
# Use Altair's default data transformer with the row limit switched off
# (`max_rows=None`), so the full mutation table can be passed to the charts below.
alt.data_transformers.enable('default', max_rows=None)

DataTransformerRegistry.enable('default')

Read in the configuration file, and then read the input data files from that:

In [11]:
# Load the analysis configuration; every input/output path below comes from it.
with open('config.yaml') as config_handle:
    config = yaml.safe_load(config_handle)

# Make sure the figure output directory exists before anything is written to it.
os.makedirs(config['figs_dir'], exist_ok=True)

# Read the per-mutation effects table named in the configuration.
single_mut_effects_path = config['single_mut_effects_file']
print(f"Reading single-mutant effects from {single_mut_effects_path}")
mut_effects = pd.read_csv(single_mut_effects_path)
mut_effects.head()

Reading single-mutant effects from results/single_mut_effects/single_mut_effects.csv


Unnamed: 0,site_RBD,site_SARS2,wildtype,mutant,mutation,mutation_RBD,bind_lib1,bind_lib2,bind_avg,expr_lib1,expr_lib2,expr_avg
0,1,331,N,A,N331A,N1A,-0.05,-0.02,-0.03,-0.14,-0.08,-0.11
1,1,331,N,C,N331C,N1C,-0.08,-0.1,-0.09,-1.56,-0.97,-1.26
2,1,331,N,D,N331D,N1D,0.0,0.07,0.03,-0.75,-0.12,-0.44
3,1,331,N,E,N331E,N1E,0.02,-0.02,0.0,-0.39,-0.24,-0.31
4,1,331,N,F,N331F,N1F,-0.03,-0.16,-0.1,-0.83,-0.57,-0.7


Read annotations of various properties of sites:

In [12]:
site_annotations = pd.read_csv('data/RBD_sites.csv')

# Sanity check: the (site_RBD, site_SARS2, wildtype) triples in the annotation
# table must match, row for row, the unique triples in the mut_effects data frame.
pd.testing.assert_frame_equal(
        mut_effects[['site_RBD', 'site_SARS2', 'wildtype']].drop_duplicates().reset_index(drop=True),
        site_annotations[['site_RBD', 'site_SARS2', 'amino_acid_SARS2']].rename(columns={'amino_acid_SARS2': 'wildtype'}),
        )

# Flag sites whose SARS-CoV-2 and SARS-CoV-1 amino acids differ.
# A vectorized `!=` replaces the row-wise `apply(lambda x: x[0] == x[1] ...)`:
# it yields the same boolean column (a missing value on either side still
# compares as "different") without positional integer indexing into a
# label-indexed Series, which recent pandas versions deprecate.
site_annotations['SARS-CoV-1/2 differences'] = (
    site_annotations['amino_acid_SARS2'] != site_annotations['amino_acid_SARS1']
)
# first few lines
site_annotations.head()

Unnamed: 0,site_RBD,amino_acid_SARS2,site_SARS2,amino_acid_SARS1,site_SARS1,chain_6M0J,codon_SARS2,amino_acid_RaTG13,amino_acid_GD_Pangolin,entropy,...,epitope_CR3022,epitope_VHH72,epitope_S230,epitope_m396,epitope_F26G19,epitope_80R,epitope_B38,epitope_S309,buried_downRBD,SARS-CoV-1/2 differences
0,1,N,331,N,318.0,,aat,N,N,0.0,...,False,False,False,False,False,False,False,False,True,False
1,2,I,332,I,319.0,,att,I,I,0.0,...,False,False,False,False,False,False,False,False,True,False
2,3,T,333,T,320.0,E,aca,T,T,0.0,...,False,False,False,False,False,False,False,False,False,False
3,4,N,334,N,321.0,E,aac,N,N,0.179256,...,False,False,False,False,False,False,False,True,False,False
4,5,L,335,L,322.0,E,ttg,L,L,1.468456,...,False,False,False,False,False,False,False,True,False,False


## Data frame for heat maps
This data frame needs to have the mutational effects along with the additional site annotations we'd like to show in interactive format:

In [13]:
# Columns of `mut_effects` to keep, mapped to the names they get in the
# heat-map data frame (old name -> new name).
mut_effects_to_keep = {'site_SARS2': 'site',
                       'mutant': 'mutant',
                       'wildtype': 'wildtype',
                       'mutation': 'mutation',
                       'bind_avg': 'ACE2_binding',
                       'expr_avg': 'expression',
                      }

# Columns of `site_annotations` to keep, mapped to their display names
# (old name -> new name). 'site' is the key shared with `mut_effects_to_keep`.
site_annotations_to_keep = {'site_SARS2': 'site',
                            'RSA_bound': 'RSA_bound',
                            'Neff': 'Neff',
                            'SARS2_ACE2_contact': 'ACE2 contact SARS-CoV-2',
                            'SARS1_ACE2_contact': 'ACE2 contact SARS-CoV-1',
                            'amino_acid_SARS1': 'SARS-CoV-1 aa',
                            'amino_acid_RaTG13': 'RaTG13 aa',
                            'amino_acid_GD_Pangolin': 'GD-Pangolin aa',
                            'SARS-CoV-1/2 differences': 'SARS-CoV-1/2 differences',
                            'epitope_B38': 'epitope B38',
                            'epitope_S309': 'epitope S309',
                            'epitope_CR3022': 'epitope CR3022',
                            'epitope_VHH72': 'epitope VHH-72'
                           }

# conditions is the data shown in the heatmaps
# targets are the values in the dropdown menus
# NOTE(review): 'ACE2 binding' (with a space) does not match the renamed column
# 'ACE2_binding' above, and `conditions` is not referenced by the code visible
# in this notebook -- confirm whether it is still needed or should be corrected.
conditions = ['expression', 'ACE2 binding']
# Every entry except 'all sites' is a renamed site-annotation column from
# `site_annotations_to_keep`.
targets = ['all sites', 'ACE2 contact SARS-CoV-2', 'ACE2 contact SARS-CoV-1', 'SARS-CoV-1/2 differences',
           'epitope B38', 'epitope S309', 'epitope CR3022', 'epitope VHH-72']

def format_RSA(rsa):
    """Render a fractional value as a whole-number percentage string.

    Parameters
    ----------
    rsa : scalar
        Fraction to format (e.g. ``0.19``); may be missing.

    Returns
    -------
    str
        ``'n.a.'`` when `rsa` is null according to ``pd.isnull``, otherwise
        ``rsa * 100`` rounded with the built-in ``round`` and suffixed with ``%``.
    """
    return 'n.a.' if pd.isnull(rsa) else f"{round(rsa * 100)}%"

# Combine per-mutation effects with per-site annotations: keep and rename the
# selected columns, drop rows involving stop codons ('*'), then attach the
# site annotations by joining on 'site'.
df = (mut_effects[list(mut_effects_to_keep)]
      .rename(columns=mut_effects_to_keep)
      .query('(mutant != "*") and (wildtype != "*")')
      .merge(site_annotations[list(site_annotations_to_keep)]
             .rename(columns=site_annotations_to_keep),
             on='site')
     )

# Mark cells where the mutant equals the wildtype residue with an 'x'
# (empty string otherwise); vectorized instead of a row-wise apply that
# relied on positional Series indexing.
df['wildtype_code'] = np.where(df['wildtype'] == df['mutant'], 'x', '')

# 'all sites' is the dropdown target that selects every row.
df['all sites'] = True

# 'ACE2 contact SARS-CoV-2' is one of the `targets`, so the melt below folds it
# into the 'subset' column; keep a copy to re-attach afterwards. It is reduced
# to one row per distinct (site, value) pair so that the re-merge does not
# multiply every melted row by the number of mutant rows at that site.
zoom_bar_info = df[['site', 'ACE2 contact SARS-CoV-2']].drop_duplicates()

# Long format: one row per (mutation, subset); 'value' holds the subset flag.
df = pd.melt(df,
             id_vars=[col for col in df.columns if col not in targets],
             var_name='subset')

# remove sites/subset pairs without data
# (only rows whose flag is exactly False are dropped, as before)
df = df.loc[~df['value'].eq(False)].drop(columns='value')

# Re-attach the per-site ACE2-contact flag; `validate` asserts that each site
# maps to a single row of `zoom_bar_info`.
df = pd.merge(df, zoom_bar_info, on='site', validate='many_to_one')

# clean up
df = df.drop(columns=['wildtype']).drop_duplicates()
# presumably normalizes any missing values to NaN before plotting -- kept as in the original
df = df.fillna(np.nan)
df.head()

Unnamed: 0,site,mutant,mutation,ACE2_binding,expression,RSA_bound,Neff,SARS-CoV-1 aa,RaTG13 aa,GD-Pangolin aa,wildtype_code,subset,ACE2 contact SARS-CoV-2
0,417,A,K417A,-0.35,0.03,0.191304,1.616775,V,K,R,,ACE2 contact SARS-CoV-2,True
20,417,C,K417C,-0.42,-0.05,0.191304,1.616775,V,K,R,,ACE2 contact SARS-CoV-2,True
40,417,D,K417D,-1.04,-0.33,0.191304,1.616775,V,K,R,,ACE2 contact SARS-CoV-2,True
60,417,E,K417E,-0.75,-0.25,0.191304,1.616775,V,K,R,,ACE2 contact SARS-CoV-2,True
80,417,F,K417F,-0.13,-0.11,0.191304,1.616775,V,K,R,,ACE2 contact SARS-CoV-2,True


## Create the interactive heat map

In [43]:
# Row order of amino acids on the heat-map y axis.
aa_order = [
    'R', 'K', 'H', 'D', 'E', 'Q', 'N', 'S', 'T', 'Y',
    'W', 'F', 'A', 'I', 'L', 'M', 'V', 'G', 'P', 'C', '*',
]

# SELECTIONS
# Hovering over a cell selects it; nothing is selected until the first hover.
cell_selector = alt.selection_single(on='mouseover', empty='none')

# Dropdown that filters on the 'subset' column, starting at 'all sites'.
subset_dropdown = alt.binding_select(options=targets)
subset_select = alt.selection_single(
    fields=['subset'],
    bind=subset_dropdown,
    name=" ",
    init={'subset': 'all sites'},
)

# Interval brush along x only, drawn with a black outline of width 2.
brush_style = alt.BrushConfig(stroke='black', strokeWidth=2)
zoom_brush = alt.selection_interval(encodings=['x'], mark=brush_style)

# PLOTS
# zoom bar at the bottom
# Zoom bar at the bottom: one rect per site, shaded by whether the site is a
# SARS-CoV-2 ACE2 contact. Brushing it drives `zoom_brush`.
zoom_bar_color = alt.Color(
    'ACE2 contact SARS-CoV-2',
    scale=alt.Scale(domain=[True, False],
                    range=['lightgrey', 'darkgrey']),
    legend=alt.Legend(orient='bottom'),
)

zoom_bar = (
    alt.Chart(df)
    .mark_rect()
    .encode(x='site:O', color=zoom_bar_color)
    .add_selection(zoom_brush)
    .add_selection(subset_select)
    .transform_filter(subset_select)
    .properties(width=900, title='zoom bar')
)

# heatmaps. made of three charts: base heatmap, wildtype 'X', missing data
def baseChart(data, metric):
    """Build the layered, interactive site-by-mutant heat map for one metric.

    The chart is made of three layers sharing the same x (site) / y (mutant)
    encoding: the colored heat map itself, a semi-transparent grey layer for
    cells whose `metric` value is not valid, and a text layer showing the
    `wildtype_code` column.

    Parameters
    ----------
    data : pandas.DataFrame
        Data to plot. The code reads the columns `site`, `mutant`,
        `wildtype_code`, `subset` (through the dropdown filter) and `metric`.
    metric : str
        Name of the column in `data` used for the heat-map color. Underscores
        are replaced by spaces to form the chart title.

    Returns
    -------
    altair chart
        The layered chart with the module-level `subset_select`,
        `cell_selector` and `zoom_brush` selections attached / applied.
    """
    # everything is site v mutant
    base = (alt.Chart(data)
            .encode(x='site:O',
                    y=alt.Y('mutant:O',
                            sort=aa_order)
                   )
           )

    # The tooltip shows every column except the axis / bookkeeping ones.
    hidden_in_tooltip = {'site', 'wildtype', 'mutant', 'wildtype_code', 'subset'}
    tooltip_fields = [c for c in data.columns if c not in hidden_in_tooltip]

    heatmap = (base
               .mark_rect()
               .encode(color=alt.Color(metric,
                                       type='quantitative',
                                       # diverging scale spanning the observed
                                       # range, with its midpoint pinned at 0
                                       scale=alt.Scale(scheme='redblue',
                                                       domain=[data[metric].min(),
                                                               data[metric].max()],
                                                       domainMid=0),
                                       legend=alt.Legend(orient='left',
                                                         title='')),
                       # the black outline only becomes visible (width 2) on
                       # the cell picked by `cell_selector`
                       stroke=alt.value('black'),
                       strokeWidth=alt.condition(cell_selector,
                                                 alt.value(2),
                                                 alt.value(0)),
                       tooltip=tooltip_fields
                      )
              )

    wildtype = (base
                .mark_text(color='black')
                .encode(text=alt.Text('wildtype_code:N')
                       )
               )

    # Cells without a valid value for `metric`. Bracket notation keeps the
    # filter expression well-formed even if the column name is not a plain
    # identifier (e.g. contains a space). The mark is set once; the original
    # called mark_rect() twice and the first call was simply overwritten.
    nulls = (base
             .transform_filter(f"!isValid(datum['{metric}'])")
             .mark_rect(opacity=0.5)
             .encode(alt.Color(f'{metric}:N',
                               scale=alt.Scale(scheme='greys'),
                               legend=alt.Legend(title=' ',
                                                 orient='left'))
                    )
            )

    return ((heatmap + nulls + wildtype)
            .interactive()
            .add_selection(subset_select)  # add dropdown menu
            .add_selection(cell_selector)  # tooltip highlighter
            .transform_filter(subset_select)  # add dropdown filtering
            .transform_filter(zoom_brush)  # add zoom bar filtering
            .properties(height=250, title=metric.replace('_', ' ')))

# Stack the two heat maps vertically; each panel gets its own color scale.
heatmap_panels = alt.vconcat(baseChart(df, 'expression'),
                             baseChart(df, 'ACE2_binding'))
chart = heatmap_panels.resolve_scale(color='independent')

# Usage notes rendered as the figure subtitle, one entry per line.
usage_notes = [
    'instructions:',
    'hover over squares to reveal tooltip information',
    'select site sets using the dropdown',
    'change x-axis domain by brushing zoom bar, dragging brush',
    'to clear reset view, double click on zoom bar',
]

# Put the zoom bar under the heat maps and add the overall title block.
chart = (
    alt.vconcat(chart, zoom_bar)
    .properties(title={'text': ['SARS-CoV-2 RBD DMS data'],
                       'subtitle': usage_notes})
    .configure_title(anchor='start', fontSize=20)
)
chart

In [6]:
# Write the interactive chart to the path given in the configuration.
heatmap_file = config['interactive_heatmap']
print(f"Saving chart to {heatmap_file}")

# os.path.dirname() returns '' when the configured path is a bare file name,
# and os.makedirs('') raises; only create the directory when there is one.
heatmap_dir = os.path.dirname(heatmap_file)
if heatmap_dir:
    os.makedirs(heatmap_dir, exist_ok=True)

chart.save(heatmap_file)

Saving chart to results/interactive_heatmap.html
