# Interactive heat maps of effects of mutations
Use [Altair](https://altair-viz.github.io/) to create interactive heat maps of mutational effects with relevant annotations.

## Imports and read data
Import Python modules:

In [1]:
import math
import os

import altair as alt

import pandas as pd

import yaml

Read in the configuration file, and then read the input data files from that:

In [2]:
# All input / output locations used in this notebook come from the YAML
# configuration file.
with open('config.yaml') as config_file:
    config = yaml.safe_load(config_file)

# Create the figures directory up front (no error if it already exists).
os.makedirs(config['figs_dir'], exist_ok=True)

# Load the single-mutant effects table named in the configuration.
print(f"Reading single-mutant effects from {config['single_mut_effects_file']}")
mut_effects = pd.read_csv(config['single_mut_effects_file'])

# preview the first few rows
mut_effects.head()

Reading single-mutant effects from results/single_mut_effects/single_mut_effects.csv


Unnamed: 0,site_RBD,site_SARS2,wildtype,mutant,mutation,mutation_RBD,bind_lib1,bind_lib2,bind_avg,expr_lib1,expr_lib2,expr_avg
0,1,331,N,A,N331A,N1A,-0.05,-0.02,-0.03,-0.14,-0.08,-0.11
1,1,331,N,C,N331C,N1C,-0.08,-0.1,-0.09,-1.56,-0.97,-1.26
2,1,331,N,D,N331D,N1D,0.0,0.07,0.03,-0.75,-0.12,-0.44
3,1,331,N,E,N331E,N1E,0.02,-0.02,0.0,-0.39,-0.24,-0.31
4,1,331,N,F,N331F,N1F,-0.03,-0.16,-0.1,-0.83,-0.57,-0.7


Read annotations of various properties of sites:

In [3]:
site_annotations = pd.read_csv('data/RBD_sites.csv')

# Consistency check: the unique (site_RBD, site_SARS2, wildtype) rows of
# `mut_effects` must match, row for row, the corresponding columns of the
# annotations (whose wildtype column is named `amino_acid_SARS2`).
sites_from_mut_effects = (
    mut_effects
    [['site_RBD', 'site_SARS2', 'wildtype']]
    .drop_duplicates()
    .reset_index(drop=True)
)
sites_from_annotations = (
    site_annotations
    [['site_RBD', 'site_SARS2', 'amino_acid_SARS2']]
    .rename(columns={'amino_acid_SARS2': 'wildtype'})
)
pd.testing.assert_frame_equal(sites_from_mut_effects, sites_from_annotations)

# first few lines
site_annotations.head()

Unnamed: 0,site_RBD,amino_acid_SARS2,site_SARS2,amino_acid_SARS1,site_SARS1,chain_6M0J,codon_SARS2,amino_acid_RaTG13,amino_acid_GD_Pangolin,RBM,...,SARS1_key_adaptation,epitope_CR3022,epitope_VHH72,epitope_S230,epitope_m396,epitope_F26G19,epitope_80R,epitope_B38,epitope_S309,buried_downRBD
0,1,N,331,N,318.0,,aat,N,N,False,...,False,False,False,False,False,False,False,False,False,True
1,2,I,332,I,319.0,,att,I,I,False,...,False,False,False,False,False,False,False,False,False,True
2,3,T,333,T,320.0,E,aca,T,T,False,...,False,False,False,False,False,False,False,False,False,False
3,4,N,334,N,321.0,E,aac,N,N,False,...,False,False,False,False,False,False,False,False,True,False
4,5,L,335,L,322.0,E,ttg,L,L,False,...,False,False,False,False,False,False,False,False,True,False


## Data frame for heat maps
This data frame needs to have the mutational effects along with the additional site annotations we'd like to show in interactive format:

In [4]:
# Columns of `mut_effects` to keep, mapped to the names they are given
# in the heat map data (dict order is the resulting column order).
mut_effects_to_keep = {
    # columns that keep their original name
    **{col: col for col in ('site_SARS2', 'mutant', 'wildtype', 'mutation')},
    # columns that are renamed
    'bind_avg': 'ACE2 binding',
    'expr_avg': 'expression',
    # additional columns that can be enabled by uncommenting:
    #'bind_lib1': 'ACE2 binding (library 1)',
    #'bind_lib2': 'ACE2 binding (library 2)',
    #'expr_lib1': 'expression (library 1)',
    #'expr_lib2': 'expression (library 2)',
}

# Columns of `site_annotations` to keep, mapped to the names they are
# given in the heat map data (`site_SARS2` keeps its name).
site_annotations_to_keep = dict(
    site_SARS2='site_SARS2',
    RSA_bound='RSA +ACE2',
    RSA_unbound='RSA -ACE2',
    SARS2_ACE2_contact='ACE2 contact',
    # SARS1_ACE2_contact='ACE2 contact in SARS-CoV-1',
    amino_acid_SARS1='SARS-CoV-1 aa',
    amino_acid_RaTG13='RaTG13 aa',
    amino_acid_GD_Pangolin='GD-Pangolin aa',
)

def format_RSA(rsa):
    """Format an RSA value as a percentage string.

    Parameters
    ----------
    rsa : float or null-like
        Value to format; it is multiplied by 100 before rounding.

    Returns
    -------
    str
        ``'n.a.'`` if `rsa` is null according to ``pd.isnull``, otherwise
        the value times 100 rounded to an integer followed by ``'%'``.
    """
    if pd.isnull(rsa):
        return 'n.a.'
    percent = round(rsa * 100)
    return f"{percent}%"

# Site annotations: keep the selected columns, turn the two RSA columns
# into percentage strings, then apply the display names.
site_annotations_for_heatmap = (
    site_annotations
    [list(site_annotations_to_keep)]
    .assign(
        RSA_bound=lambda df: df['RSA_bound'].map(format_RSA),
        RSA_unbound=lambda df: df['RSA_unbound'].map(format_RSA),
    )
    .rename(columns=site_annotations_to_keep)
)

# Mutation effects: keep the selected columns, apply the display names,
# and drop rows whose mutant or wildtype is "*" (presumably stop codons).
mut_effects_for_heatmap = (
    mut_effects
    [list(mut_effects_to_keep)]
    .rename(columns=mut_effects_to_keep)
    .query('(mutant != "*") and (wildtype != "*")')
)

# Attach the per-site annotations to every mutation at that site.
heatmap_data = mut_effects_for_heatmap.merge(
    site_annotations_for_heatmap,
    on='site_SARS2',
)

heatmap_data

Unnamed: 0,site_SARS2,mutant,wildtype,mutation,ACE2 binding,expression,RSA +ACE2,RSA -ACE2,ACE2 contact,SARS-CoV-1 aa,RaTG13 aa,GD-Pangolin aa
0,331,A,N,N331A,-0.03,-0.11,n.a.,n.a.,False,N,N,N
1,331,C,N,N331C,-0.09,-1.26,n.a.,n.a.,False,N,N,N
2,331,D,N,N331D,0.03,-0.44,n.a.,n.a.,False,N,N,N
3,331,E,N,N331E,0.00,-0.31,n.a.,n.a.,False,N,N,N
4,331,F,N,N331F,-0.10,-0.70,n.a.,n.a.,False,N,N,N
...,...,...,...,...,...,...,...,...,...,...,...,...
4015,531,S,T,T531S,0.01,0.02,n.a.,n.a.,False,T,T,T
4016,531,T,T,T531T,0.00,0.00,n.a.,n.a.,False,T,T,T
4017,531,V,T,T531V,0.01,-0.06,n.a.,n.a.,False,T,T,T
4018,531,W,T,T531W,-0.02,-0.08,n.a.,n.a.,False,T,T,T


## Create the interactive heat map

In [5]:
# amino acids ordered by physicochemical property for plotting
aa_order = ['R', 'K', 'H', 'D', 'E', 'Q', 'N', 'S', 'T', 'Y',
            'W', 'F', 'A', 'I', 'L', 'M', 'V', 'G', 'P', 'C', '*']

# Selection that follows the mouse and picks the nearest cell.
# `empty='none'` is needed because the default (`empty='all'`) treats an
# empty selection as matching every cell, which would draw the highlight
# border around *all* cells until the mouse first enters the chart.
cell_selector = alt.selection_single(on='mouseover', nearest=True, empty='none')

# One heat map per measurement (stacked as rows): sites on the x-axis,
# mutant amino acids on the y-axis, and color giving the measurement.
heatmap = (
    alt.Chart(heatmap_data)
    .mark_rect()
    .encode(
        x='site_SARS2:O',
        y=alt.Y('mutant:O', sort=aa_order),
        # the color field is filled in per row by `.repeat(row=...)` below
        color=alt.Color(alt.repeat('row'), type='quantitative'),
        # black border that is only visible (width 2) on the selected cell
        stroke=alt.value('black'),
        strokeWidth=alt.condition(cell_selector, alt.value(2), alt.value(0)),
        # show every column except the ones already readable from the axes
        tooltip=[c for c in heatmap_data.columns if c not in {'site_SARS2', 'wildtype', 'mutant'}],
        )
    .properties(width=3000, height=300)
    .repeat(row=['expression', 'ACE2 binding'])
    .add_selection(cell_selector)
    .interactive()
    )

heatmap

Save the chart:

In [6]:
print(f"Saving chart to {config['interactive_heatmap']}")

# `os.path.dirname` returns '' when the configured path has no directory
# component, and `os.makedirs('')` raises FileNotFoundError, so only create
# the output directory when there actually is one.
heatmap_dir = os.path.dirname(config['interactive_heatmap'])
if heatmap_dir:
    os.makedirs(heatmap_dir, exist_ok=True)

heatmap.save(config['interactive_heatmap'])

Saving chart to results/interactive_heatmap.html
