# Make paper figures

In this notebook, we'll take the QC data, mutation preferences, and mutation effects to make figures for publication.

In [1]:
import os
import altair as alt
import pandas as pd

Configure the plots:

In [82]:
# Font sizes shared by every heatmap: axis/legend labels, axis/legend titles,
# and the 'x' text that marks the wildtype residue.
heatmap_label_font_size = 22
heatmap_title_font_size = 30
heatmap_wildtype_font_size = 28

# Order in which residues are sorted along the mutant axis of the heatmaps
# ('*' is the stop codon).
aa_order = list("RKHDEQNSTYWFAILMVGPC*")

## Annotations

Site numbers are given relative to the reference sequence, not as sequential numbering.

In [86]:
# Feature name -> list of reference sites that belong to that feature.
# Any key starting with "sub" is a binding pocket from the ASAP consortium.
annotation_dict = {
    "subP1": [
        "(NS3) 151", "(NS3) 161", "(NS3) 129", "(NS3) 160", "(NS3) 150",
        "(NS3) 130", "(NS3) 131", "(NS3) 132", "(NS3) 135", "(NS3) 134",
    ],
    "subP1_prime": ["(NS3) 51", "(NS3) 52", "(NS3) 36", "(NS3) 35", "(NS3) 133"],
    "subP2": [
        "(NS3) 152", "(NS3) 83", "(NS3) 75", "(NS3) 81", "(NS3) 82", "(NS3) 72",
    ],
    "subP3": ["(NS3) 153", "(NS3) 154", "(NS3) 155"],
    "catalytic_triad": ["(NS3) 51", "(NS3) 75", "(NS3) 135"],
}

# Flatten the mapping into a long-format DataFrame with one
# (feature, reference_site) row per annotated site.
annotations_df = pd.DataFrame(
    [
        (feature, site)
        for feature, sites in annotation_dict.items()
        for site in sites
    ],
    columns=["feature", "reference_site"],
)

# Names of the annotated features, in dictionary order
annotations = list(annotation_dict)

## Heatmap of mutation preferences

A heatmap of mutation preferences from `dms_tools2`. The heatmap is divided into three panels, each spanning the sites of one tile. We're **including stop codons** in this heatmap. It's clear that in *most* cases, the most preferred amino acid is the wildtype.

In [3]:
# Load the per-mutation effects and preferences (stop codons included)
effects_and_preferences_with_stops = pd.read_csv("results/summary/all_tiles_effects_and_preferences_with_stops.csv")

# Mark rows where the mutant residue equals the wildtype residue with an 'x'
# (empty string otherwise); this column is drawn as text on the heatmaps.
# The comparison is done column-wise by name instead of a row-wise `apply`
# with positional x[0]/x[1] lookups, which pandas deprecates on label-indexed
# Series.
effects_and_preferences_with_stops['wildtype_code'] = (
    effects_and_preferences_with_stops['wildtype']
    .eq(effects_and_preferences_with_stops['mutant'])
    .map({True: 'x', False: ''})
)
effects_and_preferences_with_stops.head()

Unnamed: 0,site,wildtype,mutant,mutation,effect,log2effect,tile,preference,reference_site,sequential_site,protein_site,protein,wildtype_code
0,1,R,*,R1*,0.001401,-9.4799,tile_1,0.00118,(NS2B) 0,1,0,NS2B,
1,1,R,A,R1A,0.003133,-8.3181,tile_1,0.00264,(NS2B) 0,1,0,NS2B,
2,1,R,C,R1C,0.007928,-6.9788,tile_1,0.00668,(NS2B) 0,1,0,NS2B,
3,1,R,D,R1D,0.008225,-6.9258,tile_1,0.00693,(NS2B) 0,1,0,NS2B,
4,1,R,E,R1E,0.004463,-7.8079,tile_1,0.00376,(NS2B) 0,1,0,NS2B,


In [97]:

# Site ranges used to split the data into three panels (one per tile)
tile_ranges = [list(range(1,104)), list(range(104, 207)), list(range(207, 309))]

# Build one preference heatmap per tile. The panels are collected in their own
# list so that `preference_heatmaps` only ever refers to the final chart.
preference_panels = []
for tile_range in tile_ranges:
    # Keep only the rows whose site falls in this tile
    plot_data = effects_and_preferences_with_stops[effects_and_preferences_with_stops['site'].isin(tile_range)]

    # Base plot: reference site on x (ordered by the numeric 'site' column),
    # mutant amino acid on y (ordered by aa_order)
    base = alt.Chart(plot_data).encode(
        x=alt.X(
            'reference_site',
            sort=alt.EncodingSortField(field='site', order='ascending'),
            axis=alt.Axis(labelFontSize=heatmap_label_font_size, title="Site", titleFontSize=heatmap_title_font_size),
        ),
        y=alt.Y(
            'mutant:O',
            sort=aa_order,
            axis=alt.Axis(labelFontSize=heatmap_label_font_size, title="Mutation", titleFontSize=heatmap_title_font_size),
        ),
    )

    # Color each cell by its preference
    color = base.mark_rect(stroke='black').encode(
        color=alt.Color(
            'preference:Q',
            title="Preference",
            scale=alt.Scale(scheme='lightgreyteal'),
            legend=alt.Legend(
                labelFontSize=heatmap_label_font_size,
                titleFontSize=heatmap_title_font_size,
            ),
        ),
        tooltip=[
            alt.Tooltip('preference', title="Preference"),
            alt.Tooltip('wildtype', title="Wildtype"),
            alt.Tooltip('mutant', title="Mutant"),
        ],
    )

    # Add a black 'x' to the wildtype amino acids
    wildtype = base.mark_text(
        color='black',
        fontSize=heatmap_wildtype_font_size
    ).encode(
        text=alt.Text('wildtype_code:N')
    )

    # Combine the heatmap and the wildtype amino acid annotation
    heatmap = color + wildtype
    preference_panels.append(heatmap)

# Stack the per-tile heatmaps vertically into a single chart
preference_heatmaps = alt.vconcat(*preference_panels).configure_legend(
    strokeColor='gray',
    padding=10,
    cornerRadius=10,
    labelLimit=500,
)

preference_heatmaps.display()

In [5]:
# Save the combined preference heatmap (stop codons included) as an HTML file
preference_heatmaps.save("results/summary/preference_heatmap_with_stops.html")

How many sites "prefer" the wildtype amino acid? It's sufficient to report this in the paper rather than showing the plot.

## Heatmap of mutation effects

Mutation effects are the ratio of the preference for any given mutation to the preference for the wildtype amino acid. We take the log2 of this value so that wildtype effects are 0, negative values are worse than wildtype, and positive values are better than wildtype. We're including plots with and without stop codons. We want to make sure that stop codons are deleterious, but that figure is better suited for the supplement.

### Without stop codons

In [6]:
# Load the per-mutation effects and preferences (stop codons excluded)
effects_and_preferences = pd.read_csv("results/summary/all_tiles_effects_and_preferences.csv")

# Mark rows where the mutant residue equals the wildtype residue with an 'x'
# (empty string otherwise); this column is drawn as text on the heatmaps.
# The comparison is done column-wise by name instead of a row-wise `apply`
# with positional x[0]/x[1] lookups, which pandas deprecates on label-indexed
# Series.
effects_and_preferences['wildtype_code'] = (
    effects_and_preferences['wildtype']
    .eq(effects_and_preferences['mutant'])
    .map({True: 'x', False: ''})
)
effects_and_preferences.head()

Unnamed: 0,site,wildtype,mutant,mutation,effect,log2effect,tile,preference,reference_site,sequential_site,protein_site,protein,wildtype_code
0,1,R,A,R1A,0.003142,-8.3143,tile_1,0.00265,(NS2B) 0,1,0,NS2B,
1,1,R,C,R1C,0.007931,-6.9783,tile_1,0.00669,(NS2B) 0,1,0,NS2B,
2,1,R,D,R1D,0.008227,-6.9253,tile_1,0.00694,(NS2B) 0,1,0,NS2B,
3,1,R,E,R1E,0.004469,-7.8057,tile_1,0.00377,(NS2B) 0,1,0,NS2B,
4,1,R,F,R1F,0.009128,-6.7754,tile_1,0.0077,(NS2B) 0,1,0,NS2B,


In [7]:
# Symmetric color domain centered on 0 (the wildtype effect), wide enough to
# cover the most extreme log2 effect in either direction
effect_min = effects_and_preferences['log2effect'].min()
effect_max = effects_and_preferences['log2effect'].max()
effect_limit = max(abs(effect_min), abs(effect_max))
scale_domain = [-effect_limit, effect_limit]

# Build one effect heatmap per tile. The panels are collected in their own
# list so that `effect_heatmaps` only ever refers to the final chart.
effect_panels = []
for tile_range in tile_ranges:
    # Keep only the rows whose site falls in this tile
    plot_data = effects_and_preferences[effects_and_preferences['site'].isin(tile_range)]

    # Base plot: reference site on x (ordered by the numeric 'site' column),
    # mutant amino acid on y (ordered by aa_order)
    base = alt.Chart(plot_data).encode(
        x=alt.X(
            'reference_site',
            sort=alt.EncodingSortField(field='site', order='ascending'),
            axis=alt.Axis(labelFontSize=heatmap_label_font_size, title="Site", titleFontSize=heatmap_title_font_size),
        ),
        y=alt.Y(
            'mutant:O',
            sort=aa_order,
            axis=alt.Axis(labelFontSize=heatmap_label_font_size, title="Mutation", titleFontSize=heatmap_title_font_size),
        ),
    )

    # Color each cell by its log2 effect, using the shared symmetric domain
    color = base.mark_rect(stroke='black').encode(
        color=alt.Color(
            'log2effect:Q',
            title="Log2(Effect)",
            scale=alt.Scale(domain=scale_domain, scheme='redblue'),
            legend=alt.Legend(
                labelFontSize=heatmap_label_font_size,
                titleFontSize=heatmap_title_font_size,
            ),
        ),
        tooltip=[
            alt.Tooltip('log2effect', title="Log2(Effect)"),
            alt.Tooltip('wildtype', title="Wildtype"),
            alt.Tooltip('mutant', title="Mutant"),
        ],
    )

    # Add a black 'x' to the wildtype amino acids
    wildtype = base.mark_text(
        color='black',
        fontSize=heatmap_wildtype_font_size
    ).encode(
        text=alt.Text('wildtype_code:N')
    )

    # Combine the heatmap and the wildtype amino acid annotation
    heatmap = color + wildtype
    effect_panels.append(heatmap)

# Stack the per-tile heatmaps vertically into a single chart
effect_heatmaps = alt.vconcat(*effect_panels).configure_legend(
    strokeColor='gray',
    padding=10,
    cornerRadius=10,
    labelLimit=500,
)

effect_heatmaps.display()

In [8]:
# Save the combined effect heatmap (stop codons excluded) as an HTML file
effect_heatmaps.save("results/summary/effects_heatmap_without_stops.html")

### With stop codons

In [9]:
# Symmetric color domain centered on 0 (the wildtype effect), wide enough to
# cover the most extreme log2 effect in either direction (stop codons included)
effect_min = effects_and_preferences_with_stops['log2effect'].min()
effect_max = effects_and_preferences_with_stops['log2effect'].max()
effect_limit = max(abs(effect_min), abs(effect_max))
scale_domain = [-effect_limit, effect_limit]

# Build one effect heatmap per tile. The panels are collected in their own
# list so that `effect_with_stop_heatmaps` only ever refers to the final chart.
effect_with_stop_panels = []
for tile_range in tile_ranges:
    # Keep only the rows whose site falls in this tile
    plot_data = effects_and_preferences_with_stops[effects_and_preferences_with_stops['site'].isin(tile_range)]

    # Base plot: reference site on x (ordered by the numeric 'site' column),
    # mutant amino acid on y (ordered by aa_order)
    base = alt.Chart(plot_data).encode(
        x=alt.X(
            'reference_site',
            sort=alt.EncodingSortField(field='site', order='ascending'),
            axis=alt.Axis(labelFontSize=heatmap_label_font_size, title="Site", titleFontSize=heatmap_title_font_size),
        ),
        y=alt.Y(
            'mutant:O',
            sort=aa_order,
            axis=alt.Axis(labelFontSize=heatmap_label_font_size, title="Mutation", titleFontSize=heatmap_title_font_size),
        ),
    )

    # Color each cell by its log2 effect, using the shared symmetric domain
    color = base.mark_rect(stroke='black').encode(
        color=alt.Color(
            'log2effect:Q',
            title="Log2(Effect)",
            scale=alt.Scale(domain=scale_domain, scheme='redblue'),
            legend=alt.Legend(
                labelFontSize=heatmap_label_font_size,
                titleFontSize=heatmap_title_font_size,
            ),
        ),
        tooltip=[
            alt.Tooltip('log2effect', title="Log2(Effect)"),
            alt.Tooltip('wildtype', title="Wildtype"),
            alt.Tooltip('mutant', title="Mutant"),
        ],
    )

    # Add a black 'x' to the wildtype amino acids
    wildtype = base.mark_text(
        color='black',
        fontSize=heatmap_wildtype_font_size
    ).encode(
        text=alt.Text('wildtype_code:N')
    )

    # Combine the heatmap and the wildtype amino acid annotation
    heatmap = color + wildtype
    effect_with_stop_panels.append(heatmap)

# Stack the per-tile heatmaps vertically into a single chart
effect_with_stop_heatmaps = alt.vconcat(*effect_with_stop_panels).configure_legend(
    strokeColor='gray',
    padding=10,
    cornerRadius=10,
    labelLimit=500,
)

effect_with_stop_heatmaps.display()

In [10]:
# Save the combined effect heatmap (stop codons included) as an HTML file
effect_with_stop_heatmaps.save("results/summary/effects_heatmap_with_stops.html")