# Heatmaps 

This notebook generates heatmaps for the paper figures. 

The code for generating the heatmaps was written by Brendan Larsen and then modified by Caelan Radford. 

Import modules:

In [1]:
import math
import os
import re
import altair as alt
import numpy as np
import pandas as pd
import scipy.stats
import yaml
import sys
import polyclonal
import natsort

# Amino-acid alphabet used for the heatmap y-axis, reordered with
# polyclonal.alphabets.biochem_order_aas.
alphabet = polyclonal.alphabets.biochem_order_aas(polyclonal.alphabets.AAS)

# Palettes available for the region (top-label) track. Previously the coloured
# palette was assigned to `figure_palette` and immediately overwritten; both are
# now kept under their own names so the active choice is explicit.
COLOR_PALETTE = ['#0072B2', '#E69F00', '#F0E442', '#009E73', '#56B4E9', "#D55E00", "#CC79A7", '#9F0162', '#8400CD']
BLACK_PALETTE = ['#000000'] * 9  # This is used for paper figures
#WHITE_PALETTE = ['#FFFFFF'] * 9

# Palette actually read by plot_entry_heatmap for the region colour scale.
figure_palette = BLACK_PALETTE

# allow more rows for Altair
_ = alt.data_transformers.disable_max_rows()

# Create the output directory for the saved figures; exist_ok makes this cell
# safe to re-run and avoids the check-then-create race.
os.makedirs('./results/heatmaps/', exist_ok=True)

Brendan's heatmap functions, with some modifications by Caelan:

In [2]:
#heatmap code from Brendan    
def prepare_top_label():
    """Build the rows that draw the region track above the heatmap.

    Reads the module-level `site_numbering_map` (its 'reference_site' and
    'region' columns) without modifying it.

    Returns:
        DataFrame with columns 'site', 'wildtype', 'type', 'mutant', 'value':
        one row per row of the numbering map, where 'site' is the reference
        site, 'value' is the region name, 'type' and 'mutant' are both the
        literal "region", and 'wildtype' is an empty string.
    """
    label_columns = ['site', 'wildtype', 'type', 'mutant', 'value']
    labelled = site_numbering_map.assign(
        site=lambda d: d['reference_site'],
        wildtype='',
        type="region",
        mutant="region",
        value=lambda d: d['region'],
    )
    return labelled[label_columns].copy()
    
# This gets called during heatmap generation. Makes a dataframe with the site, mutant, and value for each site, regardless if it was measured in DMS or not.
def make_empty_df(
    df,
    top_label_df=None,
    top_label_flag=None,
    low_entry_df=None,
    escape_flag=None,
):
    """Build the long-format frame that the heatmap layers are drawn from.

    A full grid of (site, amino acid) pairs is created from the module-level
    `ranges` (an iterable of site lists) and `alphabet`, then left-merged with
    `df` so that unmeasured combinations are kept with missing values.

    Parameters:
    - df (DataFrame): measurements with 'site', 'mutant' and 'wildtype' columns
      plus 'effect' (entry mode) or 'escape_median' (escape mode). 'site' must
      be comparable to the string sites of the grid.
    - top_label_df (DataFrame, optional): rows for the region track; appended
      only when `top_label_flag is True`.
    - top_label_flag (bool, optional): whether to append `top_label_df`.
    - low_entry_df (DataFrame, optional): rows with 'site', 'mutant',
      'wildtype' and 'effect'; in escape mode they are appended with
      type "low_effect" so they can be masked.
    - escape_flag (bool, optional): if truthy, melt 'escape_median' instead of
      'effect'.

    Returns:
        DataFrame with columns 'site', 'mutant', 'wildtype', 'type', 'value'.
    """
    # One row per (site, amino acid); sites are stored as strings.
    grid = pd.DataFrame(
        [
            {"site": str(site), "mutant": aa}
            for site_row in ranges
            for site in site_row
            for aa in alphabet
        ]
    )
    all_sites_df = pd.merge(grid, df, on=["site", "mutant"], how="left")

    if escape_flag:
        print("plotting escape")
        value_column = "escape_median"
    else:
        print("plotting entry")
        value_column = "effect"

    df_test = all_sites_df.melt(
        id_vars=["site", "mutant", "wildtype"],
        value_vars=[value_column],
        var_name="type",
        value_name="value",
    )

    if escape_flag:
        if low_entry_df is None:
            # Previously execution continued and failed on None.rename();
            # now the escape values are returned without the low-entry mask.
            print("You indicated escape but did not provide a low_entry_df")
        else:
            df_filter = low_entry_df.rename(columns={"effect": "low_effect"}).melt(
                id_vars=["site", "mutant", "wildtype"],
                value_vars=["low_effect"],
                var_name="type",
                value_name="value",
            )
            df_test = pd.concat([df_test, df_filter])

    if top_label_flag is True and top_label_df is not None:
        df_test = pd.concat([df_test, top_label_df], ignore_index=True)
    return df_test

# Make the base heatmap. This contains information about the x_axis and heatmap_sites which are important for sorting them correctly.
def make_base_heatmap(df, heatmap_sites, x_axis):
    """Return the un-marked base chart shared by the heatmap layers.

    Parameters:
    - df (DataFrame): plotting data; must carry 'site', 'mutant' and
      'mutant_rank' columns.
    - heatmap_sites (list): explicit ordering of sites along the x-axis.
    - x_axis (alt.Axis): axis configuration for the x-axis.

    Returns:
        alt.Chart with x/y encodings set and 10-pixel steps in both directions.
    """
    site_channel = alt.X("site:O", title="Site", sort=heatmap_sites, axis=x_axis)
    # Amino acids are ordered by their precomputed rank, not alphabetically.
    residue_channel = alt.Y(
        "mutant",
        title="Amino Acid",
        sort=alt.EncodingSortField(field="mutant_rank", order="ascending"),
        axis=alt.Axis(grid=False),
    )
    encoded = alt.Chart(df).encode(x=site_channel, y=residue_channel)
    return encoded.properties(width=alt.Step(10), height=alt.Step(10))


# This makes an 'empty' heatmap that shows sites that were not observed as some color (default:gray)
def make_empty_heatmap(base, background_color):
    """Return the layer that paints unobserved cells in `background_color`.

    Only rows whose type is "effect", "binding_mean" or "escape_median" and
    whose value is null are kept.
    """
    is_measurement_row = (
        (alt.datum.type == "effect")
        | (alt.datum.type == "binding_mean")
        | (alt.datum.type == "escape_median")
    )
    # `== None` is intentional: it builds an Altair expression, not a Python test.
    is_unobserved = is_measurement_row & (alt.datum.value == None)
    background = base.mark_rect(color=background_color).encode(tooltip=["site", "mutant"])
    return background.transform_filter(is_unobserved)


# This makes the white squares and X for the wildtype amino acids
def make_wildtype_heatmap(unique_wildtypes_df, strokewidth_size, heatmap_sites):
    """Return the two layers that mark the wildtype residue at each site.

    Parameters:
    - unique_wildtypes_df (DataFrame): one row per (site, wildtype) pair, with
      'type', 'value' and 'mutant_rank' columns.
    - strokewidth_size (float): outline width of the white box.
    - heatmap_sites (list): explicit ordering of sites along the x-axis.

    Returns:
        (box_layer, text_layer): a white outlined rectangle and a black "X",
        both positioned at the wildtype amino acid of each site.
    """

    def _place_on_wildtype(marked_chart):
        # Both layers share the same position, tooltip and row filter.
        return marked_chart.encode(
            x=alt.X("site:O", sort=heatmap_sites),
            y=alt.Y(
                "wildtype",
                sort=alt.EncodingSortField(field="mutant_rank", order="ascending"),
            ),
            tooltip=["site", "wildtype"],
        ).transform_filter(
            (
                (alt.datum.type == "effect")
                | (alt.datum.type == "binding_mean")
                | (alt.datum.type == 'low_effect')
                | (alt.datum.type == "escape_median")
            )
            & (alt.datum.wildtype != None)
            & (alt.datum.value != None)
        )

    box_marks = alt.Chart(unique_wildtypes_df).mark_rect(
        color="white", stroke="#000000", strokeWidth=strokewidth_size
    )
    wildtype_layer_box = _place_on_wildtype(box_marks)

    text_marks = alt.Chart(unique_wildtypes_df).mark_text(
        color="black", text="X", size=8, align="center", baseline="middle", dy=1
    )
    wildtype_layer = _place_on_wildtype(text_marks)

    return wildtype_layer_box, wildtype_layer


# This makes the actual effect heatmap, and adds a bar for the legend if its the first time through the loop
def create_effect_chart(
    base, color_scale_effect, strokewidth_size, legend_title=None, effect_legend=None
):
    """Return the layer that colours measured cells by their value.

    Parameters:
    - base (alt.Chart): chart from make_base_heatmap.
    - color_scale_effect (alt.Scale): colour scale for the quantitative value.
    - strokewidth_size (float): outline width of each cell.
    - legend_title: accepted for signature compatibility; not used here.
    - effect_legend (alt.Legend or None): legend for the colour scale; None
      hides it.

    Rows with an empty or null wildtype are dropped; rows whose type is not one
    of the measurement types are drawn transparent.
    """
    measurement_test = '(datum.type == "effect" | datum.type == "binding_mean" | datum.type == "escape_median")'
    fill = alt.condition(
        measurement_test,
        alt.Color("value:Q", scale=color_scale_effect, legend=effect_legend),
        alt.value("transparent"),
    )
    has_wildtype = (alt.datum.wildtype != "") & (alt.datum.wildtype != None)

    cells = base.mark_rect(stroke="#000000", strokeWidth=strokewidth_size)
    coloured = cells.encode(color=fill, tooltip=["site", "mutant", "wildtype", "value"])
    return coloured.transform_filter(has_wildtype)

# This makes a chart for the top_label values
def create_top_label_chart(
    base, color_scale_top_label, strokewidth_size, legend_title=None, top_label_legend=None
):
    """Return the layer that draws the region track.

    Rows whose mutant is "region" are coloured by their nominal value using
    `color_scale_top_label`; every other row is transparent. The rectangles are
    drawn with a stroke width of 0; `strokewidth_size` and `legend_title` are
    accepted for signature compatibility but not used.
    """
    region_fill = alt.condition(
        'datum.mutant == "region"',
        alt.Color("value:N", scale=color_scale_top_label, legend=top_label_legend),
        alt.value("transparent"),
    )
    track = base.mark_rect(stroke="#000000", strokeWidth=0)
    return track.encode(
        color=region_fill,
        tooltip=["site", "mutant", "wildtype", "value"],
    )

# Masks sites with low cell entry scores for binding or escape
def make_low_effect_heatmap(base, strokewidth_size, heatmap_sites):
    """Return the layer that masks low-entry cells in a fixed dark gray.

    Keeps only rows whose type is "low_effect". `heatmap_sites` is accepted for
    signature compatibility but not used.
    """
    mask_marks = base.mark_rect(
        color="#808285", stroke="#000000", strokeWidth=strokewidth_size
    )  # alternative gray: #939598
    is_low_entry = alt.datum.type == "low_effect"
    return mask_marks.encode().transform_filter(is_low_entry)


# This compiles all the different charts and returns a single chart
def compile_chart(
    df,
    heatmap_sites,
    unique_wildtypes_df,
    x_axis,
    background_color,
    color_scale_effect,
    color_scale_top_label,
    strokewidth_size=None,
    legend_title=None,
    effect_legend=None,
    top_label_legend=None,
    escape_flag=None,
):
    """Stack all heatmap layers for one row of sites into a single chart.

    Layer order, bottom to top: unobserved background, measured values,
    low-entry mask (only when `escape_flag` is truthy), region track,
    wildtype box, wildtype "X".

    Returns:
        alt.LayerChart with shared x/y scales and independent colour scales.
    """
    base = make_base_heatmap(df, heatmap_sites, x_axis)

    layers = [
        make_empty_heatmap(base, background_color),
        create_effect_chart(
            base, color_scale_effect, strokewidth_size, legend_title, effect_legend
        ),
    ]
    region_track = create_top_label_chart(
        base, color_scale_top_label, strokewidth_size, legend_title, top_label_legend
    )
    wildtype_marks = make_wildtype_heatmap(
        unique_wildtypes_df, strokewidth_size, heatmap_sites
    )

    if escape_flag:
        print("Now making dataframe for escape")
        # The mask sits above the values but below the region track.
        layers.append(make_low_effect_heatmap(base, strokewidth_size, heatmap_sites))

    layers.append(region_track)
    layers.extend(wildtype_marks)  # box first, then the "X"

    return alt.layer(*layers).resolve_scale(
        y="shared", x="shared", color="independent"
    )

def plot_entry_heatmap(
    df,
    legend_title,
    x_labels,
    null_color=None,
    ranges=None,
    effect_color=None,
    top_label_color=None,
    strokewidth_size=None,
    custom_y_axis_order=None,
    x_label_freq=10,
    top_label_flag=None,
    specific_sites=None,
    specific_sites_name=None,
    low_entry_df=None,
    escape_flag=None,
    custom_domain=None,
    subtitle_str=None,
):
    """
    Generates a customizable heatmap for deep mutational scanning (DMS) data visualization.

    Besides its arguments, this function reads the module-level names `alphabet`,
    `regions` and `figure_palette`, and (through prepare_top_label and
    make_empty_df) `site_numbering_map` and the module-level `ranges`.

    Parameters:
    - df (DataFrame): The data to visualize. Must include 'site', 'mutant' and 'wildtype', plus 'effect' (entry mode) or 'escape_median' (escape mode).
    - legend_title (str): The title of the value legend.
    - x_labels (list): Sites to show as x tick labels on the wrapped rows.
    - null_color (str, optional): Color for mutants with no observations. Default is '#d1d3d4'.
    - ranges (list of lists, optional): Sites for each wrapped row of the heatmap. If None, five rows covering sites 71-602 are used. Note that this only controls the wrapping; the (site, amino acid) grid itself is built by make_empty_df from the module-level `ranges`.
    - effect_color (str, optional): Color scheme for the values. Default is 'redblue'.
    - top_label_color (str, optional): Color scheme for the region track. If None, `regions` are mapped onto `figure_palette`.
    - strokewidth_size (float, optional): Width of the cell outlines. Default is 0.25.
    - custom_y_axis_order (sequence, optional): Custom order for the y-axis, overriding the default amino acid order.
    - x_label_freq (int, optional): Currently unused; tick labels are taken from `x_labels`.
    - top_label_flag (bool, optional): If True, the region track is included in the heatmap.
    - specific_sites (list, optional): Subset of sites to plot as a single unwrapped heatmap. If None/empty, all rows in `ranges` are plotted.
    - specific_sites_name (str, optional): Title shown above the heatmap when `specific_sites` is used.
    - low_entry_df (DataFrame, optional): Mutations to mask in gray in escape mode (rows with 'site', 'mutant', 'wildtype', 'effect').
    - escape_flag (bool, optional): If True, plot 'escape_median' instead of 'effect'; intended to be used with `low_entry_df`.
    - custom_domain (list, optional): Domain used for the value color scale. If None/empty, [-4, 2.5] is used.
    - subtitle_str (str, optional): Currently unused; the chart title that consumed it is disabled.

    Returns:
    An Altair chart object representing the generated heatmap. This chart can be further customized or directly displayed in Jupyter notebooks or other compatible environments.
    """

    top_label_df = prepare_top_label() if top_label_flag is True else None

    # Long-format frame with one row per (site, amino acid, type).
    empty_df = make_empty_df(
        df,
        top_label_df=top_label_df,
        top_label_flag=top_label_flag,
        low_entry_df=low_entry_df,
        escape_flag=escape_flag,
    )

    # Order of the amino acids on the y-axis.
    custom_order = list(alphabet) if custom_y_axis_order is None else list(custom_y_axis_order)
    if top_label_flag:
        # NOTE(review): the region rows carry mutant == "region" (see
        # prepare_top_label), so unless the custom order names "region" they get
        # no rank from this "top_label" entry. Left unchanged because the
        # rendered row order depends on it; confirm before renaming.
        custom_order = ["top_label"] + custom_order

    background_color = "#d1d3d4" if null_color is None else null_color

    # Rows used to wrap the heatmap. The default covers sites 71-602.
    if ranges is None:
        full_ranges = [
            list(range(start, end))
            for start, end in [
                (71, 181),
                (181, 291),
                (291, 401),
                (401, 511),
                (511, 603),
            ]
        ]
    else:
        full_ranges = ranges

    # Color scale for the values, centered on 0.
    color_scale_effect = alt.Scale(
        scheme="redblue" if effect_color is None else effect_color,
        domainMid=0,
        domain=custom_domain if custom_domain else [-4, 2.5],
    )

    # Color scale for the region track.
    if top_label_color is None:
        color_scale_top_label = alt.Scale(domain=regions, range=figure_palette)
    else:
        color_scale_top_label = alt.Scale(
            scheme=top_label_color, domain=[0, 1.5], reverse=True
        )

    if strokewidth_size is None:
        strokewidth_size = 0.25  # default stroke width size

    # Rank every row by its amino acid and fix the x-axis site order.
    sort_order = {mutant: rank for rank, mutant in enumerate(custom_order)}
    empty_df["mutant_rank"] = empty_df["mutant"].map(sort_order)
    heatmap_df = empty_df.sort_values("mutant_rank")
    heatmap_sites = natsort.natsorted(heatmap_df["site"].unique())

    # make_empty_df stores sites as strings, so compare as strings; otherwise
    # integer sites (e.g. the default ranges above) never match any row.
    site_keys = heatmap_df["site"].astype(str)

    def rows_for_sites(sites):
        return heatmap_df[site_keys.isin([str(site) for site in sites])]

    if specific_sites:
        # Plot only the requested sites as a single, unwrapped heatmap.
        subset_df = rows_for_sites(specific_sites)

        # For the wildtype markers, rank each row by its wildtype residue.
        unique_wildtypes_df = (
            subset_df.drop_duplicates(subset=["site", "wildtype"])
            .sort_values("site")
            .assign(mutant_rank=lambda d: d["wildtype"].map(sort_order))
            .sort_values("mutant_rank")
        )

        effect_legend = alt.Legend(
            title=legend_title,
            titleAlign="left",
            tickCount=3,
            gradientLength=75,
            titleAnchor="start",
            offset=5,
            titlePadding=2,
            titleOrient='top',
            gradientThickness=10,
        )
        top_label_legend = (
            alt.Legend(title="Region", values=regions)
            if top_label_flag is True
            else None
        )

        # Vertical tick labels, one per site.
        x_axis = alt.Axis(
            labelAngle=-90,
            title="Site",
            labels=True,
        )
        chart = compile_chart(
            subset_df,
            heatmap_sites,
            unique_wildtypes_df,
            x_axis,
            background_color,
            color_scale_effect=color_scale_effect,
            color_scale_top_label=color_scale_top_label,
            strokewidth_size=strokewidth_size,
            legend_title=legend_title,
            effect_legend=effect_legend,
            top_label_legend=top_label_legend,
            escape_flag=escape_flag,
        )
        # Kept as a one-element vconcat, as in the original code, which reported
        # errors when the layered chart was returned directly.
        return alt.vconcat(chart, title=specific_sites_name or "").resolve_scale(
            y="shared", x="shared", color="shared"
        )

    # Otherwise draw one heatmap per row of sites and stack them vertically.
    charts = []
    last_row = len(full_ranges) - 1
    for idx, subset in enumerate(full_ranges):
        subset_df = rows_for_sites(subset)
        # NOTE(review): unlike the specific_sites branch, 'mutant_rank' here is
        # inherited from the first row of each (site, wildtype) pair rather than
        # recomputed from the wildtype residue — confirm this is intended.
        unique_wildtypes_df = subset_df.drop_duplicates(subset=["site", "wildtype"])

        # Legends and the x-axis title are attached to the last row only.
        is_last_plot = idx == last_row

        effect_legend = (
            alt.Legend(
                title=legend_title,
                direction="vertical",
                orient='right',
                gradientLength=200,
                titleAnchor="middle",
                tickCount=3,
                labelAlign="left",
                titleAlign='center',
                titleOrient='left',
                offset=50,
                titleFontSize=14,
                labelFontSize=14,
                symbolSize=150,
            )
            if is_last_plot
            else None
        )
        top_label_legend = (
            alt.Legend(
                title="Region",
                orient="left",
                direction="vertical",
                titleOrient="left",
                offset=-1200,
                values=regions,
                titleFontSize=16,
                labelFontSize=14,
                symbolSize=100,
            )
            if is_last_plot and top_label_flag is True
            else None
        )
        # Vertical tick labels, shown only for the sites listed in x_labels.
        x_axis = alt.Axis(
            labelAngle=-90,
            values=x_labels,
            title="Site" if is_last_plot else None,
            labels=True,
        )
        charts.append(
            compile_chart(
                subset_df,
                heatmap_sites,
                unique_wildtypes_df,
                x_axis,
                background_color,
                color_scale_effect=color_scale_effect,
                color_scale_top_label=color_scale_top_label,
                strokewidth_size=strokewidth_size,
                legend_title=legend_title,
                effect_legend=effect_legend,
                top_label_legend=top_label_legend,
                escape_flag=escape_flag,
            )
        )

    # A chart title built from legend_title/subtitle_str is intentionally not set.
    combined_chart = (
        alt.vconcat(*charts, spacing=3)
        .resolve_scale(y="shared", x="independent", color="shared")
        .configure_axisY(titlePadding=0)
    )
    return combined_chart

In [3]:
# Load the TZM-bl entry functional effects plotted in the TRO11 figures below;
# numeric columns are rounded to two decimals. `func_effects` is also read by
# the following escape cells to build their low-entry mask.
func_effects_file = "results/func_effects/averages/TZM-bl_entry_func_effects.csv"
func_effects = pd.read_csv(func_effects_file).round(2) 
# Site numbering table; this notebook reads its 'reference_site' and 'region' columns.
site_numbering_map = pd.read_csv('data/site_numbering_map.csv')
def remap(x):
    """Return '' for the region labels excluded from the region track, else `x` unchanged."""
    excluded_labels = ('gp120', 'gp41', "signal peptide", "Cytoplasmic tail", "N-helix", "C-helix")
    return '' if x in excluded_labels else x
# Blank out the labels handled by remap() and collect the remaining region
# names, in order of first appearance, for the region track legend.
site_numbering_map['region'] = [remap(x) for x in site_numbering_map['region'].tolist()]
regions = site_numbering_map.query('region not in ["", "signal peptide", "Cytoplasmic tail", "N-helix", "C-helix"]')['region'].unique().tolist()

# Wrap the plotted sites into heatmap rows. Positions are indices into
# `reference_sites`, not site numbers.
reference_sites = site_numbering_map['reference_site'].tolist()
first_site_index = 30   # index of the first plotted site
end_site_index = 710    # one past the index of the last plotted site
sites_per_row = 120     # sites per wrapped heatmap row
label_every = 10        # an x tick label for every 10th plotted site

# Slicing stops at the end of the map instead of raising IndexError when the
# map has fewer than `end_site_index` entries.
plotted_sites = reference_sites[first_site_index:end_site_index]
ranges = [
    plotted_sites[row_start:row_start + sites_per_row]
    for row_start in range(0, len(plotted_sites), sites_per_row)
]
x_labels = plotted_sites[::label_every]

# Keep rows with times_seen >= 2 and floor effects at -5, the lower bound of
# the colour domain used below.
plot_df = func_effects.query('times_seen>=2').assign(
    effect=lambda d: d['effect'].clip(lower=-5)
)

# Entry heatmap for TRO11; options not listed are left at their None defaults.
plot_entry_heatmap(
    df=plot_df,
    legend_title='Effects on TZM-bl entry',
    x_labels=x_labels,
    ranges=ranges,
    effect_color='redblue',
    top_label_flag=True,
    custom_domain=[-5, 1.5],
).save('results/heatmaps/TRO11_cell_entry.svg')

plotting entry


In [4]:
# Escape from antibody 10-1074 (TRO11), rounded to two decimals.
escape_effects_file = "results/antibody_escape/averages/10-1074_mut_effect.csv"
escape_effects = pd.read_csv(escape_effects_file).round(2)

plot_df = escape_effects.query('times_seen>=2').copy()
# Mirror the escape values into 'effect'; the escape heatmap itself reads
# 'escape_median'.
plot_df['effect'] = plot_df['escape_median']

# Mutations to mask in gray: entry effect below -4, excluding the '*' and '-'
# mutant codes.
ledf = func_effects.query('times_seen>=2').query('effect<-4').query('mutant not in ["*", "-"]').copy()

# Series.min()/max() skip NaN, whereas the builtin min()/max() give
# order-dependent results when NaN is present.
escape_domain = [
    float(plot_df['escape_median'].min()),
    float(plot_df['escape_median'].max()),
]

plot_entry_heatmap(
    df=plot_df,
    legend_title='Effects on escape from antibody 10-1074',
    x_labels=x_labels,
    null_color=None,
    ranges=ranges,
    effect_color='redblue',
    top_label_color=None,
    strokewidth_size=None,
    custom_y_axis_order=None,
    top_label_flag=True,
    specific_sites=None,
    specific_sites_name=None,
    low_entry_df=ledf,
    escape_flag=True,
    custom_domain=escape_domain,
    subtitle_str=None,
).save('results/heatmaps/TRO11_10-1074.svg')

plotting escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape


In [5]:
# Escape from antibody 3BNC117 (TRO11), rounded to two decimals.
escape_effects_file = "results/antibody_escape/averages/3BNC117_mut_effect.csv"
escape_effects = pd.read_csv(escape_effects_file).round(2)

plot_df = escape_effects.query('times_seen>=2').copy()
# Mirror the escape values into 'effect'; the escape heatmap itself reads
# 'escape_median'.
plot_df['effect'] = plot_df['escape_median']

# Mutations to mask in gray: entry effect below -4, excluding the '*' and '-'
# mutant codes.
ledf = func_effects.query('times_seen>=2').query('effect<-4').query('mutant not in ["*", "-"]').copy()

# Series.min()/max() skip NaN, whereas the builtin min()/max() give
# order-dependent results when NaN is present.
escape_domain = [
    float(plot_df['escape_median'].min()),
    float(plot_df['escape_median'].max()),
]

plot_entry_heatmap(
    df=plot_df,
    # The legend previously named 10-1074 although this cell plots 3BNC117 data.
    legend_title='Effects on escape from antibody 3BNC117',
    x_labels=x_labels,
    null_color=None,
    ranges=ranges,
    effect_color='redblue',
    top_label_color=None,
    strokewidth_size=None,
    custom_y_axis_order=None,
    top_label_flag=True,
    specific_sites=None,
    specific_sites_name=None,
    low_entry_df=ledf,
    escape_flag=True,
    custom_domain=escape_domain,
    subtitle_str=None,
).save('results/heatmaps/TRO11_3BNC117.svg')

plotting escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape


In [6]:
#func_effects_file = "results/func_effects/averages/TZM-bl_entry_func_effects.csv"
# Switch to the BF520 data set: from here on `func_effects` and
# `site_numbering_map` refer to the sibling BF520 repository, replacing the
# TRO11 frames loaded earlier. Numeric columns are rounded to two decimals.
func_effects_file = "../HIV_Envelope_BF520_DMS_3BNC117_10-1074/results/func_effects/averages/TZM-bl_entry_func_effects.csv"
func_effects = pd.read_csv(func_effects_file).round(2) 
# Site numbering table; this notebook reads its 'reference_site' and 'region' columns.
site_numbering_map = pd.read_csv('../HIV_Envelope_BF520_DMS_3BNC117_10-1074/data/site_numbering_map.csv')
def remap(x):
    """Return '' for the region labels excluded from the region track, else `x` unchanged."""
    excluded_labels = ('gp120', 'gp41', "signal peptide", "Cytoplasmic tail", "N-helix", "C-helix")
    return '' if x in excluded_labels else x
# Blank out the labels handled by remap() and collect the remaining region
# names, in order of first appearance, for the region track legend.
site_numbering_map['region'] = [remap(x) for x in site_numbering_map['region'].tolist()]
regions = site_numbering_map.query('region not in ["", "signal peptide", "Cytoplasmic tail", "N-helix", "C-helix"]')['region'].unique().tolist()

# Wrap the plotted sites into heatmap rows. Positions are indices into
# `reference_sites`, not site numbers.
reference_sites = site_numbering_map['reference_site'].tolist()
first_site_index = 30   # index of the first plotted site
end_site_index = 688    # one past the index of the last plotted site
sites_per_row = 120     # sites per wrapped heatmap row
label_every = 10        # an x tick label for every 10th plotted site

# Slicing stops at the end of the map instead of raising IndexError when the
# map has fewer than `end_site_index` entries.
plotted_sites = reference_sites[first_site_index:end_site_index]
ranges = [
    plotted_sites[row_start:row_start + sites_per_row]
    for row_start in range(0, len(plotted_sites), sites_per_row)
]
x_labels = plotted_sites[::label_every]

# Keep rows with times_seen >= 3 and floor effects at -5, the lower bound of
# the colour domain used below.
plot_df = func_effects.query('times_seen>=3').assign(
    effect=lambda d: d['effect'].clip(lower=-5)
)

# Entry heatmap for BF520; options not listed are left at their None defaults.
plot_entry_heatmap(
    df=plot_df,
    legend_title='Effects on TZM-bl entry',
    x_labels=x_labels,
    ranges=ranges,
    effect_color='redblue',
    top_label_flag=True,
    custom_domain=[-5, 1.5],
).save('results/heatmaps/BF520_cell_entry.svg')

plotting entry


In [7]:
# Escape from antibody 10-1074 (BF520), rounded to two decimals.
escape_effects_file = "../HIV_Envelope_BF520_DMS_3BNC117_10-1074/results/antibody_escape/averages/10-1074_mut_effect.csv"
escape_effects = pd.read_csv(escape_effects_file).round(2)

plot_df = escape_effects.query('times_seen>=3').copy()
# Mirror the escape values into 'effect'; the escape heatmap itself reads
# 'escape_median'.
plot_df['effect'] = plot_df['escape_median']

# Mutations to mask in gray: entry effect below -4, excluding the '*' and '-'
# mutant codes.
ledf = func_effects.query('times_seen>=3').query('effect<-4').query('mutant not in ["*", "-"]').copy()

# Series.min()/max() skip NaN, whereas the builtin min()/max() give
# order-dependent results when NaN is present.
escape_domain = [
    float(plot_df['escape_median'].min()),
    float(plot_df['escape_median'].max()),
]

plot_entry_heatmap(
    df=plot_df,
    legend_title='Effects on escape from antibody 10-1074',
    x_labels=x_labels,
    null_color=None,
    ranges=ranges,
    effect_color='redblue',
    top_label_color=None,
    strokewidth_size=None,
    custom_y_axis_order=None,
    top_label_flag=True,
    specific_sites=None,
    specific_sites_name=None,
    low_entry_df=ledf,
    escape_flag=True,
    custom_domain=escape_domain,
    subtitle_str=None,
).save('results/heatmaps/BF520_10-1074.svg')

plotting escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape


In [8]:
# Escape from antibody 3BNC117 (BF520), rounded to two decimals.
escape_effects_file = "../HIV_Envelope_BF520_DMS_3BNC117_10-1074/results/antibody_escape/averages/3BNC117_mut_effect.csv"
escape_effects = pd.read_csv(escape_effects_file).round(2)

plot_df = escape_effects.query('times_seen>=3').copy()
# Mirror the escape values into 'effect'; the escape heatmap itself reads
# 'escape_median'.
plot_df['effect'] = plot_df['escape_median']

# Mutations to mask in gray: entry effect below -4, excluding the '*' and '-'
# mutant codes.
ledf = func_effects.query('times_seen>=3').query('effect<-4').query('mutant not in ["*", "-"]').copy()

# Series.min()/max() skip NaN, whereas the builtin min()/max() give
# order-dependent results when NaN is present.
escape_domain = [
    float(plot_df['escape_median'].min()),
    float(plot_df['escape_median'].max()),
]

plot_entry_heatmap(
    df=plot_df,
    # The legend previously named 10-1074 although this cell plots 3BNC117 data.
    legend_title='Effects on escape from antibody 3BNC117',
    x_labels=x_labels,
    null_color=None,
    ranges=ranges,
    effect_color='redblue',
    top_label_color=None,
    strokewidth_size=None,
    custom_y_axis_order=None,
    top_label_flag=True,
    specific_sites=None,
    specific_sites_name=None,
    low_entry_df=ledf,
    escape_flag=True,
    custom_domain=escape_domain,
    subtitle_str=None,
).save('results/heatmaps/BF520_3BNC117.svg')

plotting escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
