### Figure 4D Heatmap Generator
- Reads in data from file specified by `INPUT_CSV` and outputs heatmap to destination specified by `OUTPUT_PATH`
- `FIG_SIZE` sets the dimensions of the final plot as `[width, height]`, in inches
- If `FILTER_CPM` is set to true, genes without at least one expression point >= `CPM_CUTOFF` will be filtered out of the final result
- If `RAW_COUNT` is set to true, the heatmap will plot raw data values on a color scale from 0 to `V_MAX` instead of normalized counts (row-wise z-scores shown on a scale from -3 to 3)
- Further customization can be achieved by changing parameters for the `sns.clustermap` function according to the [official Seaborn docs](https://seaborn.pydata.org/generated/seaborn.clustermap.html)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# --- Input / output locations ---
INPUT_CSV = "raw/top500_genes.csv"   # CSV that is read into the DataFrame
OUTPUT_PATH = "out/top500_hm.pdf"    # destination of the saved heatmap

# --- Figure geometry: [width, height] in inches ---
FIG_SIZE = [10, 10]

# --- Optional CPM filter ---
# When FILTER_CPM is True, rows that never reach CPM_CUTOFF are dropped.
FILTER_CPM = True
CPM_CUTOFF = 100

# --- Colour scaling ---
# RAW_COUNT = True plots the raw values on a scale from 0 to V_MAX;
# V_MAX is ignored when RAW_COUNT is False.
RAW_COUNT = False
V_MAX = 15000

In [None]:
# Load the expression table and index its rows by the 'Gene' column.
df = pd.read_csv(INPUT_CSV).set_index('Gene')

if FILTER_CPM:
    # Columns inspected by the CPM filter.
    cpm_columns = ['NAIP2C1', 'NEUROG2', 'NA', 'PSC', 'NAIP2', 'CTL']
    # Keep a row when at least one of those columns reaches the cutoff.
    reaches_cutoff = (df[cpm_columns] >= CPM_CUTOFF).any(axis=1)
    df = df.loc[reaches_cutoff]

# Show the (possibly filtered) table in the notebook output.
display(df)


In [None]:
    
# Choose the settings that differ between the two rendering modes; every
# other clustermap argument is shared and passed once below.
if RAW_COUNT:
    # Linear colour scheme (better for raw counts), scaled from 0 to V_MAX.
    cmap = sns.cubehelix_palette(start=.5, rot=-.75, as_cmap=True)
    z_score = None
    color_min, color_max = 0, V_MAX
else:
    # Purple/green diverging colour scheme for row-wise z-scores.
    # NOTE(review): as_cmap is not set here, unlike the RAW_COUNT branch, so
    # this is a list of colours rather than a continuous colormap - confirm
    # that is intended.
    cmap = sns.diverging_palette(150, 275, s=80, l=55)
    z_score = 0
    color_min, color_max = -3, 3

fig_width, fig_height = FIG_SIZE[0], FIG_SIZE[1]

# Cluster both rows and columns and draw the heatmap.
heatmap = sns.clustermap(
    data=df,
    z_score=z_score,
    cmap=cmap,
    vmin=color_min,
    vmax=color_max,
    row_cluster=True,
    col_cluster=True,
    yticklabels=False,
    colors_ratio=0.01,
    dendrogram_ratio=.1,
    cbar_pos=(.02, .5, .03, .15),
    figsize=(fig_width, fig_height),
)

# Write the current figure (the clustermap just drawn) to OUTPUT_PATH.
figure = plt.gcf()
plt.savefig(OUTPUT_PATH, dpi=400)

Written by Manan Chopra @ Wahlin Lab  
Last updated Oct 7 2022