In [None]:
# Visualize per-position amino-acid heatmaps (fraction of peptides and log2 fold change vs. R0)

In [None]:
import numpy as np
import pandas as pd
import csv
import string
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import filecmp
from scipy import stats
import re

In [None]:
#### POST-PROCESSING ###################
def pivot_pos_aa(master):
    """Count how often each amino acid occurs at each peptide position.

    Parameters
    ----------
    master : pandas.DataFrame
        One row per peptide. Must contain the integer-labelled columns
        ``1`` .. ``13``; column ``k`` holds the single-letter residue found at
        position ``k`` of that peptide.

    Returns
    -------
    pandas.DataFrame
        Integer count table with the 20 amino-acid letters as index and the
        position labels ``'1'`` .. ``'13'`` (strings) as columns. Residues that
        are not one of the 20 listed letters (and missing values) are not
        counted anywhere.
    """
    position_n = [str(pos) for pos in range(1, 14)]
    index_x = ['A','G','I','L','P','V','F','W','Y','D','E','R','H','K','S','T','C','M','N','Q']

    # NOT weighted by read counts: every row of `master` contributes exactly 1.
    # value_counts() tallies a whole column at once; reindex() puts the letters
    # in the fixed display order and fills letters that never occur with 0.
    # This also avoids chained assignment (pivot[i][j] = ...), which does not
    # write through to the frame when pandas Copy-on-Write is active.
    counts = {
        pos: master[int(pos)].value_counts().reindex(index_x, fill_value=0)
        for pos in position_n
    }
    return pd.DataFrame(counts, index=index_x, columns=position_n).astype(int)



def _bin_centers(pivot_norm, bins):
    """Replace every value of `pivot_norm` by the centre of its colour bin.

    Bin ``k`` (1-based) covers ``bins[k-1] <= value < bins[k]`` and is drawn at
    ``k - 0.5`` on a colour axis running from 0 to ``len(bins) - 1``. The last
    bin also includes its right edge. Values outside ``[bins[0], bins[-1]]``
    (including NaN and +/-inf) become NaN.
    """
    edges = np.asarray(bins, dtype=float)
    n_bins = edges.size - 1
    values = pivot_norm.to_numpy(dtype=float)

    # np.digitize returns k when edges[k-1] <= value < edges[k]; 0 means the
    # value is below the first edge and n_bins + 1 means at/above the last edge
    # (NaN also lands there).
    bin_idx = np.digitize(values, edges)
    # Close the last bin on the right so that e.g. a fraction of exactly 1.0
    # is coloured instead of being left blank.
    bin_idx[values == edges[-1]] = n_bins

    in_range = (bin_idx >= 1) & (bin_idx <= n_bins)
    centers = np.where(in_range, bin_idx - 0.5, np.nan)
    return pd.DataFrame(centers, index=pivot_norm.index, columns=pivot_norm.columns)


def heatmaps_discrete(pivot_norm, labels, bins, cbar_label='Fraction of Peptides', xlabel='', ylabel='',cscheme='redblue'):
    """Draw `pivot_norm` as a heatmap with a DISCRETE colorbar and show it.

    Parameters
    ----------
    pivot_norm : pandas.DataFrame
        Numeric table to plot (rows on the y axis, columns on the x axis).
    labels : sequence
        x tick labels, one per column of `pivot_norm`.
    bins : sequence of numbers
        Increasing bin edges. ``len(bins) - 1`` colours are used and the edges
        themselves are printed as colorbar tick labels. Values outside the
        edge range are left uncoloured.
    cbar_label : str
        Label of the colorbar.
    xlabel, ylabel : str
        Axis labels.
    cscheme : {'redblue', 'blue'}
        'redblue' uses the 'bwr_r' colormap, 'blue' uses 'Blues'.

    Raises
    ------
    ValueError
        If `cscheme` is not one of the supported names or fewer than two bin
        edges are given.
    """
    cmap_names = {'redblue': 'bwr_r', 'blue': 'Blues'}
    if cscheme not in cmap_names:
        raise ValueError(
            "cscheme must be one of {}, got {!r}".format(sorted(cmap_names), cscheme)
        )

    n_bins = np.size(bins) - 1
    if n_bins < 1:
        raise ValueError("bins must contain at least two edges")
    bin_color = list(range(0, n_bins + 1))  # colorbar tick positions, one per bin edge
    index_x = list(pivot_norm.index.values)

    # Generate discrete dataframe: each cell holds the centre of its colour bin
    pivot_discrete = _bin_centers(pivot_norm, bins)

    fig, ax = plt.subplots(figsize=(16, 15))  # adjust size of figure
    sns.heatmap(
        pivot_discrete,
        cbar_kws={'label': cbar_label},
        vmin=0,
        vmax=n_bins,
        # plt.get_cmap(name, lut) resamples the colormap to n_bins colours;
        # plt.cm.get_cmap is no longer available in recent matplotlib releases.
        cmap=plt.get_cmap(cmap_names[cscheme], n_bins),
        ax=ax,
    )

    font_size = 35
    ax.set_xticklabels(labels, fontsize=font_size)
    # Row labels come from the frame itself so they always match the data order.
    ax.set_yticklabels(index_x, horizontalalignment='center', rotation=0, fontsize=font_size)
    ax.tick_params(axis='both', pad=25, length=0)

    colorbar = ax.collections[0].colorbar
    colorbar.ax.yaxis.label.set_size(font_size)  # colorbar label
    colorbar.set_ticks(bin_color)
    colorbar.set_ticklabels(bins)
    colorbar.ax.tick_params(labelsize=font_size)

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    plt.show()
    
      
    
## Call other functions ###############
def do_it(write_file_name, cluster_n):
    """Load a peptide table, count residues per position and plot the fractions.

    Parameters
    ----------
    write_file_name : str
        Path of the input CSV WITHOUT the ``.csv`` extension. The file must
        have a ``sequence`` column and, unless `cluster_n` is ``'all'``, a
        ``P1`` column.
    cluster_n : object
        ``'all'`` to use every row, otherwise only rows whose ``P1`` value
        equals `cluster_n` are kept.

    Returns
    -------
    (pivot, pivot_norm) : tuple of pandas.DataFrame
        `pivot` is the count table returned by ``pivot_pos_aa``; `pivot_norm`
        is that table divided by the total count of its first position column.

    Side effects
    ------------
    Shows two heatmaps of `pivot_norm` (red/blue and blue colour schemes).
    """
    master = pd.read_csv(write_file_name + '.csv')
    if cluster_n != 'all':
        # .copy() so the per-position columns added below go into an
        # independent frame rather than a slice of the CSV frame.
        master = master.loc[master["P1"] == cluster_n].copy()

    # Column k (integer label) holds the k-th residue of each sequence.
    for i in range(1, 14):
        master[i] = master['sequence'].str[i - 1]

    pivot = pivot_pos_aa(master)

    bins = [0, 0.02, 0.04, 0.08, 0.16, 0.32, 1]  # bins for discrete colorbar
    labels = ['-2','-1','1','2','3','4','5','6','7','8','9','10','11']

    # Normalise every cell by the total count of the FIRST position column.
    # .iloc[0] selects it by position; the Series is labelled '1'..'13'
    # (strings), so an integer key [0] relied on a deprecated fallback.
    total_first_position = pivot.sum(0).iloc[0]
    pivot_norm = pivot.astype(float) / total_first_position

    heatmaps_discrete(pivot_norm, labels, bins)  # red/blue scheme
    heatmaps_discrete(pivot_norm, labels, bins, cscheme='blue')

    return pivot, pivot_norm


In [None]:
# R0
# Reference data set: do_it() reads 'R0_cdhit-corrected_data.csv' and, because
# cluster_n is 'all', uses every row without filtering on the P1 column.
write_file_name = 'R0_cdhit-corrected_data'

# pivotR0 holds the raw counts, pivotR0_norm the normalised table; the latter
# is passed to do_it_withFC() further down as the fold-change denominator.
pivotR0, pivotR0_norm =do_it(write_file_name,'all')
display(pivotR0)

In [None]:
def do_it_withFC(write_file_name,cluster_n,pivotR0_norm):
    """Run ``do_it`` and additionally plot the log2 fold change versus a reference.

    Parameters
    ----------
    write_file_name : str
        Input CSV path without the ``.csv`` extension (forwarded to ``do_it``).
    cluster_n : object
        Cluster selector forwarded to ``do_it`` (``'all'`` or a ``P1`` value).
    pivotR0_norm : pandas.DataFrame
        Normalised reference table with the same index/columns as the table
        produced by ``do_it``; used as the denominator of the fold change.

    Returns
    -------
    (pivot, pivot_norm) : tuple of pandas.DataFrame
        The count table and normalised table returned by ``do_it``.

    Side effects
    ------------
    Shows the two heatmaps drawn by ``do_it``, one log2FC heatmap, and displays
    the count table (``display`` is the IPython/Jupyter built-in).
    """
    pivot, pivot_norm = do_it(write_file_name, cluster_n)

    # log2FC, computed on whole float frames instead of the deprecated
    # element-wise DataFrame.applymap. Cells with a zero fraction in either
    # table give +/-inf or NaN; that is expected here, so the corresponding
    # numpy warnings are silenced.
    with np.errstate(divide='ignore', invalid='ignore'):
        fc = pivot_norm.astype(float) / pivotR0_norm.astype(float)
        log2fc = np.log2(fc)

    labels = ['-2','-1','1','2','3','4','5','6','7','8','9','10','11']
    heatmaps_discrete(log2fc, labels, [-10,-2,-1,0,1,2,10], cbar_label='log2FC')

    display(pivot)

    return pivot, pivot_norm

In [None]:
# HLA-DR15 Data
# save_prefix is inserted into the input file name below; cluster_n selects
# the rows whose P1 column equals 2.
save_prefix = 'align-v3_NGS6_DR15'
cluster_n = 2

# Reads 'peptidedisplay_align-v3_NGS6_DR15_clusters.csv' and plots the
# fraction heatmaps plus the log2 fold change against pivotR0_norm, which must
# already have been computed by the R0 cell above.
pivot, pivot_norm=do_it_withFC("peptidedisplay_{}_clusters".format(save_prefix),cluster_n,pivotR0_norm)