# Plotting pileups

## Imports

In [None]:
# import standard python libraries
import matplotlib as mpl
%matplotlib inline
mpl.rcParams['figure.dpi'] = 96
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# import libraries for biological data analysis
from coolpuppy import coolpup
from plotpuppy import plotpup
import cooler
import bioframe
import cooltools
from cooltools import expected_cis
from cooltools.lib import plotting

import bbi

## Enrichment calculation

In [None]:
#Viraat's new calculation
#Modified 2022/10/04 by Miles to try to avoid NaN values and correct an issue with the background sum, 
#and generally make the code a little more streamlined
def enrichmentCalc(mtx, dotWindow):
    """Return the enrichment of the central dot of a pileup over its corners.

    The signal is the NaN-ignoring sum of a box centred on the middle pixel
    of ``mtx``. The background is the average of the NaN-ignoring sums of two
    boxes of the same size, one in the top-left corner and one in the
    bottom-right corner of ``mtx``.

    Args:
        mtx: 2D NumPy array holding the averaged pileup. It is assumed to be
            square; only its first dimension is measured.
        dotWindow: Side length, in pixels, of the box measured around the
            centre pixel and of the two corner boxes. Should be odd. An even
            value is treated as the next smaller odd value so that all three
            boxes always have the same size.

    Returns:
        The dot sum divided by the mean of the two background sums. If the
        background sums to zero the NumPy division yields inf/NaN rather
        than raising.

    Raises:
        ValueError: If the effective box is smaller than one pixel or does
            not fit inside the matrix.
    """
    #Dimension of array side (should be square)
    sideLength = len(mtx)
    #Index of the centre pixel
    midPoint = (sideLength - 1) // 2
    #Number of pixels on each side of the centre pixel inside the box
    buffer = (dotWindow - 1) // 2
    #Actual side length of every box. Using this for the corner boxes as well
    #keeps the three boxes the same size even if dotWindow is even.
    boxSize = 2 * buffer + 1

    #A box that does not fit would silently produce wrapped or empty slices
    if boxSize < 1 or boxSize > sideLength:
        raise ValueError(
            "dotWindow=%r does not fit in a matrix of side %d" % (dotWindow, sideLength)
        )

    #Sum of the pixels in the box around the dot, ignoring NaN values
    dotSum = np.nansum(mtx[midPoint-buffer:midPoint+buffer+1, midPoint-buffer:midPoint+buffer+1])

    #Sums of the top-left and bottom-right corner boxes, ignoring NaN values
    backgroundSum1 = np.nansum(mtx[0:boxSize, 0:boxSize])
    backgroundSum2 = np.nansum(mtx[sideLength-boxSize:sideLength, sideLength-boxSize:sideLength])

    #Enrichment is the dot sum over the average of the two background sums
    enrichment = dotSum / ((backgroundSum1 + backgroundSum2)/2)

    return enrichment

## Inputs

In [None]:
#Resolution of the mcool layer to load (in bp)
resolution = 250
#Paths to the mcool files, one string per dataset
clrfiles = [
    "mcoollocation1",
    "mcoollocation2",
    "mcoollocation3",
]
#Condition label for each mcool, in the same order as clrfiles
conditions = [
    "condition1",
    "condition2",
    "condition3",
]
#Name of each loop type, in the same order as loopFiles
loopTypesNames = [
    "loop",
    "type",
    "names",
]
#Paths to the loop files (bedpe), one per loop type
loopFiles = [
    "looplocation1",
    "looplocation2",
    "looplocation3",
]

#RCMC regions of the mcools to look at.
#One row per region: chromosome (string), start (number), end (number), name of region (string)
regions = pd.DataFrame(
    data=[
        ('chrA', 1, 100, 'regionname1'),
        ('chrB', 1, 100, 'regionname2'),
        ('chrC', 1, 100, 'regionname3'),
    ],
    columns=['chrom', 'start', 'end', 'name'],
)
#Paths to the cis expected tables from cooltools (.tsv), one per mcool
expectedFiles = [
    "expectedlocation1",
    "expectedlocation2",
    "expectedlocation3",
]
#Directory the figures are written to (the file name is appended directly,
#so keep the trailing slash)
saveDir = '/a/directory/on/your/system/'

#Size of the area flanking the dot on each side
flankDist = 10000
#Size of the box measured around the dot, and of the boxes at the edges of
#the region. Keep this odd: an odd value gives a whole number of pixels on
#each side of the centre pixel.
dotWindow = 5



## Read in the loops

In [None]:
#######Don't change this section#######
#Column names given to the six coordinate fields of each loop file
#(this is constant - do not change)
colNames = ['chrom1', 'start1', 'end1', 'chrom2', 'start2', 'end2']
#Read each loop file into a DataFrame and collect them in loopTypes, in the
#same order as loopFiles.
#usecols limits parsing to the first six fields. Without it, a bedpe file
#that carries extra columns (name, score, strands, ...) has more fields than
#names, and pandas would turn the leading fields into the index, shifting
#the coordinates into the wrong columns.
loopTypes = [
    pd.read_csv(
        loopFile,
        sep='\t',
        names=colNames,
        header=None,
        usecols=list(range(len(colNames))),
    )
    for loopFile in loopFiles
]

## Run the script

In [None]:
#Process every cooler together with its condition label and expected table
for clrIdx, clrPath in enumerate(clrfiles):
    #Label of this dataset, used in the output file name
    condition = conditions[clrIdx]
    #Expected table belonging to this cooler (tab-separated)
    expected = pd.read_csv(expectedFiles[clrIdx], sep='\t')
    #Open the cooler at the requested resolution
    coolerUri = clrPath + '::/resolutions/' + str(resolution)
    clr = cooler.Cooler(coolerUri)
    #One pileup figure per loop type
    for loopIdx, loops in enumerate(loopTypes):
        loopsName = loopTypesNames[loopIdx]
        #Pile up the loops of this type, with flankDist around each one
        stack = cooltools.pileup(
            clr,
            loops,
            view_df=regions,
            expected_df=expected,
            flank=flankDist,
        )
        #Collapse the stack to one matrix by averaging over axis 2, ignoring NaN
        #NOTE(review): assumes the individual snippets lie along axis 2 of the
        #stack - confirm against the installed cooltools version
        mtx = np.nanmean(stack, axis=2)
        #Enrichment of the central dot over the corner boxes
        enrichment = enrichmentCalc(mtx, dotWindow)

        #Heatmap of the log2 of the averaged pileup
        log2Mtx = np.log2(mtx)
        plt.imshow(log2Mtx, vmin=-2.5, vmax=2.5, cmap='coolwarm')
        plt.colorbar(label='log2 mean obs/exp')

        #Five evenly spaced ticks across the matrix, labelled in kbp
        #relative to the middle tick
        lastPixel = flankDist*2//resolution
        ticks_pixels = np.linspace(0, lastPixel, 5)
        pixelOffsets = ticks_pixels - ticks_pixels[-1]/2
        ticks_kbp = (pixelOffsets*resolution//1000).astype(int)
        plt.xticks(ticks_pixels, ticks_kbp)
        plt.yticks(ticks_pixels, ticks_kbp)
        plt.xlabel('relative position, kbp')
        plt.ylabel('relative position, kbp')

        #Write the rounded enrichment value onto the plot
        plt.text(1, 1, round(enrichment, 2))

        #Save the figure, then clear it so the next pileup starts clean
        outputPath = ''.join((
            saveDir,
            'LoopPileups_',
            condition,
            '_',
            loopsName,
            '_',
            str(resolution),
            'bp_',
            str(flankDist),
            'bp.pdf',
        ))
        plt.savefig(outputPath, dpi=1200)
        plt.clf()

