In [None]:
#Import the packages we will use
#Utilities
import os
import re
import itertools
from itertools import combinations
import glob
import pickle
import argparse

#Data Management
import numpy as np
from numpy import diff
import pandas as pd
import h5py
import scipy
from scipy.stats import linregress
from scipy import ndimage
from functools import partial
from scipy.linalg import toeplitz

#Plotting
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import gridspec
from matplotlib import cm
from matplotlib.gridspec import GridSpec
from matplotlib.gridspec import GridSpecFromSubplotSpec
import matplotlib.colors as colors
from matplotlib.colors import ListedColormap
import seaborn as sns
import upsetplot
from upsetplot import UpSet

#Genomics
import pairtools
import cooler
import cooltools
# Import python package for working with cooler files and tools for analysis
import cooler
import cooltools.lib.plotting
import bioframe
from bioframe import overlap
import bbi
from cooltools import insulation

In [None]:
# Short condition keys, ordered replicate-major: R1, R2, then the merged R1R2 set.
# Within each replicate the order is WT Ctrl, WT ATRA, BKO Ctrl, BKO ATRA.
conditions = [
    f'{genotype}_{treatment}_{replicate}'
    for replicate in ('R1', 'R2', 'R1R2')
    for genotype in ('WT', 'BKO')
    for treatment in ('Ctrl', 'ATRA')
]

# Sample prefixes used to build input file names, listed in the same order as `conditions`.
long_names = dict(zip(conditions, [
    'CA-HiC-Dpn-SH-SY5Y-WT-Ctrl-4-51-R1-T1',
    'CA-HiC-Dpn-SH-SY5Y-WT-ATRA-5days-4-51-R1-T1',
    'CA-HiC-Dpn-SH-SY5Y-BKO98-Ctrl-4-51-R1-T1',
    'CA-HiC-Dpn-SH-SY5Y-BKO98-ATRA-5days-4-51-R1-T1',
    'CA-HiC-Dpn-SH-SY5Y-WT-Ctrl-4-52-R2-T1',
    'CA-HiC-Dpn-SH-SY5Y-WT-ATRA-5days-4-52-R2-T1',
    'CA-HiC-Dpn-SH-SY5Y-BKO98-Ctrl-4-52-R2-T1',
    'CA-HiC-Dpn-SH-SY5Y-BKO98-ATRA-5days-4-52-R2-T1',
    'CA-HiC-Dpn-SH-SY5Y-WT-Ctrl-4-51-and-4-52-R1R2',
    'CA-HiC-Dpn-SH-SY5Y-WT-ATRA-5days-4-51-and-4-52-R1R2',
    'CA-HiC-Dpn-SH-SY5Y-BKO98-Ctrl-4-51-and-4-52-R1R2',
    'CA-HiC-Dpn-SH-SY5Y-BKO98-ATRA-5days-4-51-and-4-52-R1R2',
]))

In [None]:
# Plot styling per sample: colour encodes the genotype/treatment group,
# line style encodes the replicate (R1 dashed, R2 dotted, merged R1R2 solid).
sampleColors = {
    f'{group}_{replicate}': color
    for replicate in ('R1', 'R2', 'R1R2')
    for group, color in (
        ('WT_Ctrl', '#17BECF'),
        ('WT_ATRA', '#574D68'),
        ('BKO_Ctrl', '#D62728'),
        ('BKO_ATRA', '#C6A15B'),
    )
}

sampleLineStyles = {
    f'{group}_{replicate}': linestyle
    for replicate, linestyle in (('R1', '--'), ('R2', ':'), ('R1R2', '-'))
    for group in ('WT_Ctrl', 'WT_ATRA', 'BKO_Ctrl', 'BKO_ATRA')
}

In [None]:
# Pairwise comparisons as (control group, treatment group); the same four
# comparisons are made within each separate replicate and within the merged set.
_comparison_pairs = [
    ('WT_Ctrl', 'WT_ATRA'),
    ('WT_Ctrl', 'BKO_Ctrl'),
    ('WT_ATRA', 'BKO_ATRA'),
    ('BKO_Ctrl', 'BKO_ATRA'),
]

# Separate replicates: all R1 comparisons first, then all R2 comparisons.
SepCtrlConds = [f'{ctrl}_{rep}' for rep in ('R1', 'R2') for ctrl, _ in _comparison_pairs]
SepTreatConds = [f'{treat}_{rep}' for rep in ('R1', 'R2') for _, treat in _comparison_pairs]

# Merged replicates.
ComboCtrlConds = [f'{ctrl}_R1R2' for ctrl, _ in _comparison_pairs]
ComboTreatConds = [f'{treat}_R1R2' for _, treat in _comparison_pairs]

In [None]:
# Project root, relative to this notebook. Inputs are read from `{outDataDir}/data`;
# most outputs are written under `{outDataDir}/data` and `{outDataDir}/figures`.
outDataDir = '..'

In [None]:
#Comparing gene expression changes vs compartment switches

In [None]:
# Load the per-arm cis eigenvector table (tab-separated) for every condition.
binsize = 250000
eigs = {}
for cond in conditions:
    eig_path = f'{outDataDir}/data/{long_names[cond]}.{binsize//1000}kb.mapq30.byarm.eigs.cis.vecs.txt'
    eigs[cond] = pd.read_csv(eig_path, sep = '\t')

In [None]:
# Preview one eigenvector table. `cond` is whatever the loading loop left behind,
# i.e. the last entry of `conditions`.
eigs[cond]

In [None]:
# log2 ratio of E1 (treatment over control) for each merged-replicate comparison,
# keyed as '<treat>vs<ctrl>'.
# NOTE(review): E1 is signed (later cells split genes on E1 > 0 / E1 < 0), so np.log2
# yields NaN wherever either value is negative. The later cells use plain E1
# differences instead -- confirm whether this dict is still needed.
log2fc_eigs = {}
for ctrlcond, treatcond in zip(ComboCtrlConds, ComboTreatConds):
    log2fc_eigs[f'{treatcond}vs{ctrlcond}'] = np.log2(eigs[treatcond]['E1']) - np.log2(eigs[ctrlcond]['E1'])

In [None]:
# Bin-level E1 in WT control vs. WT ATRA (merged replicates).
# x/y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.scatterplot(x = eigs['WT_Ctrl_R1R2']['E1'], y = eigs['WT_ATRA_R1R2']['E1'], alpha = 0.2)
plt.xlabel('WT Ctrl')
plt.ylabel('WT ATRA')

In [None]:
# log2 ratio of E1 (ATRA over Ctrl) in WT against the same ratio in the Top2B KO.
# x/y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
# NOTE(review): E1 is signed, so np.log2 returns NaN for any bin where either value is
# negative and those bins are not drawn; later cells use the plain E1 difference instead.
wt_log2fc_e1 = np.log2(eigs['WT_ATRA_R1R2']['E1']) - np.log2(eigs['WT_Ctrl_R1R2']['E1'])
bko_log2fc_e1 = np.log2(eigs['BKO_ATRA_R1R2']['E1']) - np.log2(eigs['BKO_Ctrl_R1R2']['E1'])

sns.scatterplot(x = wt_log2fc_e1, y = bko_log2fc_e1, alpha = 0.2)
plt.xlabel('WT Log2FC E1')
plt.ylabel('BKO Log2FC E1')

In [None]:
# Load the 24 hr differential-expression table (CSV) from `gene_data_dir`.
# Later cells read its per-comparison `*_log2FC` / `*_padj` columns and the gene
# coordinate columns (`chr`, `start`, `end`, `strand`).
gene_data_dir = '..'
deg_genes_24hr = pd.read_csv(f'{gene_data_dir}/SHSY2019_ATRA_24hr_union_table_test_all_log2.csv')

In [None]:
# Display the differential-expression table.
deg_genes_24hr

In [None]:
# Spot-check a single gene by symbol (CRABP2).
deg_genes_24hr[deg_genes_24hr['symbol'] == 'CRABP2']

In [None]:
# log2 fold-change column names of the four expression comparisons.
DEG_Comparisons = [
    f'{first}_{second}_log2FC'
    for first, second in (
        ('WT_C', 'WT_R'),
        ('BKO_C', 'BKO_R'),
        ('WT_C', 'BKO_C'),
        ('WT_R', 'BKO_R'),
    )
]

In [None]:
#New approach
#Separate out genes that are up or down regulated in WT differentiation, but not in Top2B KO differentiation, or are
#changed the same in both - at 24 hr timepoint
#Plot compartment eig1 values as boxplots for each of these classes of genes in each 5 day sample

In [None]:
# Differential-expression calls at the 24 hr timepoint.
# The cutoff is applied to the log2 fold-change columns: a gene is called up (down)
# when its log2FC is > 1.5 (< -1.5) and its adjusted p-value is < 0.05.
log2fc_cutoff = 1.5
padj_cutoff = 0.05

wt_log2fc = deg_genes_24hr['WT_C_WT_R_log2FC']
wt_significant = deg_genes_24hr['WT_C_WT_R_padj'] < padj_cutoff
bko_log2fc = deg_genes_24hr['BKO_C_BKO_R_log2FC']
bko_significant = deg_genes_24hr['BKO_C_BKO_R_padj'] < padj_cutoff

# WT differentiation (WT_C vs WT_R)
WTDiff_UpReg = deg_genes_24hr[(wt_log2fc > log2fc_cutoff) & wt_significant]
WTDiff_DownReg = deg_genes_24hr[(wt_log2fc < -log2fc_cutoff) & wt_significant]

# Top2B KO differentiation (BKO_C vs BKO_R)
Top2BKODiff_UpReg = deg_genes_24hr[(bko_log2fc > log2fc_cutoff) & bko_significant]
Top2BKODiff_DownReg = deg_genes_24hr[(bko_log2fc < -log2fc_cutoff) & bko_significant]


In [None]:
# Split the calls into genotype-specific and shared sets by Ensembl gene id.
wt_up_ids = WTDiff_UpReg['ensembl_geneid']
wt_down_ids = WTDiff_DownReg['ensembl_geneid']
bko_up_ids = Top2BKODiff_UpReg['ensembl_geneid']
bko_down_ids = Top2BKODiff_DownReg['ensembl_geneid']

# Changed in WT but not (in the same direction) in the Top2B KO
WTnotTop2B_UpReg = WTDiff_UpReg[~wt_up_ids.isin(bko_up_ids)]
WTnotTop2B_DownReg = WTDiff_DownReg[~wt_down_ids.isin(bko_down_ids)]

# Changed in the Top2B KO but not (in the same direction) in WT
Top2BnotWT_UpReg = Top2BKODiff_UpReg[~bko_up_ids.isin(wt_up_ids)]
Top2BnotWT_DownReg = Top2BKODiff_DownReg[~bko_down_ids.isin(wt_down_ids)]

# Changed in the same direction in both genotypes (rows taken from the WT tables)
Both_UpReg = WTDiff_UpReg[wt_up_ids.isin(bko_up_ids)]
Both_DownReg = WTDiff_DownReg[wt_down_ids.isin(bko_down_ids)]

In [None]:
#Overlap each gene list with eigens for all samples at gene locations

In [None]:
# Gene-list tables keyed by a short label.
Gene_Lists = dict(
    WTOnly_Up = WTnotTop2B_UpReg,
    WTOnly_Down = WTnotTop2B_DownReg,
    BKOOnly_Up = Top2BnotWT_UpReg,
    BKOOnly_Down = Top2BnotWT_DownReg,
    Both_Up = Both_UpReg,
    Both_Down = Both_DownReg,
)

# Labels in plotting order (same as the insertion order above).
Gene_List_Names = list(Gene_Lists)

In [None]:
# Add a 'chrom' column with a 'chr' prefix, built from the table's 'chr' column.
# The gene lists are boolean-filtered slices of `deg_genes_24hr`; assigning into them
# with .loc triggers SettingWithCopyWarning, so build a new frame with .assign and
# store it back. Re-running the cell gives the same result.
for cond in Gene_List_Names:
    Gene_Lists[cond] = Gene_Lists[cond].assign(
        chrom = 'chr' + Gene_Lists[cond]['chr'].astype(str)
    )

In [None]:
# Columns carried from the gene lists into the bedframes (interval columns first).
gene_bedframe_cols = [
    'chrom', 'start', 'end', 'strand', 'ensembl_geneid', 'symbol', 'Category',
    'WT_C_WT_R_log2FC', 'WT_C_WT_R_padj', 'BKO_C_BKO_R_log2FC', 'BKO_C_BKO_R_padj',
    'WT_C_BKO_C_log2FC', 'WT_C_BKO_C_padj', 'WT_R_BKO_R_log2FC', 'WT_R_BKO_R_padj', 'description']

# One sanitized bedframe per gene list.
gene_bioframes = {}
for cond in Gene_List_Names:
    gene_table = Gene_Lists[cond]
    gene_bioframes[cond] = bioframe.sanitize_bedframe(gene_table[gene_bedframe_cols])

In [None]:
# One sanitized bedframe of E1 bins per condition.
eig_bedframe_cols = ['chrom', 'start', 'end', 'E1']
eigs_bf = {}
for cond in conditions:
    eig_table = eigs[cond]
    eigs_bf[cond] = bioframe.sanitize_bedframe(eig_table[eig_bedframe_cols])

In [None]:
# Overlap every gene list with the E1 bins of every condition.
# A gene that spans several bins produces one overlap row per bin, so the rows are
# grouped back to one record per gene and the bin-level E1 values are averaged.
# Result: olap_bf[gene_list][condition] is a Series named 'E1_<condition>' indexed by
# the gene-level key columns below.
# NOTE(review): groupby drops rows with a missing value in any key column by default,
# so genes lacking e.g. a symbol or a log2FC value do not appear in the result.
gene_key_cols = [
    'chrom', 'start', 'end', 'strand', 'ensembl_geneid', 'symbol', 'Category',
    'WT_C_WT_R_log2FC', 'BKO_C_BKO_R_log2FC', 'WT_C_BKO_C_log2FC', 'WT_R_BKO_R_log2FC']

olap_bf = {}

for genes in Gene_List_Names:
    olap_bf[genes] = {}
    for cond in conditions:
        gene_bin_overlap = bioframe.overlap(
            gene_bioframes[genes],
            eigs_bf[cond],
            how = 'inner',
            suffixes = ('', f'_{cond}'))
        # Select the E1 column before averaging. Calling .mean() on the whole grouped
        # frame also tries to average the remaining non-numeric columns (such as
        # 'description'), which raises a TypeError in pandas >= 2.0.
        olap_bf[genes][cond] = gene_bin_overlap.groupby(gene_key_cols)[f'E1_{cond}'].mean()

In [None]:
# For each condition, stack the per-gene E1 tables of all gene lists into one frame.
# The tables are collected first and concatenated once, instead of growing an
# initially empty DataFrame one concat at a time.
olap_bf_all_degenes = {}
for cond in conditions:
    per_list_tables = []
    for genes in Gene_List_Names:
        per_list_tables.append(olap_bf[genes][cond].reset_index())
    olap_bf_all_degenes[cond] = pd.concat(per_list_tables, ignore_index = True)

In [None]:
# Preview the stacked gene-level E1 table for one condition.
olap_bf_all_degenes['BKO_Ctrl_R1R2']

In [None]:
# Gene-level comparison in the Top2B KO: change in E1 (ATRA - Ctrl) against the
# expression log2 fold change.
# - E1 is signed, so the change is taken as a difference rather than a log2 ratio
#   (np.log2 of a negative value is NaN).
# - 'BKO_C_BKO_R_log2FC' is already a log2 fold change and is plotted as is.
# - x/y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
bko_ctrl_genes = olap_bf_all_degenes['BKO_Ctrl_R1R2']
bko_atra_genes = olap_bf_all_degenes['BKO_ATRA_R1R2']

# The two tables are subtracted row by row, so they must list the same genes in the same order.
assert bko_ctrl_genes['ensembl_geneid'].equals(bko_atra_genes['ensembl_geneid'])

sns.scatterplot(
    x = bko_atra_genes['E1_BKO_ATRA_R1R2'] - bko_ctrl_genes['E1_BKO_Ctrl_R1R2'],
    y = bko_ctrl_genes['BKO_C_BKO_R_log2FC'],
)
plt.xlabel('BKO E1 difference (ATRA - Ctrl)')
plt.ylabel('BKO_C_BKO_R_log2FC')

In [None]:
#Now plot! - need to reformat first

In [None]:
# Long-format table for the boxplot: one row per gene, gene list and condition, with
# columns GeneList / Condition / E1.
# DataFrame.append was removed in pandas 2.0, so the pieces are collected in a list
# and concatenated once.
boxplot_frames = []
for genes in Gene_List_Names:
    for cond in conditions[8:12]:  # the four merged R1R2 conditions
        bf = olap_bf[genes][cond].reset_index(drop = True).to_frame('E1')
        bf['GeneList'] = genes
        bf['Condition'] = cond
        boxplot_frames.append(bf[['GeneList', 'Condition', 'E1']])

boxplot_df = pd.concat(boxplot_frames, ignore_index = True)
        

In [None]:
# Notched boxplot of gene-level E1 per DE gene list, one box per condition.
sns.set_style("ticks")
sns.set_context("paper")
cmap_bar = sns.color_palette(['#a6cee3', '#1f78b4', '#b2df8a', '#33a02c'])
gs = GridSpec(nrows= 1, ncols=1, wspace = 0.6, hspace = 0.6)

plt.figure(figsize=(10, 4))
ax = plt.subplot(gs[0])

sns.boxplot(
    x = boxplot_df['GeneList'],
    y = boxplot_df['E1'],
    hue = boxplot_df['Condition'],
    palette = cmap_bar,
    notch = True,
    showfliers = False,
    ax = ax,
)

# Legend goes outside the axes, to the right.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.1, frameon = False)
ax.set_title('E1 by DE Gene Type')
ax.set_xlabel('DE Gene List')
ax.figure.savefig(f'../../figures/E1_BoxPlot_ByGeneList_AllSamples.png', dpi = 300, bbox_inches = 'tight')

In [None]:
# Per-gene E1 change (treatment - control) for each genotype.
# A difference is used rather than a fold change because E1 takes negative values.
# DataFrame.append was removed in pandas 2.0, so the pieces are collected in a list
# and concatenated once.
boxplot_diff_frames = []
for genes in Gene_List_Names:
    for (ctrlcond, treatcond) in zip(['WT_Ctrl_R1R2', 'BKO_Ctrl_R1R2'], ['WT_ATRA_R1R2', 'BKO_ATRA_R1R2']):
        ctrl_e1 = olap_bf[genes][ctrlcond]
        # Put the treatment values in the control's gene order, so that the row-wise
        # subtraction below always pairs a gene with itself.
        treat_e1 = olap_bf[genes][treatcond].reindex(ctrl_e1.index)

        bfctrl = ctrl_e1.reset_index(drop = True).to_frame('E1')
        bfctrl['GeneList'] = genes
        bfctrl['Condition'] = f'{treatcond}vs{ctrlcond}'

        bftreat = treat_e1.reset_index(drop = True).to_frame('E1')

        bfctrl['E1_Diff'] = bftreat['E1'] - bfctrl['E1']

        boxplot_diff_frames.append(bfctrl[['GeneList', 'Condition', 'E1_Diff']])

boxplot_diff_df = pd.concat(boxplot_diff_frames, ignore_index = True)
        

In [None]:
# Inspect one gene-level E1 table with its key columns. `genes` and `ctrlcond` are
# whatever the preceding loop left behind (its last iteration).
pd.DataFrame(olap_bf[genes][ctrlcond].reset_index())

In [None]:
# Notched boxplot of the per-gene E1 change for each DE gene list and genotype.
sns.set_style("ticks")
sns.set_context("paper")
cmap_bar = sns.color_palette(['#a6cee3', '#b2df8a'])
gs = GridSpec(nrows= 1, ncols=1, wspace = 0.6, hspace = 0.6)
plt.figure(figsize=(7, 6))

ax = plt.subplot(gs[0])
sns.boxplot(x = boxplot_diff_df['GeneList'], y = boxplot_diff_df['E1_Diff'], hue = boxplot_diff_df['Condition'],
           palette = cmap_bar, ax = ax, notch = True, showfliers = False)
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.1, frameon = False)
plt.title('E1 Difference By Gene Expression Change (24hrs)')
# E1_Diff is treatment minus control within each genotype (ATRA - Ctrl), not ATRA - WT.
plt.ylabel('ATRA-Ctrl E1')
plt.xlabel('Gene List')

In [None]:
#What about splitting based on compartment type in WT control?

#WT genes that are in B comp to start and go up in expression
#WT genes that are in A comp to start and go up in expression
#WT genes that are in B comp to start and go down in expression
#WT genes that are in A comp to start and go down in expression

In [None]:
# Display `bf` as left behind by the most recent loop that assigned it.
bf

In [None]:
# A comp comparisons: keep only genes whose mean E1 in WT control is positive, then
# collect their E1 in each merged-replicate condition.
gene_info_cols = ['chrom', 'start', 'end', 'strand', 'ensembl_geneid', 'symbol', 'Category']

boxplot_frames_A = []
for genes in Gene_List_Names:
    # The WT-control selection does not depend on `cond`, so compute it once per gene list.
    SelectGenes = olap_bf[genes]['WT_Ctrl_R1R2'].reset_index()
    SelectGenes = SelectGenes[SelectGenes['E1_WT_Ctrl_R1R2'] > 0]

    for cond in conditions[8:12]:  # the four merged R1R2 conditions
        bf = olap_bf[genes][cond].reset_index()[gene_info_cols + [f'E1_{cond}']]
        bf.columns = gene_info_cols + ['E1']
        bf['GeneList'] = genes
        bf['Condition'] = cond
        bf = bf[bf['ensembl_geneid'].isin(SelectGenes['ensembl_geneid'])]
        boxplot_frames_A.append(bf[['GeneList', 'Condition', 'E1']])

# DataFrame.append was removed in pandas 2.0; concatenate the pieces once instead.
boxplot_df_A = pd.concat(boxplot_frames_A, ignore_index = True)
        

In [None]:
# Display the long-format E1 table for the genes selected on E1 > 0 in WT control.
boxplot_df_A

In [None]:
# Notched boxplot of E1 per gene list and condition for the WT-control A-compartment genes.
fig, ax = plt.subplots(figsize=(8, 8))
sns.boxplot(
    x = boxplot_df_A['GeneList'],
    y = boxplot_df_A['E1'],
    hue = boxplot_df_A['Condition'],
    notch = True,
    ax = ax,
)
# Legend goes outside the axes, to the right.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.1)
ax.set_title('WT Ctrl A compartment genes')

In [None]:
# B comp comparisons: keep only genes whose mean E1 in WT control is negative, then
# collect their E1 in each merged-replicate condition.
gene_info_cols = ['chrom', 'start', 'end', 'strand', 'ensembl_geneid', 'symbol', 'Category']

boxplot_frames_B = []
for genes in Gene_List_Names:
    # The WT-control selection does not depend on `cond`, so compute it once per gene list.
    SelectGenes = olap_bf[genes]['WT_Ctrl_R1R2'].reset_index()
    SelectGenes = SelectGenes[SelectGenes['E1_WT_Ctrl_R1R2'] < 0]

    for cond in conditions[8:12]:  # the four merged R1R2 conditions
        bf = olap_bf[genes][cond].reset_index()[gene_info_cols + [f'E1_{cond}']]
        # Column is named 'strand' here (it was misspelled 'starnd'), matching the A-compartment cell.
        bf.columns = gene_info_cols + ['E1']
        bf['GeneList'] = genes
        bf['Condition'] = cond
        bf = bf[bf['ensembl_geneid'].isin(SelectGenes['ensembl_geneid'])]
        boxplot_frames_B.append(bf[['GeneList', 'Condition', 'E1']])

# DataFrame.append was removed in pandas 2.0; concatenate the pieces once instead.
boxplot_df_B = pd.concat(boxplot_frames_B, ignore_index = True)
        

In [None]:
# Display the long-format E1 table for the genes selected on E1 < 0 in WT control.
boxplot_df_B

In [None]:
# Notched boxplot (outliers hidden) of E1 per gene list and condition for the
# WT-control B-compartment genes.
fig, ax = plt.subplots(figsize=(8, 8))
sns.boxplot(
    x = boxplot_df_B['GeneList'],
    y = boxplot_df_B['E1'],
    hue = boxplot_df_B['Condition'],
    notch = True,
    showfliers = False,
    ax = ax,
)
# Legend goes outside the axes, to the right.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.1)
ax.set_title('WT Ctrl B compartment genes')

In [None]:
# Correlated change in WT: for the WT-only up/down gene lists, keep the genes whose E1
# change in WT (ATRA - Ctrl) has the same sign as the WT expression log2FC, then record
# the E1 change of those genes in both genotypes.
corr_id_cols = ['chrom', 'start', 'end', 'ensembl_geneid', 'symbol']
# Summary columns first (the order the result frame was originally declared with),
# followed by the gene identifiers, so tables written from this frame keep a stable layout.
corr_out_cols = ['GeneList', 'Condition', 'E1_Diff'] + corr_id_cols

corr_frames = []
for genes in Gene_List_Names[0:2]:
    wt_ctrl_e1 = olap_bf[genes]['WT_Ctrl_R1R2']
    SelectGenesCtrl = wt_ctrl_e1.reset_index()
    # Put the ATRA values in the control's gene order, so that the row-wise subtraction
    # below always pairs a gene with itself.
    SelectGenesATRA = olap_bf[genes]['WT_ATRA_R1R2'].reindex(wt_ctrl_e1.index).reset_index()

    SelectGenesCtrl['E1_Diff'] = SelectGenesATRA['E1_WT_ATRA_R1R2'] - SelectGenesCtrl['E1_WT_Ctrl_R1R2']

    # Positive product <=> E1 change and expression change have the same sign.
    SelectGenesCtrl['CorrDirection'] = SelectGenesCtrl['E1_Diff'] * SelectGenesCtrl['WT_C_WT_R_log2FC']

    SelectGenesCtrl = SelectGenesCtrl[SelectGenesCtrl['CorrDirection'] > 0]  #change in same direction

    for (geno, ctrlcond, treatcond) in zip(['WT', 'BKO'], ['WT_Ctrl_R1R2', 'BKO_Ctrl_R1R2'], ['WT_ATRA_R1R2', 'BKO_ATRA_R1R2']):

        ctrl_e1 = olap_bf[genes][ctrlcond]
        bfctrl = ctrl_e1.reset_index()[corr_id_cols + [f'E1_{ctrlcond}']]
        bfctrl.columns = corr_id_cols + ['E1']
        bfctrl['GeneList'] = genes
        bfctrl['Condition'] = f'{geno}'

        # Same gene order as the control table (see above).
        bftreat = olap_bf[genes][treatcond].reindex(ctrl_e1.index).reset_index()[['ensembl_geneid', f'E1_{treatcond}']]
        bftreat.columns = ['ensembl_geneid', 'E1']

        bfctrl['E1_Diff'] = bftreat['E1'] - bfctrl['E1']

        # Restrict to the genes selected on the WT correlation, for both genotypes.
        bfctrl = bfctrl[bfctrl['ensembl_geneid'].isin(SelectGenesCtrl['ensembl_geneid'])]

        corr_frames.append(bfctrl[corr_out_cols])

# DataFrame.append was removed in pandas 2.0; concatenate the pieces once instead.
boxplot_df_corr = pd.concat(corr_frames, ignore_index = True)
        

In [None]:
# Notched boxplot (outliers shown) of the E1 change for the WT-correlated genes.
plt.figure(figsize=(8, 8))
sns.boxplot(x = boxplot_df_corr['GeneList'], y = boxplot_df_corr['E1_Diff'], hue = boxplot_df_corr['Condition'],
           notch = True)
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.1)
plt.title('WT CtrlvsATRA Correlated Expression and Compartment Change Genes')
# The file name carries the eigenvector bin size actually loaded (`binsize`); it used to
# be hard-coded as '100kbEigs' although the eigenvectors are read at 250 kb.
plt.savefig(f'{outDataDir}/figures/E1Diff_WTCtrlvsATRACorrDEGandE1_{binsize//1000}kbEigs.png', dpi = 300, bbox_inches = 'tight')

In [None]:
# Publication-style version of the WT-correlated boxplot: small figure, outliers hidden.
sns.set_style("ticks")
sns.set_context("paper")
cmap_bar = sns.color_palette(['#a6cee3', '#b2df8a'])
gs = GridSpec(nrows= 1, ncols=1, wspace = 0.6, hspace = 0.6)

plt.figure(figsize=(4, 4))
ax = plt.subplot(gs[0])

sns.boxplot(
    x = boxplot_df_corr['GeneList'],
    y = boxplot_df_corr['E1_Diff'],
    hue = boxplot_df_corr['Condition'],
    palette = cmap_bar,
    notch = True,
    showfliers = False,
    ax = ax,
)

# Legend goes outside the axes, to the right.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.1, frameon = False)
ax.set_ylabel('ATRA-Ctrl E1')
ax.set_xlabel('DE Gene Category')
ax.set_title(f'WT Ctrl vs ATRA Correlated Expression \nand Compartment Change Genes')
ax.figure.savefig(f'{outDataDir}/figures/E1Diff_WTCtrlvsATRACorrDEGandE1_nooutliers_250kbEigs.png', dpi = 300, bbox_inches = 'tight')

In [None]:
#Overall there is less change in E1 in the Top2BKO condition.
#The WT Only Up category contains regions with more of a change in E1 in WT than in Top2BKO, when
#I only consider regions with correlated changes in gene expression and the compartment eigenvector (E1) in WT.

In [None]:
# Write the BKO rows of the WT-only up-regulated list, sorted by E1 change, as a
# tab-separated table.
(
    boxplot_df_corr
    .loc[lambda df: (df['GeneList'] == 'WTOnly_Up') & (df['Condition'] == 'BKO')]
    .sort_values('E1_Diff')
    .to_csv(f'{outDataDir}/data/WTOnlyUpDEG_CorrWTE1_BKOATRAvsBKOCtrlE1_250kbE1.txt',
            sep = '\t', index = False)
)

In [None]:
# BKO rows of the WT-only up-regulated list, sorted by E1 change.
boxplot_df_corr.loc[
    lambda df: (df['GeneList'] == 'WTOnly_Up') & (df['Condition'] == 'BKO')
].sort_values('E1_Diff')

In [None]:
# WT rows of the WT-only up-regulated list, sorted by E1 change.
boxplot_df_corr.loc[
    lambda df: (df['GeneList'] == 'WTOnly_Up') & (df['Condition'] == 'WT')
].sort_values('E1_Diff')

In [None]:
# Write the full WT-correlated table as a tab-separated file.
corr_wt_table_path = f'{outDataDir}/data/CorrelatedDEGWT_GeneList_250kbE1.txt'
boxplot_df_corr.to_csv(corr_wt_table_path, sep = '\t', index = False)

In [None]:
#Plot diff of eigens - ATRA vs Control for each genotype - at all bins

In [None]:
# Bin-level E1 change (ATRA - Ctrl) per genotype, keyed as '<ATRA cond>-<Ctrl cond>'.
eigs_diff = {
    f'{cond2}-{cond1}': eigs[cond2]['E1'] - eigs[cond1]['E1']
    for (cond1, cond2) in (
        ('WT_Ctrl_R1R2', 'WT_ATRA_R1R2'),
        ('BKO_Ctrl_R1R2', 'BKO_ATRA_R1R2'),
    )
}

In [None]:
# Display the per-genotype E1 difference series.
eigs_diff

In [None]:
# Bin-level E1 change in WT (x) against the E1 change in the Top2B KO (y), with a
# fitted regression line.
x = eigs_diff['WT_ATRA_R1R2-WT_Ctrl_R1R2']
y = eigs_diff['BKO_ATRA_R1R2-BKO_Ctrl_R1R2']

# x/y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.regplot(x = x, y = y, ci=None, scatter_kws={"s": 0.5})

plt.xlabel('WT_ATRA_R1R2-WT_Ctrl_R1R2')
plt.ylabel('BKO_ATRA_R1R2-BKO_Ctrl_R1R2')

plt.ylim(-2, 2)
plt.xlim(-2, 2)

#Overall positive correlation. So what is changed in WT is also changed in Top2BKO, in similar direction.

In [None]:
# Reminder of the gene-list order; the next cell slices it as [2:4].
Gene_List_Names

In [None]:
# Correlated change in the Top2B KO: for the BKO-only up/down gene lists, keep the genes
# whose E1 change in BKO (ATRA - Ctrl) has the same sign as the BKO expression log2FC,
# then record the E1 change of those genes in both genotypes.
corr_id_cols = ['chrom', 'start', 'end', 'ensembl_geneid', 'symbol']
# Summary columns first (the order the result frame was originally declared with),
# followed by the gene identifiers, so tables written from this frame keep a stable layout.
corr_out_cols = ['GeneList', 'Condition', 'E1_Diff'] + corr_id_cols

corr_frames_BKO = []
for genes in Gene_List_Names[2:4]:
    bko_ctrl_e1 = olap_bf[genes]['BKO_Ctrl_R1R2']
    SelectGenesCtrl = bko_ctrl_e1.reset_index()
    # Put the ATRA values in the control's gene order, so that the row-wise subtraction
    # below always pairs a gene with itself.
    SelectGenesATRA = olap_bf[genes]['BKO_ATRA_R1R2'].reindex(bko_ctrl_e1.index).reset_index()

    SelectGenesCtrl['E1_Diff'] = SelectGenesATRA['E1_BKO_ATRA_R1R2'] - SelectGenesCtrl['E1_BKO_Ctrl_R1R2']

    # Positive product <=> E1 change and expression change have the same sign.
    SelectGenesCtrl['CorrDirection'] = SelectGenesCtrl['E1_Diff'] * SelectGenesCtrl['BKO_C_BKO_R_log2FC']

    SelectGenesCtrl = SelectGenesCtrl[SelectGenesCtrl['CorrDirection'] > 0]  #change in same direction

    for (geno, ctrlcond, treatcond) in zip(['WT', 'BKO'], ['WT_Ctrl_R1R2', 'BKO_Ctrl_R1R2'], ['WT_ATRA_R1R2', 'BKO_ATRA_R1R2']):

        ctrl_e1 = olap_bf[genes][ctrlcond]
        bfctrl = ctrl_e1.reset_index()[corr_id_cols + [f'E1_{ctrlcond}']]
        bfctrl.columns = corr_id_cols + ['E1']
        bfctrl['GeneList'] = genes
        bfctrl['Condition'] = f'{geno}'

        # Same gene order as the control table (see above).
        bftreat = olap_bf[genes][treatcond].reindex(ctrl_e1.index).reset_index()[['ensembl_geneid', f'E1_{treatcond}']]
        bftreat.columns = ['ensembl_geneid', 'E1']

        bfctrl['E1_Diff'] = bftreat['E1'] - bfctrl['E1']

        # Restrict to the genes selected on the BKO correlation, for both genotypes.
        bfctrl = bfctrl[bfctrl['ensembl_geneid'].isin(SelectGenesCtrl['ensembl_geneid'])]

        corr_frames_BKO.append(bfctrl[corr_out_cols])

# DataFrame.append was removed in pandas 2.0; concatenate the pieces once instead.
boxplot_df_corr_BKO = pd.concat(corr_frames_BKO, ignore_index = True)
        

In [None]:
# Notched boxplot (outliers shown) of the E1 change for the BKO-correlated genes.
plt.figure(figsize=(8, 8))
sns.boxplot(x = boxplot_df_corr_BKO['GeneList'], y = boxplot_df_corr_BKO['E1_Diff'], hue = boxplot_df_corr_BKO['Condition'],
           notch = True)
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.1)
# The genes shown were selected on the BKO Ctrl-vs-ATRA correlation, so the title names
# BKO (it previously said 'WT CtrlvsATRA ... in BKO', carried over from the WT cell).
plt.title('BKO CtrlvsATRA Correlated Expression and Compartment Change Genes')
# NOTE(review): the output file name still starts with 'E1Diff_WTCtrlvsATRA...inBKO';
# it is kept unchanged here so existing references to the file keep working.
plt.savefig(f'{outDataDir}/figures/E1Diff_WTCtrlvsATRACorrDEGandE1inBKO_250kbEigs.png', dpi = 300, bbox_inches = 'tight')

In [None]:
# Publication-style version of the BKO-correlated boxplot: small figure, outliers hidden.
sns.set_style("ticks")
sns.set_context("paper")
cmap_bar = sns.color_palette(['#a6cee3', '#b2df8a'])
gs = GridSpec(nrows= 1, ncols=1, wspace = 0.6, hspace = 0.6)

plt.figure(figsize=(4, 4))
ax = plt.subplot(gs[0])

sns.boxplot(
    x = boxplot_df_corr_BKO['GeneList'],
    y = boxplot_df_corr_BKO['E1_Diff'],
    hue = boxplot_df_corr_BKO['Condition'],
    palette = cmap_bar,
    notch = True,
    showfliers = False,
    ax = ax,
)

# Legend goes outside the axes, to the right.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.1, frameon = False)
ax.set_ylabel('ATRA-Ctrl E1')
ax.set_xlabel('DE Gene Category')
ax.set_title(f'BKO Ctrl vs ATRA Correlated Expression \nand Compartment Change Genes')
ax.figure.savefig(f'{outDataDir}/figures/E1Diff_BKOCtrlvsATRACorrDEGandE1_nooutliers_250kbEigs.png', dpi = 300, bbox_inches = 'tight')

In [None]:
#Overall there is less change in E1 in the Top2BKO condition - does this make sense? Is it not changing gene expression
#or compartments as much in response to ATRA?
#WT Only Up category - might be interesting? It seems to contain regions with more of a change in E1 in WT than in Top2BKO, when
#I only consider regions with correlated changes in gene expression and the compartment eigenvector (E1) in WT.

In [None]:
# BKO rows of the BKO-only up-regulated list, sorted by E1 change.
boxplot_df_corr_BKO.loc[
    lambda df: (df['GeneList'] == 'BKOOnly_Up') & (df['Condition'] == 'BKO')
].sort_values('E1_Diff')

In [None]:
# Write the full BKO-correlated table as a tab-separated file.
corr_bko_table_path = f'{outDataDir}/data/CorrelatedDEGBKO_GeneList_250kbE1.txt'
boxplot_df_corr_BKO.to_csv(corr_bko_table_path, sep = '\t', index = False)

In [None]:
#Plot diff of eigens - ATRA vs Control for each genotype - at all bins

In [None]:
# Bin-level E1 change (ATRA - Ctrl) per genotype, keyed as '<ATRA cond>-<Ctrl cond>'.
# This repeats the computation made earlier in the notebook.
eigs_diff = {
    f'{cond2}-{cond1}': eigs[cond2]['E1'] - eigs[cond1]['E1']
    for (cond1, cond2) in (
        ('WT_Ctrl_R1R2', 'WT_ATRA_R1R2'),
        ('BKO_Ctrl_R1R2', 'BKO_ATRA_R1R2'),
    )
}

In [None]:
# Display the per-genotype E1 difference series.
eigs_diff

In [None]:
# Bin-level E1 change in WT (x) against the E1 change in the Top2B KO (y), with a
# fitted regression line.
x = eigs_diff['WT_ATRA_R1R2-WT_Ctrl_R1R2']
y = eigs_diff['BKO_ATRA_R1R2-BKO_Ctrl_R1R2']

# x/y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.regplot(x = x, y = y, ci=None, scatter_kws={"s": 0.5})

plt.xlabel('WT_ATRA_R1R2-WT_Ctrl_R1R2')
plt.ylabel('BKO_ATRA_R1R2-BKO_Ctrl_R1R2')

plt.ylim(-2, 2)
plt.xlim(-2, 2)

#Overall positive correlation? So what is changed in WT is also changed in Top2BKO, in similar direction?