In [None]:
#Use conda env open2c_env.yml to create conda env for this script
#This is an example script of plotting MC-3C data
#It is run from within the 'scripts' subdirectory, using the following directory structure:
#Analysis_Dir
#├── data
#    ├── permutations
#├── alignments
#├── figures
#├── scripts
#├── lsf_jobs

In [None]:
import bioframe
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import gridspec
from matplotlib import cm
from matplotlib.gridspec import GridSpec
from matplotlib.gridspec import GridSpecFromSubplotSpec
import matplotlib.colors as colors
from matplotlib.colors import ListedColormap
import random
import seaborn as sns
import scipy
import pickle
from numpy import diff

from pandas import read_csv
from sklearn.utils import resample
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from matplotlib import pyplot

%matplotlib inline

In [None]:
# Sample keys have the form '<stem>_<replicate>' and are listed replicate by
# replicate (all R1 samples, then R2, then R3).
_condition_stems = ['t0Mit', 't2', 't4DMSO', 't4ICRF', 't8DMSO', 't8ICRF']

# Per-replicate suffix that completes the long library name of a sample.
_long_name_suffix = {'R1' : '-4-30', 'R2' : '-4-39', 'R3' : '-R3-5-14'}

conditions = [
    f'{stem}_{rep}'
    for rep in _long_name_suffix
    for stem in _condition_stems
]

# Short sample key -> long library name, in the same order as `conditions`.
long_names = {
    f'{stem}_{rep}' : f'TI-MC3C-Dpn-{stem}{suffix}'
    for rep, suffix in _long_name_suffix.items()
    for stem in _condition_stems
}

In [None]:
# Per-sample plot styling. Colour depends only on the sample stem, line style
# only on the replicate, and the display name on both (R1 names carry no
# replicate suffix).
_style_stems = ['t0Mit', 't2', 't4DMSO', 't4ICRF', 't8DMSO', 't8ICRF']
_style_reps = ['R1', 'R2', 'R3']

_stem_colors = dict(zip(
    _style_stems,
    ['#878787', '#E1B7A3', '#17BECF', '#D62728', '#0D6871', '#751616']))
_rep_line_styles = dict(zip(_style_reps, ['-', '--', ':']))
_stem_plot_names = dict(zip(
    _style_stems,
    ['t0 Mit', 't2', 't4 DMSO', 't4 ICRF-193', 't8 DMSO', 't8 ICRF-193']))

sampleColors = {
    f'{stem}_{rep}' : _stem_colors[stem]
    for rep in _style_reps
    for stem in _style_stems
}

sampleLineStyles = {
    f'{stem}_{rep}' : _rep_line_styles[rep]
    for rep in _style_reps
    for stem in _style_stems
}

samplePlotNames = {
    f'{stem}_{rep}' : (
        _stem_plot_names[stem] if rep == 'R1'
        else f'{_stem_plot_names[stem]}, {rep}'
    )
    for rep in _style_reps
    for stem in _style_stems
}

In [None]:
# Control / treatment sample keys, six entries per replicate.
# NOTE(review): the two lists have equal length and the same replicate layout,
# so they are presumably paired by index (ctrlconds[i] vs treatconds[i]) -
# confirm against the code that consumes them.
# The 17th control entry previously read 't8DMSO_R2', pairing the R3 sample
# 't8ICRF_R3' with a control from a different replicate; every other pair
# stays within one replicate, so it is corrected to 't8DMSO_R3'.
ctrlconds = [
    't8DMSO_R1',
    't8DMSO_R1',
    't8DMSO_R1',
    't8DMSO_R1',
    't8DMSO_R1',
    't4DMSO_R1',
    't8DMSO_R2',
    't8DMSO_R2',
    't8DMSO_R2',
    't8DMSO_R2',
    't8DMSO_R2',
    't4DMSO_R2',
    't8DMSO_R3',
    't8DMSO_R3',
    't8DMSO_R3',
    't8DMSO_R3',
    't8DMSO_R3',
    't4DMSO_R3',
]

treatconds = [
    't0Mit_R1',
    't2_R1',
    't4DMSO_R1',
    't4ICRF_R1',
    't8ICRF_R1',
    't4ICRF_R1',
    't0Mit_R2',
    't2_R2',
    't4DMSO_R2',
    't4ICRF_R2',
    't8ICRF_R2',
    't4ICRF_R2',
    't0Mit_R3',
    't2_R3',
    't4DMSO_R3',
    't4ICRF_R3',
    't8ICRF_R3',
    't4ICRF_R3',
]

In [None]:
# Lookup tables keyed by sample ('<stem>_<replicate>') or by plot label.
_label_stems = ['t0Mit', 't2', 't4DMSO', 't4ICRF', 't8DMSO', 't8ICRF']
_label_reps = ['R1', 'R2', 'R3']

# Plot labels, in the same order as the sample stems above.
labels = [
    't0 Prometa',
    't2 Ana/Telo',
    't4 G1 DMSO',
    't4 G1 ICRF-193',
    't8 G1 DMSO',
    't8 G1 ICRF-193',
]

# sample key -> replicate name
repdict = {
    f'{stem}_{rep}' : rep
    for rep in _label_reps
    for stem in _label_stems
}

# sample key -> plot label (identical across replicates)
labeldict = {
    f'{stem}_{rep}' : label
    for rep in _label_reps
    for stem, label in zip(_label_stems, labels)
}

# plot label -> colour
labelPlotColors = dict(zip(
    labels,
    ['#878787', '#E1B7A3', '#17BECF', '#D62728', '#0D6871', '#751616']))

In [None]:
#For each of the above, can separate by A vs B compartments, or by number of chromosomes visited
# Walk compartment categories ('Walk_Comp_Type' values) kept by the filters below.
comp_types = ['A', 'B', 'AB']
# Chromosomes kept by the `isin(good_chroms)` filters in the cells below.
good_chroms = ['chr4', 'chr14', 'chr17', 'chr18', 'chr20', 'chr21']

In [None]:
# Root of the analysis directory, relative to the 'scripts' subdirectory the
# notebook runs from; inputs are read from '<root>/data' and figures are
# written to '<root>/figures'.
outDataDir = '..'

In [None]:
#Read in pickled files
# NOTE(review): pickle.load can execute arbitrary code - only load files that
# were produced by this project's own pipeline.
# `with` guarantees the handles are closed even if unpickling raises.

with open(f'{outDataDir}/data/220517_MRICRF_R1R2R3_real_walks_with_summary_firstx_length_fractions_dict_100kbEigs.pkl', 'rb') as f:
    real_walks_with_summary_firstx = pickle.load(f)

with open(f'{outDataDir}/data/220517_MRICRF_R1R2R3_real_walks_filtered_summarized_firstx_length_dict_100kbEigs.pkl', 'rb') as f:
    real_walks_summarized_firstx = pickle.load(f)

In [None]:
#combining T1 and T2 reads for t0 Mit R1
# For every 'length_<n>' table (n = 2..10), stack the T1 and T2 frames into a
# single 't0Mit_R1' entry. pd.concat replaces DataFrame.append, which was
# deprecated in pandas 1.4 and removed in pandas 2.0.
real_walks_with_summary_firstx['t0Mit_R1'] = {}
for fragnum in range(2, 11):
    real_walks_with_summary_firstx['t0Mit_R1'][f'length_{fragnum}'] = pd.concat(
        [
            real_walks_with_summary_firstx['t0Mit_R1_T1'][f'length_{fragnum}'],
            real_walks_with_summary_firstx['t0Mit_R1_T2'][f'length_{fragnum}'],
        ],
        ignore_index = True
    )

In [None]:
#combining T1 and T2 reads for t0 Mit R1
# Same merge as for the per-step tables, applied to the per-walk summary
# tables. pd.concat replaces DataFrame.append, which was deprecated in
# pandas 1.4 and removed in pandas 2.0.
real_walks_summarized_firstx['t0Mit_R1'] = {}
for fragnum in range(2, 11):
    real_walks_summarized_firstx['t0Mit_R1'][f'length_{fragnum}'] = pd.concat(
        [
            real_walks_summarized_firstx['t0Mit_R1_T1'][f'length_{fragnum}'],
            real_walks_summarized_firstx['t0Mit_R1_T2'][f'length_{fragnum}'],
        ],
        ignore_index = True
    )

In [None]:
#Kernel density plots of pairwise distance - overlay - 1 or 2 chrom walks, A, B, or AB compartments
#Just good chromosome walks here
#Just first 6 steps of walks

# One filtered frame per condition is collected here and concatenated once at
# the end: DataFrame.append was removed in pandas 2.0, and growing a frame
# inside a loop is quadratic.
dist_frames = []

#combine biological replicates
for cond in conditions:
    df = real_walks_with_summary_firstx[cond]['length_6']
    # NOTE: adds a column to the shared table in place (safe to repeat).
    df['Query_Fragment_Length'] = df['Query_End'] - df['Query_Start']
    grouped_walks = df.groupby('Query_Name')

    # Keep walks whose lowest per-row mapping quality is above 59.
    walks_min_mapq = grouped_walks.agg({'Mapping_Quality' : 'min'})
    good_walks_mapq = walks_min_mapq[walks_min_mapq['Mapping_Quality'] > 59] #use this to filter for mapq

    # Keep walks where summed Match_Length exceeds 80% of summed fragment length.
    walks_frac_map = grouped_walks.agg({'Match_Length' : 'sum',
                                    'Query_Fragment_Length' : 'sum',
                                    'Alignment_Length' : 'sum'
                                   })
    walks_high_frac_map = walks_frac_map[
        (walks_frac_map['Match_Length']/walks_frac_map['Query_Fragment_Length']) > 0.8]
    #use this to filter for fraction mapped

    cond1_filtered = df[
        (df['Chrom_Number'] < 3) &
        (df['Walk_Comp_Type'].isin(['A', 'B', 'AB'])) &
        (df['Query_Name'].isin(good_walks_mapq.index)) &
        (df['Query_Name'].isin(walks_high_frac_map.index)) &
        (df['chrom'].isin(good_chroms))]
    dist_dropna = pd.DataFrame({'Dist' : cond1_filtered['dist'].copy(),
                               'Condition' : cond,
                               'Label' : labeldict[cond],
                               'Replicate' : repdict[cond]})
    # Infinite and missing distances are dropped here, so the absolute value
    # below needs no second inf/NaN pass.
    dist_dropna = dist_dropna.replace([np.inf, -np.inf], np.nan).dropna()
    dist_dropna['Abs_Dist'] = dist_dropna['Dist'].abs()
    # Zero distances are excluded (the plots below use a log-scaled axis).
    dist_frames.append(dist_dropna[dist_dropna['Abs_Dist'] > 0])

dist_df = pd.concat(dist_frames, ignore_index = True)

In [None]:
fig = plt.figure(figsize=(3, 3))
cmap_bar = sns.color_palette(['#878787','#E1B7A3','#17BECF', '#D62728', '#0D6871', '#751616'])

# One KDE curve per label, all replicates pooled; each curve is normalised
# independently (common_norm=False) and evaluated on a shared log-scaled grid.
g = sns.kdeplot(
    data = dist_df,
    x = 'Abs_Dist',
    hue = 'Label',
    palette = cmap_bar,
    log_scale = True,
    common_norm = False,
    common_grid = True,
    lw = 2,
)
g.set_xlabel('Separation, bp')
g.set_ylabel('Density')
g.set_title('Pairwise Interaction Distance')
g.grid(lw=0.5)
sns.move_legend(g, "upper left", bbox_to_anchor=(1.05, 1), title='Sample')
fig.savefig(f'{outDataDir}/figures/220518_AllSamples_R1R2R3_Combined_Density_Dist_LogScaleKde_DropNA.png', bbox_inches = 'tight', dpi = 300)

In [None]:
fig = plt.figure(figsize=(3, 3))
cmap_bar = sns.color_palette(['#878787','#E1B7A3','#17BECF', '#D62728', '#0D6871', '#751616'])

# Same overlay as the previous figure, but with the x-axis pinned to
# 1e2-1e9 bp (the output file name says this is to match the Hi-C plots).
g = sns.kdeplot(
    data = dist_df,
    x = 'Abs_Dist',
    hue = 'Label',
    palette = cmap_bar,
    log_scale = True,
    common_norm = False,
    common_grid = True,
    lw = 2,
)
g.set_xlabel('Separation, bp')
g.set_ylabel('Density')
g.set_title('MC-3C Pairwise Interaction Distance')
g.grid(lw=0.5)
g.set_xlim(1e2, 1e9)
sns.move_legend(g, "upper left", bbox_to_anchor=(1.05, 1), title='Sample')
fig.savefig(f'{outDataDir}/figures/220518_AllSamples_R1R2R3_Combined_Density_Dist_LogScaleKde_DropNA_MatchHiC_Axes.png', bbox_inches = 'tight', dpi = 300)

In [None]:
from scipy import signal

In [None]:
#Calculating location of peaks in each dataset

In [None]:
# Empty accumulator for the pairwise-distance KDE peaks; the per-label cells
# below add one row per detected peak.
peak_list = pd.DataFrame(columns = ['Label', 'PeakNum', 'PeakSize(bp)', 'Prominence'])

In [None]:
fig = plt.figure(figsize=(3, 3))
cmap_bar = sns.color_palette(['#878787'])

# Single-label KDE; the next cell reads the drawn curve back from `g.lines[0]`.
g = sns.kdeplot(
    data = dist_df.loc[dist_df['Label'] == 't0 Prometa'],
    x = 'Abs_Dist',
    hue = 'Label',
    palette = cmap_bar,
    log_scale = True,
    common_norm = False,
    common_grid = True,
    lw = 2,
)
g.set_xlabel('Separation, bp')
g.set_ylabel('Density')
g.set_title('Pairwise Interaction Distance')
g.grid(lw=0.5)
sns.move_legend(g, "upper left", bbox_to_anchor=(1.05, 1), title='Sample')

In [None]:
#Location of peaks
#from https://stackoverflow.com/questions/63492366/python-get-fwhm-from-seaborn-kdeplot
#https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.find_peaks.html

# Read the KDE curve drawn by the previous cell back off the axes `g`.
kde_curve = g.lines[0]
x = np.asarray(kde_curve.get_xdata())
y = np.asarray(kde_curve.get_ydata())
peaks, properties = scipy.signal.find_peaks(y, prominence = 0.001)

# Build all rows for this label at once and attach them with pd.concat
# (DataFrame.append was deprecated in pandas 1.4 and removed in 2.0).
label_peaks = pd.DataFrame({
    'Label' : 't0 Prometa',
    'PeakNum' : np.arange(len(peaks)),
    'PeakSize(bp)' : x[peaks],
    'Prominence' : properties['prominences'],
})
peak_list = pd.concat([peak_list, label_peaks], ignore_index = True)
    

In [None]:
fig = plt.figure(figsize=(3, 3))
cmap_bar = sns.color_palette(['#878787'])

# Single-label KDE; the next cell reads the drawn curve back from `g.lines[0]`.
g = sns.kdeplot(
    data = dist_df.loc[dist_df['Label'] == 't2 Ana/Telo'],
    x = 'Abs_Dist',
    hue = 'Label',
    palette = cmap_bar,
    log_scale = True,
    common_norm = False,
    common_grid = True,
    lw = 2,
)
g.set_xlabel('Separation, bp')
g.set_ylabel('Density')
g.set_title('Pairwise Interaction Distance')
g.grid(lw=0.5)
sns.move_legend(g, "upper left", bbox_to_anchor=(1.05, 1), title='Sample')

In [None]:
#Location of peaks
#from https://stackoverflow.com/questions/63492366/python-get-fwhm-from-seaborn-kdeplot
#https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.find_peaks.html

# Read the KDE curve drawn by the previous cell back off the axes `g`.
kde_curve = g.lines[0]
x = np.asarray(kde_curve.get_xdata())
y = np.asarray(kde_curve.get_ydata())
peaks, properties = scipy.signal.find_peaks(y, prominence = 0.001)

# Build all rows for this label at once and attach them with pd.concat
# (DataFrame.append was deprecated in pandas 1.4 and removed in 2.0).
label_peaks = pd.DataFrame({
    'Label' : 't2 Ana/Telo',
    'PeakNum' : np.arange(len(peaks)),
    'PeakSize(bp)' : x[peaks],
    'Prominence' : properties['prominences'],
})
peak_list = pd.concat([peak_list, label_peaks], ignore_index = True)
    

In [None]:
fig = plt.figure(figsize=(3, 3))
cmap_bar = sns.color_palette(['#878787'])

# Single-label KDE; the next cell reads the drawn curve back from `g.lines[0]`.
g = sns.kdeplot(
    data = dist_df.loc[dist_df['Label'] == 't4 G1 DMSO'],
    x = 'Abs_Dist',
    hue = 'Label',
    palette = cmap_bar,
    log_scale = True,
    common_norm = False,
    common_grid = True,
    lw = 2,
)
g.set_xlabel('Separation, bp')
g.set_ylabel('Density')
g.set_title('Pairwise Interaction Distance')
g.grid(lw=0.5)
sns.move_legend(g, "upper left", bbox_to_anchor=(1.05, 1), title='Sample')

In [None]:
#Location of peaks
#from https://stackoverflow.com/questions/63492366/python-get-fwhm-from-seaborn-kdeplot
#https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.find_peaks.html

# Read the KDE curve drawn by the previous cell back off the axes `g`.
kde_curve = g.lines[0]
x = np.asarray(kde_curve.get_xdata())
y = np.asarray(kde_curve.get_ydata())
peaks, properties = scipy.signal.find_peaks(y, prominence = 0.001)

# Build all rows for this label at once and attach them with pd.concat
# (DataFrame.append was deprecated in pandas 1.4 and removed in 2.0).
label_peaks = pd.DataFrame({
    'Label' : 't4 G1 DMSO',
    'PeakNum' : np.arange(len(peaks)),
    'PeakSize(bp)' : x[peaks],
    'Prominence' : properties['prominences'],
})
peak_list = pd.concat([peak_list, label_peaks], ignore_index = True)
    

In [None]:
fig = plt.figure(figsize=(3, 3))
cmap_bar = sns.color_palette(['#878787'])

# Single-label KDE; the next cell reads the drawn curve back from `g.lines[0]`.
g = sns.kdeplot(
    data = dist_df.loc[dist_df['Label'] == 't4 G1 ICRF-193'],
    x = 'Abs_Dist',
    hue = 'Label',
    palette = cmap_bar,
    log_scale = True,
    common_norm = False,
    common_grid = True,
    lw = 2,
)
g.set_xlabel('Separation, bp')
g.set_ylabel('Density')
g.set_title('Pairwise Interaction Distance')
g.grid(lw=0.5)
sns.move_legend(g, "upper left", bbox_to_anchor=(1.05, 1), title='Sample')

In [None]:
#Location of peaks
#from https://stackoverflow.com/questions/63492366/python-get-fwhm-from-seaborn-kdeplot
#https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.find_peaks.html

# Read the KDE curve drawn by the previous cell back off the axes `g`.
kde_curve = g.lines[0]
x = np.asarray(kde_curve.get_xdata())
y = np.asarray(kde_curve.get_ydata())
peaks, properties = scipy.signal.find_peaks(y, prominence = 0.001)

# Build all rows for this label at once and attach them with pd.concat
# (DataFrame.append was deprecated in pandas 1.4 and removed in 2.0).
label_peaks = pd.DataFrame({
    'Label' : 't4 G1 ICRF-193',
    'PeakNum' : np.arange(len(peaks)),
    'PeakSize(bp)' : x[peaks],
    'Prominence' : properties['prominences'],
})
peak_list = pd.concat([peak_list, label_peaks], ignore_index = True)
 

In [None]:
fig = plt.figure(figsize=(3, 3))
cmap_bar = sns.color_palette(['#878787'])

# Single-label KDE; the next cell reads the drawn curve back from `g.lines[0]`.
g = sns.kdeplot(
    data = dist_df.loc[dist_df['Label'] == 't8 G1 DMSO'],
    x = 'Abs_Dist',
    hue = 'Label',
    palette = cmap_bar,
    log_scale = True,
    common_norm = False,
    common_grid = True,
    lw = 2,
)
g.set_xlabel('Separation, bp')
g.set_ylabel('Density')
g.set_title('Pairwise Interaction Distance')
g.grid(lw=0.5)
sns.move_legend(g, "upper left", bbox_to_anchor=(1.05, 1), title='Sample')

In [None]:
#Location of peaks
#from https://stackoverflow.com/questions/63492366/python-get-fwhm-from-seaborn-kdeplot
#https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.find_peaks.html

# Read the KDE curve drawn by the previous cell back off the axes `g`.
kde_curve = g.lines[0]
x = np.asarray(kde_curve.get_xdata())
y = np.asarray(kde_curve.get_ydata())
peaks, properties = scipy.signal.find_peaks(y, prominence = 0.001)

# Build all rows for this label at once and attach them with pd.concat
# (DataFrame.append was deprecated in pandas 1.4 and removed in 2.0).
label_peaks = pd.DataFrame({
    'Label' : 't8 G1 DMSO',
    'PeakNum' : np.arange(len(peaks)),
    'PeakSize(bp)' : x[peaks],
    'Prominence' : properties['prominences'],
})
peak_list = pd.concat([peak_list, label_peaks], ignore_index = True)

In [None]:
fig = plt.figure(figsize=(3, 3))
cmap_bar = sns.color_palette(['#878787'])

# Single-label KDE; the next cell reads the drawn curve back from `g.lines[0]`.
g = sns.kdeplot(
    data = dist_df.loc[dist_df['Label'] == 't8 G1 ICRF-193'],
    x = 'Abs_Dist',
    hue = 'Label',
    palette = cmap_bar,
    log_scale = True,
    common_norm = False,
    common_grid = True,
    lw = 2,
)
g.set_xlabel('Separation, bp')
g.set_ylabel('Density')
g.set_title('Pairwise Interaction Distance')
g.grid(lw=0.5)
sns.move_legend(g, "upper left", bbox_to_anchor=(1.05, 1), title='Sample')

In [None]:
#Location of peaks
#from https://stackoverflow.com/questions/63492366/python-get-fwhm-from-seaborn-kdeplot
#https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.find_peaks.html

# Read the KDE curve drawn by the previous cell back off the axes `g`.
kde_curve = g.lines[0]
x = np.asarray(kde_curve.get_xdata())
y = np.asarray(kde_curve.get_ydata())
peaks, properties = scipy.signal.find_peaks(y, prominence = 0.001)

# Build all rows for this label at once and attach them with pd.concat
# (DataFrame.append was deprecated in pandas 1.4 and removed in 2.0).
label_peaks = pd.DataFrame({
    'Label' : 't8 G1 ICRF-193',
    'PeakNum' : np.arange(len(peaks)),
    'PeakSize(bp)' : x[peaks],
    'Prominence' : properties['prominences'],
})
peak_list = pd.concat([peak_list, label_peaks], ignore_index = True)

In [None]:
# Display the collected pairwise-distance peak table.
peak_list

In [None]:
cmap_bar = sns.color_palette(['#878787','#E1B7A3','#17BECF', '#D62728', '#0D6871', '#751616'])

# One KDE panel per replicate (columns), one curve per label, shared y-axis.
with sns.axes_style("whitegrid"):
    g = sns.displot(
        data=dist_df,
        x='Abs_Dist',
        hue='Label',
        col='Replicate',
        kind='kde',
        palette = cmap_bar,
        log_scale = True,
        common_norm = False,
        common_grid = True,
        lw = 2,
        height = 3,
        facet_kws={'sharey': True},
    )
    g._legend.set_title('Sample')

    plt.savefig(f'{outDataDir}/figures/220518_MRICRF_AllSamples_R1R2R3_Separate_Density_Dist_LogScaleKde_DropNA.png', bbox_inches = 'tight', dpi = 300)

In [None]:
#walk span plot

In [None]:
#Kernel density plots of walk span - overlay - 1 or 2 chrom walks, A, B, or AB compartments
#Just good chromosome walks here
#Just first 6 steps of walks
cmap_bar = sns.color_palette(['#878787','#E1B7A3','#17BECF', '#D62728', '#0D6871', '#751616'])

# One filtered frame per condition is collected here and concatenated once at
# the end: DataFrame.append was removed in pandas 2.0, and growing a frame
# inside a loop is quadratic.
span_frames = []

#combine biological replicates
for cond in conditions:
    # Walk-level quality filters are computed from the per-step table ...
    df = real_walks_with_summary_firstx[cond]['length_6']
    # NOTE: adds a column to the shared table in place (safe to repeat).
    df['Query_Fragment_Length'] = df['Query_End'] - df['Query_Start']
    grouped_walks = df.groupby('Query_Name')
    walks_min_mapq = grouped_walks.agg({'Mapping_Quality' : 'min'})
    good_walks_mapq = walks_min_mapq[walks_min_mapq['Mapping_Quality'] > 59] #use this to filter for mapq
    walks_frac_map = grouped_walks.agg({'Match_Length' : 'sum',
                                    'Query_Fragment_Length' : 'sum',
                                    'Alignment_Length' : 'sum'})
    walks_high_frac_map = walks_frac_map[
        (walks_frac_map['Match_Length']/walks_frac_map['Query_Fragment_Length']) > 0.8]
    #use this to filter for fraction mapped

    # ... and applied to the per-walk summary table, keeping single-chromosome walks only.
    df2 = real_walks_summarized_firstx[cond]['length_6']
    cond1_filtered = df2[
        (df2['Chrom_Number'] == 1) &
        (df2['Walk_Comp_Type'].isin(['A', 'B', 'AB'])) &
        (df2['Query_Name'].isin(good_walks_mapq.index)) &
        (df2['Query_Name'].isin(walks_high_frac_map.index)) &
        (df2['chrom'].isin(good_chroms))]
    dist_dropna = pd.DataFrame({'Span' : cond1_filtered['Span'].copy(),
                               'Condition' : cond,
                               'Label' : labeldict[cond],
                               'Replicate' : repdict[cond]})
    # Infinite and missing spans are dropped here, so the absolute value
    # below needs no second inf/NaN pass.
    dist_dropna = dist_dropna.replace([np.inf, -np.inf], np.nan).dropna()
    dist_dropna['Abs_Span'] = dist_dropna['Span'].abs()
    # Zero spans are excluded (the plots below use a log-scaled axis).
    span_frames.append(dist_dropna[dist_dropna['Abs_Span'] > 0])

span_df = pd.concat(span_frames, ignore_index = True)

In [None]:
#Kernel density plots - overlay - 1 chrom walks, A, B, or AB compartments
fig = plt.figure(figsize=(3, 3))

# One KDE curve per label, all replicates pooled. `cmap_bar` is the
# six-colour palette set in the cell that builds `span_df`.
g = sns.kdeplot(
    data = span_df,
    x = 'Abs_Span',
    hue = 'Label',
    palette = cmap_bar,
    log_scale = True,
    common_norm = False,
    common_grid = True,
    lw = 2,
)

sns.move_legend(g, "upper left", bbox_to_anchor=(1.05, 1), title='Sample')
g.set_xlabel('Walk Span, bp')
g.set_ylabel('Density')
g.set_title('Walk Span')
g.grid(lw=0.5)

fig.savefig(f'{outDataDir}/figures/220518_MRICRF_AllSamples_R1R2R3_Density_Span_Combo_1st6steps_Logbins_DropNA_100kbEigs_GoodChroms.png', bbox_inches = 'tight', dpi = 300)

In [None]:
#peak list for walk span as well

In [None]:
# Reset the accumulator (discarding the pairwise-distance peaks collected
# above) so it can be refilled with the walk-span KDE peaks.
peak_list = pd.DataFrame(columns = ['Label', 'PeakNum', 'PeakSize(bp)', 'Prominence'])

In [None]:
fig = plt.figure(figsize=(3, 3))
cmap_bar = sns.color_palette(['#878787'])

# Single-label KDE of walk span; the next cell reads the drawn curve back
# from `g.lines[0]`.
g = sns.kdeplot(data = span_df[span_df['Label'] == 't0 Prometa'], x = 'Abs_Span', common_norm = False,
    hue = 'Label', lw = 2, log_scale = True, common_grid = True, palette = cmap_bar)
# The x-axis shows walk span, not pairwise separation; label it the same way
# as the combined walk-span figure.
plt.xlabel('Walk Span, bp')
plt.ylabel('Density')
plt.title('Walk Span')
plt.grid(lw=0.5)
sns.move_legend(g, "upper left", bbox_to_anchor=(1.05, 1), title='Sample')

In [None]:
#Location of peaks
#from https://stackoverflow.com/questions/63492366/python-get-fwhm-from-seaborn-kdeplot
#https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.find_peaks.html

# Read the KDE curve drawn by the previous cell back off the axes `g`.
kde_curve = g.lines[0]
x = np.asarray(kde_curve.get_xdata())
y = np.asarray(kde_curve.get_ydata())
peaks, properties = scipy.signal.find_peaks(y, prominence = 0.01)

# Build all rows for this label at once and attach them with pd.concat
# (DataFrame.append was deprecated in pandas 1.4 and removed in 2.0).
label_peaks = pd.DataFrame({
    'Label' : 't0 Prometa',
    'PeakNum' : np.arange(len(peaks)),
    'PeakSize(bp)' : x[peaks],
    'Prominence' : properties['prominences'],
})
peak_list = pd.concat([peak_list, label_peaks], ignore_index = True)
    

In [None]:
fig = plt.figure(figsize=(3, 3))
cmap_bar = sns.color_palette(['#878787'])

# Single-label KDE of walk span; the next cell reads the drawn curve back
# from `g.lines[0]`.
g = sns.kdeplot(data = span_df[span_df['Label'] == 't2 Ana/Telo'], x = 'Abs_Span', common_norm = False,
    hue = 'Label', lw = 2, log_scale = True, common_grid = True, palette = cmap_bar)
# The x-axis shows walk span, not pairwise separation; label it the same way
# as the combined walk-span figure.
plt.xlabel('Walk Span, bp')
plt.ylabel('Density')
plt.title('Walk Span')
plt.grid(lw=0.5)
sns.move_legend(g, "upper left", bbox_to_anchor=(1.05, 1), title='Sample')

In [None]:
#Location of peaks
#from https://stackoverflow.com/questions/63492366/python-get-fwhm-from-seaborn-kdeplot
#https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.find_peaks.html

# Read the KDE curve drawn by the previous cell back off the axes `g`.
kde_curve = g.lines[0]
x = np.asarray(kde_curve.get_xdata())
y = np.asarray(kde_curve.get_ydata())
peaks, properties = scipy.signal.find_peaks(y, prominence = 0.01)

# Build all rows for this label at once and attach them with pd.concat
# (DataFrame.append was deprecated in pandas 1.4 and removed in 2.0).
label_peaks = pd.DataFrame({
    'Label' : 't2 Ana/Telo',
    'PeakNum' : np.arange(len(peaks)),
    'PeakSize(bp)' : x[peaks],
    'Prominence' : properties['prominences'],
})
peak_list = pd.concat([peak_list, label_peaks], ignore_index = True)
    

In [None]:
fig = plt.figure(figsize=(3, 3))
cmap_bar = sns.color_palette(['#878787'])

# Single-label KDE of walk span; the next cell reads the drawn curve back
# from `g.lines[0]`.
g = sns.kdeplot(data = span_df[span_df['Label'] == 't4 G1 DMSO'], x = 'Abs_Span', common_norm = False,
    hue = 'Label', lw = 2, log_scale = True, common_grid = True, palette = cmap_bar)
# The x-axis shows walk span, not pairwise separation; label it the same way
# as the combined walk-span figure.
plt.xlabel('Walk Span, bp')
plt.ylabel('Density')
plt.title('Walk Span')
plt.grid(lw=0.5)
sns.move_legend(g, "upper left", bbox_to_anchor=(1.05, 1), title='Sample')

In [None]:
#Location of peaks
#from https://stackoverflow.com/questions/63492366/python-get-fwhm-from-seaborn-kdeplot
#https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.find_peaks.html

# Read the KDE curve drawn by the previous cell back off the axes `g`.
kde_curve = g.lines[0]
x = np.asarray(kde_curve.get_xdata())
y = np.asarray(kde_curve.get_ydata())
peaks, properties = scipy.signal.find_peaks(y, prominence = 0.01)

# Build all rows for this label at once and attach them with pd.concat
# (DataFrame.append was deprecated in pandas 1.4 and removed in 2.0).
label_peaks = pd.DataFrame({
    'Label' : 't4 G1 DMSO',
    'PeakNum' : np.arange(len(peaks)),
    'PeakSize(bp)' : x[peaks],
    'Prominence' : properties['prominences'],
})
peak_list = pd.concat([peak_list, label_peaks], ignore_index = True)
    

In [None]:
fig = plt.figure(figsize=(3, 3))
cmap_bar = sns.color_palette(['#878787'])

# Single-label KDE of walk span; the next cell reads the drawn curve back
# from `g.lines[0]`.
g = sns.kdeplot(data = span_df[span_df['Label'] == 't4 G1 ICRF-193'], x = 'Abs_Span', common_norm = False,
    hue = 'Label', lw = 2, log_scale = True, common_grid = True, palette = cmap_bar)
# The x-axis shows walk span, not pairwise separation; label it the same way
# as the combined walk-span figure.
plt.xlabel('Walk Span, bp')
plt.ylabel('Density')
plt.title('Walk Span')
plt.grid(lw=0.5)
sns.move_legend(g, "upper left", bbox_to_anchor=(1.05, 1), title='Sample')

In [None]:
#Location of peaks
#from https://stackoverflow.com/questions/63492366/python-get-fwhm-from-seaborn-kdeplot
#https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.find_peaks.html

# Read the KDE curve drawn by the previous cell back off the axes `g`.
kde_curve = g.lines[0]
x = np.asarray(kde_curve.get_xdata())
y = np.asarray(kde_curve.get_ydata())
peaks, properties = scipy.signal.find_peaks(y, prominence = 0.01)

# Build all rows for this label at once and attach them with pd.concat
# (DataFrame.append was deprecated in pandas 1.4 and removed in 2.0).
label_peaks = pd.DataFrame({
    'Label' : 't4 G1 ICRF-193',
    'PeakNum' : np.arange(len(peaks)),
    'PeakSize(bp)' : x[peaks],
    'Prominence' : properties['prominences'],
})
peak_list = pd.concat([peak_list, label_peaks], ignore_index = True)
 

In [None]:
fig = plt.figure(figsize=(3, 3))
cmap_bar = sns.color_palette(['#878787'])

# Single-label KDE of walk span; the next cell reads the drawn curve back
# from `g.lines[0]`.
g = sns.kdeplot(data = span_df[span_df['Label'] == 't8 G1 DMSO'], x = 'Abs_Span', common_norm = False,
    hue = 'Label', lw = 2, log_scale = True, common_grid = True, palette = cmap_bar)
# The x-axis shows walk span, not pairwise separation; label it the same way
# as the combined walk-span figure.
plt.xlabel('Walk Span, bp')
plt.ylabel('Density')
plt.title('Walk Span')
plt.grid(lw=0.5)
sns.move_legend(g, "upper left", bbox_to_anchor=(1.05, 1), title='Sample')

In [None]:
#Location of peaks
#from https://stackoverflow.com/questions/63492366/python-get-fwhm-from-seaborn-kdeplot
#https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.find_peaks.html

# Read the KDE curve drawn by the previous cell back off the axes `g`.
kde_curve = g.lines[0]
x = np.asarray(kde_curve.get_xdata())
y = np.asarray(kde_curve.get_ydata())
peaks, properties = scipy.signal.find_peaks(y, prominence = 0.01)

# Build all rows for this label at once and attach them with pd.concat
# (DataFrame.append was deprecated in pandas 1.4 and removed in 2.0).
label_peaks = pd.DataFrame({
    'Label' : 't8 G1 DMSO',
    'PeakNum' : np.arange(len(peaks)),
    'PeakSize(bp)' : x[peaks],
    'Prominence' : properties['prominences'],
})
peak_list = pd.concat([peak_list, label_peaks], ignore_index = True)

In [None]:
fig = plt.figure(figsize=(3, 3))
cmap_bar = sns.color_palette(['#878787'])

# Single-label KDE of walk span; the next cell reads the drawn curve back
# from `g.lines[0]`.
g = sns.kdeplot(data = span_df[span_df['Label'] == 't8 G1 ICRF-193'], x = 'Abs_Span', common_norm = False,
    hue = 'Label', lw = 2, log_scale = True, common_grid = True, palette = cmap_bar)
# The x-axis shows walk span, not pairwise separation; label it the same way
# as the combined walk-span figure.
plt.xlabel('Walk Span, bp')
plt.ylabel('Density')
plt.title('Walk Span')
plt.grid(lw=0.5)
sns.move_legend(g, "upper left", bbox_to_anchor=(1.05, 1), title='Sample')

In [None]:
#Location of peaks
#from https://stackoverflow.com/questions/63492366/python-get-fwhm-from-seaborn-kdeplot
#https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.find_peaks.html

# Read the KDE curve drawn by the previous cell back off the axes `g`.
kde_curve = g.lines[0]
x = np.asarray(kde_curve.get_xdata())
y = np.asarray(kde_curve.get_ydata())
peaks, properties = scipy.signal.find_peaks(y, prominence = 0.01)

# Build all rows for this label at once and attach them with pd.concat
# (DataFrame.append was deprecated in pandas 1.4 and removed in 2.0).
label_peaks = pd.DataFrame({
    'Label' : 't8 G1 ICRF-193',
    'PeakNum' : np.arange(len(peaks)),
    'PeakSize(bp)' : x[peaks],
    'Prominence' : properties['prominences'],
})
peak_list = pd.concat([peak_list, label_peaks], ignore_index = True)

In [None]:
# Display the collected walk-span peak table.
peak_list

In [None]:
cmap_bar = sns.color_palette(['#878787','#E1B7A3','#17BECF', '#D62728', '#0D6871', '#751616'])

# One KDE panel per replicate (columns), one curve per label, shared y-axis.
with sns.axes_style("whitegrid"):
    g = sns.displot(
        data=span_df,
        x='Abs_Span',
        hue='Label',
        col='Replicate',
        kind='kde',
        palette = cmap_bar,
        log_scale = True,
        common_norm = False,
        common_grid = True,
        lw = 2,
        height = 3,
        facet_kws={'sharey': True},
    )
    g._legend.set_title('Sample')

    plt.savefig(f'{outDataDir}/figures/220518_MRICRF_AllSamples_R1R2R3_Separate_Density_Span_LogScaleKde_DropNA.png', bbox_inches = 'tight', dpi = 300)

In [None]:
# Fraction of 1 vs >1 chromosome walks in A, B, AB

# Per-condition frames are collected and concatenated once at the end:
# DataFrame.append was removed in pandas 2.0, and growing a frame inside a
# loop is quadratic.
walk_frames = []

for cond in conditions:
    # Walk-level quality filters are computed from the per-step table ...
    df = real_walks_with_summary_firstx[cond]['length_6']
    # NOTE: adds a column to the shared table in place (safe to repeat).
    df['Query_Fragment_Length'] = df['Query_End'] - df['Query_Start']
    grouped_walks = df.groupby('Query_Name')
    walks_min_mapq = grouped_walks.agg({'Mapping_Quality' : 'min'})
    good_walks_mapq = walks_min_mapq[walks_min_mapq['Mapping_Quality'] > 59] #use this to filter for mapq
    walks_frac_map = grouped_walks.agg({'Match_Length' : 'sum',
                                    'Query_Fragment_Length' : 'sum',
                                    'Alignment_Length' : 'sum'
                                   })
    walks_high_frac_map = walks_frac_map[
        (walks_frac_map['Match_Length']/walks_frac_map['Query_Fragment_Length']) > 0.8] #use this to filter for fraction mapped

    # ... and applied to the per-walk summary table.
    df2 = real_walks_summarized_firstx[cond]['length_6']
    cond1_filtered = df2[
        (df2['Chrom_Number'] < 3) &
        (df2['Walk_Comp_Type'].isin(['A', 'B', 'AB'])) &
        (df2['Query_Name'].isin(good_walks_mapq.index)) &
        (df2['Query_Name'].isin(walks_high_frac_map.index)) &
        (df2['chrom'].isin(good_chroms))]

    # .assign returns a new frame, so the annotation columns are not written
    # onto a slice of `cond1_filtered` (which triggers SettingWithCopyWarning).
    #1 chrom walks
    cis_walks = cond1_filtered[cond1_filtered['Chrom_Number'] == 1].assign(
        Condition = cond,
        Label = labeldict[cond],
        Replicate = repdict[cond],
        Walk_CisTrans_Type = '1 chrom',
    )

    #2 chrom walks
    trans_walks = cond1_filtered[cond1_filtered['Chrom_Number'] == 2].assign(
        Condition = cond,
        Label = labeldict[cond],
        Replicate = repdict[cond],
        Walk_CisTrans_Type = '2 chrom',
    )

    walk_frames.extend([cis_walks, trans_walks])

CompartmentType_ByInterChromSteps = pd.concat(walk_frames, ignore_index = True)


In [None]:
# Row-normalised contingency table: for each condition, the fraction of walks
# in every (number of chromosomes, compartment type) combination.
CompartmentType_ByInterChromSteps_ct = pd.crosstab(
    index=CompartmentType_ByInterChromSteps['Condition'],
    columns=[
        CompartmentType_ByInterChromSteps['Chrom_Number'],
        CompartmentType_ByInterChromSteps['Walk_Comp_Type'],
    ],
    normalize = 'index',
)


In [None]:
# Stacked bar per condition; each segment is one (number of chromosomes,
# compartment type) column of the crosstab.
ax = CompartmentType_ByInterChromSteps_ct.plot(
    kind = 'bar',
    stacked = True,
    use_index = True,
    colormap='Accent',
    figsize = (7, 4),
)
ax.set_xlabel('Sample')
ax.set_ylabel('Fraction of Walks')
ax.set_title('Walk Types')
ax.legend(bbox_to_anchor=(1.04,1), title = '# of Chroms, Comp. Type')

ax.figure.savefig(f'{outDataDir}/figures/220518_MRICRF_AllSamples_R1R2R3_MC3C_CisTransByCompType_Bar_100kbbinEig1_GoodChroms.png', dpi = 300, bbox_inches = "tight")

In [None]:
# Same contingency table as above but with raw walk counts (no normalisation).
# NOTE: this reuses the name of the normalised table, so re-running the bar
# plot cell after this one would plot counts rather than fractions.
CompartmentType_ByInterChromSteps_ct = pd.crosstab(
    index=CompartmentType_ByInterChromSteps['Condition'],
    columns=[
        CompartmentType_ByInterChromSteps['Chrom_Number'],
        CompartmentType_ByInterChromSteps['Walk_Comp_Type'],
    ],
)

In [None]:
# Row sums of the count crosstab: total number of walks per condition.
CompartmentType_ByInterChromSteps_ct.sum(axis = 1) #good chroms only

In [None]:
#Scaling plots and scaling slope

In [None]:
def bedslice(grouped, chrom, start, end):
	"""Return the rows of one chromosome that overlap the interval [start, end).

	Assumes no proper nesting of intervals. The lookup uses binary search on
	the 'end' and 'start' columns, so both must be sorted within each group.

	Parameters
	----------
	grouped : groupby object keyed by chromosome; groups need 'start' and 'end' columns.
	chrom : key of the group to slice.
	start, end : bounds of the query interval.

	Returns
	-------
	Positional slice of the chromosome's group: rows with end > `start` and start < `end`.
	"""
	intervals = grouped.get_group(chrom)
	interval_ends = intervals['end'].values
	interval_starts = intervals['start'].values
	# First row whose end lies strictly past the query start.
	first = interval_ends.searchsorted(start, side='right')
	# From there on, count the rows whose start lies before the query end.
	stop = first + interval_starts[first:].searchsorted(end, side='left')
	return intervals.iloc[first:stop]


def intlogbins(start, end, N=None, ratio=None):
	"""Return strictly increasing, approximately log-spaced integer bin edges.

	Parameters
	----------
	start, end : first and last bin edge (converted to int).
	N : number of edges to generate. Give either N or ratio, not both.
	ratio : approximate ratio between consecutive edges; the number of edges
		is derived from it as round(log(end / start) / log(ratio)).

	Returns
	-------
	1-D integer array whose first element is `start` and last is `end`.

	Raises
	------
	ValueError : if both or neither of N and ratio are given.
	AssertionError : if the edges cannot be made strictly increasing or the
		endpoints are not reproduced.
	"""
	start = int(start)
	end = int(end)
	if ratio is not None:
		if N is not None:
			raise ValueError("Please specify only N or ratio")
		N = np.log(end / start) / np.log(ratio)
	elif N is None:
		raise ValueError("Please specify either N or ratio")
	# np.logspace requires an integer count. The ratio-derived value is a
	# float (and can land just below a whole number), so round it.
	N = int(round(N))
	bins = np.logspace(np.log10(start), np.log10(end), N)
	bins = np.array(np.rint(bins), dtype=int)
	# Rounding can collapse neighbouring small bins onto the same integer.
	# Push every interior edge above its predecessor; comparing with <= also
	# handles runs of three or more equal values, which a plain equality
	# check leaves non-monotonic.
	for i in range(1, len(bins) - 1):
		if bins[i] <= bins[i - 1]:
			bins[i] = bins[i - 1] + 1
	assert np.all(bins[1:] > bins[:-1])
	assert bins[0] == start
	assert bins[-1] == end
	return bins


def geomprog(factor, start=1):
	"""Endless geometric progression: start, start*factor, start*factor**2, ..."""
	current = start
	while True:
		yield current
		current *= factor


def _geomrange(start, end, factor, endpoint):
	"""Yield the rounded, de-duplicated terms of a geometric progression up to `end`.

	Terms are rounded to the nearest integer, consecutive repeats are skipped,
	and generation stops at the first rounded term greater than `end`. If
	`endpoint` is true and the last value yielded is not `end`, `end` is
	yielded as the final value.
	"""
	last = np.nan
	for term in geomprog(factor, start):
		rounded = int(round(term))
		if rounded > end:
			break
		if rounded != last:
			last = rounded
			yield rounded

	if endpoint and last != end:
		yield end


def geomrange(start, end, factor, endpoint=False):
	"""Integer array of geometrically growing values from `start` up to `end`.

	See `_geomrange` for the rounding / de-duplication rules; `endpoint`
	controls whether `end` is appended when the progression does not land on it.
	"""
	values = _geomrange(start, end, factor, endpoint)
	return np.fromiter(values, dtype=int)


def geomspace(start, end, num=50, endpoint=True):
	"""Integer geometric range whose growth factor is derived from `num`.

	The factor is 1 + log10(end / start) / num, so `num` tunes the spacing
	rather than fixing the exact number of values returned.
	"""
	decades = np.log10(end) - np.log10(start)
	factor = 1 + (decades / num)
	return geomrange(start, end, factor, endpoint=endpoint)


def _contact_areas(distbins, scaffold_length):
	distbins = distbins.astype(float)
	scaffold_length = float(scaffold_length)
	outer_areas = np.maximum(scaffold_length - distbins[:-1], 0) ** 2
	inner_areas = np.maximum(scaffold_length - distbins[1: ], 0) ** 2
	return 0.5 * (outer_areas - inner_areas)
	

def contact_areas(distbins, region1, region2):
	"""Per-distance-bin area for contacts between two (start, end) regions.

	For identical regions this is _contact_areas over the region length.
	Otherwise the regions are ordered by start and the area is assembled
	from _contact_areas terms over the combined span, minus the terms for
	the two offsets, plus the gap term when the regions do not touch.
	"""
	if region1 == region2:
		lo, hi = region1
		return _contact_areas(distbins, hi - lo)

	(start1, end1), (start2, end2) = region1, region2
	if start2 <= start1:
		# make region 1 the one that starts first
		start1, end1, start2, end2 = start2, end2, start1, end1
	areas = (
		_contact_areas(distbins, end2 - start1) -
		_contact_areas(distbins, start2 - start1) -
		_contact_areas(distbins, end2 - end1)
	)
	if end1 < start2:
		# disjoint regions: add the gap term back
		areas += _contact_areas(distbins, start2 - end1)

	return areas

In [None]:
def scaling_on_go(input_df):
    """Compute per-chromosome contact frequency versus separation.

    Reads the notebook-level `chr_sizes` (chromosome sizes table) and
    `distbins` (distance bin edges). The last three entries of
    `chr_sizes.index` are skipped.

    Parameters
    ----------
    input_df : DataFrame with columns 'chrom1', 'pos1', 'chrom2', 'pos2'.

    Returns
    -------
    chrom_out : dict
        chromosome -> observed counts divided by contact_areas(...) per bin,
        plus 'Average' (NaN-ignoring mean over chromosomes) and 'Distbins'.
    chrom_obs : dict
        chromosome -> raw histogram counts per bin.
    """
    chromosomes = list(chr_sizes.index[:-3])

    chrom_out = {}
    chrom_obs = {}

    for chrom in chromosomes:
        start = 0
        end = chr_sizes.loc[chrom].values[0]
        region = (start, end)

        # Both sides must lie on this chromosome: a separation is only
        # defined for cis pairs. Filtering on chrom1 alone would count
        # trans pairs as if pos2 were on the same chromosome.
        in_region = (
            (input_df['chrom1'] == chrom) &
            (input_df['chrom2'] == chrom) &
            (input_df['pos1'] >= start) &
            (input_df['pos1'] < end) &
            (input_df['pos2'] >= start) &
            (input_df['pos2'] < end)
        )
        cis_pairs = input_df[in_region]
        dists = abs(cis_pairs['pos2'] - cis_pairs['pos1']).values

        obs, _ = np.histogram(
            dists[(dists >= 1000) & (dists < 100000000)],
            bins=distbins)

        area = contact_areas(distbins, region, region)

        # Bins beyond the chromosome length have zero area (0/0 -> NaN);
        # those NaNs are skipped by nanmean below, so silence the warning.
        with np.errstate(divide='ignore', invalid='ignore'):
            chrom_out[chrom] = obs / area
        chrom_obs[chrom] = obs

    chrom_out["Average"] = np.nanmean(list(chrom_out.values()), axis=0)
    chrom_out["Distbins"] = distbins

    return chrom_out, chrom_obs

In [None]:
#Read in walks, convert to pairwise interactions, convert to cooler

#Pair format: 
#index	name	description
#1	read_id	the ID of the read as defined in fastq files
#2	chrom1	the chromosome of the alignment on side 1
#3	pos1	the 1-based genomic position of the outer-most (5’) mapped bp on side 1 - I'm using midpoint of mapped fragment
#4	chrom2	the chromosome of the alignment on side 2
#5	pos2	the 1-based genomic position of the outer-most (5’) mapped bp on side 2 - I'm using midpoint of mapped fragment
#6	strand1	the strand of the alignment on side 1
#7	strand2	the strand of the alignment on side 2

#Using the midpoints of the mapped fragments since these are not sonicated, and are long reads.  

In [None]:
# Turn each walk into pairwise "direct" interactions: every fragment is
# paired with the fragment on the preceding row when both rows carry
# the same read id.
pairs_full = {}

for cond in conditions:
    reads = real_walks_with_summary_firstx[cond]['length_6'][
        ['Query_Name', 'chrom', 'mid', 'Strand']
    ].rename(columns={
        'Query_Name': 'read_id', 'chrom': 'chrom1', 'mid': 'pos1', 'Strand': 'strand1'})
    # same table shifted down by one row = the previous fragment
    reads2 = reads.shift().rename(columns={
        'read_id': 'read_id2', 'chrom1': 'chrom2', 'pos1': 'pos2', 'strand1': 'strand2'})
    pairs = reads.join(reads2)
    same_read = pairs['read_id'] == pairs['read_id2']
    pairs = pairs.loc[
        same_read,
        ['read_id', 'chrom1', 'pos1', 'chrom2', 'pos2', 'strand1', 'strand2']
    ].reset_index(drop = True)
    pairs_full[cond] = pairs.convert_dtypes(
        infer_objects=True, convert_string=True, convert_integer=True,
        convert_boolean=True, convert_floating=True)

In [None]:
pairs_full[cond]

In [None]:
chr_sizes = pd.read_table("../data/hg38.reduced.chrom.sizes", header=None, index_col=0)
distbins = geomspace(1000, 100000000, 20)

In [None]:
chr_sizes.index

In [None]:
#Keep only pairs whose two sides are both on chromosomes listed in chr_sizes
for cond in conditions:
    side1_known = pairs_full[cond]['chrom1'].isin(chr_sizes.index)
    side2_known = pairs_full[cond]['chrom2'].isin(chr_sizes.index)
    pairs_full[cond] = pairs_full[cond][side1_known & side2_known].reset_index(drop = True)

In [None]:
pairs_full[cond]

In [None]:
#https://stackoverflow.com/questions/31254050/adding-a-comment-with-to-csv-using-pandas

for cond in conditions:
    pairs_path = f'{outDataDir}/data/220518_{cond}_AllDirect.pairs'

    # Start the file with a '#'-prefixed line, then let pandas append the
    # tab-separated rows (no index, no column header) below it.
    with open(pairs_path, 'w') as f:
        f.write('#header\n')

    pairs_full[cond].to_csv(pairs_path, sep = '\t', index = False, header = None, mode = 'a')


In [None]:
#then flip pairs to make upper triangular matrix
# Runs the `pairtools flip` command line tool once per condition via the
# IPython `!` shell escape; $name is substituted with the Python variable.
chromsizes_file = "../data/hg38.reduced.chrom.sizes"
for cond in conditions:
    # input: the unflipped pairs written earlier; output: *_Flipped.pairs
    pairsfile = f'{outDataDir}/data/220518_{cond}_AllDirect.pairs'
    outputfile = f'{outDataDir}/data/220518_{cond}_AllDirect_Flipped.pairs'
    !pairtools flip -c $chromsizes_file -o $outputfile $pairsfile

In [None]:
#then sort pairs to make upper triangular matrix
# Runs `pairtools sort` once per condition via the IPython `!` shell escape.
for cond in conditions:
    # input: *_Flipped.pairs from the flip step; output: *_Sorted.pairs
    pairsfile = f'{outDataDir}/data/220518_{cond}_AllDirect_Flipped.pairs'
    outputfile = f'{outDataDir}/data/220518_{cond}_AllDirect_Sorted.pairs'
    !pairtools sort -o $outputfile $pairsfile

In [None]:
# Load the sorted .pairs file of every condition ('#' lines are skipped).
pairs_full_sorted = {}
for cond in conditions:
    sorted_pairs = pd.read_csv(
        f'{outDataDir}/data/220518_{cond}_AllDirect_Sorted.pairs',
        sep = '\t', header = None, comment = '#')
    sorted_pairs.columns = ['read_id', 'chrom1', 'pos1', 'chrom2', 'pos2', 'strand1', 'strand2']
    pairs_full_sorted[cond] = sorted_pairs

In [None]:
pairs_full_sorted[cond]

In [None]:
# Run scaling_on_go per condition; keep the area-normalized curves and
# the raw per-bin counts in two parallel dicts.
output, output_obs = {}, {}

for cond in conditions:
    normalized, observed = scaling_on_go(pairs_full_sorted[cond])
    output[cond] = normalized
    output_obs[cond] = observed

In [None]:
#normalized by auc
#scaling plot
fig = plt.figure(figsize=(5, 4))

for cond in conditions:
    dbins = np.array(output[cond]['Distbins'], dtype = float)
    x = np.sqrt(dbins[:-1] * dbins[1:])  # geometric midpoint of each distance bin
    y = output[cond]['Average']
    yfill = np.nan_to_num(y)  # NaN bins contribute zero to the integral
    # np.trapz takes (y, x). The previous call had the arguments swapped,
    # which integrated x over y and needed a sign flip to stay positive.
    auc = np.trapz(yfill, x)
    plt.plot(x, y/auc, label = cond, lw = 1, color = sampleColors[cond], ls = sampleLineStyles[cond])


ax = plt.gca()
ax.set_xscale('log')
ax.set_yscale('log')
plt.ylabel("P(s)")
plt.xlabel("separation (bp)")
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.1, prop = {'size':8})
plt.title('Scaling, normalized to auc')

#fixing ticks
locmajx = mpl.ticker.LogLocator(base=10,numticks=6)
ax.xaxis.set_major_locator(locmajx)
# the y locator belongs on the y axis (it was being set on the x axis)
locmajy = mpl.ticker.LogLocator(base=10,numticks=6)
ax.yaxis.set_major_locator(locmajy)

locmin = mpl.ticker.LogLocator(base=10.0,subs=(0.2,0.4,0.6,0.8),numticks=12)
ax.xaxis.set_minor_locator(locmin)
ax.xaxis.set_minor_formatter(mpl.ticker.NullFormatter())

locminy = mpl.ticker.LogLocator(base=10.0,subs=(0.2,0.4,0.6,0.8),numticks=12)
ax.yaxis.set_minor_locator(locminy)
ax.yaxis.set_minor_formatter(mpl.ticker.NullFormatter())

plt.xlim(1e3, 3e8)
plt.tight_layout()

fname = f'{outDataDir}/figures/220518_R1R2R3_MR_MC3C_ScalingPlot_AllReads_Direct_Normalized.png'

plt.savefig(fname, dpi = 300, bbox_inches = "tight")

In [None]:
#derivative plot
# Import the filter from its public location; `scipy.ndimage.filters` is a
# deprecated namespace and a bare `import scipy` does not guarantee that
# the ndimage submodule is loaded.
from scipy.ndimage import gaussian_filter1d

fig = plt.figure(figsize=(5, 4))

for cond in conditions:
    # float bins, as in the scaling plot, so the edge products are not integer arithmetic
    dbins = np.array(output[cond]['Distbins'], dtype = float)
    x = np.sqrt(dbins[:-1] * dbins[1:])  # geometric midpoint of each distance bin
    y = output[cond]['Average']
    # log-log slope between consecutive bins
    dydx = diff(np.log(y))/diff(np.log(x))
    dx = np.sqrt(x[:-1] * x[1:])  # midpoint between consecutive bin midpoints
    plt.plot(dx, gaussian_filter1d(dydx, 1.2), label = cond, lw = 1, color = sampleColors[cond],
            ls = sampleLineStyles[cond])

ax = plt.gca()
ax.set_xscale('log')
plt.ylabel("P(s) Slope")
plt.xlabel("separation (bp)")
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.1, prop = {'size':8})
plt.title('Scaling Slope, MC3C')

#fixing ticks
locmajx = mpl.ticker.LogLocator(base=10,numticks=6)
ax.xaxis.set_major_locator(locmajx)

locmin = mpl.ticker.LogLocator(base=10.0,subs=(0.2,0.4,0.6,0.8),numticks=12)
ax.xaxis.set_minor_locator(locmin)
ax.xaxis.set_minor_formatter(mpl.ticker.NullFormatter())

plt.xlim(1e3, 3e8)
plt.tight_layout()

fname = f'{outDataDir}/figures/220518_R1R2R3_MR_ScalingPlotSlope_AllReads_Direct.png'

plt.savefig(fname, dpi = 300, bbox_inches = "tight")

In [None]:
#Alignment length distribution

In [None]:
outDataDir

In [None]:
#read in alignments for each condition
# Directory of raw alignments. The notebook header describes an
# 'alignments' folder next to 'scripts'; edit this path if yours differs.
alignmentDir = '../alignments'

paf_columns = ['Query_Name', 'Query_Length', 'Query_Start', 'Query_End',
               'Strand', 'chrom', 'Target_ChrSize', 'start',
               'end', 'Match_Length', 'Alignment_Length', 'Mapping_Quality',
               'TypeOfAln', 'NumberOfMinimizers', 'ChainingScorePrimary',
               'ChainingScoreSecondary', 'ApproxPerBaseDivergence', 'RepetitiveSeedLength']

# Collect one frame per condition and concatenate once at the end:
# DataFrame.append was removed in pandas 2.0 and re-copies the whole
# table on every iteration.
per_condition = []
for cond in conditions:
    df = pd.read_csv(f'{alignmentDir}/{long_names[cond]}.hg38.minimap2.output.paf', header = None,
                     sep = '\t')
    df.columns = paf_columns
    df['Condition'] = cond
    per_condition.append(df)

aligned_reads = pd.concat(per_condition, ignore_index = True) if per_condition else pd.DataFrame()

In [None]:
aligned_reads.head(20)

In [None]:
alignednum = aligned_reads.pivot_table(index = 'Condition', values = 'Query_Name', aggfunc = pd.Series.nunique)

In [None]:
alignednum

In [None]:
fragmentnum = pd.DataFrame(aligned_reads.groupby(['Condition', 'Query_Name']).size()).reset_index()

In [None]:
fragmentnum.columns = ['Condition', 'Query_Name', 'Count']

In [None]:
repdict

In [None]:
# Map each row's condition to its label / replicate in one vectorized
# step. The previous `frame[col][mask] = value` chained assignment can
# write to a temporary copy and is rejected under pandas copy-on-write.
fragmentnum['Label'] = fragmentnum['Condition'].map({cond: labeldict[cond] for cond in conditions})
fragmentnum['Replicate'] = fragmentnum['Condition'].map({cond: repdict[cond] for cond in conditions})

In [None]:
fragmentnum

In [None]:
# Density histogram (bin width 1, step outline) of fragments per read,
# one panel per replicate, one line per sample label.
cmap_bar = sns.color_palette(['#878787','#E1B7A3','#17BECF', '#D62728', '#0D6871', '#751616'])
sample_order = ['t0 Prometa', 't2 Ana/Telo', 't4 G1 DMSO', 't4 G1 ICRF-193', 't8 G1 DMSO', 't8 G1 ICRF-193']
with sns.axes_style("whitegrid"):
    g = sns.displot(
        data=fragmentnum, x='Count', col='Replicate', hue='Label', hue_order = sample_order,
        kind='hist', stat = 'density', binwidth = 1, element = 'step', fill = False,
        palette = cmap_bar, log_scale = False, common_norm = False, common_bins = True,
        lw = 2, height = 3, facet_kws={'sharey': True})
    g._legend.set_title('Sample')
    g.set_xlabels('Number Of Fragments')

    plt.savefig(f'{outDataDir}/figures/220518_MRICRF_R1R2R3_Separate_Density_FragmentNumber_Kde_DropNA.png', bbox_inches = 'tight', dpi = 300)

In [None]:
#read length (query_length) of aligned reads

In [None]:
aligned_reads

In [None]:
# Mean Query_Length per (Condition, Query_Name). Select the column
# explicitly: the first positional argument of GroupBy.mean is
# `numeric_only`, so passing 'Query_Length' there did not select a column.
read_length = (
    aligned_reads
    .groupby(['Condition', 'Query_Name'], as_index = False)['Query_Length']
    .mean()
)

In [None]:
read_length

In [None]:
# Map each row's condition to its label / replicate in one vectorized
# step. The previous `frame[col][mask] = value` chained assignment can
# write to a temporary copy and is rejected under pandas copy-on-write.
read_length['Label'] = read_length['Condition'].map({cond: labeldict[cond] for cond in conditions})
read_length['Replicate'] = read_length['Condition'].map({cond: repdict[cond] for cond in conditions})

In [None]:
read_length

In [None]:
# KDE of read length (log-scaled x), one panel per replicate, one line per sample label.
cmap_bar = sns.color_palette(['#878787','#E1B7A3','#17BECF', '#D62728', '#0D6871', '#751616'])
with sns.axes_style("whitegrid"):
    g = sns.displot(kind='kde', data=read_length, col='Replicate', x='Query_Length', hue='Label', 
                    palette = cmap_bar, log_scale = True, common_norm = False, common_grid = True, 
                    lw = 2, facet_kws={'sharey': True}, height = 3, hue_order = ['t0 Prometa', 't2 Ana/Telo', 't4 G1 DMSO', 't4 G1 ICRF-193', 't8 G1 DMSO', 't8 G1 ICRF-193'])
    g._legend.set_title('Sample')
    g.set_xlabels('Read Length')

    # removed a stray closing parenthesis that made this cell a SyntaxError
    plt.savefig(f'{outDataDir}/figures/220518_MRICRF_R1R3R4_Separate_Density_ReadLength_LogScaleKde.png', bbox_inches = 'tight', dpi = 300)

In [None]:
#run percent cis on pairs files
cis_percent = {}
for cond in conditions:
    # Use a loop-local name: assigning to `pairs_full_sorted` here would
    # replace the per-condition dict of that name with a single DataFrame
    # and break any re-run of the cells that index it by condition.
    sorted_pairs = pd.read_csv(f'{outDataDir}/data/220518_{cond}_AllDirect_Sorted.pairs',
                               sep = '\t', header = None, comment = '#')
    sorted_pairs.columns = ['read_id', 'chrom1', 'pos1', 'chrom2', 'pos2', 'strand1', 'strand2']
    cis_count = int((sorted_pairs['chrom1'] == sorted_pairs['chrom2']).sum())
    total_count = len(sorted_pairs)
    # an empty pairs file gives NaN instead of a ZeroDivisionError
    cis_percent[cond] = (cis_count/total_count) * 100 if total_count else float('nan')

In [None]:
cis_percent

In [None]:
#Intermingling plots

In [None]:
#For each of the above, can separate by A vs B compartments, or by number of chromosomes visited
comp_types = ['A', 'AB', 'B']

In [None]:
#load intermingling results as pickle file for later use since takes a long time to run
# NOTE: pickle.load executes arbitrary code from the file; only load pickles you produced yourself.
# The context manager closes the file even if unpickling raises.
with open(f'{outDataDir}/data/220518_MRICRF_R1R2R3_InterminglingSlidingWindowSweep.pkl', 'rb') as f:
    Intermingling_Sliding_Window_Sweep = pickle.load(f)

In [None]:
# Walk counts per (Condition, Window_Midpoint, Label, Replicate) and
# Walk_Comp_Type, in long format, for every window size.
Intermingling_Sliding_Window_Sweep_CT_Counts = {}

for window_size in [4e6, 6e6, 8e6, 1e7, 1.2e7, 1.4e7, 1.6e7]:
    size_key = f'{window_size}'
    sweep_table = Intermingling_Sliding_Window_Sweep[size_key]
    id_columns = ['Condition', 'Window_Midpoint', 'Label', 'Replicate']
    count_table = pd.crosstab(
        index=[sweep_table[name] for name in id_columns],
        columns=sweep_table['Walk_Comp_Type'],
        normalize = False)
    Intermingling_Sliding_Window_Sweep_CT_Counts[size_key] = count_table.reset_index().melt(id_vars = id_columns)

In [None]:
# One panel per window size: number of walks (rows with Walk_Comp_Type
# 'All') against the window midpoint, colored by label, styled by replicate.
fig = plt.figure(figsize=(24, 12))
gs = GridSpec(nrows= 2, ncols=4, wspace = 1, hspace = .4)
cmap_bar = sns.color_palette(['#878787','#E1B7A3','#17BECF', '#D62728', '#0D6871', '#751616'])

for i, window_size in enumerate([4e6, 6e6, 8e6, 1e7, 1.2e7, 1.4e7, 1.6e7]):
    ax = plt.subplot(gs[i])
    window_counts = Intermingling_Sliding_Window_Sweep_CT_Counts[f'{window_size}']
    all_comp_counts = window_counts[window_counts['Walk_Comp_Type'] == 'All']
    sns.lineplot(data = all_comp_counts, x = 'Window_Midpoint', y = 'value',
                 hue = 'Label', style = 'Replicate', palette = cmap_bar)
    plt.ylabel('Number of Walks')
    plt.xlabel('Window Midpoint (bp)')
    plt.title(f'Walk Number Per {window_size} bp Window')
plt.savefig(f'{outDataDir}/figures/220518_MRICRF_AllSamples_WalkNumberPerWindow_AllComps_Separate_WindowSizeSweep_GoodChroms.png', dpi = 300)

In [None]:
#Heatmaps of each comparison of interest

In [None]:
#for heatmaps - combine all replicates
# Row-normalized crosstab: for each (Label, Window_Midpoint,
# Walk_Comp_Type) the columns hold the fraction of walks per
# Inter_Largest_Step_Side_Step_Num value. The heatmaps below plot these
# values on a 0.4-0.65 'IM' color scale, which only makes sense for
# fractions; with normalize = False the table held raw counts. This also
# matches the per-replicate crosstab later in the notebook.
Intermingling_Sliding_Window_Sweep_CT = {}

for window_size in [4e6, 6e6, 8e6, 1e7, 1.2e7, 1.4e7, 1.6e7]:
    Intermingling_Sliding_Window_Sweep_CT[f'{window_size}'] = pd.crosstab(
        index=[Intermingling_Sliding_Window_Sweep[f'{window_size}']['Label'],
               Intermingling_Sliding_Window_Sweep[f'{window_size}']['Window_Midpoint'],
               Intermingling_Sliding_Window_Sweep[f'{window_size}']['Walk_Comp_Type'],
              ],
        columns=Intermingling_Sliding_Window_Sweep[f'{window_size}']['Inter_Largest_Step_Side_Step_Num'],
        normalize = 'index').reset_index()


In [None]:
# Long format: one row per (Label, Window_Midpoint, Walk_Comp_Type, step-number column).
Intermingling_SlidingWindow_Melt_Sweep = {}

for window_size in [4e6, 6e6, 8e6, 1e7, 1.2e7, 1.4e7, 1.6e7]:
    wide_table = Intermingling_Sliding_Window_Sweep_CT[f'{window_size}']
    Intermingling_SlidingWindow_Melt_Sweep[f'{window_size}'] = wide_table.melt(
        id_vars = ['Label', 'Window_Midpoint', 'Walk_Comp_Type'])


In [None]:
# Keep only the rows where Inter_Largest_Step_Side_Step_Num equals 2.
Intermingling_SlidingWindow_Sweep_ToPlot = {}
for window_size in [4e6, 6e6, 8e6, 1e7, 1.2e7, 1.4e7, 1.6e7]:
    melted = Intermingling_SlidingWindow_Melt_Sweep[f'{window_size}']
    is_step_two = melted['Inter_Largest_Step_Side_Step_Num'] == 2
    Intermingling_SlidingWindow_Sweep_ToPlot[f'{window_size}'] = melted.loc[is_step_two]

In [None]:
labels = ['t0 Prometa', 't2 Ana/Telo', 't4 G1 DMSO', 't4 G1 ICRF-193', 't8 G1 DMSO', 't8 G1 ICRF-193']

In [None]:
# Outer-merge the seven per-window-size tables on the shared key columns.
# Every table carries a column named 'value'. The merges alternate: a
# merge with explicit `suffixes` resolves a collision between two 'value'
# columns (giving value_16000000.0 / value_14000000.0, then
# value_12000000.0 / value_10000000.0, then value_8000000.0 /
# value_6000000.0), and the merge after it adds the next table's 'value'
# without a collision. The last table (4e6) therefore stays plain 'value'.
# The order of the merges matters for these column names.
heatmap_df = Intermingling_SlidingWindow_Sweep_ToPlot[f'{1.6e7}'].merge(
    Intermingling_SlidingWindow_Sweep_ToPlot[f'{1.4e7}'], how = 'outer', 
    on = ['Label', 'Window_Midpoint', 'Walk_Comp_Type', 
          'Inter_Largest_Step_Side_Step_Num'], suffixes = (f'_{1.6e7}', f'_{1.4e7}')).merge(
    Intermingling_SlidingWindow_Sweep_ToPlot[f'{1.2e7}'], how = 'outer', 
    on = ['Label', 'Window_Midpoint', 'Walk_Comp_Type', 
          'Inter_Largest_Step_Side_Step_Num']).merge(
    Intermingling_SlidingWindow_Sweep_ToPlot[f'{1e7}'], how = 'outer', 
    on = ['Label', 'Window_Midpoint', 'Walk_Comp_Type', 
          'Inter_Largest_Step_Side_Step_Num'], suffixes = (f'_{1.2e7}', f'_{1e7}')).merge(
    Intermingling_SlidingWindow_Sweep_ToPlot[f'{8e6}'], how = 'outer', 
    on = ['Label', 'Window_Midpoint', 'Walk_Comp_Type', 
          'Inter_Largest_Step_Side_Step_Num']).merge(
    Intermingling_SlidingWindow_Sweep_ToPlot[f'{6e6}'], how = 'outer', 
    on = ['Label', 'Window_Midpoint', 'Walk_Comp_Type', 
          'Inter_Largest_Step_Side_Step_Num'], suffixes = (f'_{8e6}', f'_{6e6}')).merge(
    Intermingling_SlidingWindow_Sweep_ToPlot[f'{4e6}'], how = 'outer', 
    on = ['Label', 'Window_Midpoint', 'Walk_Comp_Type', 
          'Inter_Largest_Step_Side_Step_Num']).sort_values(by = ['Label', 'Window_Midpoint', 'Walk_Comp_Type'])

# Per label: a table with one row per window size and one column per window midpoint.
heatmap_dfs_bylabel = {}

for cond in labels:
    df = heatmap_df[heatmap_df['Label'] == cond]
    
    # rows with Walk_Comp_Type 'All' only; value columns ordered from the largest to the smallest window
    df = df[(df['Walk_Comp_Type'] == 'All')][[
        'Window_Midpoint', 'value_16000000.0', 'value_14000000.0', 'value_12000000.0',
        'value_10000000.0', 'value_8000000.0','value_6000000.0', 'value']].set_index('Window_Midpoint', drop = True)

    # relabel the value columns with short window-size labels (presumably Mb — confirm)
    df.columns = [16, 14, 12, 10, 8, 6, 4]
    
    heatmap_dfs_bylabel[cond] = df.transpose()
    # NOTE(review): hard-codes exactly 26 window midpoints, relabeled 2..27;
    # this raises if the data has a different number of midpoints — confirm.
    heatmap_dfs_bylabel[cond].columns = list(range(2, 28))

In [None]:
heatmap_dfs_bylabel

In [None]:
import cooltools.lib.plotting

In [None]:
# One heatmap per label: window size (rows) by window midpoint (columns); missing cells are masked.
fig = plt.figure(figsize=(24, 9))

gs = GridSpec(nrows= 3, ncols=4, wspace = 0.3, hspace = 1)
for i, cond in enumerate(labels):
    ax = plt.subplot(gs[i])
    label_table = heatmap_dfs_bylabel[cond]
    sns.heatmap(label_table, mask=label_table.isnull(), square = False,
                vmin=0.4, vmax=0.65,
                cmap = 'fall', cbar_kws={'label': 'IM'})
    plt.ylabel('Window Size (Mb)')
    plt.xlabel('Genomic Distance Between Interacting Domains \n(Window Midpoint, Mb)')
    plt.title(cond)

plt.savefig(f'{outDataDir}/figures/220518_Heatmaps_SlidingWindowSweep_MRICRF_GoodChroms_4to16Mb.png', dpi = 300, bbox_inches = 'tight')

In [None]:
#log2 ratios

In [None]:
# Paired lists: position k of ctrl_labels is compared against position k of treat_labels.
# NOTE(review): for the first six pairs these roles are the reverse of the
# ctrl_labels / treat_labels assignment made later in this notebook (there
# 't0 Prometa' is a control, here it is a treatment) — confirm which
# direction the 'log2FC treat/ctrl' heatmaps are meant to show.
ctrl_labels = ['t2 Ana/Telo', 't4 G1 DMSO', 't8 G1 DMSO', 't4 G1 DMSO', 't8 G1 DMSO', 't8 G1 DMSO', 't2 Ana/Telo', 't4 G1 DMSO', 't2 Ana/Telo', 't8 G1 DMSO']
treat_labels = ['t0 Prometa', 't0 Prometa', 't0 Prometa', 't2 Ana/Telo', 't2 Ana/Telo', 't4 G1 DMSO', 't4 G1 ICRF-193', 't4 G1 ICRF-193', 't8 G1 ICRF-193', 't8 G1 ICRF-193']

In [None]:
# One heatmap per (ctrl, treat) pair: log2(treat) - log2(ctrl) of the per-label tables.
fig = plt.figure(figsize=(24, 9))

gs = GridSpec(nrows= 3, ncols=4, wspace = 0.3, hspace = 1)

for i, (ctrlcond, treatcond) in enumerate(zip(ctrl_labels, treat_labels)):
    ax = plt.subplot(gs[i])
    treat_log2 = np.log2(heatmap_dfs_bylabel[treatcond])
    ctrl_log2 = np.log2(heatmap_dfs_bylabel[ctrlcond])
    log2df = treat_log2 - ctrl_log2
    sns.heatmap(log2df, square = False, vmin=-.5, vmax=.5, cmap = 'coolwarm', cbar_kws={'label': 'log2FC treat/ctrl'})
    plt.ylabel('Window Size (Mb)')
    plt.xlabel('Genomic Distance Between Interacting Domains \n(Window Midpoint, Mb)')
    plt.title(f'{treatcond} vs {ctrlcond}')

plt.savefig(f'{outDataDir}/figures/220518_InterminglingWindowSizeSweep_Heatmaps_TreatvsCtrl_goodchroms_CellCycleDMSO_4to16Mb.png', dpi = 300, bbox_inches = 'tight')

In [None]:
#Just 4mb window size - all samples together

In [None]:
# One row per label: the row labeled 4 of that label's heatmap table.
# Built in a single constructor call; DataFrame.append was removed in pandas 2.0.
df_4mb_only = pd.DataFrame([heatmap_dfs_bylabel[cond].loc[4] for cond in labels])

In [None]:
df_4mb_only.index = labels

In [None]:
df_4mb_only

In [None]:
# Heatmap of the 4 Mb rows: samples by window midpoint, missing cells masked.
missing_4mb = df_4mb_only.isnull()
sns.heatmap(df_4mb_only, mask=missing_4mb, square = False,
            vmin=0.35, vmax=0.65,
            cmap = 'fall', cbar_kws={'label': 'IM'})
plt.ylabel('Sample')
plt.xlabel('Genomic Distance Between Interacting Domains \n(Window Midpoint, Mb)')
plt.savefig(f'{outDataDir}/figures/220518_MRICRF_IMHeatmap_4MbWindow.png', dpi = 300, bbox_inches = 'tight')

In [None]:
# One row per label: the row labeled 12 of that label's heatmap table.
# Built in a single constructor call; DataFrame.append was removed in pandas 2.0.
df_12mb_only = pd.DataFrame([heatmap_dfs_bylabel[cond].loc[12] for cond in labels])

In [None]:
df_12mb_only.index = labels

In [None]:
df_12mb_only.dropna(axis = 'columns')

In [None]:
# Heatmap of the 12 Mb rows; columns containing any missing value are dropped instead of masked.
complete_12mb = df_12mb_only.dropna(axis = 'columns')
sns.heatmap(complete_12mb, square = False,
            vmin=0.4, vmax=0.65,
            cmap = 'fall', cbar_kws={'label': 'IM'})
plt.ylabel('Sample')
plt.xlabel('Genomic Distance Between Interacting Domains \n(Window Midpoint, Mb)')
plt.savefig(f'{outDataDir}/figures/220518_MRICRF_IMHeatmap_12MbWindow.png', dpi = 300, bbox_inches = 'tight')

In [None]:
#line plots with error and permutations

window_sizes = [4e6, 6e6, 8e6, 1e7, 1.2e7, 1.4e7, 1.6e7]

In [None]:
# Per-replicate version: row-normalized crosstab of step number per
# (Condition, Window_Midpoint, Label, Walk_Comp_Type, Replicate).
Intermingling_Sliding_Window_Sweep_CT = {}

for window_size in window_sizes:
    size_key = f'{window_size}'
    sweep_table = Intermingling_Sliding_Window_Sweep[size_key]
    row_keys = ['Condition', 'Window_Midpoint', 'Label', 'Walk_Comp_Type', 'Replicate']
    Intermingling_Sliding_Window_Sweep_CT[size_key] = pd.crosstab(
        index=[sweep_table[name] for name in row_keys],
        columns=sweep_table['Inter_Largest_Step_Side_Step_Num'],
        normalize = 'index').reset_index()


In [None]:
# Long format of the per-replicate crosstab, one row per id combination and step-number column.
Intermingling_SlidingWindow_Melt_Sweep = {}

for window_size in window_sizes:
    wide_table = Intermingling_Sliding_Window_Sweep_CT[f'{window_size}']
    Intermingling_SlidingWindow_Melt_Sweep[f'{window_size}'] = wide_table.melt(
        id_vars = ['Condition', 'Window_Midpoint', 'Label', 'Walk_Comp_Type', 'Replicate'])


In [None]:
# Keep only the rows where Inter_Largest_Step_Side_Step_Num equals 2.
Intermingling_SlidingWindow_Sweep_ToPlot = {}
for window_size in window_sizes:
    melted = Intermingling_SlidingWindow_Melt_Sweep[f'{window_size}']
    is_step_two = melted['Inter_Largest_Step_Side_Step_Num'] == 2
    Intermingling_SlidingWindow_Sweep_ToPlot[f'{window_size}'] = melted.loc[is_step_two]

In [None]:
#permutations
iterations = range(100)

#read in pickled intermingling data for each, extract data to plot
Permuted_Intermingling_SlidingWindow_Sweep_ToPlot = {}

# Gather the per-iteration tables in lists and concatenate once per window
# size: DataFrame.append was removed in pandas 2.0 and re-copies the
# accumulated table on every call.
permuted_parts = {f'{window_size}': [] for window_size in window_sizes}

for i in iterations:
    for window_size in window_sizes:
        #run crosstab for each iteration, each window size, melt, and combine into full dataframe
        #read in pickled intermingling data
        # NOTE: pickle.load executes arbitrary code from the file; only load pickles you produced yourself.
        with open(f'{outDataDir}/data/permutations/220518_MRICRF_R1R2R3_PermutedInterminglingSlidingWindowSweep_window{window_size}.iter{i}.pkl', 'rb') as f:
            Permuted_Intermingling_Sliding_Window_Sweep_OneSize = pickle.load(f)
        Permuted_Intermingling_Sliding_Window_Sweep_OneSize['Iteration'] = i

        Permuted_CT = pd.crosstab(
            index=[
                Permuted_Intermingling_Sliding_Window_Sweep_OneSize['Condition'],
                Permuted_Intermingling_Sliding_Window_Sweep_OneSize['Window_Midpoint'],
                Permuted_Intermingling_Sliding_Window_Sweep_OneSize['Label'],
                Permuted_Intermingling_Sliding_Window_Sweep_OneSize['Iteration'],
                Permuted_Intermingling_Sliding_Window_Sweep_OneSize['Walk_Comp_Type']
              ],
            columns=Permuted_Intermingling_Sliding_Window_Sweep_OneSize['Inter_Largest_Step_Side_Step_Num'],
            normalize = 'index').reset_index()

        Permuted_Melt = Permuted_CT.melt(id_vars = [
            'Condition',
            'Window_Midpoint',
            'Label',
            'Iteration',
            'Walk_Comp_Type'
        ])

        Permuted_ToPlot = Permuted_Melt.loc[
            (Permuted_Melt['Inter_Largest_Step_Side_Step_Num'] == 2)]

        permuted_parts[f'{window_size}'].append(Permuted_ToPlot)

for window_size in window_sizes:
    parts = permuted_parts[f'{window_size}']
    Permuted_Intermingling_SlidingWindow_Sweep_ToPlot[f'{window_size}'] = (
        pd.concat(parts, ignore_index = True) if parts else pd.DataFrame())
    

In [None]:
# Stack real and permuted walks per window size and give the columns plot-friendly names.
Intermingling_BothWalkTypes_ToPlot = {}

for window_size in window_sizes:
    size_key = f'{window_size}'

    # .assign returns new frames, which avoids writing into the `.loc`
    # slice stored in the real-walk dict. The dict entries are replaced
    # so they still carry the added columns afterwards.
    permuted = Permuted_Intermingling_SlidingWindow_Sweep_ToPlot[size_key].assign(Walk_Type = 'Permuted')
    real = Intermingling_SlidingWindow_Sweep_ToPlot[size_key].assign(Walk_Type = 'Real', Iteration = np.nan)
    Permuted_Intermingling_SlidingWindow_Sweep_ToPlot[size_key] = permuted
    Intermingling_SlidingWindow_Sweep_ToPlot[size_key] = real

    # pd.concat replaces DataFrame.append (removed in pandas 2.0). Renaming
    # by name instead of by position stays correct if the column order changes.
    combined = pd.concat([real, permuted], ignore_index = True).rename(columns = {
        'Window_Midpoint': 'Window Midpoint',
        'Label': 'Sample',
        'value': 'Fraction Intermingled',
        'Walk_Type': 'Walk Type',
    })
    combined['Window Size'] = window_size

    Intermingling_BothWalkTypes_ToPlot[size_key] = combined


In [None]:
# Stack the per-window-size tables (original row indices are kept).
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
Intermingling_BothWalkTypes_ToPlot_AllWindowSizes = pd.concat(
    [Intermingling_BothWalkTypes_ToPlot[f'{window_size}'] for window_size in window_sizes])

In [None]:
labels = ['t0 Prometa', 't2 Ana/Telo', 't4 G1 DMSO', 't4 G1 ICRF-193', 't8 G1 DMSO', 't8 G1 ICRF-193']
ctrl_labels = ['t0 Prometa', 't0 Prometa', 't0 Prometa', 't2 Ana/Telo', 't2 Ana/Telo', 't4 G1 DMSO', 't2 Ana/Telo', 't4 G1 DMSO', 't2 Ana/Telo', 't8 G1 DMSO']
treat_labels = ['t2 Ana/Telo', 't4 G1 DMSO', 't8 G1 DMSO', 't4 G1 DMSO', 't8 G1 DMSO', 't8 G1 DMSO', 't4 G1 ICRF-193', 't4 G1 ICRF-193', 't8 G1 ICRF-193', 't8 G1 ICRF-193']

In [None]:
# sns.relplot is figure-level and creates its own figure for each call,
# so the blank plt.figure / GridSpec / `ax = gs[i]` scaffolding was unused
# and only produced an empty extra figure.
for i, (ctrlcond, treatcond) in enumerate(zip(ctrl_labels, treat_labels)):
    df = Intermingling_BothWalkTypes_ToPlot_AllWindowSizes[Intermingling_BothWalkTypes_ToPlot_AllWindowSizes['Sample'].isin([ctrlcond, treatcond])]
    # one color per sample actually present, in order of appearance
    # (indexing [0] and [1] raised IndexError when only one sample had data)
    cmap_bar = sns.color_palette([labelPlotColors[sample] for sample in df['Sample'].unique()])
    sns.relplot(data = df[df['Walk_Comp_Type'] == 'All'],
             x = 'Window Midpoint', y = 'Fraction Intermingled', hue = 'Sample', palette = cmap_bar, kind = 'line', 
                style = 'Walk Type', col = 'Window Size', height = 2, aspect = 1.5, seed = 25)
    plt.ylim(0.2, 0.85)

    plt.savefig(f'{outDataDir}/figures/220518_MRICRF_FractionIntermingled_RealvsPermuted_AllComps_{i}_AllWindows.png', dpi = 300, bbox_inches = "tight")

In [None]:
# sns.relplot is figure-level and creates its own figure for each call,
# so the blank plt.figure / GridSpec / `ax = gs[i]` scaffolding was unused
# and only produced an empty extra figure.
for i, (ctrlcond, treatcond) in enumerate(zip(ctrl_labels, treat_labels)):
    df = Intermingling_BothWalkTypes_ToPlot_AllWindowSizes[Intermingling_BothWalkTypes_ToPlot_AllWindowSizes['Sample'].isin([ctrlcond, treatcond])]
    # one color per sample actually present, in order of appearance
    # (indexing [0] and [1] raised IndexError when only one sample had data)
    cmap_bar = sns.color_palette([labelPlotColors[sample] for sample in df['Sample'].unique()])
    # same plot as the bootstrap-interval version, but with a standard-deviation band
    sns.relplot(data = df[df['Walk_Comp_Type'] == 'All'],
             x = 'Window Midpoint', y = 'Fraction Intermingled', hue = 'Sample', palette = cmap_bar, kind = 'line', 
                style = 'Walk Type', col = 'Window Size', height = 2, aspect = 1.5, ci = 'sd')
    plt.ylim(0.2, 0.85)

    plt.savefig(f'{outDataDir}/figures/220518_MRICRF_FractionIntermingled_RealvsPermuted_AllComps_{i}_AllWindows_sd.png', dpi = 300, bbox_inches = "tight")

In [None]:
#4Mb window only

# sns.relplot is figure-level and creates its own figure for each call,
# so the blank plt.figure / GridSpec / `ax = gs[i]` scaffolding was unused
# and only produced an empty extra figure.
for i, (ctrlcond, treatcond) in enumerate(zip(ctrl_labels, treat_labels)):
    df = Intermingling_BothWalkTypes_ToPlot_AllWindowSizes[
        (Intermingling_BothWalkTypes_ToPlot_AllWindowSizes['Sample'].isin([ctrlcond, treatcond])) &
        (Intermingling_BothWalkTypes_ToPlot_AllWindowSizes['Window Size'] == 4e6)
    ]
    # one color per sample actually present, in order of appearance
    # (indexing [0] and [1] raised IndexError when only one sample had data)
    cmap_bar = sns.color_palette([labelPlotColors[sample] for sample in df['Sample'].unique()])
    sns.relplot(data = df[df['Walk_Comp_Type'] == 'All'],
             x = 'Window Midpoint', y = 'Fraction Intermingled', hue = 'Sample', palette = cmap_bar, kind = 'line', 
                style = 'Walk Type', col = 'Window Size', height = 2, aspect = 1.5, seed = 25)
    plt.ylim(0.1, 0.8)
    plt.xlabel('Genomic Distance \nBetween Interacting Domains \n (Window Midpoint, bp)')
    plt.ylabel('IM')

    plt.savefig(f'{outDataDir}/figures/220518_MRICRF_FractionIntermingled_RealvsPermuted_AllComps_{i}_4MbWindows.png', dpi = 300, bbox_inches = "tight")

In [None]:
#12Mb window only

# sns.relplot is figure-level and creates its own figure for each call,
# so the blank plt.figure / GridSpec / `ax = gs[i]` scaffolding was unused
# and only produced an empty extra figure.
for i, (ctrlcond, treatcond) in enumerate(zip(ctrl_labels, treat_labels)):
    df = Intermingling_BothWalkTypes_ToPlot_AllWindowSizes[
        (Intermingling_BothWalkTypes_ToPlot_AllWindowSizes['Sample'].isin([ctrlcond, treatcond])) &
        (Intermingling_BothWalkTypes_ToPlot_AllWindowSizes['Window Size'] == 12e6)
    ]
    # one color per sample actually present, in order of appearance
    # (indexing [0] and [1] raised IndexError when only one sample had data)
    cmap_bar = sns.color_palette([labelPlotColors[sample] for sample in df['Sample'].unique()])
    sns.relplot(data = df[df['Walk_Comp_Type'] == 'All'],
             x = 'Window Midpoint', y = 'Fraction Intermingled', hue = 'Sample', palette = cmap_bar, kind = 'line', 
                style = 'Walk Type', col = 'Window Size', height = 2, aspect = 1.5, seed = 25)
    plt.ylim(0.35, 0.7)
    plt.xlim(0.5e7, 2.4e7)
    plt.xlabel('Genomic Distance \nBetween Interacting Domains \n (Window Midpoint, bp)')
    plt.ylabel('IM')

    plt.savefig(f'{outDataDir}/figures/220518_MRICRF_FractionIntermingled_RealvsPermuted_AllComps_{i}_12MbWindows.png', dpi = 300, bbox_inches = "tight")

In [None]:
# Per label and compartment type: a table with one row per window size
# and one column per window midpoint, taken from heatmap_df.
heatmap_dfs_bylabel_bycomp = {}

for cond in labels:
    heatmap_dfs_bylabel_bycomp[cond] = {}
    label_rows = heatmap_df[heatmap_df['Label'] == cond]
    for comp in comp_types:
        comp_rows = label_rows[(label_rows['Walk_Comp_Type'] == comp)]
        df = comp_rows[[
            'Window_Midpoint', 'value_16000000.0', 'value_14000000.0', 'value_12000000.0',
            'value_10000000.0', 'value_8000000.0','value_6000000.0', 'value'
        ]].set_index('Window_Midpoint', drop = True)

        # value columns are ordered from the largest to the smallest window
        df.columns = [16, 14, 12, 10, 8, 6, 4]

        by_window_size = df.transpose()
        # NOTE(review): hard-codes exactly 46 window midpoints (relabeled 2..47) — confirm against the data
        by_window_size.columns = list(range(2, 48))
        heatmap_dfs_bylabel_bycomp[cond][comp] = by_window_size

In [None]:
heatmap_dfs_bylabel_bycomp[cond][comp]

In [None]:
# Grid of heatmaps: one row per label, one column per compartment type; missing cells are masked.
fig = plt.figure(figsize=(24, 18))

gs = GridSpec(nrows= 6, ncols=4, wspace = 0.3, hspace = 1)

for i, cond in enumerate(labels):
    for j, comp in enumerate(comp_types):
        ax = plt.subplot(gs[i, j])
        comp_table = heatmap_dfs_bylabel_bycomp[cond][comp]
        sns.heatmap(comp_table, mask=comp_table.isnull(), square = False,
                    vmin=0.4, vmax=0.7,
                    cmap = 'fall', cbar_kws={'label': 'IM'})
        plt.ylabel('Window Size (Mb)')
        plt.xlabel('Genomic Distance Between Interacting Domains \n(Window Midpoint, Mb)')
        plt.title(f'{cond}, {comp}')

plt.savefig(f'{outDataDir}/figures/Heatmaps_SlidingWindowSweep_AllChroms_ByComp.png', dpi = 300, bbox_inches = 'tight')

In [None]:
#log2 ratios

In [None]:
# Grid of log2(treat) - log2(ctrl) heatmaps: one row per (ctrl, treat)
# pair, one column per compartment type. The grid is sized from the number
# of pairs; a fixed 6-row grid raised IndexError once there were more
# than six comparisons.
comparison_pairs = list(zip(ctrl_labels, treat_labels))
n_rows = max(len(comparison_pairs), 1)

fig = plt.figure(figsize=(24, 3 * n_rows))  # 3 inches per row, as in the 6-row / 18-inch layout

gs = GridSpec(nrows= n_rows, ncols=4, wspace = 0.3, hspace = 1)

for i, (ctrlcond, treatcond) in enumerate(comparison_pairs):
    for j, comp in enumerate(comp_types):
        ax = plt.subplot(gs[i, j])
        log2df = np.log2(heatmap_dfs_bylabel_bycomp[treatcond][comp]) - np.log2(heatmap_dfs_bylabel_bycomp[ctrlcond][comp])
        sns.heatmap(log2df, square = False, vmin=-.5, vmax=.5, cmap = 'coolwarm', cbar_kws={'label': 'log2FC treat/ctrl'})
        plt.ylabel('Window Size (Mb)')
        plt.xlabel('Genomic Distance Between Interacting Domains \n(Window Midpoint, Mb)')
        plt.title(f'{treatcond} vs {ctrlcond}, {comp}')

plt.savefig(f'{outDataDir}/figures/InterminglingWindowSizeSweep_Heatmaps_TreatvsCtrl_ByComp.png', dpi = 300, bbox_inches = 'tight')

In [None]:
#12Mb window only, B comp types
# For each (control, treatment) pair, plot 'Fraction Intermingled' against
# 'Window Midpoint' for rows with Walk_Comp_Type == 'B' at the 12e6 window size,
# coloured by sample and styled by walk type, and save one PNG per pair.
#
# sns.relplot is figure-level and always creates its own figure, so no
# plt.figure()/GridSpec scaffolding is built here: it only left a stray empty
# figure behind and made the loop fail with IndexError past 10 pairs.

for i, (ctrlcond, treatcond) in enumerate(zip(ctrl_labels, treat_labels)):
    df = Intermingling_BothWalkTypes_ToPlot_AllWindowSizes[
        (Intermingling_BothWalkTypes_ToPlot_AllWindowSizes['Sample'].isin([ctrlcond, treatcond])) &
        (Intermingling_BothWalkTypes_ToPlot_AllWindowSizes['Window Size'] == 12e6)
    ]
    # Key the palette by sample name instead of by position: the hue order is taken
    # from the compartment-filtered rows, which may list the samples in a different
    # order than the unfiltered frame (or contain only one of them).
    cmap_bar = {sample: labelPlotColors[sample] for sample in df['Sample'].unique()}
    g = sns.relplot(data = df[df['Walk_Comp_Type'] == 'B'],
                    x = 'Window Midpoint', y = 'Fraction Intermingled', hue = 'Sample', palette = cmap_bar, kind = 'line',
                    style = 'Walk Type', col = 'Window Size', height = 2, aspect = 1.5, seed = 25)
    # Apply limits/labels through the returned FacetGrid rather than pyplot's "current axes".
    g.set(ylim = (0.35, 0.9), xlim = (0.5e7, 4.4e7))
    g.set_axis_labels('Genomic Distance \nBetween Interacting Domains \n (Window Midpoint, bp)', 'IM')

    g.savefig(f'{outDataDir}/figures/Rad21ICRF_FractionIntermingled_RealvsPermuted_BComp_{i}_12MbWindows.png', dpi = 300, bbox_inches = "tight")

In [None]:
#12Mb window only, A comp types
# For each (control, treatment) pair, plot 'Fraction Intermingled' against
# 'Window Midpoint' for rows with Walk_Comp_Type == 'A' at the 12e6 window size,
# coloured by sample and styled by walk type, and save one PNG per pair.
#
# sns.relplot is figure-level and always creates its own figure, so no
# plt.figure()/GridSpec scaffolding is built here: it only left a stray empty
# figure behind and made the loop fail with IndexError past 10 pairs.

for i, (ctrlcond, treatcond) in enumerate(zip(ctrl_labels, treat_labels)):
    df = Intermingling_BothWalkTypes_ToPlot_AllWindowSizes[
        (Intermingling_BothWalkTypes_ToPlot_AllWindowSizes['Sample'].isin([ctrlcond, treatcond])) &
        (Intermingling_BothWalkTypes_ToPlot_AllWindowSizes['Window Size'] == 12e6)
    ]
    # Key the palette by sample name instead of by position: the hue order is taken
    # from the compartment-filtered rows, which may list the samples in a different
    # order than the unfiltered frame (or contain only one of them).
    cmap_bar = {sample: labelPlotColors[sample] for sample in df['Sample'].unique()}
    g = sns.relplot(data = df[df['Walk_Comp_Type'] == 'A'],
                    x = 'Window Midpoint', y = 'Fraction Intermingled', hue = 'Sample', palette = cmap_bar, kind = 'line',
                    style = 'Walk Type', col = 'Window Size', height = 2, aspect = 1.5, seed = 25)
    # Apply limits/labels through the returned FacetGrid rather than pyplot's "current axes".
    g.set(ylim = (0.35, 0.9), xlim = (0.5e7, 4.4e7))
    g.set_axis_labels('Genomic Distance \nBetween Interacting Domains \n (Window Midpoint, bp)', 'IM')

    g.savefig(f'{outDataDir}/figures/Rad21ICRF_FractionIntermingled_RealvsPermuted_AComp_{i}_12MbWindows.png', dpi = 300, bbox_inches = "tight")

In [None]:
#12Mb window only, AB comp types
# For each (control, treatment) pair, plot 'Fraction Intermingled' against
# 'Window Midpoint' for rows with Walk_Comp_Type == 'AB' at the 12e6 window size,
# coloured by sample and styled by walk type, and save one PNG per pair.
#
# sns.relplot is figure-level and always creates its own figure, so no
# plt.figure()/GridSpec scaffolding is built here: it only left a stray empty
# figure behind and made the loop fail with IndexError past 10 pairs.

for i, (ctrlcond, treatcond) in enumerate(zip(ctrl_labels, treat_labels)):
    df = Intermingling_BothWalkTypes_ToPlot_AllWindowSizes[
        (Intermingling_BothWalkTypes_ToPlot_AllWindowSizes['Sample'].isin([ctrlcond, treatcond])) &
        (Intermingling_BothWalkTypes_ToPlot_AllWindowSizes['Window Size'] == 12e6)
    ]
    # Key the palette by sample name instead of by position: the hue order is taken
    # from the compartment-filtered rows, which may list the samples in a different
    # order than the unfiltered frame (or contain only one of them).
    cmap_bar = {sample: labelPlotColors[sample] for sample in df['Sample'].unique()}
    g = sns.relplot(data = df[df['Walk_Comp_Type'] == 'AB'],
                    x = 'Window Midpoint', y = 'Fraction Intermingled', hue = 'Sample', palette = cmap_bar, kind = 'line',
                    style = 'Walk Type', col = 'Window Size', height = 2, aspect = 1.5, seed = 25)
    # Apply limits/labels through the returned FacetGrid rather than pyplot's "current axes".
    g.set(ylim = (0.35, 0.9), xlim = (0.5e7, 4.4e7))
    g.set_axis_labels('Genomic Distance \nBetween Interacting Domains \n (Window Midpoint, bp)', 'IM')

    g.savefig(f'{outDataDir}/figures/Rad21ICRF_FractionIntermingled_RealvsPermuted_ABComp_{i}_12MbWindows.png', dpi = 300, bbox_inches = "tight")

In [None]:
#4Mb window only, all comp types
# For each (control, treatment) pair, plot 'Fraction Intermingled' against
# 'Window Midpoint' for rows with Walk_Comp_Type == 'All' at the 4e6 window size,
# coloured by sample and styled by walk type, and save one PNG per pair.
#
# sns.relplot is figure-level and always creates its own figure, so no
# plt.figure()/GridSpec scaffolding is built here: it only left a stray empty
# figure behind and made the loop fail with IndexError past 10 pairs.

for i, (ctrlcond, treatcond) in enumerate(zip(ctrl_labels, treat_labels)):
    df = Intermingling_BothWalkTypes_ToPlot_AllWindowSizes[
        (Intermingling_BothWalkTypes_ToPlot_AllWindowSizes['Sample'].isin([ctrlcond, treatcond])) &
        (Intermingling_BothWalkTypes_ToPlot_AllWindowSizes['Window Size'] == 4e6)
    ]
    # Key the palette by sample name instead of by position: the hue order is taken
    # from the compartment-filtered rows, which may list the samples in a different
    # order than the unfiltered frame (or contain only one of them).
    cmap_bar = {sample: labelPlotColors[sample] for sample in df['Sample'].unique()}
    g = sns.relplot(data = df[df['Walk_Comp_Type'] == 'All'],
                    x = 'Window Midpoint', y = 'Fraction Intermingled', hue = 'Sample', palette = cmap_bar, kind = 'line',
                    style = 'Walk Type', col = 'Window Size', height = 2, aspect = 1.5, seed = 25)
    # Apply limits/labels through the returned FacetGrid rather than pyplot's "current axes".
    g.set(ylim = (0.1, 1))
    g.set_axis_labels('Genomic Distance \nBetween Interacting Domains \n (Window Midpoint, bp)', 'IM')

    g.savefig(f'{outDataDir}/figures/Rad21ICRF_FractionIntermingled_RealvsPermuted_AllComp_{i}_4MbWindows.png', dpi = 300, bbox_inches = "tight")

In [None]:
#4Mb window only, B comp types
# For each (control, treatment) pair, plot 'Fraction Intermingled' against
# 'Window Midpoint' for rows with Walk_Comp_Type == 'B' at the 4e6 window size,
# coloured by sample and styled by walk type, and save one PNG per pair.
#
# sns.relplot is figure-level and always creates its own figure, so no
# plt.figure()/GridSpec scaffolding is built here: it only left a stray empty
# figure behind and made the loop fail with IndexError past 10 pairs.

for i, (ctrlcond, treatcond) in enumerate(zip(ctrl_labels, treat_labels)):
    df = Intermingling_BothWalkTypes_ToPlot_AllWindowSizes[
        (Intermingling_BothWalkTypes_ToPlot_AllWindowSizes['Sample'].isin([ctrlcond, treatcond])) &
        (Intermingling_BothWalkTypes_ToPlot_AllWindowSizes['Window Size'] == 4e6)
    ]
    # Key the palette by sample name instead of by position: the hue order is taken
    # from the compartment-filtered rows, which may list the samples in a different
    # order than the unfiltered frame (or contain only one of them).
    cmap_bar = {sample: labelPlotColors[sample] for sample in df['Sample'].unique()}
    g = sns.relplot(data = df[df['Walk_Comp_Type'] == 'B'],
                    x = 'Window Midpoint', y = 'Fraction Intermingled', hue = 'Sample', palette = cmap_bar, kind = 'line',
                    style = 'Walk Type', col = 'Window Size', height = 2, aspect = 1.5, seed = 25)
    # Apply limits/labels through the returned FacetGrid rather than pyplot's "current axes".
    g.set(ylim = (0.1, 1))
    g.set_axis_labels('Genomic Distance \nBetween Interacting Domains \n (Window Midpoint, bp)', 'IM')

    g.savefig(f'{outDataDir}/figures/FractionIntermingled_RealvsPermuted_BComp_{i}_4MbWindows.png', dpi = 300, bbox_inches = "tight")

In [None]:
#4Mb window only, A comp types
# For each (control, treatment) pair, plot 'Fraction Intermingled' against
# 'Window Midpoint' for rows with Walk_Comp_Type == 'A' at the 4e6 window size,
# coloured by sample and styled by walk type, and save one PNG per pair.
#
# sns.relplot is figure-level and always creates its own figure, so no
# plt.figure()/GridSpec scaffolding is built here: it only left a stray empty
# figure behind and made the loop fail with IndexError past 10 pairs.

for i, (ctrlcond, treatcond) in enumerate(zip(ctrl_labels, treat_labels)):
    df = Intermingling_BothWalkTypes_ToPlot_AllWindowSizes[
        (Intermingling_BothWalkTypes_ToPlot_AllWindowSizes['Sample'].isin([ctrlcond, treatcond])) &
        (Intermingling_BothWalkTypes_ToPlot_AllWindowSizes['Window Size'] == 4e6)
    ]
    # Key the palette by sample name instead of by position: the hue order is taken
    # from the compartment-filtered rows, which may list the samples in a different
    # order than the unfiltered frame (or contain only one of them).
    cmap_bar = {sample: labelPlotColors[sample] for sample in df['Sample'].unique()}
    g = sns.relplot(data = df[df['Walk_Comp_Type'] == 'A'],
                    x = 'Window Midpoint', y = 'Fraction Intermingled', hue = 'Sample', palette = cmap_bar, kind = 'line',
                    style = 'Walk Type', col = 'Window Size', height = 2, aspect = 1.5, seed = 25)
    # Apply limits/labels through the returned FacetGrid rather than pyplot's "current axes".
    g.set(ylim = (0.1, 1))
    g.set_axis_labels('Genomic Distance \nBetween Interacting Domains \n (Window Midpoint, bp)', 'IM')

    g.savefig(f'{outDataDir}/figures/FractionIntermingled_RealvsPermuted_AComp_{i}_4MbWindows.png', dpi = 300, bbox_inches = "tight")

In [None]:
#4Mb window only, AB comp types
# For each (control, treatment) pair, plot 'Fraction Intermingled' against
# 'Window Midpoint' for rows with Walk_Comp_Type == 'AB' at the 4e6 window size,
# coloured by sample and styled by walk type, and save one PNG per pair.
#
# sns.relplot is figure-level and always creates its own figure, so no
# plt.figure()/GridSpec scaffolding is built here: it only left a stray empty
# figure behind and made the loop fail with IndexError past 10 pairs.

for i, (ctrlcond, treatcond) in enumerate(zip(ctrl_labels, treat_labels)):
    df = Intermingling_BothWalkTypes_ToPlot_AllWindowSizes[
        (Intermingling_BothWalkTypes_ToPlot_AllWindowSizes['Sample'].isin([ctrlcond, treatcond])) &
        (Intermingling_BothWalkTypes_ToPlot_AllWindowSizes['Window Size'] == 4e6)
    ]
    # Key the palette by sample name instead of by position: the hue order is taken
    # from the compartment-filtered rows, which may list the samples in a different
    # order than the unfiltered frame (or contain only one of them).
    cmap_bar = {sample: labelPlotColors[sample] for sample in df['Sample'].unique()}
    g = sns.relplot(data = df[df['Walk_Comp_Type'] == 'AB'],
                    x = 'Window Midpoint', y = 'Fraction Intermingled', hue = 'Sample', palette = cmap_bar, kind = 'line',
                    style = 'Walk Type', col = 'Window Size', height = 2, aspect = 1.5, seed = 25)
    # Apply limits/labels through the returned FacetGrid rather than pyplot's "current axes".
    g.set(ylim = (0.1, 1))
    g.set_axis_labels('Genomic Distance \nBetween Interacting Domains \n (Window Midpoint, bp)', 'IM')

    g.savefig(f'{outDataDir}/figures/FractionIntermingled_RealvsPermuted_ABComp_{i}_4MbWindows.png', dpi = 300, bbox_inches = "tight")