In [None]:
import pandas as pd
import numpy as np
#import bootstrapped.bootstrap as bts
#import bootstrapped.stats_functions as bs_stats
import os
import scipy.stats as stats
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import pylab as pl
from os import path

# settings for making nice pdfs: font type 42 asks the PDF/PS backends for
# TrueType fonts (see the matplotlib rcParams documentation), and Arial is made
# the sans-serif face used for all text
plt.rcParams.update({
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    'font.sans-serif': "Arial",
    'font.family': "sans-serif",
})

In [None]:
# specify common string of the output folders (each folder contains results from independent runs, n>=3)
base_folder_string = 'output_0520_tiling600_PercentBEstabilized_run'
folders = [foldername for foldername in os.listdir('Data/') if foldername.startswith(base_folder_string)]
if not folders:
    # Stop here: with no run folder the result arrays below are never (re)created,
    # and the plotting code would either hit a NameError or silently reuse arrays
    # left in the kernel by another cell.
    raise FileNotFoundError("no folder in 'Data/' starts with '" + base_folder_string + "'")

base_stochasticity = 1 # use to convert step to time

# loop through different folders; m is the run index (axis 0 of the result arrays)
for m in range(len(folders)):
    data_dir = folders[m]

    stats_file = [filename for filename in os.listdir('Data/'+data_dir) if filename.startswith("stats_df")]

    # parse file name
    # Each parameter value is the text between two markers of the stats_df_*.csv
    # file names. For every parameter we keep:
    #   *_list             unique value strings (unordered: they come from a set)
    #   *_numerical_list   the same values as floats, same order
    #   *_list_sorted      value strings sorted by numeric value
    #   *_numerical_sorted the sorted values as floats
    # separation
    sep_list = list({name.split('stats_df_sep')[1].split('_proc')[0] for name in stats_file})
    sep_numerical_list = [float(a) for a in sep_list]
    sep_list_sorted = [x for _,x in sorted(zip(sep_numerical_list,sep_list))]
    sep_numerical_sorted = [float(a) for a in sep_list_sorted]
    # processivity
    proc_list = list({name.split('proc')[1].split('_superportion')[0] for name in stats_file})
    proc_numerical_list = [float(a) for a in proc_list]
    proc_list_sorted = [x for _,x in sorted(zip(proc_numerical_list,proc_list))]
    proc_numerical_sorted = [float(a) for a in proc_list_sorted]
    # %long-lived LEFs
    superportion_list = list({name.split('superportion')[1].split('_superproc')[0] for name in stats_file})
    superportion_numerical_list = [float(a) for a in superportion_list]
    superportion_list_sorted = [x for _,x in sorted(zip(superportion_numerical_list,superportion_list))]
    superportion_numerical_sorted = [float(a) for a in superportion_list_sorted]
    # processivity of long-lived LEFs
    superproc_list = list({name.split('superproc')[1].split('_ctcf')[0] for name in stats_file})
    superproc_numerical_list = [float(a) for a in superproc_list]
    superproc_list_sorted = [x for _,x in sorted(zip(superproc_numerical_list,superproc_list))]
    superproc_numerical_sorted = [float(a) for a in superproc_list_sorted]
    # fold stabilization of LEFs at BE
    ctcf_list = list({name.split('ctcf')[1].split('_dsb')[0] for name in stats_file})
    ctcf_numerical_list = [float(a) for a in ctcf_list]
    ctcf_list_sorted = [x for _,x in sorted(zip(ctcf_numerical_list,ctcf_list))]
    ctcf_numerical_sorted = [float(a) for a in ctcf_list_sorted]
    # fold stabilization of LEF at DSB ends
    dsb_list = list({name.split('dsb')[1].split('_superloading')[0] for name in stats_file})
    dsb_numerical_list = [float(a) for a in dsb_list]
    dsb_list_sorted = [x for _,x in sorted(zip(dsb_numerical_list,dsb_list))]
    dsb_numerical_sorted = [float(a) for a in dsb_list_sorted]
    # fold increase in loading probability at DSB
    superloading_list = list({name.split('superloading')[1].split('_bs')[0] for name in stats_file})
    superloading_numerical_list = [float(a) for a in superloading_list]
    superloading_list_sorted = [x for _,x in sorted(zip(superloading_numerical_list,superloading_list))]
    superloading_numerical_sorted = [float(a) for a in superloading_list_sorted]
    # boundary strength
    bs_list = list({name.split('bs')[1].split('.csv')[0] for name in stats_file})
    bs_numerical_list = [float(a) for a in bs_list]
    bs_list_sorted = [x for _,x in sorted(zip(bs_numerical_list,bs_list))]
    bs_numerical_sorted = [float(a) for a in bs_list_sorted]

    if m == 0:
        # Result arrays, indexed [run, processivity, BE stabilization]. They are sized
        # from the first folder only.
        # NOTE(review): assumes every run folder covers the same parameter grid - confirm.
        ctcf_bs_fp = np.zeros((len(folders),len(proc_list),len(ctcf_list)))
        ctcf_bs_success =  np.zeros((len(folders),len(proc_list),len(ctcf_list)))
        ctcf_bs_time0 =  np.zeros((len(folders),len(proc_list),len(ctcf_list)))
        percent_LEFs_stabilized = np.zeros((len(folders),len(proc_list),len(ctcf_list)))
        percent_TAD_with_stabilized_LEFs= np.zeros((len(folders),len(proc_list),len(ctcf_list)))

    count = 0  # number of parameter combinations found on disk for this run
    for i in range(len(proc_list)):
        for j in range(len(ctcf_list)):
            proc = proc_numerical_sorted[i]
            # Only processivity (i) and BE stabilization (j) are scanned. Every other
            # parameter is fixed to its smallest value, and the long-lived LEF
            # processivity in the file name is taken to be int(20 * processivity).
            # The separation uses the sorted list like the other parameters:
            # sep_list comes from a set, so sep_list[0] is not a reproducible choice
            # when more than one separation is present.
            par_combo = ('sep' + sep_list_sorted[0]
                         + '_proc' + proc_list_sorted[i]
                         + '_superportion' + superportion_list_sorted[0]
                         + '_superproc' + str(int(proc*20))
                         + '_ctcf' + ctcf_list_sorted[j]
                         + '_dsb' + dsb_list_sorted[0]
                         + '_superloading' + superloading_list_sorted[0]
                         + '_bs' + bs_list_sorted[0])

            filename = 'Data/'+data_dir + '/stats_df_' + par_combo + '.csv'
            if path.exists(filename):

                count+=1

                # mean first-passage time over the threshold==1 rows
                stats_df = pd.read_csv(filename)
                stats_df = stats_df.drop(columns='Unnamed: 0')
                stats_df_threshold1 = stats_df.loc[stats_df['threshold']==1]
                sep_proc_1st_pass = stats_df_threshold1['first_passage_time'].to_numpy()
                if len(sep_proc_1st_pass)>0:
                    # plotted as minutes further down
                    # NOTE(review): presumably /60 is steps->minutes; confirm the factor 2
                    ctcf_bs_fp[m,i,j]=np.mean(sep_proc_1st_pass)*base_stochasticity/60*2
                else:
                    # no threshold==1 row: mark as missing
                    ctcf_bs_fp[m,i,j]=np.nan

                # restrained / repaired proportions (first threshold==1 row), scaled by 100
                portion_df = pd.read_csv('Data/'+data_dir + '/restrained_df_' + par_combo + '.csv')
                portion_df = portion_df.drop(columns='Unnamed: 0')
                portion_df_threshold1 = portion_df.loc[portion_df['threshold']==1]
                ctcf_bs_time0[m,i,j] = portion_df_threshold1['restrained proportion time0'].to_numpy()[0] * 100
                ctcf_bs_success[m,i,j] = portion_df_threshold1['repaired proportion'].to_numpy()[0] * 100

                # BE-stabilization statistics (first row of the file, stored unscaled)
                stabilized_df = pd.read_csv('Data/'+data_dir + '/stabilized_df_' + par_combo + '.csv')
                stabilized_df = stabilized_df.drop(columns='Unnamed: 0')
                percent_LEFs_stabilized[m,i,j] = stabilized_df ['percent LEF stabilized by BE'].to_numpy()[0]
                percent_TAD_with_stabilized_LEFs[m,i,j] = stabilized_df ['percent TADs with stabilized LEF'].to_numpy()[0]

    # Entries of combinations that were not found keep their initial 0 and would be
    # averaged in as real data, so make the gap visible.
    n_expected = len(proc_list) * len(ctcf_list)
    if count < n_expected:
        print('Warning: ' + data_dir + ': found ' + str(count) + ' of ' + str(n_expected)
              + ' parameter combinations; missing entries are left at 0')

# Theory prediction from Mathematica Notebook BEstabilization_20211004.nb, Prob
# One row per curve (the plot below draws row i in colors[i]); one column per
# entry of slist. Multiplying by 100 puts the values on the 0-100 axis of the plot.
theory = np.asarray([
    [0.0484416, 0.0766689, 0.104387, 0.131435, 0.157444, 0.182136,
     0.205379, 0.227153, 0.247505, 0.26652, 0.284297, 0.30094, 0.316549,
     0.331218, 0.345032, 0.358068, 0.370395, 0.382075, 0.393162, 0.403707,
     0.413751, 0.423335, 0.432493, 0.441258, 0.449656, 0.457713, 0.465454,
     0.472897, 0.480063, 0.486969, 0.493631],
    [0.113293, 0.166839, 0.214683, 0.257562, 0.29593, 0.330265, 0.361058,
     0.38877, 0.413809, 0.436534, 0.457245, 0.476202, 0.493621, 0.509688,
     0.524558, 0.538367, 0.551228, 0.56324, 0.574489, 0.585048, 0.594984,
     0.604352, 0.613202, 0.621578, 0.629521, 0.637063, 0.644238, 0.651072,
     0.657591, 0.663817, 0.669771],
    [0.204531, 0.280211, 0.341936, 0.393317, 0.436676, 0.473701, 0.505662,
     0.533524, 0.55803, 0.579758, 0.599162, 0.616603, 0.632372, 0.646705,
     0.659795, 0.671802, 0.682859, 0.69308, 0.702558, 0.711374, 0.719599,
     0.727292, 0.734506, 0.741284, 0.747668, 0.753692, 0.759387, 0.764781,
     0.769897, 0.774757, 0.779382],
]) * 100

# x values of the theory curves: 1, 1.5, ..., 16 (31 points, step 0.5)
slist = np.arange(1., 16.5, 0.5)
    
# Figure: simulated mean +/- SEM over runs (square markers), one colour per
# processivity, against the BE stabilization factor.
#   top panel    - synapsis efficiency, with the theory curves (dashed)
#   bottom panel - mean synapsis time, with the processivity legend below it
colors = ['aqua', 'slateblue', 'violet', 'deeppink', 'mediumorchid']
matplotlib.rcParams.update({'font.size': 22})
fig, axs = plt.subplots(2, 1, figsize=(8, 12))

# marker / error-bar look shared by every simulated series
marker_style = dict(fmt='s', markersize=8, capsize=10, linewidth=3)
n_proc = len(proc_list_sorted)

# axis 0 of the result arrays is the run index, so these are per-(processivity,
# stabilization) statistics over runs; row k is already 1-D, no transpose needed
success_mean = np.mean(ctcf_bs_success, axis=0)
success_sem = stats.sem(ctcf_bs_success, axis=0)
fp_mean = np.mean(ctcf_bs_fp, axis=0)
fp_sem = stats.sem(ctcf_bs_fp, axis=0)

# top panel: simulation first, then the dashed theory curves
for k in range(n_proc):
    axs[0].errorbar(ctcf_numerical_sorted, success_mean[k], yerr=success_sem[k],
                    ecolor=colors[k], color=colors[k], **marker_style)
for k in range(n_proc):
    axs[0].plot(slist, theory[k], color=colors[k], linewidth=3, linestyle='dashed')

# bottom panel: simulation only
for k in range(n_proc):
    axs[1].errorbar(ctcf_numerical_sorted, fp_mean[k], yerr=fp_sem[k],
                    ecolor=colors[k], color=colors[k], **marker_style)

axs[0].set_ylim(0, 100)
axs[0].set_ylabel('Synapsis efficiency(%)')
axs[1].set_ylabel('Mean synapsis time (min)')

# styling common to both panels
for panel in axs:
    panel.set_xlabel('BE stabilization factor')
    panel.set_xticks(ctcf_numerical_sorted[0:])
    panel.tick_params(direction='out', length=8, width=2)
    for side in ('top', 'bottom', 'left', 'right'):
        panel.spines[side].set_linewidth(1.5)
    # Hide the right and top spines
    panel.spines['right'].set_visible(False)
    panel.spines['top'].set_visible(False)
    # Only show ticks on the left and bottom spines
    panel.yaxis.set_ticks_position('left')
    panel.xaxis.set_ticks_position('bottom')

# legend sits below the bottom panel, outside the axes, so it is handed to
# savefig explicitly to be included in the tight bounding box
legend = ['processivity=' + proc for proc in proc_list_sorted]
lgd = axs[1].legend(legend, loc='upper center', bbox_to_anchor=(0.5, -0.2),
                    fancybox=True, shadow=True, ncol=1)

plt.savefig('Figures/'+'BE_stabilization.pdf', format='pdf', bbox_extra_artists=(lgd,), bbox_inches='tight')
plt.show()

In [None]:
# common string of the output folders; each matching folder is treated as one
# independent run (index m below)
base_folder_string = 'output_0520_with residence_10 Mb apart_tiling600_2populationLEFs_GapMethod_PercentBEstabilized_run'
folders = [foldername for foldername in os.listdir('Data/') if foldername.startswith(base_folder_string)]
if not folders:
    # Stop here: with no run folder the result arrays below are never (re)created,
    # and the plotting code would either hit a NameError or silently reuse arrays
    # left in the kernel by another cell.
    raise FileNotFoundError("no folder in 'Data/' starts with '" + base_folder_string + "'")

base_stochasticity = 1 # use to convert step to time

# loop through the run folders; m is the run index (axis 0 of the result arrays)
for m in range(len(folders)):
    data_dir = folders[m]
    print(data_dir)
    stats_file = [filename for filename in os.listdir('Data/'+data_dir) if filename.startswith("stats_df")]

    # Each parameter value is the text between two markers of the stats_df_*.csv
    # file names. For every parameter we keep:
    #   *_list             unique value strings (unordered: they come from a set)
    #   *_numerical_list   the same values as floats, same order
    #   *_list_sorted      value strings sorted by numeric value
    #   *_numerical_sorted the sorted values as floats
    sep_list = list({name.split('stats_df_sep')[1].split('_proc')[0] for name in stats_file})
    sep_numerical_list = [float(a) for a in sep_list]
    sep_list_sorted = [x for _,x in sorted(zip(sep_numerical_list,sep_list))]
    sep_numerical_sorted = [float(a) for a in sep_list_sorted]
    proc_list = list({name.split('proc')[1].split('_superportion')[0] for name in stats_file})
    proc_numerical_list = [float(a) for a in proc_list]
    proc_list_sorted = [x for _,x in sorted(zip(proc_numerical_list,proc_list))]
    proc_numerical_sorted = [float(a) for a in proc_list_sorted]
    superportion_list = list({name.split('superportion')[1].split('_superproc')[0] for name in stats_file})
    superportion_numerical_list = [float(a) for a in superportion_list]
    superportion_list_sorted = [x for _,x in sorted(zip(superportion_numerical_list,superportion_list))]
    superportion_numerical_sorted = [float(a) for a in superportion_list_sorted]
    superproc_list = list({name.split('superproc')[1].split('_ctcf')[0] for name in stats_file})
    superproc_numerical_list = [float(a) for a in superproc_list]
    superproc_list_sorted = [x for _,x in sorted(zip(superproc_numerical_list,superproc_list))]
    superproc_numerical_sorted = [float(a) for a in superproc_list_sorted]
    ctcf_list = list({name.split('ctcf')[1].split('_dsb')[0] for name in stats_file})
    ctcf_numerical_list = [float(a) for a in ctcf_list]
    ctcf_list_sorted = [x for _,x in sorted(zip(ctcf_numerical_list,ctcf_list))]
    ctcf_numerical_sorted = [float(a) for a in ctcf_list_sorted]
    dsb_list = list({name.split('dsb')[1].split('_superloading')[0] for name in stats_file})
    dsb_numerical_list = [float(a) for a in dsb_list]
    dsb_list_sorted = [x for _,x in sorted(zip(dsb_numerical_list,dsb_list))]
    dsb_numerical_sorted = [float(a) for a in dsb_list_sorted ]
    superloading_list = list({name.split('superloading')[1].split('_bs')[0] for name in stats_file})
    superloading_numerical_list = [float(a) for a in superloading_list]
    superloading_list_sorted = [x for _,x in sorted(zip(superloading_numerical_list,superloading_list))]
    superloading_numerical_sorted = [float(a) for a in superloading_list_sorted]
    bs_list = list({name.split('bs')[1].split('.csv')[0] for name in stats_file})
    bs_numerical_list = [float(a) for a in bs_list]
    bs_list_sorted = [x for _,x in sorted(zip(bs_numerical_list,bs_list))]
    bs_numerical_sorted = [float(a) for a in bs_list_sorted]

    if m == 0:
        # Result arrays, indexed [run, processivity, BE stabilization]. They are sized
        # from the first folder only.
        # NOTE(review): assumes every run folder covers the same parameter grid - confirm.
        ctcf_bs_fp = np.zeros((len(folders),len(proc_list),len(ctcf_list)))
        ctcf_bs_success =  np.zeros((len(folders),len(proc_list),len(ctcf_list)))
        ctcf_bs_capture =  np.zeros((len(folders),len(proc_list),len(ctcf_list)))
        ctcf_bs_time0 =  np.zeros((len(folders),len(proc_list),len(ctcf_list)))
        percent_LEFs_stabilized = np.zeros((len(folders),len(proc_list),len(ctcf_list)))
        percent_TAD_with_stabilized_LEFs= np.zeros((len(folders),len(proc_list),len(ctcf_list)))

    count = 0  # number of parameter combinations found on disk for this run
    for i in range(len(proc_list)):
        for j in range(len(ctcf_list)):
            proc = proc_numerical_sorted[i]
            # Only processivity (i) and BE stabilization (j) are scanned. Every other
            # parameter is fixed to its smallest value, and the long-lived LEF
            # processivity in the file name is taken to be int(20 * processivity).
            # The separation uses the sorted list like the other parameters:
            # sep_list comes from a set, so sep_list[0] is not a reproducible choice
            # when more than one separation is present.
            par_combo = ('sep' + sep_list_sorted[0]
                         + '_proc' + proc_list_sorted[i]
                         + '_superportion' + superportion_list_sorted[0]
                         + '_superproc' + str(int(proc*20))
                         + '_ctcf' + ctcf_list_sorted[j]
                         + '_dsb' + dsb_list_sorted[0]
                         + '_superloading' + superloading_list_sorted[0]
                         + '_bs' + bs_list_sorted[0])

            filename = 'Data/'+data_dir + '/stats_df_' + par_combo + '.csv'
            if path.exists(filename):

                count+=1

                # mean first-passage time over the threshold==1 rows
                stats_df = pd.read_csv(filename)
                stats_df = stats_df.drop(columns='Unnamed: 0')
                stats_df_threshold1 = stats_df.loc[stats_df['threshold']==1]
                sep_proc_1st_pass = stats_df_threshold1['first_passage_time'].to_numpy()
                if len(sep_proc_1st_pass)>0:
                    # NOTE(review): presumably /60 is steps->minutes; confirm the factor 2
                    ctcf_bs_fp[m,i,j]=np.mean(sep_proc_1st_pass)*base_stochasticity/60*2
                else:
                    # no threshold==1 row: mark as missing
                    ctcf_bs_fp[m,i,j]=np.nan

                # restrained / repaired proportions (first threshold==1 row), scaled by 100
                portion_df = pd.read_csv('Data/'+data_dir + '/restrained_df_' + par_combo + '.csv')
                portion_df = portion_df.drop(columns='Unnamed: 0')
                portion_df_threshold1 = portion_df.loc[portion_df['threshold']==1]
                ctcf_bs_time0[m,i,j] = portion_df_threshold1['restrained proportion time0'].to_numpy()[0] * 100
                ctcf_bs_success[m,i,j] = portion_df_threshold1['repaired proportion'].to_numpy()[0] * 100

                # ratio of the repaired to the restrained-at-time0 value
                ctcf_bs_capture[m,i,j] = ctcf_bs_success[m,i,j]/ctcf_bs_time0[m,i,j]

                # BE-stabilization statistics (first row of the file, stored unscaled)
                stabilized_df = pd.read_csv('Data/'+data_dir + '/stabilized_df_' + par_combo + '.csv')
                stabilized_df = stabilized_df.drop(columns='Unnamed: 0')
                percent_LEFs_stabilized[m,i,j] = stabilized_df ['percent LEF stabilized by BE'].to_numpy()[0]
                percent_TAD_with_stabilized_LEFs[m,i,j] = stabilized_df ['percent TADs with stabilized LEF'].to_numpy()[0]

    # Entries of combinations that were not found keep their initial 0 and would be
    # averaged in as real data, so make the gap visible.
    n_expected = len(proc_list) * len(ctcf_list)
    if count < n_expected:
        print('Warning: ' + data_dir + ': found ' + str(count) + ' of ' + str(n_expected)
              + ' parameter combinations; missing entries are left at 0')

# Theory prediction from Mathematica Notebook BEstabilization_20211004.nb, Probconstrained
# One row per curve (the plot below draws row i in colors[i]); one column per
# entry of slist. Multiplying by 100 puts the values on the 0-100 axis of the plot.
theory = np.asarray([
    [0.707582, 0.723565, 0.743412, 0.763302, 0.781692, 0.79811, 0.812548,
     0.825179, 0.836228, 0.845918, 0.854449, 0.861996, 0.868704, 0.874695,
     0.880073, 0.884923, 0.889314, 0.893309, 0.896955, 0.900297, 0.90337,
     0.906204, 0.908828, 0.911262, 0.913527, 0.915641, 0.917617, 0.919469,
     0.921208, 0.922845, 0.924389],
    [0.822697, 0.836857, 0.851558, 0.864668, 0.875844, 0.885264, 0.893215,
     0.899969, 0.905754, 0.910752, 0.915108, 0.918934, 0.922321, 0.92534,
     0.928048, 0.93049, 0.932706, 0.934725, 0.936573, 0.938272, 0.93984,
     0.941292, 0.942641, 0.943898, 0.945072, 0.946172, 0.947206, 0.948179,
     0.949096, 0.949963, 0.950785],
    [0.892005, 0.901986, 0.911057, 0.918585, 0.924746, 0.929821, 0.934052,
     0.937627, 0.940686, 0.943334, 0.94565, 0.947695, 0.949516, 0.951148,
     0.952623, 0.953961, 0.955184, 0.956306, 0.95734, 0.958297, 0.959185,
     0.960014, 0.960788, 0.961514, 0.962196, 0.962839, 0.963446, 0.964021,
     0.964566, 0.965084, 0.965577],
]) * 100

# Theory prediction from Mathematica Notebook BEstabilization_20211004.nb, blist
# Same layout as `theory`; drawn in the bottom panel of the plot below.
theory2 = np.asarray([
    [0.166537, 0.209877, 0.241864, 0.266608, 0.286403, 0.302647, 0.316246,
     0.327814, 0.337787, 0.346481, 0.354133, 0.360923, 0.366992, 0.372452,
     0.37739, 0.38188, 0.385981, 0.389742, 0.393204, 0.396402, 0.399365,
     0.402119, 0.404685, 0.407083, 0.409327, 0.411433, 0.413413, 0.415279,
     0.417039, 0.418703, 0.420278],
    [0.241864, 0.286403, 0.316246, 0.337787, 0.354133, 0.366992, 0.37739,
     0.385981, 0.393204, 0.399365, 0.404685, 0.409327, 0.413413, 0.417039,
     0.420278, 0.42319, 0.425822, 0.428214, 0.430396, 0.432395, 0.434233,
     0.43593, 0.4375, 0.438958, 0.440315, 0.441582, 0.442767, 0.443877,
     0.44492, 0.445902, 0.446828],
    [0.316246, 0.354133, 0.37739, 0.393204, 0.404685, 0.413413, 0.420278,
     0.425822, 0.430396, 0.434233, 0.4375, 0.440315, 0.442767, 0.44492,
     0.446828, 0.44853, 0.450057, 0.451435, 0.452685, 0.453824, 0.454867,
     0.455824, 0.456706, 0.457522, 0.458279, 0.458983, 0.459639, 0.460252,
     0.460826, 0.461365, 0.461872],
]) * 100

# x values of the theory curves: 1, 1.5, ..., 16 (31 points, step 0.5)
slist = np.arange(1., 16.5, 0.5)
    
# Figure: simulated mean +/- SEM over runs (square markers) against the theory
# curves (dashed), one colour per processivity, versus BE stabilization factor.
#   top panel    - % DSB constrained       (ctcf_bs_time0 vs. theory)
#   bottom panel - % LEFs stabilized by BE (percent_LEFs_stabilized vs. theory2)
colors = ['aqua','slateblue','violet','deeppink','mediumorchid']
matplotlib.rcParams.update({'font.size': 22})
fig, axs = plt.subplots(2,1,figsize=(8, 12))

# (axes, simulated results, theory curves, y label) for each panel
panels = [(axs[0], ctcf_bs_time0, theory, '% DSB constrained'),
          (axs[1], percent_LEFs_stabilized, theory2, '% LEFs stabilized by BE')]

for ax, simulated, predicted, ylabel in panels:
    # axis 0 of the result arrays is the run index: statistics are taken over runs
    run_mean = np.mean(simulated, axis=0)
    run_sem = stats.sem(simulated, axis=0)

    for i in range(len(proc_list_sorted)):
        ax.errorbar(ctcf_numerical_sorted, run_mean[i,:], yerr=run_sem[i,:], fmt='s', markersize=8,
                    ecolor=colors[i], color=colors[i], capsize=10, linewidth=3)
    for i in range(len(proc_list_sorted)):
        ax.plot(slist, predicted[i], color=colors[i], linewidth=3, linestyle='dashed')

    ax.set_ylim(0,100)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('BE stabilization factor')
    ax.set_xticks(ctcf_numerical_sorted[0:])
    ax.tick_params(direction='out', length=8, width=2)
    for axis in ['top','bottom','left','right']:
        ax.spines[axis].set_linewidth(1.5)

    # Hide the right and top spines
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    # Only show ticks on the left and bottom spines
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')

# This figure has no legend, so no extra artists are passed to savefig. Passing
# `lgd` here would fail with a NameError on a fresh kernel (it is not created in
# this cell) or, if another cell had created it, would stretch the tight
# bounding box around a legend that belongs to a different figure.
plt.savefig('Figures/'+'BE_stabilization_Pconstrained_percentLEFstabilized.pdf',format='pdf',bbox_inches='tight')
plt.show()

In [None]:
# common string of the output folders; each matching folder is treated as one
# independent run (index m below)
base_folder_string = 'output_0520_with residence_10 Mb apart_tiling600_2populationLEFs_GapMethod_PercentBEstabilized_run'
folders = [foldername for foldername in os.listdir('Data/') if foldername.startswith(base_folder_string)]
if not folders:
    # Stop here: with no run folder the result arrays below are never (re)created,
    # and the plotting code would either hit a NameError or silently reuse arrays
    # left in the kernel by another cell.
    raise FileNotFoundError("no folder in 'Data/' starts with '" + base_folder_string + "'")

base_stochasticity = 1 # use to convert step to time

# loop through the run folders; m is the run index (axis 0 of the result arrays)
for m in range(len(folders)):
    data_dir = folders[m]
    print(data_dir)
    stats_file = [filename for filename in os.listdir('Data/'+data_dir) if filename.startswith("stats_df")]

    # Each parameter value is the text between two markers of the stats_df_*.csv
    # file names. For every parameter we keep:
    #   *_list             unique value strings (unordered: they come from a set)
    #   *_numerical_list   the same values as floats, same order
    #   *_list_sorted      value strings sorted by numeric value
    #   *_numerical_sorted the sorted values as floats
    sep_list = list({name.split('stats_df_sep')[1].split('_proc')[0] for name in stats_file})
    sep_numerical_list = [float(a) for a in sep_list]
    sep_list_sorted = [x for _,x in sorted(zip(sep_numerical_list,sep_list))]
    sep_numerical_sorted = [float(a) for a in sep_list_sorted]
    proc_list = list({name.split('proc')[1].split('_superportion')[0] for name in stats_file})
    proc_numerical_list = [float(a) for a in proc_list]
    proc_list_sorted = [x for _,x in sorted(zip(proc_numerical_list,proc_list))]
    proc_numerical_sorted = [float(a) for a in proc_list_sorted]
    superportion_list = list({name.split('superportion')[1].split('_superproc')[0] for name in stats_file})
    superportion_numerical_list = [float(a) for a in superportion_list]
    superportion_list_sorted = [x for _,x in sorted(zip(superportion_numerical_list,superportion_list))]
    superportion_numerical_sorted = [float(a) for a in superportion_list_sorted]
    superproc_list = list({name.split('superproc')[1].split('_ctcf')[0] for name in stats_file})
    superproc_numerical_list = [float(a) for a in superproc_list]
    superproc_list_sorted = [x for _,x in sorted(zip(superproc_numerical_list,superproc_list))]
    superproc_numerical_sorted = [float(a) for a in superproc_list_sorted]
    ctcf_list = list({name.split('ctcf')[1].split('_dsb')[0] for name in stats_file})
    ctcf_numerical_list = [float(a) for a in ctcf_list]
    ctcf_list_sorted = [x for _,x in sorted(zip(ctcf_numerical_list,ctcf_list))]
    ctcf_numerical_sorted = [float(a) for a in ctcf_list_sorted]
    dsb_list = list({name.split('dsb')[1].split('_superloading')[0] for name in stats_file})
    dsb_numerical_list = [float(a) for a in dsb_list]
    dsb_list_sorted = [x for _,x in sorted(zip(dsb_numerical_list,dsb_list))]
    dsb_numerical_sorted = [float(a) for a in dsb_list_sorted ]
    superloading_list = list({name.split('superloading')[1].split('_bs')[0] for name in stats_file})
    superloading_numerical_list = [float(a) for a in superloading_list]
    superloading_list_sorted = [x for _,x in sorted(zip(superloading_numerical_list,superloading_list))]
    superloading_numerical_sorted = [float(a) for a in superloading_list_sorted]
    bs_list = list({name.split('bs')[1].split('.csv')[0] for name in stats_file})
    bs_numerical_list = [float(a) for a in bs_list]
    bs_list_sorted = [x for _,x in sorted(zip(bs_numerical_list,bs_list))]
    bs_numerical_sorted = [float(a) for a in bs_list_sorted]

    if m == 0:
        # Result arrays, indexed [run, processivity, BE stabilization]. They are sized
        # from the first folder only.
        # NOTE(review): assumes every run folder covers the same parameter grid - confirm.
        ctcf_bs_fp = np.zeros((len(folders),len(proc_list),len(ctcf_list)))
        ctcf_bs_success =  np.zeros((len(folders),len(proc_list),len(ctcf_list)))
        ctcf_bs_capture =  np.zeros((len(folders),len(proc_list),len(ctcf_list)))
        ctcf_bs_realtime =  np.zeros((len(folders),len(proc_list),len(ctcf_list)))
        ctcf_bs_time0 =  np.zeros((len(folders),len(proc_list),len(ctcf_list)))
        percent_LEFs_stabilized = np.zeros((len(folders),len(proc_list),len(ctcf_list)))
        percent_TAD_with_stabilized_LEFs= np.zeros((len(folders),len(proc_list),len(ctcf_list)))

    count = 0  # number of parameter combinations found on disk for this run
    for i in range(len(proc_list)):
        for j in range(len(ctcf_list)):
            proc = proc_numerical_sorted[i]
            # Only processivity (i) and BE stabilization (j) are scanned. Every other
            # parameter is fixed to its smallest value, and the long-lived LEF
            # processivity in the file name is taken to be int(20 * processivity).
            # The separation uses the sorted list like the other parameters:
            # sep_list comes from a set, so sep_list[0] is not a reproducible choice
            # when more than one separation is present.
            par_combo = ('sep' + sep_list_sorted[0]
                         + '_proc' + proc_list_sorted[i]
                         + '_superportion' + superportion_list_sorted[0]
                         + '_superproc' + str(int(proc*20))
                         + '_ctcf' + ctcf_list_sorted[j]
                         + '_dsb' + dsb_list_sorted[0]
                         + '_superloading' + superloading_list_sorted[0]
                         + '_bs' + bs_list_sorted[0])

            filename = 'Data/'+data_dir + '/stats_df_' + par_combo + '.csv'
            if path.exists(filename):

                count+=1

                # mean first-passage time over the threshold==1 rows
                stats_df = pd.read_csv(filename)
                stats_df = stats_df.drop(columns='Unnamed: 0')
                stats_df_threshold1 = stats_df.loc[stats_df['threshold']==1]
                sep_proc_1st_pass = stats_df_threshold1['first_passage_time'].to_numpy()
                if len(sep_proc_1st_pass)>0:
                    # NOTE(review): presumably /60 is steps->minutes; confirm the factor 2
                    ctcf_bs_fp[m,i,j]=np.mean(sep_proc_1st_pass)*base_stochasticity/60*2
                else:
                    # no threshold==1 row: mark as missing
                    ctcf_bs_fp[m,i,j]=np.nan

                # restrained / repaired proportions (first threshold==1 row), scaled by 100
                portion_df = pd.read_csv('Data/'+data_dir + '/restrained_df_' + par_combo + '.csv')
                portion_df = portion_df.drop(columns='Unnamed: 0')
                portion_df_threshold1 = portion_df.loc[portion_df['threshold']==1]
                ctcf_bs_time0[m,i,j] = portion_df_threshold1['restrained proportion time0'].to_numpy()[0] * 100
                ctcf_bs_realtime[m,i,j] = portion_df_threshold1['restrained proportion realtime'].to_numpy()[0] * 100
                ctcf_bs_success[m,i,j] = portion_df_threshold1['repaired proportion'].to_numpy()[0] * 100

                # ratio of the repaired to the restrained-at-time0 value
                ctcf_bs_capture[m,i,j] = ctcf_bs_success[m,i,j]/ctcf_bs_time0[m,i,j]

                # BE-stabilization statistics (first row of the file, stored unscaled)
                stabilized_df = pd.read_csv('Data/'+data_dir + '/stabilized_df_' + par_combo + '.csv')
                stabilized_df = stabilized_df.drop(columns='Unnamed: 0')
                percent_LEFs_stabilized[m,i,j] = stabilized_df ['percent LEF stabilized by BE'].to_numpy()[0]
                percent_TAD_with_stabilized_LEFs[m,i,j] = stabilized_df ['percent TADs with stabilized LEF'].to_numpy()[0]

    # Entries of combinations that were not found keep their initial 0 and would be
    # averaged in as real data, so make the gap visible.
    n_expected = len(proc_list) * len(ctcf_list)
    if count < n_expected:
        print('Warning: ' + data_dir + ': found ' + str(count) + ' of ' + str(n_expected)
              + ' parameter combinations; missing entries are left at 0')

# Theory prediction from Mathematica Notebook BEstabilization_20211004.nb, Probconstrained
# One row per curve (the plot below draws row i in colors[i]); one column per
# entry of slist. Multiplying by 100 puts the values on the 0-100 axis of the plot.
theory = np.asarray([
    [0.707582, 0.723565, 0.743412, 0.763302, 0.781692, 0.79811, 0.812548,
     0.825179, 0.836228, 0.845918, 0.854449, 0.861996, 0.868704, 0.874695,
     0.880073, 0.884923, 0.889314, 0.893309, 0.896955, 0.900297, 0.90337,
     0.906204, 0.908828, 0.911262, 0.913527, 0.915641, 0.917617, 0.919469,
     0.921208, 0.922845, 0.924389],
    [0.822697, 0.836857, 0.851558, 0.864668, 0.875844, 0.885264, 0.893215,
     0.899969, 0.905754, 0.910752, 0.915108, 0.918934, 0.922321, 0.92534,
     0.928048, 0.93049, 0.932706, 0.934725, 0.936573, 0.938272, 0.93984,
     0.941292, 0.942641, 0.943898, 0.945072, 0.946172, 0.947206, 0.948179,
     0.949096, 0.949963, 0.950785],
    [0.892005, 0.901986, 0.911057, 0.918585, 0.924746, 0.929821, 0.934052,
     0.937627, 0.940686, 0.943334, 0.94565, 0.947695, 0.949516, 0.951148,
     0.952623, 0.953961, 0.955184, 0.956306, 0.95734, 0.958297, 0.959185,
     0.960014, 0.960788, 0.961514, 0.962196, 0.962839, 0.963446, 0.964021,
     0.964566, 0.965084, 0.965577],
]) * 100

# Theory prediction from Mathematica Notebook BEstabilization_20211004.nb, Pstabilizedlist
# Same layout as `theory`; drawn in the bottom panel of the plot below.
theory2 = np.asarray([
    [0.528593, 0.64, 0.714249, 0.76673, 0.805441, 0.83494, 0.858007,
     0.876427, 0.891395, 0.903739, 0.914051, 0.922759, 0.930185, 0.936573,
     0.94211, 0.946943, 0.951187, 0.954935, 0.958263, 0.961232, 0.963891,
     0.966284, 0.968444, 0.970402, 0.972181, 0.973804, 0.975287, 0.976648,
     0.977898, 0.97905, 0.980113],
    [0.714249, 0.805441, 0.858007, 0.891395, 0.914051, 0.930185, 0.94211,
     0.951187, 0.958263, 0.963891, 0.968444, 0.972181, 0.975287, 0.977898,
     0.980113, 0.98201, 0.983646, 0.985068, 0.986312, 0.987406, 0.988373,
     0.989233, 0.99, 0.990688, 0.991307, 0.991866, 0.992373, 0.992834,
     0.993254, 0.993639, 0.993991],
    [0.858007, 0.914051, 0.94211, 0.958263, 0.968444, 0.975287, 0.980113,
     0.983646, 0.986312, 0.988373, 0.99, 0.991307, 0.992373, 0.993254,
     0.993991, 0.994613, 0.995143, 0.995598, 0.995992, 0.996336, 0.996637,
     0.996902, 0.997137, 0.997347, 0.997534, 0.997702, 0.997853, 0.99799,
     0.998114, 0.998227, 0.998331],
]) * 100

# x values of the theory curves: 1, 1.5, ..., 16 (31 points, step 0.5)
slist = np.arange(1., 16.5, 0.5)
    
# Figure: simulated mean +/- SEM over runs (square markers) against the theory
# curves (dashed), one colour per processivity, versus BE stabilization factor.
#   top panel    - % DSB constrained       (ctcf_bs_time0 vs. theory)
#   bottom panel - % TADs stabilized by BE (percent_TAD_with_stabilized_LEFs vs. theory2)
colors = ['aqua','slateblue','violet','deeppink','mediumorchid']
matplotlib.rcParams.update({'font.size': 22})
fig, axs = plt.subplots(2,1,figsize=(8, 12))

# (axes, simulated results, theory curves, y label) for each panel
panels = [(axs[0], ctcf_bs_time0, theory, '% DSB constrained'),
          (axs[1], percent_TAD_with_stabilized_LEFs, theory2, '% TADs stabilized by BE')]

for ax, simulated, predicted, ylabel in panels:
    # axis 0 of the result arrays is the run index: statistics are taken over runs
    run_mean = np.mean(simulated, axis=0)
    run_sem = stats.sem(simulated, axis=0)

    for i in range(len(proc_list_sorted)):
        ax.errorbar(ctcf_numerical_sorted, run_mean[i,:], yerr=run_sem[i,:], fmt='s', markersize=8,
                    ecolor=colors[i], color=colors[i], capsize=10, linewidth=3)
    for i in range(len(proc_list_sorted)):
        ax.plot(slist, predicted[i], color=colors[i], linewidth=3, linestyle='dashed')

    ax.set_ylim(0,100)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('BE stabilization factor')
    ax.set_xticks(ctcf_numerical_sorted[0:])
    ax.tick_params(direction='out', length=8, width=2)
    for axis in ['top','bottom','left','right']:
        ax.spines[axis].set_linewidth(1.5)

    # Hide the right and top spines
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    # Only show ticks on the left and bottom spines
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')

# This figure has no legend, so no extra artists are passed to savefig. Passing
# `lgd` here would fail with a NameError on a fresh kernel (it is not created in
# this cell) or, if another cell had created it, would stretch the tight
# bounding box around a legend that belongs to a different figure.
plt.savefig('Figures/'+'BE_stabilization_%TADstabilized.pdf',format='pdf',bbox_inches='tight')
plt.show()