In [None]:
import pandas as pd
import numpy as np
#import bootstrapped.bootstrap as bts
#import bootstrapped.stats_functions as bs_stats
import os
import scipy.stats as stats
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import pylab as pl
from os import path

# Export settings for publication-quality vector output: font type 42 asks the
# pdf/ps backends for TrueType fonts, and the default font family is switched
# to the sans-serif family with Arial as its face.
plt.rcParams.update({
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    'font.sans-serif': "Arial",
    'font.family': "sans-serif",
})

In [None]:
#define helper functions
def round_sf(x, digits):
    """Round ``x`` to ``digits`` significant figures.

    Parameters
    ----------
    x : scalar
        Value to round. A value equal to zero is returned unchanged (same
        object, same type), since zero has no leading significant digit.
    digits : int
        Number of significant figures to keep.

    Returns
    -------
    float, or ``x`` itself when ``x == 0``.
    """
    if x == 0:
        return x
    # Let numpy do the significant-figure rounding in text form, then parse the
    # text back; fractional=False makes `precision` count significant digits.
    rounded_text = np.format_float_positional(
        x,
        precision=digits,
        unique=False,
        fractional=False,
        trim='k',
    )
    return float(rounded_text)

In [None]:
# Gather, for every replicate folder of the processivity x separation scan, the
# threshold-1 summary values into arrays indexed as
# (folder, processivity index, separation index).
base_folder_string = 'output_0827_tiling600_ProcSepScan'
folders = [foldername for foldername in os.listdir('Data/') if foldername.startswith(base_folder_string)]
if not folders:
    # Fail here with a clear message; otherwise the result arrays are never
    # created and later code hits a NameError (or silently reuses stale
    # variables left in the kernel by an earlier run).
    raise FileNotFoundError("no folder in 'Data/' starts with " + repr(base_folder_string))

base_stochasticity = 1 # use to convert step to time


def _param_values(filenames, prefix, suffix):
    """Return the distinct substrings found between `prefix` and `suffix` in the file names."""
    return list({name.split(prefix)[1].split(suffix)[0] for name in filenames})


def _sorted_by_number(values, numbers):
    """Return the strings in `values` ordered by the matching entries of `numbers`."""
    return [value for _, value in sorted(zip(numbers, values))]


for m, folder in enumerate(folders):
    data_dir = 'Data/' + folder
    print(data_dir)
    stats_file = [filename for filename in os.listdir(data_dir) if filename.startswith("stats_df")]

    # Recover the scanned parameter values from the file names, which follow
    # stats_df_sep<..>_proc<..>_superproc<..>_ctcf<..>_dsb<..>_supersep<..>_superloading<..>_bs<..>.csv
    # The original strings are kept (and sorted numerically) so that the file
    # names can be rebuilt exactly below.
    sep_list = _param_values(stats_file, 'stats_df_sep', '_proc')
    sep_numerical_list = [float(a) for a in sep_list]
    sep_list_sorted = _sorted_by_number(sep_list, sep_numerical_list)
    proc_list = _param_values(stats_file, '_proc', '_superproc')
    proc_numerical_list = [float(a) for a in proc_list]
    proc_list_sorted = _sorted_by_number(proc_list, proc_numerical_list)
    proc_numerical_list_sorted = [float(a) for a in proc_list_sorted]
    superproc_list = _param_values(stats_file, 'superproc', '_ctcf')
    superproc_numerical_list = [float(a) for a in superproc_list]
    superproc_list_sorted = _sorted_by_number(superproc_list, superproc_numerical_list)
    ctcf_list = _param_values(stats_file, 'ctcf', '_dsb')
    ctcf_numerical_list = [float(a) for a in ctcf_list]
    ctcf_list_sorted = _sorted_by_number(ctcf_list, ctcf_numerical_list)
    dsb_list = _param_values(stats_file, 'dsb', '_supersep')
    dsb_numerical_list = [float(a) for a in dsb_list]
    dsb_list_sorted = _sorted_by_number(dsb_list, dsb_numerical_list)
    supersep_list = _param_values(stats_file, 'supersep', '_superloading')
    supersep_numerical_list = [int(float(a)) for a in supersep_list]
    supersep_list_sorted = _sorted_by_number(supersep_list, supersep_numerical_list)
    superloading_list = _param_values(stats_file, 'superloading', '_bs')
    superloading_numerical_list = [float(a) for a in superloading_list]
    superloading_list_sorted = _sorted_by_number(superloading_list, superloading_numerical_list)
    bs_list = _param_values(stats_file, 'bs', '.csv')
    bs_numerical_list = [float(a) for a in bs_list]
    bs_list_sorted = _sorted_by_number(bs_list, bs_numerical_list)
    bs_numerical_sorted = [float(a) for a in bs_list_sorted]

    if m == 0:
        # The array shape is fixed by the first folder.
        # NOTE(review): this assumes every folder scans the same processivity
        # and separation values -- confirm.
        # Start from NaN, not 0: a combination whose stats file is missing must
        # stay "missing" rather than enter the averages as a measured zero.
        scan_shape = (len(folders), len(proc_list_sorted), len(sep_list_sorted))
        ctcf_bs_fp = np.full(scan_shape, np.nan)
        ctcf_bs_success = np.full(scan_shape, np.nan)
        ctcf_bs_realtime = np.full(scan_shape, np.nan)
        ctcf_bs_time0 = np.full(scan_shape, np.nan)

    for i in range(len(proc_list_sorted)):
        for j in range(len(sep_list_sorted)):
            # Only processivity and separation vary; every other parameter is
            # pinned to its smallest value found in this folder (index 0).
            par_combo = 'sep' + sep_list_sorted[j] + '_proc' + proc_list_sorted[i] + '_superproc' + superproc_list_sorted[0] + '_ctcf' + ctcf_list_sorted[0] + '_dsb' + dsb_list_sorted[0] + '_supersep' + supersep_list_sorted[0]+ '_superloading' + superloading_list_sorted[0]+ '_bs' + bs_list_sorted[0]
            filename = data_dir + '/stats_df_' + par_combo + '.csv'
            if not path.exists(filename):
                # Combination not simulated in this folder: entries stay NaN.
                continue

            stats_df = pd.read_csv(filename)
            stats_df = stats_df.drop(columns='Unnamed: 0')
            stats_df_threshold1 = stats_df.loc[stats_df['threshold']==1]
            sep_proc_1st_pass = stats_df_threshold1['first_passage_time'].to_numpy()
            if len(sep_proc_1st_pass)>0:
                # Mean first-passage time, scaled by base_stochasticity*2/60.
                # NOTE(review): presumably 2 s per step, then /60 to minutes
                # -- confirm.
                ctcf_bs_fp[m,i,j]=np.mean(sep_proc_1st_pass)*base_stochasticity*2/60
            else:
                ctcf_bs_fp[m,i,j]=np.nan

            # The matching restrained_df file is expected to exist alongside
            # the stats file and to contain a threshold == 1 row; only its
            # first such row is used.
            portion_df = pd.read_csv(data_dir + '/restrained_df_' + par_combo + '.csv')
            portion_df = portion_df.drop(columns='Unnamed: 0')
            portion_df_threshold1 = portion_df.loc[portion_df['threshold']==1]
            ctcf_bs_time0[m,i,j] = portion_df_threshold1['restrained proportion time0'].to_numpy()[0]
            ctcf_bs_realtime[m,i,j] = portion_df_threshold1['restrained proportion realtime'].to_numpy()[0]
            # Stored as a percentage.
            ctcf_bs_success[m,i,j] = portion_df_threshold1['repaired proportion'].to_numpy()[0] * 100

def _mean_sem_annotations(values):
    """Summarise a (folder, row, column) array across folders for an annotated heatmap.

    Parameters
    ----------
    values : numpy.ndarray
        3-D array; axis 0 is averaged away. NaN entries are ignored.

    Returns
    -------
    avg : 2-D array of NaN-ignoring means over axis 0.
    sem : 2-D array of standard errors of the mean over axis 0 (NaNs omitted).
    annot : 2-D string array with one label per cell: the mean, a newline,
        then the SEM in parentheses, both rounded to one decimal.
    """
    avg = np.nanmean(values, axis=0)
    sem = stats.sem(values, axis=0, nan_policy='omit')
    n_rows, n_cols = avg.shape
    labels = [
        ['%s\n(%s)' % (str(np.around(avg[i, j], decimals=1)), str(np.around(sem[i, j], decimals=1)))
         for j in range(n_cols)]
        for i in range(n_rows)
    ]
    # Let numpy size the string dtype from the labels themselves so none is
    # truncated; the reshape keeps the 2-D shape even when a dimension is empty.
    annot = np.array(labels, dtype=str).reshape(avg.shape)
    return avg, sem, annot


# Both metrics get the same NaN-aware treatment, so a replicate with no data
# for a cell is left out of that cell's mean/SEM instead of distorting it.
ctcf_bs_success_avg, ctcf_bs_success_sem, annot_success = _mean_sem_annotations(ctcf_bs_success)
ctcf_bs_fp_avg, ctcf_bs_fp_sem, annot_fp = _mean_sem_annotations(ctcf_bs_fp)

# Kept for any later code that reads the array dimensions.
dim1, dim2, dim3 = ctcf_bs_fp.shape

# Two stacked annotated heatmaps of the scan: repair success (top) and
# first-passage time (bottom). Arrays are (processivity, separation) and are
# transposed so processivity runs along x and separation along y.
matplotlib.rcParams.update({'font.size': 22})
fig, axs = plt.subplots(2,1,figsize=(9, 12))

# (mean values, per-cell labels, colormap, colorbar label) for each panel
panels = [
    (ctcf_bs_success_avg, annot_success, 'Blues_r', '% successful repair'),
    (ctcf_bs_fp_avg, annot_fp, 'Blues', 'first-passage time (min)'),
]
for ax, (panel_avg, panel_annot, panel_cmap, panel_label) in zip(axs, panels):
    # fmt='' because the annotations are already formatted strings.
    sns.heatmap(
        pd.DataFrame(np.transpose(panel_avg)),
        annot=np.transpose(panel_annot),
        fmt='',
        xticklabels=proc_list_sorted,
        yticklabels=sep_list_sorted,
        cmap=panel_cmap,
        cbar_kws={'label': panel_label},
        ax=ax,
    )
    ax.set_ylabel('separation (kb)')
    ax.set_xlabel('processivity (kb)')
    ax.tick_params(direction='out', length=6, width=2)

# Light grey axes background for the first-passage panel; it is visible
# wherever the heatmap draws no cell (seaborn masks cells with missing values).
axs[1].set_facecolor("0.8584083044982699")

fig.tight_layout(pad=1.5)
# Create the output folder if needed so the save cannot fail on a fresh checkout.
os.makedirs('Figures', exist_ok=True)
fig.savefig('Figures/'+'Pro_sep_scan.pdf',format='pdf')
plt.show()

