### Imports

In [None]:
# Package imports
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
# import libraries for biological data analysis
from coolpuppy import coolpup
from coolpuppy.lib import numutils
from coolpuppy.lib.puputils import divide_pups
from coolpuppy import plotpup
from matplotlib.colors import LogNorm
from mpl_toolkits.axes_grid1 import make_axes_locatable
import cooler
import bioframe
import cooltools
from cooltools import expected_cis, expected_trans
from cooltools.lib import plotting
import cooler as clr
import h5py
# Import python package for working with cooler files and tools for analysis
import cooltools.lib.plotting
from cooltools import insulation
from itertools import product
from scipy.stats import pearsonr

### Read mcools

In [None]:
# Input location: a directory plus the mcool file names inside it.
file_path = ""  # directory that holds the mcool files
mcool_files = [""]  # mcool file names to analyse

# Resolution handed to read_cooler; the insulation windows below are
# multiples of it.
resolution = 1000

# NOTE(review): read_cooler is not defined in the visible part of this file;
# presumably it opens one mcool at the requested resolution - confirm.
all_coolers = []
for mcool_file in mcool_files:
    mcool_path = os.path.join(file_path, mcool_file)
    all_coolers.append(read_cooler(mcool_path, resolution))

# Window sizes: 1x, 2x, 5x, 10x, 25x and 50x the base resolution.
windows = [multiple * resolution for multiple in (1, 2, 5, 10, 25, 50)]

# One insulation table per cooler, computed for every window in one call.
insulation_tables = [
    insulation(a_cooler, windows, verbose=True, nproc=32)
    for a_cooler in all_coolers
]

### Make into bedgraphs

In [None]:
# Export every insulation table to disk through make_bedgraphs.
# NOTE: make_bedgraphs is defined in the "Functions" cell at the end of this
# file, so that cell has to be executed before this one.
# make_bedgraphs pairs `names` with `insulation_tables` via zip and drops the
# last four characters of each name when building the output file names, so
# an empty `names` list exports nothing.
names = [] # list of file names, one per insulation table
resolution = 1000 # base resolution
make_bedgraphs(names, insulation_tables, resolution)

### Visualize insulation score tracks

In [None]:
# Figure with three stacked panels sharing one x axis: two contact-map
# panels on top and the insulation-score tracks at the bottom.
plt.rcParams['font.size'] = 12
# Index into `windows` of the insulation window to display; this name is
# also read by the correlation-matrix cell further down.
window_start = 5

# select region to visualize
start = 164_000_000 
end = 167_000_000
region = ('chr2', start, end)

# Logarithmic colour scale shared by the contact-map panels.
norm = LogNorm(vmax=0.1, vmin=0.001)
f, axs = plt.subplots(3, 1, sharex=True, dpi=600, layout='compressed')# , figsize=(18, 6))
f.set_figwidth(12)

# Every axis except the last one gets a contact map.
# NOTE(review): all_coolers_low_res, pcolormesh_45deg and format_ticks are not
# defined in the visible part of this file - they must come from another cell.
# NOTE(review): `resolution` is the base resolution set earlier; confirm it
# matches the bin size of all_coolers_low_res[i+4].
for i, ax in enumerate(axs[:-1]):
    data = all_coolers_low_res[i+4].matrix(balance=True).fetch(region)
    im = pcolormesh_45deg(ax, data, start=region[1], resolution=resolution, norm=norm, cmap='fall')
    ax.set_aspect(0.5)
    ax.set_ylim(0, 550000) # define off-diagonal distance
    format_ticks(ax, rotate=False)
    ax.xaxis.set_visible(False)


# One colourbar for the whole figure, built from the `im` of the last
# loop iteration.
f.colorbar(im, ax=axs)
ins_ax = axs[-1]

# zip() below stops at the shorter input, so only as many tables are drawn
# as there are labels.
labels=[""] # set plot labels
# Five-colour cycle sampled from the plasma colormap.
ins_ax.set_prop_cycle(plt.cycler("color", plt.cm.plasma(np.linspace(0,1,5))))

for insulation_table, label in zip(insulation_tables, labels):
    # Restrict the table to the plotted region and draw the score at bin centres.
    insul_region = bioframe.select(insulation_table, region)
    ins_ax.plot(insul_region[['start', 'end']].mean(axis=1),
                insul_region['log2_insulation_score_'+str(windows[window_start])],
                label=label, alpha=0.8)

    # Rows with a non-NaN boundary strength, split on the is_boundary flag.
    # `weak_boundaries` is computed but not used anywhere below in this cell.
    res = windows[window_start]
    boundaries = insul_region[~np.isnan(insul_region[f'boundary_strength_{res}'])]
    weak_boundaries = boundaries[~boundaries[f'is_boundary_{res}']]
    strong_boundaries = boundaries[boundaries[f'is_boundary_{res}']]

    # Mark the flagged boundaries on top of the score line.
    ins_ax.scatter(strong_boundaries[['start', 'end']].mean(axis=1),
                strong_boundaries[f'log2_insulation_score_{res}'], s=5, alpha=0.8)

ins_ax.legend(bbox_to_anchor=(0., -1), loc='lower left', ncol=4);

format_ticks(ins_ax, y=False, rotate=False)

# `ax` is the variable left over from the contact-map loop (the last
# contact-map axis); the panels were created with sharex=True.
ax.set_xlim(region[1], region[2])
# plot_dir is passed to savefig as the output file name.
plot_dir = "" # output directory for file
plt.savefig(plot_dir, format='svg')

### Plot CDFs of boundary strengths

In [None]:
# Cumulative distributions of boundary strength: one panel per insulation
# window (the smallest window, windows[0], is skipped) and one curve per
# insulation table.
histkwargs = dict(
    bins=10**np.linspace(-4,1,200),
    histtype='step',
    lw=2,
    cumulative=True,
    density=True
)

# squeeze=False always yields a 2-D array of axes, so the code below also
# works when only a single panel is requested.
f, axs = plt.subplots(len(windows[1:]), 1, sharex=True, figsize=(12,12),
                      constrained_layout=True, dpi=600, squeeze=False)
axs = axs.ravel()

for w, ax in zip(windows[1:], axs):
    # One curve per table, drawn in table order so the colours and the
    # legend entries line up across panels.
    for insulation_table in insulation_tables:
        ax.hist(
            insulation_table[f'boundary_strength_{w}'],
            **histkwargs
        )

    # Label and configure each panel exactly once; doing this inside the
    # table loop stacked one identical text artist per table.
    ax.text(0.02, 0.9,
            f'Window {w//1000}kb',
            ha='left',
            va='top',
            transform=ax.transAxes)

    # NOTE(review): with density=True and cumulative=True the y values are
    # cumulative fractions rather than counts - the label may need updating.
    ax.set(
        xscale='log',
        ylabel='# boundaries'
    )

# Same range as the histogram bins (1e-4 .. 10).
axs[0].set_xlim([1e-4, 10])
labels = [] # list of labels for legend, one per insulation table
axs[-1].legend(labels, bbox_to_anchor=(0., -0.5), loc='lower left', ncol = len(insulation_tables))
axs[-1].set(xlabel='Boundary strength');

sns.despine()
# plot_dir is passed to savefig as the output file name.
plot_dir = "" # output directory for plot
plt.savefig(plot_dir, format='svg')


### Plot correlation matrix

In [None]:
# Pairwise comparison of insulation scores between tables: the upper triangle
# (including the diagonal) of an N x N grid shows a 2-D density of the scores
# plus their Pearson correlation; the lower triangle is removed.
insulation_len = len(insulation_tables)
# squeeze=False keeps `ax` two-dimensional even for a single table, so
# ax[i, j] indexing is always valid.
fig, ax = plt.subplots(insulation_len, insulation_len, figsize=(12,12), dpi=600,
                      sharex=True, sharey=True, squeeze=False)

labels=[] # list of labels for correlation matrix, one per insulation table
# Column compared in every panel; `window_start` is set in the track
# visualisation cell.
score_column = f'log2_insulation_score_{windows[window_start]}'
# Upper limit on the number of points passed to the density estimate.
max_points = 10000

for i, j in product(range(insulation_len), range(insulation_len)):
    if i > j:
        # Lower triangle duplicates the upper one; drop the empty axes.
        fig.delaxes(ax[i][j])
        continue

    insulation_score_1 = insulation_tables[i][score_column]
    insulation_score_2 = insulation_tables[j][score_column]
    # Keep only the rows where both tables have a score.
    nan_mask = np.logical_and(~np.isnan(insulation_score_1), ~np.isnan(insulation_score_2))
    insulation_score_1 = insulation_score_1[nan_mask].to_numpy()
    insulation_score_2 = insulation_score_2[nan_mask].to_numpy()

    # Subsample for the density plot. Sampling without replacement cannot
    # draw more points than exist, so the size is capped at the data length.
    sample_size = min(max_points, len(insulation_score_1))
    selection = np.random.choice(len(insulation_score_1), sample_size, replace=False)

    # A tiny random jitter is added to y before the density estimate.
    sns.kdeplot(x=insulation_score_1[selection],
                y=insulation_score_2[selection] + 1e-6 * np.random.standard_normal(selection.shape),
                ax=ax[i, j], thresh=0.02, fill=True, levels=8)
    # The correlation itself is computed on all valid rows, not the subsample.
    corr_coefficient = pearsonr(insulation_score_1,
                                insulation_score_2).statistic
    ax[i, j].text(0.02, 0.9,
                  f'{corr_coefficient:.2f}',
                  ha='left',
                  va='top',
                  transform=ax[i, j].transAxes)
    if i == 0:
        ax[i, j].set_title(labels[j])

    if i == j:
        ax[i, j].set_ylabel(labels[i])

    ax[i, j].set_xlim([-1.5, 1.5])
    ax[i, j].set_ylim([-1.5, 1.5])
    # Identity line for reference.
    ax[i, j].plot(np.linspace(-2, 2), np.linspace(-2, 2), 'k', linewidth=0.5)

# plot_dir is passed to savefig as the output file name.
plot_dir = "" # output directory for plot
plt.savefig(plot_dir, format='svg')

# Functions (run this cell first: the analysis cells above call `make_bedgraphs`)

In [None]:
# Make bedgraphs
def make_bedgraphs(names, insulation_tables, resolution, windows=None,
                   output_dir="insulations/"):
    """
    Save insulation tables as bedgraph tracks and boundary BED files.

    For every (name, table) pair and every window two tab-separated,
    header-less files are written into ``output_dir``:

    * ``<stem>.res<resolution>.<window>.bedgraph`` with the columns
      chrom, start, end, log2_insulation_score_<window>;
    * ``<stem>._boundaries.<window>.bed`` with the columns
      chrom, start, end, boundary_strength_<window>, restricted to rows
      where ``is_boundary_<window>`` is true.

    Rows with a missing value in any exported column are dropped.
    ``<stem>`` is ``name`` without its last four characters.

    Parameters
    ----------
    names : sequence of str
        One name per table; paired with ``insulation_tables`` via ``zip``,
        so surplus entries on either side are ignored.
    insulation_tables : sequence of pandas.DataFrame
        Tables holding the columns listed above.
    resolution : int
        Base resolution; used for the default window and for the ``res``
        label in the bedgraph file name (previously hard-coded as
        ``res250`` regardless of this argument).
    windows : sequence of int, optional
        Window sizes to export. Defaults to ``[50 * resolution]``.
    output_dir : str, optional
        Destination directory; created if it does not exist.
    """
    if windows is None:
        windows = [50 * resolution]

    # to_csv does not create missing directories, so make sure it exists.
    os.makedirs(output_dir, exist_ok=True)

    coordinates = ["chrom", "start", "end"]
    for (name, insulation_table), w in product(zip(names, insulation_tables), windows):
        stem = name[:-4]

        # output bedgraph
        track_file = ".".join([stem, f"res{resolution}", str(w), "bedgraph"])
        track = insulation_table[coordinates + [f"log2_insulation_score_{w}"]]
        track.dropna().to_csv(os.path.join(output_dir, track_file),
                              header=False, sep="\t", index=False)

        # output boundary locations
        boundary_file = ".".join([stem, "_boundaries", str(w), "bed"])
        is_boundary = insulation_table[f"is_boundary_{w}"]
        boundaries = insulation_table.loc[is_boundary,
                                          coordinates + [f"boundary_strength_{w}"]]
        boundaries.dropna().to_csv(os.path.join(output_dir, boundary_file),
                                   header=False, sep="\t", index=False)