In [None]:
import cooler
import cooltools
from coolpuppy import coolpup
from coolpuppy import plotpup
import pandas as pd
import bioframe
import matplotlib.pyplot as plt

# Suppress every FutureWarning raised from this point on.
import warnings
warnings.simplefilter('ignore', FutureWarning)

# NOTE(review): fonttype 42 is understood to make matplotlib embed TrueType
# fonts in PDFs (text stays editable in vector editors) - confirm against
# the matplotlib rcParams documentation.
import matplotlib as mpl
mpl.rcParams.update({'pdf.fonttype': 42})

In [None]:
# Reference tables for the hg38 assembly: chromosome sizes, centromere
# positions, and the chromosome arms derived from the two.
hg38_chromsizes = bioframe.fetch_chromsizes(db='hg38')
hg38_cens = bioframe.fetch_centromeres(db='hg38')
hg38_arms = bioframe.make_chromarms(
    hg38_chromsizes,
    hg38_cens,
)

In [None]:
# Load the region table, expose `region_id` under the column name `name`,
# and sort it against the hg38 chromosome sizes.
region_df = (
    pd.read_csv('/mnt/md0/clarice/src/region_idx.txt', sep='\t')
    .rename(columns={'region_id': 'name'})
    .pipe(bioframe.sort_bedframe, view_df=hg38_chromsizes)
)

In [None]:
# Tab-separated BEDPE file read without a header row: the seven column
# names are supplied here (two anchors plus a loop identifier).
all_rcmc_loops = pd.read_csv(
    '../example_data/loop_examples/GM12878_rcmc_all_1kb.bedpe',
    sep='\t',
    names=[
        'chrom1', 'start1', 'end1',
        'chrom2', 'start2', 'end2',
        'loop_id',
    ],
)

In [None]:
# Input table comes from capture_hic_comparisons.Rmd.
# Only rows whose `overlap` value is exactly 'RCMC_unique' are kept.
rcmc_unique_loops = (
    pd.read_csv('mifsud_et_al_2015/GM12878_rcmc_unique_loops_vs_mifsud.txt', sep='\t')
    .loc[lambda loops: loops['overlap'] == 'RCMC_unique']
)

In [None]:
# Attach anchor coordinates to the filtered loops by matching `loop_id`;
# the inner join drops loops that have no entry in the coordinate table.
rcmc_unique_loop_coords = rcmc_unique_loops.join(
    all_rcmc_loops.set_index('loop_id'),
    on='loop_id',
    how='inner',
)

In [None]:
# Input table comes from capture_hic_comparisons.Rmd (tab-separated).
mifsud_unique_loops = pd.read_csv(
    'mifsud_et_al_2015/mifsud_promoter_capture_HiC_unique_loops.txt',
    sep='\t',
)

In [None]:
# Significant interactions downloaded from Mifsud et al. and converted to
# hg38 coordinates. The file is read without a header row, so the eight
# column names are supplied here.
mifsud_loop_coords = pd.read_csv(
    'mifsud_et_al_2015/GM12878_signif_interactions.bedpe',
    sep='\t',
    names=[
        'chrom1', 'start1', 'end1',
        'chrom2', 'start2', 'end2',
        'gene_ids', 'mifsud_loop_id',
    ],
)

In [None]:
# Attach anchor coordinates and gene ids to the Mifsud loops by matching
# `mifsud_loop_id`; the inner join drops loops without a coordinate entry.
mifsud_unique_loop_coords_with_annotation = mifsud_unique_loops.join(
    mifsud_loop_coords.set_index('mifsud_loop_id'),
    on='mifsud_loop_id',
    how='inner',
)

In [None]:
# Stack the Mifsud and RCMC loop tables into the single feature table used
# for the pileup. Columns present in only one of the two tables are filled
# with NaN for the rows of the other.
#
# Each input still carries the row labels of the table it was joined from,
# so a plain concat would produce repeated index labels in the result.
# ignore_index=True gives the combined table a fresh 0..n-1 index instead.
pup_loops = pd.concat(
    [mifsud_unique_loop_coords_with_annotation, rcmc_unique_loop_coords],
    ignore_index=True,
)

In [None]:
# Open one group of the multi-resolution cooler file. The URI is
# "<file>::<group>"; adjacent string literals are joined at parse time, so
# the value passed to Cooler is a single unbroken string.
# NOTE(review): 'resolutions/200' presumably selects the 200 bp bin size - confirm.
GM12878_clr = cooler.Cooler(
    '/mnt/md0/clarice/realigned_rcmc_merged/GM12878_merged_realigned.50.mcool'
    '::resolutions/200'
)

In [None]:
# Cis expected values for the captured regions, computed with smoothing
# enabled and the smoothed values aggregated, using 16 worker processes.
expected_df = cooltools.expected_cis(
    clr=GM12878_clr,
    view_df=region_df,
    nproc=16,
    **{
        'smooth': True,
        'aggregate_smoothed': True,
        'smooth_sigma': 0.1,
    },
)

In [None]:
# Pile up the contact map around every loop in `pup_loops` (BEDPE-style
# features), normalised with `expected_df`, producing one pileup per value
# of the `overlap` column.
# NOTE(review): flank=20000 is presumably in base pairs (20 kb) - confirm.
pup = coolpup.pileup(
    GM12878_clr,
    pup_loops,
    features_format='bedpe',
    view_df=region_df,
    expected_df=expected_df,
    nproc=16,
    flank=20000,
    groupby=['overlap'],
)

In [None]:
# Draw one pileup panel per `overlap` category, in the fixed order given
# by `col_order`.
fg = plotpup.plot(
    pup.reset_index(),
    # panel layout
    cols='overlap',
    col_order=['RCMC_unique', 'RCMC_overlap', 'pcHiC_unique'],
    # colour mapping
    score=True,
    cmap='coolwarm',
    scale='log',
    sym=False,
    vmax=2.5,
    # figure appearance
    height=5,
    despine=False,
    font_scale=2,
    font='Helvetica',
)

# Writes whichever figure pyplot currently treats as active.
plt.savefig('figures/mifsud_rcmc_loop_pileup.pdf')