In [None]:
%load_ext autoreload
%autoreload 2
# from saddle import saddleplot

%matplotlib inline
%config InlineBackend.print_figure_kwargs={'bbox_inches':None}
import pandas as pd
import numpy as np
from itertools import chain

# Hi-C utilities imports:
import cooler
import bioframe
import cooltools
from cooltools.lib.numutils import fill_diag

# Visualization imports:
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm, Normalize
from matplotlib import colors
import matplotlib.patches as patches
from matplotlib.ticker import EngFormatter

from itertools import cycle

# from ipywidgets import interact, fixed

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from itertools import product

In [None]:
# import jscatter
import scipy
import logging
import multiprocess as mp
# bbi for stackups ...
import bbi

# functions and assets specific to this repo/project ...
from data_catalog import bws, bws_vlim, telo_dict, mega_telo_dict, pubclr_dict
from helper_func import (
    get_stack,
    show_stacks,
    plot_stackups_lite,
    plot_stackups_sets,
    to_bigbed3,
    merge_nested,
)
# from mypileup_module import trans_pileup

from tqdm import tqdm
from tqdm.notebook import trange, tqdm
import warnings
import h5py



import matplotlib.lines as lines
from matplotlib.lines import Line2D
from matplotlib.patches import ConnectionPatch, Rectangle
from mpl_toolkits.axes_grid1 import Divider, Size
from mpl_toolkits.axes_grid1.inset_locator import BboxConnector
from matplotlib import cm
# from mpl_toolkits.axes_grid1.Size import Fixed


# enable editable text ...
mpl.rcParams["pdf.fonttype"]=42
mpl.rcParams["svg.fonttype"]="none"
mpl.rcParams['axes.linewidth'] = 0.5

In [None]:
# yield rectangles around dots within a specific region, exposing the important plotting parameters
def rectangles_around_dots_ww(dots_bins_df, the_tile, loc="upper", lw=1, ec="cyan", fc="none", halo=30_000, ext_width=0):
    """
    Yield matplotlib rectangles outlining the "dots" that are visible in a tile.

    Parameters
    ----------
    dots_bins_df : pandas.DataFrame
        Table with the columns `bin1_id`, `bin2_id`, `bin1_width`, `bin2_width`.
    the_tile : tuple
        `(name, span1, span2)`; the name is ignored, `span1`/`span2` are
        indexed with `[0]` (start) and `[1]` (end) and are compared directly
        against `bin1_id` / `bin2_id`.
    loc : {"upper", "lower"}
        "upper" anchors each rectangle at `(b2, b1)` (x taken from side 2),
        "lower" anchors it at `(b1, b2)`.
    lw, ec, fc :
        Forwarded to `patches.Rectangle` as line width, edge and face color.
    halo : int
        Margin added on both ends of each span when selecting visible dots.
        NOTE(review): it is compared against bin ids, so it is in bins, not bp.
    ext_width : int
        Moves the rectangle origin back by `ext_width` and grows the width by
        the same amount (i.e. the far edge stays in place).

    Yields
    ------
    matplotlib.patches.Rectangle

    Raises
    ------
    ValueError
        If `loc` is neither "upper" nor "lower". Since this is a generator the
        error surfaces when iteration starts - but now even if no dot is visible.
    """
    # validate up front: previously a bad `loc` went unnoticed for empty selections
    if loc not in ("upper", "lower"):
        raise ValueError("loc has to be upper or lower")
    rectangle_kwargs = dict(lw=lw, ec=ec, fc=fc)
    # parse the tile
    _, tspan1, tspan2 = the_tile
    # select only visible "boxes" and express them in tile-relative coordinates :
    _the_dots = dots_bins_df \
        .query("""(@tspan1[0] - @halo < bin1_id < @tspan1[1] + @halo) & \
                  (@tspan2[0] - @halo < bin2_id < @tspan2[1] + @halo) """) \
        .eval("""
                b1 = bin1_id - @tspan1[0] - @ext_width
                b2 = bin2_id - @tspan2[0] - @ext_width
                bin1_width = bin1_width + @ext_width
                bin2_width = bin2_width + @ext_width
            """)
    print(f"{len(_the_dots)} pixels are visible out of {len(dots_bins_df)} ...")
    for b1, b2, w1, w2 in _the_dots[["b1", "b2", "bin1_width", "bin2_width"]].itertuples(index=False):
        if loc == "upper":
            yield patches.Rectangle((b2, b1), w2+1, w1+1, **rectangle_kwargs)
        else:
            # loc == "lower" - mirrored across the diagonal
            yield patches.Rectangle((b1, b2), w1+1, w2+1, **rectangle_kwargs)

# Pre-define relevant coolers just in case ...

In [None]:
# 10 kb coolers: one cooler.Cooler handle per entry of `telo_dict`,
# opened at the "/resolutions/<binsize>" group of each path
binsize10 = 10_000
telo_clrs10 = { _k: cooler.Cooler(f"{_path}::/resolutions/{binsize10}") for _k, _path in telo_dict.items() }

# 25 kb coolers: same samples (same `telo_dict` keys), coarser resolution
binsize25 = 25_000
telo_clrs25 = { _k: cooler.Cooler(f"{_path}::/resolutions/{binsize25}") for _k, _path in telo_dict.items() }

# Chrom arms as a view

In [None]:
# Use bioframe to fetch chromosome sizes and centromere positions for hg38,
# then split every chromosome into p/q arms.
hg38_chromsizes = bioframe.fetch_chromsizes('hg38')
hg38_cens = bioframe.fetch_centromeres('hg38')
hg38_arms_full = bioframe.make_chromarms(hg38_chromsizes, hg38_cens)
# # remove "bad" chromosomes and near-empty arms ...
# excluded_arms = ["chr13_p", "chr14_p", "chr15_p", "chr21_p", "chr22_p", "chrM_p", "chrY_p", "chrY_q", "chrX_p", "chrX_q"]
# hg38_arms = hg38_arms_full[~hg38_arms_full["name"].isin(excluded_arms)].reset_index(drop=True)

# can do 1 chromosome (or arm) as well ..
# first 44 arm names - presumably the p/q arms of chr1..chr22 (22 x 2);
# this relies on the row order of `hg38_arms_full` - TODO confirm
included_arms = hg38_arms_full["name"].to_list()[:44] # all autosomal ones ...
hg38_arms = hg38_arms_full[hg38_arms_full["name"].isin(included_arms)].reset_index(drop=True)

# There is a problem with our arms view of the chromosomes ...

The way we do it now, the end of the p-arm is always equal to the start of the q-arm ...

After binning this could lead to the situation where last bin of p-arm is upstream of the first q-arm bin ...

This makes `cooltools.api.is_valid_expected` crash ...

Let's try solving that by shifting the start of every q-arm downstream (note: the `adjust_arm_view` helper below shifts it by one `binsize`, not by 1 bp) ...

In [None]:
def adjust_arm_view(
    view_df,
    binsize,
):
    """
    adjust arm-based view of the genome to fix slightly overlapping p and q arms ...

    Parameters
    ----------
    view_df : pandas.DataFrame
        View with exactly four columns, read positionally as
        (chrom, start, end, name).
    binsize : int
        Amount added to the start of every arm whose name contains "q".

    Returns
    -------
    pandas.DataFrame
        New frame with the same column labels as `view_df`; q-arm starts are
        shifted by `binsize`, all other rows are returned unchanged.
    """
    _iter_view = view_df.itertuples(index=False)
    return pd.DataFrame(
        [(c,s+binsize,e,n) if ("q" in n) else (c,s,e,n) for c,s,e,n in _iter_view],
        # take the labels from the input itself, not from the global `hg38_arms`,
        # so the helper works for any view passed in
        columns=view_df.columns
    )


### Read pre-called native compartments
## ... and Pick one list of anchors and annotate it with epigenetic marks ...

In [None]:
# label -> path of a tab-separated file with a set of ID anchors
id_anchor_fnames = {
    "mega_2X_enrichment": "ID_anchors/mega_2X_enrichment.fourth_mega.max_size.bed",
    "5hr_2X_enrichment_old": "ID_anchors/5hr_2X_enrichment.second_bulk.max_size.bed",
    "5hr_2X_enrichment": "ID_anchors/5hr_2X_enrichment.pixel_derived.bed",
    "5hr_2X_enrichment_nosing": "ID_anchors/5hr_2X_enrichment.pixel_derived.no_singletons.bed",
    "5hr_notinCyto_2X_enrichment_signal": "ID_anchors/p5notin_pCyto_anchors_2X_enrichment.pixel_derived.signal_peaks.bed",
    "5hr_2X_enrichment_signal": "ID_anchors/5hr_2X_enrichment.pixel_derived.signal_peaks.bed",
    "10hr_2X_enrichment_signal": "ID_anchors/10hrs_2X_enrichment.pixel_derived.signal_peaks.bed",
    "N93p5_2X_enrichment_signal": "ID_anchors/N93p5_2X_enrichment.pixel_derived.signal_peaks.bed",
    "pCyto_2X_enrichment_signal": "ID_anchors/pCyto_2X_enrichment.pixel_derived.signal_peaks.bed",
    "mCyto_2X_enrichment_signal": "ID_anchors/mCyto_2X_enrichment.pixel_derived.signal_peaks.bed",
    "mega_3X_enrichment": "ID_anchors/mega_3X_enrichment.fifth_mega3x.max_size.bed",
    "MEGA_2X_enrichment": "ID_anchors/MEGAp5_2X_enrichment.pixel_derived.signal_peaks.bed",
    "MEGA_weaker_2X_enrichment": "ID_anchors/MEGA_plus_weak_anchors_2X_enrichment.pixel_derived.signal_peaks.bed",
    "MEGAN93_2X_enrichment": "ID_anchors/MEGAN93p5_2X_enrichment.pixel_derived.signal_peaks.bed",
    "MEGAminus_2X_enrichment": "ID_anchors/MEGA_minus_ctrl_2X_enrichment.pixel_derived.signal_peaks.bed",
    "cyto_2x_enrichment": "ID_anchors/cyto_2x_enrichment.third_mCyto.max_size.bed",
}

# read every file into a DataFrame, keyed by the same labels
id_anchors_dict = {}
for id_name, fname in id_anchor_fnames.items():
    id_anchors_dict[id_name] = pd.read_csv(fname, sep="\t")
    # report how many rows were read from each file
    print(f"loaded {len(id_anchors_dict[id_name]):5d} ID anchors {id_name:>20} in BED format ...")

## Pick one list of anchors and annotate it with epigenetic marks ...

In [None]:
# pick one anchor set and drop the columns that are not used downstream
_anchors = id_anchors_dict["5hr_2X_enrichment_signal"]
_anchors = _anchors.drop(columns=["size.1","valency","start","end"])
# NOTE(review): `bw_kyes_to_use` is not read anywhere in this cell - the loop
# below filters on its own hard-coded ["dots",] list instead; confirm which
# of the two is meant to drive the annotation.
bw_kyes_to_use = [
    # "mG.atac",
    # "H3K27ac",
    # "ctcf",
    "dots",
]

# summit annotation first ...
# NOTE: this mutates the `bws` dict imported from data_catalog
# bws["dots"] = "mega_dots_anchors.bb"
bws["dots"] = "mega_final_dots_anchors.bb"

print("\nannotating footprints ...\n")
# additional anchor characterization - using footprint
for k, bw in bws.items():
    if k in ["dots",]:
        # one value per anchor (bins=1) over the peak padded by 2 kb on each side,
        # stored as a new column "peak_<track>"
        print(f"working on peak {k} ...")
        _anchors[f"peak_{k}"] = bbi.stackup(
                bw,
                _anchors["chrom"],
                _anchors["peak_start"] - 2_000,
                _anchors["peak_end"] + 2_000,
                bins=1
            ).flatten()


In [None]:
# stacked histogram of the anchor "size" column, split by whether the anchor
# has a non-zero `peak_dots` value; 50 bin edges between 20 kb and 250 kb
plt.hist(
    [
        _anchors.query("peak_dots == 0")["size"],
        _anchors.query("peak_dots > 0")["size"],
    ],
    bins=np.linspace(20_000,250_000, 50),
    stacked=True,
    label=["peak_dots == 0","peak_dots > 0"]
    # color = ['r','g']
)
plt.legend()
plt.gca().set_xlabel("ID anchor footprint")
plt.gca().set_title("ID set: 5hr_2X_enrichment_signal")

# We need to do more annotation for the `_anchors` to demonstrate pruning ...

## Load dots and anchors to perform the ID-ID as a function of dot-overlap analysis ...

In [None]:
# ! ls dots*
# ! wc -l dots_10kb_samples/m5hR1R2_10kb_wheader.bedpe
# ! wc -l dots_10kb_MEGA_samples/mG1s_MEGA_10kb_wheader.bedpe
# ! wc -l dots_10kb/RGmR1R2_10kb_wheader.bedpe

# ! wc -l dots_10kb/RGmR2_10kb_wheader.bedpe
# ! wc -l dots_10kb/RGmR1_10kb_wheader.bedpe

In [None]:
# ############################################# anchors ...
# anchor_fnames = {
#     "mega_ctrl": "dot_anchors_10kb_MEGA/mG1s_MEGA.bed",
# }
# # ...
# dot_anchors_dict = {}
# for id_name, fname in anchor_fnames.items():
#     dot_anchors_dict[id_name] = pd.read_csv(fname, sep="\t")
#     # ...
#     print(f"loaded {len(dot_anchors_dict[id_name]):5d} ID anchors {id_name:>20} in BED format ...")
# # ...
############################################# dots themselves ...
# label -> path of a tab-separated BEDPE file (with header) of called dots
dot_fnames = {
    # "mega_ctrl": "dots_10kb_MEGA_samples/mG1s_MEGA_10kb_wheader.bedpe",
    # "mega_depl": "dots_10kb_MEGA_samples/pG1s_MEGA_10kb_wheader.bedpe",
    # "mega_mito": "dots_10kb_MEGA_samples/Ms_MEGA_10kb_wheader.bedpe",
    "mega_ctrl": "dots_10kb_MEGA_final/mG1s_MEGA_10kb_wheader_convergent.bedpe",
    "mega_depl": "dots_10kb_MEGA_filtered_samples/pG1s_MEGA_10kb_wheader.bedpe",
    "mega_mito": "dots_10kb_MEGA_filtered_samples/Ms_MEGA_10kb_wheader.bedpe",
    "cyto": "dots_10kb_samples/mCyto_10kb_wheader.bedpe",
    "m5": "dots_10kb_samples/m5hR1R2_10kb_wheader.bedpe",
}
# ...
# let's load them all into a dictionary ...
dots_dict = {}
for id_name, fname in dot_fnames.items():
    dots_dict[id_name] = pd.read_csv(fname, sep="\t")
    # report how many rows were read from each file
    print(f"loaded {len(dots_dict[id_name]):5d} dots {id_name:>20} in BEDPE format ...")

# the dot set used by every downstream cell
_select_dots = "mega_ctrl"
# _select_dots = "m5"

# # pick specific anchors and dots ...
# _the_anchors = dot_anchors_dict["mega_ctrl"]
# sort the selected dots by their first anchor, following the order of the
# full (unfiltered) arm view
_the_dots = bioframe.sort_bedframe(
    dots_dict[_select_dots],
    view_df=hg38_arms_full,
    cols=("chrom1","start1","end1")
)
display(_the_dots)

# Load domains
merge nested and "almost" nested regions (regions overlapping a lot !)

In [None]:
# tab-separated table of domains; later cells read its `chrom`, `start`,
# `end` and `n_intervals` columns
_domains = pd.read_csv(
    "extrusion_domains/mG1s_MEGA_10kb_double_filtered.bedpe",
    sep="\t",
)
display(_domains.head())

In [None]:
len(_domains)

### assign domains to anchors ...

In [None]:
# interval columns of the anchor table used for the overlap
_anchor_cols = ('chrom', 'peak_start', 'peak_end')

# intersect anchors (peaks padded by 2 kb on both sides) with the domains;
# only indices and overlap coordinates are returned, not the input columns
_anchors_domained = bioframe.overlap(
    _anchors.eval("""
        peak_start = peak_start - 2_000
        peak_end = peak_end + 2_000
    """),
    _domains,
    return_input=False,
    return_index=True,
    return_overlap=True,
    suffixes=("","_dom"),
    keep_order=True,
    cols1=_anchor_cols,
).fillna(
    # anchors without any overlapping domain: domain index -1, zero-length overlap
    {
        "index_dom": -1,
        "overlap_peak_start": 0,
        "overlap_peak_end": 0
    }
)

# overlap statistics ..
print(f"total number of ID anchors {len(_anchors)}")
print(
    "Number of ID anchors that overlap >1 domain at once :",
    (_anchors_domained.groupby("index").size() > 1).sum()
)
print(
    "Number of ID anchors that do not overlap any domains: ",
    (_anchors_domained["index_dom"]<0).sum()
)

# # make overlaps unique (THIS IS WRONG)...
# _uniq_idx = _anchors_domained.eval("_over = overlap_peak_end - overlap_peak_start").groupby("index")["_over"].idxmax()
# _anchors_domained = _anchors_domained.loc[_uniq_idx].reset_index(drop=True)

# keeping info about all overlaps ...
# each anchor gets the array of all distinct domain indices it overlaps;
# the assignment aligns on the row index of `_anchors`, so it presumes the
# "index" column of the overlap table equals that row index - TODO confirm
print("assigning domain indices back to the table of anchors ...")
_anchors["index_dom"] = _anchors_domained.groupby("index")["index_dom"].unique()
print(_anchors)


# Try to assign dots as well ...

In [None]:

## Characterize ID-anchors by their "domainness" and "dottedness" ...
# split every dot into its two anchors; reset_index() keeps the dot's row
# number as an "index" column so both anchors stay linked to their dot
_1 = _the_dots[["chrom2","start2","end2"]].rename(columns={"chrom2":"chrom","start2":"start","end2":"end"}).reset_index()
_2 = _the_dots[["chrom1","start1","end1"]].rename(columns={"chrom1":"chrom","start1":"start","end1":"end"}).reset_index()

# one row per dot anchor (a genomic interval may appear more than once),
# with the dot row number renamed to "index_dot"
_dot_anchors_redundant = bioframe.sort_bedframe(
    pd.concat([_1, _2]).rename(columns={"index":"index_dot"}),
    view_df=hg38_arms_full,
)

# dot overlaps anchors ...
# ID anchors (peaks padded by 2 kb on both sides) against the dot anchors
_dot_assigned = bioframe.overlap(
    _anchors.eval("""
    peak_start = peak_start - 2_000
    peak_end = peak_end + 2_000
    """),
    _dot_anchors_redundant,
    return_input=True,
    return_index=False,
    return_overlap=False,
    suffixes=("","_"),
    keep_order=True,
    cols1=("chrom", "peak_start", "peak_end"),
    cols2=("chrom", "start", "end"),
)
# anchors without an overlapping dot anchor get index_dot_ = -1
_dot_assigned["index_dot_"] = _dot_assigned["index_dot_"].fillna(-1).astype(int)
# collect the distinct dot indices per value of the anchors' "cluster" column
_dot_assigned = _dot_assigned.groupby("cluster")["index_dot_"].unique()

# NOTE(review): "cluster" values are used as row labels of `_anchors` here,
# i.e. this presumes cluster id == row index - confirm against the input file
_anchors.loc[_dot_assigned.index, "index_dot"] = _dot_assigned
_anchors
# # _anchor

## Now let's load HDF5 file with all of the pileups and anchor indices for the all-by-all dataframes ...

In [None]:

# fr.items()
def print_attrs(name, obj):
    """
    Print one node of an HDF5 tree, indented by its depth, with its attributes.

    Meant to be passed to `h5py` `visititems`: `name` is the "/"-separated path
    of the node and `obj` the node itself. The function always returns None,
    which lets the traversal continue over the whole file.

    Listing attributes stays best-effort: if `obj.attrs` cannot be read, a
    short notice is printed instead of raising. Unlike the previous bare
    `except`, KeyboardInterrupt / SystemExit are no longer swallowed.
    """
    # one indentation level (4 spaces) per "/" in the path
    shift = name.count('/') * '    '
    item_name = name.split("/")[-1]
    print(shift + item_name)
    try:
        _attrs = list(obj.attrs.items())
    except Exception as err:
        print(shift + '    ' + f"<attributes unavailable: {err!r}>")
        return
    for key, val in _attrs:
        print(shift + '    ' + f"{key}: {val}")


# NOTE(review): absolute, user-specific path - consider a configurable data dir
with h5py.File("/data/sergpolly/tmp/Pileups_ID_by_distance.hdf5", 'r') as fr:
    # dump the group/dataset tree of the file together with node attributes
    fr.visititems(print_attrs)

    # check general metadata ...
    _pileup_meta = dict(fr.attrs)
    for k,v in _pileup_meta.items():
        print(f"{k}: {v}")

    print("...")
    print("restoring cis all-by-all table ...")
    # extract indices to recreate all-by-all in cis:
    cis_left = fr.get("cis/indices").get("anchor1")[()]
    cis_right = fr.get("cis/indices").get("anchor2")[()]
    # assuming index and cluster - are the same ...
    # the stored indices are used positionally (iloc) on the current `_anchors`;
    # columns of the left/right anchor get the suffix "1"/"2"
    _df_intra_arm = pd.concat(
        [
            _anchors.iloc[cis_left].add_suffix("1").reset_index(drop=True),
            _anchors.iloc[cis_right].add_suffix("2").reset_index(drop=True)
        ],
        axis=1
     )
    _df_intra_arm = _df_intra_arm.reset_index(drop=True)
    # distance between the midpoints of the two peaks
    _df_intra_arm["dist"] = _df_intra_arm.eval(".5*(peak_start2+peak_end2) - .5*(peak_start1+peak_end1)")

    print("restoring trans all-by-all table ...")
    # extract indices to recreate all-by-all in trans:
    trans_left = fr.get("trans/indices").get("anchor1")[()]
    trans_right = fr.get("trans/indices").get("anchor2")[()]
    # assuming index and cluster - are the same ...
    tr_feat = pd.concat(
        [
            _anchors.iloc[trans_left].add_suffix("1").reset_index(drop=True),
            _anchors.iloc[trans_right].add_suffix("2").reset_index(drop=True)
        ],
        axis=1
     )
    tr_feat = tr_feat.reset_index(drop=True)



    print("extracting cis pileups as is...")
    # sort out the results per sample ...
    fullstacks_cis = {}
    cis_pileups_grp = fr.get("cis/pileups")
    for _sample in cis_pileups_grp.keys():
        # [()] reads the whole dataset into memory
        fullstacks_cis[_sample] = cis_pileups_grp.get(_sample)[()]


    print("extracting trans pileups and calculating means ...")
    # create indexes for pileup groups
    _dotless_idx = tr_feat.query("(peak_dots1==0)&(peak_dots2==0)").index
    _dotted_idx = tr_feat.query("(peak_dots1>0)&(peak_dots2>0)").index
    # report the group sizes (this used to be a bare tuple expression whose
    # value was silently discarded inside the `with` block)
    print(f"    {len(tr_feat)} trans pairs: {len(_dotless_idx)} dotless, {len(_dotted_idx)} dotted ...")

    # now average those sub-pileups :
    stack_means = {}
    trans_pileups_grp = fr.get("trans/pileups")
    for _sample in trans_pileups_grp.keys():
        print(f"    processing trans pileup {_sample} ...")
        #
        _stack = trans_pileups_grp.get(_sample)[()]
        # NaN-aware means along the first axis for: dotless pairs, dotted pairs, all pairs
        stack_means[_sample] = [
            np.nanmean(_stack[_dotless_idx], axis=0),
            np.nanmean(_stack[_dotted_idx], axis=0),
            np.nanmean(_stack, axis=0),
        ]


In [None]:
# _xxx = bioframe.overlap(
#     _df_intra_arm,
#     _domains,
#     return_input=True,
#     return_index=True,
#     return_overlap=True,
#     suffixes=('', '_dom'),
#     keep_order=True,
#     cols1=("chrom1","peak_start1","peak_end2"),
#     # cols2=None,
#     # on=None,
#     # ensure_int=True,
# )

# _yyy = _xxx.eval("""
#     over = overlap_peak_end2 - overlap_peak_start1
#     dom_size = end_dom - start_dom
#     id_size = peak_end2 - peak_start1
# """)[[
#     "chrom1",
#     "peak_start1",
#     # "peak_end1",
#     # "peak_start2",
#     "peak_end2",
#     "start_dom",
#     "end_dom",
#     # "overlap_peak_start1",
#     # "overlap_peak_end2",
#     "index",
#     "index_dom",
#     "dom_size",
#     "id_size",
#     "over",
# ]] \
#     .astype({"over":float, "dom_size":float, "id_size":float}) \
#     .eval("""
#     frac_dom = over / dom_size
#     frac_id = over / id_size
# """)

# _dom_overs = _yyy.query("frac > 0.6").groupby("index").size()
# _df_intra_arm.loc[_dom_overs.index, "dom_over"] = _dom_overs


# _idid_domained = _df_intra_arm.eval(
#     """
#     _inside_domain1 = ( index_dom1 >= 0 )
#     _inside_domain2 = ( index_dom2 >= 0 )
#     _same_domain = ((index_dom1 == index_dom2))
#     _dotted1 = peak_dots1 > 0
#     _dotted2 = peak_dots2 > 0
#     """
# )


# _idid_domained["dot_status"] = _idid_domained.apply(get_dotted_status, axis=1)
# _idid_domained["dom_status"] = _idid_domained.apply(get_domain_status, axis=1)
# _idid_domained["dom_status"].unique()
# _idid_domained.query("dom_over.isnull()")["dom_status"].value_counts()

In [None]:
_df_intra_arm

In [None]:
# _df_intra_arm[["index_dot1","index_dot2"]]
def get_dot_order(_row):
    """
    Smallest absolute difference between the dot indices of the two anchors.

    Reads the collections `_row["index_dot1"]` and `_row["index_dot2"]`.
    Returns -1 as soon as either collection contains -1, otherwise the
    minimum of |j - i| over every (i, j) pair taken from the two collections.
    """
    left_ids = _row["index_dot1"]
    right_ids = _row["index_dot2"]
    if -1 in left_ids or -1 in right_ids:
        return -1
    return min(abs(right - left) for left, right in product(left_ids, right_ids))

# rank every cis anchor pair by the closest match between their dot indices
_df_intra_arm["dot_rank"] = _df_intra_arm.apply(get_dot_order, axis=1)
_dot_match_rank = _df_intra_arm["dot_rank"]

# value_counts() orders by frequency; sort by rank so that the line is drawn
# left-to-right along the x axis instead of jumping between ranks
_dot_match_rank[_dot_match_rank > -1].value_counts().sort_index().plot(marker="o")
plt.gca().set_xlabel("dot_order (min)")
plt.gca().set_ylabel("number of ID-ID with a given dot_order (min)")


In [None]:
# Do the same thing with domain ranking ...

# _df_intra_arm[["index_dot1","index_dot2"]]
def get_dom_order(_row):
    """
    Smallest absolute difference between the domain indices of the two anchors.

    Reads the collections `_row["index_dom1"]` and `_row["index_dom2"]`.
    Returns -1 when either collection contains -1, otherwise the minimum of
    |j - i| over all pairs (i from side 1, j from side 2).
    """
    doms_a, doms_b = _row["index_dom1"], _row["index_dom2"]
    has_unassigned = (-1 in doms_a) or (-1 in doms_b)
    if has_unassigned:
        return -1
    gaps = (abs(dom_b - dom_a) for dom_a in doms_a for dom_b in doms_b)
    return min(gaps)

# rank every cis anchor pair by the closest match between their domain indices
_df_intra_arm["dom_rank"] = _df_intra_arm.apply(get_dom_order, axis=1)
_dom_match_rank = _df_intra_arm["dom_rank"]

# value_counts() orders by frequency; sort by rank so that the line is drawn
# left-to-right along the x axis instead of jumping between ranks
_dom_match_rank[_dom_match_rank > -1].value_counts().sort_index().plot(marker="o")
plt.gca().set_xlabel("dom_order (min)")
plt.gca().set_ylabel("number of ID-ID with a given dom_order (min)")


In [None]:
def get_dotted_status(_row):
    """
    Label a pair of anchors by how it relates to called dots.

    "dot-exact" when `_row["dot_rank"]` equals 0; otherwise the label counts
    how many of the two flags `_row["_dotted1"]`, `_row["_dotted2"]` are True
    ("dot-anchor-2X" / "-1X" / "-0X"). Any other flag combination gives "blah".
    """
    # if (0 <= _exact < 2):
    if _row["dot_rank"] == 0:
        return "dot-exact"
    flags = (_row["_dotted1"], _row["_dotted2"])
    labels_by_flags = (
        ((True, True), "dot-anchor-2X"),
        ((False, True), "dot-anchor-1X"),
        ((True, False), "dot-anchor-1X"),
        ((False, False), "dot-anchor-0X"),
    )
    for pattern, label in labels_by_flags:
        if flags == pattern:
            return label
    return "blah"

def get_domain_status(_row):
    """
    Label a pair of anchors by the sign of `_row["dom_rank"]`.

    negative -> "inter-domain<2X", zero -> "intra-domain",
    positive -> "inter-domain-2X"; anything that compares as none of these
    (e.g. NaN) -> "blah".
    """
    # x = (_row["_inside_domain1"], _row["_inside_domain2"])
    rank = _row["dom_rank"]
    if rank == 0:
        return "intra-domain"
    if rank < 0:
        return "inter-domain<2X"
    if rank > 0:
        return "inter-domain-2X"
    return "blah"

# lambda r: {"dotted": get_dotted_status(r), "domain": get_domain_status(r)}



# # _idid_domained =
# _grp = _df_intra_arm.eval(
#     """
#     _inside_domain1 = ( index_dom1 >= 0 )
#     _inside_domain2 = ( index_dom2 >= 0 )
#     _same_domain = ((index_dom1 == index_dom2))
#     _dotted1 = peak_dots1 > 0
#     _dotted2 = peak_dots2 > 0
#     """
# ).apply(
#     lambda r: pd.Series({"dot_status": get_dotted_status(r), "dom_status": get_domain_status(r)}),
#     axis=1
# ).groupby(["dom_status", "dot_status"])
# add boolean "dotted" flags for both anchors, label every cis pair with its
# dot status and domain status (row-wise apply), and group pairs by both labels
_grp = _df_intra_arm.eval(
    """
    _dotted1 = peak_dots1 > 0
    _dotted2 = peak_dots2 > 0
    """
).apply(
    lambda r: pd.Series({"dot_status": get_dotted_status(r), "dom_status": get_domain_status(r)}),
    axis=1
).groupby(["dom_status", "dot_status"])

# number of pairs per (dom_status, dot_status) combination
_s = _grp.size()
print(_s)
# only combinations with more than 20 pairs are drawn
_s[_s>20].plot(kind="barh", figsize=(8,6))
plt.tight_layout()
# .groupby(["_inside_domain1","_inside_domain2","_same_domain"]).size()


In [None]:
# _idid_domained = _df_intra_arm.eval(
#     """
#     _inside_domain1 = ( index_dom1 >= 0 )
#     _inside_domain2 = ( index_dom2 >= 0 )
#     _same_domain = ((index_dom1 == index_dom2))
#     _dotted1 = peak_dots1 > 0
#     _dotted2 = peak_dots2 > 0
#     """
# )
# _idid_domained["dot_status"] = _idid_domained.apply(get_dotted_status, axis=1)
# _idid_domained["dom_status"] = _idid_domained.apply(get_domain_status, axis=1)

In [None]:
# _select_df = _df_intra_arm.loc[_grp.get_group(("inter-domain-2X", "dot-exact")).index].query("dist < 311_000_000")

# display(
#     _select_df[
#         ['chrom1', 'peak_start1', 'peak_end2', 'dot_rank', 'index_dom1', 'index_dom2','index_dot1', 'index_dot2',"dist"]
#     ]
# )
# #.iloc[98:]
# # display(_select_df)
# _select_single_domain = _select_df[['chrom1', 'peak_start1', 'peak_end2']].iloc[0]
# _chrom, _start, _end, *_ = _select_single_domain

In [None]:
# _select_df = _df_intra_arm.loc[_grp.get_group(("inter-domain<2X", "dot-anchor-2X")).index].query("dist < 4_000_000")

# display(
#     _select_df[
#         ['chrom1', 'peak_start1', 'peak_end2', 'dot_rank', 'index_dom1', 'index_dom2','index_dot1', 'index_dot2',"dist"]
#     ]
# )
# #.iloc[98:]
# # display(_select_df)
# _select_single_domain = _select_df[['chrom1', 'peak_start1', 'peak_end2']].iloc[0]
# _chrom, _start, _end, *_ = _select_single_domain

In [None]:
# _select_single_domain = _df_intra_arm.loc[
#     _grp.get_group(("inter-domain<2X", "dot-exact")).index
# ][
#     ['chrom1', 'peak_start1', 'peak_end2', 'dot_rank', 'index_dom1', 'index_dom2',]
# ].iloc[100]
# print(_select_single_domain)
# _chrom, _start, _end, *_ = _select_single_domain

# Explore some of the edge cases - visual checking ...

In [None]:
_sample = "m5hR1R2"
clr = telo_clrs10[_sample]
# Calculate domain defining dots ...

_region1 = ('chr7', 27_275_000-2_750_000, 27_275_000+2_750_000)
# _region1 = ('chr7', 38_000_000, 38_000_000+12_500_000)
# # _region2 = ('chr7', 38_000_000, 38_000_000+12_500_000)
# # _region1 = ('chr7', 20_500_000, 20_500_000+12_500_000)
# # _region2 = ('chr7', 20_500_000, 20_500_000+12_500_000)
# _region1 = (_chrom, _start-1_500_000, _end+1_500_000)
# _region1 = ('chr1', 28_500_000-3_500_000, 28_910_000+3_500_000)
_region2 = _region1


def _select_bedpe_region(bedpe_df, region1, region2, clr, cols1, cols2):
    """
    Select paired intervals inside a 2D region and annotate them with bin info.

    Rows are kept when their first interval (`cols1`) is selected by
    `bioframe.select` for `region1` AND their second interval (`cols2`) for
    `region2`. For each side the columns `bin<N>_id` (from `clr.offset`) and
    `bin<N>_width` (difference of the two values from `clr.extent`) are added.
    Returns a new frame with a fresh 0..n-1 index.

    This replaces three copy-pasted versions of the same recipe.
    """
    _selected = bioframe.select(
        bioframe.select(bedpe_df, region1, cols=cols1),
        region2, cols=cols2,
    ).reset_index(drop=True)
    for _side, _cols in (("bin1", cols1), ("bin2", cols2)):
        _intervals = _selected[list(_cols)]
        _selected[f"{_side}_id"] = _intervals.apply(clr.offset,axis=1,result_type="expand")
        _selected[f"{_side}_width"] = _intervals.apply(clr.extent,axis=1,result_type="expand").apply(np.diff,axis=1,result_type="expand")[0]
    return _selected


# domains within selected region - turn it back to bedpe ...
# each domain becomes a pair of 10 kb intervals: its first and its last 10 kb
_domains_region = \
_domains.eval("""
    chrom1 = chrom
    chrom2 = chrom
    start1 = start
    end1 = start + 10_000
    start2 = end - 10_000
    end2 = end
""")[['chrom1', 'start1', 'end1', 'chrom2', 'start2', 'end2', 'n_intervals']]

# select domains in the region ...
_domains_region = _select_bedpe_region(
    _domains_region, _region1, _region2, clr,
    cols1=("chrom1","start1","end1"),
    cols2=("chrom2","start2","end2"),
)

# select dots in the region ...
_the_dots_region = _select_bedpe_region(
    _the_dots, _region1, _region2, clr,
    cols1=("chrom1","start1","end1"),
    cols2=("chrom2","start2","end2"),
)

# select all-by-all ID domains in the region ...
_bedpe_region = _select_bedpe_region(
    _df_intra_arm, _region1, _region2, clr,
    cols1=("chrom1","peak_start1","peak_end1"),
    cols2=("chrom2","peak_start2","peak_end2"),
)

# # select ID domains to be highlighted in the region ...
# _select_df_region = bioframe.select(
#     bioframe.select(_select_df, _region1, cols=("chrom1","peak_start1","peak_end1")),
#     _region2, cols=("chrom2","peak_start2","peak_end2"),
# ).reset_index(drop=True)
# _select_df_region["bin1_id"] = _select_df_region[["chrom1","peak_start1","peak_end1"]].apply(clr.offset,axis=1,result_type="expand")
# _select_df_region["bin1_width"] = _select_df_region[["chrom1","peak_start1","peak_end1"]].apply(clr.extent,axis=1,result_type="expand").apply(np.diff,axis=1,result_type="expand")[0]
# _select_df_region["bin2_id"] = _select_df_region[["chrom2","peak_start2","peak_end2"]].apply(clr.offset,axis=1,result_type="expand")
# _select_df_region["bin2_width"] = _select_df_region[["chrom2","peak_start2","peak_end2"]].apply(clr.extent,axis=1,result_type="expand").apply(np.diff,axis=1,result_type="expand")[0]


In [None]:
_the_dots_region.iloc[2]

In [None]:
# name of the chromosome arm (last column of the view) holding each region;
# the tile must sit within a single arm
region1_name, region2_name = (
    bioframe.select(hg38_arms, _reg).iat[0,-1] for _reg in (_region1, _region2)
)
assert region1_name == region2_name
region_name = region2_name

# bin extents of the two regions and their widths in bins
tile_span_i, tile_span_j = clr.extent(_region1), clr.extent(_region2)
_the_tile = (region_name, tile_span_i, tile_span_j)
_reg1w = np.diff(tile_span_i).item()
_reg2w = np.diff(tile_span_j).item()

# observed matrix slice, lightly smoothed ...
_raw_tile = clr.matrix()[slice(*tile_span_i), slice(*tile_span_j)]
_mat = scipy.ndimage.gaussian_filter(
    _raw_tile,
    sigma=0.4,
    order=0,
    mode='reflect',
    cval=0.0,
    # radius=3,
    truncate=1.0,
)
imshow_kwargs = {
    "norm": LogNorm(vmin=0.0001, vmax=0.01),
    "cmap": "YlOrBr",
    "interpolation": "nearest",
    # "interpolation": "none",
}

fig, ax = plt.subplots(1, 1, figsize=(8,8) )
ax.imshow(_mat, **imshow_kwargs)
ax.set_xlim(0, _reg2w)
ax.set_ylim(_reg1w, 0)
ax.set_xticks([])
ax.set_yticks([])

# outline, in this order: all-by-all ID pairs (black), dots (blue), domains (red)
for _overlay_df, _edge_color in (
    (_bedpe_region, "k"),
    (_the_dots_region, "blue"),
    (_domains_region, "red"),
):
    _big_boxes_kwargs = dict(loc="upper", lw=1.5, ec=_edge_color, fc="none", halo=0, ext_width=0)
    for box in rectangles_around_dots_ww( _overlay_df, _the_tile, **_big_boxes_kwargs ):
        ax.add_patch(box)


# # draw boxes around clustered pixels ...
# _big_boxes_kwargs = dict(loc="upper", lw=1.5, ec="green", fc="none", halo=0, ext_width=1)
# for box in rectangles_around_dots_ww( _select_df_region, _the_tile, **_big_boxes_kwargs ):
#     ax.add_patch(box)

## plotting - individual pups ...

In [None]:
# pileup select samples only !
# two parallel groups of sample names: the "m"-prefixed and the "p"-prefixed
# samples, listed in the same stage order (Mito, Telo, Cyto, 5h, 10h)
_select_sample_groups = [
    # m-ones
    [
        "mMito",
        "mTelo",
        "mCyto",
        "m5hR1R2",
        "m10hR1R2"
    ],
    # p-ones
    [
        "pMito",
        "pTelo",
        "pCyto",
        "p5hR1R2",
        "p10hR1R2",
    ],
]

In [None]:
def to_mb(bp_val):
    """
    Format a base-pair value as a string in megabases.

    Whole megabases are printed without decimals ("2"), anything else with one
    decimal ("2.5"). Whole-Mb float inputs (e.g. 2_000_000.0) now also give
    "2" instead of "2.0".
    """
    # a non-zero remainder means the value is not a whole number of Mb
    if np.mod(bp_val, 1_000_000):
        # just give 1 decimal if not even Mb
        return f"{bp_val/1_000_000:.1f}"
    # int() drops the trailing ".0" that floor-division of a float would leave
    return f"{int(bp_val//1_000_000)}"

# given the range - generate pretty axis name
def _get_name(_left, _right, _amount):
    """
    Build an axis label for a distance range, followed by `_amount`.

    - range starting at (approximately) zero : "<{right} Mb: {amount}"
    - otherwise, right edge above 80 Mb      : ">{left} Mb: {amount}"
    - any other range                        : "{left}-{right} Mb: {amount}"
    Edges are formatted with `to_mb`.
    """
    starts_at_zero = np.isclose(_left, 0.0)
    # treated as open-ended only when it does not start at zero
    if not starts_at_zero and _right > 80_000_000:
        return f">{to_mb(_left)} Mb: {_amount}"
    if starts_at_zero:
        return f"<{to_mb(_right)} Mb: {_amount}"
    return f"{to_mb(_left)}-{to_mb(_right)} Mb: {_amount}"

In [None]:
# 10 kb coolers of the two 5h samples ("m" and "p")
_the_clr_m5 = telo_clrs10["m5hR1R2"]
_the_clr_p5 = telo_clrs10["p5hR1R2"]

# _the_clr_mm = mega_clrs10["dldmicroc"]
# _the_clr_m5 = mega_clrs10["mG1s_MEGA"]
# _the_clr_p5 = mega_clrs10["pG1s_MEGA"]

# ['N93pG1s_MEGA', 'N93mG1s_MEGA', 'pG1s_MEGA', 'Ms_MEGA', 'mG1s_MEGA']

In [None]:
def get_score(
    mat,
    hw_in=2,
    hw_out=2
):
    """
    get pileup enrichment score ...

    The score is the NaN-aware mean of a central square divided by the
    NaN-aware mean of a "background", i.e. of everything that is left after
    masking a central band of rows and a central band of columns.

    Parameters
    ----------
    mat : 2D array
        Pileup matrix. The centre index is derived from the first dimension
        only and applied to both axes (so a square matrix is assumed).
    hw_in : int
        Half-width of the central square: its side is 2*hw_in + 1 pixels.
    hw_out : int
        Half-width of the row/column bands excluded from the background:
        each band is 2*hw_out + 1 pixels wide.

    Returns
    -------
    float
        nanmean(centre) / nanmean(background); NaN when either part has no
        finite values (numpy emits a RuntimeWarning in that case).
    """
    mat = np.asarray(mat)
    _mid, _ = mat.shape
    _mid = (_mid - 1)//2
    # deal with the center - enriched part
    # (start is clamped at 0: a negative start would wrap around to the end
    #  of the axis and silently select the wrong pixels)
    _from, _to = max(_mid-hw_in, 0), _mid+hw_in+1
    mid_mat = mat[_from:_to, _from:_to]
    # deal with the periphery - "background" part
    _from, _to = max(_mid-hw_out, 0), _mid+hw_out+1
    # work on a copy; NaN-masking needs a float array, integer input is converted
    if np.issubdtype(mat.dtype, np.floating):
        mat_copy = mat.copy()
    else:
        mat_copy = mat.astype(float)
    mat_copy[_from:_to] = np.nan
    mat_copy[:, _from:_to] = np.nan
    # ...
    return np.nanmean(mid_mat)/np.nanmean(mat_copy)

In [None]:
# 21-   (unfinished scratch expression: as code it is a SyntaxError and stops "Run All")

In [None]:
# dom_status       dot_status
# inter-domain-2X  dot-anchor-0X     1076
#                  dot-anchor-1X    15059
#                  dot-anchor-2X    50258
#                  dot-exact            1
# inter-domain<2X  dot-anchor-0X      822
#                  dot-anchor-1X     4792
#                  dot-anchor-2X      233
#                  dot-exact            4
# intra-domain     dot-anchor-0X       18
#                  dot-anchor-1X      286
#                  dot-anchor-2X      744
#                  dot-exact          677

In [None]:
# ! head idid_scores.tsv

# Make 5F pruning subpanel ... the all by all versus the dot sizes ...

In [None]:
# NOTE: `_qtable`, `_distX` and `_trans_idx` are read here but are only assigned
# by the "Make 5G" cell further down - that cell must be executed first.
dot_sizes = _the_dots.eval("end2 - end1").to_numpy()
# [0, 250_000, 500_000, 1_000_000, 2_500_000, 5_000_000, 10_000_000, 25_000_000, 50_000_000]
f, ax = plt.subplots(
    nrows=1,
    ncols=1,
    figsize=(2.5,2.5),
)

# "all-all" rows of the score table: one score per distance bin and sample
_qtable_all = pd.DataFrame(_qtable).query("label=='all-all'")
ym = _qtable_all["m5hR1R2"]
yp = _qtable_all["p5hR1R2"]

# each curve is labelled with its own sample name
ax.plot(_distX, ym, label="m5hR1R2", marker=".", color="blue")
ax.plot(_distX, yp, label="p5hR1R2", marker=".", color="red")
# ax.plot(_trans_idx, mmm_list[-1], label=k, marker=".", color=color)
ax.set_xscale("log")
# ax.set_yscale("log")
# fixed major y-ticks, no minor ones
ax.set_yticks([1,1.5,2], minor=False)
ax.set_yticks([], minor=True)
ax.set_ylim((0.95, 2.175))
ax.set_xlim((_distX[0]*0.7, _trans_idx*1.2))
# ax.set_xticks(np.append(_distX, _trans_idx), labels=[], minor=True)
# minor x-ticks: bin centres (every second one labelled, in Mb) + the "trans" position
ax.set_xticks(
    np.append(_distX, _trans_idx),
    labels=[ f"{_d/1_000_000:.2f}" if not _i%2 else "" for _i,_d in enumerate(_distX)]+["trans"],
    minor=True,
    rotation=90,
)
# single major x-tick (with a grid line) at the 90th percentile of `dot_sizes`
_90th = np.percentile(dot_sizes, 90)
ax.set_xticks([_90th], labels=["90th"], minor=False)
ax.grid(visible=True, which="major")
ax.spines[['right', 'top']].set_visible(False)

ax_hist = ax.twinx()  # instantiate a second Axes that shares the same x-axis
# ax2.plot(t, data2, color=color)
# ax2.tick_params(axis='y', labelcolor=color)

# histogram of `dot_sizes` on log-spaced bins, drawn on the twin axes
ax_hist.hist(
    dot_sizes,
    bins=np.geomspace(90_000, 20_000_000, 20),
    histtype='stepfilled',
    color='darkgoldenrod',
    edgecolor='k',
    alpha=0.5
);

# ax_hist.
# # _the_dots.eval("end2 - end1").hist(bins=np.r_[300_000, np.geomspace(1_000_000, 10_000_000, 50)])
# # plt.gca().set_xscale("log")
# np.geomspace(10_000, 10_000_000, 100)
ax.set_zorder(1)  # default zorder is 0 for ax1 and ax2
ax.patch.set_visible(False)  # prevents ax1 from hiding ax2


plt.savefig("Fig5F.svg")

# Make 5G pruning subpanel ... timecourse quantifications ...

In [None]:
_flank = 100_000
# dist_bins = [0, 300_000, 1_000_000, 2_500_000, 5_000_000, 10_000_000, 50_000_000]
dist_bins = [0, 250_000, 500_000, 1_000_000, 2_500_000, 5_000_000, 10_000_000, 25_000_000, 50_000_000]


# dist_bins = (np.asarray([0.125, 0.375, 0.75, 1.75, 3.75, 7.50, 17.5, 37.5, 300])*1_000_000).astype(int)
# # 0.125, 0.375, 0.75, 1.75, 3.75, 7.50, 17.5, 37.5

# pileup select samples only !
quant_sample_groups = [
    [
        "mMito",
        "mTelo",
        "mCyto",
        "m5hR1R2",
        # "m10hR1R2",
    ],
    # # p-ones
    [
        "pMito",
        "pTelo",
        "pCyto",
        "p5hR1R2",
        # "p10hR1R2",
    ],
]

# x-position of every distance bin = arithmetic centre of the bin
_oo = np.asarray(dist_bins)
_distX = (_oo[:-1]+_oo[1:])/2
# _oo[0] = _oo[1]
# # _www = (_distX[-1] - _distX[0])
# _distX = np.sqrt((_oo[:-1])*(_oo[1:]))
# _distX_trans = np.append(_distX, 70_000_000)

# category name -> subset of `_df_intra_arm` that gets quantified
# NOTE(review): `_grp.indices` holds positional indices but they are used with
# `.loc` - this assumes `_df_intra_arm` has a default 0..N-1 index; confirm.
quant_cat_dict = {}

qcat_name = "all-all"
quant_cat_dict[qcat_name] = _df_intra_arm.loc[:]

qcat_name = "dotted"
_groups_of_interest = [ _g for _g in _grp.groups.keys() if (_g[1]!="dot-anchor-0X") ]
_index_of_interest = np.concatenate([_grp.indices[_g] for _g in _groups_of_interest])
quant_cat_dict[qcat_name] = _df_intra_arm.loc[_index_of_interest]

qcat_name = "dotless"
_groups_of_interest = [ _g for _g in _grp.groups.keys() if _g[1]=="dot-anchor-0X" ]
_index_of_interest = np.concatenate([_grp.indices[_g] for _g in _groups_of_interest])
quant_cat_dict[qcat_name] = _df_intra_arm.loc[_index_of_interest]


# long-format score table: one row per (category, distance bin), one column per sample
_qtable = {}
_qtable["label"] = []
_qtable["dist_min"] = []
_qtable["dist_max"] = []
for _kkk in sum(quant_sample_groups, []):
    _qtable[_kkk] = []


# x-position at which the trans score is drawn on the distance axis
_trans_idx = 70_000_000
# major x-tick: 90th percentile of the dot sizes; computed here so that this
# cell does not depend on the Fig5F cell (which itself needs `_qtable` from here)
_90th = np.percentile(_the_dots.eval("end2 - end1").to_numpy(), 90)

for _name, _df in quant_cat_dict.items():
    # observed=False keeps empty bins as (empty) groups, so that `ggg.groups`
    # stays aligned with `dist_bins` in the zip below
    ggg = _df.groupby(pd.cut( _df["dist"], dist_bins ), observed=False)
    nquants = len(ggg)
    f, axs = plt.subplots(
        nrows=1,
        ncols=len(quant_sample_groups[0]),
        figsize=(10,2.5),
        sharey=True,
        sharex=True,
    )
    # ...
    for _sample_group in quant_sample_groups:
        # n-th sample of every group is drawn on the n-th axes
        for ax, k in zip(axs, _sample_group):
            # ... samples ...
            _stacks = fullstacks_cis[k]
            mmm_list = []
            for _dist_min, _dist_max, (_q, _mtx) in zip(dist_bins[:-1], dist_bins[1:], ggg.groups.items()):
                # ... groupings (by dist, or whatever ...)
                mmm = np.nanmean(_stacks[_mtx], axis=0)
                _score = get_score( mmm, hw_in=2, hw_out=4)
                # print(k, _q, _score)
                mmm_list.append(_score)
                # ...
                _qtable[k].append(_score)
                # row descriptors are recorded once per bin - while handling the very first sample
                if k == quant_sample_groups[0][0]:
                    _qtable["label"].append(_name)
                    _qtable["dist_min"].append(_dist_min)
                    _qtable["dist_max"].append(_dist_max)
                # ...
            # # add trans score right after
            # NOTE(review): positions 2/1/0 of `stack_means[k]` are taken to be the
            # all/dotted/dotless trans pileups - confirm where `stack_means` is built.
            if _name == "all-all":
                mmm = stack_means[k][2]
            elif _name == "dotted":
                mmm = stack_means[k][1]
            elif _name == "dotless":
                mmm = stack_means[k][0]
            else:
                raise ValueError(f"unknown quantification category: {_name!r}")
            # do the score ...
            _score = get_score( mmm, hw_in=0, hw_out=2)
            mmm_list.append(_score)
            if k.startswith("m"):
                ax.set_title(k.lstrip("m").rstrip("R12"))
                color="blue"
            else:
                color="red"
            # cis scores along the distance axis, trans score as a separate point
            ax.plot(_distX, mmm_list[:-1], label=k, marker=".", color=color)
            ax.plot(_trans_idx, mmm_list[-1], label=k, marker=".", color=color)
            ax.set_xscale("log")
            # ax.set_yscale("log")
            # fixed major y-ticks, no minor ones
            ax.set_yticks([1,1.5,2], minor=False)
            ax.set_yticks([], minor=True)
            ax.set_ylim((0.95, 2.175))
            ax.set_xlim((_distX[0]*0.8, _trans_idx*1.2))
            ax.set_xticks(
                np.append(_distX, _trans_idx),
                labels=[ f"{_d/1_000_000:.2f}" if not _i%2 else "" for _i,_d in enumerate(_distX)]+["trans"],
                minor=True,
                rotation=90,
            )
            ax.set_xticks([_90th], labels=["90th"], minor=False)
            ax.grid(visible=True, which="major")
            ax.spines[['right', 'top']].set_visible(False)
    # # add trans score right after
    plt.savefig(f"Fig5G_{_name}.svg")
    # ...
    # _qtable[]
# #
# # save this as a table ...
# pd.DataFrame(_qtable).to_csv("idid_scores.tsv", sep="\t", index=False)

# plt.savefig("Fig5F.svg")

In [None]:
# scratch: preview the minor x-tick labels used above (every second bin centre, in Mb)
[ f"{_d/1_000_000:.2f}" if not _i%2 else "" for _i,_d in enumerate(_distX)]

In [None]:
# scratch: the bin centres that receive a tick label (every second one)
_distX[::2]

# Make 5F pruning subpanel ... the all by all versus the dot sizes ...

In [None]:
# Variant of the Fig5F panel with an unlabelled minor x-axis and a hard-coded
# major tick; the figure is displayed but not saved.
# Reads `_qtable`, `_distX` and `_trans_idx` from the "Make 5G" cell.
dot_sizes = _the_dots.eval("end2 - end1").to_numpy()
# [0, 250_000, 500_000, 1_000_000, 2_500_000, 5_000_000, 10_000_000, 25_000_000, 50_000_000]
f, ax = plt.subplots(
    nrows=1,
    ncols=1,
    figsize=(2.5,2.5),
)

# "all-all" rows of the score table: one score per distance bin and sample
_qtable_all = pd.DataFrame(_qtable).query("label=='all-all'")
ym = _qtable_all["m5hR1R2"]
yp = _qtable_all["p5hR1R2"]

# each curve is labelled with its own sample name
ax.plot(_distX, ym, label="m5hR1R2", marker=".", color="blue")
ax.plot(_distX, yp, label="p5hR1R2", marker=".", color="red")
# ax.plot(_trans_idx, mmm_list[-1], label=k, marker=".", color=color)
ax.set_xscale("log")
# ax.set_yscale("log")
# fixed major y-ticks, no minor ones
ax.set_yticks([1,1.5,2], minor=False)
ax.set_yticks([], minor=True)
ax.set_ylim((0.95, 2.175))
ax.set_xlim((_distX[0]*0.7, _trans_idx*1.2))
ax.set_xticks(np.append(_distX, _trans_idx), labels=[], minor=True)
# np.percentile(dot_sizes, 90)
# NOTE(review): 910000 is presumably the value of the percentile above, typed in by hand - confirm.
ax.set_xticks([910000], labels=["0.91"], minor=False)
ax.grid(visible=True, which="major")
ax.spines[['right', 'top']].set_visible(False)

ax_hist = ax.twinx()  # instantiate a second Axes that shares the same x-axis
# ax2.plot(t, data2, color=color)
# ax2.tick_params(axis='y', labelcolor=color)

# histogram of `dot_sizes` on log-spaced bins, drawn on the twin axes
ax_hist.hist(
    dot_sizes,
    bins=np.geomspace(90_000, 20_000_000, 20),
    histtype='stepfilled',
    color='darkgoldenrod',
    edgecolor='k',
    alpha=0.5
);

# ax_hist.
# # _the_dots.eval("end2 - end1").hist(bins=np.r_[300_000, np.geomspace(1_000_000, 10_000_000, 50)])
# # plt.gca().set_xscale("log")
# np.geomspace(10_000, 10_000_000, 100)
ax.set_zorder(1)  # default zorder is 0 for ax1 and ax2
ax.patch.set_visible(False)  # prevents ax1 from hiding ax2

In [None]:
_flank = 100_000
# dist_bins = [0, 300_000, 1_000_000, 2_500_000, 5_000_000, 10_000_000, 50_000_000]
dist_bins = [0, 250_000, 500_000, 1_000_000, 2_500_000, 5_000_000, 10_000_000, 25_000_000, 50_000_000]

# x-position of every distance bin = arithmetic centre of the bin
_oo = np.asarray(dist_bins)
_distX = (_oo[:-1]+_oo[1:])/2
_www = (_distX[-1] - _distX[0])

# condition name -> subset of `_df_intra_arm`; also used by the figure cells below.
# Groups are addressed by (dom_status, dot_status) keys of `_grp`.
# NOTE(review): `_grp.indices` holds positional indices but they are used with
# `.loc` - this assumes `_df_intra_arm` has a default 0..N-1 index; confirm.
_dfff_dict = {}

_dfff_id = "all-all"
# all ID-IDs
_dfff_dict[_dfff_id] = _df_intra_arm.loc[:]

_dfff_id = "inter-dotted"
# inter-domain-2X with dot-anchor-2X only:
_groups_of_interest = [('inter-domain-2X', 'dot-anchor-2X'), ]
_index_of_interest = np.concatenate([_grp.indices[_g] for _g in _groups_of_interest])
_dfff_dict[_dfff_id] = _df_intra_arm.loc[_index_of_interest]

_dfff_id = "inter-dotless"
# inter-domain (both -2X and <2X) with dot-anchor-0X:
_groups_of_interest = [('inter-domain-2X', 'dot-anchor-0X'), ('inter-domain<2X', 'dot-anchor-0X')]
_index_of_interest = np.concatenate([_grp.indices[_g] for _g in _groups_of_interest])
_dfff_dict[_dfff_id] = _df_intra_arm.loc[_index_of_interest]

_dfff_id = "intra-dotted"
# intra-domain with dot-anchor-2X or dot-anchor-1X:
_groups_of_interest = [('intra-domain', 'dot-anchor-2X'), ('intra-domain', 'dot-anchor-1X')]
_index_of_interest = np.concatenate([_grp.indices[_g] for _g in _groups_of_interest])
_dfff_dict[_dfff_id] = _df_intra_arm.loc[_index_of_interest]


_dfff_id = "intra-exact"
# intra-domain dot exact:
_groups_of_interest = [('intra-domain', 'dot-exact'), ]
_index_of_interest = np.concatenate([_grp.indices[_g] for _g in _groups_of_interest])
_dfff_dict[_dfff_id] = _df_intra_arm.loc[_index_of_interest]


# long-format score table: one row per (condition, distance bin), one column per sample
_table = {}
_table["label"] = []
_table["dist_min"] = []
_table["dist_max"] = []
for _kkk in sum(_select_sample_groups, []):
    _table[_kkk] = []


for _id, _dfff in _dfff_dict.items():
    # observed=False keeps empty bins as (empty) groups, so that `ggg.groups`
    # stays aligned with `dist_bins` in the zip below
    ggg = _dfff.groupby(pd.cut( _dfff["dist"], dist_bins ), observed=False)
    nquants = len(ggg)
    f, axs = plt.subplots(
        nrows=1,
        ncols=len(_select_sample_groups[0]),
        figsize=(15,2.5),
        sharey=True,
        sharex=True,
    )
    # ...
    for _sample_group in _select_sample_groups:
        # n-th sample of every group is drawn on the n-th axes
        for ax, k in zip(axs, _sample_group):
            # ... samples ...
            _stacks = fullstacks_cis[k]
            mmm_list = []
            for _dist_min, _dist_max, (_q, _mtx) in zip(dist_bins[:-1], dist_bins[1:], ggg.groups.items()):
                # ... groupings (by dist, or whatever ...)
                mmm = np.nanmean(_stacks[_mtx], axis=0)
                _score = get_score( mmm, hw_in=2, hw_out=4)
                # print(k, _q, _score)
                mmm_list.append(_score)
                # ...
                _table[k].append(_score)
                # row descriptors are recorded once per bin - while handling the very first sample
                if k == _select_sample_groups[0][0]:
                    _table["label"].append(_id)
                    _table["dist_min"].append(_dist_min)
                    _table["dist_max"].append(_dist_max)
                # ...
            ax.plot(_distX, mmm_list, label=k, marker=".")
            # ax.legend(frameon=False)
            if k.startswith("m"):
                ax.set_title(k.lstrip("m").rstrip("R12"))
            ax.set_xscale("log")
            ax.yaxis.set_major_locator(plt.MaxNLocator(3))
            ax.set_ylim((0.9, 3.0))
            ax.set_xlim((_distX[0]*0.8, _distX[-1]*1.2))
    #
    # ...
    # _table[]
# #
# # save this as a table ...
# pd.DataFrame(_table).to_csv("idid_scores.tsv", sep="\t", index=False)

# Actual figure plotting for publication ...

In [None]:
# --- fixed-size layout (all sizes in inches) for the fig5H timecourse figure ---
margin = 0.2
tcourse_spacing = 0.1
matw = 0.35
cbarh = 0.07

# shared heatmap settings: symmetric log color scale from 1/2.5 to 2.5
imshow_kwargs = dict(
        norm=LogNorm(vmin=1/2.5, vmax=2.5),
        cmap="RdBu_r",
        interpolation="nearest",
)

# timecourse_samples = ["Mito", "Telo", "Cyto", "5hR1R2", "10hR1R2"]
# sample names without the "m"/"p" prefix - the prefix is added when the stacks are looked up
timecourse_samples = ["Mito", "Telo", "Cyto", "5hR1R2"]
_nsamples = len(timecourse_samples)

# keys of `_dfff_dict` to show, one row each
_plot_conditions = [
    "inter-dotless",
    "inter-dotted",
    "intra-dotted",
    "intra-exact",
]
_nconds = len(_plot_conditions)

# The first items are for padding and the second items are for the axes, sizes are in inch.
# columns, left to right: margin | m-samples | tcourse_spacing | p-samples | margin | 1 extra column | margin
h = [ Size.Fixed(margin) ] + \
    (_nsamples-1)*[ Size.Fixed(matw), Size.Fixed(0.25*margin) ] + \
    [ Size.Fixed(matw), Size.Fixed(tcourse_spacing) ] + \
    (_nsamples-1)*[ Size.Fixed(matw), Size.Fixed(0.25*margin) ] + \
    [ Size.Fixed(matw), Size.Fixed(margin) ] + [ Size.Fixed(matw), Size.Fixed(margin) ]
# goes from bottom to the top ...
# rows: margin | colorbar | gap | one row per condition | margin
v = [ Size.Fixed(margin), Size.Fixed(cbarh), Size.Fixed(0.5*margin), ] + \
    (_nconds-1)*[ Size.Fixed(matw), Size.Fixed(0.25*margin), ] + \
    [ Size.Fixed(matw), Size.Fixed(margin), ]
# ...
# set figsize based on the tiling provided ...
fig_width = sum(_h.fixed_size for _h in h)
fig_height = sum(_v.fixed_size for _v in v)
fig = plt.figure(
    figsize=(fig_width, fig_height),
    # facecolor='lightblue'
)
print(f"figure size {fig_width=} {fig_height=}")
# ...
divider = Divider(fig, (0, 0, 1, 1), h, v, aspect=False)
# ...
# axes are stored as axs_m[sample][condition] / axs_p[sample][condition];
# odd grid indices are axes cells, even ones are padding
axs_m = {}
axs_p = {}
for i, _sample in enumerate(timecourse_samples):
    axs_p[_sample] = {}
    axs_m[_sample] = {}
    nxm = 2*i + 1
    nxp = 2*(i+_nsamples) + 1
    for j, _cond in enumerate(_plot_conditions):
        # +1 row offset skips the colorbar row; the first condition is the bottom row
        ny = 2*(j+1) + 1
        axs_p[_sample][_cond] = fig.add_axes(divider.get_position(), axes_locator=divider.new_locator(nx=nxp, ny=ny))
        axs_m[_sample][_cond] = fig.add_axes(divider.get_position(), axes_locator=divider.new_locator(nx=nxm, ny=ny))


# ...
# one extra axes per condition in the right-most column (used for the score curves)
axq = {}
nx = 4*_nsamples + 1
for j, _cond in enumerate(_plot_conditions):
    ny = 2*(j+1) + 1
    axq[_cond] = fig.add_axes(divider.get_position(), axes_locator=divider.new_locator(nx=nx, ny=ny))


# start with all ticks removed, on every axes
for ax in (
        sum( [list(_d.values()) for c,_d in axs_m.items()], start=[] ) +
        sum( [list(_d.values()) for c,_d in axs_p.items()], start=[] ) +
        list( axq.values() )
    ):
    ax.set_xticks([])
    ax.set_yticks([])

# colorbar axes: `nxp` still holds its last loop value, i.e. the column of the last p-sample
cbar_ax = fig.add_axes(
    divider.get_position(),
    axes_locator=divider.new_locator(nx=nxp, ny=1)
)
cbar_ax.set_xticks([])
cbar_ax.set_yticks([])



# fill the heatmap grid: for every (sample, condition) draw the NaN-aware mean
# of the m- and of the p-stack over the rows selected by that condition
for i, _sample in enumerate(timecourse_samples):
    for j, _cond in enumerate(_plot_conditions):
        axp = axs_p[_sample][_cond]
        axm = axs_m[_sample][_cond]
        # ...
        # index of the condition's table is used to pick snippets from the stacks
        _mtx = _dfff_dict[_cond].index
        # going over groupings (by dist, or whatever ...)
        Cm = np.nanmean(fullstacks_cis[f'm{_sample}'][_mtx], axis=0)
        Cp = np.nanmean(fullstacks_cis[f'p{_sample}'][_mtx], axis=0)
        _ccc = axm.imshow( Cm, **imshow_kwargs )
        # values above vmax are drawn in a dark red
        _ccc.cmap.set_over("#300000")
        _ccc = axp.imshow( Cp, **imshow_kwargs )
        _ccc.cmap.set_over("#300000")
        # ...
        for _ax in [axp, axm,]:
            _ax.set_xticks([])
            _ax.set_yticks([])
        # ylabel
        # condition name on the left-most column only
        if i == 0:
            axm.set_ylabel(f"{_cond}", fontsize=4, labelpad=1)
        # sample names above the top row only (the last condition is the top row)
        if j == len(_plot_conditions) - 1:
            axm.set_title(f"m{_sample}", fontsize=4, pad=1)
            axp.set_title(f"p{_sample}", fontsize=4, pad=1)
        # add ticks ...
        _mat_size = Cm.shape[0]
        # bottom row: x-ticks at the left edge, centre and right edge, labelled -flank, 0, +flank (in kb)
        if j == 0:
            axm.set_xticks([0-0.5, _mat_size/2-0.5, _mat_size-0.5])
            axm.set_xticklabels([-_flank//1000, 0, _flank//1000], fontsize=4)
            axm.tick_params(length=1.5, pad=1)
            # outer labels are aligned inwards
            for _tidx, tick in enumerate(axm.xaxis.get_majorticklabels()):
                if _tidx == 0:
                    tick.set_horizontalalignment("left")
                elif _tidx == 2:
                    tick.set_horizontalalignment("right")
                else:
                    tick.set_horizontalalignment("center")
            axp.set_xticks([0-0.5, _mat_size/2-0.5, _mat_size-0.5])
            axp.set_xticklabels([-_flank//1000, 0, _flank//1000], fontsize=4)
            axp.tick_params(length=1.5, pad=1)
            for _tidx, tick in enumerate(axp.xaxis.get_majorticklabels()):
                if _tidx == 0:
                    tick.set_horizontalalignment("left")
                elif _tidx == 2:
                    tick.set_horizontalalignment("right")
                else:
                    tick.set_horizontalalignment("center")
        # for the very last one ... - do ticks again ...
        # last sample: same three ticks on the y-axis of the p-panel, drawn on the right side
        if i == len(timecourse_samples) - 1:
            axp.yaxis.tick_right()
            axp.set_yticks(
                [0-0.5, _mat_size/2-0.5, _mat_size-0.5],
                labels=[-_flank//1000, 0, _flank//1000],
                rotation=90,
                fontsize=4,
            )
            axp.tick_params(length=1.5, pad=1)
            for _tidx, tick in enumerate(axp.yaxis.get_majorticklabels()):
                if _tidx == 0:
                    tick.set_verticalalignment("top")
                elif _tidx == 2:
                    tick.set_verticalalignment("bottom")
                else:
                    tick.set_verticalalignment("center")


# x-positions and tick labels of the samples in `timecourse_samples`
# (one entry per sample - keep the three lists in sync)
_timepoints = [0,1,2,4]
_tp_labels = ["M","T","C","G1"]
for j, _cond in enumerate(_plot_conditions):
    ax = axq[_cond]
    _mtx = _dfff_dict[_cond].index
    # enrichment score of the mean pileup, per sample, for the m- and the p-series
    m_course = []
    p_course = []
    for i, _sample in enumerate(timecourse_samples):
        # going over groupings (by dist, or whatever ...)
        Cm = np.nanmean(fullstacks_cis[f'm{_sample}'][_mtx], axis=0)
        Cp = np.nanmean(fullstacks_cis[f'p{_sample}'][_mtx], axis=0)
        m_course.append( get_score( Cm, hw_in=2, hw_out=4) )
        p_course.append( get_score( Cp, hw_in=2, hw_out=4) )
    # put number of items overhere ...
    ax.text(0.5, 0.5, f'#{len(_mtx)}', ha='center', va='center', fontsize=6, transform = ax.transAxes)
    ax.plot(_timepoints, m_course, lw=0.5, color="blue")
    ax.plot(_timepoints, p_course, lw=0.5, color="red")
    ax.set_ylim(1.125, 2.275)
    # ticks at the limits and the middle; labels are rounded to one decimal
    ax.set_yticks(
        [1.125, 1.7, 2.275],
        labels=["1.1", "1.7", "2.3"],
        rotation=90,
        fontsize=4,
    )
    ax.tick_params(length=1.0, pad=0.0)
    # outer labels are aligned inwards
    for _tidx, tick in enumerate(ax.yaxis.get_majorticklabels()):
        if _tidx == 0:
            tick.set_verticalalignment("bottom")
        elif _tidx ==1:
            tick.set_verticalalignment("center")
        else:
            tick.set_verticalalignment("top")
    ax.set_xlim(_timepoints[0], _timepoints[-1])
    # x tick labels on the bottom row only
    if j == 0:
        ax.tick_params(axis="x", length=1.5, pad=1)
        ax.set_xticks(
            _timepoints,
            labels=_tp_labels,
            fontsize=4,
        )


# add a single colorbar ...
# built from the shared norm/cmap (not from a particular image) and drawn into `cbar_ax`
fig.colorbar(
    cm.ScalarMappable(norm=imshow_kwargs["norm"], cmap=imshow_kwargs["cmap"]),
    cax=cbar_ax,
    orientation="horizontal",
)
# three ticks only: vmin, 1 and vmax of the norm
cbar_ax.set_xticks([imshow_kwargs["norm"].vmin, 1, imshow_kwargs["norm"].vmax])
cbar_ax.set_xticklabels([imshow_kwargs["norm"].vmin, 1, imshow_kwargs["norm"].vmax], fontsize=5)
cbar_ax.minorticks_off()
cbar_ax.tick_params(length=1.5, pad=1)  #,direction='out', length=6, width=2, colors='r', grid_color='r', grid_alpha=0.5)
# outer labels are aligned inwards
for _tidx, tick in enumerate(cbar_ax.xaxis.get_majorticklabels()):
    if _tidx == 0:
        tick.set_horizontalalignment("left")
    elif _tidx == 2:
        tick.set_horizontalalignment("right")
    else:
        tick.set_horizontalalignment("center")


# SVG is the primary output; it is converted to pdf/png by the cairosvg calls below
plt.savefig("fig5H_timecourse.svg", dpi=300)

! cairosvg --format pdf -o fig5H_timecourse.pdf fig5H_timecourse.svg
! cairosvg --format png --background white -o fig5H_timecourse.png fig5H_timecourse.svg

In [None]:
# --- fixed-size layout (all sizes in inches) for the figExt5D figure ---
# NOTE: `_dfff_dict` and `_flank` used further down come from earlier cells.
margin = 0.2
tcourse_spacing = 0.1
matw = 0.35
cbarh = 0.07

# shared heatmap settings: symmetric log color scale from 1/2.5 to 2.5
imshow_kwargs = dict(
        norm=LogNorm(vmin=1/2.5, vmax=2.5),
        cmap="RdBu_r",
        interpolation="nearest",
)

# here the names are complete keys of `fullstacks_cis` (no prefix is added)
timecourse_samples = ["N93m5", "N93p5"]
# timecourse_samples = ["N93m5","N93m10", "N93p5", "N93p10"]
_nsamples = len(timecourse_samples)

# keys of `_dfff_dict` to show, one row each
_plot_conditions = [
    "inter-dotless",
    "inter-dotted",
    "intra-dotted",
    "intra-exact",
]
_nconds = len(_plot_conditions)

# The first items are for padding and the second items are for the axes, sizes are in inch.
# columns, left to right: margin | one column per sample | margin
h = [ Size.Fixed(margin) ] + \
    (_nsamples-1)*[ Size.Fixed(matw), Size.Fixed(0.25*margin) ] + \
    [ Size.Fixed(matw), Size.Fixed(margin)  ]
# goes from bottom to the top ...
# rows: margin | colorbar | gap | one row per condition | margin
v = [ Size.Fixed(margin), Size.Fixed(cbarh), Size.Fixed(0.5*margin), ] + \
    (_nconds-1)*[ Size.Fixed(matw), Size.Fixed(0.25*margin), ] + \
    [ Size.Fixed(matw), Size.Fixed(margin), ]
# ...
# set figsize based on the tiling provided ...
fig_width = sum(_h.fixed_size for _h in h)
fig_height = sum(_v.fixed_size for _v in v)
fig = plt.figure(
    figsize=(fig_width, fig_height),
    # facecolor='lightblue'
)
print(f"figure size {fig_width=} {fig_height=}")
# ...
divider = Divider(fig, (0, 0, 1, 1), h, v, aspect=False)
# ...
# axes are stored as axs[sample][condition]; odd grid indices are axes cells, even ones are padding
axs = {}
for i, _sample in enumerate(timecourse_samples):
    axs[_sample] = {}
    nx = 2*i + 1
    for j, _cond in enumerate(_plot_conditions):
        # +1 row offset skips the colorbar row; the first condition is the bottom row
        ny = 2*(j+1) + 1
        axs[_sample][_cond] = fig.add_axes(divider.get_position(), axes_locator=divider.new_locator(nx=nx, ny=ny))


# start with all ticks removed, on every axes
for ax in sum( [list(_d.values()) for c,_d in axs.items()], start=[] ):
    ax.set_xticks([])
    ax.set_yticks([])

# colorbar axes: `nx` still holds its last loop value, i.e. the column of the last sample
cbar_ax = fig.add_axes(
    divider.get_position(),
    axes_locator=divider.new_locator(nx=nx, ny=1)
)
cbar_ax.set_xticks([])
cbar_ax.set_yticks([])



# fill the heatmap grid: for every (sample, condition) draw the NaN-aware mean
# of the sample's stack over the rows selected by that condition
for i, _sample in enumerate(timecourse_samples):
    for j, _cond in enumerate(_plot_conditions):
        ax = axs[_sample][_cond]
        # index of the condition's table is used to pick snippets from the stack
        _mtx = _dfff_dict[_cond].index
        # going over groupings (by dist, or whatever ...)
        C = np.nanmean(fullstacks_cis[_sample][_mtx], axis=0)
        _ccc = ax.imshow( C, **imshow_kwargs )
        # values above vmax are drawn in a dark red
        _ccc.cmap.set_over("#300000")
        # ...
        ax.set_xticks([])
        ax.set_yticks([])
        # ylabel
        # condition name on the left-most column only
        if i == 0:
            ax.set_ylabel(f"{_cond}", fontsize=4, labelpad=1)
        # sample name above the top row only (the last condition is the top row)
        if j == len(_plot_conditions) - 1:
            ax.set_title(f"{_sample}", fontsize=4, pad=1)
        # add ticks ...
        _mat_size = C.shape[0]
        # bottom row: x-ticks at the left edge, centre and right edge, labelled -flank, 0, +flank (in kb)
        if j == 0:
            ax.set_xticks([0-0.5, _mat_size/2-0.5, _mat_size-0.5])
            ax.set_xticklabels([-_flank//1000, 0, _flank//1000], fontsize=4)
            ax.tick_params(length=1.5, pad=1)
            # outer labels are aligned inwards
            for _tidx, tick in enumerate(ax.xaxis.get_majorticklabels()):
                if _tidx == 0:
                    tick.set_horizontalalignment("left")
                elif _tidx == 2:
                    tick.set_horizontalalignment("right")
                else:
                    tick.set_horizontalalignment("center")
        # for the very last one ... - do ticks again ...
        # last sample column: same three ticks on the y-axis, drawn on the right side
        if i == len(timecourse_samples) - 1:
            ax.yaxis.tick_right()
            ax.set_yticks(
                [0-0.5, _mat_size/2-0.5, _mat_size-0.5],
                labels=[-_flank//1000, 0, _flank//1000],
                rotation=90,
                fontsize=4,
            )
            ax.tick_params(length=1.5, pad=1)
            for _tidx, tick in enumerate(ax.yaxis.get_majorticklabels()):
                if _tidx == 0:
                    tick.set_verticalalignment("top")
                elif _tidx == 2:
                    tick.set_verticalalignment("bottom")
                else:
                    tick.set_verticalalignment("center")

# add a single colorbar ...
# built from the shared norm/cmap (not from a particular image) and drawn into `cbar_ax`
fig.colorbar(
    cm.ScalarMappable(norm=imshow_kwargs["norm"], cmap=imshow_kwargs["cmap"]),
    cax=cbar_ax,
    orientation="horizontal",
)
# three ticks only: vmin, 1 and vmax of the norm
cbar_ax.set_xticks([imshow_kwargs["norm"].vmin, 1, imshow_kwargs["norm"].vmax])
cbar_ax.set_xticklabels([imshow_kwargs["norm"].vmin, 1, imshow_kwargs["norm"].vmax], fontsize=5)
cbar_ax.minorticks_off()
cbar_ax.tick_params(length=1.5, pad=1)  #,direction='out', length=6, width=2, colors='r', grid_color='r', grid_alpha=0.5)
# outer labels are aligned inwards
for _tidx, tick in enumerate(cbar_ax.xaxis.get_majorticklabels()):
    if _tidx == 0:
        tick.set_horizontalalignment("left")
    elif _tidx == 2:
        tick.set_horizontalalignment("right")
    else:
        tick.set_horizontalalignment("center")


# SVG is the primary output; it is converted to pdf/png by the cairosvg calls below
plt.savefig("figExt5D_nup.svg", dpi=300)

! cairosvg --format pdf -o figExt5D_nup.pdf figExt5D_nup.svg
! cairosvg --format png --background white -o figExt5D_nup.png figExt5D_nup.svg

In [None]:
# scratch: inspect `_df_intra_arm` - the table whose `dist` column and index
# are used by the surrounding cells to select snippets from the pileup stacks
_df_intra_arm

In [None]:
# --- fixed-size layout (all sizes in inches) for the figExt7H figure ---
# Transposed w.r.t. the previous figures: conditions are columns, samples are rows.
# NOTE: this cell overwrites the global `dist_bins` with a 3-bin version -
# re-run the cell that defines the long `dist_bins` before re-running cells that need it.
margin = 0.2
tcourse_spacing = 0.1
matw = 0.35
cbarh = 0.07

# shared heatmap settings: symmetric log color scale from 1/2.5 to 2.5
imshow_kwargs = dict(
        norm=LogNorm(vmin=1/2.5, vmax=2.5),
        cmap="RdBu_r",
        interpolation="nearest",
)

# rows are laid out bottom-to-top, hence the reversal: the first listed sample ends up on top
timecourse_samples = list(reversed(["m10hR1R2", "p10hR1R2", "mp10hR1R2",]))
_nsamples = len(timecourse_samples)

# first group of columns: distance classes and trans
_plot_conditions_sbgrp1 = [
    "short",
    "mid",
    "long",
    "trans",
]

# deal with short and long range stuff ...
# edges of the short / mid / long distance classes
dist_bins = [0, 1_000_000, 10_000_000, 300_000_000]

# second group of columns: keys of `_dfff_dict`, shown in reversed order
_plot_conditions_sbgrp2 = [
    "inter-dotless",
    "inter-dotted",
    "intra-dotted",
    "intra-exact",
]
_plot_conditions_sbgrp2 = list(reversed(_plot_conditions_sbgrp2))
_nconds1 = len(_plot_conditions_sbgrp1)
_nconds2 = len(_plot_conditions_sbgrp2)
_nconds = _nconds1 + _nconds2

# The first items are for padding and the second items are for the axes, sizes are in inch.
# columns, left to right: margin | group-1 columns | tcourse_spacing | group-2 columns | margin
h = [ Size.Fixed(margin) ] + \
    (_nconds1-1)*[ Size.Fixed(matw), Size.Fixed(0.25*margin) ] + \
    [ Size.Fixed(matw), Size.Fixed(tcourse_spacing) ] + \
    (_nconds2-1)*[ Size.Fixed(matw), Size.Fixed(0.25*margin) ] + \
    [ Size.Fixed(matw), Size.Fixed(margin) ]
# goes from bottom to the top ...
# rows: margin | colorbar | gap | one row per sample | margin
v = [ Size.Fixed(margin), Size.Fixed(cbarh), Size.Fixed(0.5*margin), ] + \
    (_nsamples-1)*[ Size.Fixed(matw), Size.Fixed(0.25*margin), ] + \
    [ Size.Fixed(matw), Size.Fixed(margin), ]
# ...
# set figsize based on the tiling provided ...
fig_width = sum(_h.fixed_size for _h in h)
fig_height = sum(_v.fixed_size for _v in v)
fig = plt.figure(
    figsize=(fig_width, fig_height),
    # facecolor='lightblue'
)
print(f"figure size {fig_width=} {fig_height=}")
# ...
divider = Divider(fig, (0, 0, 1, 1), h, v, aspect=False)
# ...
# axes are stored as axs[condition][sample]; odd grid indices are axes cells, even ones are padding
axs = {}
for i, _cond in enumerate((_plot_conditions_sbgrp1 + _plot_conditions_sbgrp2)):
    axs[_cond] = {}
    nx = 2*i + 1
    for j, _sample in enumerate(timecourse_samples):
        # +1 row offset skips the colorbar row
        ny = 2*(j+1) + 1
        axs[_cond][_sample] = fig.add_axes(divider.get_position(), axes_locator=divider.new_locator(nx=nx, ny=ny))


# start with all ticks removed, on every axes
for ax in sum( [list(_d.values()) for c,_d in axs.items()], start=[] ):
    ax.set_xticks([])
    ax.set_yticks([])

# colorbar axes: `nx` still holds its last loop value, i.e. the last condition column
cbar_ax = fig.add_axes(
    divider.get_position(),
    axes_locator=divider.new_locator(nx=nx, ny=1)
)
cbar_ax.set_xticks([])
cbar_ax.set_yticks([])

# stack_means[_sample]

# position of the pileup to show within `stack_means[sample]`
# NOTE: this overwrites the global `_trans_idx` that the Fig5F/5G cells use as an
# x-coordinate (70_000_000) - re-run the 5G cell before re-running those.
_trans_idx = 2  # all !

for i, _cond in enumerate(_plot_conditions_sbgrp1 + _plot_conditions_sbgrp2):
    for j, _sample in enumerate(timecourse_samples):
        ax = axs[_cond][_sample]
        if _cond == "trans":
            # trans pileups are pre-averaged
            C = stack_means[_sample][_trans_idx]
        else:
            # cis pileups: select the rows first, then average their snippets.
            # Distance classes are right-closed, (lo, hi], like the pd.cut binning
            # used for the quantifications - so rows sitting exactly on a bin edge
            # are no longer dropped from all three classes.
            if _cond.startswith("in"):
                # "inter-..." / "intra-..." conditions are precomputed subsets
                _mtx = _dfff_dict[_cond].index
            elif _cond == "short":
                _mtx = _df_intra_arm.query(" dist <= @dist_bins[1] ").index
            elif _cond == "mid":
                _mtx = _df_intra_arm.query(" @dist_bins[1] < dist <= @dist_bins[2] ").index
            elif _cond == "long":
                _mtx = _df_intra_arm.query(" @dist_bins[2] < dist ").index
            else:
                # do not silently redraw the matrix of the previous panel
                raise ValueError(f"unknown plot condition: {_cond!r}")
            C = np.nanmean(fullstacks_cis[_sample][_mtx], axis=0)
        _ccc = ax.imshow( C, **imshow_kwargs )
        # values above vmax are drawn in a dark red
        _ccc.cmap.set_over("#300000")
        # ylabel
        # sample name on the left-most column only
        if i == 0:
            ax.set_ylabel(f"{_sample}", fontsize=4, labelpad=1)
        # column titles above the top row only; distance classes also show their range
        if j == len(timecourse_samples) - 1:
            ax.set_title(f"{_cond}", fontsize=4, pad=1)
            if _cond == "short":
                ax.set_title(f"{_cond}: <{dist_bins[1]//1000000}Mb", fontsize=4, pad=1)
            elif _cond == "mid":
                ax.set_title(f"{_cond}: {dist_bins[1]//1000000}-{dist_bins[2]//1000000}Mb", fontsize=4, pad=1)
            elif _cond == "long":
                ax.set_title(f"{_cond}: >{dist_bins[2]//1000000}Mb", fontsize=4, pad=1)
        # add ticks ...
        _mat_size = C.shape[0]
        # bottom row: x-ticks at the left edge, centre and right edge, labelled -flank, 0, +flank (in kb)
        if j == 0:
            ax.set_xticks([0-0.5, _mat_size/2-0.5, _mat_size-0.5])
            ax.set_xticklabels([-_flank//1000, 0, _flank//1000], fontsize=4)
            ax.tick_params(length=1.5, pad=1)
            # outer labels are aligned inwards
            for _tidx, tick in enumerate(ax.xaxis.get_majorticklabels()):
                if _tidx == 0:
                    tick.set_horizontalalignment("left")
                elif _tidx == 2:
                    tick.set_horizontalalignment("right")
                else:
                    tick.set_horizontalalignment("center")
        # for the very last one ... - do ticks again ...
        # last column: same three ticks on the y-axis, drawn on the right side
        if i == _nconds - 1:
            ax.yaxis.tick_right()
            ax.set_yticks(
                [0-0.5, _mat_size/2-0.5, _mat_size-0.5],
                labels=[-_flank//1000, 0, _flank//1000],
                rotation=90,
                fontsize=4,
            )
            ax.tick_params(length=1.5, pad=1)
            for _tidx, tick in enumerate(ax.yaxis.get_majorticklabels()):
                if _tidx == 0:
                    tick.set_verticalalignment("top")
                elif _tidx == 2:
                    tick.set_verticalalignment("bottom")
                else:
                    tick.set_verticalalignment("center")


# add a single colorbar ...
# built from the shared norm/cmap (not from a particular image) and drawn into `cbar_ax`
fig.colorbar(
    cm.ScalarMappable(norm=imshow_kwargs["norm"], cmap=imshow_kwargs["cmap"]),
    cax=cbar_ax,
    orientation="horizontal",
)
# three ticks only: vmin, 1 and vmax of the norm
cbar_ax.set_xticks([imshow_kwargs["norm"].vmin, 1, imshow_kwargs["norm"].vmax])
cbar_ax.set_xticklabels([imshow_kwargs["norm"].vmin, 1, imshow_kwargs["norm"].vmax], fontsize=5)
cbar_ax.minorticks_off()
cbar_ax.tick_params(length=1.5, pad=1)  #,direction='out', length=6, width=2, colors='r', grid_color='r', grid_alpha=0.5)
# outer labels are aligned inwards
for _tidx, tick in enumerate(cbar_ax.xaxis.get_majorticklabels()):
    if _tidx == 0:
        tick.set_horizontalalignment("left")
    elif _tidx == 2:
        tick.set_horizontalalignment("right")
    else:
        tick.set_horizontalalignment("center")


# only the SVG is written here (the cairosvg conversions below are commented out)
plt.savefig("figExt7H_pileups.svg", dpi=300)

# ! cairosvg --format pdf -o figExt7H_pileups.pdf figExt7H_pileups.svg
# ! cairosvg --format png --background white -o figExt7H_pileups.png figExt7H_pileups.svg

In [None]:
# Layout building blocks; all sizes are in inches.
margin = 0.2
tcourse_spacing = 0.1
matw = 0.35
cbarh = 0.07

# Display settings passed to every imshow call of this figure.
imshow_kwargs = {
    "norm": LogNorm(vmin=1/2.5, vmax=2.5),
    "cmap": "RdBu_r",
    "interpolation": "nearest",
}

# Rows of the grid are filled bottom-up, hence the reversed order.
timecourse_samples = ["m10hR1R2", "p10hR1R2", "mp10hR1R2"][::-1]
_nsamples = len(timecourse_samples)

# First group of columns.
_plot_conditions_sbgrp1 = ["short", "long", "trans"]

# deal with short and long range stuff ...
dist_bins = [0, 1_000_000, 300_000_000]

# Second group of columns (stored in reversed order).
_plot_conditions_sbgrp2 = [
    "inter-dotless",
    "inter-dotted",
    "intra-dotted",
    "intra-exact",
][::-1]
_nconds1 = len(_plot_conditions_sbgrp1)
_nconds2 = len(_plot_conditions_sbgrp2)
_nconds = _nconds1 + _nconds2

# Horizontal tiling: left margin, then (panel, gap) pairs. The gap after the
# last panel of the first group is tcourse_spacing, the gap after the very
# last panel is the right margin, every other gap is a quarter margin.
h = [Size.Fixed(margin)]
for _col in range(_nconds):
    h.append(Size.Fixed(matw))
    if _col == _nconds1 - 1:
        h.append(Size.Fixed(tcourse_spacing))
    elif _col == _nconds - 1:
        h.append(Size.Fixed(margin))
    else:
        h.append(Size.Fixed(0.25*margin))

# Vertical tiling, bottom to top: margin, colorbar, half margin, then
# (panel, gap) pairs with a full margin above the topmost panel.
v = [Size.Fixed(margin), Size.Fixed(cbarh), Size.Fixed(0.5*margin)]
for _row in range(_nsamples):
    v.append(Size.Fixed(matw))
    v.append(Size.Fixed(margin if _row == _nsamples - 1 else 0.25*margin))

# The figure is exactly as large as the tiling requires.
fig_width = sum(_h.fixed_size for _h in h)
fig_height = sum(_v.fixed_size for _v in v)
fig = plt.figure(figsize=(fig_width, fig_height))
print(f"figure size {fig_width=} {fig_height=}")

divider = Divider(fig, (0, 0, 1, 1), h, v, aspect=False)

# axs[condition][sample] -> Axes; panels sit at the odd positions of the
# tiling (even positions are the gaps), and rows start above the colorbar.
_all_conditions = _plot_conditions_sbgrp1 + _plot_conditions_sbgrp2
axs = {_cond: {} for _cond in _all_conditions}
for (i, _cond), (j, _sample) in product(
    enumerate(_all_conditions), enumerate(timecourse_samples)
):
    nx = 2*i + 1
    ny = 2*(j+1) + 1
    axs[_cond][_sample] = fig.add_axes(
        divider.get_position(),
        axes_locator=divider.new_locator(nx=nx, ny=ny),
    )


# start every panel without ticks ...
for ax in chain.from_iterable(_d.values() for _d in axs.values()):
    ax.set_xticks([])
    ax.set_yticks([])

# Colorbar axes: nx still holds the value of the last column created above,
# ny=1 is the cbarh-tall slot of the vertical tiling.
cbar_ax = fig.add_axes(
    divider.get_position(),
    axes_locator=divider.new_locator(nx=nx, ny=1)
)
cbar_ax.set_xticks([])
cbar_ax.set_yticks([])

# index into stack_means[_sample] that is shown in the "trans" column
_trans_idx = 2  # all !

# Row selections of _df_intra_arm for the distance-split columns.
# NOTE(review): both comparisons are strict, so rows with dist == dist_bins[1]
# end up in neither "short" nor "long" - confirm this is intended.
_rows_by_dist = {
    "short": _df_intra_arm.query(" dist < @dist_bins[1] ").index,
    "long": _df_intra_arm.query(" @dist_bins[1] < dist ").index,
}

# label alignment by tick position: first / third tick hug the panel edges
_tick_halign = {0: "left", 2: "right"}
_tick_valign = {0: "top", 2: "bottom"}
_top_row = len(timecourse_samples) - 1

for i, _cond in enumerate(_plot_conditions_sbgrp1 + _plot_conditions_sbgrp2):
    for j, _sample in enumerate(timecourse_samples):
        ax = axs[_cond][_sample]
        # pick the matrix for this panel ...
        if _cond == "trans":
            C = stack_means[_sample][_trans_idx]
        else:
            # "short"/"long" use the distance split, the "in..." conditions
            # use the rows of the matching _dfff_dict entry
            if _cond in _rows_by_dist:
                _mtx = _rows_by_dist[_cond]
            else:
                _mtx = _dfff_dict[_cond].index
            C = np.nanmean(fullstacks_cis[_sample][_mtx], axis=0)
        _ccc = ax.imshow(C, **imshow_kwargs)
        _ccc.cmap.set_over("#300000")
        # sample name on the first column only
        if i == 0:
            ax.set_ylabel(f"{_sample}", fontsize=4, labelpad=1)
        # condition name on the last sample (top) row only
        if j == _top_row:
            if _cond == "short":
                _title = f"{_cond}: <{dist_bins[1]//1000000}Mb"
            elif _cond == "long":
                _title = f"{_cond}: >{dist_bins[1]//1000000}Mb"
            else:
                _title = f"{_cond}"
            ax.set_title(_title, fontsize=4, pad=1)
        # ticks at the outer pixel edges and at the middle, labelled with _flank//1000
        _mat_size = C.shape[0]
        _tick_pos = [0-0.5, _mat_size/2-0.5, _mat_size-0.5]
        _tick_lbl = [-_flank//1000, 0, _flank//1000]
        if j == 0:
            ax.set_xticks(_tick_pos)
            ax.set_xticklabels(_tick_lbl, fontsize=4)
            ax.tick_params(length=1.5, pad=1)
            for _tidx, tick in enumerate(ax.xaxis.get_majorticklabels()):
                tick.set_horizontalalignment(_tick_halign.get(_tidx, "center"))
        # the last column additionally gets y ticks on its right side
        if i == _nconds - 1:
            ax.yaxis.tick_right()
            ax.set_yticks(_tick_pos, labels=_tick_lbl, rotation=90, fontsize=4)
            ax.tick_params(length=1.5, pad=1)
            for _tidx, tick in enumerate(ax.yaxis.get_majorticklabels()):
                tick.set_verticalalignment(_tick_valign.get(_tidx, "center"))


# One colorbar for the whole figure: it is drawn from a standalone mappable
# built from the norm and colormap stored in imshow_kwargs.
_cbar_norm = imshow_kwargs["norm"]
fig.colorbar(
    cm.ScalarMappable(norm=_cbar_norm, cmap=imshow_kwargs["cmap"]),
    cax=cbar_ax,
    orientation="horizontal",
)
# tick (and label) only the two ends of the norm and the value 1 ...
_cbar_ticks = [_cbar_norm.vmin, 1, _cbar_norm.vmax]
cbar_ax.set_xticks(_cbar_ticks)
cbar_ax.set_xticklabels(_cbar_ticks, fontsize=5)
cbar_ax.minorticks_off()
cbar_ax.tick_params(length=1.5, pad=1)
# first label is left-aligned, third is right-aligned, anything else is centered ...
_cbar_halign = {0: "left", 2: "right"}
for _tidx, tick in enumerate(cbar_ax.xaxis.get_majorticklabels()):
    tick.set_horizontalalignment(_cbar_halign.get(_tidx, "center"))


plt.savefig("figExt7H_pileups.svg", dpi=300)

# ! cairosvg --format pdf -o figExt7H_pileups.pdf figExt7H_pileups.svg
# ! cairosvg --format png --background white -o figExt7H_pileups.png figExt7H_pileups.svg

In [None]:
_flank = 100_000
# dist_bins = [0, 250_000, 1_500_000, 15_000_000, 100_000_000]
dist_bins = [0, 300_000_000]

# One stacked panel per entry of _dfff_dict (same groups as used before),
# all sharing both axes.
f, axs = plt.subplots(
    len(_dfff_dict),
    1,
    figsize=(2.5,12),
    sharex=True,
    sharey=True,
)


for ax, (_id, _dfff) in zip(axs, _dfff_dict.items()):
    print(_id)
    # split the rows of this group by distance bin
    ggg = _dfff.groupby(pd.cut( _dfff["dist"], dist_bins ))
    nquants = len(ggg)
    for _sample_group in _select_sample_groups:
        # one score per (sample, distance bin): samples vary slowest, each
        # score is get_score() of the nan-mean of the selected stack rows
        _tp = []
        for k in _sample_group:
            _stacks = fullstacks_cis[k]
            for _mtx in ggg.groups.values():
                mmm = np.nanmean(_stacks[_mtx], axis=0)
                _tp.append(get_score( mmm, hw_in=2, hw_out=4))
        ax.plot(_tp)
        ax.set_title(_id)
        # tick labels are hard-coded for five points per curve
        ax.set_xticks([0,1,2,3,4])
        ax.set_xticklabels(["m","t","c","5","10"])


In [None]:
# Sample names of the first group with one leading "m" removed.
# str.lstrip("m") treats its argument as a set of characters and would strip
# every leading "m", so the prefix is cut off explicitly instead.
[s[1:] if s.startswith("m") else s for s in _select_sample_groups[0]]

In [None]:
_flank = 100_000
# dist_bins = [0, 250_000, 1_500_000, 15_000_000, 100_000_000]
dist_bins = [0, 300_000_000]

# reusing the same groups from _dfff_dict ...

# Sample names of the first group with one leading "m" removed; the "m"- and
# "p"-prefixed keys of fullstacks_cis are rebuilt from these below.
# str.lstrip("m") strips a *set of characters* (every leading "m"), not a
# prefix, so exactly one character is cut off here instead.
_sample_stems = [s[1:] if s.startswith("m") else s for s in _select_sample_groups[0]]

# grid of panels: one row per _dfff_dict entry, one column per sample
f, axs = plt.subplots(
    nrows=len(_dfff_dict),
    ncols=len(_sample_stems),
    figsize=(12,12),
    sharey=True,
    sharex=True,
)

# Tick positions (in pixels) and their labels (kbp relative to the middle
# tick); identical for every panel, so computed once.
ticks_pixels = np.linspace(0, _flank*2//binsize10, 5)
ticks_kbp = ((ticks_pixels-ticks_pixels[-1]/2)*binsize10//1000).astype(int)

for i, (_axs, (_id, _dfff)) in enumerate(zip(axs, _dfff_dict.items())):
    # i: row of the grid (entry of _dfff_dict)
    print(_id)
    ggg = _dfff.groupby(pd.cut( _dfff["dist"], dist_bins ))
    nquants = len(ggg)
    # going over samples ...
    for j, k in enumerate(_sample_stems):
        # j: column of the grid (sample)
        ax = _axs[j]
        for _q, _mtx in ggg.groups.items():
            # going over groupings (by dist, or whatever ...)
            # NOTE: with more than one distance bin every bin is drawn onto the same axes
            mmmm = np.nanmean(fullstacks_cis[f'm{k}'][_mtx], axis=0)
            mmmp = np.nanmean(fullstacks_cis[f'p{k}'][_mtx], axis=0)
            # element-wise ratio of the two mean matrices on a log color scale
            _ccc = ax.imshow(
                mmmm/mmmp,
                cmap='RdBu_r',
                # cmap='coolwarm',
                norm=LogNorm(vmin=1/2.25,vmax=2.25),
            )
            _ccc.cmap.set_over("#400000")
            if j == 0:
                # first column
                ax.set_ylabel(f"{_id}: m/p", fontsize=12)
            if i == 0:
                # top row
                ax.set_title(k)
            if i == len(_dfff_dict)-1:
                # bottom row
                ax.set_xticks(ticks_pixels, ticks_kbp)
                ax.set_xlabel('relative position, kbp')
            ax.set_yticks(ticks_pixels, ticks_kbp)


In [None]:
# Scratch inspection: display the (group key, row labels) pairs of whatever
# `ggg` grouping was created last.
ggg.groups.items()

In [None]:
_flank = 100_000
# dist_bins = [0, 250_000, 1_500_000, 15_000_000, 100_000_000]
dist_bins = [0, 300_000_000]

# reusing the same groups from _dfff_dict ...

# Tick positions (in pixels) and their labels (kbp relative to the middle
# tick); identical for every panel, so computed once.
ticks_pixels = np.linspace(0, _flank*2//binsize10, 5)
ticks_kbp = ((ticks_pixels-ticks_pixels[-1]/2)*binsize10//1000).astype(int)

for _id, _dfff in _dfff_dict.items():
    # split the rows of this group by distance bin
    ggg = _dfff.groupby(pd.cut( _dfff["dist"], dist_bins ))
    nquants = len(ggg)
    for _sample_group in [_select_sample_groups[0]]:
        # one figure per _dfff_dict entry: columns are samples, rows are distance bins
        f, axs = plt.subplots(
            ncols=len(_sample_group),
            nrows=nquants,
            figsize=(3*len(_sample_group), 3*nquants),
            # width_ratios=[1]*nquants,
            sharex=True,
            sharey=True,
            squeeze=False,
        )
        _bottom_row = axs.shape[0] - 1
        # gs = axs[0, -1].get_gridspec()
        # # remove axes for the last column ...
        # for ax in axs[:, -1]:
        #     ax.remove()
        #
        # axcb = f.add_subplot(gs[1:3, -1])
        #
        for i, (_axs, k) in enumerate(zip(axs.T,_sample_group)):
            # going over samples ... (i: column index, _axs: that column's axes)
            _stacks = fullstacks_cis[k]
            print(k)
            for j, (ax, (_q, _mtx)) in enumerate(zip(_axs, ggg.groups.items())):
                # going over groupings (by dist, or whatever ...) (j: row index)
                _ccc = ax.imshow(
                    np.nanmean(_stacks[_mtx], axis=0),
                    cmap='RdBu_r',
                    norm=LogNorm(vmin=1/2.5,vmax=2.5),
                    # norm=LogNorm(vmin=1/1250,vmax=1250),
                    # norm=LogNorm(vmin=1/3,vmax=3),
                )
                _ccc.cmap.set_over("#400000")
                if j == 0:
                    # top row
                    ax.set_title(f"{k}", fontsize=14)
                if i == 0:
                    # first column
                    _axname = _get_name(_q.left, _q.right, len(_mtx))
                    ax.set_ylabel(f"{_id}::{_axname}")
                if j == _bottom_row:
                    # bottom row: this used to test the column index `i`, which
                    # put the x label on the last column instead of under the grid
                    ax.set_xticks(ticks_pixels, ticks_kbp)
                    ax.set_xlabel('relative position, kbp')
                ax.set_yticks(ticks_pixels, ticks_kbp)
        # plt.colorbar(_ccc, label="obs/exp", cax=axcb)
    # cs.cmap.set_under('k')

In [None]:
# pileup select samples only !
# Only the first group is plotted below; the commented-out group is kept as an alternative.
_select_sample_groups = [
    [
        "m10hR1R2",
        "p10hR1R2",
        "mp10hR1R2",
        "N93m5",
        "N93m10",
        "N93p5",
        "N93p10",
        "N93mp10",
    ],
    # # # p-ones
    # [
    #     "pMito",
    #     "pTelo",
    #     "pCyto",
    #     "p5hR1R2",
    #     "p10hR1R2",
    # ],
]


_flank = 100_000
# dist_bins = [0, 250_000, 1_500_000, 15_000_000, 100_000_000]
dist_bins = [0, 300_000_000]

# reusing the same groups from _dfff_dict ...

# Tick positions (in pixels) and their labels (kbp relative to the middle
# tick); identical for every panel, so computed once.
ticks_pixels = np.linspace(0, _flank*2//binsize10, 5)
ticks_kbp = ((ticks_pixels-ticks_pixels[-1]/2)*binsize10//1000).astype(int)

for _id, _dfff in _dfff_dict.items():
    # split the rows of this group by distance bin
    ggg = _dfff.groupby(pd.cut( _dfff["dist"], dist_bins ))
    nquants = len(ggg)
    for _sample_group in [_select_sample_groups[0]]:
        # one figure per _dfff_dict entry: columns are samples, rows are distance bins
        f, axs = plt.subplots(
            ncols=len(_sample_group),
            nrows=nquants,
            figsize=(3*len(_sample_group), 3*nquants),
            # width_ratios=[1]*nquants,
            sharex=True,
            sharey=True,
            squeeze=False,
        )
        _bottom_row = axs.shape[0] - 1
        # gs = axs[0, -1].get_gridspec()
        # # remove axes for the last column ...
        # for ax in axs[:, -1]:
        #     ax.remove()
        #
        # axcb = f.add_subplot(gs[1:3, -1])
        #
        for i, (_axs, k) in enumerate(zip(axs.T,_sample_group)):
            # going over samples ... (i: column index, _axs: that column's axes)
            _stacks = fullstacks_cis[k]
            print(k)
            for j, (ax, (_q, _mtx)) in enumerate(zip(_axs, ggg.groups.items())):
                # going over groupings (by dist, or whatever ...) (j: row index)
                _ccc = ax.imshow(
                    np.nanmean(_stacks[_mtx], axis=0),
                    cmap='RdBu_r',
                    norm=LogNorm(vmin=1/2.25,vmax=2.25),
                    # norm=LogNorm(vmin=1/1250,vmax=1250),
                    # norm=LogNorm(vmin=1/3,vmax=3),
                )
                _ccc.cmap.set_over("#400000")
                if j == 0:
                    # top row
                    ax.set_title(f"{k}", fontsize=14)
                if i == 0:
                    # first column
                    _axname = _get_name(_q.left, _q.right, len(_mtx))
                    ax.set_ylabel(f"{_id}::{_axname}")
                if j == _bottom_row:
                    # bottom row: this used to test the column index `i`, which
                    # put the x label on the last column instead of under the grid
                    ax.set_xticks(ticks_pixels, ticks_kbp)
                    ax.set_xlabel('relative position, kbp')
                ax.set_yticks(ticks_pixels, ticks_kbp)
        # plt.colorbar(_ccc, label="obs/exp", cax=axcb)
    # cs.cmap.set_under('k')