In [1]:
from pycisTopic.pseudobulk_peak_calling import export_pseudobulk, peak_calling
import pyranges as pr
import requests
import os
import pandas as pd
import glob
import pickle

In [2]:
%load_ext lab_black

In [3]:
# get chromosome sizes (hg38)
if not os.path.exists("chromsizes.txt"):
    target_url = (
        "http://hgdownload.cse.ucsc.edu/goldenPath/mm10/bigZips/mm10.chrom.sizes"
    )
    chromsizes = pd.read_csv(target_url, sep="\t", header=None)
    chromsizes.columns = ["Chromosome", "End"]
    chromsizes["Start"] = [0] * chromsizes.shape[0]
    chromsizes = chromsizes.loc[:, ["Chromosome", "Start", "End"]]
    chromsizes = pr.PyRanges(chromsizes)
    chromsizes.to_csv("chromsizes.txt")
    chromsizes
else:
    chromsizes = pd.read_csv("chromsizes.txt")

In [4]:
cto_consensus_path_dict = {
    x.split("/")[-1].split(f"__")[0]: x
    for x in sorted(glob.glob("cistopic_objects/*singlets.model*dimreduc.pkl"))
}
cto_consensus_path_dict

{'BIO_ddseq_m1c1.FULL': 'cistopic_objects/BIO_ddseq_m1c1.FULL__cto.scrublet0-4.fmx.singlets.model_12topics.dimreduc.pkl',
 'BIO_ddseq_m1c2.FULL': 'cistopic_objects/BIO_ddseq_m1c2.FULL__cto.scrublet0-4.fmx.singlets.model_12topics.dimreduc.pkl',
 'BIO_ddseq_m1c3.FULL': 'cistopic_objects/BIO_ddseq_m1c3.FULL__cto.scrublet0-4.fmx.singlets.model_15topics.dimreduc.pkl',
 'BIO_ddseq_m1c4.FULL': 'cistopic_objects/BIO_ddseq_m1c4.FULL__cto.scrublet0-4.fmx.singlets.model_10topics.dimreduc.pkl',
 'BIO_ddseq_m1c5.FULL': 'cistopic_objects/BIO_ddseq_m1c5.FULL__cto.scrublet0-4.fmx.singlets.model_10topics.dimreduc.pkl',
 'BIO_ddseq_m1c6.FULL': 'cistopic_objects/BIO_ddseq_m1c6.FULL__cto.scrublet0-4.fmx.singlets.model_10topics.dimreduc.pkl',
 'BIO_ddseq_m1c7.FULL': 'cistopic_objects/BIO_ddseq_m1c7.FULL__cto.scrublet0-4.fmx.singlets.model_10topics.dimreduc.pkl',
 'BIO_ddseq_m1c8.FULL': 'cistopic_objects/BIO_ddseq_m1c8.FULL__cto.scrublet0-4.fmx.singlets.model_10topics.dimreduc.pkl',
 'BIO_ddseq_m2c1.FULL': 

In [5]:
filenames = sorted(
    glob.glob("../1_data_repository/publicdata_full_fragments_vsn/*.fragments.tsv.gz")
)
fragments_path_dict = {}
for filename in filenames:
    sample = filename.split("/")[-1].split(".fragments.tsv.gz")[0]
    fragments_path_dict[sample] = filename
fragments_path_dict

{'BIO_ddseq_m1c1.FULL': '../1_data_repository/publicdata_full_fragments_vsn/BIO_ddseq_m1c1.FULL.fragments.tsv.gz',
 'BIO_ddseq_m1c2.FULL': '../1_data_repository/publicdata_full_fragments_vsn/BIO_ddseq_m1c2.FULL.fragments.tsv.gz',
 'BIO_ddseq_m1c3.FULL': '../1_data_repository/publicdata_full_fragments_vsn/BIO_ddseq_m1c3.FULL.fragments.tsv.gz',
 'BIO_ddseq_m1c4.FULL': '../1_data_repository/publicdata_full_fragments_vsn/BIO_ddseq_m1c4.FULL.fragments.tsv.gz',
 'BIO_ddseq_m1c5.FULL': '../1_data_repository/publicdata_full_fragments_vsn/BIO_ddseq_m1c5.FULL.fragments.tsv.gz',
 'BIO_ddseq_m1c6.FULL': '../1_data_repository/publicdata_full_fragments_vsn/BIO_ddseq_m1c6.FULL.fragments.tsv.gz',
 'BIO_ddseq_m1c7.FULL': '../1_data_repository/publicdata_full_fragments_vsn/BIO_ddseq_m1c7.FULL.fragments.tsv.gz',
 'BIO_ddseq_m1c8.FULL': '../1_data_repository/publicdata_full_fragments_vsn/BIO_ddseq_m1c8.FULL.fragments.tsv.gz',
 'BIO_ddseq_m2c1.FULL': '../1_data_repository/publicdata_full_fragments_vsn/BIO_

In [6]:
import gc
import logging
import os
import re
import subprocess
import sys
from typing import Dict, List, Optional, Union

import numpy as np
import pandas as pd
import pyBigWig
import pyranges as pr
import ray

from pycisTopic.cistopic_class import *
from pycisTopic.utils import *


def export_pseudobulk(
    input_data: Union["CistopicObject", pd.DataFrame, Dict[str, pd.DataFrame]],
    variable: str,
    chromsizes: Union[pd.DataFrame, pr.PyRanges],
    bed_path: str,
    bigwig_path: str,
    path_to_fragments: Optional[Dict[str, str]] = None,
    sample_id_col: Optional[str] = "sample_id",
    n_cpu: Optional[int] = 1,
    normalize_bigwig: Optional[bool] = True,
    remove_duplicates: Optional[bool] = True,
    split_pattern: Optional[str] = "___",
    use_polars: Optional[bool] = True,
    **kwargs
):
    """
    Create pseudobulks as bed and bigwig from single cell fragments file given a barcode annotation.
    Parameters
    ---------
    input_data: CistopicObject or pd.DataFrame
            A :class:`CistopicObject` containing the specified `variable` as a column in :class:`CistopicObject.cell_data` or a cell metadata
            :class:`pd.DataFrame` containing barcode as rows, containing the specified `variable` as a column (additional columns are
            possible) and a `sample_id` column. Index names must contain the BARCODE (e.g. ATGTCGTC-1), additional tags are possible separating with -
            (e.g. ATGCTGTGCG-1-Sample_1). The levels in the sample_id column must agree with the keys in the path_to_fragments dictionary.
            Alternatively, if the cell metadata contains a column named barcode it will be used instead of the index names.
    variable: str
            A character string indicating the column that will be used to create the different group pseudobulk. It must be included in
            the cell metadata provided as input_data.
    chromsizes: pd.DataFrame or pr.PyRanges
            A data frame or :class:`pr.PyRanges` containing size of each chromosome, containing 'Chromosome', 'Start' and 'End' columns.
    bed_path: str
            Path to folder where the fragments bed files per group will be saved. If None, files will not be generated.
    bigwig_path: str
            Path to folder where the bigwig files per group will be saved. If None, files will not be generated.
    path_to_fragments: str or dict, optional
            A dictionary of character strings, with sample name as names indicating the path to the fragments file/s from which pseudobulk profiles have to
            be created. If a :class:`CistopicObject` is provided as input it will be ignored, but if a cell metadata :class:`pd.DataFrame` is provided it
            is necessary to provide it. The keys of the dictionary need to match with the sample_id tag added to the index names of the input data frame.
    sample_id_col: str, optional
            Name of the column containing the sample name per barcode in the input :class:`CistopicObject.cell_data` or class:`pd.DataFrame`. Default: 'sample_id'.
    n_cpu: int, optional
            Number of cores to use. Default: 1.
    normalize_bigwig: bool, optional
            Whether bigwig files should be CPM normalized. Default: True.
    remove_duplicates: bool, optional
            Whether duplicates should be removed before converting the data to bigwig.
    split_pattern: str, optional
            Pattern to split cell barcode from sample id. Default: ___ .
    use_polars: bool, optional
            Whether to use polars to read fragments files. Default: True.
    **kwargs
            Additional parameters for ray.init()
    Return
    ------
    dict
            A dictionary containing the paths to the newly created bed fragments files per group a dictionary containing the paths to the
            newly created bigwig files per group.
    """
    # Create logger
    level = logging.INFO
    log_format = "%(asctime)s %(name)-12s %(levelname)-8s %(message)s"
    handlers = [logging.StreamHandler(stream=sys.stdout)]
    logging.basicConfig(level=level, format=log_format, handlers=handlers)
    log = logging.getLogger("cisTopic")

    # Get fragments file
    if isinstance(input_data, CistopicObject):
        path_to_fragments = input_data.path_to_fragments
        if path_to_fragments is None:
            log.error("No path_to_fragments in this cisTopic object.")
        cell_data = input_data.cell_data
    elif isinstance(input_data, pd.DataFrame):
        if path_to_fragments is None:
            log.error("Please, provide path_to_fragments.")
        cell_data = input_data
    # Check for sample_id column
    try:
        sample_ids = list(set(cell_data[sample_id_col]))
    except ValueError:
        print(
            'Please, include a sample identification column (e.g. "sample_id") in your cell metadata!'
        )

    # Get fragments
    fragments_df_dict = {}
    for sample_id in path_to_fragments.keys():
        if sample_id not in sample_ids:
            log.info(
                "The following path_to_fragments entry is not found in the cell metadata sample_id_col: ",
                sample_id,
                ". It will be ignored.",
            )
        else:
            log.info("Reading fragments from " + path_to_fragments[sample_id])
            fragments_df = read_fragments_from_file(
                path_to_fragments[sample_id], use_polars=use_polars
            ).df
            # Convert to int32 for memory efficiency
            fragments_df.Start = np.int32(fragments_df.Start)
            fragments_df.End = np.int32(fragments_df.End)
            if "Score" in fragments_df:
                fragments_df.Score = np.int32(fragments_df.Score)
            if "barcode" in cell_data:
                fragments_df = fragments_df.loc[
                    fragments_df["Name"].isin(cell_data["barcode"].tolist())
                ]
            else:
                fragments_df = fragments_df.loc[
                    fragments_df["Name"].isin(
                        prepare_tag_cells(cell_data.index.tolist(), split_pattern)
                    )
                ]
            fragments_df_dict[sample_id] = fragments_df

    # Set groups
    if "barcode" in cell_data:
        cell_data = cell_data.loc[:, [variable, sample_id_col, "barcode"]]
    else:
        cell_data = cell_data.loc[:, [variable, sample_id_col]]
    cell_data[variable] = cell_data[variable].replace(" ", "", regex=True)
    cell_data[variable] = cell_data[variable].replace("[^A-Za-z0-9]+", "_", regex=True)
    groups = sorted(list(set(cell_data[variable])))
    # Check chromosome sizes
    if isinstance(chromsizes, pd.DataFrame):
        chromsizes = chromsizes.loc[:, ["Chromosome", "Start", "End"]]
        chromsizes = pr.PyRanges(chromsizes)
    # Check that output dir exist and generate output paths
    if isinstance(bed_path, str):
        if not os.path.exists(bed_path):
            os.makedirs(bed_path)
        bed_paths = {
            group: os.path.join(bed_path, str(group) + ".bed.gz") for group in groups
        }
    else:
        bed_paths = {}
    if isinstance(bigwig_path, str):
        if not os.path.exists(bigwig_path):
            os.makedirs(bigwig_path)
        bw_paths = {
            group: os.path.join(bigwig_path, str(group) + ".bw") for group in groups
        }
    else:
        bw_paths = {}
    # Create pseudobulks
    if n_cpu > 1:
        ray.init(num_cpus=n_cpu, **kwargs)
        ray_handle = ray.wait(
            [
                export_pseudobulk_ray.remote(
                    cell_data,
                    group,
                    fragments_df_dict,
                    chromsizes,
                    bigwig_path,
                    bed_path,
                    sample_id_col,
                    normalize_bigwig,
                    remove_duplicates,
                    split_pattern,
                )
                for group in groups
            ],
            num_returns=len(groups),
        )
        ray.shutdown()
    else:
        [
            export_pseudobulk_one_sample(
                cell_data,
                group,
                fragments_df_dict,
                chromsizes,
                bigwig_path,
                bed_path,
                sample_id_col,
                normalize_bigwig,
                remove_duplicates,
                split_pattern,
            )
            for group in groups
        ]

    return bw_paths, bed_paths


def export_pseudobulk_one_sample(
    cell_data: pd.DataFrame,
    group: str,
    fragments_df_dict: Dict[str, pd.DataFrame],
    chromsizes: pr.PyRanges,
    bigwig_path: str,
    bed_path: str,
    sample_id_col: Optional[str] = "sample_id",
    normalize_bigwig: Optional[bool] = True,
    remove_duplicates: Optional[bool] = True,
    split_pattern: Optional[str] = "___",
):
    """
    Create pseudobulk as bed and bigwig from single cell fragments file given a barcode annotation and a group.
    Parameters
    ---------
    cell_data: pd.DataFrame
            A cell metadata :class:`pd.Dataframe` containing barcodes, their annotation and their sample of origin.
    group: str
            A character string indicating the group for which pseudobulks will be created.
    fragments_df_dict: dict
            A dictionary containing data frames as values with 'Chromosome', 'Start', 'End', 'Name', and 'Score' as columns; and sample label
            as keys. 'Score' indicates the number of times that a fragments is found assigned to that barcode.
    chromsizes: pr.PyRanges
            A :class:`pr.PyRanges` containing size of each column, containing 'Chromosome', 'Start' and 'End' columns.
    bigwig_path: str
            Path to folder where the bigwig file will be saved.
    bed_path: str
            Path to folder where the fragments bed file will be saved.
    sample_id_col: str, optional
            Name of the column containing the sample name per barcode in the input :class:`CistopicObject.cell_data` or class:`pd.DataFrame`. Default: 'sample_id'.
    normalize_bigwig: bool, optional
            Whether bigwig files should be CPM normalized. Default: True.
    remove_duplicates: bool, optional
            Whether duplicates should be removed before converting the data to bigwig.
    split_pattern: str
            Pattern to split cell barcode from sample id. Default: ___ .
    """
    # Create logger
    level = logging.INFO
    log_format = "%(asctime)s %(name)-12s %(levelname)-8s %(message)s"
    handlers = [logging.StreamHandler(stream=sys.stdout)]
    logging.basicConfig(level=level, format=log_format, handlers=handlers)
    log = logging.getLogger("cisTopic")

    log.info("Creating pseudobulk for " + str(group))
    group_fragments_list = []
    group_fragments_dict = {}
    for sample_id in fragments_df_dict:
        sample_data = cell_data[cell_data.loc[:, sample_id_col].isin([sample_id])]
        if "barcode" in sample_data:
            sample_data.index = sample_data["barcode"].tolist()
        else:
            sample_data.index = prepare_tag_cells(
                sample_data.index.tolist(), split_pattern
            )
        group_var = sample_data.iloc[:, 0]
        barcodes = group_var[group_var.isin([group])].index.tolist()
        fragments_df = fragments_df_dict[sample_id]
        group_fragments = fragments_df.loc[fragments_df["Name"].isin(barcodes)]
        if len(fragments_df_dict) > 1:
            group_fragments_dict[sample_id] = group_fragments

    if len(fragments_df_dict) > 1:
        group_fragments_list = [
            group_fragments_dict[list(group_fragments_dict.keys())[x]]
            for x in range(len(fragments_df_dict))
        ]
        group_fragments = group_fragments_list[0].append(group_fragments_list[1:])

    group_fragments = group_fragments[
        group_fragments["Chromosome"].isin(chromsizes.Chromosome)
    ]

    del group_fragments_dict
    del group_fragments_list
    del fragments_df
    gc.collect()

    group_pr = pr.PyRanges(group_fragments)
    if isinstance(bigwig_path, str):
        bigwig_path_group = os.path.join(bigwig_path, str(group) + ".bw")
        if remove_duplicates:
            group_pr.to_bigwig(
                path=bigwig_path_group,
                chromosome_sizes=chromsizes,
                rpm=normalize_bigwig,
            )
        else:
            group_pr.to_bigwig(
                path=bigwig_path_group,
                chromosome_sizes=chromsizes,
                rpm=normalize_bigwig,
                value_col="Score",
            )
    if isinstance(bed_path, str):
        bed_path_group = os.path.join(bed_path, str(group) + ".bed.gz")
        group_pr.to_bed(
            path=bed_path_group, keep=False, compression="infer", chain=False
        )

    log.info(str(group) + " done!")


@ray.remote
def export_pseudobulk_ray(
    cell_data: pd.DataFrame,
    group: str,
    fragments_df_dict: Dict[str, pd.DataFrame],
    chromsizes: pr.PyRanges,
    bigwig_path: str,
    bed_path: str,
    sample_id_col: Optional[str] = "sample_id",
    normalize_bigwig: Optional[bool] = True,
    remove_duplicates: Optional[bool] = True,
    split_pattern: Optional[str] = "___",
):
    """
    Create pseudobulk as bed and bigwig from single cell fragments file given a barcode annotation and a group.
    Parameters
    ---------
    cell_data: pd.DataFrame
            A cell metadata :class:`pd.Dataframe` containing barcodes, their annotation and their sample of origin.
    group: str
            A character string indicating the group for which pseudobulks will be created.
    fragments_df_dict: dict
            A dictionary containing data frames as values with 'Chromosome', 'Start', 'End', 'Name', and 'Score' as columns; and sample label
            as keys. 'Score' indicates the number of times that a fragments is found assigned to that barcode.
    chromsizes: pr.PyRanges
            A :class:`pr.PyRanges` containing size of each column, containing 'Chromosome', 'Start' and 'End' columns.
    bed_path: str
            Path to folder where the fragments bed file will be saved.
    bigwig_path: str
            Path to folder where the bigwig file will be saved.
    sample_id_col: str, optional
            Name of the column containing the sample name per barcode in the input :class:`CistopicObject.cell_data` or class:`pd.DataFrame`. Default: 'sample_id'.
    normalize_bigwig: bool, optional
            Whether bigwig files should be CPM normalized. Default: True.
    remove_duplicates: bool, optional
            Whether duplicates should be removed before converting the data to bigwig.
    split_pattern: str
            Pattern to split cell barcode from sample id. Default: ___ .
    """
    export_pseudobulk_one_sample(
        cell_data,
        group,
        fragments_df_dict,
        chromsizes,
        bigwig_path,
        bed_path,
        sample_id_col,
        normalize_bigwig,
        remove_duplicates,
        split_pattern,
    )

In [7]:
sample = "OHS_s3atac_mouse.FULL"
cto_path = cto_consensus_path_dict[sample]
with open(cto_path, "rb") as f:
    cto = pickle.load(f)

In [8]:
# for sample in ["BRO_mtscatac_1.LIBDS"]:
bw_paths_dict = {}
bed_paths_dict = {}

import ray

if ray.is_initialized():
    print("Shutting down Ray")
    ray.shutdown()

for sample in cto_consensus_path_dict.keys():
    # for sample in ["BRO_mtscatac_1.LIBDS"]:
    bed_path = os.path.join(
        "final_consensus_peaks", f"{sample}__SCREEN_pseudobulk_bed_files"
    )
    bw_path = os.path.join(
        "final_consensus_peaks", f"{sample}__SCREEN_pseudobulk_bw_files"
    )
    if not os.path.exists(bed_path):
        cto_path = cto_consensus_path_dict[sample]
        with open(cto_path, "rb") as f:
            cto = pickle.load(f)

        cto.cell_data["sample_id_fixed"] = [
            x.replace(".fragments.tsv.gz", "") for x in cto.cell_data["sample_id"]
        ]
        if sample == "OHS_s3atac_mouse.FULL":
            cto.cell_data["sample_id_fixed"] = "OHS_s3atac_mouse.FULL"

        bw_paths, bed_paths = export_pseudobulk(
            input_data=cto,
            variable="pycisTopic_leiden_10_0.4",
            sample_id_col="sample_id_fixed",
            chromsizes=chromsizes,
            bed_path=bed_path,
            bigwig_path=bw_path,
            path_to_fragments=fragments_path_dict[sample],
            n_cpu=16,
            normalize_bigwig=True,
            remove_duplicates=True,
        )

        if ray.is_initialized():
            print("Shutting down Ray")
            ray.shutdown()
    else:
        print(f"{bed_path} exists, skipping...")

final_consensus_peaks/BIO_ddseq_m1c1.FULL__SCREEN_pseudobulk_bed_files exists, skipping...
final_consensus_peaks/BIO_ddseq_m1c2.FULL__SCREEN_pseudobulk_bed_files exists, skipping...
final_consensus_peaks/BIO_ddseq_m1c3.FULL__SCREEN_pseudobulk_bed_files exists, skipping...
final_consensus_peaks/BIO_ddseq_m1c4.FULL__SCREEN_pseudobulk_bed_files exists, skipping...
final_consensus_peaks/BIO_ddseq_m1c5.FULL__SCREEN_pseudobulk_bed_files exists, skipping...
final_consensus_peaks/BIO_ddseq_m1c6.FULL__SCREEN_pseudobulk_bed_files exists, skipping...
final_consensus_peaks/BIO_ddseq_m1c7.FULL__SCREEN_pseudobulk_bed_files exists, skipping...
final_consensus_peaks/BIO_ddseq_m1c8.FULL__SCREEN_pseudobulk_bed_files exists, skipping...
final_consensus_peaks/BIO_ddseq_m2c1.FULL__SCREEN_pseudobulk_bed_files exists, skipping...
final_consensus_peaks/BIO_ddseq_m2c2.FULL__SCREEN_pseudobulk_bed_files exists, skipping...
final_consensus_peaks/BIO_ddseq_m2c3.FULL__SCREEN_pseudobulk_bed_files exists, skipping...

# consensus

In [9]:
bw_path_dict = {
    x.split("/")[-1].split(f"__")[0]: x
    for x in sorted(glob.glob("final_consensus_peaks/*_pseudobulk_bw_files"))
}
bw_path_dict

{'BIO_ddseq_m1c1.FULL': 'final_consensus_peaks/BIO_ddseq_m1c1.FULL__SCREEN_pseudobulk_bw_files',
 'BIO_ddseq_m1c2.FULL': 'final_consensus_peaks/BIO_ddseq_m1c2.FULL__SCREEN_pseudobulk_bw_files',
 'BIO_ddseq_m1c3.FULL': 'final_consensus_peaks/BIO_ddseq_m1c3.FULL__SCREEN_pseudobulk_bw_files',
 'BIO_ddseq_m1c4.FULL': 'final_consensus_peaks/BIO_ddseq_m1c4.FULL__SCREEN_pseudobulk_bw_files',
 'BIO_ddseq_m1c5.FULL': 'final_consensus_peaks/BIO_ddseq_m1c5.FULL__SCREEN_pseudobulk_bw_files',
 'BIO_ddseq_m1c6.FULL': 'final_consensus_peaks/BIO_ddseq_m1c6.FULL__SCREEN_pseudobulk_bw_files',
 'BIO_ddseq_m1c7.FULL': 'final_consensus_peaks/BIO_ddseq_m1c7.FULL__SCREEN_pseudobulk_bw_files',
 'BIO_ddseq_m1c8.FULL': 'final_consensus_peaks/BIO_ddseq_m1c8.FULL__SCREEN_pseudobulk_bw_files',
 'BIO_ddseq_m2c1.FULL': 'final_consensus_peaks/BIO_ddseq_m2c1.FULL__SCREEN_pseudobulk_bw_files',
 'BIO_ddseq_m2c2.FULL': 'final_consensus_peaks/BIO_ddseq_m2c2.FULL__SCREEN_pseudobulk_bw_files',
 'BIO_ddseq_m2c3.FULL': 'final

In [10]:
bed_path_dict = {
    x.split("/")[-1].split(f"__")[0]: x
    for x in sorted(glob.glob("final_consensus_peaks/*_pseudobulk_bed_files"))
}
bed_path_dict

{'BIO_ddseq_m1c1.FULL': 'final_consensus_peaks/BIO_ddseq_m1c1.FULL__SCREEN_pseudobulk_bed_files',
 'BIO_ddseq_m1c2.FULL': 'final_consensus_peaks/BIO_ddseq_m1c2.FULL__SCREEN_pseudobulk_bed_files',
 'BIO_ddseq_m1c3.FULL': 'final_consensus_peaks/BIO_ddseq_m1c3.FULL__SCREEN_pseudobulk_bed_files',
 'BIO_ddseq_m1c4.FULL': 'final_consensus_peaks/BIO_ddseq_m1c4.FULL__SCREEN_pseudobulk_bed_files',
 'BIO_ddseq_m1c5.FULL': 'final_consensus_peaks/BIO_ddseq_m1c5.FULL__SCREEN_pseudobulk_bed_files',
 'BIO_ddseq_m1c6.FULL': 'final_consensus_peaks/BIO_ddseq_m1c6.FULL__SCREEN_pseudobulk_bed_files',
 'BIO_ddseq_m1c7.FULL': 'final_consensus_peaks/BIO_ddseq_m1c7.FULL__SCREEN_pseudobulk_bed_files',
 'BIO_ddseq_m1c8.FULL': 'final_consensus_peaks/BIO_ddseq_m1c8.FULL__SCREEN_pseudobulk_bed_files',
 'BIO_ddseq_m2c1.FULL': 'final_consensus_peaks/BIO_ddseq_m2c1.FULL__SCREEN_pseudobulk_bed_files',
 'BIO_ddseq_m2c2.FULL': 'final_consensus_peaks/BIO_ddseq_m2c2.FULL__SCREEN_pseudobulk_bed_files',
 'BIO_ddseq_m2c3.FUL

In [11]:
for sample in bed_path_dict.keys():
    bed_paths = {
        x.split("/")[-1].split("__")[0].split(".bed.gz")[0]: x
        for x in glob.glob(bed_path_dict[sample] + "/*")
    }

In [12]:
from pycisTopic.pseudobulk_peak_calling import *

In [13]:
import ray

<span id="papermill-error-cell" style="color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;">Execution using papermill encountered an exception here and stopped:</span>

In [14]:
cto_path = cto_consensus_path_dict[sample]
with open(cto_path, "rb") as f:
    cto = pickle.load(f)

In [15]:
cto.cell_data["pycisTopic_leiden_10_0.4"].unique()

array(['2', '3', '4', '0', '10', '9', '8', '7', '6', '12', '1', '11',
       '13', '5', '15', '14', '16'], dtype=object)

In [16]:
narrow_peaks_dict = {}
ray.shutdown()
for sample in bed_path_dict.keys():
    narrow_peaks_dict_path = bed_path_dict[sample].replace(
        "_pseudobulk_bed_files", "_narrow_peaks_dict.pkl"
    )
    peak_path = os.path.join(
        "final_consensus_peaks", f"{sample}__SCREEN_consensus_peaks"
    )
    if not os.path.exists(peak_path):
        os.mkdir(peak_path)

    if not os.path.exists(narrow_peaks_dict_path):
        cto_path = cto_consensus_path_dict[sample]
        with open(cto_path, "rb") as f:
            cto = pickle.load(f)

        cto_celltypes = set(cto.cell_data["pycisTopic_leiden_10_0.4"].unique())
        bed_celltypes = set(
            [
                x.split(".")[0].replace("+", "").replace("_", "")
                for x in os.listdir(bed_path_dict[sample])
            ]
        )

        if cto_celltypes == bed_celltypes:
            print(f"Starting {narrow_peaks_dict_path}")
            bed_paths = {
                x.split("/")[-1].split("__")[0].split(".bed.gz")[0]: x
                for x in glob.glob(bed_path_dict[sample] + "/*")
            }
            narrow_peaks_dict = peak_calling(
                macs_path="macs2",
                bed_paths=bed_paths,
                outdir=peak_path,
                genome_size="mm",
                n_cpu=20,
                input_format="BEDPE",
                shift=73,
                ext_size=146,
                keep_dup="all",
                q_value=0.05,
            )
            with open(narrow_peaks_dict_path, "wb") as f:
                pickle.dump(narrow_peaks_dict, f)
        else:
            print(f"{sample} cell types not matching!! Rerun bed file writing.")
            print(f"\t{bed_celltypes}")
            print(f"\t{cto_celltypes}")
    else:
        print(f"{narrow_peaks_dict_path} already exists")

Starting final_consensus_peaks/BIO_ddseq_m1c1.FULL__SCREEN_narrow_peaks_dict.pkl


2022-11-04 14:59:17,267	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(macs_call_peak_ray pid=2856476)[0m 2022-11-04 14:59:22,856 cisTopic     INFO     Calling peaks for 7 with macs2 callpeak --treatment final_consensus_peaks/BIO_ddseq_m1c1.FULL__SCREEN_pseudobulk_bed_files/7.bed.gz --name 7  --outdir final_consensus_peaks/BIO_ddseq_m1c1.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2856473)[0m 2022-11-04 14:59:22,822 cisTopic     INFO     Calling peaks for 4 with macs2 callpeak --treatment final_consensus_peaks/BIO_ddseq_m1c1.FULL__SCREEN_pseudobulk_bed_files/4.bed.gz --name 4  --outdir final_consensus_peaks/BIO_ddseq_m1c1.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2856481)[0m 2022-11-04 14:59:22,855 cisTopic     INFO     Calling peaks for 1 with macs2 callpeak --treatment final_

2022-11-04 15:01:38,759	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(macs_call_peak_ray pid=2857513)[0m 2022-11-04 15:01:44,079 cisTopic     INFO     Calling peaks for 1 with macs2 callpeak --treatment final_consensus_peaks/BIO_ddseq_m1c2.FULL__SCREEN_pseudobulk_bed_files/1.bed.gz --name 1  --outdir final_consensus_peaks/BIO_ddseq_m1c2.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2857520)[0m 2022-11-04 15:01:44,049 cisTopic     INFO     Calling peaks for 9 with macs2 callpeak --treatment final_consensus_peaks/BIO_ddseq_m1c2.FULL__SCREEN_pseudobulk_bed_files/9.bed.gz --name 9  --outdir final_consensus_peaks/BIO_ddseq_m1c2.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2857519)[0m 2022-11-04 15:01:44,098 cisTopic     INFO     Calling peaks for 2 with macs2 callpeak --treatment final_

2022-11-04 15:04:08,937	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(macs_call_peak_ray pid=2858622)[0m 2022-11-04 15:04:14,228 cisTopic     INFO     Calling peaks for 0 with macs2 callpeak --treatment final_consensus_peaks/BIO_ddseq_m1c3.FULL__SCREEN_pseudobulk_bed_files/0.bed.gz --name 0  --outdir final_consensus_peaks/BIO_ddseq_m1c3.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2858618)[0m 2022-11-04 15:04:14,327 cisTopic     INFO     Calling peaks for 5 with macs2 callpeak --treatment final_consensus_peaks/BIO_ddseq_m1c3.FULL__SCREEN_pseudobulk_bed_files/5.bed.gz --name 5  --outdir final_consensus_peaks/BIO_ddseq_m1c3.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2858612)[0m 2022-11-04 15:04:14,328 cisTopic     INFO     Calling peaks for 10 with macs2 callpeak --treatment final

2022-11-04 15:06:11,461	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(macs_call_peak_ray pid=2859602)[0m 2022-11-04 15:06:16,531 cisTopic     INFO     Calling peaks for 4 with macs2 callpeak --treatment final_consensus_peaks/BIO_ddseq_m1c4.FULL__SCREEN_pseudobulk_bed_files/4.bed.gz --name 4  --outdir final_consensus_peaks/BIO_ddseq_m1c4.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2859614)[0m 2022-11-04 15:06:16,548 cisTopic     INFO     Calling peaks for 2 with macs2 callpeak --treatment final_consensus_peaks/BIO_ddseq_m1c4.FULL__SCREEN_pseudobulk_bed_files/2.bed.gz --name 2  --outdir final_consensus_peaks/BIO_ddseq_m1c4.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2859615)[0m 2022-11-04 15:06:16,566 cisTopic     INFO     Calling peaks for 1 with macs2 callpeak --treatment final_

2022-11-04 15:09:00,183	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(macs_call_peak_ray pid=2860692)[0m 2022-11-04 15:09:06,268 cisTopic     INFO     Calling peaks for 8 with macs2 callpeak --treatment final_consensus_peaks/BIO_ddseq_m1c5.FULL__SCREEN_pseudobulk_bed_files/8.bed.gz --name 8  --outdir final_consensus_peaks/BIO_ddseq_m1c5.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2860686)[0m 2022-11-04 15:09:06,241 cisTopic     INFO     Calling peaks for 14 with macs2 callpeak --treatment final_consensus_peaks/BIO_ddseq_m1c5.FULL__SCREEN_pseudobulk_bed_files/14.bed.gz --name 14  --outdir final_consensus_peaks/BIO_ddseq_m1c5.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2860684)[0m 2022-11-04 15:09:06,319 cisTopic     INFO     Calling peaks for 4 with macs2 callpeak --treatment fin

2022-11-04 15:11:50,754	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(macs_call_peak_ray pid=2861806)[0m 2022-11-04 15:11:55,942 cisTopic     INFO     Calling peaks for 3 with macs2 callpeak --treatment final_consensus_peaks/BIO_ddseq_m1c6.FULL__SCREEN_pseudobulk_bed_files/3.bed.gz --name 3  --outdir final_consensus_peaks/BIO_ddseq_m1c6.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2861809)[0m 2022-11-04 15:11:55,950 cisTopic     INFO     Calling peaks for 4 with macs2 callpeak --treatment final_consensus_peaks/BIO_ddseq_m1c6.FULL__SCREEN_pseudobulk_bed_files/4.bed.gz --name 4  --outdir final_consensus_peaks/BIO_ddseq_m1c6.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2861804)[0m 2022-11-04 15:11:55,952 cisTopic     INFO     Calling peaks for 5 with macs2 callpeak --treatment final_

2022-11-04 15:14:12,868	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(macs_call_peak_ray pid=2862847)[0m 2022-11-04 15:14:17,976 cisTopic     INFO     Calling peaks for 4 with macs2 callpeak --treatment final_consensus_peaks/BIO_ddseq_m1c7.FULL__SCREEN_pseudobulk_bed_files/4.bed.gz --name 4  --outdir final_consensus_peaks/BIO_ddseq_m1c7.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2862841)[0m 2022-11-04 15:14:17,992 cisTopic     INFO     Calling peaks for 9 with macs2 callpeak --treatment final_consensus_peaks/BIO_ddseq_m1c7.FULL__SCREEN_pseudobulk_bed_files/9.bed.gz --name 9  --outdir final_consensus_peaks/BIO_ddseq_m1c7.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2862849)[0m 2022-11-04 15:14:18,012 cisTopic     INFO     Calling peaks for 3 with macs2 callpeak --treatment final_

2022-11-04 15:16:19,699	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(macs_call_peak_ray pid=2863837)[0m 2022-11-04 15:16:25,248 cisTopic     INFO     Calling peaks for 11 with macs2 callpeak --treatment final_consensus_peaks/BIO_ddseq_m1c8.FULL__SCREEN_pseudobulk_bed_files/11.bed.gz --name 11  --outdir final_consensus_peaks/BIO_ddseq_m1c8.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2863842)[0m 2022-11-04 15:16:25,298 cisTopic     INFO     Calling peaks for 6 with macs2 callpeak --treatment final_consensus_peaks/BIO_ddseq_m1c8.FULL__SCREEN_pseudobulk_bed_files/6.bed.gz --name 6  --outdir final_consensus_peaks/BIO_ddseq_m1c8.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2863839)[0m 2022-11-04 15:16:25,313 cisTopic     INFO     Calling peaks for 9 with macs2 callpeak --treatment fin

2022-11-04 15:18:22,079	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(macs_call_peak_ray pid=2864830)[0m 2022-11-04 15:18:27,340 cisTopic     INFO     Calling peaks for 10 with macs2 callpeak --treatment final_consensus_peaks/BIO_ddseq_m2c1.FULL__SCREEN_pseudobulk_bed_files/10.bed.gz --name 10  --outdir final_consensus_peaks/BIO_ddseq_m2c1.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2864844)[0m 2022-11-04 15:18:27,334 cisTopic     INFO     Calling peaks for 9 with macs2 callpeak --treatment final_consensus_peaks/BIO_ddseq_m2c1.FULL__SCREEN_pseudobulk_bed_files/9.bed.gz --name 9  --outdir final_consensus_peaks/BIO_ddseq_m2c1.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2864839)[0m 2022-11-04 15:18:27,321 cisTopic     INFO     Calling peaks for 1 with macs2 callpeak --treatment fin

2022-11-04 15:20:31,499	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(macs_call_peak_ray pid=2865899)[0m 2022-11-04 15:20:37,175 cisTopic     INFO     Calling peaks for 8 with macs2 callpeak --treatment final_consensus_peaks/BIO_ddseq_m2c2.FULL__SCREEN_pseudobulk_bed_files/8.bed.gz --name 8  --outdir final_consensus_peaks/BIO_ddseq_m2c2.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2865903)[0m 2022-11-04 15:20:37,204 cisTopic     INFO     Calling peaks for 0 with macs2 callpeak --treatment final_consensus_peaks/BIO_ddseq_m2c2.FULL__SCREEN_pseudobulk_bed_files/0.bed.gz --name 0  --outdir final_consensus_peaks/BIO_ddseq_m2c2.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2865904)[0m 2022-11-04 15:20:37,220 cisTopic     INFO     Calling peaks for 1 with macs2 callpeak --treatment final_

2022-11-04 15:22:12,291	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(macs_call_peak_ray pid=2866914)[0m 2022-11-04 15:22:17,764 cisTopic     INFO     Calling peaks for 3 with macs2 callpeak --treatment final_consensus_peaks/BIO_ddseq_m2c3.FULL__SCREEN_pseudobulk_bed_files/3.bed.gz --name 3  --outdir final_consensus_peaks/BIO_ddseq_m2c3.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2866903)[0m 2022-11-04 15:22:17,796 cisTopic     INFO     Calling peaks for 8 with macs2 callpeak --treatment final_consensus_peaks/BIO_ddseq_m2c3.FULL__SCREEN_pseudobulk_bed_files/8.bed.gz --name 8  --outdir final_consensus_peaks/BIO_ddseq_m2c3.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2866916)[0m 2022-11-04 15:22:17,793 cisTopic     INFO     Calling peaks for 0 with macs2 callpeak --treatment final_

2022-11-04 15:24:12,480	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(macs_call_peak_ray pid=2867937)[0m 2022-11-04 15:24:17,473 cisTopic     INFO     Calling peaks for 3 with macs2 callpeak --treatment final_consensus_peaks/BIO_ddseq_m2c4.FULL__SCREEN_pseudobulk_bed_files/3.bed.gz --name 3  --outdir final_consensus_peaks/BIO_ddseq_m2c4.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2867948)[0m 2022-11-04 15:24:17,470 cisTopic     INFO     Calling peaks for 6 with macs2 callpeak --treatment final_consensus_peaks/BIO_ddseq_m2c4.FULL__SCREEN_pseudobulk_bed_files/6.bed.gz --name 6  --outdir final_consensus_peaks/BIO_ddseq_m2c4.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2867936)[0m 2022-11-04 15:24:17,495 cisTopic     INFO     Calling peaks for 8 with macs2 callpeak --treatment final_

2022-11-04 15:25:49,950	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(macs_call_peak_ray pid=2868900)[0m 2022-11-04 15:25:54,760 cisTopic     INFO     Calling peaks for 3 with macs2 callpeak --treatment final_consensus_peaks/OHS_s3atac_mouse.FULL__SCREEN_pseudobulk_bed_files/3.bed.gz --name 3  --outdir final_consensus_peaks/OHS_s3atac_mouse.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2868916)[0m 2022-11-04 15:25:54,749 cisTopic     INFO     Calling peaks for 5 with macs2 callpeak --treatment final_consensus_peaks/OHS_s3atac_mouse.FULL__SCREEN_pseudobulk_bed_files/5.bed.gz --name 5  --outdir final_consensus_peaks/OHS_s3atac_mouse.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2868913)[0m 2022-11-04 15:25:54,752 cisTopic     INFO     Calling peaks for 4 with macs2 callpeak --treatmen

2022-11-04 15:27:30,473	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(macs_call_peak_ray pid=2869810)[0m 2022-11-04 15:27:35,616 cisTopic     INFO     Calling peaks for 0 with macs2 callpeak --treatment final_consensus_peaks/TXG_10xmultiome_e18mousebrainfresh.FULL__SCREEN_pseudobulk_bed_files/0.bed.gz --name 0  --outdir final_consensus_peaks/TXG_10xmultiome_e18mousebrainfresh.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2869815)[0m 2022-11-04 15:27:35,593 cisTopic     INFO     Calling peaks for 5 with macs2 callpeak --treatment final_consensus_peaks/TXG_10xmultiome_e18mousebrainfresh.FULL__SCREEN_pseudobulk_bed_files/5.bed.gz --name 5  --outdir final_consensus_peaks/TXG_10xmultiome_e18mousebrainfresh.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2869819)[0m 2022-11-04 15:27:35,602 

2022-11-04 15:29:31,145	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(macs_call_peak_ray pid=2870820)[0m 2022-11-04 15:29:36,925 cisTopic     INFO     Calling peaks for 0 with macs2 callpeak --treatment final_consensus_peaks/TXG_10xv11_adultmousecortexchromiumx.FULL__SCREEN_pseudobulk_bed_files/0.bed.gz --name 0  --outdir final_consensus_peaks/TXG_10xv11_adultmousecortexchromiumx.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2870829)[0m 2022-11-04 15:29:36,980 cisTopic     INFO     Calling peaks for 6 with macs2 callpeak --treatment final_consensus_peaks/TXG_10xv11_adultmousecortexchromiumx.FULL__SCREEN_pseudobulk_bed_files/6.bed.gz --name 6  --outdir final_consensus_peaks/TXG_10xv11_adultmousecortexchromiumx.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2870834)[0m 2022-11-04 15:29

2022-11-04 15:32:48,318	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(macs_call_peak_ray pid=2872004)[0m 2022-11-04 15:32:54,463 cisTopic     INFO     Calling peaks for 0 with macs2 callpeak --treatment final_consensus_peaks/TXG_10xv1_adultmousefresh.FULL__SCREEN_pseudobulk_bed_files/0.bed.gz --name 0  --outdir final_consensus_peaks/TXG_10xv1_adultmousefresh.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2872014)[0m 2022-11-04 15:32:54,826 cisTopic     INFO     Calling peaks for 3 with macs2 callpeak --treatment final_consensus_peaks/TXG_10xv1_adultmousefresh.FULL__SCREEN_pseudobulk_bed_files/3.bed.gz --name 3  --outdir final_consensus_peaks/TXG_10xv1_adultmousefresh.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2872012)[0m 2022-11-04 15:32:54,766 cisTopic     INFO     Calling peaks 

2022-11-04 15:35:12,719	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(macs_call_peak_ray pid=2873061)[0m 2022-11-04 15:35:18,529 cisTopic     INFO     Calling peaks for 3 with macs2 callpeak --treatment final_consensus_peaks/TXG_10xv2_adultmousecortex.FULL__SCREEN_pseudobulk_bed_files/3.bed.gz --name 3  --outdir final_consensus_peaks/TXG_10xv2_adultmousecortex.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2873044)[0m 2022-11-04 15:35:18,521 cisTopic     INFO     Calling peaks for 11 with macs2 callpeak --treatment final_consensus_peaks/TXG_10xv2_adultmousecortex.FULL__SCREEN_pseudobulk_bed_files/11.bed.gz --name 11  --outdir final_consensus_peaks/TXG_10xv2_adultmousecortex.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2873042)[0m 2022-11-04 15:35:18,506 cisTopic     INFO     Calling

2022-11-04 15:38:22,110	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(macs_call_peak_ray pid=2874155)[0m 2022-11-04 15:38:27,694 cisTopic     INFO     Calling peaks for 0 with macs2 callpeak --treatment final_consensus_peaks/TXG_10xv2_adultmousecortexchromiumx.FULL__SCREEN_pseudobulk_bed_files/0.bed.gz --name 0  --outdir final_consensus_peaks/TXG_10xv2_adultmousecortexchromiumx.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2874154)[0m 2022-11-04 15:38:27,770 cisTopic     INFO     Calling peaks for 5 with macs2 callpeak --treatment final_consensus_peaks/TXG_10xv2_adultmousecortexchromiumx.FULL__SCREEN_pseudobulk_bed_files/5.bed.gz --name 5  --outdir final_consensus_peaks/TXG_10xv2_adultmousecortexchromiumx.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2874147)[0m 2022-11-04 15:38:27,

2022-11-04 15:42:09,752	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(macs_call_peak_ray pid=2875370)[0m 2022-11-04 15:42:16,769 cisTopic     INFO     Calling peaks for 37 with macs2 callpeak --treatment final_consensus_peaks/VIB_hydrop_1.FULL__SCREEN_pseudobulk_bed_files/37.bed.gz --name 37  --outdir final_consensus_peaks/VIB_hydrop_1.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2875372)[0m 2022-11-04 15:42:16,875 cisTopic     INFO     Calling peaks for 28 with macs2 callpeak --treatment final_consensus_peaks/VIB_hydrop_1.FULL__SCREEN_pseudobulk_bed_files/28.bed.gz --name 28  --outdir final_consensus_peaks/VIB_hydrop_1.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2875357)[0m 2022-11-04 15:42:16,952 cisTopic     INFO     Calling peaks for 0 with macs2 callpeak --treatment final_co

2022-11-04 15:43:52,840	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(macs_call_peak_ray pid=2876475)[0m 2022-11-04 15:43:59,283 cisTopic     INFO     Calling peaks for 15 with macs2 callpeak --treatment final_consensus_peaks/VIB_hydrop_2.FULL__SCREEN_pseudobulk_bed_files/15.bed.gz --name 15  --outdir final_consensus_peaks/VIB_hydrop_2.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2876478)[0m 2022-11-04 15:43:59,535 cisTopic     INFO     Calling peaks for 7 with macs2 callpeak --treatment final_consensus_peaks/VIB_hydrop_2.FULL__SCREEN_pseudobulk_bed_files/7.bed.gz --name 7  --outdir final_consensus_peaks/VIB_hydrop_2.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2876481)[0m 2022-11-04 15:43:59,573 cisTopic     INFO     Calling peaks for 9 with macs2 callpeak --treatment final_conse

2022-11-04 15:45:06,057	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(macs_call_peak_ray pid=2877478)[0m 2022-11-04 15:45:13,011 cisTopic     INFO     Calling peaks for 21 with macs2 callpeak --treatment final_consensus_peaks/VIB_hydrop_3.FULL__SCREEN_pseudobulk_bed_files/21.bed.gz --name 21  --outdir final_consensus_peaks/VIB_hydrop_3.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2877475)[0m 2022-11-04 15:45:13,094 cisTopic     INFO     Calling peaks for 18 with macs2 callpeak --treatment final_consensus_peaks/VIB_hydrop_3.FULL__SCREEN_pseudobulk_bed_files/18.bed.gz --name 18  --outdir final_consensus_peaks/VIB_hydrop_3.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2877473)[0m 2022-11-04 15:45:13,150 cisTopic     INFO     Calling peaks for 4 with macs2 callpeak --treatment final_co

2022-11-04 15:46:37,120	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(macs_call_peak_ray pid=2878528)[0m 2022-11-04 15:46:44,111 cisTopic     INFO     Calling peaks for 26 with macs2 callpeak --treatment final_consensus_peaks/VIB_hydrop_4.FULL__SCREEN_pseudobulk_bed_files/26.bed.gz --name 26  --outdir final_consensus_peaks/VIB_hydrop_4.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2878518)[0m 2022-11-04 15:46:44,146 cisTopic     INFO     Calling peaks for 1 with macs2 callpeak --treatment final_consensus_peaks/VIB_hydrop_4.FULL__SCREEN_pseudobulk_bed_files/1.bed.gz --name 1  --outdir final_consensus_peaks/VIB_hydrop_4.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2878524)[0m 2022-11-04 15:46:44,237 cisTopic     INFO     Calling peaks for 0 with macs2 callpeak --treatment final_conse

2022-11-04 15:47:54,671	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(macs_call_peak_ray pid=2879545)[0m 2022-11-04 15:48:01,076 cisTopic     INFO     Calling peaks for 16 with macs2 callpeak --treatment final_consensus_peaks/VIB_hydrop_5.FULL__SCREEN_pseudobulk_bed_files/16.bed.gz --name 16  --outdir final_consensus_peaks/VIB_hydrop_5.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2879557)[0m 2022-11-04 15:48:01,147 cisTopic     INFO     Calling peaks for 5 with macs2 callpeak --treatment final_consensus_peaks/VIB_hydrop_5.FULL__SCREEN_pseudobulk_bed_files/5.bed.gz --name 5  --outdir final_consensus_peaks/VIB_hydrop_5.FULL__SCREEN_consensus_peaks --format BEDPE --gsize mm --qvalue 0.05 --nomodel --shift 73 --extsize 146 --keep-dup all --call-summits --nolambda
[2m[36m(macs_call_peak_ray pid=2879554)[0m 2022-11-04 15:48:01,192 cisTopic     INFO     Calling peaks for 10 with macs2 callpeak --treatment final_cons

# call consensus peaks

In [17]:
from pycisTopic.iterative_peak_calling import get_consensus_peaks

In [18]:
narrow_peaks_path_dict = {
    x.split("/")[-1].split(f"__")[0]: x
    for x in sorted(glob.glob("final_consensus_peaks/*_narrow_peaks_dict.pkl"))
}
narrow_peaks_path_dict

{'BIO_ddseq_m1c1.FULL': 'final_consensus_peaks/BIO_ddseq_m1c1.FULL__SCREEN_narrow_peaks_dict.pkl',
 'BIO_ddseq_m1c2.FULL': 'final_consensus_peaks/BIO_ddseq_m1c2.FULL__SCREEN_narrow_peaks_dict.pkl',
 'BIO_ddseq_m1c3.FULL': 'final_consensus_peaks/BIO_ddseq_m1c3.FULL__SCREEN_narrow_peaks_dict.pkl',
 'BIO_ddseq_m1c4.FULL': 'final_consensus_peaks/BIO_ddseq_m1c4.FULL__SCREEN_narrow_peaks_dict.pkl',
 'BIO_ddseq_m1c5.FULL': 'final_consensus_peaks/BIO_ddseq_m1c5.FULL__SCREEN_narrow_peaks_dict.pkl',
 'BIO_ddseq_m1c6.FULL': 'final_consensus_peaks/BIO_ddseq_m1c6.FULL__SCREEN_narrow_peaks_dict.pkl',
 'BIO_ddseq_m1c7.FULL': 'final_consensus_peaks/BIO_ddseq_m1c7.FULL__SCREEN_narrow_peaks_dict.pkl',
 'BIO_ddseq_m1c8.FULL': 'final_consensus_peaks/BIO_ddseq_m1c8.FULL__SCREEN_narrow_peaks_dict.pkl',
 'BIO_ddseq_m2c1.FULL': 'final_consensus_peaks/BIO_ddseq_m2c1.FULL__SCREEN_narrow_peaks_dict.pkl',
 'BIO_ddseq_m2c2.FULL': 'final_consensus_peaks/BIO_ddseq_m2c2.FULL__SCREEN_narrow_peaks_dict.pkl',
 'BIO_ddse

In [19]:
path_to_blacklist = "../0_resources/regions/hg38-blacklist.v2.bed"

In [20]:
peak_half_width = 250

# Get consensus peaks
consensus_peaks_dict = {}
for sample in narrow_peaks_path_dict.keys():
    print(sample)
    consensus_out_path = narrow_peaks_path_dict[sample].replace(
        "_narrow_peaks_dict.pkl", "_consensus_peaks.bed"
    )
    if not os.path.exists(consensus_out_path):
        cto_path = cto_consensus_path_dict[sample]
        with open(cto_path, "rb") as f:
            cto = pickle.load(f)

        cto_celltypes = set(cto.cell_data["pycisTopic_leiden_10_0.4"].unique())

        with open(narrow_peaks_path_dict[sample], "rb") as f:
            narrow_peaks_dict = pickle.load(f)
        peaks_celltypes = set(
            [
                x.replace(" ", "").replace("+", "").replace("_", "")
                for x in set(narrow_peaks_dict.keys())
            ]
        )

        if cto_celltypes == peaks_celltypes:
            consensus_peaks = get_consensus_peaks(
                narrow_peaks_dict,
                peak_half_width,
                chromsizes=chromsizes,
                path_to_blacklist=path_to_blacklist,
            )

            consensus_peaks.to_bed(
                path=consensus_out_path, keep=True, compression="infer", chain=False
            )
        else:
            print("CELL TYPE SETS NOT MATCHING! Rerun peak calling.")
            print(peaks_celltypes - cto_celltypes)
            print(cto_celltypes - peaks_celltypes)
    else:
        print(f"{consensus_out_path} already exists, skipping...")

BIO_ddseq_m1c1.FULL
2022-11-04 15:49:00,066 cisTopic     INFO     Extending and merging peaks per class
2022-11-04 15:50:23,829 cisTopic     INFO     Normalizing peak scores
2022-11-04 15:50:24,234 cisTopic     INFO     Merging peaks
2022-11-04 15:51:39,804 cisTopic     INFO     Done!


  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.co

BIO_ddseq_m1c2.FULL
2022-11-04 15:51:42,132 cisTopic     INFO     Extending and merging peaks per class
2022-11-04 15:53:14,562 cisTopic     INFO     Normalizing peak scores
2022-11-04 15:53:14,945 cisTopic     INFO     Merging peaks
2022-11-04 15:54:32,105 cisTopic     INFO     Done!


  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.co

BIO_ddseq_m1c3.FULL
2022-11-04 15:54:34,444 cisTopic     INFO     Extending and merging peaks per class
2022-11-04 15:56:14,059 cisTopic     INFO     Normalizing peak scores
2022-11-04 15:56:14,499 cisTopic     INFO     Merging peaks
2022-11-04 15:57:36,596 cisTopic     INFO     Done!


  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.co

BIO_ddseq_m1c4.FULL
2022-11-04 15:57:38,904 cisTopic     INFO     Extending and merging peaks per class
2022-11-04 15:59:08,127 cisTopic     INFO     Normalizing peak scores
2022-11-04 15:59:08,509 cisTopic     INFO     Merging peaks
2022-11-04 16:00:24,706 cisTopic     INFO     Done!


  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.co

BIO_ddseq_m1c5.FULL
2022-11-04 16:00:27,080 cisTopic     INFO     Extending and merging peaks per class
2022-11-04 16:02:27,791 cisTopic     INFO     Normalizing peak scores
2022-11-04 16:02:28,340 cisTopic     INFO     Merging peaks
2022-11-04 16:04:00,788 cisTopic     INFO     Done!


  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.co

BIO_ddseq_m1c6.FULL
2022-11-04 16:04:03,359 cisTopic     INFO     Extending and merging peaks per class
2022-11-04 16:05:34,990 cisTopic     INFO     Normalizing peak scores
2022-11-04 16:05:35,413 cisTopic     INFO     Merging peaks
2022-11-04 16:06:55,459 cisTopic     INFO     Done!


  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.co

BIO_ddseq_m1c7.FULL
2022-11-04 16:06:57,738 cisTopic     INFO     Extending and merging peaks per class
2022-11-04 16:08:16,856 cisTopic     INFO     Normalizing peak scores
2022-11-04 16:08:17,274 cisTopic     INFO     Merging peaks
2022-11-04 16:09:27,517 cisTopic     INFO     Done!


  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.co

BIO_ddseq_m1c8.FULL
2022-11-04 16:09:30,076 cisTopic     INFO     Extending and merging peaks per class
2022-11-04 16:11:05,394 cisTopic     INFO     Normalizing peak scores
2022-11-04 16:11:05,863 cisTopic     INFO     Merging peaks
2022-11-04 16:12:25,080 cisTopic     INFO     Done!


  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.co

BIO_ddseq_m2c1.FULL
2022-11-04 16:12:27,573 cisTopic     INFO     Extending and merging peaks per class
2022-11-04 16:13:41,273 cisTopic     INFO     Normalizing peak scores
2022-11-04 16:13:41,593 cisTopic     INFO     Merging peaks
2022-11-04 16:14:45,137 cisTopic     INFO     Done!


  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.co

BIO_ddseq_m2c2.FULL
2022-11-04 16:14:47,285 cisTopic     INFO     Extending and merging peaks per class
2022-11-04 16:16:01,417 cisTopic     INFO     Normalizing peak scores
2022-11-04 16:16:01,789 cisTopic     INFO     Merging peaks
2022-11-04 16:17:08,209 cisTopic     INFO     Done!


  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.co

BIO_ddseq_m2c3.FULL
2022-11-04 16:17:10,508 cisTopic     INFO     Extending and merging peaks per class
2022-11-04 16:18:23,524 cisTopic     INFO     Normalizing peak scores
2022-11-04 16:18:23,880 cisTopic     INFO     Merging peaks
2022-11-04 16:19:31,372 cisTopic     INFO     Done!


  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.co

BIO_ddseq_m2c4.FULL
2022-11-04 16:19:33,443 cisTopic     INFO     Extending and merging peaks per class
2022-11-04 16:20:37,398 cisTopic     INFO     Normalizing peak scores
2022-11-04 16:20:37,754 cisTopic     INFO     Merging peaks
2022-11-04 16:21:35,365 cisTopic     INFO     Done!


  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.co

OHS_s3atac_mouse.FULL
2022-11-04 16:21:37,361 cisTopic     INFO     Extending and merging peaks per class
2022-11-04 16:22:24,272 cisTopic     INFO     Normalizing peak scores
2022-11-04 16:22:24,603 cisTopic     INFO     Merging peaks
2022-11-04 16:23:21,723 cisTopic     INFO     Done!


  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.co

TXG_10xmultiome_e18mousebrainfresh.FULL
2022-11-04 16:23:24,243 cisTopic     INFO     Extending and merging peaks per class
2022-11-04 16:24:17,692 cisTopic     INFO     Normalizing peak scores
2022-11-04 16:24:18,020 cisTopic     INFO     Merging peaks
2022-11-04 16:25:07,840 cisTopic     INFO     Done!


  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.co

TXG_10xv11_adultmousecortexchromiumx.FULL
2022-11-04 16:25:10,024 cisTopic     INFO     Extending and merging peaks per class
2022-11-04 16:26:47,769 cisTopic     INFO     Normalizing peak scores
2022-11-04 16:26:48,343 cisTopic     INFO     Merging peaks
2022-11-04 16:28:12,431 cisTopic     INFO     Done!


  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.co

TXG_10xv1_adultmousefresh.FULL
2022-11-04 16:28:15,165 cisTopic     INFO     Extending and merging peaks per class
2022-11-04 16:29:48,888 cisTopic     INFO     Normalizing peak scores
2022-11-04 16:29:49,529 cisTopic     INFO     Merging peaks
2022-11-04 16:31:16,887 cisTopic     INFO     Done!


  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.co

TXG_10xv2_adultmousecortex.FULL
2022-11-04 16:31:20,067 cisTopic     INFO     Extending and merging peaks per class
2022-11-04 16:33:07,127 cisTopic     INFO     Normalizing peak scores
2022-11-04 16:33:07,765 cisTopic     INFO     Merging peaks
2022-11-04 16:34:37,319 cisTopic     INFO     Done!


  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.co

TXG_10xv2_adultmousecortexchromiumx.FULL
2022-11-04 16:34:40,211 cisTopic     INFO     Extending and merging peaks per class
2022-11-04 16:36:23,461 cisTopic     INFO     Normalizing peak scores
2022-11-04 16:36:24,080 cisTopic     INFO     Merging peaks
2022-11-04 16:37:50,411 cisTopic     INFO     Done!


  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.co

VIB_hydrop_1.FULL
2022-11-04 16:37:53,106 cisTopic     INFO     Extending and merging peaks per class
2022-11-04 16:39:18,384 cisTopic     INFO     Normalizing peak scores
2022-11-04 16:39:18,966 cisTopic     INFO     Merging peaks
2022-11-04 16:41:05,037 cisTopic     INFO     Done!


  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.co

VIB_hydrop_2.FULL
2022-11-04 16:41:08,130 cisTopic     INFO     Extending and merging peaks per class
2022-11-04 16:41:50,910 cisTopic     INFO     Normalizing peak scores
2022-11-04 16:41:51,184 cisTopic     INFO     Merging peaks
2022-11-04 16:42:39,568 cisTopic     INFO     Done!


  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.co

VIB_hydrop_3.FULL
2022-11-04 16:42:41,818 cisTopic     INFO     Extending and merging peaks per class
2022-11-04 16:43:49,918 cisTopic     INFO     Normalizing peak scores
2022-11-04 16:43:50,360 cisTopic     INFO     Merging peaks
2022-11-04 16:45:08,487 cisTopic     INFO     Done!


  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.co

VIB_hydrop_4.FULL
2022-11-04 16:45:11,187 cisTopic     INFO     Extending and merging peaks per class
2022-11-04 16:46:12,444 cisTopic     INFO     Normalizing peak scores
2022-11-04 16:46:12,827 cisTopic     INFO     Merging peaks
2022-11-04 16:47:21,042 cisTopic     INFO     Done!


  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.co

VIB_hydrop_5.FULL
2022-11-04 16:47:23,453 cisTopic     INFO     Extending and merging peaks per class
2022-11-04 16:48:10,465 cisTopic     INFO     Normalizing peak scores
2022-11-04 16:48:10,769 cisTopic     INFO     Merging peaks
2022-11-04 16:49:03,603 cisTopic     INFO     Done!


  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.concat([outdf, df.get(noncanonical)], axis=1)
  return pd.co

# Check % chrM in consensus peaks

In [21]:
consensus_peaks_path_dict = {
    x.split("/")[-1].split(f"__")[0]: x
    for x in sorted(glob.glob("final_consensus_peaks/*consensus_peaks.bed"))
}
consensus_peaks_path_dict

{'BIO_ddseq_m1c1.FULL': 'final_consensus_peaks/BIO_ddseq_m1c1.FULL__SCREEN_consensus_peaks.bed',
 'BIO_ddseq_m1c2.FULL': 'final_consensus_peaks/BIO_ddseq_m1c2.FULL__SCREEN_consensus_peaks.bed',
 'BIO_ddseq_m1c3.FULL': 'final_consensus_peaks/BIO_ddseq_m1c3.FULL__SCREEN_consensus_peaks.bed',
 'BIO_ddseq_m1c4.FULL': 'final_consensus_peaks/BIO_ddseq_m1c4.FULL__SCREEN_consensus_peaks.bed',
 'BIO_ddseq_m1c5.FULL': 'final_consensus_peaks/BIO_ddseq_m1c5.FULL__SCREEN_consensus_peaks.bed',
 'BIO_ddseq_m1c6.FULL': 'final_consensus_peaks/BIO_ddseq_m1c6.FULL__SCREEN_consensus_peaks.bed',
 'BIO_ddseq_m1c7.FULL': 'final_consensus_peaks/BIO_ddseq_m1c7.FULL__SCREEN_consensus_peaks.bed',
 'BIO_ddseq_m1c8.FULL': 'final_consensus_peaks/BIO_ddseq_m1c8.FULL__SCREEN_consensus_peaks.bed',
 'BIO_ddseq_m2c1.FULL': 'final_consensus_peaks/BIO_ddseq_m2c1.FULL__SCREEN_consensus_peaks.bed',
 'BIO_ddseq_m2c2.FULL': 'final_consensus_peaks/BIO_ddseq_m2c2.FULL__SCREEN_consensus_peaks.bed',
 'BIO_ddseq_m2c3.FULL': 'final

In [22]:
for sample, path in consensus_peaks_path_dict.items():
    print(sample)
    peaks_df = pd.read_csv(path, sep='\t', header=None)
    
    chroms_in_df = list(sorted(peaks_df[0].unique()))
    chroms_standard = ["chr" + str(x + 1) for x in range(22)] + ['chrX']
    chroms_nonstandard = list(set(chroms_in_df ) - set(chroms_standard) -  set(['chrM']))
    
    n_standard = peaks_df[0].value_counts()[chroms_standard].sum()
    n_contigs = peaks_df[0].value_counts()[chroms_nonstandard].sum()
    n_chrm = peaks_df[0].value_counts()['chrM'].sum()
    pct_nonstandard = (n_contigs + n_chrm)/len(peaks_df)*100
        
    print(f"\tpeaks on standard chromosomes: {n_standard}")
    print(f"\tpeaks on contigs: {n_contigs}")
    print(f"\tpeaks on chrM: {n_chrm}")
    print(f"\t% peaks non standard chromosomes: {pct_nonstandard}%")
    if 'chrY' in chroms_in_df:
        n_chrY = peaks_df[0].value_counts()['chrY'].sum()
        print(f"\tpeaks on chrY: {n_chrY}")
    else:
        print(f"\tpeaks on chrY: 0")

BIO_ddseq_m1c1.FULL


KeyError: "['chr20', 'chr21', 'chr22'] not in index"