<span style="color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;">An Exception was encountered at '<a href="#papermill-error-cell">In [19]</a>'.</span>

In [1]:
import pycisTopic
import glob
import os
import pybiomart as pbm
import pandas as pd
import pickle
from pycisTopic.qc import *
from IPython.display import Image, display
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

import multiprocess as mp  # for kde multithreading calculation
from multiprocess import Pool

%matplotlib inline
%load_ext lab_black

# Download annotation

In [2]:
!pwd

/lustre1/project/stg_00090/scatac_benchmark/public_4_cistopic_consensus


In [3]:
wdir = "/lustre1/project/stg_00090/scatac_benchmark/public_4_cistopic_consensus"
os.chdir(wdir)

In [4]:
genome = "mm10"

pbm_genome_name_dict = {
    "hg38": "hsapiens_gene_ensembl",
    "hg37": "hsapiens_gene_ensembl",
    "mm10": "mmusculus_gene_ensembl",
    "dm6": "dmelanogaster_gene_ensembl",
}

pbm_host_dict = {
    "hg38": "http://www.ensembl.org",
    "hg37": "http://grch37.ensembl.org/",
    "mm10": "http://nov2020.archive.ensembl.org/",
    "dm6": "http://www.ensembl.org",
}

if os.path.exists(f"annotation.tsv"):
    print(f"Loading cached genome annotation...")
    annotation = pd.read_csv("annotation.tsv", sep="\t", header=0, index_col=0)
else:
    dataset = pbm.Dataset(name=pbm_genome_name_dict[genome], host=pbm_host_dict[genome])

    annotation = dataset.query(
        attributes=[
            "chromosome_name",
            "transcription_start_site",
            "strand",
            "external_gene_name",
            "transcript_biotype",
        ]
    )
    filter = annotation["Chromosome/scaffold name"].str.contains("CHR|GL|JH|MT")
    annotation = annotation[~filter]
    annotation["Chromosome/scaffold name"] = annotation[
        "Chromosome/scaffold name"
    ].str.replace(r"(\b\S)", r"chr\1")
    annotation.columns = ["Chromosome", "Start", "Strand", "Gene", "Transcript_type"]
    annotation = annotation[annotation.Transcript_type == "protein_coding"]
    annotation.to_csv("annotation.tsv", sep="\t")

Loading cached genome annotation...


In [5]:
fragments_list = sorted(
    glob.glob("../1_data_repository/publicdata_full_fragments_vsn/*.tsv.gz")
)
fragments_dict = {}
for fragments_file in fragments_list:
    sample = fragments_file.split("/")[-1].split("_fragments.tsv.gz")[0]
    fragments_dict[sample] = fragments_file
fragments_dict

{'BIO_ddseq_m1c1.FULL.fragments.tsv.gz': '../1_data_repository/publicdata_full_fragments_vsn/BIO_ddseq_m1c1.FULL.fragments.tsv.gz',
 'BIO_ddseq_m1c2.FULL.fragments.tsv.gz': '../1_data_repository/publicdata_full_fragments_vsn/BIO_ddseq_m1c2.FULL.fragments.tsv.gz',
 'BIO_ddseq_m1c3.FULL.fragments.tsv.gz': '../1_data_repository/publicdata_full_fragments_vsn/BIO_ddseq_m1c3.FULL.fragments.tsv.gz',
 'BIO_ddseq_m1c4.FULL.fragments.tsv.gz': '../1_data_repository/publicdata_full_fragments_vsn/BIO_ddseq_m1c4.FULL.fragments.tsv.gz',
 'BIO_ddseq_m1c5.FULL.fragments.tsv.gz': '../1_data_repository/publicdata_full_fragments_vsn/BIO_ddseq_m1c5.FULL.fragments.tsv.gz',
 'BIO_ddseq_m1c6.FULL.fragments.tsv.gz': '../1_data_repository/publicdata_full_fragments_vsn/BIO_ddseq_m1c6.FULL.fragments.tsv.gz',
 'BIO_ddseq_m1c7.FULL.fragments.tsv.gz': '../1_data_repository/publicdata_full_fragments_vsn/BIO_ddseq_m1c7.FULL.fragments.tsv.gz',
 'BIO_ddseq_m1c8.FULL.fragments.tsv.gz': '../1_data_repository/publicdata_fu

The regions we will count in. Here, I count everything in the human ENCODE SCREEN regions. You can also add a sample-specific region set, e.g. sample-specific peaks called on the sample's bams.

Now, make a sub dictionary of all samples within the fragments dict that have not been run yet (good for resuming a stopped cistopic run):

In [6]:
cistopic_qc_out = os.path.join(wdir, "cistopic_qc_out")
if not os.path.exists(cistopic_qc_out):
    os.makedirs(cistopic_qc_out)

In [7]:
region = "../0_resources/regions/V2.mm10-rDHS-Unfiltered.blacklisted.bed"

In [8]:
fragments_sub_dict = {}
regions_sub_dict = {}
for sample in fragments_dict:
    metadata_file = os.path.join(cistopic_qc_out, sample + "__metadata_bc.pkl")
    print(f"Checking if {metadata_file} exist...")
    if os.path.exists(metadata_file):
        print("\tMetadata exists! Skipping...")
    else:
        fragments_sub_dict[sample] = fragments_dict[sample]
        print("\tMetadata does not exist, adding to subdict to generate")

Checking if /lustre1/project/stg_00090/scatac_benchmark/public_4_cistopic_consensus/cistopic_qc_out/BIO_ddseq_m1c1.FULL.fragments.tsv.gz__metadata_bc.pkl exist...
	Metadata does not exist, adding to subdict to generate
Checking if /lustre1/project/stg_00090/scatac_benchmark/public_4_cistopic_consensus/cistopic_qc_out/BIO_ddseq_m1c2.FULL.fragments.tsv.gz__metadata_bc.pkl exist...
	Metadata does not exist, adding to subdict to generate
Checking if /lustre1/project/stg_00090/scatac_benchmark/public_4_cistopic_consensus/cistopic_qc_out/BIO_ddseq_m1c3.FULL.fragments.tsv.gz__metadata_bc.pkl exist...
	Metadata does not exist, adding to subdict to generate
Checking if /lustre1/project/stg_00090/scatac_benchmark/public_4_cistopic_consensus/cistopic_qc_out/BIO_ddseq_m1c4.FULL.fragments.tsv.gz__metadata_bc.pkl exist...
	Metadata does not exist, adding to subdict to generate
Checking if /lustre1/project/stg_00090/scatac_benchmark/public_4_cistopic_consensus/cistopic_qc_out/BIO_ddseq_m1c5.FULL.frag

In [9]:
regions_sub_dict = {x: region for x in sorted(fragments_sub_dict.keys())}

In [10]:
regions_paths_dict = {
    x.split("/")[-1].split(f"__")[0].replace(".FULL", ""): x
    for x in sorted(glob.glob("../public_3_cistopic_qc/final_consensus_peaks/*.bed"))
}
regions_paths_dict

{'BIO_ddseq_m1c1': '../public_3_cistopic_qc/final_consensus_peaks/BIO_ddseq_m1c1.FULL__SCREEN_consensus_peaks.bed',
 'BIO_ddseq_m1c2': '../public_3_cistopic_qc/final_consensus_peaks/BIO_ddseq_m1c2.FULL__SCREEN_consensus_peaks.bed',
 'BIO_ddseq_m1c3': '../public_3_cistopic_qc/final_consensus_peaks/BIO_ddseq_m1c3.FULL__SCREEN_consensus_peaks.bed',
 'BIO_ddseq_m1c4': '../public_3_cistopic_qc/final_consensus_peaks/BIO_ddseq_m1c4.FULL__SCREEN_consensus_peaks.bed',
 'BIO_ddseq_m1c5': '../public_3_cistopic_qc/final_consensus_peaks/BIO_ddseq_m1c5.FULL__SCREEN_consensus_peaks.bed',
 'BIO_ddseq_m1c6': '../public_3_cistopic_qc/final_consensus_peaks/BIO_ddseq_m1c6.FULL__SCREEN_consensus_peaks.bed',
 'BIO_ddseq_m1c7': '../public_3_cistopic_qc/final_consensus_peaks/BIO_ddseq_m1c7.FULL__SCREEN_consensus_peaks.bed',
 'BIO_ddseq_m1c8': '../public_3_cistopic_qc/final_consensus_peaks/BIO_ddseq_m1c8.FULL__SCREEN_consensus_peaks.bed',
 'BIO_ddseq_m2c1': '../public_3_cistopic_qc/final_consensus_peaks/BIO_dd

In [11]:
regions_sub_dict = {x: regions_paths_dict[x.split(".")[0]] for x in fragments_dict}
regions_sub_dict

{'BIO_ddseq_m1c1.FULL.fragments.tsv.gz': '../public_3_cistopic_qc/final_consensus_peaks/BIO_ddseq_m1c1.FULL__SCREEN_consensus_peaks.bed',
 'BIO_ddseq_m1c2.FULL.fragments.tsv.gz': '../public_3_cistopic_qc/final_consensus_peaks/BIO_ddseq_m1c2.FULL__SCREEN_consensus_peaks.bed',
 'BIO_ddseq_m1c3.FULL.fragments.tsv.gz': '../public_3_cistopic_qc/final_consensus_peaks/BIO_ddseq_m1c3.FULL__SCREEN_consensus_peaks.bed',
 'BIO_ddseq_m1c4.FULL.fragments.tsv.gz': '../public_3_cistopic_qc/final_consensus_peaks/BIO_ddseq_m1c4.FULL__SCREEN_consensus_peaks.bed',
 'BIO_ddseq_m1c5.FULL.fragments.tsv.gz': '../public_3_cistopic_qc/final_consensus_peaks/BIO_ddseq_m1c5.FULL__SCREEN_consensus_peaks.bed',
 'BIO_ddseq_m1c6.FULL.fragments.tsv.gz': '../public_3_cistopic_qc/final_consensus_peaks/BIO_ddseq_m1c6.FULL__SCREEN_consensus_peaks.bed',
 'BIO_ddseq_m1c7.FULL.fragments.tsv.gz': '../public_3_cistopic_qc/final_consensus_peaks/BIO_ddseq_m1c7.FULL__SCREEN_consensus_peaks.bed',
 'BIO_ddseq_m1c8.FULL.fragments.ts

In [12]:
ray.shutdown()

In [13]:
n_cores = 5
if fragments_sub_dict != {}:
    samples_sub = list(fragments_sub_dict.keys())
    blocks = [samples_sub[i : i + n_cores] for i in range(0, len(samples_sub), n_cores)]
    for samples_torun_in_block in blocks:
        fragments_sub_dict_block = {
            key: fragments_sub_dict[key] for key in samples_torun_in_block
        }
        regions_sub_dict_block = {
            key: regions_sub_dict[key] for key in samples_torun_in_block
        }

        metadata_bc_dict, profile_data_dict = compute_qc_stats(
            fragments_dict=fragments_sub_dict_block,
            tss_annotation=annotation,
            stats=[
                "barcode_rank_plot",
                "duplicate_rate",
                "insert_size_distribution",
                "profile_tss",
                "frip",
            ],
            label_list=None,
            path_to_regions=regions_sub_dict_block,
            n_cpu=n_cores,
            valid_bc=None,
            n_frag=10,
            n_bc=None,
            tss_flank_window=2000,
            tss_window=50,
            tss_minimum_signal_window=100,
            tss_rolling_window=10,
            # min_norm=0.2,
            remove_duplicates=True,
        )

        ray.shutdown()
        print(f"Dumping files in {cistopic_qc_out}...")
        for sample in sorted(metadata_bc_dict.keys()):
            metadata_bc_dict[sample]["sample_id"] = sample
            metadata_bc_dict[sample].index = [
                x + "___" + sample for x in list(metadata_bc_dict[sample].index)
            ]
            with open(
                os.path.join(cistopic_qc_out, f"{sample}__metadata_bc.pkl"), "wb"
            ) as f:
                pickle.dump(metadata_bc_dict[sample], f, protocol=4)

            with open(
                os.path.join(cistopic_qc_out, f"{sample}__profile_data.pkl"), "wb"
            ) as f:
                pickle.dump(profile_data_dict[sample], f, protocol=4)
else:
    print("All samples already processed.")

2022-11-16 19:03:38,194	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(compute_qc_stats_ray pid=31333)[0m 2022-11-16 19:03:41,557 cisTopic     INFO     Reading BIO_ddseq_m1c1.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=31332)[0m 2022-11-16 19:03:41,562 cisTopic     INFO     Reading BIO_ddseq_m1c3.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=31330)[0m 2022-11-16 19:03:41,583 cisTopic     INFO     Reading BIO_ddseq_m1c2.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=31329)[0m 2022-11-16 19:03:41,620 cisTopic     INFO     Reading BIO_ddseq_m1c4.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=31331)[0m 2022-11-16 19:03:41,622 cisTopic     INFO     Reading BIO_ddseq_m1c5.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=31333)[0m 2022-11-16 19:06:08,429 cisTopic     INFO     Computing barcode rank plot for BIO_ddseq_m1c1.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=31333)[0m 2022-11-16 19:06:08,429 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=31333)[0m 2022-11-16 19:06:25,202 cisTopic     INFO     Marking barcodes with more than 10


[2m[36m(compute_qc_stats_ray pid=31333)[0m 2022-11-16 19:06:25,466 cisTopic     INFO     Returning plot data
[2m[36m(compute_qc_stats_ray pid=31333)[0m 2022-11-16 19:06:25,519 cisTopic     INFO     Returning valid barcodes


[2m[36m(compute_qc_stats_ray pid=31333)[0m 2022-11-16 19:06:30,282 cisTopic     INFO     Computing duplicate rate plot for BIO_ddseq_m1c1.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=31332)[0m 2022-11-16 19:06:37,076 cisTopic     INFO     Computing barcode rank plot for BIO_ddseq_m1c3.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=31332)[0m 2022-11-16 19:06:37,077 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=31329)[0m 2022-11-16 19:06:48,239 cisTopic     INFO     Computing barcode rank plot for BIO_ddseq_m1c4.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=31329)[0m 2022-11-16 19:06:48,239 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=31333)[0m 2022-11-16 19:06:51,679 cisTopic     INFO     Return plot data


[2m[36m(compute_qc_stats_ray pid=31333)[0m 2022-11-16 19:06:51,873 cisTopic     INFO     Computing insert size distribution for BIO_ddseq_m1c1.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=31333)[0m 2022-11-16 19:06:51,874 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=31332)[0m 2022-11-16 19:06:52,845 cisTopic     INFO     Marking barcodes with more than 10


[2m[36m(compute_qc_stats_ray pid=31332)[0m 2022-11-16 19:06:53,130 cisTopic     INFO     Returning plot data
[2m[36m(compute_qc_stats_ray pid=31332)[0m 2022-11-16 19:06:53,157 cisTopic     INFO     Returning valid barcodes


[2m[36m(compute_qc_stats_ray pid=31333)[0m 2022-11-16 19:06:53,602 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=31332)[0m 2022-11-16 19:06:58,511 cisTopic     INFO     Computing duplicate rate plot for BIO_ddseq_m1c3.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=31329)[0m 2022-11-16 19:07:04,937 cisTopic     INFO     Marking barcodes with more than 10


[2m[36m(compute_qc_stats_ray pid=31329)[0m 2022-11-16 19:07:05,241 cisTopic     INFO     Returning plot data
[2m[36m(compute_qc_stats_ray pid=31329)[0m 2022-11-16 19:07:05,269 cisTopic     INFO     Returning valid barcodes


[2m[36m(compute_qc_stats_ray pid=31329)[0m 2022-11-16 19:07:10,699 cisTopic     INFO     Computing duplicate rate plot for BIO_ddseq_m1c4.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=31330)[0m 2022-11-16 19:07:14,558 cisTopic     INFO     Computing barcode rank plot for BIO_ddseq_m1c2.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=31330)[0m 2022-11-16 19:07:14,558 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=31332)[0m 2022-11-16 19:07:20,301 cisTopic     INFO     Return plot data


[2m[36m(compute_qc_stats_ray pid=31332)[0m 2022-11-16 19:07:20,477 cisTopic     INFO     Computing insert size distribution for BIO_ddseq_m1c3.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=31332)[0m 2022-11-16 19:07:20,477 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=31332)[0m 2022-11-16 19:07:22,486 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=31333)[0m 2022-11-16 19:07:25,501 cisTopic     INFO     Computing TSS profile for BIO_ddseq_m1c1.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=31329)[0m 2022-11-16 19:07:33,144 cisTopic     INFO     Return plot data


[2m[36m(compute_qc_stats_ray pid=31329)[0m 2022-11-16 19:07:33,320 cisTopic     INFO     Computing insert size distribution for BIO_ddseq_m1c4.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=31329)[0m 2022-11-16 19:07:33,320 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=31333)[0m 2022-11-16 19:07:33,786 cisTopic     INFO     Formatting annnotation
[2m[36m(compute_qc_stats_ray pid=31333)[0m 2022-11-16 19:07:33,834 cisTopic     INFO     Creating coverage matrix


[2m[36m(compute_qc_stats_ray pid=31330)[0m 2022-11-16 19:07:34,827 cisTopic     INFO     Marking barcodes with more than 10


[2m[36m(compute_qc_stats_ray pid=31330)[0m 2022-11-16 19:07:35,145 cisTopic     INFO     Returning plot data
[2m[36m(compute_qc_stats_ray pid=31330)[0m 2022-11-16 19:07:35,175 cisTopic     INFO     Returning valid barcodes
[2m[36m(compute_qc_stats_ray pid=31329)[0m 2022-11-16 19:07:35,203 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=31330)[0m 2022-11-16 19:07:41,308 cisTopic     INFO     Computing duplicate rate plot for BIO_ddseq_m1c2.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=31331)[0m 2022-11-16 19:08:01,140 cisTopic     INFO     Computing barcode rank plot for BIO_ddseq_m1c5.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=31331)[0m 2022-11-16 19:08:01,141 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=31332)[0m 2022-11-16 19:08:03,942 cisTopic     INFO     Computing TSS profile for BIO_ddseq_m1c3.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=31330)[0m 2022-11-16 19:08:08,139 cisTopic     INFO     Return plot data


[2m[36m(compute_qc_stats_ray pid=31330)[0m 2022-11-16 19:08:08,322 cisTopic     INFO     Computing insert size distribution for BIO_ddseq_m1c2.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=31330)[0m 2022-11-16 19:08:08,322 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=31330)[0m 2022-11-16 19:08:10,381 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=31329)[0m 2022-11-16 19:08:12,580 cisTopic     INFO     Computing TSS profile for BIO_ddseq_m1c4.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=31332)[0m 2022-11-16 19:08:14,551 cisTopic     INFO     Formatting annnotation
[2m[36m(compute_qc_stats_ray pid=31332)[0m 2022-11-16 19:08:14,599 cisTopic     INFO     Creating coverage matrix


[2m[36m(compute_qc_stats_ray pid=31329)[0m 2022-11-16 19:08:25,021 cisTopic     INFO     Formatting annnotation
[2m[36m(compute_qc_stats_ray pid=31329)[0m 2022-11-16 19:08:25,085 cisTopic     INFO     Creating coverage matrix


[2m[36m(compute_qc_stats_ray pid=31331)[0m 2022-11-16 19:08:25,136 cisTopic     INFO     Marking barcodes with more than 10


[2m[36m(compute_qc_stats_ray pid=31331)[0m 2022-11-16 19:08:25,488 cisTopic     INFO     Returning plot data
[2m[36m(compute_qc_stats_ray pid=31331)[0m 2022-11-16 19:08:25,558 cisTopic     INFO     Returning valid barcodes


[2m[36m(compute_qc_stats_ray pid=31331)[0m 2022-11-16 19:08:33,451 cisTopic     INFO     Computing duplicate rate plot for BIO_ddseq_m1c5.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=31330)[0m 2022-11-16 19:08:50,423 cisTopic     INFO     Computing TSS profile for BIO_ddseq_m1c2.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=31333)[0m 2022-11-16 19:09:01,493 cisTopic     INFO     Coverage matrix done


[2m[36m(compute_qc_stats_ray pid=31330)[0m 2022-11-16 19:09:03,668 cisTopic     INFO     Formatting annnotation
[2m[36m(compute_qc_stats_ray pid=31330)[0m 2022-11-16 19:09:03,723 cisTopic     INFO     Creating coverage matrix


[2m[36m(compute_qc_stats_ray pid=31331)[0m 2022-11-16 19:09:03,852 cisTopic     INFO     Return plot data


[2m[36m(compute_qc_stats_ray pid=31331)[0m 2022-11-16 19:09:04,044 cisTopic     INFO     Computing insert size distribution for BIO_ddseq_m1c5.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=31331)[0m 2022-11-16 19:09:04,044 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=31331)[0m 2022-11-16 19:09:06,720 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=31332)[0m 2022-11-16 19:09:47,532 cisTopic     INFO     Coverage matrix done


[2m[36m(compute_qc_stats_ray pid=31331)[0m 2022-11-16 19:10:01,827 cisTopic     INFO     Computing TSS profile for BIO_ddseq_m1c5.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=31329)[0m 2022-11-16 19:10:03,140 cisTopic     INFO     Coverage matrix done


[2m[36m(compute_qc_stats_ray pid=31331)[0m 2022-11-16 19:10:15,853 cisTopic     INFO     Formatting annnotation
[2m[36m(compute_qc_stats_ray pid=31331)[0m 2022-11-16 19:10:15,903 cisTopic     INFO     Creating coverage matrix


[2m[36m(compute_qc_stats_ray pid=31333)[0m 2022-11-16 19:10:27,904 cisTopic     INFO     Returning normalized TSS coverage matrix per barcode


[2m[36m(compute_qc_stats_ray pid=31330)[0m 2022-11-16 19:10:58,990 cisTopic     INFO     Coverage matrix done


[2m[36m(compute_qc_stats_ray pid=31332)[0m 2022-11-16 19:11:15,603 cisTopic     INFO     Returning normalized TSS coverage matrix per barcode


[2m[36m(compute_qc_stats_ray pid=31333)[0m 2022-11-16 19:11:20,977 cisTopic     INFO     Returning normalized sample TSS enrichment data
[2m[36m(compute_qc_stats_ray pid=31333)[0m 2022-11-16 19:11:21,083 cisTopic     INFO     Computing FRIP profile for BIO_ddseq_m1c1.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=31333)[0m 2022-11-16 19:11:21,873 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=31329)[0m 2022-11-16 19:11:28,892 cisTopic     INFO     Returning normalized TSS coverage matrix per barcode


[2m[36m(compute_qc_stats_ray pid=31333)[0m 2022-11-16 19:11:32,561 cisTopic     INFO     Intersecting fragments with regions


[2m[36m(compute_qc_stats_ray pid=31333)[0m 2022-11-16 19:12:03,835 cisTopic     INFO     Sample BIO_ddseq_m1c1.FULL.fragments.tsv.gz done!


[2m[36m(compute_qc_stats_ray pid=31332)[0m 2022-11-16 19:12:11,292 cisTopic     INFO     Returning normalized sample TSS enrichment data


[2m[36m(compute_qc_stats_ray pid=31332)[0m 2022-11-16 19:12:11,445 cisTopic     INFO     Computing FRIP profile for BIO_ddseq_m1c3.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=31332)[0m 2022-11-16 19:12:12,769 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=31331)[0m 2022-11-16 19:12:22,055 cisTopic     INFO     Coverage matrix done


[2m[36m(compute_qc_stats_ray pid=31329)[0m 2022-11-16 19:12:23,504 cisTopic     INFO     Returning normalized sample TSS enrichment data
[2m[36m(compute_qc_stats_ray pid=31329)[0m 2022-11-16 19:12:23,628 cisTopic     INFO     Computing FRIP profile for BIO_ddseq_m1c4.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=31329)[0m 2022-11-16 19:12:24,394 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=31332)[0m 2022-11-16 19:12:26,613 cisTopic     INFO     Intersecting fragments with regions


[2m[36m(compute_qc_stats_ray pid=31330)[0m 2022-11-16 19:12:38,723 cisTopic     INFO     Returning normalized TSS coverage matrix per barcode
[2m[36m(compute_qc_stats_ray pid=31329)[0m 2022-11-16 19:12:38,777 cisTopic     INFO     Intersecting fragments with regions


[2m[36m(compute_qc_stats_ray pid=31332)[0m 2022-11-16 19:13:03,728 cisTopic     INFO     Sample BIO_ddseq_m1c3.FULL.fragments.tsv.gz done!


[2m[36m(compute_qc_stats_ray pid=31329)[0m 2022-11-16 19:13:16,253 cisTopic     INFO     Sample BIO_ddseq_m1c4.FULL.fragments.tsv.gz done!


[2m[36m(compute_qc_stats_ray pid=31330)[0m 2022-11-16 19:13:37,513 cisTopic     INFO     Returning normalized sample TSS enrichment data
[2m[36m(compute_qc_stats_ray pid=31330)[0m 2022-11-16 19:13:37,648 cisTopic     INFO     Computing FRIP profile for BIO_ddseq_m1c2.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=31330)[0m 2022-11-16 19:13:38,531 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=31330)[0m 2022-11-16 19:13:52,209 cisTopic     INFO     Intersecting fragments with regions


[2m[36m(compute_qc_stats_ray pid=31331)[0m 2022-11-16 19:13:58,816 cisTopic     INFO     Returning normalized TSS coverage matrix per barcode


[2m[36m(compute_qc_stats_ray pid=31330)[0m 2022-11-16 19:14:35,046 cisTopic     INFO     Sample BIO_ddseq_m1c2.FULL.fragments.tsv.gz done!


[2m[36m(compute_qc_stats_ray pid=31331)[0m 2022-11-16 19:14:58,818 cisTopic     INFO     Returning normalized sample TSS enrichment data
[2m[36m(compute_qc_stats_ray pid=31331)[0m 2022-11-16 19:14:58,930 cisTopic     INFO     Computing FRIP profile for BIO_ddseq_m1c5.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=31331)[0m 2022-11-16 19:14:59,805 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=31331)[0m 2022-11-16 19:15:18,536 cisTopic     INFO     Intersecting fragments with regions


[2m[36m(compute_qc_stats_ray pid=31331)[0m 2022-11-16 19:16:09,027 cisTopic     INFO     Sample BIO_ddseq_m1c5.FULL.fragments.tsv.gz done!


Dumping files in /lustre1/project/stg_00090/scatac_benchmark/public_4_cistopic_consensus/cistopic_qc_out...


2022-11-16 19:22:04,222	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(compute_qc_stats_ray pid=26136)[0m 2022-11-16 19:22:07,616 cisTopic     INFO     Reading BIO_ddseq_m2c2.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=26137)[0m 2022-11-16 19:22:07,618 cisTopic     INFO     Reading BIO_ddseq_m1c8.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=26139)[0m 2022-11-16 19:22:07,656 cisTopic     INFO     Reading BIO_ddseq_m1c6.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=26135)[0m 2022-11-16 19:22:07,725 cisTopic     INFO     Reading BIO_ddseq_m1c7.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=26138)[0m 2022-11-16 19:22:07,725 cisTopic     INFO     Reading BIO_ddseq_m2c1.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=26136)[0m 2022-11-16 19:23:49,677 cisTopic     INFO     Computing barcode rank plot for BIO_ddseq_m2c2.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=26136)[0m 2022-11-16 19:23:49,677 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=26136)[0m 2022-11-16 19:23:58,975 cisTopic     INFO     Marking barcodes with more than 10
[2m[36m(compute_qc_stats_ray pid=26136)[0m 2022-11-16 19:23:59,146 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=26136)[0m 2022-11-16 19:23:59,158 cisTopic     INFO     Returning valid barcodes


[2m[36m(compute_qc_stats_ray pid=26136)[0m 2022-11-16 19:24:02,970 cisTopic     INFO     Computing duplicate rate plot for BIO_ddseq_m2c2.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=26138)[0m 2022-11-16 19:24:12,577 cisTopic     INFO     Computing barcode rank plot for BIO_ddseq_m2c1.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=26138)[0m 2022-11-16 19:24:12,577 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=26136)[0m 2022-11-16 19:24:15,841 cisTopic     INFO     Return plot data
[2m[36m(compute_qc_stats_ray pid=26136)[0m 2022-11-16 19:24:15,995 cisTopic     INFO     Computing insert size distribution for BIO_ddseq_m2c2.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=26136)[0m 2022-11-16 19:24:15,996 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=26136)[0m 2022-11-16 19:24:17,383 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=26138)[0m 2022-11-16 19:24:23,854 cisTopic     INFO     Marking barcodes with more than 10


[2m[36m(compute_qc_stats_ray pid=26138)[0m 2022-11-16 19:24:24,068 cisTopic     INFO     Returning plot data
[2m[36m(compute_qc_stats_ray pid=26138)[0m 2022-11-16 19:24:24,093 cisTopic     INFO     Returning valid barcodes


[2m[36m(compute_qc_stats_ray pid=26138)[0m 2022-11-16 19:24:28,096 cisTopic     INFO     Computing duplicate rate plot for BIO_ddseq_m2c1.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=26135)[0m 2022-11-16 19:24:31,239 cisTopic     INFO     Computing barcode rank plot for BIO_ddseq_m1c7.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=26135)[0m 2022-11-16 19:24:31,240 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=26137)[0m 2022-11-16 19:24:41,963 cisTopic     INFO     Computing barcode rank plot for BIO_ddseq_m1c8.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=26137)[0m 2022-11-16 19:24:41,963 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=26136)[0m 2022-11-16 19:24:42,834 cisTopic     INFO     Computing TSS profile for BIO_ddseq_m2c2.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=26138)[0m 2022-11-16 19:24:43,371 cisTopic     INFO     Return plot data
[2m[36m(compute_qc_stats_ray pid=26138)[0m 2022-11-16 19:24:43,527 cisTopic     INFO     Computing insert size distribution for BIO_ddseq_m2c1.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=26138)[0m 2022-11-16 19:24:43,527 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=26138)[0m 2022-11-16 19:24:44,941 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=26135)[0m 2022-11-16 19:24:45,897 cisTopic     INFO     Marking barcodes with more than 10


[2m[36m(compute_qc_stats_ray pid=26135)[0m 2022-11-16 19:24:46,138 cisTopic     INFO     Returning plot data
[2m[36m(compute_qc_stats_ray pid=26135)[0m 2022-11-16 19:24:46,167 cisTopic     INFO     Returning valid barcodes


[2m[36m(compute_qc_stats_ray pid=26136)[0m 2022-11-16 19:24:49,139 cisTopic     INFO     Formatting annnotation
[2m[36m(compute_qc_stats_ray pid=26136)[0m 2022-11-16 19:24:49,189 cisTopic     INFO     Creating coverage matrix


[2m[36m(compute_qc_stats_ray pid=26135)[0m 2022-11-16 19:24:50,770 cisTopic     INFO     Computing duplicate rate plot for BIO_ddseq_m1c7.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=26137)[0m 2022-11-16 19:24:56,127 cisTopic     INFO     Marking barcodes with more than 10


[2m[36m(compute_qc_stats_ray pid=26137)[0m 2022-11-16 19:24:56,368 cisTopic     INFO     Returning plot data
[2m[36m(compute_qc_stats_ray pid=26137)[0m 2022-11-16 19:24:56,415 cisTopic     INFO     Returning valid barcodes


[2m[36m(compute_qc_stats_ray pid=26137)[0m 2022-11-16 19:25:02,854 cisTopic     INFO     Computing duplicate rate plot for BIO_ddseq_m1c8.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=26139)[0m 2022-11-16 19:25:03,265 cisTopic     INFO     Computing barcode rank plot for BIO_ddseq_m1c6.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=26139)[0m 2022-11-16 19:25:03,265 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=26135)[0m 2022-11-16 19:25:09,405 cisTopic     INFO     Return plot data


[2m[36m(compute_qc_stats_ray pid=26135)[0m 2022-11-16 19:25:09,594 cisTopic     INFO     Computing insert size distribution for BIO_ddseq_m1c7.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=26135)[0m 2022-11-16 19:25:09,595 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=26135)[0m 2022-11-16 19:25:11,554 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=26138)[0m 2022-11-16 19:25:12,508 cisTopic     INFO     Computing TSS profile for BIO_ddseq_m2c1.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=26139)[0m 2022-11-16 19:25:20,159 cisTopic     INFO     Marking barcodes with more than 10


[2m[36m(compute_qc_stats_ray pid=26139)[0m 2022-11-16 19:25:20,455 cisTopic     INFO     Returning plot data
[2m[36m(compute_qc_stats_ray pid=26139)[0m 2022-11-16 19:25:20,487 cisTopic     INFO     Returning valid barcodes


[2m[36m(compute_qc_stats_ray pid=26138)[0m 2022-11-16 19:25:21,195 cisTopic     INFO     Formatting annnotation
[2m[36m(compute_qc_stats_ray pid=26138)[0m 2022-11-16 19:25:21,249 cisTopic     INFO     Creating coverage matrix


[2m[36m(compute_qc_stats_ray pid=26139)[0m 2022-11-16 19:25:25,795 cisTopic     INFO     Computing duplicate rate plot for BIO_ddseq_m1c6.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=26137)[0m 2022-11-16 19:25:27,368 cisTopic     INFO     Return plot data


[2m[36m(compute_qc_stats_ray pid=26137)[0m 2022-11-16 19:25:27,661 cisTopic     INFO     Computing insert size distribution for BIO_ddseq_m1c8.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=26137)[0m 2022-11-16 19:25:27,661 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=26137)[0m 2022-11-16 19:25:29,806 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=26135)[0m 2022-11-16 19:25:48,347 cisTopic     INFO     Computing TSS profile for BIO_ddseq_m1c7.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=26139)[0m 2022-11-16 19:25:51,203 cisTopic     INFO     Return plot data


[2m[36m(compute_qc_stats_ray pid=26139)[0m 2022-11-16 19:25:51,384 cisTopic     INFO     Computing insert size distribution for BIO_ddseq_m1c6.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=26139)[0m 2022-11-16 19:25:51,384 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=26139)[0m 2022-11-16 19:25:53,345 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=26135)[0m 2022-11-16 19:25:58,514 cisTopic     INFO     Formatting annnotation
[2m[36m(compute_qc_stats_ray pid=26135)[0m 2022-11-16 19:25:58,568 cisTopic     INFO     Creating coverage matrix


[2m[36m(compute_qc_stats_ray pid=26136)[0m 2022-11-16 19:26:10,421 cisTopic     INFO     Coverage matrix done


[2m[36m(compute_qc_stats_ray pid=26137)[0m 2022-11-16 19:26:12,835 cisTopic     INFO     Computing TSS profile for BIO_ddseq_m1c8.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=26137)[0m 2022-11-16 19:26:23,848 cisTopic     INFO     Formatting annnotation
[2m[36m(compute_qc_stats_ray pid=26137)[0m 2022-11-16 19:26:23,895 cisTopic     INFO     Creating coverage matrix


[2m[36m(compute_qc_stats_ray pid=26139)[0m 2022-11-16 19:26:34,959 cisTopic     INFO     Computing TSS profile for BIO_ddseq_m1c6.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=26139)[0m 2022-11-16 19:26:47,912 cisTopic     INFO     Formatting annnotation
[2m[36m(compute_qc_stats_ray pid=26139)[0m 2022-11-16 19:26:47,966 cisTopic     INFO     Creating coverage matrix


[2m[36m(compute_qc_stats_ray pid=26138)[0m 2022-11-16 19:26:50,194 cisTopic     INFO     Coverage matrix done


[2m[36m(compute_qc_stats_ray pid=26136)[0m 2022-11-16 19:27:12,150 cisTopic     INFO     Returning normalized TSS coverage matrix per barcode


[2m[36m(compute_qc_stats_ray pid=26135)[0m 2022-11-16 19:27:20,216 cisTopic     INFO     Coverage matrix done


[2m[36m(compute_qc_stats_ray pid=26136)[0m 2022-11-16 19:27:49,733 cisTopic     INFO     Returning normalized sample TSS enrichment data


[2m[36m(compute_qc_stats_ray pid=26136)[0m 2022-11-16 19:27:49,971 cisTopic     INFO     Computing FRIP profile for BIO_ddseq_m2c2.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=26136)[0m 2022-11-16 19:27:50,830 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=26137)[0m 2022-11-16 19:27:54,917 cisTopic     INFO     Coverage matrix done


[2m[36m(compute_qc_stats_ray pid=26136)[0m 2022-11-16 19:28:00,006 cisTopic     INFO     Intersecting fragments with regions


[2m[36m(compute_qc_stats_ray pid=26138)[0m 2022-11-16 19:28:12,044 cisTopic     INFO     Returning normalized TSS coverage matrix per barcode


[2m[36m(compute_qc_stats_ray pid=26136)[0m 2022-11-16 19:28:23,503 cisTopic     INFO     Sample BIO_ddseq_m2c2.FULL.fragments.tsv.gz done!


[2m[36m(compute_qc_stats_ray pid=26139)[0m 2022-11-16 19:28:24,028 cisTopic     INFO     Coverage matrix done


[2m[36m(compute_qc_stats_ray pid=26135)[0m 2022-11-16 19:28:28,536 cisTopic     INFO     Returning normalized TSS coverage matrix per barcode


[2m[36m(compute_qc_stats_ray pid=26137)[0m 2022-11-16 19:28:54,174 cisTopic     INFO     Returning normalized TSS coverage matrix per barcode


[2m[36m(compute_qc_stats_ray pid=26138)[0m 2022-11-16 19:29:03,686 cisTopic     INFO     Returning normalized sample TSS enrichment data
[2m[36m(compute_qc_stats_ray pid=26138)[0m 2022-11-16 19:29:03,817 cisTopic     INFO     Computing FRIP profile for BIO_ddseq_m2c1.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=26138)[0m 2022-11-16 19:29:04,741 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=26135)[0m 2022-11-16 19:29:11,789 cisTopic     INFO     Returning normalized sample TSS enrichment data
[2m[36m(compute_qc_stats_ray pid=26135)[0m 2022-11-16 19:29:11,930 cisTopic     INFO     Computing FRIP profile for BIO_ddseq_m1c7.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=26135)[0m 2022-11-16 19:29:12,750 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=26138)[0m 2022-11-16 19:29:15,296 cisTopic     INFO     Intersecting fragments with regions


[2m[36m(compute_qc_stats_ray pid=26135)[0m 2022-11-16 19:29:23,494 cisTopic     INFO     Intersecting fragments with regions


[2m[36m(compute_qc_stats_ray pid=26137)[0m 2022-11-16 19:29:33,583 cisTopic     INFO     Returning normalized sample TSS enrichment data
[2m[36m(compute_qc_stats_ray pid=26137)[0m 2022-11-16 19:29:33,681 cisTopic     INFO     Computing FRIP profile for BIO_ddseq_m1c8.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=26137)[0m 2022-11-16 19:29:34,832 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=26138)[0m 2022-11-16 19:29:42,627 cisTopic     INFO     Sample BIO_ddseq_m2c1.FULL.fragments.tsv.gz done!


[2m[36m(compute_qc_stats_ray pid=26137)[0m 2022-11-16 19:29:48,449 cisTopic     INFO     Intersecting fragments with regions


[2m[36m(compute_qc_stats_ray pid=26139)[0m 2022-11-16 19:29:50,999 cisTopic     INFO     Returning normalized TSS coverage matrix per barcode


[2m[36m(compute_qc_stats_ray pid=26135)[0m 2022-11-16 19:29:53,467 cisTopic     INFO     Sample BIO_ddseq_m1c7.FULL.fragments.tsv.gz done!


[2m[36m(compute_qc_stats_ray pid=26137)[0m 2022-11-16 19:30:27,407 cisTopic     INFO     Sample BIO_ddseq_m1c8.FULL.fragments.tsv.gz done!


[2m[36m(compute_qc_stats_ray pid=26139)[0m 2022-11-16 19:30:47,225 cisTopic     INFO     Returning normalized sample TSS enrichment data
[2m[36m(compute_qc_stats_ray pid=26139)[0m 2022-11-16 19:30:47,392 cisTopic     INFO     Computing FRIP profile for BIO_ddseq_m1c6.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=26139)[0m 2022-11-16 19:30:48,368 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=26139)[0m 2022-11-16 19:31:01,984 cisTopic     INFO     Intersecting fragments with regions


[2m[36m(compute_qc_stats_ray pid=26139)[0m 2022-11-16 19:31:42,754 cisTopic     INFO     Sample BIO_ddseq_m1c6.FULL.fragments.tsv.gz done!


Dumping files in /lustre1/project/stg_00090/scatac_benchmark/public_4_cistopic_consensus/cistopic_qc_out...


2022-11-16 19:36:23,238	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(compute_qc_stats_ray pid=10528)[0m 2022-11-16 19:36:26,623 cisTopic     INFO     Reading BIO_ddseq_m2c3.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=10527)[0m 2022-11-16 19:36:26,708 cisTopic     INFO     Reading BIO_ddseq_m2c4.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=10526)[0m 2022-11-16 19:36:26,749 cisTopic     INFO     Reading TXG_10xmultiome_e18mousebrainfresh.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=10529)[0m 2022-11-16 19:36:26,733 cisTopic     INFO     Reading OHS_s3atac_mouse.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=10525)[0m 2022-11-16 19:36:26,723 cisTopic     INFO     Reading TXG_10xv11_adultmousecortexchromiumx.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=10529)[0m 2022-11-16 19:37:06,962 cisTopic     INFO     Computing barcode rank plot for OHS_s3atac_mouse.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=10529)[0m 2022-11-16 19:37:06,963 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=10529)[0m 2022-11-16 19:37:08,275 cisTopic     INFO     Marking barcodes with more than 10
[2m[36m(compute_qc_stats_ray pid=10529)[0m 2022-11-16 19:37:08,276 cisTopic     INFO     Returning plot data
[2m[36m(compute_qc_stats_ray pid=10529)[0m 2022-11-16 19:37:08,276 cisTopic     INFO     Returning valid barcodes


[2m[36m(compute_qc_stats_ray pid=10529)[0m 2022-11-16 19:37:10,582 cisTopic     INFO     Computing duplicate rate plot for OHS_s3atac_mouse.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=10529)[0m 2022-11-16 19:37:12,530 cisTopic     INFO     Return plot data
[2m[36m(compute_qc_stats_ray pid=10529)[0m 2022-11-16 19:37:12,654 cisTopic     INFO     Computing insert size distribution for OHS_s3atac_mouse.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=10529)[0m 2022-11-16 19:37:12,654 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=10529)[0m 2022-11-16 19:37:13,588 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=10529)[0m 2022-11-16 19:37:30,917 cisTopic     INFO     Computing TSS profile for OHS_s3atac_mouse.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=10529)[0m 2022-11-16 19:37:32,733 cisTopic     INFO     Formatting annnotation
[2m[36m(compute_qc_stats_ray pid=10529)[0m 2022-11-16 19:37:32,775 cisTopic     INFO     Creating coverage matrix


[2m[36m(compute_qc_stats_ray pid=10527)[0m 2022-11-16 19:37:40,511 cisTopic     INFO     Computing barcode rank plot for BIO_ddseq_m2c4.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=10527)[0m 2022-11-16 19:37:40,511 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=10527)[0m 2022-11-16 19:37:46,970 cisTopic     INFO     Marking barcodes with more than 10
[2m[36m(compute_qc_stats_ray pid=10527)[0m 2022-11-16 19:37:47,106 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=10527)[0m 2022-11-16 19:37:47,118 cisTopic     INFO     Returning valid barcodes


[2m[36m(compute_qc_stats_ray pid=10527)[0m 2022-11-16 19:37:49,987 cisTopic     INFO     Computing duplicate rate plot for BIO_ddseq_m2c4.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=10529)[0m 2022-11-16 19:37:51,420 cisTopic     INFO     Coverage matrix done


[2m[36m(compute_qc_stats_ray pid=10529)[0m 2022-11-16 19:37:51,997 cisTopic     INFO     Returning normalized TSS coverage matrix per barcode


[2m[36m(compute_qc_stats_ray pid=10529)[0m 2022-11-16 19:37:52,329 cisTopic     INFO     Returning normalized sample TSS enrichment data
[2m[36m(compute_qc_stats_ray pid=10529)[0m 2022-11-16 19:37:52,415 cisTopic     INFO     Computing FRIP profile for OHS_s3atac_mouse.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=10529)[0m 2022-11-16 19:37:53,654 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=10528)[0m 2022-11-16 19:37:57,394 cisTopic     INFO     Computing barcode rank plot for BIO_ddseq_m2c3.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=10528)[0m 2022-11-16 19:37:57,395 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=10529)[0m 2022-11-16 19:37:57,863 cisTopic     INFO     Intersecting fragments with regions


[2m[36m(compute_qc_stats_ray pid=10527)[0m 2022-11-16 19:37:59,419 cisTopic     INFO     Return plot data
[2m[36m(compute_qc_stats_ray pid=10527)[0m 2022-11-16 19:37:59,560 cisTopic     INFO     Computing insert size distribution for BIO_ddseq_m2c4.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=10527)[0m 2022-11-16 19:37:59,561 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=10527)[0m 2022-11-16 19:38:00,597 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=10528)[0m 2022-11-16 19:38:05,198 cisTopic     INFO     Marking barcodes with more than 10
[2m[36m(compute_qc_stats_ray pid=10528)[0m 2022-11-16 19:38:05,346 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=10528)[0m 2022-11-16 19:38:05,361 cisTopic     INFO     Returning valid barcodes


[2m[36m(compute_qc_stats_ray pid=10529)[0m 2022-11-16 19:38:07,122 cisTopic     INFO     Sample OHS_s3atac_mouse.FULL.fragments.tsv.gz done!


[2m[36m(compute_qc_stats_ray pid=10528)[0m 2022-11-16 19:38:08,822 cisTopic     INFO     Computing duplicate rate plot for BIO_ddseq_m2c3.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=10526)[0m 2022-11-16 19:38:11,268 cisTopic     INFO     Computing barcode rank plot for TXG_10xmultiome_e18mousebrainfresh.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=10526)[0m 2022-11-16 19:38:11,268 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=10526)[0m 2022-11-16 19:38:16,815 cisTopic     INFO     Marking barcodes with more than 10
[2m[36m(compute_qc_stats_ray pid=10526)[0m 2022-11-16 19:38:16,864 cisTopic     INFO     Returning plot data
[2m[36m(compute_qc_stats_ray pid=10526)[0m 2022-11-16 19:38:16,882 cisTopic     INFO     Returning valid barcodes


[2m[36m(compute_qc_stats_ray pid=10528)[0m 2022-11-16 19:38:19,336 cisTopic     INFO     Return plot data


[2m[36m(compute_qc_stats_ray pid=10528)[0m 2022-11-16 19:38:19,550 cisTopic     INFO     Computing insert size distribution for BIO_ddseq_m2c3.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=10528)[0m 2022-11-16 19:38:19,550 cisTopic     INFO     Counting fragments
[2m[36m(compute_qc_stats_ray pid=10527)[0m 2022-11-16 19:38:19,539 cisTopic     INFO     Computing TSS profile for BIO_ddseq_m2c4.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=10528)[0m 2022-11-16 19:38:20,699 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=10526)[0m 2022-11-16 19:38:22,746 cisTopic     INFO     Computing duplicate rate plot for TXG_10xmultiome_e18mousebrainfresh.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=10527)[0m 2022-11-16 19:38:23,873 cisTopic     INFO     Formatting annnotation
[2m[36m(compute_qc_stats_ray pid=10527)[0m 2022-11-16 19:38:23,916 cisTopic     INFO     Creating coverage matrix


[2m[36m(compute_qc_stats_ray pid=10526)[0m 2022-11-16 19:38:30,912 cisTopic     INFO     Return plot data


[2m[36m(compute_qc_stats_ray pid=10526)[0m 2022-11-16 19:38:31,069 cisTopic     INFO     Computing insert size distribution for TXG_10xmultiome_e18mousebrainfresh.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=10526)[0m 2022-11-16 19:38:31,069 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=10526)[0m 2022-11-16 19:38:33,137 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=10528)[0m 2022-11-16 19:38:42,103 cisTopic     INFO     Computing TSS profile for BIO_ddseq_m2c3.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=10528)[0m 2022-11-16 19:38:47,458 cisTopic     INFO     Formatting annnotation
[2m[36m(compute_qc_stats_ray pid=10528)[0m 2022-11-16 19:38:47,505 cisTopic     INFO     Creating coverage matrix


[2m[36m(compute_qc_stats_ray pid=10527)[0m 2022-11-16 19:39:16,101 cisTopic     INFO     Coverage matrix done


[2m[36m(compute_qc_stats_ray pid=10526)[0m 2022-11-16 19:39:17,140 cisTopic     INFO     Computing TSS profile for TXG_10xmultiome_e18mousebrainfresh.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=10526)[0m 2022-11-16 19:39:23,962 cisTopic     INFO     Formatting annnotation
[2m[36m(compute_qc_stats_ray pid=10526)[0m 2022-11-16 19:39:24,010 cisTopic     INFO     Creating coverage matrix


[2m[36m(compute_qc_stats_ray pid=10528)[0m 2022-11-16 19:39:49,510 cisTopic     INFO     Coverage matrix done


[2m[36m(compute_qc_stats_ray pid=10527)[0m 2022-11-16 19:40:05,771 cisTopic     INFO     Returning normalized TSS coverage matrix per barcode


[2m[36m(compute_qc_stats_ray pid=10527)[0m 2022-11-16 19:40:38,018 cisTopic     INFO     Returning normalized sample TSS enrichment data
[2m[36m(compute_qc_stats_ray pid=10527)[0m 2022-11-16 19:40:38,129 cisTopic     INFO     Computing FRIP profile for BIO_ddseq_m2c4.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=10527)[0m 2022-11-16 19:40:38,785 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=10528)[0m 2022-11-16 19:40:43,996 cisTopic     INFO     Returning normalized TSS coverage matrix per barcode


[2m[36m(compute_qc_stats_ray pid=10527)[0m 2022-11-16 19:40:44,665 cisTopic     INFO     Intersecting fragments with regions


[2m[36m(compute_qc_stats_ray pid=10527)[0m 2022-11-16 19:41:02,712 cisTopic     INFO     Sample BIO_ddseq_m2c4.FULL.fragments.tsv.gz done!


[2m[36m(compute_qc_stats_ray pid=10526)[0m 2022-11-16 19:41:08,302 cisTopic     INFO     Coverage matrix done


[2m[36m(compute_qc_stats_ray pid=10525)[0m 2022-11-16 19:41:14,321 cisTopic     INFO     Computing barcode rank plot for TXG_10xv11_adultmousecortexchromiumx.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=10525)[0m 2022-11-16 19:41:14,321 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=10528)[0m 2022-11-16 19:41:19,311 cisTopic     INFO     Returning normalized sample TSS enrichment data
[2m[36m(compute_qc_stats_ray pid=10528)[0m 2022-11-16 19:41:19,407 cisTopic     INFO     Computing FRIP profile for BIO_ddseq_m2c3.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=10528)[0m 2022-11-16 19:41:20,177 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=10525)[0m 2022-11-16 19:41:24,969 cisTopic     INFO     Marking barcodes with more than 10
[2m[36m(compute_qc_stats_ray pid=10525)[0m 2022-11-16 19:41:25,027 cisTopic     INFO     Returning plot data
[2m[36m(compute_qc_stats_ray pid=10525)[0m 2022-11-16 19:41:25,059 cisTopic     INFO     Returning valid barcodes


[2m[36m(compute_qc_stats_ray pid=10528)[0m 2022-11-16 19:41:26,898 cisTopic     INFO     Intersecting fragments with regions


[2m[36m(compute_qc_stats_ray pid=10525)[0m 2022-11-16 19:41:39,016 cisTopic     INFO     Computing duplicate rate plot for TXG_10xv11_adultmousecortexchromiumx.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=10528)[0m 2022-11-16 19:41:46,435 cisTopic     INFO     Sample BIO_ddseq_m2c3.FULL.fragments.tsv.gz done!


[2m[36m(compute_qc_stats_ray pid=10525)[0m 2022-11-16 19:41:55,597 cisTopic     INFO     Return plot data


[2m[36m(compute_qc_stats_ray pid=10525)[0m 2022-11-16 19:41:55,727 cisTopic     INFO     Computing insert size distribution for TXG_10xv11_adultmousecortexchromiumx.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=10525)[0m 2022-11-16 19:41:55,727 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=10525)[0m 2022-11-16 19:42:00,631 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=10526)[0m 2022-11-16 19:42:03,628 cisTopic     INFO     Returning normalized TSS coverage matrix per barcode


[2m[36m(compute_qc_stats_ray pid=10526)[0m 2022-11-16 19:42:36,882 cisTopic     INFO     Returning normalized sample TSS enrichment data
[2m[36m(compute_qc_stats_ray pid=10526)[0m 2022-11-16 19:42:36,981 cisTopic     INFO     Computing FRIP profile for TXG_10xmultiome_e18mousebrainfresh.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=10526)[0m 2022-11-16 19:42:37,535 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=10526)[0m 2022-11-16 19:42:47,210 cisTopic     INFO     Intersecting fragments with regions


[2m[36m(compute_qc_stats_ray pid=10526)[0m 2022-11-16 19:43:12,650 cisTopic     INFO     Sample TXG_10xmultiome_e18mousebrainfresh.FULL.fragments.tsv.gz done!


[2m[36m(compute_qc_stats_ray pid=10525)[0m 2022-11-16 19:43:45,822 cisTopic     INFO     Computing TSS profile for TXG_10xv11_adultmousecortexchromiumx.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=10525)[0m 2022-11-16 19:44:03,618 cisTopic     INFO     Formatting annnotation
[2m[36m(compute_qc_stats_ray pid=10525)[0m 2022-11-16 19:44:03,668 cisTopic     INFO     Creating coverage matrix


[2m[36m(compute_qc_stats_ray pid=10525)[0m 2022-11-16 19:46:30,691 cisTopic     INFO     Coverage matrix done


[2m[36m(compute_qc_stats_ray pid=10525)[0m 2022-11-16 19:48:29,051 cisTopic     INFO     Returning normalized TSS coverage matrix per barcode


[2m[36m(compute_qc_stats_ray pid=10525)[0m 2022-11-16 19:49:40,433 cisTopic     INFO     Returning normalized sample TSS enrichment data
[2m[36m(compute_qc_stats_ray pid=10525)[0m 2022-11-16 19:49:40,562 cisTopic     INFO     Computing FRIP profile for TXG_10xv11_adultmousecortexchromiumx.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=10525)[0m 2022-11-16 19:49:41,648 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=10525)[0m 2022-11-16 19:50:08,157 cisTopic     INFO     Intersecting fragments with regions


[2m[36m(compute_qc_stats_ray pid=10525)[0m 2022-11-16 19:51:15,694 cisTopic     INFO     Sample TXG_10xv11_adultmousecortexchromiumx.FULL.fragments.tsv.gz done!


Dumping files in /lustre1/project/stg_00090/scatac_benchmark/public_4_cistopic_consensus/cistopic_qc_out...


2022-11-16 19:54:58,513	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(compute_qc_stats_ray pid=18855)[0m 2022-11-16 19:55:01,925 cisTopic     INFO     Reading VIB_hydrop_1.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=18852)[0m 2022-11-16 19:55:01,913 cisTopic     INFO     Reading TXG_10xv2_adultmousecortexchromiumx.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=18854)[0m 2022-11-16 19:55:01,913 cisTopic     INFO     Reading TXG_10xv2_adultmousecortex.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=18856)[0m 2022-11-16 19:55:01,953 cisTopic     INFO     Reading VIB_hydrop_2.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=18853)[0m 2022-11-16 19:55:01,972 cisTopic     INFO     Reading TXG_10xv1_adultmousefresh.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=18853)[0m 2022-11-16 19:56:50,189 cisTopic     INFO     Computing barcode rank plot for TXG_10xv1_adultmousefresh.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=18853)[0m 2022-11-16 19:56:50,190 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=18853)[0m 2022-11-16 19:56:54,967 cisTopic     INFO     Marking barcodes with more than 10
[2m[36m(compute_qc_stats_ray pid=18853)[0m 2022-11-16 19:56:55,008 cisTopic     INFO     Returning plot data
[2m[36m(compute_qc_stats_ray pid=18853)[0m 2022-11-16 19:56:55,022 cisTopic     INFO     Returning valid barcodes


[2m[36m(compute_qc_stats_ray pid=18853)[0m 2022-11-16 19:57:00,541 cisTopic     INFO     Computing duplicate rate plot for TXG_10xv1_adultmousefresh.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=18853)[0m 2022-11-16 19:57:07,493 cisTopic     INFO     Return plot data
[2m[36m(compute_qc_stats_ray pid=18853)[0m 2022-11-16 19:57:07,634 cisTopic     INFO     Computing insert size distribution for TXG_10xv1_adultmousefresh.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=18853)[0m 2022-11-16 19:57:07,634 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=18853)[0m 2022-11-16 19:57:09,713 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=18853)[0m 2022-11-16 19:57:52,801 cisTopic     INFO     Computing TSS profile for TXG_10xv1_adultmousefresh.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=18853)[0m 2022-11-16 19:57:59,237 cisTopic     INFO     Formatting annnotation
[2m[36m(compute_qc_stats_ray pid=18853)[0m 2022-11-16 19:57:59,287 cisTopic     INFO     Creating coverage matrix


[2m[36m(compute_qc_stats_ray pid=18856)[0m 2022-11-16 19:58:24,725 cisTopic     INFO     Computing barcode rank plot for VIB_hydrop_2.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=18856)[0m 2022-11-16 19:58:24,725 cisTopic     INFO     Counting fragments
[2m[36m(compute_qc_stats_ray pid=18855)[0m 2022-11-16 19:58:24,654 cisTopic     INFO     Computing barcode rank plot for VIB_hydrop_1.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=18855)[0m 2022-11-16 19:58:24,654 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=18856)[0m 2022-11-16 19:58:31,720 cisTopic     INFO     Marking barcodes with more than 10
[2m[36m(compute_qc_stats_ray pid=18856)[0m 2022-11-16 19:58:31,804 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=18856)[0m 2022-11-16 19:58:31,969 cisTopic     INFO     Returning valid barcodes


[2m[36m(compute_qc_stats_ray pid=18855)[0m 2022-11-16 19:58:32,316 cisTopic     INFO     Marking barcodes with more than 10
[2m[36m(compute_qc_stats_ray pid=18855)[0m 2022-11-16 19:58:32,396 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=18855)[0m 2022-11-16 19:58:32,557 cisTopic     INFO     Returning valid barcodes


[2m[36m(compute_qc_stats_ray pid=18856)[0m 2022-11-16 19:58:36,423 cisTopic     INFO     Computing duplicate rate plot for VIB_hydrop_2.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=18855)[0m 2022-11-16 19:58:37,419 cisTopic     INFO     Computing duplicate rate plot for VIB_hydrop_1.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=18856)[0m 2022-11-16 19:58:49,212 cisTopic     INFO     Return plot data
[2m[36m(compute_qc_stats_ray pid=18855)[0m 2022-11-16 19:58:49,216 cisTopic     INFO     Return plot data
[2m[36m(compute_qc_stats_ray pid=18856)[0m 2022-11-16 19:58:49,387 cisTopic     INFO     Computing insert size distribution for VIB_hydrop_2.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=18856)[0m 2022-11-16 19:58:49,387 cisTopic     INFO     Counting fragments
[2m[36m(compute_qc_stats_ray pid=18855)[0m 2022-11-16 19:58:49,384 cisTopic     INFO     Computing insert size distribution for VIB_hydrop_1.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=18855)[0m 2022-11-16 19:58:49,385 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=18855)[0m 2022-11-16 19:58:50,665 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=18856)[0m 2022-11-16 19:58:50,888 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=18855)[0m 2022-11-16 19:59:18,643 cisTopic     INFO     Computing TSS profile for VIB_hydrop_1.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=18856)[0m 2022-11-16 19:59:25,876 cisTopic     INFO     Computing TSS profile for VIB_hydrop_2.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=18853)[0m 2022-11-16 19:59:26,408 cisTopic     INFO     Coverage matrix done


[2m[36m(compute_qc_stats_ray pid=18855)[0m 2022-11-16 19:59:35,871 cisTopic     INFO     Formatting annnotation
[2m[36m(compute_qc_stats_ray pid=18855)[0m 2022-11-16 19:59:35,942 cisTopic     INFO     Creating coverage matrix


[2m[36m(compute_qc_stats_ray pid=18856)[0m 2022-11-16 19:59:44,094 cisTopic     INFO     Formatting annnotation
[2m[36m(compute_qc_stats_ray pid=18856)[0m 2022-11-16 19:59:44,175 cisTopic     INFO     Creating coverage matrix


[2m[36m(compute_qc_stats_ray pid=18854)[0m 2022-11-16 19:59:57,502 cisTopic     INFO     Computing barcode rank plot for TXG_10xv2_adultmousecortex.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=18854)[0m 2022-11-16 19:59:57,503 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=18854)[0m 2022-11-16 20:00:09,182 cisTopic     INFO     Marking barcodes with more than 10
[2m[36m(compute_qc_stats_ray pid=18854)[0m 2022-11-16 20:00:09,242 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=18854)[0m 2022-11-16 20:00:09,325 cisTopic     INFO     Returning valid barcodes


[2m[36m(compute_qc_stats_ray pid=18852)[0m 2022-11-16 20:00:14,893 cisTopic     INFO     Computing barcode rank plot for TXG_10xv2_adultmousecortexchromiumx.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=18852)[0m 2022-11-16 20:00:14,893 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=18854)[0m 2022-11-16 20:00:21,902 cisTopic     INFO     Computing duplicate rate plot for TXG_10xv2_adultmousecortex.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=18855)[0m   TSS_matrix = cut_sites_TSS.groupby(


[2m[36m(compute_qc_stats_ray pid=18852)[0m 2022-11-16 20:00:27,637 cisTopic     INFO     Marking barcodes with more than 10
[2m[36m(compute_qc_stats_ray pid=18852)[0m 2022-11-16 20:00:27,694 cisTopic     INFO     Returning plot data
[2m[36m(compute_qc_stats_ray pid=18852)[0m 2022-11-16 20:00:27,733 cisTopic     INFO     Returning valid barcodes


[2m[36m(compute_qc_stats_ray pid=18853)[0m 2022-11-16 20:00:27,898 cisTopic     INFO     Returning normalized TSS coverage matrix per barcode


[2m[36m(compute_qc_stats_ray pid=18856)[0m   TSS_matrix = cut_sites_TSS.groupby(


[2m[36m(compute_qc_stats_ray pid=18854)[0m 2022-11-16 20:00:38,354 cisTopic     INFO     Return plot data


[2m[36m(compute_qc_stats_ray pid=18854)[0m 2022-11-16 20:00:38,492 cisTopic     INFO     Computing insert size distribution for TXG_10xv2_adultmousecortex.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=18854)[0m 2022-11-16 20:00:38,493 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=18852)[0m 2022-11-16 20:00:41,228 cisTopic     INFO     Computing duplicate rate plot for TXG_10xv2_adultmousecortexchromiumx.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=18854)[0m 2022-11-16 20:00:43,501 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=18852)[0m 2022-11-16 20:00:58,443 cisTopic     INFO     Return plot data
[2m[36m(compute_qc_stats_ray pid=18852)[0m 2022-11-16 20:00:58,602 cisTopic     INFO     Computing insert size distribution for TXG_10xv2_adultmousecortexchromiumx.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=18852)[0m 2022-11-16 20:00:58,602 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=18852)[0m 2022-11-16 20:01:03,570 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=18853)[0m 2022-11-16 20:01:06,721 cisTopic     INFO     Returning normalized sample TSS enrichment data
[2m[36m(compute_qc_stats_ray pid=18853)[0m 2022-11-16 20:01:06,860 cisTopic     INFO     Computing FRIP profile for TXG_10xv1_adultmousefresh.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=18853)[0m 2022-11-16 20:01:07,827 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=18856)[0m 2022-11-16 20:01:10,542 cisTopic     INFO     Coverage matrix done


[2m[36m(compute_qc_stats_ray pid=18855)[0m 2022-11-16 20:01:13,888 cisTopic     INFO     Coverage matrix done


[2m[36m(compute_qc_stats_ray pid=18853)[0m 2022-11-16 20:01:18,690 cisTopic     INFO     Intersecting fragments with regions


[2m[36m(compute_qc_stats_ray pid=18853)[0m 2022-11-16 20:01:59,256 cisTopic     INFO     Sample TXG_10xv1_adultmousefresh.FULL.fragments.tsv.gz done!


[2m[36m(compute_qc_stats_ray pid=18854)[0m 2022-11-16 20:02:19,668 cisTopic     INFO     Computing TSS profile for TXG_10xv2_adultmousecortex.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=18854)[0m 2022-11-16 20:02:40,863 cisTopic     INFO     Formatting annnotation
[2m[36m(compute_qc_stats_ray pid=18854)[0m 2022-11-16 20:02:40,921 cisTopic     INFO     Creating coverage matrix


[2m[36m(compute_qc_stats_ray pid=18852)[0m 2022-11-16 20:02:42,777 cisTopic     INFO     Computing TSS profile for TXG_10xv2_adultmousecortexchromiumx.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=18852)[0m 2022-11-16 20:03:04,227 cisTopic     INFO     Formatting annnotation
[2m[36m(compute_qc_stats_ray pid=18852)[0m 2022-11-16 20:03:04,286 cisTopic     INFO     Creating coverage matrix


[2m[36m(compute_qc_stats_ray pid=18854)[0m 2022-11-16 20:05:19,908 cisTopic     INFO     Coverage matrix done


[2m[36m(compute_qc_stats_ray pid=18852)[0m 2022-11-16 20:05:41,148 cisTopic     INFO     Coverage matrix done


[2m[36m(compute_qc_stats_ray pid=18854)[0m 2022-11-16 20:07:12,451 cisTopic     INFO     Returning normalized TSS coverage matrix per barcode


[2m[36m(compute_qc_stats_ray pid=18852)[0m 2022-11-16 20:07:28,007 cisTopic     INFO     Returning normalized TSS coverage matrix per barcode


[2m[36m(compute_qc_stats_ray pid=18854)[0m 2022-11-16 20:08:22,493 cisTopic     INFO     Returning normalized sample TSS enrichment data
[2m[36m(compute_qc_stats_ray pid=18854)[0m 2022-11-16 20:08:22,610 cisTopic     INFO     Computing FRIP profile for TXG_10xv2_adultmousecortex.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=18854)[0m 2022-11-16 20:08:24,065 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=18852)[0m 2022-11-16 20:08:34,105 cisTopic     INFO     Returning normalized sample TSS enrichment data
[2m[36m(compute_qc_stats_ray pid=18852)[0m 2022-11-16 20:08:34,222 cisTopic     INFO     Computing FRIP profile for TXG_10xv2_adultmousecortexchromiumx.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=18852)[0m 2022-11-16 20:08:35,167 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=18854)[0m 2022-11-16 20:08:49,644 cisTopic     INFO     Intersecting fragments with regions


[2m[36m(compute_qc_stats_ray pid=18852)[0m 2022-11-16 20:09:00,002 cisTopic     INFO     Intersecting fragments with regions


[2m[36m(compute_qc_stats_ray pid=18855)[0m 2022-11-16 20:09:36,925 cisTopic     INFO     Returning normalized TSS coverage matrix per barcode


[2m[36m(compute_qc_stats_ray pid=18856)[0m 2022-11-16 20:09:55,081 cisTopic     INFO     Returning normalized TSS coverage matrix per barcode


[2m[36m(compute_qc_stats_ray pid=18854)[0m 2022-11-16 20:10:08,100 cisTopic     INFO     Sample TXG_10xv2_adultmousecortex.FULL.fragments.tsv.gz done!


[2m[36m(compute_qc_stats_ray pid=18852)[0m 2022-11-16 20:10:17,051 cisTopic     INFO     Sample TXG_10xv2_adultmousecortexchromiumx.FULL.fragments.tsv.gz done!


[2m[36m(compute_qc_stats_ray pid=18855)[0m 2022-11-16 20:14:25,118 cisTopic     INFO     Returning normalized sample TSS enrichment data


[2m[36m(compute_qc_stats_ray pid=18855)[0m 2022-11-16 20:14:25,363 cisTopic     INFO     Computing FRIP profile for VIB_hydrop_1.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=18855)[0m 2022-11-16 20:14:26,712 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=18855)[0m 2022-11-16 20:14:35,753 cisTopic     INFO     Intersecting fragments with regions


[2m[36m(compute_qc_stats_ray pid=18856)[0m 2022-11-16 20:14:48,582 cisTopic     INFO     Returning normalized sample TSS enrichment data


[2m[36m(compute_qc_stats_ray pid=18856)[0m 2022-11-16 20:14:48,812 cisTopic     INFO     Computing FRIP profile for VIB_hydrop_2.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=18856)[0m 2022-11-16 20:14:49,650 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=18855)[0m 2022-11-16 20:14:52,532 cisTopic     INFO     Sample VIB_hydrop_1.FULL.fragments.tsv.gz done!


[2m[36m(compute_qc_stats_ray pid=18856)[0m 2022-11-16 20:14:58,053 cisTopic     INFO     Intersecting fragments with regions


[2m[36m(compute_qc_stats_ray pid=18856)[0m 2022-11-16 20:15:12,022 cisTopic     INFO     Sample VIB_hydrop_2.FULL.fragments.tsv.gz done!


Dumping files in /lustre1/project/stg_00090/scatac_benchmark/public_4_cistopic_consensus/cistopic_qc_out...


2022-11-16 20:24:21,136 cisTopic     INFO     n_cpu is larger than the number of samples. Setting n_cpu to the number of samples


2022-11-16 20:24:24,548	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(compute_qc_stats_ray pid=35308)[0m 2022-11-16 20:24:27,766 cisTopic     INFO     Reading VIB_hydrop_5.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=35309)[0m 2022-11-16 20:24:27,778 cisTopic     INFO     Reading VIB_hydrop_3.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=35307)[0m 2022-11-16 20:24:27,894 cisTopic     INFO     Reading VIB_hydrop_4.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=35307)[0m 2022-11-16 20:27:46,834 cisTopic     INFO     Computing barcode rank plot for VIB_hydrop_4.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=35307)[0m 2022-11-16 20:27:46,835 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=35308)[0m 2022-11-16 20:27:48,359 cisTopic     INFO     Computing barcode rank plot for VIB_hydrop_5.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=35308)[0m 2022-11-16 20:27:48,359 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=35307)[0m 2022-11-16 20:27:54,137 cisTopic     INFO     Marking barcodes with more than 10
[2m[36m(compute_qc_stats_ray pid=35307)[0m 2022-11-16 20:27:54,214 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=35307)[0m 2022-11-16 20:27:54,364 cisTopic     INFO     Returning valid barcodes


[2m[36m(compute_qc_stats_ray pid=35308)[0m 2022-11-16 20:27:55,140 cisTopic     INFO     Marking barcodes with more than 10
[2m[36m(compute_qc_stats_ray pid=35308)[0m 2022-11-16 20:27:55,221 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=35308)[0m 2022-11-16 20:27:55,401 cisTopic     INFO     Returning valid barcodes


[2m[36m(compute_qc_stats_ray pid=35307)[0m 2022-11-16 20:27:59,125 cisTopic     INFO     Computing duplicate rate plot for VIB_hydrop_4.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=35308)[0m 2022-11-16 20:27:59,663 cisTopic     INFO     Computing duplicate rate plot for VIB_hydrop_5.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=35307)[0m 2022-11-16 20:28:10,292 cisTopic     INFO     Return plot data


[2m[36m(compute_qc_stats_ray pid=35307)[0m 2022-11-16 20:28:10,465 cisTopic     INFO     Computing insert size distribution for VIB_hydrop_4.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=35307)[0m 2022-11-16 20:28:10,466 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=35307)[0m 2022-11-16 20:28:11,736 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=35308)[0m 2022-11-16 20:28:14,236 cisTopic     INFO     Return plot data


[2m[36m(compute_qc_stats_ray pid=35308)[0m 2022-11-16 20:28:14,406 cisTopic     INFO     Computing insert size distribution for VIB_hydrop_5.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=35308)[0m 2022-11-16 20:28:14,406 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=35308)[0m 2022-11-16 20:28:15,629 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=35309)[0m 2022-11-16 20:28:29,203 cisTopic     INFO     Computing barcode rank plot for VIB_hydrop_3.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=35309)[0m 2022-11-16 20:28:29,203 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=35309)[0m 2022-11-16 20:28:38,962 cisTopic     INFO     Marking barcodes with more than 10
[2m[36m(compute_qc_stats_ray pid=35309)[0m 2022-11-16 20:28:39,045 cisTopic     INFO     Returning plot data
[2m[36m(compute_qc_stats_ray pid=35307)[0m 2022-11-16 20:28:39,090 cisTopic     INFO     Computing TSS profile for VIB_hydrop_4.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=35309)[0m 2022-11-16 20:28:39,391 cisTopic     INFO     Returning valid barcodes


[2m[36m(compute_qc_stats_ray pid=35309)[0m 2022-11-16 20:28:45,098 cisTopic     INFO     Computing duplicate rate plot for VIB_hydrop_3.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=35308)[0m 2022-11-16 20:28:47,002 cisTopic     INFO     Computing TSS profile for VIB_hydrop_5.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=35307)[0m 2022-11-16 20:28:53,755 cisTopic     INFO     Formatting annnotation
[2m[36m(compute_qc_stats_ray pid=35307)[0m 2022-11-16 20:28:53,824 cisTopic     INFO     Creating coverage matrix


[2m[36m(compute_qc_stats_ray pid=35309)[0m 2022-11-16 20:28:59,063 cisTopic     INFO     Return plot data


[2m[36m(compute_qc_stats_ray pid=35309)[0m 2022-11-16 20:28:59,238 cisTopic     INFO     Computing insert size distribution for VIB_hydrop_3.FULL.fragments.tsv.gz
[2m[36m(compute_qc_stats_ray pid=35309)[0m 2022-11-16 20:28:59,238 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=35309)[0m 2022-11-16 20:29:00,587 cisTopic     INFO     Returning plot data


[2m[36m(compute_qc_stats_ray pid=35308)[0m 2022-11-16 20:29:04,411 cisTopic     INFO     Formatting annnotation
[2m[36m(compute_qc_stats_ray pid=35308)[0m 2022-11-16 20:29:04,486 cisTopic     INFO     Creating coverage matrix


[2m[36m(compute_qc_stats_ray pid=35309)[0m 2022-11-16 20:29:38,335 cisTopic     INFO     Computing TSS profile for VIB_hydrop_3.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=35307)[0m   TSS_matrix = cut_sites_TSS.groupby(


[2m[36m(compute_qc_stats_ray pid=35308)[0m   TSS_matrix = cut_sites_TSS.groupby(


[2m[36m(compute_qc_stats_ray pid=35309)[0m 2022-11-16 20:29:59,339 cisTopic     INFO     Formatting annnotation
[2m[36m(compute_qc_stats_ray pid=35309)[0m 2022-11-16 20:29:59,420 cisTopic     INFO     Creating coverage matrix


[2m[36m(compute_qc_stats_ray pid=35307)[0m 2022-11-16 20:30:35,930 cisTopic     INFO     Coverage matrix done


[2m[36m(compute_qc_stats_ray pid=35308)[0m 2022-11-16 20:30:40,717 cisTopic     INFO     Coverage matrix done


[2m[36m(compute_qc_stats_ray pid=35309)[0m   TSS_matrix = cut_sites_TSS.groupby(


[2m[36m(compute_qc_stats_ray pid=35309)[0m 2022-11-16 20:32:03,540 cisTopic     INFO     Coverage matrix done


[2m[36m(compute_qc_stats_ray pid=35307)[0m 2022-11-16 20:39:04,847 cisTopic     INFO     Returning normalized TSS coverage matrix per barcode


[2m[36m(compute_qc_stats_ray pid=35308)[0m 2022-11-16 20:40:03,262 cisTopic     INFO     Returning normalized TSS coverage matrix per barcode


[2m[36m(compute_qc_stats_ray pid=35309)[0m 2022-11-16 20:41:22,047 cisTopic     INFO     Returning normalized TSS coverage matrix per barcode


[2m[36m(compute_qc_stats_ray pid=35307)[0m 2022-11-16 20:44:15,833 cisTopic     INFO     Returning normalized sample TSS enrichment data


[2m[36m(compute_qc_stats_ray pid=35307)[0m 2022-11-16 20:44:16,149 cisTopic     INFO     Computing FRIP profile for VIB_hydrop_4.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=35307)[0m 2022-11-16 20:44:17,239 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=35307)[0m 2022-11-16 20:44:28,190 cisTopic     INFO     Intersecting fragments with regions


[2m[36m(compute_qc_stats_ray pid=35307)[0m 2022-11-16 20:44:42,429 cisTopic     INFO     Sample VIB_hydrop_4.FULL.fragments.tsv.gz done!


[2m[36m(compute_qc_stats_ray pid=35308)[0m 2022-11-16 20:45:32,343 cisTopic     INFO     Returning normalized sample TSS enrichment data


[2m[36m(compute_qc_stats_ray pid=35308)[0m 2022-11-16 20:45:32,598 cisTopic     INFO     Computing FRIP profile for VIB_hydrop_5.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=35308)[0m 2022-11-16 20:45:33,422 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=35308)[0m 2022-11-16 20:45:42,673 cisTopic     INFO     Intersecting fragments with regions


[2m[36m(compute_qc_stats_ray pid=35308)[0m 2022-11-16 20:45:58,340 cisTopic     INFO     Sample VIB_hydrop_5.FULL.fragments.tsv.gz done!


[2m[36m(compute_qc_stats_ray pid=35309)[0m 2022-11-16 20:47:16,059 cisTopic     INFO     Returning normalized sample TSS enrichment data


[2m[36m(compute_qc_stats_ray pid=35309)[0m 2022-11-16 20:47:16,320 cisTopic     INFO     Computing FRIP profile for VIB_hydrop_3.FULL.fragments.tsv.gz


[2m[36m(compute_qc_stats_ray pid=35309)[0m 2022-11-16 20:47:17,418 cisTopic     INFO     Counting fragments


[2m[36m(compute_qc_stats_ray pid=35309)[0m 2022-11-16 20:47:33,138 cisTopic     INFO     Intersecting fragments with regions


[2m[36m(compute_qc_stats_ray pid=35309)[0m 2022-11-16 20:47:52,454 cisTopic     INFO     Sample VIB_hydrop_3.FULL.fragments.tsv.gz done!


Dumping files in /lustre1/project/stg_00090/scatac_benchmark/public_4_cistopic_consensus/cistopic_qc_out...


# Plot

Calculating a KDE is simultaneously expensive and scales poorly with increasing n. Therefore, I wrote a multithreaded script that divides the QC array into equal parts (interleaved to avoid biases in the order!) and performs a KDE calculation on each part. Here, Otsu thresholding is used to find the right threshold for minimum fragments and minimum TSS enrichment. ddseq samples have a significantly higher noise floor than the other samples when it comes to fragment distribution. Therefore, the otsu algorithm is performed on all barcodes with a minimum of 300 fragments for the ddseq samples, and a minimum of 100 fragments for all the other samples. I tried to perform this filtering completely independent of sample/technique (e.g. using gaussian mixture modeling, Jenks natural breaks, or multiple step Otsu thresholding) but found that no solution worked perfectly for all samples.

This is regulated by the code below in qc_plots.py:
```
min_otsu_frags_dict = {}
for fragments_file in fragments_list:
    sample = fragments_file.split("/")[-1].split(".")[0]
    tech = sample.split('_')[1]
    if tech == "ddseq":
        if sample == "BIO_ddseq_1":
            min_otsu_frags_dict[sample] = 600
        else:
            min_otsu_frags_dict[sample] = 300
    elif tech == "hydrop":
        min_otsu_frags_dict[sample] = 300
    else:
        min_otsu_frags_dict[sample] = 100
```

In [14]:
!cat ../0_resources/scripts/qc_plots_public.py

import kde
import pycisTopic
import glob
import os
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import multiprocess as mp
from multiprocess import Pool
import pprint as pp

def histogram(array, nbins=100):
    """
    Draw histogram from distribution and identify centers.
    Parameters
    ---------
    array: `class::np.array`
            Scores distribution
    nbins: int
            Number of bins to use in the histogram
    Return
    ---------
    float
            Histogram values and bin centers.
    """
    array = array.ravel().flatten()
    hist, bin_edges = np.histogram(array, bins=nbins, range=None)
    bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2.0
    return hist, bin_centers


def threshold_otsu(array, nbins=100, min_value=100):
    """
    Apply Otsu threshold on topic-region distributions [Otsu, 1979].
    Parameters
    ---------
    array: `class::np.array`
   

Since multiprocessing does not work with jupyter notebooks, run the following code in terminal:

```
mkdir plots_qc
mkdir selected_barcodes
SIF=../0_resources/cistopic_image/20220722_pycistopic.sif
singularity exec \
    --cleanenv \
    -H $PWD:/home \
    -B /lustre1,/staging,/data,${VSC_SCRATCH},${VSC_SCRATCH}/tmp:/tmp,${HOME}/.nextflow/assets/ \
    $SIF \
    python /lustre1/project/stg_00090/scatac_benchmark/0_resources/scripts/qc_plots_public.py
```

And then open the plots:

In [15]:
metadata_bc_pkl_list = sorted(glob.glob("cistopic_qc_out/*metadata_bc.pkl"))
metadata_bc_pkl_path_dict = {}
for metadata_bc_pkl_path in metadata_bc_pkl_list:
    sample = metadata_bc_pkl_path.split("/")[-1].split("__")[0]
    metadata_bc_pkl_path_dict[sample] = metadata_bc_pkl_path

for sample in metadata_bc_pkl_path_dict.keys():
    if os.path.exists(f"selected_barcodes/{sample}_bc_passing_filters_otsu.pkl"):
        print(f"{sample} bc passing filters exists, printing img and skipping")
        display(Image(f"plots_qc/{sample}_qc_otsu.png"))
    else:
        print(
            f"{sample} bc passing filters does not exist yet, generate using qc_plots.py script!"
        )

BIO_ddseq_m1c1.FULL.fragments.tsv.gz bc passing filters does not exist yet, generate using qc_plots.py script!
BIO_ddseq_m1c2.FULL.fragments.tsv.gz bc passing filters does not exist yet, generate using qc_plots.py script!
BIO_ddseq_m1c3.FULL.fragments.tsv.gz bc passing filters does not exist yet, generate using qc_plots.py script!
BIO_ddseq_m1c4.FULL.fragments.tsv.gz bc passing filters does not exist yet, generate using qc_plots.py script!
BIO_ddseq_m1c5.FULL.fragments.tsv.gz bc passing filters does not exist yet, generate using qc_plots.py script!
BIO_ddseq_m1c6.FULL.fragments.tsv.gz bc passing filters does not exist yet, generate using qc_plots.py script!
BIO_ddseq_m1c7.FULL.fragments.tsv.gz bc passing filters does not exist yet, generate using qc_plots.py script!
BIO_ddseq_m1c8.FULL.fragments.tsv.gz bc passing filters does not exist yet, generate using qc_plots.py script!
BIO_ddseq_m2c1.FULL.fragments.tsv.gz bc passing filters does not exist yet, generate using qc_plots.py script!
B

The biorad plots look a bit weird. I want to check significant overlap in barcodes between the biorad samples to check if something may have gone wrong when I merged the fastqs per sample.

In [16]:
import pandas as pd
import glob

In [17]:
df_merged = pd.DataFrame()
for file in glob.glob("selected_barcodes/BIO*.tsv"):
    df = pd.read_csv(file, sep="\t", index_col=0)
    df["raw_bc"] = [x.split("___")[0] for x in df.index]
    df["sample"] = [x.split("___")[1].replace("fragments.tsv.gz", "") for x in df.index]
    df_merged = pd.concat([df_merged, df])

In [18]:
df_merged

<span id="papermill-error-cell" style="color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;">Execution using papermill encountered an exception here and stopped:</span>

In [19]:
test = df_merged.groupby("sample")["raw_bc"].value_counts()

KeyError: 'sample'

In [None]:
test_df = test.unstack().T

In [None]:
test_df.sum(axis=1).sort_values(ascending=False)

In [None]:
test_df

In [None]:
for file in glob.glob("selected_barcodes/BIO*.tsv"):
    print(file)

In [None]:
from upsetplot import generate_counts

In [None]:
import pandas as pd
from upsetplot import plot
set_names = ['set1', 'set2', 'set3']
all_elems = set1.union(set2).union(set3)
df = pd.DataFrame([[e in set1, e in set2, e in set3] for e in all_elems], columns = set_names)
df_up = df.groupby(set_names).size()
plot(df_up, orientation='horizontal')

In [None]:
all_elems = list(set().union(*sets))
df = pd.DataFrame([[e in st for st in sets] for e in all_elems], columns = set_names)

In [None]:
import numpy as np

In [None]:
df_merged = pd.DataFrame()
bc_set_dict = {}
for file in sorted(glob.glob("selected_barcodes/BIO*.tsv")):
    sample = file.split("/")[-1].split(".")[0]
    df = pd.read_csv(file, sep="\t", index_col=0)
    df["raw_bc"] = [x.split("___")[0] for x in df.index]
    bc_set = set(np.concatenate([x.split("_") for x in df["raw_bc"]]).ravel())

    bc_set_dict[sample] = bc_set

In [None]:
print(sorted(bc_set_dict.keys()))
set_list = [bc_set_dict[x] for x in sorted(bc_set_dict.keys())]

In [None]:
all_elems = list(set().union(*set_list))
df = pd.DataFrame([[e in st for st in set_list] for e in all_elems], columns = sorted(bc_set_dict.keys()))

In [None]:
df

In [None]:
from upsetplot import plot
df_up = df.groupby(list(bc_set_dict.keys())).size()
plot(df_up, orientation='horizontal')

Ok, so nothing suspicious.

# override s3-atac

In [None]:
import pickle
import pandas as pd

with open("cistopic_qc_out/OHS_s3atac_mouse.FULL__metadata_bc.pkl", "rb") as f:
    df = pickle.load(f)

In [None]:
with open(
    "selected_barcodes/OHS_s3atac_mouse.FULL_bc_passing_filters_otsu.txt", "w"
) as f:
    for x in df.index:
        f.write(x + "\n")

In [None]:
with open(
    "selected_barcodes/OHS_s3atac_mouse.FULL_bc_passing_filters_otsu.pkl", "wb"
) as f:
    pickle.dump(df.index, f)