In [1]:
import pycisTopic
import glob
import os

# os.environ["TMPDIR"] = "dirty_tmp"
import pybiomart as pbm
import pandas as pd
import pickle
from pycisTopic.qc import *
from IPython.display import Image, display
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

import multiprocess as mp  # for kde multithreading calculation
from multiprocess import Pool
from ray.dashboard import *

%matplotlib inline
%load_ext lab_black

In [2]:
import collections as cl
import gc
import logging
import sys
from typing import Dict, List, Optional, Tuple, Union

import matplotlib.backends.backend_pdf
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyranges as pr
import ray
import seaborn as sns
from scipy.stats import gaussian_kde, norm

from pycisTopic.cistopic_class import *
from pycisTopic.utils import (
    collapse_duplicates,
    multiplot_from_generator,
    read_fragments_from_file,
)

In [3]:
import importlib

# Download annotation

In [4]:
!pwd

/dodrio/scratch/projects/starting_2022_023/benchmark/scatac_benchmark/full_4_merged


In [5]:
wdir = "/dodrio/scratch/projects/starting_2022_023/benchmark/scatac_benchmark/full_4_merged"
os.chdir(wdir)

In [6]:
genome = "hg38"

pbm_genome_name_dict = {
    "hg38": "hsapiens_gene_ensembl",
    "hg37": "hsapiens_gene_ensembl",
    "mm10": "mmusculus_gene_ensembl",
    "dm6": "dmelanogaster_gene_ensembl",
}

pbm_host_dict = {
    "hg38": "http://www.ensembl.org",
    "hg37": "http://grch37.ensembl.org/",
    "mm10": "http://nov2020.archive.ensembl.org/",
    "dm6": "http://www.ensembl.org",
}

if os.path.exists(f"annotation.tsv"):
    print(f"Loading cached genome annotation...")
    annotation = pd.read_csv("annotation.tsv", sep="\t", header=0, index_col=0)
else:
    dataset = pbm.Dataset(name=pbm_genome_name_dict[genome], host=pbm_host_dict[genome])

    annotation = dataset.query(
        attributes=[
            "chromosome_name",
            "transcription_start_site",
            "strand",
            "external_gene_name",
            "transcript_biotype",
        ]
    )
    filter = annotation["Chromosome/scaffold name"].str.contains("CHR|GL|JH|MT")
    annotation = annotation[~filter]
    annotation["Chromosome/scaffold name"] = annotation[
        "Chromosome/scaffold name"
    ].str.replace(r"(\b\S)", r"chr\1")
    annotation.columns = ["Chromosome", "Start", "Strand", "Gene", "Transcript_type"]
    annotation = annotation[annotation.Transcript_type == "protein_coding"]
    annotation.to_csv("annotation.tsv", sep="\t")

Loading cached genome annotation...


In [7]:
annotation

Unnamed: 0,Chromosome,Start,Strand,Gene,Transcript_type
37,chrKI270442chr.chr1,380608,1,5S_rRNA,rRNA
38,chrKI270711chr.chr1,24650,-1,,protein_coding
39,chrKI270711chr.chr1,29626,-1,,protein_coding
84,chrKI270442chr.chr1,217401,-1,5_8S_rRNA,rRNA
516,chrKI270744chr.chr1,51114,-1,U6,snRNA
...,...,...,...,...,...
45278,chr11,33076149,1,LINC00294,lncRNA
45279,chr11,55262155,1,TRIM48,protein_coding
45280,chr12,262836,-1,SLC6A13,protein_coding
45281,chr12,224431,-1,SLC6A13,processed_transcript


In [8]:
fragments_list = sorted(glob.glob("../1_data_repository/full_fragments/*.tsv.gz"))
fragments_dict = {}
for fragments_file in fragments_list:
    sample = fragments_file.split("/")[-1].split(".fragments.tsv.gz")[0]
    fragments_dict[sample] = fragments_file
fragments_dict

{'BIO_ddseq_1.FULL': '../1_data_repository/full_fragments/BIO_ddseq_1.FULL.fragments.tsv.gz',
 'BIO_ddseq_2.FULL': '../1_data_repository/full_fragments/BIO_ddseq_2.FULL.fragments.tsv.gz',
 'BIO_ddseq_3.FULL': '../1_data_repository/full_fragments/BIO_ddseq_3.FULL.fragments.tsv.gz',
 'BIO_ddseq_4.FULL': '../1_data_repository/full_fragments/BIO_ddseq_4.FULL.fragments.tsv.gz',
 'BRO_mtscatac_1.FULL': '../1_data_repository/full_fragments/BRO_mtscatac_1.FULL.fragments.tsv.gz',
 'BRO_mtscatac_2.FULL': '../1_data_repository/full_fragments/BRO_mtscatac_2.FULL.fragments.tsv.gz',
 'CNA_10xmultiome_1.FULL': '../1_data_repository/full_fragments/CNA_10xmultiome_1.FULL.fragments.tsv.gz',
 'CNA_10xmultiome_2.FULL': '../1_data_repository/full_fragments/CNA_10xmultiome_2.FULL.fragments.tsv.gz',
 'CNA_10xv11_1.FULL': '../1_data_repository/full_fragments/CNA_10xv11_1.FULL.fragments.tsv.gz',
 'CNA_10xv11_2.FULL': '../1_data_repository/full_fragments/CNA_10xv11_2.FULL.fragments.tsv.gz',
 'CNA_10xv11_3.FULL'

In [9]:
fragments_dict.pop("VIB_hydrop_11.FULL")
fragments_dict.pop("VIB_hydrop_12.FULL")
fragments_dict.pop("VIB_hydrop_21.FULL")
fragments_dict.pop("VIB_hydrop_22.FULL")

'../1_data_repository/full_fragments/VIB_hydrop_22.FULL.fragments.tsv.gz'

In [10]:
master_set_path = "../full_3_cistopic_consensus/master_peaks/all.FIXEDCELLS.master_peaks.occurrence_filtered7.bed"

In [11]:
regions_paths_dict = {x: master_set_path for x in fragments_dict.keys()}
regions_paths_dict

{'BIO_ddseq_1.FULL': '../full_3_cistopic_consensus/master_peaks/all.FIXEDCELLS.master_peaks.occurrence_filtered7.bed',
 'BIO_ddseq_2.FULL': '../full_3_cistopic_consensus/master_peaks/all.FIXEDCELLS.master_peaks.occurrence_filtered7.bed',
 'BIO_ddseq_3.FULL': '../full_3_cistopic_consensus/master_peaks/all.FIXEDCELLS.master_peaks.occurrence_filtered7.bed',
 'BIO_ddseq_4.FULL': '../full_3_cistopic_consensus/master_peaks/all.FIXEDCELLS.master_peaks.occurrence_filtered7.bed',
 'BRO_mtscatac_1.FULL': '../full_3_cistopic_consensus/master_peaks/all.FIXEDCELLS.master_peaks.occurrence_filtered7.bed',
 'BRO_mtscatac_2.FULL': '../full_3_cistopic_consensus/master_peaks/all.FIXEDCELLS.master_peaks.occurrence_filtered7.bed',
 'CNA_10xmultiome_1.FULL': '../full_3_cistopic_consensus/master_peaks/all.FIXEDCELLS.master_peaks.occurrence_filtered7.bed',
 'CNA_10xmultiome_2.FULL': '../full_3_cistopic_consensus/master_peaks/all.FIXEDCELLS.master_peaks.occurrence_filtered7.bed',
 'CNA_10xv11_1.FULL': '../full

In [12]:
len(regions_paths_dict)

47

Now, make a sub dictionary of all samples within the fragments dict that have not been run yet (good for resuming a stopped cistopic run):

In [13]:
cistopic_qc_out = os.path.join(wdir, "cistopic_qc_out_MASTER")
if not os.path.exists(cistopic_qc_out):
    os.makedirs(cistopic_qc_out)

In [14]:
fragments_sub_dict = {}
regions_sub_dict = {}
for sample in regions_paths_dict.keys():
    metadata_file = os.path.join(cistopic_qc_out, sample + "__metadata_bc.pkl")
    print(f"Checking if {metadata_file} exist...")
    if os.path.exists(metadata_file):
        print("\tMetadata exists! Skipping...")
    else:
        fragments_sub_dict[sample] = fragments_dict[sample]
        print("\tMetadata does not exist, adding to subdict to generate")

Checking if /dodrio/scratch/projects/starting_2022_023/benchmark/scatac_benchmark/full_4_merged/cistopic_qc_out_MASTER/BIO_ddseq_1.FULL__metadata_bc.pkl exist...
	Metadata does not exist, adding to subdict to generate
Checking if /dodrio/scratch/projects/starting_2022_023/benchmark/scatac_benchmark/full_4_merged/cistopic_qc_out_MASTER/BIO_ddseq_2.FULL__metadata_bc.pkl exist...
	Metadata does not exist, adding to subdict to generate
Checking if /dodrio/scratch/projects/starting_2022_023/benchmark/scatac_benchmark/full_4_merged/cistopic_qc_out_MASTER/BIO_ddseq_3.FULL__metadata_bc.pkl exist...
	Metadata does not exist, adding to subdict to generate
Checking if /dodrio/scratch/projects/starting_2022_023/benchmark/scatac_benchmark/full_4_merged/cistopic_qc_out_MASTER/BIO_ddseq_4.FULL__metadata_bc.pkl exist...
	Metadata does not exist, adding to subdict to generate
Checking if /dodrio/scratch/projects/starting_2022_023/benchmark/scatac_benchmark/full_4_merged/cistopic_qc_out_MASTER/BRO_mtsca

In [15]:
regions_sub_dict = {x: regions_paths_dict[x] for x in sorted(fragments_sub_dict.keys())}
regions_sub_dict

{'BIO_ddseq_1.FULL': '../full_3_cistopic_consensus/master_peaks/all.FIXEDCELLS.master_peaks.occurrence_filtered7.bed',
 'BIO_ddseq_2.FULL': '../full_3_cistopic_consensus/master_peaks/all.FIXEDCELLS.master_peaks.occurrence_filtered7.bed',
 'BIO_ddseq_3.FULL': '../full_3_cistopic_consensus/master_peaks/all.FIXEDCELLS.master_peaks.occurrence_filtered7.bed',
 'BIO_ddseq_4.FULL': '../full_3_cistopic_consensus/master_peaks/all.FIXEDCELLS.master_peaks.occurrence_filtered7.bed',
 'BRO_mtscatac_1.FULL': '../full_3_cistopic_consensus/master_peaks/all.FIXEDCELLS.master_peaks.occurrence_filtered7.bed',
 'BRO_mtscatac_2.FULL': '../full_3_cistopic_consensus/master_peaks/all.FIXEDCELLS.master_peaks.occurrence_filtered7.bed',
 'CNA_10xmultiome_1.FULL': '../full_3_cistopic_consensus/master_peaks/all.FIXEDCELLS.master_peaks.occurrence_filtered7.bed',
 'CNA_10xmultiome_2.FULL': '../full_3_cistopic_consensus/master_peaks/all.FIXEDCELLS.master_peaks.occurrence_filtered7.bed',
 'CNA_10xv11_1.FULL': '../full

In [16]:
def compute_qc_stats_single(
    fragments,
    tss_annotation: Union[pd.DataFrame, pr.PyRanges],
    stats: Optional[List[str]] = [
        "barcode_rank_plot",
        "duplicate_rate",
        "insert_size_distribution",
        "profile_tss",
        "frip",
    ],
    label: Optional[str] = None,
    path_to_regions: Optional[str] = None,
    valid_bc: Optional[List[str]] = None,
    n_frag: Optional[int] = None,
    n_bc: Optional[int] = None,
    tss_flank_window: Optional[int] = 1000,
    tss_window: Optional[int] = 50,
    tss_minimum_signal_window: Optional[int] = 100,
    tss_rolling_window: Optional[int] = 10,
    min_norm: Optional[int] = 0.2,
    partition: Optional[int] = 1,
    check_for_duplicates: Optional[bool] = True,
    remove_duplicates: Optional[bool] = True,
    use_polars: Optional[bool] = True,
):
    """
    Wrapper function to compute QC statistics on several samples. For detailed instructions, please see the independent functions.
    Parameters
    ---
    fragments: str
            Path to fragments file.
    tss_annotation: pd.DataFrame or pr.PyRanges
            A data frame or pyRanges containing transcription start sites for each gene, with 'Chromosome', 'Start' and 'Strand' as columns (additional columns will be ignored).
    stats: list, optional
            A list with the statistics that have to be computed. Default: All ('barcode_rank_plot', 'duplicate_rate', 'insert_size_distribution', 'profile_tss', 'FRIP).
    label: str
            Sample label. Default: None.
    path_to_regions: str
            Path to regions file to use for FRIP.
    valid_bc: list, optional
            A list containing selected barcodes. This parameter is ignored if n_frag or n_bc are specified. Default: None.
    n_frag: int, optional
            Minimal number of fragments assigned to a barcode to be kept. Either n_frag or n_bc can be specified. Default: None.
    n_bc: int, optional
            Number of barcodes to select. Either n_frag or n_bc can be specified. Default: None.
    tss_window: int, optional
            Window around the TSS used to count fragments in the TSS when calculating the TSS enrichment per barcode. Default: 50 (+/- 50 bp).
    tss_flank_window: int, optional
            Flanking window around the TSS. Default: 1000 (+/- 1000 bp).
    tss_minimum_signal_window: int, optional
            Tail window use to normalize the TSS enrichment. Default: 100 (average signal in the 100bp in the extremes of the TSS window).
    tss_rolling_window: int, optional
            Rolling window used to smooth signal. Default: 10.
    min_norm: int, optional
            Minimum normalization score. If the average minimum signal value is below this value, this number is used to normalize the TSS signal. This approach penalizes cells with fewer reads.
    check_for_duplicates: bool, optional
            If no duplicate counts are provided per row in the fragments file, whether to collapse duplicates. Default: True.
    remove_duplicates: bool, optional
            Whether to remove duplicates. Default: True.
    use_polars: bool, optional
            Whether to use polars to read fragments files. Default: True.
    Return
    ---
    pd.DataFrame or list and list
            A list with the barcode statistics for all samples (or a combined data frame with a column 'Sample' indicating the sample of origin) and a list of dictionaries with the sample-level profiles for each sample.
    """

    # Create logger
    level = logging.INFO
    log_format = "%(asctime)s %(name)-12s %(levelname)-8s %(message)s"
    handlers = [logging.StreamHandler(stream=sys.stdout)]
    logging.basicConfig(level=level, format=log_format, handlers=handlers)
    log = logging.getLogger("cisTopic")
    # Compute stats
    metrics = {}
    metadata_bc_dict = {}
    profile_data_dict = {}
    # Prepare fragments
    if isinstance(fragments, str):
        log.info("Reading " + label)
        fragments_df = read_fragments_from_file(fragments, use_polars=use_polars).df
    else:
        fragments_df = fragments
    # Convert to category for memory efficiency
    fragments_df["Name"] = fragments_df["Name"].astype("category")
    # Check for duplicates
    if "Score" not in fragments_df or all(fragments_df["Score"] == "."):
        fragments_df = fragments_df[["Chromosome", "Start", "End", "Name"]]
        if check_for_duplicates:
            log.info("Collapsing duplicates")
            fragments_df = pd.concat(
                [
                    collapse_duplicates(fragments_df[fragments_df.Chromosome == x])
                    for x in fragments_df.Chromosome.cat.categories.values
                ]
            )
        else:
            fragments_df["Score"] = 1
    else:
        fragments_df = fragments_df[["Chromosome", "Start", "End", "Name", "Score"]]
    fragments_df["Score"] = fragments_df["Score"].astype("int32")
    # Prepare valid barcodes
    if valid_bc is not None:
        if n_bc is not None or n_frag is not None:
            valid_bc = None
    # Rank plot
    if "barcode_rank_plot" in stats:
        # Rank plot
        log.info("Computing barcode rank plot for " + label)
        metrics["barcode_rank_plot"] = barcode_rank_plot(
            fragments=fragments_df,
            valid_bc=valid_bc,
            n_frag=n_frag,
            n_bc=n_bc,
            remove_duplicates=remove_duplicates,
            plot=False,
            return_bc=True,
            return_plot_data=True,
        )
        if valid_bc is None:
            fragments_df = fragments_df[
                fragments_df.Name.isin(set(metrics["barcode_rank_plot"]["valid_bc"]))
            ]

    gc.collect()
    # Duplicate rate
    if "duplicate_rate" in stats:
        # Duplicate rate
        log.info("Computing duplicate rate plot for " + label)
        metrics["duplicate_rate"] = duplicate_rate(
            fragments=fragments_df, valid_bc=valid_bc, plot=False, return_plot_data=True
        )

    gc.collect()
    # Fragment size
    if "insert_size_distribution" in stats:
        # Fragment size
        log.info("Computing insert size distribution for " + label)
        metrics["insert_size_distribution"] = insert_size_distribution(
            fragments=fragments_df,
            valid_bc=valid_bc,
            remove_duplicates=remove_duplicates,
            plot=False,
            return_plot_data=True,
        )
    fragments_df = pr.PyRanges(fragments_df)
    gc.collect()
    # TSS
    if "profile_tss" in stats:
        # TSS
        log.info("Computing TSS profile for " + label)
        profile_tss_metrics = profile_tss(
            fragments=fragments_df,
            annotation=tss_annotation,
            valid_bc=valid_bc,
            plot=False,
            n_cpu=1,
            partition=partition,
            flank_window=tss_flank_window,
            tss_window=tss_window,
            minimum_signal_window=tss_minimum_signal_window,
            rolling_window=tss_rolling_window,
            min_norm=min_norm,
            return_TSS_enrichment_per_barcode=True,
            return_TSS_coverage_matrix_per_barcode=True,
            return_plot_data=True,
        )
        if profile_tss_metrics is not None:
            metrics["profile_tss"] = profile_tss_metrics
    gc.collect()
    # FRIP
    if "frip" in stats:
        # FRIP
        log.info("Computing FRIP profile for " + label)
        metrics["frip"] = frip(
            fragments=fragments_df,
            path_to_regions=path_to_regions,
            valid_bc=valid_bc,
            remove_duplicates=remove_duplicates,
            n_cpu=1,
            plot=False,
            return_plot_data=True,
        )
    del fragments_df
    gc.collect()
    metadata_bc, profile_data = metrics2data(metrics)

    if isinstance(metadata_bc, pd.DataFrame):
        metadata_bc = metadata_bc.fillna(0)

    metadata_bc_dict = {label: metadata_bc}
    profile_data_dict = {label: profile_data}
    log.info("Sample " + label + " done!")

    return metadata_bc_dict, profile_data_dict

In [17]:
n_cores = 10
if regions_sub_dict != {}:
    samples_sub = list(regions_sub_dict.keys())
    blocks = [samples_sub[i : i + n_cores] for i in range(0, len(samples_sub), n_cores)]
    for samples_torun_in_block in blocks:
        fragments_sub_dict_block = {
            key: fragments_sub_dict[key] for key in samples_torun_in_block
        }
        regions_sub_dict_block = {
            key: regions_sub_dict[key] for key in samples_torun_in_block
        }

        metadata_bc_dict, profile_data_dict = compute_qc_stats(
            fragments_dict=fragments_sub_dict_block,
            tss_annotation=annotation,
            stats=[
                "barcode_rank_plot",
                "duplicate_rate",
                "insert_size_distribution",
                "profile_tss",
                "frip",
            ],
            label_list=None,
            path_to_regions=regions_sub_dict_block,
            n_cpu=n_cores,
            valid_bc=None,
            n_frag=10,
            n_bc=None,
            tss_flank_window=2000,
            tss_window=50,
            tss_minimum_signal_window=100,
            tss_rolling_window=10,
            # min_norm=0.2,
            remove_duplicates=True,
        )

        ray.shutdown()
        print(f"Dumping files in {cistopic_qc_out}...")
        for sample in sorted(metadata_bc_dict.keys()):
            metadata_bc_dict[sample]["sample_id"] = sample
            metadata_bc_dict[sample].index = [
                x + "___" + sample for x in list(metadata_bc_dict[sample].index)
            ]
            with open(
                os.path.join(cistopic_qc_out, f"{sample}__metadata_bc.pkl"), "wb"
            ) as f:
                pickle.dump(metadata_bc_dict[sample], f, protocol=4)

            with open(
                os.path.join(cistopic_qc_out, f"{sample}__profile_data.pkl"), "wb"
            ) as f:
                pickle.dump(profile_data_dict[sample], f, protocol=4)
else:
    print("All samples already processed.")

2022-10-01 12:59:34,179	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(compute_qc_stats_ray pid=2087868)[0m 2022-10-01 13:00:01,264 cisTopic     INFO     Reading CNA_10xv11_2.FULL
[2m[36m(compute_qc_stats_ray pid=2087870)[0m 2022-10-01 13:00:01,253 cisTopic     INFO     Reading CNA_10xv11_1.FULL
[2m[36m(compute_qc_stats_ray pid=2087875)[0m 2022-10-01 13:00:01,271 cisTopic     INFO     Reading BIO_ddseq_2.FULL
[2m[36m(compute_qc_stats_ray pid=2087873)[0m 2022-10-01 13:00:01,256 cisTopic     INFO     Reading CNA_10xmultiome_2.FULL
[2m[36m(compute_qc_stats_ray pid=2087867)[0m 2022-10-01 13:00:01,264 cisTopic     INFO     Reading BIO_ddseq_1.FULL
[2m[36m(compute_qc_stats_ray pid=2087869)[0m 2022-10-01 13:00:01,267 cisTopic     INFO     Reading BIO_ddseq_3.FULL
[2m[36m(compute_qc_stats_ray pid=2087872)[0m 2022-10-01 13:00:01,268 cisTopic     INFO     Reading CNA_10xmultiome_1.FULL
[2m[36m(compute_qc_stats_ray pid=2087874)[0m 2022-10-01 13:00:01,247 cisTopic     INFO     Reading BRO_mtscatac_2.FULL
[2m[36m(compute_qc_stats_ray 

[2m[36m(compute_qc_stats_ray pid=2087867)[0m   TSS_matrix = cut_sites_TSS.groupby(


[2m[36m(compute_qc_stats_ray pid=2087867)[0m 2022-10-01 14:09:28,955 cisTopic     INFO     Coverage matrix done
[2m[36m(compute_qc_stats_ray pid=2087867)[0m 2022-10-01 14:19:41,316 cisTopic     INFO     Returning normalized TSS coverage matrix per barcode
[2m[36m(compute_qc_stats_ray pid=2087867)[0m 2022-10-01 14:26:16,758 cisTopic     INFO     Returning normalized sample TSS enrichment data
[2m[36m(compute_qc_stats_ray pid=2087867)[0m 2022-10-01 14:26:16,950 cisTopic     INFO     Computing FRIP profile for BIO_ddseq_1.FULL
[2m[36m(compute_qc_stats_ray pid=2087867)[0m 2022-10-01 14:26:24,877 cisTopic     INFO     Counting fragments
[2m[36m(compute_qc_stats_ray pid=2087867)[0m 2022-10-01 14:26:46,386 cisTopic     INFO     Intersecting fragments with regions
[2m[36m(compute_qc_stats_ray pid=2087867)[0m 2022-10-01 14:28:02,975 cisTopic     INFO     Sample BIO_ddseq_1.FULL done!
Dumping files in /dodrio/scratch/projects/starting_2022_023/benchmark/scatac_benchmark/full

2022-10-01 14:30:34,695	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(compute_qc_stats_ray pid=2091811)[0m 2022-10-01 14:31:03,000 cisTopic     INFO     Reading CNA_hydrop_3.FULL
[2m[36m(compute_qc_stats_ray pid=2091807)[0m 2022-10-01 14:31:02,976 cisTopic     INFO     Reading CNA_10xv11_5.FULL
[2m[36m(compute_qc_stats_ray pid=2091808)[0m 2022-10-01 14:31:02,997 cisTopic     INFO     Reading CNA_hydrop_2.FULL
[2m[36m(compute_qc_stats_ray pid=2091809)[0m 2022-10-01 14:31:02,984 cisTopic     INFO     Reading CNA_10xv11_4.FULL
[2m[36m(compute_qc_stats_ray pid=2091805)[0m 2022-10-01 14:31:02,995 cisTopic     INFO     Reading CNA_10xv11_3.FULL
[2m[36m(compute_qc_stats_ray pid=2091806)[0m 2022-10-01 14:31:02,998 cisTopic     INFO     Reading CNA_10xv2_2.FULL
[2m[36m(compute_qc_stats_ray pid=2091804)[0m 2022-10-01 14:31:02,989 cisTopic     INFO     Reading CNA_mtscatac_2.FULL
[2m[36m(compute_qc_stats_ray pid=2091812)[0m 2022-10-01 14:31:02,987 cisTopic     INFO     Reading CNA_10xv2_1.FULL
[2m[36m(compute_qc_stats_ray pid=20918

2022-10-01 15:17:34,615	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(compute_qc_stats_ray pid=2094460)[0m 2022-10-01 15:18:03,140 cisTopic     INFO     Reading MDC_mtscatac_2.FULL
[2m[36m(compute_qc_stats_ray pid=2094465)[0m 2022-10-01 15:18:03,132 cisTopic     INFO     Reading EPF_hydrop_4.FULL
[2m[36m(compute_qc_stats_ray pid=2094457)[0m 2022-10-01 15:18:03,132 cisTopic     INFO     Reading EPF_hydrop_3.FULL
[2m[36m(compute_qc_stats_ray pid=2094459)[0m 2022-10-01 15:18:03,162 cisTopic     INFO     Reading MDC_mtscatac_1.FULL
[2m[36m(compute_qc_stats_ray pid=2094463)[0m 2022-10-01 15:18:03,141 cisTopic     INFO     Reading OHS_s3atac_1.FULL
[2m[36m(compute_qc_stats_ray pid=2094462)[0m 2022-10-01 15:18:03,142 cisTopic     INFO     Reading HAR_ddseq_2.FULL
[2m[36m(compute_qc_stats_ray pid=2094466)[0m 2022-10-01 15:18:03,203 cisTopic     INFO     Reading OHS_s3atac_2.FULL
[2m[36m(compute_qc_stats_ray pid=2094464)[0m 2022-10-01 15:18:03,190 cisTopic     INFO     Reading EPF_hydrop_1.FULL
[2m[36m(compute_qc_stats_ray pid=20

2022-10-01 16:43:27,258	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(compute_qc_stats_ray pid=2097958)[0m 2022-10-01 16:43:54,817 cisTopic     INFO     Reading STA_10xv11_1.FULL
[2m[36m(compute_qc_stats_ray pid=2097954)[0m 2022-10-01 16:43:54,805 cisTopic     INFO     Reading UCS_ddseq_2.FULL
[2m[36m(compute_qc_stats_ray pid=2097955)[0m 2022-10-01 16:43:54,823 cisTopic     INFO     Reading SAN_10xmultiome_2.FULL
[2m[36m(compute_qc_stats_ray pid=2097961)[0m 2022-10-01 16:43:54,827 cisTopic     INFO     Reading STA_10xv11_2.FULL
[2m[36m(compute_qc_stats_ray pid=2097960)[0m 2022-10-01 16:43:54,800 cisTopic     INFO     Reading TXG_10xv2_2.FULL
[2m[36m(compute_qc_stats_ray pid=2097959)[0m 2022-10-01 16:43:54,825 cisTopic     INFO     Reading VIB_10xmultiome_1.FULL
[2m[36m(compute_qc_stats_ray pid=2097962)[0m 2022-10-01 16:43:54,809 cisTopic     INFO     Reading TXG_10xv2_1.FULL
[2m[36m(compute_qc_stats_ray pid=2097953)[0m 2022-10-01 16:43:54,820 cisTopic     INFO     Reading SAN_10xmultiome_1.FULL
[2m[36m(compute_qc_stats_r

2022-10-01 18:39:52,701	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m


[2m[36m(compute_qc_stats_ray pid=2102488)[0m 2022-10-01 18:40:13,123 cisTopic     INFO     Reading VIB_hydrop_2.FULL
[2m[36m(compute_qc_stats_ray pid=2102492)[0m 2022-10-01 18:40:13,119 cisTopic     INFO     Reading VIB_10xv1_1.FULL
[2m[36m(compute_qc_stats_ray pid=2102490)[0m 2022-10-01 18:40:13,116 cisTopic     INFO     Reading VIB_10xmultiome_2.FULL
[2m[36m(compute_qc_stats_ray pid=2102489)[0m 2022-10-01 18:40:13,125 cisTopic     INFO     Reading VIB_hydrop_1.FULL
[2m[36m(compute_qc_stats_ray pid=2102491)[0m 2022-10-01 18:40:13,189 cisTopic     INFO     Reading VIB_10xv2_1.FULL
[2m[36m(compute_qc_stats_ray pid=2102494)[0m 2022-10-01 18:40:13,185 cisTopic     INFO     Reading VIB_10xv2_2.FULL
[2m[36m(compute_qc_stats_ray pid=2102493)[0m 2022-10-01 18:40:13,179 cisTopic     INFO     Reading VIB_10xv1_2.FULL
[2m[36m(compute_qc_stats_ray pid=2102491)[0m 2022-10-01 18:47:25,182 cisTopic     INFO     Computing barcode rank plot for VIB_10xv2_1.FULL
[2m[36m(comput