# pycisTopic analysis

Full dataset, using consensus peak regions.

In [3]:
import pycisTopic
%load_ext nb_black
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')
import pickle
import pandas as pd
import os

<IPython.core.display.Javascript object>

In [2]:
!pwd

/dodrio/scratch/projects/starting_2022_023/benchmark/scatac_benchmark/fixedcells_3_cistopic_consensus


<IPython.core.display.Javascript object>

In [3]:
wdir = '/dodrio/scratch/projects/starting_2022_023/benchmark/scatac_benchmark/fixedcells_3_cistopic_consensus'
os.chdir( wdir )

<IPython.core.display.Javascript object>

In [4]:
# create output directory:
f_final_dir = os.path.join(wdir, 'downstream_analysis')
if not os.path.exists(f_final_dir):
    os.makedirs(f_final_dir)

<IPython.core.display.Javascript object>

In [5]:
import glob

<IPython.core.display.Javascript object>

In [6]:
import re

sample_annot = pd.DataFrame(
    [
        ['Broad_1','BioRad'],
        ['Broad_2','BioRad'],
        ['Stanford_1','10x v1.0'],
        ['Stanford_2','10x v1.0'],
        ['VIB_1','10x v1.0'],
        ['VIB_2','10x v1.0'],
        ['CNAG_1','10x v1.1'],
        ['CNAG_2','10x v1.1'],
        ['Broad_mito_1','mito-scATAC-seq'],
        ['Broad_mito_2','mito-scATAC-seq'],
        ['Sanger_1','10x multiome'],
        ['Sanger_2','10x multiome'],
        ['s3atac','s3-ATAC'],
        ['VIB_Hydrop_1','HyDrop'],
        ['VIB_Hydrop_2','HyDrop'],
        
    ],
    columns=['key','Technology']
)
sample_annot['Sample'] = sample_annot['key']
sample_annot.set_index('key', drop=False, inplace=True)
# rename samples:
sample_annot.loc['Broad_1','Sample'] = 'Harvard_1'
sample_annot.loc['Broad_2','Sample'] = 'Harvard_2'
sample_annot.loc['Broad_mito_1','Sample'] = 'Broad_1'
sample_annot.loc['Broad_mito_2','Sample'] = 'Broad_2'
sample_annot.loc['s3atac','Sample'] = 'OHSU'
sample_annot.loc['VIB_Hydrop_1','Sample'] = 'Hydrop_1'
sample_annot.loc['VIB_Hydrop_2','Sample'] = 'Hydrop_2'

sample_annot['Replicate'] = [ x.Sample.split('_')[-1] for i,x in sample_annot.iterrows() ]
sample_annot['st'] = [ re.sub('_[0-9]','',x.Sample)+' ('+x.Technology+')' for i,x in sample_annot.iterrows() ]

sample_annot.loc[sample_annot['key']=='s3atac','Replicate'] = 1
sample_map = { x: y.Sample for x,y in sample_annot.iterrows() }
sample_annot

Unnamed: 0_level_0,key,Technology,Sample,Replicate,st
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Broad_1,Broad_1,BioRad,Harvard_1,1,Harvard (BioRad)
Broad_2,Broad_2,BioRad,Harvard_2,2,Harvard (BioRad)
Stanford_1,Stanford_1,10x v1.0,Stanford_1,1,Stanford (10x v1.0)
Stanford_2,Stanford_2,10x v1.0,Stanford_2,2,Stanford (10x v1.0)
VIB_1,VIB_1,10x v1.0,VIB_1,1,VIB (10x v1.0)
VIB_2,VIB_2,10x v1.0,VIB_2,2,VIB (10x v1.0)
CNAG_1,CNAG_1,10x v1.1,CNAG_1,1,CNAG (10x v1.1)
CNAG_2,CNAG_2,10x v1.1,CNAG_2,2,CNAG (10x v1.1)
Broad_mito_1,Broad_mito_1,mito-scATAC-seq,Broad_1,1,Broad (mito-scATAC-seq)
Broad_mito_2,Broad_mito_2,mito-scATAC-seq,Broad_2,2,Broad (mito-scATAC-seq)


<IPython.core.display.Javascript object>

## Save/load cisTopic objects

In [7]:
cto_consensus_paths = sorted(glob.glob(f'cistopic_objects/*consensus.pkl'))
cistopic_obj_path_dict = {x.split('/')[-1].split(f'__')[0]:x for x in cto_consensus_paths}
cistopic_obj_path_dict

{'BIO_ddseq_1.FIXEDCELLS': 'cistopic_objects/BIO_ddseq_1.FIXEDCELLS__cto.scrublet0-4.fmx.singlets.model_11topics.dimreduc.consensus.pkl',
 'BIO_ddseq_2.FIXEDCELLS': 'cistopic_objects/BIO_ddseq_2.FIXEDCELLS__cto.scrublet0-4.fmx.singlets.model_14topics.dimreduc.consensus.pkl',
 'BIO_ddseq_3.FIXEDCELLS': 'cistopic_objects/BIO_ddseq_3.FIXEDCELLS__cto.scrublet0-4.fmx.singlets.model_10topics.dimreduc.consensus.pkl',
 'BIO_ddseq_4.FIXEDCELLS': 'cistopic_objects/BIO_ddseq_4.FIXEDCELLS__cto.scrublet0-4.fmx.singlets.model_11topics.dimreduc.consensus.pkl',
 'BRO_mtscatac_1.FIXEDCELLS': 'cistopic_objects/BRO_mtscatac_1.FIXEDCELLS__cto.scrublet0-4.fmx.singlets.model_11topics.dimreduc.consensus.pkl',
 'BRO_mtscatac_2.FIXEDCELLS': 'cistopic_objects/BRO_mtscatac_2.FIXEDCELLS__cto.scrublet0-4.fmx.singlets.model_17topics.dimreduc.consensus.pkl',
 'CNA_10xmultiome_1.FIXEDCELLS': 'cistopic_objects/CNA_10xmultiome_1.FIXEDCELLS__cto.scrublet0-4.fmx.singlets.model_10topics.dimreduc.consensus.pkl',
 'CNA_10xm

<IPython.core.display.Javascript object>

## Topic binarization & QC

In [8]:
from pycisTopic.topic_binarization import binarize_topics
from pycisTopic.topic_qc import compute_topic_metrics, plot_topic_qc
from pycisTopic.utils import fig2img
import matplotlib.pyplot as plt
from pycisTopic.topic_qc import topic_annotation
from pycisTopic.diff_features import (
    impute_accessibility,
    normalize_scores,
    find_highly_variable_features,
    find_diff_features
)
from pycisTopic.clust_vis import plot_imputed_features
import numpy as np
import copy

<IPython.core.display.Javascript object>

### Binarize the topic-region distributions

In [9]:
for sample in cistopic_obj_path_dict.keys():
# for sample  in ['SAN_10xmultiome_1.FIXEDCELLS']:
    path = cistopic_obj_path_dict[sample]
    if not os.path.exists(f'downstream_analysis/DARs/{sample}'):
        if os.path.isfile(path):
            print(f"Loading {path}")
            with open(path, 'rb') as f:
                cto = pickle.load(f)
                
            cto.cell_names = cto.cell_data.index
            if [x.split('-')[0] for x in cto.selected_model.cell_topic.columns] == [x.split('___')[0] for x in list(cto.cell_names)]:
                cto.selected_model.cell_topic.columns =  list(cto.cell_names)
                
            region_bin_topics = binarize_topics(
                cto,
                method='otsu',
                ntop=3000,
                plot=True,
                num_columns=6,
                save=f'plots_qc/{sample}__topic_region_binarized.png'
            )

            with open(
                    f"downstream_analysis/binarized_topics/{sample}__topic_region_binarized.pkl",'wb'
            ) as f:
                pickle.dump(region_bin_topics, f, protocol=4)

            binarized_cell_topics = binarize_topics(
                cto,
                target='cell',
                method='li',
                plot=True,
                num_columns=5,
                nbins=100,
                save=f'plots_qc/{sample}__cells_topic_binarized.png'
            )

            with open(
                    f"downstream_analysis/binarized_topics/{sample}__cells_topic_binarized.pkl",'wb'
            ) as f:
                pickle.dump(region_bin_topics, f, protocol=4)

            topic_qc_metrics = compute_topic_metrics(cto)

            with open(
                    f"downstream_analysis/binarized_topics/{sample}__topic_qc.pkl",'wb'
            ) as f:
                pickle.dump(topic_qc_metrics, f, protocol=4)

            fig_dict= {
                'CoherenceVSAssignments': plot_topic_qc(topic_qc_metrics, var_x='Coherence', var_y='Log10_Assignments',
                                                        var_color='Gini_index', plot=False, return_fig=True),
                'AssignmentsVSCells_in_bin': plot_topic_qc(topic_qc_metrics, var_x='Log10_Assignments',
                                                           var_y='Cells_in_binarized_topic', var_color='Gini_index',
                                                           plot=False, return_fig=True),
                'CoherenceVSCells_in_bin': plot_topic_qc(topic_qc_metrics, var_x='Coherence',
                                                         var_y='Cells_in_binarized_topic', var_color='Gini_index',
                                                         plot=False, return_fig=True),
                'CoherenceVSRegions_in_bin': plot_topic_qc(topic_qc_metrics, var_x='Coherence',
                                                           var_y='Regions_in_binarized_topic', var_color='Gini_index',
                                                           plot=False, return_fig=True),
                'CoherenceVSMarginal_dist': plot_topic_qc(topic_qc_metrics, var_x='Coherence',
                                                          var_y='Marginal_topic_dist', var_color='Gini_index',
                                                          plot=False, return_fig=True),
                'CoherenceVSGini_index': plot_topic_qc(topic_qc_metrics, var_x='Coherence', var_y='Gini_index',
                                                       var_color='Gini_index', plot=False, return_fig=True)}

            fig=plt.figure(figsize=(40, 43))
            i = 1
            for fig_ in fig_dict.keys():
                plt.subplot(2, 3, i)
                img = fig2img(fig_dict[fig_]) #To convert figures to png to plot together, see .utils.py. This converts the figure to png.
                plt.imshow(img)
                plt.axis('off')
                i += 1
            plt.subplots_adjust(wspace=0, hspace=-0.70)
            #plt.tight_layout()
            plt.show()
            plt.savefig(f'plots_qc/{sample}__topic_qc_metrics.png', facecolor='white', dpi=150)

            topic_annot = topic_annotation(
                cto,
                annot_var='consensus_cell_type',
                binarized_cell_topic=binarized_cell_topics,
                general_topic_thr = 0.2
            )
            topic_qc_metrics = pd.concat(
                [
                    topic_annot[['consensus_cell_type', 'Ratio_cells_in_topic', 'Ratio_group_in_population']],
                    topic_qc_metrics
                ],
                axis=1
            )

            imputed_acc_obj = impute_accessibility(
                cto,
                selected_cells=None,
                selected_regions=None,
                scale_factor=10**6
            )

            normalized_imputed_acc_obj = normalize_scores(
                imputed_acc_obj,
                scale_factor=10**4
            )

            with open(
                f"downstream_analysis/imputed_acc_objs/{sample}__imputed_acc_obs.pkl",'wb'
            ) as f:
                pickle.dump(normalized_imputed_acc_obj, f, protocol=4)

            variable_regions = find_highly_variable_features(
                normalized_imputed_acc_obj,
                min_disp = 0.05,
                min_mean = 0.0125,
                max_mean = 3,
                max_disp = np.inf,
                n_bins=20,
                n_top_features=None,
                plot=True,
                save=f'plots_qc/{sample}__HVR.png'
            )

            print(f"Found {len(variable_regions)} variable regions")
            with open(f'downstream_analysis/HVRs/{sample}__HVRs.bed', 'w') as f:
                for line in [x.replace(':','\t').replace('-','\t') for x in variable_regions]:
                    f.write(f"{line}\n")
                    
            with open(
                f'downstream_analysis/HVRs/{sample}__HVRs.pkl', 'w'
            ) as f:
                pickle.dump(variable_regions, f, protocol=4)

            os.mkdir(f'downstream_analysis/DARs/{sample}')

            markers_dict = find_diff_features(
                cto,
                normalized_imputed_acc_obj,
                variable='consensus_cell_type',
                var_features=variable_regions,
                contrasts=None,
                adjpval_thr=0.05,
                log2fc_thr=np.log2(1.5),
                n_cpu=5,
            )
            with open(
                    f"downstream_analysis/DARs/{sample}/{sample}__DARs_dict.pkl",'wb'
            ) as f:
                pickle.dump(markers_dict, f, protocol=4)

            for cell_type in markers_dict.keys():
                markers = markers_dict[cell_type].index.tolist()
                df = pd.DataFrame(markers)
                if len(df) == 0:
                    print(f"no DARs found for {cell_type} in {sample}")
                else:
                    df[[0,1]] = df[0].str.split(':',expand=True)
                    df[[1,2]] = df[1].str.split('-',expand=True)
                    df[3] = markers_dict[cell_type].index.tolist()
                    df[3] = cell_type.replace(' ', '_')# + '_' + df[3].astype(str)
                    score = markers_dict[cell_type]['Log2FC']
                    score = score.reset_index(drop=True)
                    df[4] = score
                    df[5] = '.'
                    pval = markers_dict[cell_type]['Adjusted_pval']
                    pval = pval.reset_index(drop=True)
                    df[6] = pval
                    name = cell_type.replace(' ', '_')
                    # name = name.replace('/', '-')
                    if not os.path.exists(f'downstream_analysis/DARs/{sample}'):
                        os.mkdir(f'downstream_analysis/DARs/{sample}')

                    df.to_csv(f'downstream_analysis/DARs/{sample}/{sample}__{cell_type.replace(" ", "_")}__DARs.bed', sep='\t', header=False, index=False)
                    df[0:2000].to_csv(f'downstream_analysis/DARs/{sample}/{sample}__{cell_type.replace(" ", "_")}__DARs.TOP2k.bed', sep='\t', header=False, index=False)

        else:
            print(f'{path} does not exist!')
    else:
        print(f"downstream_analysis/DARs/{sample} already exists!")

downstream_analysis/DARs/BIO_ddseq_1.FIXEDCELLS already exists!
downstream_analysis/DARs/BIO_ddseq_2.FIXEDCELLS already exists!
downstream_analysis/DARs/BIO_ddseq_3.FIXEDCELLS already exists!
downstream_analysis/DARs/BIO_ddseq_4.FIXEDCELLS already exists!
downstream_analysis/DARs/BRO_mtscatac_1.FIXEDCELLS already exists!
downstream_analysis/DARs/BRO_mtscatac_2.FIXEDCELLS already exists!
downstream_analysis/DARs/CNA_10xmultiome_1.FIXEDCELLS already exists!
downstream_analysis/DARs/CNA_10xmultiome_2.FIXEDCELLS already exists!
downstream_analysis/DARs/CNA_10xv11_1.FIXEDCELLS already exists!
downstream_analysis/DARs/CNA_10xv11_2.FIXEDCELLS already exists!
downstream_analysis/DARs/CNA_10xv11_3.FIXEDCELLS already exists!
downstream_analysis/DARs/CNA_10xv11_4.FIXEDCELLS already exists!
downstream_analysis/DARs/CNA_10xv11_5.FIXEDCELLS already exists!
downstream_analysis/DARs/CNA_10xv2_1.FIXEDCELLS already exists!
downstream_analysis/DARs/CNA_10xv2_2.FIXEDCELLS already exists!
downstream_analys

<IPython.core.display.Javascript object>

# Calculate DARs between male and female

In [10]:
normalized_imputed_acc_obj_path_dict = {x.split('/')[-1].split(f'__')[0]:x for x in sorted(glob.glob(f'downstream_analysis/imputed_acc_objs/*normalized_imputed_acc_obs.pkl'))}
normalized_imputed_acc_obj_path_dict

{'BIO_ddseq_1.FIXEDCELLS': 'downstream_analysis/imputed_acc_objs/BIO_ddseq_1.FIXEDCELLS__normalized_imputed_acc_obs.pkl',
 'BIO_ddseq_2.FIXEDCELLS': 'downstream_analysis/imputed_acc_objs/BIO_ddseq_2.FIXEDCELLS__normalized_imputed_acc_obs.pkl',
 'BIO_ddseq_3.FIXEDCELLS': 'downstream_analysis/imputed_acc_objs/BIO_ddseq_3.FIXEDCELLS__normalized_imputed_acc_obs.pkl',
 'BIO_ddseq_4.FIXEDCELLS': 'downstream_analysis/imputed_acc_objs/BIO_ddseq_4.FIXEDCELLS__normalized_imputed_acc_obs.pkl',
 'BRO_mtscatac_1.FIXEDCELLS': 'downstream_analysis/imputed_acc_objs/BRO_mtscatac_1.FIXEDCELLS__normalized_imputed_acc_obs.pkl',
 'BRO_mtscatac_2.FIXEDCELLS': 'downstream_analysis/imputed_acc_objs/BRO_mtscatac_2.FIXEDCELLS__normalized_imputed_acc_obs.pkl',
 'CNA_10xmultiome_1.FIXEDCELLS': 'downstream_analysis/imputed_acc_objs/CNA_10xmultiome_1.FIXEDCELLS__normalized_imputed_acc_obs.pkl',
 'CNA_10xmultiome_2.FIXEDCELLS': 'downstream_analysis/imputed_acc_objs/CNA_10xmultiome_2.FIXEDCELLS__normalized_imputed_ac

<IPython.core.display.Javascript object>

In [11]:
hvr_path_dict = {x.split('/')[-1].split(f'__')[0]:x for x in sorted(glob.glob(f'downstream_analysis/HVRs/*bed'))}
hvr_path_dict

{'BIO_ddseq_1.FIXEDCELLS': 'downstream_analysis/HVRs/BIO_ddseq_1.FIXEDCELLS__HVRs.bed',
 'BIO_ddseq_2.FIXEDCELLS': 'downstream_analysis/HVRs/BIO_ddseq_2.FIXEDCELLS__HVRs.bed',
 'BIO_ddseq_3.FIXEDCELLS': 'downstream_analysis/HVRs/BIO_ddseq_3.FIXEDCELLS__HVRs.bed',
 'BIO_ddseq_4.FIXEDCELLS': 'downstream_analysis/HVRs/BIO_ddseq_4.FIXEDCELLS__HVRs.bed',
 'BRO_mtscatac_1.FIXEDCELLS': 'downstream_analysis/HVRs/BRO_mtscatac_1.FIXEDCELLS__HVRs.bed',
 'BRO_mtscatac_2.FIXEDCELLS': 'downstream_analysis/HVRs/BRO_mtscatac_2.FIXEDCELLS__HVRs.bed',
 'CNA_10xmultiome_1.FIXEDCELLS': 'downstream_analysis/HVRs/CNA_10xmultiome_1.FIXEDCELLS__HVRs.bed',
 'CNA_10xmultiome_2.FIXEDCELLS': 'downstream_analysis/HVRs/CNA_10xmultiome_2.FIXEDCELLS__HVRs.bed',
 'CNA_10xv11_1.FIXEDCELLS': 'downstream_analysis/HVRs/CNA_10xv11_1.FIXEDCELLS__HVRs.bed',
 'CNA_10xv11_2.FIXEDCELLS': 'downstream_analysis/HVRs/CNA_10xv11_2.FIXEDCELLS__HVRs.bed',
 'CNA_10xv11_3.FIXEDCELLS': 'downstream_analysis/HVRs/CNA_10xv11_3.FIXEDCELLS__H

<IPython.core.display.Javascript object>

In [12]:
for sample in cistopic_obj_path_dict.keys():
# for sample  in ['SAN_10xmultiome_1.FIXEDCELLS']:
    if not os.path.exists(f'male_female_DARs/{sample}'):
        cto_path = cistopic_obj_path_dict[sample]
        if os.path.isfile(cto_path):
            print(f"Loading {cto_path}")
            with open(cto_path, 'rb') as f:
                cto = pickle.load(f)
                
            cto.cell_names = cto.cell_data.index
            if [x.split('-')[0] for x in cto.selected_model.cell_topic.columns] == [x.split('___')[0] for x in list(cto.cell_names)]:
                cto.selected_model.cell_topic.columns =  list(cto.cell_names)
                
        if 'fmx_sample' in cto.cell_data.columns:
            acc_path = normalized_imputed_acc_obj_path_dict[sample]

            if os.path.isfile(acc_path):
                print(f"Loading {acc_path}")
                with open(acc_path, 'rb') as f:
                    normalized_imputed_acc_obj = pickle.load(f)


            hvr_pkl_path = f'downstream_analysis/HVRs/{sample}__HVRs.pkl'

            if os.path.exists(hvr_pkl_path):
                print(f"Loading {hvr_pkl_path}")
                with open(hvr_pkl_path, 'rb') as f:
                    variable_regions = pickle.load(f)
            else:
                print("HVRs do not exist!")

            cto.cell_data['fmx_sample_consensus_cell_type'] = cto.cell_data['fmx_sample'] + "_" + cto.cell_data["consensus_cell_type"]

            contrasts = []
            for cell_type in cto.cell_data['consensus_cell_type'].unique():
                print(cell_type)
                ct_a = 'sampleA_' + cell_type
                ct_b = 'sampleB_' + cell_type
                contrast = [[[ct_a], [ct_b]], [[ct_b], [ct_a]]]
                contrasts = contrasts + contrast

            markers_dict = find_diff_features(
                cto,
                normalized_imputed_acc_obj,
                variable='fmx_sample_consensus_cell_type',
                var_features=variable_regions,
                contrasts=contrasts,
                adjpval_thr=0.05,
                log2fc_thr=np.log2(1.2),
                n_cpu=10,
            )
            
            if not os.path.exists(f'male_female_DARs/{sample}'):
                os.mkdir(f'male_female_DARs/{sample}')
                
            with open(
                f"male_female_DARs/{sample}/{sample}__DARs_dict_1-2xfoldchange.pkl",'wb'
            ) as f:
                pickle.dump(markers_dict, f, protocol=4)
                        
            for contrast in markers_dict.keys():
                markers = markers_dict[contrast].index.tolist()
                df = pd.DataFrame(markers)
                if len(df) == 0:
                    print(f"no DARs found for {contrast} in {sample}")
                else:
                    df[[0,1]] = df[0].str.split(':',expand=True)
                    df[[1,2]] = df[1].str.split('-',expand=True)
                    df[3] = markers_dict[contrast].index.tolist()
                    df[3] = contrast.replace(' ', '_')# + '_' + df[3].astype(str)
                    score = markers_dict[contrast]['Log2FC']
                    score = score.reset_index(drop=True)
                    df[4] = score
                    df[5] = '.'
                    pval = markers_dict[contrast]['Adjusted_pval']
                    pval = pval.reset_index(drop=True)
                    df[6] = pval
                    name = contrast.replace(' ', '_')

                    df.to_csv(f'male_female_DARs/{sample}/{sample}__{contrast.replace(" ", "_")}__1-2xfoldchange_DARs.bed', sep='\t', header=False, index=False)

Loading cistopic_objects/BIO_ddseq_1.FIXEDCELLS__cto.scrublet0-4.fmx.singlets.model_11topics.dimreduc.consensus.pkl
Loading cistopic_objects/BIO_ddseq_2.FIXEDCELLS__cto.scrublet0-4.fmx.singlets.model_14topics.dimreduc.consensus.pkl
Loading cistopic_objects/BIO_ddseq_3.FIXEDCELLS__cto.scrublet0-4.fmx.singlets.model_10topics.dimreduc.consensus.pkl
Loading cistopic_objects/BIO_ddseq_4.FIXEDCELLS__cto.scrublet0-4.fmx.singlets.model_11topics.dimreduc.consensus.pkl
Loading cistopic_objects/BRO_mtscatac_1.FIXEDCELLS__cto.scrublet0-4.fmx.singlets.model_11topics.dimreduc.consensus.pkl
Loading downstream_analysis/imputed_acc_objs/BRO_mtscatac_1.FIXEDCELLS__normalized_imputed_acc_obs.pkl
Loading downstream_analysis/HVRs/BRO_mtscatac_1.FIXEDCELLS__HVRs.pkl
Dendritic cell
CD14+ monocyte
B cell
CD4+ T cell
Cytotoxic T cell
Natural killer cell
CD16+ monocyte


2022-09-30 13:27:18,976	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4148889)[0m 2022-09-30 13:27:54,906 cisTopic     INFO     Formatting data for sampleA_Dendritic cell_VS_sampleB_Dendritic cell
[2m[36m(markers_ray pid=4148889)[0m 2022-09-30 13:27:55,931 cisTopic     INFO     Computing p-value for sampleA_Dendritic cell_VS_sampleB_Dendritic cell
[2m[36m(markers_ray pid=4148888)[0m 2022-09-30 13:27:56,312 cisTopic     INFO     Formatting data for sampleB_Dendritic cell_VS_sampleA_Dendritic cell
[2m[36m(markers_ray pid=4148888)[0m 2022-09-30 13:27:57,196 cisTopic     INFO     Computing p-value for sampleB_Dendritic cell_VS_sampleA_Dendritic cell
[2m[36m(markers_ray pid=4148886)[0m 2022-09-30 13:27:57,592 cisTopic     INFO     Formatting data for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4148887)[0m 2022-09-30 13:27:59,134 cisTopic     INFO     Formatting data for sampleB_CD14+ monocyte_VS_sampleA_CD14+ monocyte
[2m[36m(markers_ray pid=4148885)[0m 2022-09-30 13:28:00,607 cisTopic 

2022-09-30 13:39:14,873	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4149868)[0m 2022-09-30 13:39:49,994 cisTopic     INFO     Formatting data for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4149867)[0m 2022-09-30 13:39:51,163 cisTopic     INFO     Formatting data for sampleB_CD14+ monocyte_VS_sampleA_CD14+ monocyte
[2m[36m(markers_ray pid=4149866)[0m 2022-09-30 13:39:52,427 cisTopic     INFO     Formatting data for sampleA_Natural killer cell_VS_sampleB_Natural killer cell
[2m[36m(markers_ray pid=4149864)[0m 2022-09-30 13:39:53,767 cisTopic     INFO     Formatting data for sampleB_Natural killer cell_VS_sampleA_Natural killer cell
[2m[36m(markers_ray pid=4149866)[0m 2022-09-30 13:39:54,809 cisTopic     INFO     Computing p-value for sampleA_Natural killer cell_VS_sampleB_Natural killer cell
[2m[36m(markers_ray pid=4149865)[0m 2022-09-30 13:39:55,005 cisTopic     INFO     Formatting data for sampleA_CD4+ T cell_VS_sampleB_CD4+ T cell
[2m[36m(markers_ray pid=4149864)[0m 2022-09-30 

2022-09-30 13:50:33,452	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4150928)[0m 2022-09-30 13:51:06,462 cisTopic     INFO     Formatting data for sampleA_Dendritic cell_VS_sampleB_Dendritic cell
[2m[36m(markers_ray pid=4150928)[0m 2022-09-30 13:51:07,300 cisTopic     INFO     Computing p-value for sampleA_Dendritic cell_VS_sampleB_Dendritic cell
[2m[36m(markers_ray pid=4150927)[0m 2022-09-30 13:51:07,692 cisTopic     INFO     Formatting data for sampleB_Dendritic cell_VS_sampleA_Dendritic cell
[2m[36m(markers_ray pid=4150927)[0m 2022-09-30 13:51:08,389 cisTopic     INFO     Computing p-value for sampleB_Dendritic cell_VS_sampleA_Dendritic cell
[2m[36m(markers_ray pid=4150925)[0m 2022-09-30 13:51:08,798 cisTopic     INFO     Formatting data for sampleA_B cell_VS_sampleB_B cell
[2m[36m(markers_ray pid=4150926)[0m 2022-09-30 13:51:09,913 cisTopic     INFO     Formatting data for sampleB_B cell_VS_sampleA_B cell
[2m[36m(markers_ray pid=4150924)[0m 2022-09-30 13:51:10,971 cisTopic     INFO     Formatting data for

2022-09-30 13:59:10,166	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4151909)[0m 2022-09-30 13:59:42,962 cisTopic     INFO     Formatting data for sampleA_Natural killer cell_VS_sampleB_Natural killer cell
[2m[36m(markers_ray pid=4151908)[0m 2022-09-30 13:59:44,037 cisTopic     INFO     Formatting data for sampleB_Natural killer cell_VS_sampleA_Natural killer cell
[2m[36m(markers_ray pid=4151909)[0m 2022-09-30 13:59:44,685 cisTopic     INFO     Computing p-value for sampleA_Natural killer cell_VS_sampleB_Natural killer cell
[2m[36m(markers_ray pid=4151907)[0m 2022-09-30 13:59:45,130 cisTopic     INFO     Formatting data for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4151908)[0m 2022-09-30 13:59:45,712 cisTopic     INFO     Computing p-value for sampleB_Natural killer cell_VS_sampleA_Natural killer cell
[2m[36m(markers_ray pid=4151906)[0m 2022-09-30 13:59:46,042 cisTopic     INFO     Formatting data for sampleB_CD14+ monocyte_VS_sampleA_CD14+ monocyte
[2m[36m(markers_ray pid=415190

2022-09-30 14:07:55,234	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4152894)[0m 2022-09-30 14:08:27,995 cisTopic     INFO     Formatting data for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4152893)[0m 2022-09-30 14:08:29,077 cisTopic     INFO     Formatting data for sampleB_CD14+ monocyte_VS_sampleA_CD14+ monocyte
[2m[36m(markers_ray pid=4152892)[0m 2022-09-30 14:08:30,004 cisTopic     INFO     Formatting data for sampleA_CD16+ monocyte_VS_sampleB_CD16+ monocyte
[2m[36m(markers_ray pid=4152892)[0m 2022-09-30 14:08:30,978 cisTopic     INFO     Computing p-value for sampleA_CD16+ monocyte_VS_sampleB_CD16+ monocyte
[2m[36m(markers_ray pid=4152891)[0m 2022-09-30 14:08:31,017 cisTopic     INFO     Formatting data for sampleB_CD16+ monocyte_VS_sampleA_CD16+ monocyte
[2m[36m(markers_ray pid=4152891)[0m 2022-09-30 14:08:31,974 cisTopic     INFO     Computing p-value for sampleB_CD16+ monocyte_VS_sampleA_CD16+ monocyte
[2m[36m(markers_ray pid=4152890)[0m 2022-09-30 14:08:31,952 cisTopic 

2022-09-30 14:17:30,327	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4153815)[0m 2022-09-30 14:18:03,015 cisTopic     INFO     Formatting data for sampleA_Cytotoxic T cell_VS_sampleB_Cytotoxic T cell
[2m[36m(markers_ray pid=4153813)[0m 2022-09-30 14:18:04,083 cisTopic     INFO     Formatting data for sampleB_Cytotoxic T cell_VS_sampleA_Cytotoxic T cell
[2m[36m(markers_ray pid=4153814)[0m 2022-09-30 14:18:05,150 cisTopic     INFO     Formatting data for sampleA_CD4+ T cell_VS_sampleB_CD4+ T cell
[2m[36m(markers_ray pid=4153815)[0m 2022-09-30 14:18:05,254 cisTopic     INFO     Computing p-value for sampleA_Cytotoxic T cell_VS_sampleB_Cytotoxic T cell
[2m[36m(markers_ray pid=4153812)[0m 2022-09-30 14:18:06,061 cisTopic     INFO     Formatting data for sampleB_CD4+ T cell_VS_sampleA_CD4+ T cell
[2m[36m(markers_ray pid=4153813)[0m 2022-09-30 14:18:06,253 cisTopic     INFO     Computing p-value for sampleB_Cytotoxic T cell_VS_sampleA_Cytotoxic T cell
[2m[36m(markers_ray pid=4153811)[0m 2022-09-30 14:18:07,044 cisTo

2022-09-30 14:26:53,393	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4154717)[0m 2022-09-30 14:27:27,902 cisTopic     INFO     Formatting data for sampleA_B cell_VS_sampleB_B cell
[2m[36m(markers_ray pid=4154716)[0m 2022-09-30 14:27:30,085 cisTopic     INFO     Formatting data for sampleB_B cell_VS_sampleA_B cell
[2m[36m(markers_ray pid=4154715)[0m 2022-09-30 14:27:31,749 cisTopic     INFO     Formatting data for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4154717)[0m 2022-09-30 14:27:32,150 cisTopic     INFO     Computing p-value for sampleA_B cell_VS_sampleB_B cell
[2m[36m(markers_ray pid=4154714)[0m 2022-09-30 14:27:33,456 cisTopic     INFO     Formatting data for sampleB_CD14+ monocyte_VS_sampleA_CD14+ monocyte
[2m[36m(markers_ray pid=4154716)[0m 2022-09-30 14:27:33,833 cisTopic     INFO     Computing p-value for sampleB_B cell_VS_sampleA_B cell
[2m[36m(markers_ray pid=4154713)[0m 2022-09-30 14:27:35,053 cisTopic     INFO     Formatting data for sampleA_CD4+ T cell_VS_sampleB_

2022-09-30 14:37:28,266	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4156106)[0m 2022-09-30 14:37:59,485 cisTopic     INFO     Formatting data for sampleA_CD4+ T cell_VS_sampleB_CD4+ T cell
[2m[36m(markers_ray pid=4156104)[0m 2022-09-30 14:37:59,854 cisTopic     INFO     Formatting data for sampleB_CD4+ T cell_VS_sampleA_CD4+ T cell
[2m[36m(markers_ray pid=4156105)[0m 2022-09-30 14:38:00,039 cisTopic     INFO     Formatting data for sampleA_Natural killer cell_VS_sampleB_Natural killer cell
[2m[36m(markers_ray pid=4156103)[0m 2022-09-30 14:38:00,520 cisTopic     INFO     Formatting data for sampleB_Natural killer cell_VS_sampleA_Natural killer cell
[2m[36m(markers_ray pid=4156100)[0m 2022-09-30 14:38:00,739 cisTopic     INFO     Formatting data for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4156105)[0m 2022-09-30 14:38:00,808 cisTopic     INFO     Computing p-value for sampleA_Natural killer cell_VS_sampleB_Natural killer cell
[2m[36m(markers_ray pid=4156106)[0m 2022-09-30 14:38:

2022-09-30 14:46:28,968	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4157627)[0m 2022-09-30 14:47:00,518 cisTopic     INFO     Formatting data for sampleA_Cytotoxic T cell_VS_sampleB_Cytotoxic T cell
[2m[36m(markers_ray pid=4157626)[0m 2022-09-30 14:47:00,974 cisTopic     INFO     Formatting data for sampleB_Cytotoxic T cell_VS_sampleA_Cytotoxic T cell
[2m[36m(markers_ray pid=4157625)[0m 2022-09-30 14:47:01,505 cisTopic     INFO     Formatting data for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4157627)[0m 2022-09-30 14:47:01,640 cisTopic     INFO     Computing p-value for sampleA_Cytotoxic T cell_VS_sampleB_Cytotoxic T cell
[2m[36m(markers_ray pid=4157626)[0m 2022-09-30 14:47:01,994 cisTopic     INFO     Computing p-value for sampleB_Cytotoxic T cell_VS_sampleA_Cytotoxic T cell
[2m[36m(markers_ray pid=4157624)[0m 2022-09-30 14:47:02,012 cisTopic     INFO     Formatting data for sampleB_CD14+ monocyte_VS_sampleA_CD14+ monocyte
[2m[36m(markers_ray pid=4157622)[0m 2022-09-30 14:47:

2022-09-30 14:56:46,398	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4158694)[0m 2022-09-30 14:57:21,842 cisTopic     INFO     Formatting data for sampleA_B cell_VS_sampleB_B cell
[2m[36m(markers_ray pid=4158692)[0m 2022-09-30 14:57:23,279 cisTopic     INFO     Formatting data for sampleB_B cell_VS_sampleA_B cell
[2m[36m(markers_ray pid=4158691)[0m 2022-09-30 14:57:24,728 cisTopic     INFO     Formatting data for sampleA_CD4+ T cell_VS_sampleB_CD4+ T cell
[2m[36m(markers_ray pid=4158694)[0m 2022-09-30 14:57:25,407 cisTopic     INFO     Computing p-value for sampleA_B cell_VS_sampleB_B cell
[2m[36m(markers_ray pid=4158693)[0m 2022-09-30 14:57:26,214 cisTopic     INFO     Formatting data for sampleB_CD4+ T cell_VS_sampleA_CD4+ T cell
[2m[36m(markers_ray pid=4158692)[0m 2022-09-30 14:57:26,814 cisTopic     INFO     Computing p-value for sampleB_B cell_VS_sampleA_B cell
[2m[36m(markers_ray pid=4158690)[0m 2022-09-30 14:57:27,513 cisTopic     INFO     Formatting data for sampleA_CD14+ monocyte_VS_sampleB_CD14+ mon

2022-09-30 15:08:08,216	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4161525)[0m 2022-09-30 15:08:47,571 cisTopic     INFO     Formatting data for sampleA_CD16+ monocyte_VS_sampleB_CD16+ monocyte
[2m[36m(markers_ray pid=4161524)[0m 2022-09-30 15:08:50,681 cisTopic     INFO     Formatting data for sampleB_CD16+ monocyte_VS_sampleA_CD16+ monocyte
[2m[36m(markers_ray pid=4161522)[0m 2022-09-30 15:08:55,526 cisTopic     INFO     Formatting data for sampleA_CD4+ T cell_VS_sampleB_CD4+ T cell
[2m[36m(markers_ray pid=4161525)[0m 2022-09-30 15:08:56,741 cisTopic     INFO     Computing p-value for sampleA_CD16+ monocyte_VS_sampleB_CD16+ monocyte
[2m[36m(markers_ray pid=4161524)[0m 2022-09-30 15:08:59,305 cisTopic     INFO     Computing p-value for sampleB_CD16+ monocyte_VS_sampleA_CD16+ monocyte
[2m[36m(markers_ray pid=4161523)[0m 2022-09-30 15:09:00,476 cisTopic     INFO     Formatting data for sampleB_CD4+ T cell_VS_sampleA_CD4+ T cell
[2m[36m(markers_ray pid=4161521)[0m 2022-09-30 15:09:04,504 cisTopic     INFO    

2022-09-30 15:19:45,678	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4162813)[0m 2022-09-30 15:20:09,681 cisTopic     INFO     Formatting data for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4162811)[0m 2022-09-30 15:20:09,982 cisTopic     INFO     Formatting data for sampleB_CD14+ monocyte_VS_sampleA_CD14+ monocyte
[2m[36m(markers_ray pid=4162812)[0m 2022-09-30 15:20:10,233 cisTopic     INFO     Formatting data for sampleA_Cytotoxic T cell_VS_sampleB_Cytotoxic T cell
[2m[36m(markers_ray pid=4162809)[0m 2022-09-30 15:20:10,521 cisTopic     INFO     Formatting data for sampleB_Cytotoxic T cell_VS_sampleA_Cytotoxic T cell
[2m[36m(markers_ray pid=4162813)[0m 2022-09-30 15:20:10,704 cisTopic     INFO     Computing p-value for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4162810)[0m 2022-09-30 15:20:10,720 cisTopic     INFO     Formatting data for sampleA_CD4+ T cell_VS_sampleB_CD4+ T cell
[2m[36m(markers_ray pid=4162810)[0m 2022-09-30 15:20:10,902 cisTopic 

2022-09-30 15:23:03,323	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4163807)[0m 2022-09-30 15:23:32,275 cisTopic     INFO     Formatting data for sampleA_Cytotoxic T cell_VS_sampleB_Cytotoxic T cell
[2m[36m(markers_ray pid=4163806)[0m 2022-09-30 15:23:33,041 cisTopic     INFO     Formatting data for sampleB_Cytotoxic T cell_VS_sampleA_Cytotoxic T cell
[2m[36m(markers_ray pid=4163805)[0m 2022-09-30 15:23:33,885 cisTopic     INFO     Formatting data for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4163804)[0m 2022-09-30 15:23:34,701 cisTopic     INFO     Formatting data for sampleB_CD14+ monocyte_VS_sampleA_CD14+ monocyte
[2m[36m(markers_ray pid=4163807)[0m 2022-09-30 15:23:35,690 cisTopic     INFO     Computing p-value for sampleA_Cytotoxic T cell_VS_sampleB_Cytotoxic T cell
[2m[36m(markers_ray pid=4163803)[0m 2022-09-30 15:23:35,648 cisTopic     INFO     Formatting data for sampleA_CD4+ T cell_VS_sampleB_CD4+ T cell
[2m[36m(markers_ray pid=4163802)[0m 2022-09-30 15:23:36,500 cisTo

2022-09-30 15:30:54,491	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4165266)[0m 2022-09-30 15:31:22,369 cisTopic     INFO     Formatting data for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4165265)[0m 2022-09-30 15:31:22,555 cisTopic     INFO     Formatting data for sampleB_CD14+ monocyte_VS_sampleA_CD14+ monocyte
[2m[36m(markers_ray pid=4165264)[0m 2022-09-30 15:31:22,952 cisTopic     INFO     Formatting data for sampleA_Cytotoxic T cell_VS_sampleB_Cytotoxic T cell
[2m[36m(markers_ray pid=4165266)[0m 2022-09-30 15:31:23,114 cisTopic     INFO     Computing p-value for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4165263)[0m 2022-09-30 15:31:23,062 cisTopic     INFO     Formatting data for sampleB_Cytotoxic T cell_VS_sampleA_Cytotoxic T cell
[2m[36m(markers_ray pid=4165265)[0m 2022-09-30 15:31:23,292 cisTopic     INFO     Computing p-value for sampleB_CD14+ monocyte_VS_sampleA_CD14+ monocyte
[2m[36m(markers_ray pid=4165262)[0m 2022-09-30 15:31:23,398 c

2022-09-30 15:34:46,478	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4166070)[0m 2022-09-30 15:35:15,209 cisTopic     INFO     Formatting data for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4166068)[0m 2022-09-30 15:35:15,917 cisTopic     INFO     Formatting data for sampleB_CD14+ monocyte_VS_sampleA_CD14+ monocyte
[2m[36m(markers_ray pid=4166070)[0m 2022-09-30 15:35:16,514 cisTopic     INFO     Computing p-value for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4166069)[0m 2022-09-30 15:35:16,569 cisTopic     INFO     Formatting data for sampleA_B cell_VS_sampleB_B cell
[2m[36m(markers_ray pid=4166068)[0m 2022-09-30 15:35:17,245 cisTopic     INFO     Computing p-value for sampleB_CD14+ monocyte_VS_sampleA_CD14+ monocyte
[2m[36m(markers_ray pid=4166067)[0m 2022-09-30 15:35:17,366 cisTopic     INFO     Formatting data for sampleB_B cell_VS_sampleA_B cell
[2m[36m(markers_ray pid=4166069)[0m 2022-09-30 15:35:17,931 cisTopic     INFO     Computing p-value f

2022-09-30 15:40:12,899	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4167003)[0m 2022-09-30 15:40:44,673 cisTopic     INFO     Formatting data for sampleA_Dendritic cell_VS_sampleB_Dendritic cell
[2m[36m(markers_ray pid=4167002)[0m 2022-09-30 15:40:44,896 cisTopic     INFO     Formatting data for sampleB_Dendritic cell_VS_sampleA_Dendritic cell
[2m[36m(markers_ray pid=4167003)[0m 2022-09-30 15:40:44,988 cisTopic     INFO     Computing p-value for sampleA_Dendritic cell_VS_sampleB_Dendritic cell
[2m[36m(markers_ray pid=4167002)[0m 2022-09-30 15:40:45,263 cisTopic     INFO     Computing p-value for sampleB_Dendritic cell_VS_sampleA_Dendritic cell
[2m[36m(markers_ray pid=4167000)[0m 2022-09-30 15:40:45,292 cisTopic     INFO     Formatting data for sampleA_CD4+ T cell_VS_sampleB_CD4+ T cell
[2m[36m(markers_ray pid=4167001)[0m 2022-09-30 15:40:45,950 cisTopic     INFO     Formatting data for sampleB_CD4+ T cell_VS_sampleA_CD4+ T cell
[2m[36m(markers_ray pid=4166999)[0m 2022-09-30 15:40:46,261 cisTopic     INFO    

2022-09-30 15:47:38,058	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4167982)[0m 2022-09-30 15:48:06,842 cisTopic     INFO     Formatting data for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4167981)[0m 2022-09-30 15:48:07,551 cisTopic     INFO     Formatting data for sampleB_CD14+ monocyte_VS_sampleA_CD14+ monocyte
[2m[36m(markers_ray pid=4167980)[0m 2022-09-30 15:48:08,358 cisTopic     INFO     Formatting data for sampleA_B cell_VS_sampleB_B cell
[2m[36m(markers_ray pid=4167978)[0m 2022-09-30 15:48:09,149 cisTopic     INFO     Formatting data for sampleB_B cell_VS_sampleA_B cell
[2m[36m(markers_ray pid=4167980)[0m 2022-09-30 15:48:09,685 cisTopic     INFO     Computing p-value for sampleA_B cell_VS_sampleB_B cell
[2m[36m(markers_ray pid=4167977)[0m 2022-09-30 15:48:09,945 cisTopic     INFO     Formatting data for sampleA_Cytotoxic T cell_VS_sampleB_Cytotoxic T cell
[2m[36m(markers_ray pid=4167978)[0m 2022-09-30 15:48:10,596 cisTopic     INFO     Computing p-value for sampleB_B c

2022-09-30 15:52:59,330	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4168794)[0m 2022-09-30 15:53:30,947 cisTopic     INFO     Formatting data for sampleA_Cytotoxic T cell_VS_sampleB_Cytotoxic T cell
[2m[36m(markers_ray pid=4168793)[0m 2022-09-30 15:53:31,731 cisTopic     INFO     Formatting data for sampleB_Cytotoxic T cell_VS_sampleA_Cytotoxic T cell
[2m[36m(markers_ray pid=4168792)[0m 2022-09-30 15:53:32,580 cisTopic     INFO     Formatting data for sampleA_CD4+ T cell_VS_sampleB_CD4+ T cell
[2m[36m(markers_ray pid=4168791)[0m 2022-09-30 15:53:33,330 cisTopic     INFO     Formatting data for sampleB_CD4+ T cell_VS_sampleA_CD4+ T cell
[2m[36m(markers_ray pid=4168794)[0m 2022-09-30 15:53:33,497 cisTopic     INFO     Computing p-value for sampleA_Cytotoxic T cell_VS_sampleB_Cytotoxic T cell
[2m[36m(markers_ray pid=4168790)[0m 2022-09-30 15:53:34,033 cisTopic     INFO     Formatting data for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4168793)[0m 2022-09-30 15:53:34,152 cisTopic   

2022-09-30 15:58:20,425	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4169600)[0m 2022-09-30 15:58:52,882 cisTopic     INFO     Formatting data for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4169601)[0m 2022-09-30 15:58:53,854 cisTopic     INFO     Formatting data for sampleB_CD14+ monocyte_VS_sampleA_CD14+ monocyte
[2m[36m(markers_ray pid=4169599)[0m 2022-09-30 15:58:54,819 cisTopic     INFO     Formatting data for sampleA_CD4+ T cell_VS_sampleB_CD4+ T cell
[2m[36m(markers_ray pid=4169598)[0m 2022-09-30 15:58:55,645 cisTopic     INFO     Formatting data for sampleB_CD4+ T cell_VS_sampleA_CD4+ T cell
[2m[36m(markers_ray pid=4169596)[0m 2022-09-30 15:58:56,693 cisTopic     INFO     Formatting data for sampleA_B cell_VS_sampleB_B cell
[2m[36m(markers_ray pid=4169597)[0m 2022-09-30 15:58:57,477 cisTopic     INFO     Formatting data for sampleB_B cell_VS_sampleA_B cell
[2m[36m(markers_ray pid=4169600)[0m 2022-09-30 15:58:57,772 cisTopic     INFO     Computing p-value for sampleA_CD14+

2022-09-30 16:06:31,908	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4170501)[0m 2022-09-30 16:07:00,637 cisTopic     INFO     Formatting data for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4170502)[0m 2022-09-30 16:07:01,115 cisTopic     INFO     Formatting data for sampleB_CD14+ monocyte_VS_sampleA_CD14+ monocyte
[2m[36m(markers_ray pid=4170500)[0m 2022-09-30 16:07:01,791 cisTopic     INFO     Formatting data for sampleA_CD4+ T cell_VS_sampleB_CD4+ T cell
[2m[36m(markers_ray pid=4170498)[0m 2022-09-30 16:07:02,493 cisTopic     INFO     Formatting data for sampleB_CD4+ T cell_VS_sampleA_CD4+ T cell
[2m[36m(markers_ray pid=4170499)[0m 2022-09-30 16:07:03,093 cisTopic     INFO     Formatting data for sampleA_Cytotoxic T cell_VS_sampleB_Cytotoxic T cell
[2m[36m(markers_ray pid=4170497)[0m 2022-09-30 16:07:03,855 cisTopic     INFO     Formatting data for sampleB_Cytotoxic T cell_VS_sampleA_Cytotoxic T cell
[2m[36m(markers_ray pid=4170501)[0m 2022-09-30 16:07:04,318 cisTopic     INFO

2022-09-30 16:11:59,390	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4171296)[0m 2022-09-30 16:12:33,110 cisTopic     INFO     Formatting data for sampleA_CD4+ T cell_VS_sampleB_CD4+ T cell
[2m[36m(markers_ray pid=4171295)[0m 2022-09-30 16:12:33,850 cisTopic     INFO     Formatting data for sampleB_CD4+ T cell_VS_sampleA_CD4+ T cell
[2m[36m(markers_ray pid=4171294)[0m 2022-09-30 16:12:34,777 cisTopic     INFO     Formatting data for sampleA_Natural killer cell_VS_sampleB_Natural killer cell
[2m[36m(markers_ray pid=4171293)[0m 2022-09-30 16:12:35,720 cisTopic     INFO     Formatting data for sampleB_Natural killer cell_VS_sampleA_Natural killer cell
[2m[36m(markers_ray pid=4171294)[0m 2022-09-30 16:12:36,638 cisTopic     INFO     Computing p-value for sampleA_Natural killer cell_VS_sampleB_Natural killer cell
[2m[36m(markers_ray pid=4171292)[0m 2022-09-30 16:12:36,742 cisTopic     INFO     Formatting data for sampleA_Cytotoxic T cell_VS_sampleB_Cytotoxic T cell
[2m[36m(markers_ray pid=4171293)[0m 2022-09-30 16

2022-09-30 16:18:31,753	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4172137)[0m 2022-09-30 16:19:04,642 cisTopic     INFO     Formatting data for sampleA_CD4+ T cell_VS_sampleB_CD4+ T cell
[2m[36m(markers_ray pid=4172135)[0m 2022-09-30 16:19:05,655 cisTopic     INFO     Formatting data for sampleB_CD4+ T cell_VS_sampleA_CD4+ T cell
[2m[36m(markers_ray pid=4172136)[0m 2022-09-30 16:19:06,608 cisTopic     INFO     Formatting data for sampleA_B cell_VS_sampleB_B cell
[2m[36m(markers_ray pid=4172133)[0m 2022-09-30 16:19:07,539 cisTopic     INFO     Formatting data for sampleB_B cell_VS_sampleA_B cell
[2m[36m(markers_ray pid=4172134)[0m 2022-09-30 16:19:08,585 cisTopic     INFO     Formatting data for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4172136)[0m 2022-09-30 16:19:08,871 cisTopic     INFO     Computing p-value for sampleA_B cell_VS_sampleB_B cell
[2m[36m(markers_ray pid=4172132)[0m 2022-09-30 16:19:09,453 cisTopic     INFO     Formatting data for sampleB_CD14+ monocyte_VS_sam

2022-09-30 16:24:47,184	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4172962)[0m 2022-09-30 16:25:21,645 cisTopic     INFO     Formatting data for sampleA_Natural killer cell_VS_sampleB_Natural killer cell
[2m[36m(markers_ray pid=4172960)[0m 2022-09-30 16:25:25,988 cisTopic     INFO     Formatting data for sampleB_Natural killer cell_VS_sampleA_Natural killer cell
[2m[36m(markers_ray pid=4172961)[0m 2022-09-30 16:25:30,246 cisTopic     INFO     Formatting data for sampleA_Cytotoxic T cell_VS_sampleB_Cytotoxic T cell
[2m[36m(markers_ray pid=4172962)[0m 2022-09-30 16:25:31,971 cisTopic     INFO     Computing p-value for sampleA_Natural killer cell_VS_sampleB_Natural killer cell
[2m[36m(markers_ray pid=4172959)[0m 2022-09-30 16:25:35,489 cisTopic     INFO     Formatting data for sampleB_Cytotoxic T cell_VS_sampleA_Cytotoxic T cell
[2m[36m(markers_ray pid=4172960)[0m 2022-09-30 16:25:36,949 cisTopic     INFO     Computing p-value for sampleB_Natural killer cell_VS_sampleA_Natural killer cell
[2m[36m(markers_ray pi

2022-09-30 16:37:47,783	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4173990)[0m 2022-09-30 16:38:23,066 cisTopic     INFO     Formatting data for sampleA_CD4+ T cell_VS_sampleB_CD4+ T cell
[2m[36m(markers_ray pid=4173989)[0m 2022-09-30 16:38:27,345 cisTopic     INFO     Formatting data for sampleB_CD4+ T cell_VS_sampleA_CD4+ T cell
[2m[36m(markers_ray pid=4173987)[0m 2022-09-30 16:38:31,801 cisTopic     INFO     Formatting data for sampleA_Cytotoxic T cell_VS_sampleB_Cytotoxic T cell
[2m[36m(markers_ray pid=4173988)[0m 2022-09-30 16:38:34,816 cisTopic     INFO     Formatting data for sampleB_Cytotoxic T cell_VS_sampleA_Cytotoxic T cell
[2m[36m(markers_ray pid=4173984)[0m 2022-09-30 16:38:37,467 cisTopic     INFO     Formatting data for sampleA_Natural killer cell_VS_sampleB_Natural killer cell
[2m[36m(markers_ray pid=4173986)[0m 2022-09-30 16:38:39,879 cisTopic     INFO     Formatting data for sampleB_Natural killer cell_VS_sampleA_Natural killer cell
[2m[36m(markers_ray pid=4173984)[0m 2022-09-30 16:38:42,4

2022-09-30 16:49:35,459	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4174938)[0m 2022-09-30 16:50:08,931 cisTopic     INFO     Formatting data for sampleB_CD14+ monocyte_VS_sampleA_CD14+ monocyte
[2m[36m(markers_ray pid=4174937)[0m 2022-09-30 16:50:08,943 cisTopic     INFO     Formatting data for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4174935)[0m 2022-09-30 16:50:10,155 cisTopic     INFO     Formatting data for sampleA_CD4+ T cell_VS_sampleB_CD4+ T cell
[2m[36m(markers_ray pid=4174936)[0m 2022-09-30 16:50:11,371 cisTopic     INFO     Formatting data for sampleB_CD4+ T cell_VS_sampleA_CD4+ T cell
[2m[36m(markers_ray pid=4174934)[0m 2022-09-30 16:50:12,622 cisTopic     INFO     Formatting data for sampleA_Natural killer cell_VS_sampleB_Natural killer cell
[2m[36m(markers_ray pid=4174938)[0m 2022-09-30 16:50:13,651 cisTopic     INFO     Computing p-value for sampleB_CD14+ monocyte_VS_sampleA_CD14+ monocyte
[2m[36m(markers_ray pid=4174937)[0m 2022-09-30 16:50:13,695 cisTopic     

2022-09-30 16:58:05,533	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4175798)[0m 2022-09-30 16:58:38,655 cisTopic     INFO     Formatting data for sampleA_CD4+ T cell_VS_sampleB_CD4+ T cell
[2m[36m(markers_ray pid=4175796)[0m 2022-09-30 16:58:39,919 cisTopic     INFO     Formatting data for sampleB_CD4+ T cell_VS_sampleA_CD4+ T cell
[2m[36m(markers_ray pid=4175797)[0m 2022-09-30 16:58:40,898 cisTopic     INFO     Formatting data for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4175795)[0m 2022-09-30 16:58:42,104 cisTopic     INFO     Formatting data for sampleB_CD14+ monocyte_VS_sampleA_CD14+ monocyte
[2m[36m(markers_ray pid=4175794)[0m 2022-09-30 16:58:43,251 cisTopic     INFO     Formatting data for sampleA_Cytotoxic T cell_VS_sampleB_Cytotoxic T cell
[2m[36m(markers_ray pid=4175798)[0m 2022-09-30 16:58:43,829 cisTopic     INFO     Computing p-value for sampleA_CD4+ T cell_VS_sampleB_CD4+ T cell
[2m[36m(markers_ray pid=4175793)[0m 2022-09-30 16:58:44,207 cisTopic     INFO     For

2022-09-30 17:07:09,084	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4176731)[0m 2022-09-30 17:07:43,037 cisTopic     INFO     Formatting data for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4176730)[0m 2022-09-30 17:07:44,546 cisTopic     INFO     Formatting data for sampleB_CD14+ monocyte_VS_sampleA_CD14+ monocyte
[2m[36m(markers_ray pid=4176729)[0m 2022-09-30 17:07:45,835 cisTopic     INFO     Formatting data for sampleA_Cytotoxic T cell_VS_sampleB_Cytotoxic T cell
[2m[36m(markers_ray pid=4176728)[0m 2022-09-30 17:07:47,084 cisTopic     INFO     Formatting data for sampleB_Cytotoxic T cell_VS_sampleA_Cytotoxic T cell
[2m[36m(markers_ray pid=4176727)[0m 2022-09-30 17:07:48,422 cisTopic     INFO     Formatting data for sampleA_Natural killer cell_VS_sampleB_Natural killer cell
[2m[36m(markers_ray pid=4176729)[0m 2022-09-30 17:07:49,438 cisTopic     INFO     Computing p-value for sampleA_Cytotoxic T cell_VS_sampleB_Cytotoxic T cell
[2m[36m(markers_ray pid=4176726)[0m 2022-09-30 17

2022-09-30 17:16:21,177	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4177637)[0m 2022-09-30 17:16:52,271 cisTopic     INFO     Formatting data for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4177636)[0m 2022-09-30 17:16:52,690 cisTopic     INFO     Formatting data for sampleB_CD14+ monocyte_VS_sampleA_CD14+ monocyte
[2m[36m(markers_ray pid=4177635)[0m 2022-09-30 17:16:53,050 cisTopic     INFO     Formatting data for sampleA_Cytotoxic T cell_VS_sampleB_Cytotoxic T cell
[2m[36m(markers_ray pid=4177634)[0m 2022-09-30 17:16:53,498 cisTopic     INFO     Formatting data for sampleB_Cytotoxic T cell_VS_sampleA_Cytotoxic T cell
[2m[36m(markers_ray pid=4177635)[0m 2022-09-30 17:16:53,731 cisTopic     INFO     Computing p-value for sampleA_Cytotoxic T cell_VS_sampleB_Cytotoxic T cell
[2m[36m(markers_ray pid=4177633)[0m 2022-09-30 17:16:53,923 cisTopic     INFO     Formatting data for sampleA_CD16+ monocyte_VS_sampleB_CD16+ monocyte
[2m[36m(markers_ray pid=4177637)[0m 2022-09-30 17:16:54,208

2022-09-30 17:25:50,907	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4178551)[0m 2022-09-30 17:26:21,687 cisTopic     INFO     Formatting data for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4178550)[0m 2022-09-30 17:26:22,153 cisTopic     INFO     Formatting data for sampleB_CD14+ monocyte_VS_sampleA_CD14+ monocyte
[2m[36m(markers_ray pid=4178549)[0m 2022-09-30 17:26:23,026 cisTopic     INFO     Formatting data for sampleA_CD4+ T cell_VS_sampleB_CD4+ T cell
[2m[36m(markers_ray pid=4178548)[0m 2022-09-30 17:26:23,794 cisTopic     INFO     Formatting data for sampleB_CD4+ T cell_VS_sampleA_CD4+ T cell
[2m[36m(markers_ray pid=4178547)[0m 2022-09-30 17:26:24,497 cisTopic     INFO     Formatting data for sampleA_B cell_VS_sampleB_B cell
[2m[36m(markers_ray pid=4178546)[0m 2022-09-30 17:26:25,154 cisTopic     INFO     Formatting data for sampleB_B cell_VS_sampleA_B cell
[2m[36m(markers_ray pid=4178549)[0m 2022-09-30 17:26:25,290 cisTopic     INFO     Computing p-value for sampleA_CD4+ 

2022-09-30 17:34:14,569	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4179393)[0m 2022-09-30 17:34:45,409 cisTopic     INFO     Formatting data for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4179392)[0m 2022-09-30 17:34:45,697 cisTopic     INFO     Formatting data for sampleB_CD14+ monocyte_VS_sampleA_CD14+ monocyte
[2m[36m(markers_ray pid=4179390)[0m 2022-09-30 17:34:46,091 cisTopic     INFO     Formatting data for sampleA_Cytotoxic T cell_VS_sampleB_Cytotoxic T cell
[2m[36m(markers_ray pid=4179391)[0m 2022-09-30 17:34:46,388 cisTopic     INFO     Formatting data for sampleB_Cytotoxic T cell_VS_sampleA_Cytotoxic T cell
[2m[36m(markers_ray pid=4179393)[0m 2022-09-30 17:34:46,799 cisTopic     INFO     Computing p-value for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4179389)[0m 2022-09-30 17:34:46,844 cisTopic     INFO     Formatting data for sampleA_Natural killer cell_VS_sampleB_Natural killer cell
[2m[36m(markers_ray pid=4179390)[0m 2022-09-30 17:34:

2022-09-30 17:39:46,930	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4180187)[0m 2022-09-30 17:40:18,924 cisTopic     INFO     Formatting data for sampleA_CD4+ T cell_VS_sampleB_CD4+ T cell
[2m[36m(markers_ray pid=4180186)[0m 2022-09-30 17:40:19,572 cisTopic     INFO     Formatting data for sampleB_CD4+ T cell_VS_sampleA_CD4+ T cell
[2m[36m(markers_ray pid=4180185)[0m 2022-09-30 17:40:20,257 cisTopic     INFO     Formatting data for sampleA_Dendritic cell_VS_sampleB_Dendritic cell
[2m[36m(markers_ray pid=4180185)[0m 2022-09-30 17:40:20,631 cisTopic     INFO     Computing p-value for sampleA_Dendritic cell_VS_sampleB_Dendritic cell
[2m[36m(markers_ray pid=4180183)[0m 2022-09-30 17:40:20,911 cisTopic     INFO     Formatting data for sampleB_Dendritic cell_VS_sampleA_Dendritic cell
[2m[36m(markers_ray pid=4180183)[0m 2022-09-30 17:40:21,269 cisTopic     INFO     Computing p-value for sampleB_Dendritic cell_VS_sampleA_Dendritic cell
[2m[36m(markers_ray pid=4180184)[0m 2022-09-30 17:40:21,574 cisTopic     INFO    

2022-09-30 17:47:36,823	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4181041)[0m 2022-09-30 17:48:10,356 cisTopic     INFO     Formatting data for sampleA_Dendritic cell_VS_sampleB_Dendritic cell
[2m[36m(markers_ray pid=4181041)[0m 2022-09-30 17:48:10,978 cisTopic     INFO     Computing p-value for sampleA_Dendritic cell_VS_sampleB_Dendritic cell
[2m[36m(markers_ray pid=4181040)[0m 2022-09-30 17:48:11,437 cisTopic     INFO     Formatting data for sampleB_Dendritic cell_VS_sampleA_Dendritic cell
[2m[36m(markers_ray pid=4181040)[0m 2022-09-30 17:48:11,918 cisTopic     INFO     Computing p-value for sampleB_Dendritic cell_VS_sampleA_Dendritic cell
[2m[36m(markers_ray pid=4181038)[0m 2022-09-30 17:48:12,251 cisTopic     INFO     Formatting data for sampleA_CD16+ monocyte_VS_sampleB_CD16+ monocyte
[2m[36m(markers_ray pid=4181039)[0m 2022-09-30 17:48:13,113 cisTopic     INFO     Formatting data for sampleB_CD16+ monocyte_VS_sampleA_CD16+ monocyte
[2m[36m(markers_ray pid=4181038)[0m 2022-09-30 17:48:13,199 cisTopic 

2022-09-30 17:57:10,957	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4181966)[0m 2022-09-30 17:57:45,381 cisTopic     INFO     Formatting data for sampleA_Cytotoxic T cell_VS_sampleB_Cytotoxic T cell
[2m[36m(markers_ray pid=4181964)[0m 2022-09-30 17:57:47,448 cisTopic     INFO     Formatting data for sampleB_Cytotoxic T cell_VS_sampleA_Cytotoxic T cell
[2m[36m(markers_ray pid=4181965)[0m 2022-09-30 17:57:51,061 cisTopic     INFO     Formatting data for sampleA_CD4+ T cell_VS_sampleB_CD4+ T cell
[2m[36m(markers_ray pid=4181963)[0m 2022-09-30 17:57:54,945 cisTopic     INFO     Formatting data for sampleB_CD4+ T cell_VS_sampleA_CD4+ T cell
[2m[36m(markers_ray pid=4181962)[0m 2022-09-30 17:57:58,982 cisTopic     INFO     Formatting data for sampleA_Natural killer cell_VS_sampleB_Natural killer cell
[2m[36m(markers_ray pid=4181966)[0m 2022-09-30 17:58:02,445 cisTopic     INFO     Computing p-value for sampleA_Cytotoxic T cell_VS_sampleB_Cytotoxic T cell
[2m[36m(markers_ray pid=4181961)[0m 2022-09-30 17:58:03,182 c

2022-09-30 18:08:56,315	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4182982)[0m 2022-09-30 18:09:30,212 cisTopic     INFO     Formatting data for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4182979)[0m 2022-09-30 18:09:31,316 cisTopic     INFO     Formatting data for sampleB_CD14+ monocyte_VS_sampleA_CD14+ monocyte
[2m[36m(markers_ray pid=4182981)[0m 2022-09-30 18:09:32,323 cisTopic     INFO     Formatting data for sampleA_Dendritic cell_VS_sampleB_Dendritic cell
[2m[36m(markers_ray pid=4182981)[0m 2022-09-30 18:09:32,912 cisTopic     INFO     Computing p-value for sampleA_Dendritic cell_VS_sampleB_Dendritic cell
[2m[36m(markers_ray pid=4182978)[0m 2022-09-30 18:09:33,318 cisTopic     INFO     Formatting data for sampleB_Dendritic cell_VS_sampleA_Dendritic cell
[2m[36m(markers_ray pid=4182978)[0m 2022-09-30 18:09:33,933 cisTopic     INFO     Computing p-value for sampleB_Dendritic cell_VS_sampleA_Dendritic cell
[2m[36m(markers_ray pid=4182982)[0m 2022-09-30 18:09:34,327 cisTopic 

2022-09-30 18:19:06,740	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4183933)[0m 2022-09-30 18:19:41,296 cisTopic     INFO     Formatting data for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4183932)[0m 2022-09-30 18:19:42,339 cisTopic     INFO     Formatting data for sampleB_CD14+ monocyte_VS_sampleA_CD14+ monocyte
[2m[36m(markers_ray pid=4183931)[0m 2022-09-30 18:19:43,467 cisTopic     INFO     Formatting data for sampleA_CD16+ monocyte_VS_sampleB_CD16+ monocyte
[2m[36m(markers_ray pid=4183930)[0m 2022-09-30 18:19:44,476 cisTopic     INFO     Formatting data for sampleB_CD16+ monocyte_VS_sampleA_CD16+ monocyte
[2m[36m(markers_ray pid=4183931)[0m 2022-09-30 18:19:44,897 cisTopic     INFO     Computing p-value for sampleA_CD16+ monocyte_VS_sampleB_CD16+ monocyte
[2m[36m(markers_ray pid=4183933)[0m 2022-09-30 18:19:45,503 cisTopic     INFO     Computing p-value for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4183929)[0m 2022-09-30 18:19:45,717 cisTopic 

2022-09-30 18:29:20,779	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4184882)[0m 2022-09-30 18:29:52,090 cisTopic     INFO     Formatting data for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4184881)[0m 2022-09-30 18:29:52,861 cisTopic     INFO     Formatting data for sampleB_CD14+ monocyte_VS_sampleA_CD14+ monocyte
[2m[36m(markers_ray pid=4184879)[0m 2022-09-30 18:29:53,651 cisTopic     INFO     Formatting data for sampleA_CD4+ T cell_VS_sampleB_CD4+ T cell
[2m[36m(markers_ray pid=4184880)[0m 2022-09-30 18:29:54,302 cisTopic     INFO     Formatting data for sampleB_CD4+ T cell_VS_sampleA_CD4+ T cell
[2m[36m(markers_ray pid=4184878)[0m 2022-09-30 18:29:55,085 cisTopic     INFO     Formatting data for sampleA_CD16+ monocyte_VS_sampleB_CD16+ monocyte
[2m[36m(markers_ray pid=4184882)[0m 2022-09-30 18:29:55,779 cisTopic     INFO     Computing p-value for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4184877)[0m 2022-09-30 18:29:55,785 cisTopic     INFO     F

2022-09-30 18:35:32,307	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


[2m[36m(markers_ray pid=4185678)[0m 2022-09-30 18:36:01,152 cisTopic     INFO     Formatting data for sampleA_Cytotoxic T cell_VS_sampleB_Cytotoxic T cell
[2m[36m(markers_ray pid=4185677)[0m 2022-09-30 18:36:01,861 cisTopic     INFO     Formatting data for sampleB_Cytotoxic T cell_VS_sampleA_Cytotoxic T cell
[2m[36m(markers_ray pid=4185676)[0m 2022-09-30 18:36:02,687 cisTopic     INFO     Formatting data for sampleA_CD14+ monocyte_VS_sampleB_CD14+ monocyte
[2m[36m(markers_ray pid=4185675)[0m 2022-09-30 18:36:03,477 cisTopic     INFO     Formatting data for sampleB_CD14+ monocyte_VS_sampleA_CD14+ monocyte
[2m[36m(markers_ray pid=4185673)[0m 2022-09-30 18:36:04,248 cisTopic     INFO     Formatting data for sampleA_B cell_VS_sampleB_B cell
[2m[36m(markers_ray pid=4185678)[0m 2022-09-30 18:36:05,112 cisTopic     INFO     Computing p-value for sampleA_Cytotoxic T cell_VS_sampleB_Cytotoxic T cell
[2m[36m(markers_ray pid=4185674)[0m 2022-09-30 18:36:05,220 cisTopic     IN

<IPython.core.display.Javascript object>

# visualise dars

In [14]:
markers_path_dict = {x.split('/')[-1].split(f'__')[0]:x for x in sorted(glob.glob(f'downstream_analysis/DARs/*/*.pkl'))}
markers_path_dict

{'BIO_ddseq_1.FIXEDCELLS': 'downstream_analysis/DARs/BIO_ddseq_1.FIXEDCELLS/BIO_ddseq_1.FIXEDCELLS__DARs_dict.pkl',
 'BIO_ddseq_2.FIXEDCELLS': 'downstream_analysis/DARs/BIO_ddseq_2.FIXEDCELLS/BIO_ddseq_2.FIXEDCELLS__DARs_dict.pkl',
 'BIO_ddseq_3.FIXEDCELLS': 'downstream_analysis/DARs/BIO_ddseq_3.FIXEDCELLS/BIO_ddseq_3.FIXEDCELLS__DARs_dict.pkl',
 'BIO_ddseq_4.FIXEDCELLS': 'downstream_analysis/DARs/BIO_ddseq_4.FIXEDCELLS/BIO_ddseq_4.FIXEDCELLS__DARs_dict.pkl',
 'BRO_mtscatac_1.FIXEDCELLS': 'downstream_analysis/DARs/BRO_mtscatac_1.FIXEDCELLS/BRO_mtscatac_1.FIXEDCELLS__DARs_dict.pkl',
 'BRO_mtscatac_2.FIXEDCELLS': 'downstream_analysis/DARs/BRO_mtscatac_2.FIXEDCELLS/BRO_mtscatac_2.FIXEDCELLS__DARs_dict.pkl',
 'CNA_10xmultiome_1.FIXEDCELLS': 'downstream_analysis/DARs/CNA_10xmultiome_1.FIXEDCELLS/CNA_10xmultiome_1.FIXEDCELLS__DARs_dict.pkl',
 'CNA_10xmultiome_2.FIXEDCELLS': 'downstream_analysis/DARs/CNA_10xmultiome_2.FIXEDCELLS/CNA_10xmultiome_2.FIXEDCELLS__DARs_dict.pkl',
 'CNA_10xv11_1.FIX

<IPython.core.display.Javascript object>

In [15]:
imputed_acc_obj_dict = {x.split('/')[-1].split(f'__')[0]:x for x in sorted(glob.glob(f'downstream_analysis/imputed_acc_objs/*.pkl'))}
imputed_acc_obj_dict

{'BIO_ddseq_1.FIXEDCELLS': 'downstream_analysis/imputed_acc_objs/BIO_ddseq_1.FIXEDCELLS__normalized_imputed_acc_obs.pkl',
 'BIO_ddseq_2.FIXEDCELLS': 'downstream_analysis/imputed_acc_objs/BIO_ddseq_2.FIXEDCELLS__normalized_imputed_acc_obs.pkl',
 'BIO_ddseq_3.FIXEDCELLS': 'downstream_analysis/imputed_acc_objs/BIO_ddseq_3.FIXEDCELLS__normalized_imputed_acc_obs.pkl',
 'BIO_ddseq_4.FIXEDCELLS': 'downstream_analysis/imputed_acc_objs/BIO_ddseq_4.FIXEDCELLS__normalized_imputed_acc_obs.pkl',
 'BRO_mtscatac_1.FIXEDCELLS': 'downstream_analysis/imputed_acc_objs/BRO_mtscatac_1.FIXEDCELLS__normalized_imputed_acc_obs.pkl',
 'BRO_mtscatac_2.FIXEDCELLS': 'downstream_analysis/imputed_acc_objs/BRO_mtscatac_2.FIXEDCELLS__normalized_imputed_acc_obs.pkl',
 'CNA_10xmultiome_1.FIXEDCELLS': 'downstream_analysis/imputed_acc_objs/CNA_10xmultiome_1.FIXEDCELLS__normalized_imputed_acc_obs.pkl',
 'CNA_10xmultiome_2.FIXEDCELLS': 'downstream_analysis/imputed_acc_objs/CNA_10xmultiome_2.FIXEDCELLS__normalized_imputed_ac

<IPython.core.display.Javascript object>

In [16]:
for sample in markers_path_dict.keys():
    print(sample)
    fig_path = f'plots_qc/{sample}_DAR_umap.png'
    if not os.path.exists(fig_path):
        with open(cistopic_obj_path_dict[sample], 'rb') as f:
            cto = pickle.load(f)

        with open(imputed_acc_obj_dict[sample], 'rb') as f:
            imputed_acc_obj = pickle.load(f)

        with open(markers_path_dict[sample], 'rb') as f:
            markers_dict = pickle.load(f)

        [ print(x + ': '+ str(len(markers_dict[x]))) for x in markers_dict.keys() ]
        plot_imputed_features(
            cto,
            reduction_name='UMAP',
            imputed_data=imputed_acc_obj,
            features=[markers_dict[x].index.tolist()[0] for x in markers_dict.keys() if len(markers_dict[x])> 0],
            scale=False,
            num_columns=3,
            selected_cells = cto.projections['cell']['UMAP'].index.tolist(),
            save=fig_path
        )
    else:
        print(f"\t{fig_path} already exists")

BIO_ddseq_1.FIXEDCELLS
	plots_qc/BIO_ddseq_1.FIXEDCELLS_DAR_umap.png already exists
BIO_ddseq_2.FIXEDCELLS
	plots_qc/BIO_ddseq_2.FIXEDCELLS_DAR_umap.png already exists
BIO_ddseq_3.FIXEDCELLS
	plots_qc/BIO_ddseq_3.FIXEDCELLS_DAR_umap.png already exists
BIO_ddseq_4.FIXEDCELLS
	plots_qc/BIO_ddseq_4.FIXEDCELLS_DAR_umap.png already exists
BRO_mtscatac_1.FIXEDCELLS
	plots_qc/BRO_mtscatac_1.FIXEDCELLS_DAR_umap.png already exists
BRO_mtscatac_2.FIXEDCELLS
	plots_qc/BRO_mtscatac_2.FIXEDCELLS_DAR_umap.png already exists
CNA_10xmultiome_1.FIXEDCELLS
	plots_qc/CNA_10xmultiome_1.FIXEDCELLS_DAR_umap.png already exists
CNA_10xmultiome_2.FIXEDCELLS
	plots_qc/CNA_10xmultiome_2.FIXEDCELLS_DAR_umap.png already exists
CNA_10xv11_1.FIXEDCELLS
	plots_qc/CNA_10xv11_1.FIXEDCELLS_DAR_umap.png already exists
CNA_10xv11_2.FIXEDCELLS
	plots_qc/CNA_10xv11_2.FIXEDCELLS_DAR_umap.png already exists
CNA_10xv11_3.FIXEDCELLS
	plots_qc/CNA_10xv11_3.FIXEDCELLS_DAR_umap.png already exists
CNA_10xv11_4.FIXEDCELLS
	plots_qc/

<IPython.core.display.Javascript object>

# Check chromosome distribution of DARs:

In [5]:
import glob

<IPython.core.display.Javascript object>

In [16]:
dar_path_dict = {x.split('/')[-1].split('__1-2xfoldchange_DARs.ENCODE3_overlap.bed')[0]: x for x in glob.glob('male_female_DARs/*/*DARs.bed')}

<IPython.core.display.Javascript object>

In [22]:
peaks_df

Unnamed: 0,0,1,2,3,4,5,6
0,chr9,137591036,137591536,sampleA_CD4+_T_cell_VS_sampleB_CD4+_T_cell,0.362751,.,0.032575
1,chr6,108589182,108589682,sampleA_CD4+_T_cell_VS_sampleB_CD4+_T_cell,0.32567,.,0.044384
2,chr17,36105672,36106172,sampleA_CD4+_T_cell_VS_sampleB_CD4+_T_cell,0.272849,.,0.009967


<IPython.core.display.Javascript object>

In [24]:
chroms_in_df

['chr17', 'chr6', 'chr9']

<IPython.core.display.Javascript object>

In [30]:
chroms_standard

['chr1',
 'chr2',
 'chr3',
 'chr4',
 'chr5',
 'chr6',
 'chr7',
 'chr8',
 'chr9',
 'chr10',
 'chr11',
 'chr12',
 'chr13',
 'chr14',
 'chr15',
 'chr16',
 'chr17',
 'chr18',
 'chr19',
 'chr20',
 'chr21',
 'chr22',
 'chrX']

<IPython.core.display.Javascript object>

In [35]:
chroms_standard = ["chr" + str(x + 1) for x in range(22)] + ['chrX']


<IPython.core.display.Javascript object>

In [36]:
peaks_df[0].value_counts()[chroms_standard]

KeyError: "['chr1', 'chr2', 'chr3', 'chr4', 'chr5', 'chr7', 'chr8', 'chr10', 'chr11', 'chr12', 'chr13', 'chr14', 'chr15', 'chr16', 'chr18', 'chr19', 'chr20', 'chr21', 'chr22', 'chrX'] not in index"

<IPython.core.display.Javascript object>

{'chr17', 'chr6', 'chr9'}

<IPython.core.display.Javascript object>

In [43]:
for sample, path in dar_path_dict.items():
    print(sample)
    dars_df = pd.read_csv(path, sep='\t', header=None)
    
    chroms_in_df = list(sorted(dars_df[0].unique()))
    chroms_standard = ["chr" + str(x + 1) for x in range(22)] + ['chrX']
    chroms_nonstandard = list(set(chroms_in_df ) - set(chroms_standard) -  set(['chrM']))
    
    chroms_standard_in_sample = set(chroms_in_df).intersection(set(chroms_standard))
    n_standard = dars_df[0].value_counts()[chroms_standard_in_sample].sum()
    
    
    n_contigs = dars_df[0].value_counts()[chroms_nonstandard].sum()
    if 'chrM' in dars_df[0].value_counts().index:
        n_chrm = dars_df[0].value_counts()['chrM'].sum()
    else:
        n_chrm = 0
    pct_nonstandard = (n_contigs + n_chrm)/len(dars_df)*100
        
    print(f"\tdars on standard chromosomes: {n_standard}")
    print(f"\tdars on contigs: {n_contigs}")
    print(f"\tdars on chrM: {n_chrm}")
    print(f"\t% dars non standard chromosomes: {pct_nonstandard}%")
    if 'chrY' in chroms_in_df:
        n_chrY = dars_df[0].value_counts()['chrY'].sum()
        print(f"\tdars on chrY: {n_chrY}")
    else:
        print(f"\tdars on chrY: 0")

EPF_hydrop_2.FIXEDCELLS__sampleA_CD4+_T_cell_VS_sampleB_CD4+_T_cell__1-2xfoldchange_DARs.bed
	dars on standard chromosomes: 3
	dars on contigs: 0
	dars on chrM: 0
	% dars non standard chromosomes: 0.0%
	dars on chrY: 0
EPF_hydrop_2.FIXEDCELLS__sampleB_Cytotoxic_T_cell_VS_sampleA_Cytotoxic_T_cell__1-2xfoldchange_DARs.bed
	dars on standard chromosomes: 27347
	dars on contigs: 60
	dars on chrM: 0
	% dars non standard chromosomes: 0.21892217316743898%
	dars on chrY: 2
EPF_hydrop_2.FIXEDCELLS__sampleA_Cytotoxic_T_cell_VS_sampleB_Cytotoxic_T_cell__1-2xfoldchange_DARs.bed
	dars on standard chromosomes: 7233
	dars on contigs: 6
	dars on chrM: 0
	% dars non standard chromosomes: 0.08288437629506838%
	dars on chrY: 6
EPF_hydrop_2.FIXEDCELLS__sampleB_CD14+_monocyte_VS_sampleA_CD14+_monocyte__1-2xfoldchange_DARs.bed
	dars on standard chromosomes: 260
	dars on contigs: 9
	dars on chrM: 0
	% dars non standard chromosomes: 3.3457249070631967%
	dars on chrY: 1
EPF_hydrop_2.FIXEDCELLS__sampleA_Natural_

<IPython.core.display.Javascript object>

## Gene activity

In [17]:
import pyranges as pr
import requests
import pybiomart as pbm

<IPython.core.display.Javascript object>

### Get gene annotations and chromosome sizes

In [18]:
# For human
dataset = pbm.Dataset(name='hsapiens_gene_ensembl',  host='http://www.ensembl.org')
annot = dataset.query(attributes=['chromosome_name', 'start_position', 'end_position', 'strand', 'external_gene_name', 'transcription_start_site', 'transcript_biotype'])
annot['Chromosome/scaffold name'] = 'chr' + annot['Chromosome/scaffold name'].astype(str)
annot.columns=['Chromosome', 'Start', 'End', 'Strand', 'Gene','Transcription_Start_Site', 'Transcript_type']
annot = annot[annot.Transcript_type == 'protein_coding']
annot.Strand[annot.Strand == 1] = '+'
annot.Strand[annot.Strand == -1] = '-'
pr_annotation = pr.PyRanges(annot.dropna(axis = 0))
pr_annotation

Unnamed: 0,Chromosome,Start,End,Strand,Gene,Transcription_Start_Site,Transcript_type
0,chr1,1471765,1497848,+,ATAD3B,1471765,protein_coding
1,chr1,1471765,1497848,+,ATAD3B,1471784,protein_coding
2,chr1,3069168,3438621,+,PRDM16,3069168,protein_coding
3,chr1,3069168,3438621,+,PRDM16,3069197,protein_coding
4,chr1,3069168,3438621,+,PRDM16,3069211,protein_coding
...,...,...,...,...,...,...,...
85681,chrY,6865918,6911752,-,AMELY,6911752,protein_coding
85682,chrY,6865918,6911752,-,AMELY,6872608,protein_coding
85683,chrY,21903618,21918042,-,RBMY1E,21918032,protein_coding
85684,chrY,24045229,24048019,-,CDY1B,24047969,protein_coding


<IPython.core.display.Javascript object>

In [19]:
# get chromosome sizes (hg38)
target_url = 'http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/hg38.chrom.sizes'
chromsizes = pd.read_csv(target_url, sep='\t', header=None)
chromsizes.columns = ['Chromosome', 'End']
chromsizes['Start'] = [0]*chromsizes.shape[0]
chromsizes = chromsizes.loc[:,['Chromosome', 'Start', 'End']]
chromsizes = pr.PyRanges(chromsizes)
chromsizes

Unnamed: 0,Chromosome,Start,End
0,chr1,0,248956422
1,chr1_GL383518v1_alt,0,182439
2,chr1_GL383519v1_alt,0,110268
3,chr1_GL383520v2_alt,0,366580
4,chr1_KI270706v1_random,0,175055
...,...,...,...
450,chrX_KI270880v1_alt,0,284869
451,chrX_KI270881v1_alt,0,144206
452,chrX_KI270913v1_alt,0,274009
453,chrY,0,57227415


<IPython.core.display.Javascript object>

### Infer gene activity

In [20]:
from pycisTopic.gene_activity import get_gene_activity
from pycisTopic.diff_features import find_diff_features

<IPython.core.display.Javascript object>

<span id="papermill-error-cell" style="color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;">Execution using papermill encountered an exception here and stopped:</span>

In [21]:
gene_act_dict = {}

for key in cistopic_obj_dict.keys():
    print(key)
    gene_act, weights = get_gene_activity(
        imputed_acc_obj_dict[key], # Region-cell probabilities
        pr_annotation, # Gene annotation
        chromsizes, # Chromosome size
        use_gene_boundaries=True, # Whether to use the whole search space or stop when encountering another gene
        upstream=[1000, 100000], # Search space upstream. The minimum means that even if there is a gene right next to it 
                                 #these bp will be taken (1kbp here)
        downstream=[1000,100000], # Search space downstream
        distance_weight=True, # Whether to add a distance weight (an exponential function, the weight will decrease with distance)
        decay_rate=1, # Exponent for the distance exponential funciton (the higher the faster will be the decrease)
        extend_gene_body_upstream=10000, # Number of bp upstream immune to the distance weight (their value will be maximum for 
                                         #this weight)
        extend_gene_body_downstream=500, # Number of bp downstream immune to the distance weight
        gene_size_weight=False, # Whether to add a weights based on the length of the gene
        gene_size_scale_factor='median', # Dividend to calculate the gene size weigth. Default is the median value of all genes
                                         #in the genome
        remove_promoters=False, # Whether to remove promoters when computing gene activity scores
        average_scores=True, # Whether to divide by the total number of region assigned to a gene when calculating the gene 
                             # activity score
        scale_factor=1, # Value to multiply for the final gene activity matrix
        extend_tss=[10,10], # Space to consider a promoter
        gini_weight = True, # Whether to add a gini index weigth. The more unique the region is, the higher this weight will be
        return_weights= True, # Whether to return the final weights
        project='Gene_activity') # Project name for the gene activity object
    gene_act_dict[key] = copy.copy(gene_act)


NameError: name 'cistopic_obj_dict' is not defined

<IPython.core.display.Javascript object>

### Infer the Differentially Accessible Genes (DAGs)

In [None]:
from pycisTopic.clust_vis import plot_imputed_features

In [None]:
dag_markers_dict = {}
for key in cistopic_obj_dict.keys():
    print(key)
    dag_markers_dict[key] = find_diff_features(cistopic_obj_dict[key],
                      gene_act_dict[key],
                      variable='consensus_cell_type',
                      var_features=None,
                      contrasts=None,
                      adjpval_thr=0.05,
                      log2fc_thr=np.log2(1.5),
                      n_cpu=5,
                      )

In [None]:
for key in cistopic_obj_dict.keys():
    print(key)
    plot_imputed_features(cistopic_obj_dict[key],
                    reduction_name='UMAP',
                    imputed_data=gene_act,
                    features=['CD34', 'THY1', # CD34+
                              'NCAM1', 'FCGR3A', # NK
                              'CD14', 'CD68', # Monocytes
                              'MS4A1', 'CD79A', # B cells
                              'CD8A', 'CD8B', # Cytotoxic CD8 cells
                              'CD4', # CD4 T
                              'CD3D', # T
                              'IL3RA', 'CD1C', 'BATF3', # Dendritic
                             ],
                    scale=True,
                    num_columns=5,
                    )

## Export to loom

In [None]:
# cistopic_obj_dict # ok
# imputed_acc_obj_dict # ok
# region_bin_topics #(region_bin_topics_dict) ok
# binarized_cell_topic # ok
# DARs_dict #(markers_dict_dict) ok

In [None]:
f_region_loom_dir = os.path.join(f_final_dir, 'region_acc_loom')
if not os.path.exists(f_region_loom_dir):
    os.makedirs(f_region_loom_dir)
    
f_gene_loom_dir = os.path.join(f_final_dir, 'gene_act_loom')
if not os.path.exists(f_gene_loom_dir):
    os.makedirs(f_gene_loom_dir)

### Region accessibility

In [None]:
from pycisTopic.loom import (
    export_region_accessibility_to_loom,
    export_gene_activity_to_loom
)

In [None]:
for key in cistopic_obj_dict.keys():
    print(key)
    s = 'merged' if key=='merged' else sample_annot.loc[key,'Sample']
    f_out = os.path.join(f_region_loom_dir, s + '__libDS_region_accessibility.loom')
    if os.path.exists(f_out):
        print(f"Skipping {f_out}: already exists.")
        continue
    # Subset regions, we will use only regions in topics and DARs here to make it faster
    regions_in_topics = list(set(sum([region_bin_topics_dict[key][i].index.tolist() for i in region_bin_topics_dict[key].keys()],[])))
    regions_in_DARs = list(set(sum([markers_dict_dict[key][i].index.tolist() for i in markers_dict_dict[key].keys()],[])))
    # make sure we only take regions that actually exist in the accessibility matrix:
    selected_regions = list(set(regions_in_topics + regions_in_DARs).intersection(set(imputed_acc_obj_dict[key].feature_names)))

    # Export to loom
    export_region_accessibility_to_loom(
        accessibility_matrix = imputed_acc_obj_dict[key],
        cistopic_obj = cistopic_obj_dict[key], 
        binarized_topic_region = region_bin_topics_dict[key],
        binarized_cell_topic = binarized_cell_topic_dict[key],
        out_fname = f_out,
        selected_regions = selected_regions ,
        selected_cells = [ x.split('-')[0] + '-' + x.split('-')[1]  for x in cistopic_obj_dict[key].cell_names ], # this leaves a cell barcode of the format type 'TGCATGTCGCCGTTCCAAGA-21'
        # selected_cells = cistopic_obj_dict[key].projections['cell']['UMAP'].index.tolist(), # cflerin original
        cluster_annotation = ['consensus_cell_type'],
        cluster_markers = {'consensus_cell_type': markers_dict_dict[key]},
        tree_structure = ('scATAC-seq_Benchmark', 'ATAC_library_downsampled', 'Region_accessibility'),
        title = s + ' - Region accessibility all',
        nomenclature = "hg38"
    )

In [None]:
len([ x.split('-')[0] + '-' + x.split('-')[1]  for x in cistopic_obj_dict[key].cell_names ])

### Gene activity

In [None]:
from ctxcore.genesig import Regulon

# generate a dummy regulon (required for export_gene_activity_to_loom):
phreg = Regulon(
        name='placeholder regulon',
        gene2weight={'phreg': 1.0},
        transcription_factor="phreg",
        gene2occurrence={"phreg": 1},
    )

In [None]:
for key in cistopic_obj_dict.keys():
    print(key)
    s = 'merged' if key=='merged' else sample_annot.loc[key,'Sample']
    f_out = os.path.join(f_gene_loom_dir, s + '__libDS_gene_activity.loom')
    if os.path.exists(f_out):
        print(f"Skipping {f_out}: already exists.")
        continue
    export_gene_activity_to_loom(
        gene_activity_matrix = gene_act_dict[key],
        cistopic_obj = cistopic_obj_dict[key], 
        regulons = [phreg],
        selected_cells = [ x.split('-')[0] + '-' + x.split('-')[1]  for x in cistopic_obj_dict[key].cell_names ], # this leaves a cell barcode of the format type 'TGCATGTCGCCGTTCCAAGA-21'
        # selected_cells = cistopic_obj_dict[key].projections['cell']['UMAP'].index.tolist(), # cflerin original        out_fname = f_out,
        cluster_annotation = ['consensus_cell_type'],
        cluster_markers = {'consensus_cell_type': markers_dict_dict[key]},
        tree_structure = ('scATAC-seq_Benchmark', 'ATAC_library_downsampled', 'Gene_activity'),
        title = s + ' - Gene activity',
        nomenclature = "hg38"
    )