In [1]:
%matplotlib inline
import scanpy as sc
import scrublet as scr
import scipy.io
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import gzip

In [2]:
## Re-implementation of scrublet's 'load_genes' that is compatible with gzip-compressed files
def load_genes(filename, delimiter='\t', column=0, skip_rows=0):
    """Read one column of gene names from a gzip-compressed delimited text file.

    Names that occur more than once are made unique by appending ``__<k>``,
    where ``k`` is the 1-based occurrence number. The first occurrence is
    renamed to ``<gene>__1`` as soon as a second occurrence is encountered.

    Parameters
    ----------
    filename : str or path-like
        Gzip-compressed file to read (passed straight to ``gzip.open``).
    delimiter : str
        Field separator used to split every line.
    column : int
        Index of the field holding the gene name.
    skip_rows : int
        Number of leading lines to discard before parsing.

    Returns
    -------
    list of str
        One (de-duplicated) name per parsed line, in file order.

    Raises
    ------
    IndexError
        If a line has fewer than ``column + 1`` fields.
    """
    gene_list = []
    # gene -> [occurrences seen so far, position of the first occurrence].
    # Remembering the position avoids a linear `list.index` scan and guarantees
    # that the entry renamed to '<gene>__1' really is this gene's first
    # occurrence, even if another entry happens to carry the same label.
    seen = {}

    with gzip.open(filename) as f:
        for _ in range(skip_rows):
            f.readline()
        for raw_line in f:
            # Strip '\r' as well as '\n' so CRLF files do not leave a stray
            # carriage return attached to the last field.
            gene = raw_line.decode().rstrip('\r\n').split(delimiter)[column]
            entry = seen.get(gene)
            if entry is None:
                seen[gene] = [1, len(gene_list)]
                gene_list.append(gene)
                continue
            entry[0] += 1
            if entry[0] == 2:
                # Second sighting: retroactively label the first occurrence.
                gene_list[entry[1]] = gene + '__1'
            gene_list.append(gene + '__' + str(entry[0]))
    return gene_list

In [3]:
# Global matplotlib styling applied to every figure produced by this notebook.
plt.rcParams.update({
    'font.family': 'sans-serif',
    'font.sans-serif': 'Arial',
    'font.size': 14,
    # 42 selects Type 42 (TrueType) fonts for PDF output per the matplotlib docs.
    'pdf.fonttype': 42,
})

In [4]:
# Directory containing the per-sample '<sample>_cellBender_corrected_filtered.h5'
# files that the loop below reads.
input_dir = '/cndd2/junhao/ALS_FTD_singleCell/run_cellBender_on_raw_snRNA/'

# Sample identifiers to process. Each one is used as the prefix of the input
# .h5 file name, of the barcodes written to the result table, and of every
# output file name.
# NOTE(review): names appear to follow '<region>_<diagnosis>_<donor id>' — confirm.
sample_name = [
"MCX_ALS_110",
"MCX_ALS_111",
"MCX_ALS_113",
"MCX_ALS_332",
"MCX_ALS_388",
"MCX_ALS_52",
"MCX_Control_1069",
"MCX_Control_902",
"MCX_Control_904",
"MCX_Control_906",
"MCX_Control_91",
"MCX_Control_945",
"MCX_FTD_36",
"MCX_FTD_54",
"MCX_FTD_55",
"MCX_FTD_61",
"MCX_FTD_674",
"MCX_FTD_908",
"mFCX_ALS_110",
"mFCX_ALS_111",
"mFCX_ALS_113",
"mFCX_ALS_332",
"mFCX_ALS_388",
"mFCX_ALS_52",
"mFCX_Control_1069",
"mFCX_Control_902",
"mFCX_Control_904",
"mFCX_Control_906",
"mFCX_Control_91",
"mFCX_Control_945",
"mFCX_FTD_36",
# "mFCX_FTD_54", # removed because cellBender failed to create h5 file (no cells left)
"mFCX_FTD_55",
"mFCX_FTD_61",
"mFCX_FTD_674",
"mFCX_FTD_908",
]


# --- Scrublet settings shared by every sample --------------------------------
# Expected doublet rate handed to the Scrublet constructor.
EXPECTED_DOUBLET_RATE = 0.06
# Manual doublet-score cutoff. The same value is embedded in every output file
# name, so the label can never disagree with the threshold actually applied.
DOUBLET_CALL_THRESHOLD = 0.2
OUTPUT_TAG = 'callThreshold_{}'.format(DOUBLET_CALL_THRESHOLD)

# For each sample: load the CellBender-corrected counts, score doublets with
# Scrublet, then write a per-barcode table, a score histogram and a UMAP plot
# to the current working directory.
for selected_sample in sample_name:
    # Label the log so the Scrublet messages that follow can be attributed.
    print('Processing sample: {}'.format(selected_sample))

    adata = sc.read_10x_h5(
        os.path.join(input_dir, selected_sample + '_cellBender_corrected_filtered.h5')
    )
    adata.var_names_make_unique()

    counts_matrix = adata.X.tocsc()
    genes = adata.var_names.values  # not used in this cell; kept in the namespace
    barcode = adata.obs_names.values
    # Prefix each barcode with the sample name and drop everything from the
    # first '-' onwards, so barcodes stay unique across samples.
    barcode = np.array([selected_sample + '_' + item.split('-')[0] for item in barcode])

    print('Counts matrix shape: {} rows(cells), {} columns(genes)'.format(counts_matrix.shape[0], counts_matrix.shape[1]))

    scrub = scr.Scrublet(counts_matrix, expected_doublet_rate=EXPECTED_DOUBLET_RATE)

    # scrub_doublets computes the scores and makes its own threshold choice;
    # that choice is overridden by the manual threshold right below.
    doublet_scores, predicted_doublets = scrub.scrub_doublets(min_counts=2,
                                                              min_cells=3,
                                                              min_gene_variability_pctl=85,
                                                              n_prin_comps=30)

    # manually set the threshold to call doublets
    scrub.call_doublets(threshold=DOUBLET_CALL_THRESHOLD)

    # Build the result table in one go instead of growing an empty frame.
    df = pd.DataFrame({
        'barcode': barcode,
        'doublet_scores': scrub.doublet_scores_obs_,
        'predicted_doublets': scrub.predicted_doublets_,
    })

    df.to_csv('{}_doublet_score_{}.tsv'.format(selected_sample, OUTPUT_TAG), sep="\t", header=True, index=False)

    fig, axs = scrub.plot_histogram()
    fig.savefig('{}_doublet_score_hist_{}.pdf'.format(selected_sample, OUTPUT_TAG))
    # Close this exact figure rather than whatever pyplot considers current.
    plt.close(fig)

    print('Running UMAP...')
    # NOTE(review): the positional 10 is presumably the neighbour count — confirm
    # against scrublet.get_umap.
    scrub.set_embedding('UMAP', scr.get_umap(scrub.manifold_obs_, 10, min_dist=0.3))
    print('Done.')

    fig, axs = scrub.plot_embedding('UMAP', order_points=True)
    fig.savefig('{}_doublet_score_umap_{}.pdf'.format(selected_sample, OUTPUT_TAG))
    plt.close(fig)

Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 6931 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.23
Detected doublet rate = 3.0%
Estimated detectable doublet fraction = 37.7%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 8.0%
Elapsed time: 22.9 seconds
Detected doublet rate = 3.4%
Estimated detectable doublet fraction = 40.9%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 8.2%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 7026 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.37
Detected doublet rate = 1.1%
Estimated detectable doublet fraction = 28.4%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 3.9%
Elapsed time: 21.9 seconds
Detected doublet rate = 2.9%
Estimated detectable doublet fraction = 43.5%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 6.6%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 7272 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.63
Detected doublet rate = 0.1%
Estimated detectable doublet fraction = 3.8%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 3.6%
Elapsed time: 26.7 seconds
Detected doublet rate = 3.1%
Estimated detectable doublet fraction = 36.9%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 8.4%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 4783 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.27
Detected doublet rate = 2.0%
Estimated detectable doublet fraction = 19.8%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 9.9%
Elapsed time: 11.4 seconds
Detected doublet rate = 3.2%
Estimated detectable doublet fraction = 27.7%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 11.5%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 7254 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Elapsed time: 14.5 seconds
Detected doublet rate = 3.3%
Estimated detectable doublet fraction = 39.3%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 8.3%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 7501 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.22
Detected doublet rate = 3.5%
Estimated detectable doublet fraction = 40.0%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 8.9%
Elapsed time: 19.9 seconds
Detected doublet rate = 3.8%
Estimated detectable doublet fraction = 41.2%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 9.1%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 5971 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.23
Detected doublet rate = 3.0%
Estimated detectable doublet fraction = 42.5%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 7.1%
Elapsed time: 15.2 seconds
Detected doublet rate = 3.5%
Estimated detectable doublet fraction = 45.4%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 7.6%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 6927 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.19
Detected doublet rate = 4.4%
Estimated detectable doublet fraction = 45.7%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 9.7%
Elapsed time: 15.6 seconds
Detected doublet rate = 4.2%
Estimated detectable doublet fraction = 44.6%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 9.4%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 5977 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.29
Detected doublet rate = 2.6%
Estimated detectable doublet fraction = 38.0%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 6.8%
Elapsed time: 16.1 seconds
Detected doublet rate = 5.1%
Estimated detectable doublet fraction = 50.5%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 10.0%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 5667 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.28
Detected doublet rate = 2.1%
Estimated detectable doublet fraction = 31.3%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 6.8%
Elapsed time: 14.2 seconds
Detected doublet rate = 3.2%
Estimated detectable doublet fraction = 40.0%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 8.0%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 5373 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.40
Detected doublet rate = 0.9%
Estimated detectable doublet fraction = 24.8%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 3.7%
Elapsed time: 14.5 seconds
Detected doublet rate = 3.0%
Estimated detectable doublet fraction = 41.0%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 7.4%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 7828 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.34
Detected doublet rate = 1.8%
Estimated detectable doublet fraction = 28.9%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 6.1%
Elapsed time: 21.1 seconds
Detected doublet rate = 4.3%
Estimated detectable doublet fraction = 47.5%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 9.0%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 6650 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.23
Detected doublet rate = 6.0%
Estimated detectable doublet fraction = 37.8%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 15.8%
Elapsed time: 7.7 seconds
Detected doublet rate = 6.7%
Estimated detectable doublet fraction = 39.9%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 16.8%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 6314 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.22
Detected doublet rate = 5.6%
Estimated detectable doublet fraction = 30.5%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 18.4%
Elapsed time: 7.1 seconds
Detected doublet rate = 6.1%
Estimated detectable doublet fraction = 31.8%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 19.3%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 4467 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.57
Detected doublet rate = 0.1%
Estimated detectable doublet fraction = 1.1%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 8.5%
Elapsed time: 7.9 seconds
Detected doublet rate = 3.7%
Estimated detectable doublet fraction = 31.1%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 11.9%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 7867 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.23
Detected doublet rate = 4.0%
Estimated detectable doublet fraction = 38.5%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 10.3%
Elapsed time: 9.9 seconds
Detected doublet rate = 4.6%
Estimated detectable doublet fraction = 41.3%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 11.2%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 1524 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.43
Detected doublet rate = 0.2%
Estimated detectable doublet fraction = 4.9%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 4.0%
Elapsed time: 2.9 seconds
Detected doublet rate = 1.5%
Estimated detectable doublet fraction = 19.9%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 7.6%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 8178 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.23
Detected doublet rate = 3.9%
Estimated detectable doublet fraction = 37.3%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 10.4%
Elapsed time: 14.0 seconds
Detected doublet rate = 4.5%
Estimated detectable doublet fraction = 41.2%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 10.8%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 5740 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.61
Detected doublet rate = 0.2%
Estimated detectable doublet fraction = 2.7%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 7.1%
Elapsed time: 11.8 seconds
Detected doublet rate = 3.1%
Estimated detectable doublet fraction = 39.5%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 7.8%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 5627 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.61
Detected doublet rate = 0.1%
Estimated detectable doublet fraction = 4.1%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 3.0%
Elapsed time: 9.6 seconds
Detected doublet rate = 5.3%
Estimated detectable doublet fraction = 42.7%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 12.5%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 5269 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.58
Detected doublet rate = 0.2%
Estimated detectable doublet fraction = 7.8%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 2.7%
Elapsed time: 11.1 seconds
Detected doublet rate = 2.8%
Estimated detectable doublet fraction = 36.5%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 7.6%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 6586 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.27
Detected doublet rate = 2.1%
Estimated detectable doublet fraction = 26.9%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 7.8%
Elapsed time: 12.9 seconds
Detected doublet rate = 3.1%
Estimated detectable doublet fraction = 35.9%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 8.7%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 6441 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.63
Detected doublet rate = 0.1%
Estimated detectable doublet fraction = 2.6%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 3.0%
Elapsed time: 14.1 seconds
Detected doublet rate = 2.7%
Estimated detectable doublet fraction = 29.8%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 8.9%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 5042 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.59
Detected doublet rate = 0.2%
Estimated detectable doublet fraction = 5.7%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 2.8%
Elapsed time: 12.4 seconds
Detected doublet rate = 6.9%
Estimated detectable doublet fraction = 42.5%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 16.2%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 5742 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.25
Detected doublet rate = 2.9%
Estimated detectable doublet fraction = 38.0%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 7.6%
Elapsed time: 13.4 seconds
Detected doublet rate = 3.3%
Estimated detectable doublet fraction = 41.5%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 7.9%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 7556 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.25
Detected doublet rate = 2.9%
Estimated detectable doublet fraction = 39.7%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 7.3%
Elapsed time: 17.4 seconds
Detected doublet rate = 3.3%
Estimated detectable doublet fraction = 43.7%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 7.6%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 7956 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.27
Detected doublet rate = 2.7%
Estimated detectable doublet fraction = 38.1%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 7.2%
Elapsed time: 19.9 seconds
Detected doublet rate = 4.0%
Estimated detectable doublet fraction = 47.3%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 8.4%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 6427 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.43
Detected doublet rate = 0.6%
Estimated detectable doublet fraction = 20.5%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 2.9%
Elapsed time: 15.4 seconds
Detected doublet rate = 2.6%
Estimated detectable doublet fraction = 38.6%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 6.7%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 5548 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.50
Detected doublet rate = 0.1%
Estimated detectable doublet fraction = 1.1%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 8.3%
Elapsed time: 9.0 seconds
Detected doublet rate = 11.4%
Estimated detectable doublet fraction = 33.1%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 34.3%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 7673 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.25
Detected doublet rate = 2.8%
Estimated detectable doublet fraction = 40.5%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 6.9%
Elapsed time: 14.9 seconds
Detected doublet rate = 3.6%
Estimated detectable doublet fraction = 45.2%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 8.0%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 7856 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.20
Detected doublet rate = 5.1%
Estimated detectable doublet fraction = 44.4%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 11.5%
Elapsed time: 14.2 seconds
Detected doublet rate = 5.1%
Estimated detectable doublet fraction = 44.4%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 11.5%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 3945 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.56
Detected doublet rate = 0.3%
Estimated detectable doublet fraction = 5.0%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 6.1%
Elapsed time: 6.7 seconds
Detected doublet rate = 2.9%
Estimated detectable doublet fraction = 28.6%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 10.2%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 7623 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.22
Detected doublet rate = 4.4%
Estimated detectable doublet fraction = 41.8%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 10.5%
Elapsed time: 11.5 seconds
Detected doublet rate = 4.8%
Estimated detectable doublet fraction = 44.1%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 10.9%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 3119 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.50
Detected doublet rate = 0.0%
Estimated detectable doublet fraction = 11.8%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 0.3%
Elapsed time: 6.1 seconds
Detected doublet rate = 1.8%
Estimated detectable doublet fraction = 26.2%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 7.0%
Running UMAP...
Done.


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Counts matrix shape: 6589 rows(cells), 33538 columns(genes)
Preprocessing...


  w.setdiag(float(target_total) / tots_use)


Simulating doublets...
Embedding transcriptomes using PCA...
Calculating doublet scores...
Automatically set threshold at doublet score = 0.25
Detected doublet rate = 2.2%
Estimated detectable doublet fraction = 39.8%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 5.5%
Elapsed time: 14.4 seconds
Detected doublet rate = 2.9%
Estimated detectable doublet fraction = 45.6%
Overall doublet rate:
	Expected   = 6.0%
	Estimated  = 6.3%
Running UMAP...
Done.
