In [None]:
# Start from a clean workspace: remove every visible (non-dot) object.
rm(list = ls(all.names = FALSE))

In [None]:
# List the structure of each visible (non-dot) object in the workspace.
ls.str(all.names = FALSE)

In [1]:
library(Seurat)
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(data.table))
library(DoubletFinder)

Attaching SeuratObject



In [5]:
library(future)
library(future.apply)
# Evaluate futures with the "multicore" strategy, using ten workers.
plan(strategy = "multicore", workers = 10)
# Raise the size limit for globals exported to a future to 100 GiB
# (100 * 2^30 bytes).
options(future.globals.maxSize = 100 * 2^30)

In [6]:
# Working folders, all located under the current working directory.
# Each path string keeps its trailing slash.
rawdir <- sprintf("%s/data/", getwd())
tmpdir <- sprintf("%s/tmp/", getwd())
outdir <- sprintf("%s/output/", getwd())

# Create the scratch and output folders when they are missing; the raw data
# folder is not created here.
invisible(lapply(c(tmpdir, outdir), function(path) {
    if (!dir.exists(path)) dir.create(path)
}))

In [7]:
# Every entry found in the raw data folder is treated as one sample file.
samples <- dir(path = rawdir)
# Display the entries that were found.
samples

# 批量运行
- DoubletFinder对相同细胞类型构成的doublets不敏感，因为这些细胞在表达特征上与真实的单细胞没有明显的差异

## （1）不使用SCT

In [8]:
# Per-sample preprocessing and doublet calling (log-normalisation path, no SCT).
# Every entry of `samples` is read, filtered, clustered and scored with
# DoubletFinder inside its own future. `slist` holds one Seurat object per
# sample, in the same order as `samples`, with the DoubletFinder call stored in
# the `doublets` metadata column. `starttime` / `endtime` bracket the whole run.
starttime <- Sys.time()
slist <- future_lapply(samples, function(i) {
    # ---- Load the count table and build the Seurat object ----
    idir <- file.path(rawdir, i)
    # fread names the first column V1 (see its warning about one missing column
    # name in the header); that column is used as the row names.
    a <- fread(idir, data.table = FALSE) %>% column_to_rownames("V1")
    # The project name is the file name up to its first dot.
    TenXdat <- CreateSeuratObject(counts = a, min.cells = 3, min.features = 500, project = strsplit(i, "[.]")[[1]][1])
    TenXdat[["percent.mt"]] <- PercentageFeatureSet(TenXdat, pattern = "^MT-")
    # Cell-level QC thresholds. The nFeature_RNA > 200 bound is already implied
    # by min.features = 500 above; it is kept so the filter reads on its own.
    TenXdat <- subset(TenXdat, subset = nFeature_RNA > 200 & nFeature_RNA < 25000 & percent.mt < 25 & nCount_RNA > 1000 & nCount_RNA < 500000)

    # Add a project-specific tag to the cell names (the original note calls
    # them UMIs). The supplied names already carry the project tag, so no
    # renaming is needed.
    #prefix <- paste(strsplit(i,'_')[[1]][c(2,4,5)], collapse = '_')
    #NewCellName <- paste(prefix, colnames(TenXdat), sep = '_')
    #TenXdat <- RenameCells(TenXdat, NewCellName)

    #TenXdat <- RenameCells(TenXdat, add.cell.id = paste(strsplit(i,'_')[[1]][c(2,4,5)], collapse = '_'))

    # ---- Sample metadata derived from the project name ----
    # Fields 4 and 5 of the "_"-separated project name are taken as the patient
    # and the source. Split once, and fail loudly instead of silently writing
    # NA / "NA_NA" labels when the name has too few fields.
    name_parts <- strsplit(TenXdat@project.name, "_")[[1]]
    if (length(name_parts) < 5) {
        stop("Cannot derive Patient/Source from '", i,
             "': expected at least 5 '_'-separated fields in the project name.",
             call. = FALSE)
    }
    TenXdat@meta.data$orig.ident <- paste(name_parts[c(4, 5)], collapse = "_")
    TenXdat@meta.data$Patient <- name_parts[4]
    TenXdat@meta.data$Source <- name_parts[5]
    TenXdat@meta.data$barcode <- rownames(TenXdat@meta.data)

    # ---- Standard Seurat workflow ----
    # NOTE(review): ScaleData runs before FindVariableFeatures, so presumably
    # every gene is scaled and regressed rather than only the variable ones.
    # The order is kept in case later steps rely on it - confirm.
    TenXdat <- NormalizeData(TenXdat)
    TenXdat <- ScaleData(TenXdat, vars.to.regress = c("percent.mt"))
    TenXdat <- FindVariableFeatures(TenXdat, selection.method = "vst", nfeatures = 2000)
    TenXdat <- RunPCA(TenXdat, features = VariableFeatures(object = TenXdat), verbose = FALSE)

    pc.num <- 1:20
    TenXdat <- FindNeighbors(TenXdat, dims = pc.num)
    TenXdat <- FindClusters(object = TenXdat, resolution = 0.5)
    #TenXdat <- RunTSNE(object = TenXdat, dims = pc.num, reduction.use = "pca")
    TenXdat <- RunUMAP(object = TenXdat, dims = pc.num)

    # ---- Find the optimal pK ----
    # Run DoubletFinder on a separate copy so that its extra metadata columns
    # do not end up on the returned object.
    TenXdat_db <- TenXdat
    sweep.res.list_TenXdat <- paramSweep_v3(TenXdat_db, PCs = pc.num, sct = FALSE)
    sweep.stats_TenXdat <- summarizeSweep(sweep.res.list_TenXdat, GT = FALSE)
    bcmvn_TenXdat <- find.pK(sweep.stats_TenXdat)
    # Go through character first so the pK label itself is converted, not an
    # underlying level index.
    pK_bcmvn <- bcmvn_TenXdat$pK[which.max(bcmvn_TenXdat$BCmetric)] %>% as.character() %>% as.numeric()

    # ---- Expected number of doublets ----
    # Discount homotypic doublets, which cannot be detected, to refine the
    # expected doublet count.
    # Assumed doublet rate. For reference, 5000 cells correspond to a doublet
    # rate of 3.9%, see https://cloud.tencent.com/developer/article/1825672
    # NOTE(review): the rate is fixed at 5% regardless of how many cells the
    # sample has - confirm this is intended.
    DoubletRate <- 0.05
    # Ideally annotated cell types would be supplied here (after annotation?).
    homotypic.prop <- modelHomotypic(TenXdat_db@meta.data$seurat_clusters)
    nExp_poi <- round(DoubletRate * ncol(TenXdat_db))
    nExp_poi.adj <- round(nExp_poi * (1 - homotypic.prop))

    TenXdat_db <- doubletFinder_v3(TenXdat_db, PCs = pc.num, pN = 0.25, pK = pK_bcmvn, nExp = nExp_poi.adj, reuse.pANN = FALSE, sct = FALSE)

    # Rename the DoubletFinder classification column (its name encodes the
    # run parameters) to a fixed name. Require exactly one match so a missing
    # or duplicated column is reported instead of being silently mishandled.
    df_cols <- grep("^DF\\.classifications", colnames(TenXdat_db@meta.data))
    if (length(df_cols) != 1L) {
        stop("Expected exactly one DoubletFinder classification column for '", i,
             "', found ", length(df_cols), ".", call. = FALSE)
    }
    colnames(TenXdat_db@meta.data)[df_cols] <- "DF"

    TenXdat$doublets <- TenXdat_db$DF
    TenXdat
}, future.seed = TRUE)
# future.seed = TRUE gives each sample its own parallel-safe random number
# stream; the pipeline draws random numbers (e.g. when DoubletFinder creates
# artificial doublets). Call set.seed() before this cell for repeatable runs.
# NOTE(review): stochastic results may shift relative to runs made without it.
endtime <- Sys.time()

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 10761
Number of edges: 403669

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9314
Number of communities: 19
Elapsed time: 4 seconds
[1] "Creating artificial doublets for pN = 5%"
[1] "Creating Seurat object..."
[1] "Normalizing Seurat object..."
[1] "Finding variable genes..."
[1] "Scaling data..."
[1] "Running PCA..."
[1] "Calculating PC distance matrix..."
[1] "Defining neighborhoods..."
[1] "Computing pANN across all pK..."
[1] "pK = 0.001..."
[1] "pK = 0.005..."
[1] "pK = 0.01..."
[1] "pK = 0.02..."
[1] "pK = 0.03..."
[1] "pK = 0.04..."
[1] "pK = 0.05..."
[1] "pK = 0.06..."
[1] "pK = 0.07..."
[1] "pK = 0.08..."
[1] "pK = 0.09..."
[1] "pK = 0.1..."
[1] "pK = 0.11..."
[1] "pK = 0.12..."
[1] "pK = 0.13..."
[1] "pK = 0.14..."
[1] "pK = 0.15..."
[1] "pK = 0.16..."
[1] "pK = 0.17..."
[1] "pK = 0.18..."
[1] "pK = 0.19..."
[1] "pK = 0.2..."
[1] "pK = 0.21..."
[1] "pK = 0.22...

“检测到 10761 个列名，然而数据共有 10762 列（文件不合法）。添加了 1 个额外列名作为第一列，并被用于猜测行名或索引。若上述猜测不正确，可在后续使用setnames()进行修改，或修复用于生成该文件的文件写入命令以生成有效的文件。”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Regressing out percent.mt

Centering and scaling data matrix

Computing nearest neighbor graph

Computing SNN

“The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session”
06:54:29 UMAP embedding parameters a = 0.9922 b = 1.112

06:54:29 Read 10761 rows and found 20 numeric columns

06:54:29 Using Annoy for neighbor search, n_neighbors = 30

06:54:29 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*


Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 9627
Number of edges: 356350

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9052
Number of communities: 15
Elapsed time: 4 seconds
[1] "Creating artificial doublets for pN = 5%"
[1] "Creating Seurat object..."
[1] "Normalizing Seurat object..."
[1] "Finding variable genes..."
[1] "Scaling data..."
[1] "Running PCA..."
[1] "Calculating PC distance matrix..."
[1] "Defining neighborhoods..."
[1] "Computing pANN across all pK..."
[1] "pK = 0.001..."
[1] "pK = 0.005..."
[1] "pK = 0.01..."
[1] "pK = 0.02..."
[1] "pK = 0.03..."
[1] "pK = 0.04..."
[1] "pK = 0.05..."
[1] "pK = 0.06..."
[1] "pK = 0.07..."
[1] "pK = 0.08..."
[1] "pK = 0.09..."
[1] "pK = 0.1..."
[1] "pK = 0.11..."
[1] "pK = 0.12..."
[1] "pK = 0.13..."
[1] "pK = 0.14..."
[1] "pK = 0.15..."
[1] "pK = 0.16..."
[1] "pK = 0.17..."
[1] "pK = 0.18..."
[1] "pK = 0.19..."
[1] "pK = 0.2..."
[1] "pK = 0.21..."
[1] "pK = 0.22..."

“检测到 9627 个列名，然而数据共有 9628 列（文件不合法）。添加了 1 个额外列名作为第一列，并被用于猜测行名或索引。若上述猜测不正确，可在后续使用setnames()进行修改，或修复用于生成该文件的文件写入命令以生成有效的文件。”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Regressing out percent.mt

Centering and scaling data matrix

Computing nearest neighbor graph

Computing SNN

“The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session”
06:38:07 UMAP embedding parameters a = 0.9922 b = 1.112

06:38:07 Read 9627 rows and found 20 numeric columns

06:38:07 Using Annoy for neighbor search, n_neighbors = 30

06:38:07 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 9872
Number of edges: 361088

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9115
Number of communities: 14
Elapsed time: 6 seconds
[1] "Creating artificial doublets for pN = 5%"
[1] "Creating Seurat object..."
[1] "Normalizing Seurat object..."
[1] "Finding variable genes..."
[1] "Scaling data..."
[1] "Running PCA..."
[1] "Calculating PC distance matrix..."
[1] "Defining neighborhoods..."
[1] "Computing pANN across all pK..."
[1] "pK = 0.001..."
[1] "pK = 0.005..."
[1] "pK = 0.01..."
[1] "pK = 0.02..."
[1] "pK = 0.03..."
[1] "pK = 0.04..."
[1] "pK = 0.05..."
[1] "pK = 0.06..."
[1] "pK = 0.07..."
[1] "pK = 0.08..."
[1] "pK = 0.09..."
[1] "pK = 0.1..."
[1] "pK = 0.11..."
[1] "pK = 0.12..."
[1] "pK = 0.13..."
[1] "pK = 0.14..."
[1] "pK = 0.15..."
[1] "pK = 0.16..."
[1] "pK = 0.17..."
[1] "pK = 0.18..."
[1] "pK = 0.19..."
[1] "pK = 0.2..."
[1] "pK = 0.21..."
[1] "pK = 0.22..."

“检测到 9872 个列名，然而数据共有 9873 列（文件不合法）。添加了 1 个额外列名作为第一列，并被用于猜测行名或索引。若上述猜测不正确，可在后续使用setnames()进行修改，或修复用于生成该文件的文件写入命令以生成有效的文件。”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Regressing out percent.mt

Centering and scaling data matrix

Computing nearest neighbor graph

Computing SNN

“The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session”
06:10:24 UMAP embedding parameters a = 0.9922 b = 1.112

06:10:24 Read 9872 rows and found 20 numeric columns

06:10:24 Using Annoy for neighbor search, n_neighbors = 30

06:10:24 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 7564
Number of edges: 276029

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8987
Number of communities: 13
Elapsed time: 2 seconds
[1] "Creating artificial doublets for pN = 5%"
[1] "Creating Seurat object..."
[1] "Normalizing Seurat object..."
[1] "Finding variable genes..."
[1] "Scaling data..."
[1] "Running PCA..."
[1] "Calculating PC distance matrix..."
[1] "Defining neighborhoods..."
[1] "Computing pANN across all pK..."
[1] "pK = 0.005..."
[1] "pK = 0.01..."
[1] "pK = 0.02..."
[1] "pK = 0.03..."
[1] "pK = 0.04..."
[1] "pK = 0.05..."
[1] "pK = 0.06..."
[1] "pK = 0.07..."
[1] "pK = 0.08..."
[1] "pK = 0.09..."
[1] "pK = 0.1..."
[1] "pK = 0.11..."
[1] "pK = 0.12..."
[1] "pK = 0.13..."
[1] "pK = 0.14..."
[1] "pK = 0.15..."
[1] "pK = 0.16..."
[1] "pK = 0.17..."
[1] "pK = 0.18..."
[1] "pK = 0.19..."
[1] "pK = 0.2..."
[1] "pK = 0.21..."
[1] "pK = 0.22..."
[1] "pK = 0.23..."


“检测到 7564 个列名，然而数据共有 7565 列（文件不合法）。添加了 1 个额外列名作为第一列，并被用于猜测行名或索引。若上述猜测不正确，可在后续使用setnames()进行修改，或修复用于生成该文件的文件写入命令以生成有效的文件。”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Regressing out percent.mt

Centering and scaling data matrix

Computing nearest neighbor graph

Computing SNN

“The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session”
05:57:34 UMAP embedding parameters a = 0.9922 b = 1.112

05:57:34 Read 7564 rows and found 20 numeric columns

05:57:34 Using Annoy for neighbor search, n_neighbors = 30

05:57:34 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 8398
Number of edges: 326688

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9213
Number of communities: 16
Elapsed time: 2 seconds
[1] "Creating artificial doublets for pN = 5%"
[1] "Creating Seurat object..."
[1] "Normalizing Seurat object..."
[1] "Finding variable genes..."
[1] "Scaling data..."
[1] "Running PCA..."
[1] "Calculating PC distance matrix..."
[1] "Defining neighborhoods..."
[1] "Computing pANN across all pK..."
[1] "pK = 0.005..."
[1] "pK = 0.01..."
[1] "pK = 0.02..."
[1] "pK = 0.03..."
[1] "pK = 0.04..."
[1] "pK = 0.05..."
[1] "pK = 0.06..."
[1] "pK = 0.07..."
[1] "pK = 0.08..."
[1] "pK = 0.09..."
[1] "pK = 0.1..."
[1] "pK = 0.11..."
[1] "pK = 0.12..."
[1] "pK = 0.13..."
[1] "pK = 0.14..."
[1] "pK = 0.15..."
[1] "pK = 0.16..."
[1] "pK = 0.17..."
[1] "pK = 0.18..."
[1] "pK = 0.19..."
[1] "pK = 0.2..."
[1] "pK = 0.21..."
[1] "pK = 0.22..."
[1] "pK = 0.23..."


“检测到 8398 个列名，然而数据共有 8399 列（文件不合法）。添加了 1 个额外列名作为第一列，并被用于猜测行名或索引。若上述猜测不正确，可在后续使用setnames()进行修改，或修复用于生成该文件的文件写入命令以生成有效的文件。”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Regressing out percent.mt

Centering and scaling data matrix

Computing nearest neighbor graph

Computing SNN

“The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session”
05:35:06 UMAP embedding parameters a = 0.9922 b = 1.112

05:35:06 Read 8398 rows and found 20 numeric columns

05:35:06 Using Annoy for neighbor search, n_neighbors = 30

05:35:06 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 9719
Number of edges: 348276

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8830
Number of communities: 15
Elapsed time: 2 seconds
[1] "Creating artificial doublets for pN = 5%"
[1] "Creating Seurat object..."
[1] "Normalizing Seurat object..."
[1] "Finding variable genes..."
[1] "Scaling data..."
[1] "Running PCA..."
[1] "Calculating PC distance matrix..."
[1] "Defining neighborhoods..."
[1] "Computing pANN across all pK..."
[1] "pK = 0.001..."
[1] "pK = 0.005..."
[1] "pK = 0.01..."
[1] "pK = 0.02..."
[1] "pK = 0.03..."
[1] "pK = 0.04..."
[1] "pK = 0.05..."
[1] "pK = 0.06..."
[1] "pK = 0.07..."
[1] "pK = 0.08..."
[1] "pK = 0.09..."
[1] "pK = 0.1..."
[1] "pK = 0.11..."
[1] "pK = 0.12..."
[1] "pK = 0.13..."
[1] "pK = 0.14..."
[1] "pK = 0.15..."
[1] "pK = 0.16..."
[1] "pK = 0.17..."
[1] "pK = 0.18..."
[1] "pK = 0.19..."
[1] "pK = 0.2..."
[1] "pK = 0.21..."
[1] "pK = 0.22..."

“检测到 9720 个列名，然而数据共有 9721 列（文件不合法）。添加了 1 个额外列名作为第一列，并被用于猜测行名或索引。若上述猜测不正确，可在后续使用setnames()进行修改，或修复用于生成该文件的文件写入命令以生成有效的文件。”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Regressing out percent.mt

Centering and scaling data matrix

Computing nearest neighbor graph

Computing SNN

“The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session”
05:24:22 UMAP embedding parameters a = 0.9922 b = 1.112

05:24:22 Read 9719 rows and found 20 numeric columns

05:24:22 Using Annoy for neighbor search, n_neighbors = 30

05:24:22 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 9812
Number of edges: 361191

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8934
Number of communities: 17
Elapsed time: 3 seconds
[1] "Creating artificial doublets for pN = 5%"
[1] "Creating Seurat object..."
[1] "Normalizing Seurat object..."
[1] "Finding variable genes..."
[1] "Scaling data..."
[1] "Running PCA..."
[1] "Calculating PC distance matrix..."
[1] "Defining neighborhoods..."
[1] "Computing pANN across all pK..."
[1] "pK = 0.001..."
[1] "pK = 0.005..."
[1] "pK = 0.01..."
[1] "pK = 0.02..."
[1] "pK = 0.03..."
[1] "pK = 0.04..."
[1] "pK = 0.05..."
[1] "pK = 0.06..."
[1] "pK = 0.07..."
[1] "pK = 0.08..."
[1] "pK = 0.09..."
[1] "pK = 0.1..."
[1] "pK = 0.11..."
[1] "pK = 0.12..."
[1] "pK = 0.13..."
[1] "pK = 0.14..."
[1] "pK = 0.15..."
[1] "pK = 0.16..."
[1] "pK = 0.17..."
[1] "pK = 0.18..."
[1] "pK = 0.19..."
[1] "pK = 0.2..."
[1] "pK = 0.21..."
[1] "pK = 0.22..."

“检测到 9812 个列名，然而数据共有 9813 列（文件不合法）。添加了 1 个额外列名作为第一列，并被用于猜测行名或索引。若上述猜测不正确，可在后续使用setnames()进行修改，或修复用于生成该文件的文件写入命令以生成有效的文件。”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Regressing out percent.mt

Centering and scaling data matrix

Computing nearest neighbor graph

Computing SNN

“The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session”
06:17:04 UMAP embedding parameters a = 0.9922 b = 1.112

06:17:04 Read 9812 rows and found 20 numeric columns

06:17:05 Using Annoy for neighbor search, n_neighbors = 30

06:17:05 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 9782
Number of edges: 344945

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8823
Number of communities: 14
Elapsed time: 3 seconds
[1] "Creating artificial doublets for pN = 5%"
[1] "Creating Seurat object..."
[1] "Normalizing Seurat object..."
[1] "Finding variable genes..."
[1] "Scaling data..."
[1] "Running PCA..."
[1] "Calculating PC distance matrix..."
[1] "Defining neighborhoods..."
[1] "Computing pANN across all pK..."
[1] "pK = 0.001..."
[1] "pK = 0.005..."
[1] "pK = 0.01..."
[1] "pK = 0.02..."
[1] "pK = 0.03..."
[1] "pK = 0.04..."
[1] "pK = 0.05..."
[1] "pK = 0.06..."
[1] "pK = 0.07..."
[1] "pK = 0.08..."
[1] "pK = 0.09..."
[1] "pK = 0.1..."
[1] "pK = 0.11..."
[1] "pK = 0.12..."
[1] "pK = 0.13..."
[1] "pK = 0.14..."
[1] "pK = 0.15..."
[1] "pK = 0.16..."
[1] "pK = 0.17..."
[1] "pK = 0.18..."
[1] "pK = 0.19..."
[1] "pK = 0.2..."
[1] "pK = 0.21..."
[1] "pK = 0.22..."

“检测到 9782 个列名，然而数据共有 9783 列（文件不合法）。添加了 1 个额外列名作为第一列，并被用于猜测行名或索引。若上述猜测不正确，可在后续使用setnames()进行修改，或修复用于生成该文件的文件写入命令以生成有效的文件。”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Regressing out percent.mt

Centering and scaling data matrix

Computing nearest neighbor graph

Computing SNN

“The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session”
06:08:14 UMAP embedding parameters a = 0.9922 b = 1.112

06:08:14 Read 9782 rows and found 20 numeric columns

06:08:14 Using Annoy for neighbor search, n_neighbors = 30

06:08:14 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 9542
Number of edges: 357067

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9153
Number of communities: 18
Elapsed time: 8 seconds
[1] "Creating artificial doublets for pN = 5%"
[1] "Creating Seurat object..."
[1] "Normalizing Seurat object..."
[1] "Finding variable genes..."
[1] "Scaling data..."
[1] "Running PCA..."
[1] "Calculating PC distance matrix..."
[1] "Defining neighborhoods..."
[1] "Computing pANN across all pK..."
[1] "pK = 0.001..."
[1] "pK = 0.005..."
[1] "pK = 0.01..."
[1] "pK = 0.02..."
[1] "pK = 0.03..."
[1] "pK = 0.04..."
[1] "pK = 0.05..."
[1] "pK = 0.06..."
[1] "pK = 0.07..."
[1] "pK = 0.08..."
[1] "pK = 0.09..."
[1] "pK = 0.1..."
[1] "pK = 0.11..."
[1] "pK = 0.12..."
[1] "pK = 0.13..."
[1] "pK = 0.14..."
[1] "pK = 0.15..."
[1] "pK = 0.16..."
[1] "pK = 0.17..."
[1] "pK = 0.18..."
[1] "pK = 0.19..."
[1] "pK = 0.2..."
[1] "pK = 0.21..."
[1] "pK = 0.22..."

“检测到 9542 个列名，然而数据共有 9543 列（文件不合法）。添加了 1 个额外列名作为第一列，并被用于猜测行名或索引。若上述猜测不正确，可在后续使用setnames()进行修改，或修复用于生成该文件的文件写入命令以生成有效的文件。”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Regressing out percent.mt

Centering and scaling data matrix

Computing nearest neighbor graph

Computing SNN

“The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session”
06:09:07 UMAP embedding parameters a = 0.9922 b = 1.112

06:09:07 Read 9542 rows and found 20 numeric columns

06:09:07 Using Annoy for neighbor search, n_neighbors = 30

06:09:07 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 10298
Number of edges: 350759

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8807
Number of communities: 13
Elapsed time: 2 seconds
[1] "Creating artificial doublets for pN = 5%"
[1] "Creating Seurat object..."
[1] "Normalizing Seurat object..."
[1] "Finding variable genes..."
[1] "Scaling data..."
[1] "Running PCA..."
[1] "Calculating PC distance matrix..."
[1] "Defining neighborhoods..."
[1] "Computing pANN across all pK..."
[1] "pK = 0.001..."
[1] "pK = 0.005..."
[1] "pK = 0.01..."
[1] "pK = 0.02..."
[1] "pK = 0.03..."
[1] "pK = 0.04..."
[1] "pK = 0.05..."
[1] "pK = 0.06..."
[1] "pK = 0.07..."
[1] "pK = 0.08..."
[1] "pK = 0.09..."
[1] "pK = 0.1..."
[1] "pK = 0.11..."
[1] "pK = 0.12..."
[1] "pK = 0.13..."
[1] "pK = 0.14..."
[1] "pK = 0.15..."
[1] "pK = 0.16..."
[1] "pK = 0.17..."
[1] "pK = 0.18..."
[1] "pK = 0.19..."
[1] "pK = 0.2..."
[1] "pK = 0.21..."
[1] "pK = 0.22...

“检测到 10299 个列名，然而数据共有 10300 列（文件不合法）。添加了 1 个额外列名作为第一列，并被用于猜测行名或索引。若上述猜测不正确，可在后续使用setnames()进行修改，或修复用于生成该文件的文件写入命令以生成有效的文件。”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Regressing out percent.mt

Centering and scaling data matrix

Computing nearest neighbor graph

Computing SNN

“The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session”
06:43:35 UMAP embedding parameters a = 0.9922 b = 1.112

06:43:35 Read 10298 rows and found 20 numeric columns

06:43:35 Using Annoy for neighbor search, n_neighbors = 30

06:43:35 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*


Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 9288
Number of edges: 350877

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9083
Number of communities: 15
Elapsed time: 2 seconds
[1] "Creating artificial doublets for pN = 5%"
[1] "Creating Seurat object..."
[1] "Normalizing Seurat object..."
[1] "Finding variable genes..."
[1] "Scaling data..."
[1] "Running PCA..."
[1] "Calculating PC distance matrix..."
[1] "Defining neighborhoods..."
[1] "Computing pANN across all pK..."
[1] "pK = 0.005..."
[1] "pK = 0.01..."
[1] "pK = 0.02..."
[1] "pK = 0.03..."
[1] "pK = 0.04..."
[1] "pK = 0.05..."
[1] "pK = 0.06..."
[1] "pK = 0.07..."
[1] "pK = 0.08..."
[1] "pK = 0.09..."
[1] "pK = 0.1..."
[1] "pK = 0.11..."
[1] "pK = 0.12..."
[1] "pK = 0.13..."
[1] "pK = 0.14..."
[1] "pK = 0.15..."
[1] "pK = 0.16..."
[1] "pK = 0.17..."
[1] "pK = 0.18..."
[1] "pK = 0.19..."
[1] "pK = 0.2..."
[1] "pK = 0.21..."
[1] "pK = 0.22..."
[1] "pK = 0.23..."


“检测到 9288 个列名，然而数据共有 9289 列（文件不合法）。添加了 1 个额外列名作为第一列，并被用于猜测行名或索引。若上述猜测不正确，可在后续使用setnames()进行修改，或修复用于生成该文件的文件写入命令以生成有效的文件。”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Regressing out percent.mt

Centering and scaling data matrix

Computing nearest neighbor graph

Computing SNN

“The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session”
05:30:00 UMAP embedding parameters a = 0.9922 b = 1.112

05:30:00 Read 9288 rows and found 20 numeric columns

05:30:00 Using Annoy for neighbor search, n_neighbors = 30

05:30:00 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 8345
Number of edges: 312920

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8818
Number of communities: 11
Elapsed time: 5 seconds
[1] "Creating artificial doublets for pN = 5%"
[1] "Creating Seurat object..."
[1] "Normalizing Seurat object..."
[1] "Finding variable genes..."
[1] "Scaling data..."
[1] "Running PCA..."
[1] "Calculating PC distance matrix..."
[1] "Defining neighborhoods..."
[1] "Computing pANN across all pK..."
[1] "pK = 0.005..."
[1] "pK = 0.01..."
[1] "pK = 0.02..."
[1] "pK = 0.03..."
[1] "pK = 0.04..."
[1] "pK = 0.05..."
[1] "pK = 0.06..."
[1] "pK = 0.07..."
[1] "pK = 0.08..."
[1] "pK = 0.09..."
[1] "pK = 0.1..."
[1] "pK = 0.11..."
[1] "pK = 0.12..."
[1] "pK = 0.13..."
[1] "pK = 0.14..."
[1] "pK = 0.15..."
[1] "pK = 0.16..."
[1] "pK = 0.17..."
[1] "pK = 0.18..."
[1] "pK = 0.19..."
[1] "pK = 0.2..."
[1] "pK = 0.21..."
[1] "pK = 0.22..."
[1] "pK = 0.23..."


“检测到 8347 个列名，然而数据共有 8348 列（文件不合法）。添加了 1 个额外列名作为第一列，并被用于猜测行名或索引。若上述猜测不正确，可在后续使用setnames()进行修改，或修复用于生成该文件的文件写入命令以生成有效的文件。”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Regressing out percent.mt

Centering and scaling data matrix

Computing nearest neighbor graph

Computing SNN

“The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session”
05:28:51 UMAP embedding parameters a = 0.9922 b = 1.112

05:28:51 Read 8345 rows and found 20 numeric columns

05:28:51 Using Annoy for neighbor search, n_neighbors = 30

05:28:51 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 9529
Number of edges: 362210

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9194
Number of communities: 12
Elapsed time: 7 seconds
[1] "Creating artificial doublets for pN = 5%"
[1] "Creating Seurat object..."
[1] "Normalizing Seurat object..."
[1] "Finding variable genes..."
[1] "Scaling data..."
[1] "Running PCA..."
[1] "Calculating PC distance matrix..."
[1] "Defining neighborhoods..."
[1] "Computing pANN across all pK..."
[1] "pK = 0.001..."
[1] "pK = 0.005..."
[1] "pK = 0.01..."
[1] "pK = 0.02..."
[1] "pK = 0.03..."
[1] "pK = 0.04..."
[1] "pK = 0.05..."
[1] "pK = 0.06..."
[1] "pK = 0.07..."
[1] "pK = 0.08..."
[1] "pK = 0.09..."
[1] "pK = 0.1..."
[1] "pK = 0.11..."
[1] "pK = 0.12..."
[1] "pK = 0.13..."
[1] "pK = 0.14..."
[1] "pK = 0.15..."
[1] "pK = 0.16..."
[1] "pK = 0.17..."
[1] "pK = 0.18..."
[1] "pK = 0.19..."
[1] "pK = 0.2..."
[1] "pK = 0.21..."
[1] "pK = 0.22..."

“检测到 9529 个列名，然而数据共有 9530 列（文件不合法）。添加了 1 个额外列名作为第一列，并被用于猜测行名或索引。若上述猜测不正确，可在后续使用setnames()进行修改，或修复用于生成该文件的文件写入命令以生成有效的文件。”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Regressing out percent.mt

Centering and scaling data matrix

Computing nearest neighbor graph

Computing SNN

“The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session”
05:41:53 UMAP embedding parameters a = 0.9922 b = 1.112

05:41:53 Read 9529 rows and found 20 numeric columns

05:41:53 Using Annoy for neighbor search, n_neighbors = 30

05:41:53 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 5923
Number of edges: 217528

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8652
Number of communities: 12
Elapsed time: 1 seconds
[1] "Creating artificial doublets for pN = 5%"
[1] "Creating Seurat object..."
[1] "Normalizing Seurat object..."
[1] "Finding variable genes..."
[1] "Scaling data..."
[1] "Running PCA..."
[1] "Calculating PC distance matrix..."
[1] "Defining neighborhoods..."
[1] "Computing pANN across all pK..."
[1] "pK = 0.005..."
[1] "pK = 0.01..."
[1] "pK = 0.02..."
[1] "pK = 0.03..."
[1] "pK = 0.04..."
[1] "pK = 0.05..."
[1] "pK = 0.06..."
[1] "pK = 0.07..."
[1] "pK = 0.08..."
[1] "pK = 0.09..."
[1] "pK = 0.1..."
[1] "pK = 0.11..."
[1] "pK = 0.12..."
[1] "pK = 0.13..."
[1] "pK = 0.14..."
[1] "pK = 0.15..."
[1] "pK = 0.16..."
[1] "pK = 0.17..."
[1] "pK = 0.18..."
[1] "pK = 0.19..."
[1] "pK = 0.2..."
[1] "pK = 0.21..."
[1] "pK = 0.22..."
[1] "pK = 0.23..."


“检测到 5923 个列名，然而数据共有 5924 列（文件不合法）。添加了 1 个额外列名作为第一列，并被用于猜测行名或索引。若上述猜测不正确，可在后续使用setnames()进行修改，或修复用于生成该文件的文件写入命令以生成有效的文件。”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Regressing out percent.mt

Centering and scaling data matrix

Computing nearest neighbor graph

Computing SNN

“The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session”
05:19:30 UMAP embedding parameters a = 0.9922 b = 1.112

05:19:30 Read 5923 rows and found 20 numeric columns

05:19:30 Using Annoy for neighbor search, n_neighbors = 30

05:19:30 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 9592
Number of edges: 354080

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9039
Number of communities: 13
Elapsed time: 6 seconds
[1] "Creating artificial doublets for pN = 5%"
[1] "Creating Seurat object..."
[1] "Normalizing Seurat object..."
[1] "Finding variable genes..."
[1] "Scaling data..."
[1] "Running PCA..."
[1] "Calculating PC distance matrix..."
[1] "Defining neighborhoods..."
[1] "Computing pANN across all pK..."
[1] "pK = 0.001..."
[1] "pK = 0.005..."
[1] "pK = 0.01..."
[1] "pK = 0.02..."
[1] "pK = 0.03..."
[1] "pK = 0.04..."
[1] "pK = 0.05..."
[1] "pK = 0.06..."
[1] "pK = 0.07..."
[1] "pK = 0.08..."
[1] "pK = 0.09..."
[1] "pK = 0.1..."
[1] "pK = 0.11..."
[1] "pK = 0.12..."
[1] "pK = 0.13..."
[1] "pK = 0.14..."
[1] "pK = 0.15..."
[1] "pK = 0.16..."
[1] "pK = 0.17..."
[1] "pK = 0.18..."
[1] "pK = 0.19..."
[1] "pK = 0.2..."
[1] "pK = 0.21..."
[1] "pK = 0.22..."

“检测到 9593 个列名，然而数据共有 9594 列（文件不合法）。添加了 1 个额外列名作为第一列，并被用于猜测行名或索引。若上述猜测不正确，可在后续使用setnames()进行修改，或修复用于生成该文件的文件写入命令以生成有效的文件。”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Regressing out percent.mt

Centering and scaling data matrix

Computing nearest neighbor graph

Computing SNN

“The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session”
05:34:42 UMAP embedding parameters a = 0.9922 b = 1.112

05:34:42 Read 9592 rows and found 20 numeric columns

05:34:42 Using Annoy for neighbor search, n_neighbors = 30

05:34:42 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 6461
Number of edges: 237174

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9012
Number of communities: 13
Elapsed time: 1 seconds
[1] "Creating artificial doublets for pN = 5%"
[1] "Creating Seurat object..."
[1] "Normalizing Seurat object..."
[1] "Finding variable genes..."
[1] "Scaling data..."
[1] "Running PCA..."
[1] "Calculating PC distance matrix..."
[1] "Defining neighborhoods..."
[1] "Computing pANN across all pK..."
[1] "pK = 0.005..."
[1] "pK = 0.01..."
[1] "pK = 0.02..."
[1] "pK = 0.03..."
[1] "pK = 0.04..."
[1] "pK = 0.05..."
[1] "pK = 0.06..."
[1] "pK = 0.07..."
[1] "pK = 0.08..."
[1] "pK = 0.09..."
[1] "pK = 0.1..."
[1] "pK = 0.11..."
[1] "pK = 0.12..."
[1] "pK = 0.13..."
[1] "pK = 0.14..."
[1] "pK = 0.15..."
[1] "pK = 0.16..."
[1] "pK = 0.17..."
[1] "pK = 0.18..."
[1] "pK = 0.19..."
[1] "pK = 0.2..."
[1] "pK = 0.21..."
[1] "pK = 0.22..."
[1] "pK = 0.23..."


“检测到 6461 个列名，然而数据共有 6462 列（文件不合法）。添加了 1 个额外列名作为第一列，并被用于猜测行名或索引。若上述猜测不正确，可在后续使用setnames()进行修改，或修复用于生成该文件的文件写入命令以生成有效的文件。”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Regressing out percent.mt

Centering and scaling data matrix

Computing nearest neighbor graph

Computing SNN

“The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session”
05:43:56 UMAP embedding parameters a = 0.9922 b = 1.112

05:43:56 Read 6461 rows and found 20 numeric columns

05:43:56 Using Annoy for neighbor search, n_neighbors = 30

05:43:56 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 8224
Number of edges: 318443

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9109
Number of communities: 14
Elapsed time: 3 seconds
[1] "Creating artificial doublets for pN = 5%"
[1] "Creating Seurat object..."
[1] "Normalizing Seurat object..."
[1] "Finding variable genes..."
[1] "Scaling data..."
[1] "Running PCA..."
[1] "Calculating PC distance matrix..."
[1] "Defining neighborhoods..."
[1] "Computing pANN across all pK..."
[1] "pK = 0.005..."
[1] "pK = 0.01..."
[1] "pK = 0.02..."
[1] "pK = 0.03..."
[1] "pK = 0.04..."
[1] "pK = 0.05..."
[1] "pK = 0.06..."
[1] "pK = 0.07..."
[1] "pK = 0.08..."
[1] "pK = 0.09..."
[1] "pK = 0.1..."
[1] "pK = 0.11..."
[1] "pK = 0.12..."
[1] "pK = 0.13..."
[1] "pK = 0.14..."
[1] "pK = 0.15..."
[1] "pK = 0.16..."
[1] "pK = 0.17..."
[1] "pK = 0.18..."
[1] "pK = 0.19..."
[1] "pK = 0.2..."
[1] "pK = 0.21..."
[1] "pK = 0.22..."
[1] "pK = 0.23..."


“检测到 8224 个列名，然而数据共有 8225 列（文件不合法）。添加了 1 个额外列名作为第一列，并被用于猜测行名或索引。若上述猜测不正确，可在后续使用setnames()进行修改，或修复用于生成该文件的文件写入命令以生成有效的文件。”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Regressing out percent.mt

Centering and scaling data matrix

Computing nearest neighbor graph

Computing SNN

“The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session”
05:53:25 UMAP embedding parameters a = 0.9922 b = 1.112

05:53:25 Read 8224 rows and found 20 numeric columns

05:53:25 Using Annoy for neighbor search, n_neighbors = 30

05:53:25 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 7546
Number of edges: 259941

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8873
Number of communities: 13
Elapsed time: 2 seconds
[1] "Creating artificial doublets for pN = 5%"
[1] "Creating Seurat object..."
[1] "Normalizing Seurat object..."
[1] "Finding variable genes..."
[1] "Scaling data..."
[1] "Running PCA..."
[1] "Calculating PC distance matrix..."
[1] "Defining neighborhoods..."
[1] "Computing pANN across all pK..."
[1] "pK = 0.005..."
[1] "pK = 0.01..."
[1] "pK = 0.02..."
[1] "pK = 0.03..."
[1] "pK = 0.04..."
[1] "pK = 0.05..."
[1] "pK = 0.06..."
[1] "pK = 0.07..."
[1] "pK = 0.08..."
[1] "pK = 0.09..."
[1] "pK = 0.1..."
[1] "pK = 0.11..."
[1] "pK = 0.12..."
[1] "pK = 0.13..."
[1] "pK = 0.14..."
[1] "pK = 0.15..."
[1] "pK = 0.16..."
[1] "pK = 0.17..."
[1] "pK = 0.18..."
[1] "pK = 0.19..."
[1] "pK = 0.2..."
[1] "pK = 0.21..."
[1] "pK = 0.22..."
[1] "pK = 0.23..."


“检测到 7546 个列名，然而数据共有 7547 列（文件不合法）。添加了 1 个额外列名作为第一列，并被用于猜测行名或索引。若上述猜测不正确，可在后续使用setnames()进行修改，或修复用于生成该文件的文件写入命令以生成有效的文件。”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Regressing out percent.mt

Centering and scaling data matrix

Computing nearest neighbor graph

Computing SNN

“The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session”
06:09:18 UMAP embedding parameters a = 0.9922 b = 1.112

06:09:18 Read 7546 rows and found 20 numeric columns

06:09:18 Using Annoy for neighbor search, n_neighbors = 30

06:09:18 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 8806
Number of edges: 329525

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9162
Number of communities: 13
Elapsed time: 2 seconds
[1] "Creating artificial doublets for pN = 5%"
[1] "Creating Seurat object..."
[1] "Normalizing Seurat object..."
[1] "Finding variable genes..."
[1] "Scaling data..."
[1] "Running PCA..."
[1] "Calculating PC distance matrix..."
[1] "Defining neighborhoods..."
[1] "Computing pANN across all pK..."
[1] "pK = 0.005..."
[1] "pK = 0.01..."
[1] "pK = 0.02..."
[1] "pK = 0.03..."
[1] "pK = 0.04..."
[1] "pK = 0.05..."
[1] "pK = 0.06..."
[1] "pK = 0.07..."
[1] "pK = 0.08..."
[1] "pK = 0.09..."
[1] "pK = 0.1..."
[1] "pK = 0.11..."
[1] "pK = 0.12..."
[1] "pK = 0.13..."
[1] "pK = 0.14..."
[1] "pK = 0.15..."
[1] "pK = 0.16..."
[1] "pK = 0.17..."
[1] "pK = 0.18..."
[1] "pK = 0.19..."
[1] "pK = 0.2..."
[1] "pK = 0.21..."
[1] "pK = 0.22..."
[1] "pK = 0.23..."


“检测到 8806 个列名，然而数据共有 8807 列（文件不合法）。添加了 1 个额外列名作为第一列，并被用于猜测行名或索引。若上述猜测不正确，可在后续使用setnames()进行修改，或修复用于生成该文件的文件写入命令以生成有效的文件。”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Regressing out percent.mt

Centering and scaling data matrix

Computing nearest neighbor graph

Computing SNN

“The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session”
06:06:08 UMAP embedding parameters a = 0.9922 b = 1.112

06:06:08 Read 8806 rows and found 20 numeric columns

06:06:08 Using Annoy for neighbor search, n_neighbors = 30

06:06:08 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 7353
Number of edges: 249141

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9019
Number of communities: 12
Elapsed time: 2 seconds
[1] "Creating artificial doublets for pN = 5%"
[1] "Creating Seurat object..."
[1] "Normalizing Seurat object..."
[1] "Finding variable genes..."
[1] "Scaling data..."
[1] "Running PCA..."
[1] "Calculating PC distance matrix..."
[1] "Defining neighborhoods..."
[1] "Computing pANN across all pK..."
[1] "pK = 0.005..."
[1] "pK = 0.01..."
[1] "pK = 0.02..."
[1] "pK = 0.03..."
[1] "pK = 0.04..."
[1] "pK = 0.05..."
[1] "pK = 0.06..."
[1] "pK = 0.07..."
[1] "pK = 0.08..."
[1] "pK = 0.09..."
[1] "pK = 0.1..."
[1] "pK = 0.11..."
[1] "pK = 0.12..."
[1] "pK = 0.13..."
[1] "pK = 0.14..."
[1] "pK = 0.15..."
[1] "pK = 0.16..."
[1] "pK = 0.17..."
[1] "pK = 0.18..."
[1] "pK = 0.19..."
[1] "pK = 0.2..."
[1] "pK = 0.21..."
[1] "pK = 0.22..."
[1] "pK = 0.23..."


“检测到 7353 个列名，然而数据共有 7354 列（文件不合法）。添加了 1 个额外列名作为第一列，并被用于猜测行名或索引。若上述猜测不正确，可在后续使用setnames()进行修改，或修复用于生成该文件的文件写入命令以生成有效的文件。”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Regressing out percent.mt

Centering and scaling data matrix

Computing nearest neighbor graph

Computing SNN

“The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session”
06:24:16 UMAP embedding parameters a = 0.9922 b = 1.112

06:24:16 Read 7353 rows and found 20 numeric columns

06:24:16 Using Annoy for neighbor search, n_neighbors = 30

06:24:16 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*

20*约8000个细胞，10线程，100G，单跑tSNE约15min，跑tSNE+UMAP约20min

In [None]:
# Elapsed wall-clock time between the recorded start and end timestamps
difftime(endtime, starttime)

In [None]:
# Persist the current `slist` to the temporary directory as an RDS file
slist %>%
    saveRDS(file = paste0(tmpdir, 'raw_standard_slist.Rds'))

## （2）使用SCT标准化

In [None]:
# SCTransform-based per-sample pipeline.
# For every file name in `samples` this builds a Seurat object, filters cells,
# normalises with SCTransform, runs PCA / clustering / tSNE / UMAP, and labels
# doublets with DoubletFinder. `slist` receives one Seurat object per sample,
# each carrying a `doublets` metadata column; `starttime` / `endtime` bracket
# the whole run.
starttime <- Sys.time()
slist <- future_lapply(samples, function(i) {
    # Read the expression table; the column fread names 'V1' becomes the row names
    idir <- file.path(rawdir, i)
    a <- fread(idir, data.table = FALSE) %>% column_to_rownames("V1")

    # The project name is the file name up to its first dot
    TenXdat <- CreateSeuratObject(counts = a, min.cells = 3, min.features = 500, project = strsplit(i, "[.]")[[1]][1])
    TenXdat[["percent.mt"]] <- PercentageFeatureSet(TenXdat, pattern = "^MT-")
    # NOTE(review): min.features = 500 above is stricter than the
    # nFeature_RNA > 100 bound below - confirm which threshold is intended.
    TenXdat <- subset(TenXdat, subset = nFeature_RNA > 100 & nFeature_RNA < 25000 & percent.mt < 15 & nCount_RNA > 500 & nCount_RNA < 500000)

    # Adding a project-specific tag to the cell barcodes: the supplied barcodes
    # already carry the project tag, so no renaming is needed.
    #prefix <- paste(strsplit(i,'_')[[1]][c(2,4,5)], collapse = '_')
    #NewCellName <- paste(prefix, colnames(TenXdat), sep = '_')
    #TenXdat <- RenameCells(TenXdat, NewCellName)

    # Split the project name once; fields 4 and 5 give Patient and Source
    name_parts <- strsplit(TenXdat@project.name, "_")[[1]]
    TenXdat@meta.data$orig.ident <- paste(name_parts[c(4, 5)], collapse = "_")
    TenXdat@meta.data$Patient <- name_parts[4]
    TenXdat@meta.data$Source <- name_parts[5]

    TenXdat <- SCTransform(TenXdat, vars.to.regress = c("percent.mt"), verbose = FALSE)
    TenXdat <- RunPCA(TenXdat, features = VariableFeatures(object = TenXdat), verbose = FALSE)

    pc.num <- 1:20
    TenXdat <- FindNeighbors(TenXdat, dims = pc.num)
    TenXdat <- FindClusters(object = TenXdat, resolution = 0.5)
    # `reduction` is the current argument name; the legacy `reduction.use`
    # is not a formal of RunTSNE and was passed on through `...`.
    TenXdat <- RunTSNE(object = TenXdat, dims = pc.num, reduction = "pca")
    TenXdat <- RunUMAP(object = TenXdat, dims = pc.num)

    # Run DoubletFinder on a copy so its extra metadata columns stay off TenXdat
    TenXdat_db <- TenXdat
    sweep.res.list_TenXdat <- paramSweep_v3(TenXdat_db, PCs = pc.num, sct = TRUE)
    sweep.stats_TenXdat <- summarizeSweep(sweep.res.list_TenXdat, GT = FALSE)
    bcmvn_TenXdat <- find.pK(sweep.stats_TenXdat)
    # Take the pK with the highest BCmetric; convert via character to get its numeric value
    pK_bcmvn <- bcmvn_TenXdat$pK[which.max(bcmvn_TenXdat$BCmetric)] %>% as.character() %>% as.numeric()

    # 5000 cells correspond to a doublet rate of 3.9%, see https://cloud.tencent.com/developer/article/1825672
    DoubletRate <- 0.05
    # Ideally supply cell-type annotations here instead of cluster labels
    homotypic.prop <- modelHomotypic(TenXdat_db@meta.data$seurat_clusters)
    nExp_poi <- round(DoubletRate * ncol(TenXdat_db))
    nExp_poi.adj <- round(nExp_poi * (1 - homotypic.prop))

    TenXdat_db <- doubletFinder_v3(TenXdat_db, PCs = pc.num, pN = 0.25, pK = pK_bcmvn, nExp = nExp_poi.adj, reuse.pANN = FALSE, sct = TRUE)

    # Find the classification column (its name starts with "DF") and fail
    # loudly if it is missing or ambiguous.
    df_col <- grep("^DF", colnames(TenXdat_db@meta.data), value = TRUE)
    stopifnot(length(df_col) == 1L)

    TenXdat$doublets <- TenXdat_db@meta.data[[df_col]]
    TenXdat
}, future.seed = TRUE)  # parallel-safe, reproducible RNG streams for the workers
endtime <- Sys.time()

In [9]:
# Elapsed wall-clock time between the recorded start and end timestamps
difftime(endtime, starttime)

Time difference of 2.365998 hours

In [10]:
# Persist the current `slist` to the temporary directory as an RDS file
slist %>%
    saveRDS(file = paste0(tmpdir, 'raw_sct_slist_2.Rds'))

# 可以先保留DoubletFinder的结果，后续亚群分析时再筛掉

In [None]:
# Keep only the cells whose `doublets` label is "Singlet" in every sample.
# seq_along() makes the loop a no-op for an empty `slist`, whereas
# 1:length(slist) would iterate over c(1, 0) and fail.
for (i in seq_along(slist)) {
  slist[[i]] <- subset(slist[[i]], doublets == "Singlet")
}