### Load libraries and functions

In [1]:
#Load libraries
library(Seurat)
library(Rcpp)
library(parallelDist)
library(stringr)
library(viridis)
library(ggplot2)
library(cowplot)
library(cluster)
library(data.table)
library(foreach)
library(doParallel)
library(proxy)
library(ComplexHeatmap)
library(circlize)
library(igraph)
library(qvalue)
library(dplyr)
library(viridis)
library(VGAM)
library(forcats)
library(grDevices)
library(graphics)
library(RColorBrewer)
library(pheatmap)
library(Cairo)
library(reshape2)
library(R.utils)
set.seed(seed = 42)

Attaching SeuratObject

Loading required package: viridisLite

Loading required package: iterators

Loading required package: parallel


Attaching package: ‘proxy’


The following objects are masked from ‘package:stats’:

    as.dist, dist


The following object is masked from ‘package:base’:

    as.matrix


Loading required package: grid

ComplexHeatmap version 2.11.1
Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
Github page: https://github.com/jokergoo/ComplexHeatmap
Documentation: http://jokergoo.github.io/ComplexHeatmap-reference

If you use it in published research, please cite:
Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
  genomic data. Bioinformatics 2016.

The new InteractiveComplexHeatmap package can directly export static 
complex heatmaps into an interactive Shiny app with zero effort. Have a try!

This message can be suppressed by:
  suppressPackageStartupMessages(library(ComplexHeatmap))


circlize version 0.4.14
CRA

In [2]:
#load basic functions
createEmptyDf = function( nrow, ncol, colnames = c() ){
  data.frame( matrix( vector(), nrow, ncol, dimnames = list( c(), colnames ) ) )
}

tableread_fast = function(i, header=TRUE, quote="", sep=","){
  tmp = fread(i, header=header, sep=sep, quote=quote, nThread=32)
  tmp = as.data.frame(tmp)
  return(tmp)
}

### Create Seurat object

In [6]:
#load data
#Put gene list in the target panel in the working directory ("BD_genes_plus25")

###Input layer
set.seed(seed = 42)

sample.name <- "matrix_inflection_demulti_aCD4.txt"
input_dir <- "raw.data"

################################ Processing layer ##########################################
#Create input file name
input_name <- str_c(input_dir, sample.name, sep = "/") #Name of dataset1

#Unzip input file
compressed.name <- str_c(input_name, ".gz", sep = "")
gunzip(compressed.name)

#Create Seurat object
matrix <- tableread_fast(input_name, header = TRUE, quote="", sep="\t")
row.names(matrix) <- matrix$V1
matrix <- dplyr::select(matrix, -V1)
seu1 <- CreateSeuratObject(counts=matrix, project = "seu", min.cells = 3, min.features = 10)

In [7]:
#load dataset 2
#Put gene list in the target panel in the working directory ("BD_genes_plus10")

###Input layer
set.seed(seed = 42)

sample.name <- "matrix_inflection_demulti_Cont_aPDL1.txt"
input_dir <- "raw.data"

################################ Processing layer ##########################################
#Create input file name
input_name <- str_c(input_dir, sample.name, sep = "/") #Name of dataset1

#Unzip input file
compressed.name <- str_c(input_name, ".gz", sep = "")
gunzip(compressed.name)

#Create Seurat object
matrix <- tableread_fast(input_name, header = TRUE, quote="", sep="\t")
row.names(matrix) <- matrix$V1
matrix <- dplyr::select(matrix, -V1)
seu2 <- CreateSeuratObject(counts=matrix, project = "seu", min.cells = 3, min.features = 10)

In [8]:
#Merge two datasets
seu <- merge(x=seu1, y = seu2)

### Preprocessing for excluding check non-T cell contamination

In [3]:
###Function: perform Seurat pipeline: Normalizing, Scaling, PCA, JackStraw, dimentional reduction, find clusters
##Input: seurat object
##Output: seurat object, JackStraw Plot

Preprocess <- function(seu, dir.name, sample.name){
    # normalizing data
    seu = NormalizeData(object = seu, scale.factor=1000000)
    VariableFeatures(seu) <- rownames(seu[["RNA"]]) #Variable features: all genes
    ngenes <- length(x = seu@assays$RNA@var.features)
    all.genes <- rownames(seu)
    seu = ScaleData(object = seu, vars.to.regress = c("nCount_RNA"), features = all.genes) 

    # perform PCA
    seu = RunPCA(object = seu, features = seu@assays$RNA@var.features, npcs = 50)

    #JackStraw
    seu = JackStraw(object = seu, num.replicate = 100, dims = 50)
    seu <- ScoreJackStraw(object = seu, dims = 1:50, score.thresh = 0.05)
    file.name <- str_c(dir.name, sample.name, sep='/') %>% str_c("_JackStraw.png", sep='')
    png(file.name, width = 1250, height = 500)
    p <- JackStrawPlot(object = seu, dims = 1:50)
    plot(p)
    dev.off()

    #Determine PCs used for clustering/tSNE analysis (dims.use)
    #Extract PCs which fulfill the pvalue threshold
    tmp = as.data.frame(seu@reductions$pca@jackstraw@overall.p.values)
    tmp1 = tmp[tmp$Score>0.05,1]
    dims= c(1:(min(tmp1)-1))

    ##Perform dimentional reduction
    seu <- RunUMAP(object = seu, dims = 1:(min(tmp1)-1))

    ##Find clusters
    seu = FindNeighbors(seu, reduction = "pca", dims = dims, force.recalc = TRUE)
    
    return(seu)
}


In [4]:
###Function: DimPlot with sample origin
##Input: seurat object
##Output: DimplotStraw Plot

DimOrigin <- function(seu, dir.name, sample.name, red.use, resol){
    p1 = DimPlot(object = seu, reduction = red.use, label = TRUE, label.size = 10, pt.size = 0.5) +
      theme(axis.title.x = element_text(size=10, family = "Arial"), 
            axis.title.y = element_text(size=10, family = "Arial"), 
            axis.text.x = element_text(size=10, colour = 1, family = "Arial"), 
            axis.text.y = element_text(size = 10, colour = 1, family = "Arial")) +
      theme(panel.border = element_rect(fill = NA, size = 1)) 
    p2 = DimPlot(object = seu, reduction = red.use, label = FALSE, label.size = 10, pt.size = 0.5, group.by = "orig.ident") +
      theme(axis.title.x = element_text(size=10, family = "Arial"),
            axis.title.y = element_text(size=10, family = "Arial"),
            axis.text.x = element_text(size=10, colour = 1, family = "Arial"),
            axis.text.y = element_text(size = 10, colour = 1, family = "Arial")) +
      theme(panel.border = element_rect(fill = NA, size = 1)) 

    legend1 <- cowplot::get_legend(p1)
    legend2 <- cowplot::get_legend(p2)
    p1 = p1 + theme(legend.position = 'none')
    p2 = p2 + theme(legend.position = 'none')
    file.name <- str_c(dir.name, sample.name, sep='/') %>% str_c(red.use, "reso", resol, "png", sep='.')
    save_plot(file = file.name, plot_grid(p1, legend1, p2, legend2, ncol=2, nrow=2), device="png", 
              units="in", dpi = 600, base_width = 10, base_height = 10, limitsize=FALSE)
}

In [5]:
###Function: Find marker genes with marker heatmap
##Input: seurat object
##Output: Marker gene table and marker gene heatmap plot

MarkerHeatmap <- function(seu, dir.name, sample.name, resol){
    seu.markers = FindAllMarkers(seu, verbose = TRUE, test.use="wilcox", only.pos=TRUE, min.pct=0.1, features.use = NULL, return.thresh=0.05)

    #Create heatmap with top10 marker genes
    top10 = seu.markers %>% group_by(cluster) %>% top_n(10, avg_log2FC)
    top10 = as.data.frame(top10)
    top10  = top10 [!duplicated(top10$gene),]
    top10 = top10 %>% arrange(desc(avg_log2FC))  %>% arrange(cluster)
    top10 = as.data.frame(top10)
    file.name <- str_c(dir.name, sample.name, sep='/') %>% str_c("marker_res", resol, "png", sep='.')
    p <- DoHeatmap(seu, features = top10$gene, disp.min = -2.5, disp.max = 2.5, size = 8)
    ggsave(file = file.name, plot = p, device="png", units="in", dpi = 300,
           width = 20, height = 20, limitsize=FALSE)

    seu.markers$cluster = as.numeric(seu.markers$cluster)
    seu.markers = seu.markers %>% arrange(desc(avg_log2FC))  %>% arrange(cluster)
    seu.markers = as.data.frame(seu.markers)
    file.name <- str_c(dir.name, sample.name, sep='/') %>% str_c("ALLmarkers_minpct0.1_Adj_p0.05.txt", sep='')
    fwrite(seu.markers, file.name, row.names=F, col.names=T, sep="\t", quote=F)
    file.name <- str_c(dir.name, sample.name, sep='/') %>% str_c("ALLmarkers_minpct0.1_Adj_p0.05.rda", sep='')
    save(seu.markers, file=file.name)
}

In [6]:
###Function: Create Scatter and Violin plot for selected genes
##Input: seurat object, gene list
##Output: Scatter plot, Violin plot

ScatterViolin <- function(seu, dir.name, sample.name, red.use, tmps, tmp_names){
    for (i in 1:length(tmp_names)){
        #Call gene list
        tmp <- tmps[[i]]
        tmp_name <- tmp_names[i]
  
        #Scatter Plot
        file.name <- str_c(dir.name, sample.name, sep='/') %>% str_c("Scatter", tmp_name, "png", sep='.')
        png(file.name, width = 1536, height = 240)
        p <- FeaturePlot(seu, features = tmp, ncol = 6, order = TRUE,
                        reduction = red.use, dims=c(1,2), cols = c("grey", "red"), pt.size = 0.2)
        plot(p)
        dev.off()
        
        #Violin Plot
        file.name <- str_c(dir.name, sample.name, sep='/') %>% str_c("Violin", tmp_name, "png", sep='.')
        png(file.name, width = 1536, height = 240)
        p <- VlnPlot(seu, features = tmp, ncol = 6, pt.size = 0.1)
        plot(p)
        dev.off()
    }
}


### Quality Check

In [32]:
###Gene filtering

##Input layer
dir.name=("Seurat_plot/")
gene.list.name <- "BD_genes_plus10.txt"
sample.name <- "ICIrevise"

########################## Processing layer ###############################
dir.create(dir.name)

##load gene list of BD target panel
BD_genes <- read.table(gene.list.name, header = TRUE)
BD_genes <- as.vector(BD_genes$Genesymbol)

##Chose BD target gene list for downstream analysis
res1 = seu@assays$RNA@counts
res1 =res1[res1@Dimnames[[1]] %in% BD_genes,]
ngenes <- length(res1@Dimnames[[1]])
seu@assays$RNA@counts <- res1
seu@assays$RNA@data <- res1

#Scatter plot for gene/read count
file.name=paste(dir.name, sample.name, "Reads.Genes.png", sep='')
png(file.name, width = 512, height = 400)
FeatureScatter(object = seu, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", pt.size = 0.3) +
  geom_hline(yintercept=10) +
  scale_x_log10()
dev.off()

#RidgePlot for gene/read count, 
file.name=paste(dir.name, sample.name, "nCount_RNA_log.png", sep='')
png(file.name, width = 512, height = 400)
RidgePlot(object = seu, features = "nCount_RNA", group.by="orig.ident", ncol = 1) +
  scale_x_log10()
dev.off()

file.name=paste(dir.name, sample.name, "nGenes.png", sep='')
png(file.name, width = 512, height = 400)
RidgePlot(object = seu, features = "nFeature_RNA", group.by="orig.ident", ncol = 1)
dev.off()

Scale for 'x' is already present. Adding another scale for 'x', which will
replace the existing scale.

Picking joint bandwidth of 0.0565



Picking joint bandwidth of 7.11



###  Initial analysis

In [33]:
###Separating seurat object by samples

#Remove doublet, Sampletag-undetected, and Pmel-Sampletag detected cells
seu.cd4 <- subset(x=seu, idents = c("mouseSampleTag5", "mouseSampleTag6"))
seu.cont <- subset(x=seu, idents = c("mouseSampleTag1"))
seu.pdl1 <- subset(x=seu, idents = c("mouseSampleTag2"))

conditions <- c("Control", "aPDL1", "aCD4")
seu.list <- list(seu.cont, seu.pdl1, seu.cd4)

In [34]:
###Run Seurat pipeline (dimentional reduction ~ determining resolution) with iterative process

###Input layer 
sample.basename <- "ICIrev"
dir.basename <- "Seurat_plot"
red.use <- "umap"
resol <- 1.0

#For lineage check
tmp_1 = c("Trbc2", "Cd3e", "Cd8a", "Cd4", "Cd14", "Lyz2")
tmps <- list(tmp_1)
tmp_names <- c("lineage")

########################## Processing layer #############################
dir.create(dir.basename)

for(i in 1:length(conditions)){
    #Call seurat object and condition name
    seu <- seu.list[[i]]
    condition <- conditions[i]
    sample.name <- str_c(sample.basename, condition, sep = "_")
    dir.name <- str_c(dir.basename, condition, sep = "/")
    
    ### Start processing ###
    dir.create(dir.name)
    seu <- Preprocess(seu, dir.name, sample.name)
    
    #Find clusters
    seu <- FindClusters(object = seu, resolution =resol)
    
    #DimPlot with sample origin
    DimOrigin(seu, dir.name, sample.name, red.use, resol)
    
    #Marker gene extraction and create marker gene heatmap
    MarkerHeatmap(seu, dir.name, sample.name, resol)
    
    #Check contamination
    ScatterViolin(seu, dir.name, sample.name, red.use, tmps, tmp_names)
        
    #Output Seurat object in 2nd analysis
    file.name=paste(sample.name, ".rda", sep='')
    save(seu, file=file.name)
}

“'Seurat_plot' already exists”
Regressing out nCount_RNA

Centering and scaling data matrix

“The following 4 features requested have zero variance (running reduction without them): Arg1, Ccl19, Il22, Tlr9”
PC_ 1 
Positive:  Bcl2a1a, Lgals1, Tnfrsf9, Lag3, Pdcd1, Tnfrsf4, Klrc1, S100a10, Nkg7, Ctla4 
	   Cd8a, Cd3g, Thy1, Cxcr6, Tnfrsf18, Cd52, Ybx3, Icos, Cd8b1, Cd6 
	   Cst7, Nfkb2, Klrk1, Gapdh, Il2ra, Ctsd, Cd5, Irf8, Tigit, Zap70 
Negative:  Cd300a, Trem2, Clec4d, Sell, Adgre1, Fn1, Thbs1, Lyz2, Cd14, Nlrp3 
	   Il1rn, Ccl2, C1qa, Cd36, Tlr4, Fcgr3, Mgst1, Tnfsf13, Ccr1, Il7r 
	   F13a1, C1qb, Cd63, Itgam, Cxcl2, Ccl6, Thbd, Mmp12, Btg1, Fcna 
PC_ 2 
Positive:  Ccl5, Gzmb, Gzmk, Itgax, Klrc1, Nkg7, Cxcr6, Klrg1, Fasl, Gzma 
	   Ccr2, Lgals3, Lgals1, Cxcr3, Cd6, Cd5, Gimap7, S100a10, Entpd1, Igkc 
	   Thy1, Rgs1, Il12rb2, Ccr5, Ctsd, Lilrb4a, Casp1, Tnfrsf25, Ifng, Il18rap 
Negative:  Ccr7, Sell, Il7r, Il4ra, Tnfrsf4, Xcl1, Tcf7, Junb, Cd7, Irf8 
	   Nrp1, Fas, Tnfsf8, Dusp1, Il1r2

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 8079
Number of edges: 254335

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7147
Number of communities: 12
Elapsed time: 1 seconds


“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostSc

“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostSc

“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
Calculating cluster 0

Calculating cluster 1

Calculating cluster 2

Calculating cluster 3

Calculating cluster 4

Calculating cluster 5

Calculating cluster 6

Calculating cluster 7

Calculating cluster 8

Calculating cluster 9

Calculating cluster 10

Calculating cluster 11

Regressing out nCount_RNA

Centering and scaling data matrix

“The following 4 features requested have zero variance (runn

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 13840
Number of edges: 406366

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7625
Number of communities: 12
Elapsed time: 3 seconds


“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostSc

“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostSc

“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostSc

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 13625
Number of edges: 383408

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7580
Number of communities: 14
Elapsed time: 3 seconds


“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostSc

“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostSc

“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostSc

### Initial Clustering and check contamination

### 2nd analysis with removing contaminant non-T cells

In [35]:
#Remove contaminant clusters
load("ICIrev_Control.rda")
seu.cont <- subset(seu, idents = c(0, 1, 2, 3, 4, 5, 6, 7, 9))#8: NK, 10:Macrophage, 11:Th17
load("ICIrev_aPDL1.rda")
seu.pdl1 <- subset(seu, idents = c(0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11))#5: NK
load("ICIrev_aCD4.rda")
seu.cd4 <- subset(seu, idents = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9))#10: NK, 11:Treg, 12:Macrophage, 13:Macrophage

conditions <- c("Control", "aPDL1", "aCD4")
seu.list <- list(seu.cont, seu.pdl1, seu.cd4)

In [36]:
###Run Seurat pipeline (dimentional reduction ~ determining resolution) with iterative process

###Input layer 
sample.basename <- "ICIrev"
dir.basename <- "Seurat_plot_2nd"
red.use <- "umap"
resol <- 0.6

#For lineage check
tmp_1 = c("Trbc2", "Cd3e", "Cd8a", "Cd4", "Cd14", "Lyz2")
tmps <- list(tmp_1)
tmp_names <- c("lineage")

########################## Processing layer #############################
dir.create(dir.basename)

for(i in 1:length(conditions)){
    #Call seurat object and condition name
    seu <- seu.list[[i]]
    condition <- conditions[i]
    sample.name <- str_c(sample.basename, condition, sep = "_")
    dir.name <- str_c(dir.basename, condition, sep = "/")
    
    ### Start processing ###
    dir.create(dir.name)
    seu <- Preprocess(seu, dir.name, sample.name)
    
    #Find clusters
    seu <- FindClusters(object = seu, resolution =resol)
    
    #DimPlot with sample origin
    DimOrigin(seu, dir.name, sample.name, red.use, resol)
    
    #Marker gene extraction and create marker gene heatmap
    MarkerHeatmap(seu, dir.name, sample.name, resol)
    
    #Check contamination
    ScatterViolin(seu, dir.name, sample.name, red.use, tmps, tmp_names)
        
    #Output Seurat object in 2nd analysis
    file.name=paste(sample.name, "_2nd.rda", sep='')
    save(seu, file=file.name)
}

Regressing out nCount_RNA

Centering and scaling data matrix

“The following 8 features requested have zero variance (running reduction without them): Arg1, Ccl19, Chil3, Clec4e, Il22, Tlr9, F5, Tlr8”
PC_ 1 
Positive:  Ccl5, Gzmb, Gzmk, Itgax, Klrc1, Nkg7, Cxcr6, Lgals1, Klrg1, Fasl 
	   Lgals3, Cxcr3, Ccr2, Gzma, Cd6, S100a10, Cd5, Gimap7, Rgs1, Igkc 
	   Entpd1, Thy1, Ctsd, Il12rb2, Il18rap, Cd52, Lilrb4a, Icos, Ccr5, Casp1 
Negative:  Ccr7, Sell, Il7r, Il4ra, Tnfrsf4, Tcf7, Il6ra, Fas, Junb, Cd7 
	   Xcl1, Pik3ip1, Nrp1, Irf8, Tnfsf8, Dusp1, Il1r2, Bach2, Btla, Stat3 
	   Tfrc, Cxcr5, Cd74, H2-K1, Nt5e, Trbc1, Myc, Kit, Jun, H2-DMa 
PC_ 2 
Positive:  Sell, Btg1, Il7r, Lef1, Arl4c, Ifngr1, Cd7, Pik3ip1, Tcf7, Txk 
	   Il6ra, Itk, Dusp2, Lat, Ifit3b, Bcl2, Ifit3, Cxcr4, Trbc1, Cxcr3 
	   Cd69, Ikbkb, Cxcr5, Gzma, Ccl5, Il4ra, Gzmm, Dpp4, H2-Ob, Itgae 
Negative:  Tnfrsf4, Lag3, Tnfrsf9, Pdcd1, Irf8, Il1r2, Bcl2a1a, Nrp1, Xcl1, Lgals1 
	   Il2ra, Cd74, Kit, S100a10, Tnfrsf18, Ctla4, Ccr

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 7635
Number of edges: 225647

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7526
Number of communities: 7
Elapsed time: 1 seconds


“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostSc

“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostSc

“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
Calculating cluster 0

Calculating cluster 1

Calculating cluster 2

Calculating cluster 3

Calculating cluster 4

Calculating cluster 5

Calculating cluster 6

Regressing out nCount_RNA

Centering and scaling data matrix

“The following 4 features requested have zero variance (running reduction without them): Arg1, Ccl19, Il22, Tlr9”
PC_ 1 
Positive:  Lag3, Pdcd1, Bcl2a1a, Tnfrsf9, Klrc1, Lgals1,

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 12803
Number of edges: 369847

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8030
Number of communities: 9
Elapsed time: 3 seconds


“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostSc

“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostSc

“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostSc

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 12823
Number of edges: 321379

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7728
Number of communities: 8
Elapsed time: 2 seconds


“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostSc

“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostSc

“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostSc

### 3rd analysis with removing contaminant non-T cells (aCD4 only)

In [37]:
#Remove contaminant clusters
load("ICIrev_aCD4_2nd.rda")
seu.cd4 <- subset(seu, idents = c(0, 1, 2, 3, 4, 5, 6))#7: Fibro

conditions <- c("aCD4")
seu.list <- list(seu.cd4)

In [38]:
###Run Seurat pipeline (dimentional reduction ~ determining resolution) with iterative process

###Input layer 
sample.basename <- "ICIrev"
dir.basename <- "Seurat_plot_3rd"
red.use <- "umap"
resol <- 0.6

#For lineage check
tmp_1 = c("Trbc2", "Cd3e", "Cd8a", "Cd4", "Cd14", "Lyz2")
tmps <- list(tmp_1)
tmp_names <- c("lineage")

########################## Processing layer #############################
dir.create(dir.basename)

for(i in 1:length(conditions)){
    #Call seurat object and condition name
    seu <- seu.list[[i]]
    condition <- conditions[i]
    sample.name <- str_c(sample.basename, condition, sep = "_")
    dir.name <- str_c(dir.basename, condition, sep = "/")
    
    ### Start processing ###
    dir.create(dir.name)
    seu <- Preprocess(seu, dir.name, sample.name)
    
    #Find clusters
    seu <- FindClusters(object = seu, resolution =resol)
    
    #DimPlot with sample origin
    DimOrigin(seu, dir.name, sample.name, red.use, resol)
    
    #Marker gene extraction and create marker gene heatmap
    MarkerHeatmap(seu, dir.name, sample.name, resol)
    
    #Check contamination
    ScatterViolin(seu, dir.name, sample.name, red.use, tmps, tmp_names)
        
    #Output Seurat object in 2nd analysis
    file.name=paste(sample.name, "_3rd.rda", sep='')
    save(seu, file=file.name)
}

Regressing out nCount_RNA

Centering and scaling data matrix

“The following 11 features requested have zero variance (running reduction without them): Arg1, Fcna, Cd163, Cxcr1, Cxcr2, F5, Fcer2a, Fcrla, Iglc3, Mzb1, Tlr8”
PC_ 1 
Positive:  Ccr7, Tcf7, Sell, Cxcr5, Il7r, Tnfsf8, Myc, Cd9, Il6ra, Xcl1 
	   Btla, Tnfrsf25, Fas, Nt5e, Bcl6, Il4ra, Cd69, Cd200, Cxcl10, Stat5a 
	   Tnfrsf4, Stat4, Junb, Trat1, H2-K1, Btg1, Tlr1, Bach2, Ptprc, Ighm 
Negative:  Gzmb, Ccl5, Lgals3, Nkg7, Lgals1, Ccr2, Ctsd, Pdcd1, Gzmk, Cxcr6 
	   S100a10, Klrc1, Havcr2, Ctla4, Lag3, Ccr5, Entpd1, Itga4, Gimap7, Fasl 
	   Cd52, Rgs1, Ctsw, Ifng, Cd3g, Tigit, Cd38, Il2rb, Klrk1, Cd48 
PC_ 2 
Positive:  Ifit3b, Ifit3, Sell, Ifngr1, Il7r, Ccl5, Irf7, Tcf7, Gzma, Ifit1 
	   Rsad2, Selplg, Cmpk2, Btg1, Mx1, Pik3ip1, Gzmk, Itgax, Cd7, Ddx58 
	   Isg15, Cxcr5, Cxcr3, Il6ra, Arl4c, Itga4, Cxcl10, Usp18, Cd69, Ccr2 
Negative:  Gapdh, Tnfrsf4, Tnfrsf9, Xcl1, Lag3, Bcl2a1a, Irf8, Tfrc, Ybx3, Cd160 
	   Il2ra, Cd8a, Tnfrs

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 12447
Number of edges: 310287

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7706
Number of communities: 8
Elapsed time: 2 seconds


“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostSc

“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostSc

### Determine clustering resolution

In [7]:
###Function: Calculate Silhouette score for each clustering resolution
##Input: seurat object, resolution vectors
##Output: Silhouette score plot

Silhouette <- function(seu, dir.name, sample.name, resolutions){
    ###Calculate Shilouhette score
    distance_matrix = parDist(seu@reductions$umap@cell.embeddings, method = "euclidean", threads=4)
    silhouette_score = NULL
    silhouette_score_mean = list()

    for (j in c(1:length(resolutions))){
      #Perform clustering
      seu <- FindClusters(object = seu, resolution =resolutions[j])
      silhouette = silhouette(as.numeric(seu@active.ident), dist = distance_matrix)
      silhouette = silhouette[,3]
      silhouette_score = c(silhouette_score, mean(silhouette))
      names(silhouette_score)[j]=resolutions[j]
      res <- vector()
      for (k in c(0:length(unique(seu@active.ident))-1)){
        tmp = mean(silhouette[seu@active.ident %in% k]) 
        res = c(res, tmp)
      }
      silhouette_score_mean[[j]]=res
      names(silhouette)=rownames(seu@meta.data)
      silhouette.name = sprintf("silhouette_score.res.%s", resolutions[j])
      seu =  AddMetaData(object = seu, metadata = silhouette, col.name = silhouette.name)
    }

    max_silhouette = names(silhouette_score[max(silhouette_score)])

    x = data.frame(resolution = resolutions,
                   mean_silhouette_score = silhouette_score)

    p_silhouette = ggplot(x, aes(x = resolution, y = mean_silhouette_score)) +
      geom_bar(stat = "identity") +
      ggtitle("Mean silhouette scores of clustering") +
      theme_linedraw() + 
      theme(plot.title=element_text(hjust = 0.5), text=element_text(size=12)) + 
      theme(axis.text.x=element_text(size=12), axis.text.y=element_text(size=12)) 
    file.name <- str_c(dir.name, sample.name, sep='/') %>% str_c("_silhouette.png", sep='')
    ggsave(file = file.name, plot = p_silhouette, device="png", units="in", dpi = 300,
           width = 4, height = 3, limitsize=FALSE)   
}

In [8]:
#Remove contaminant clusters
load("ICIrev_Control_2nd.rda")
seu.cont <- seu
load("ICIrev_aPDL1_2nd.rda")
seu.pdl1 <- seu
load("ICIrev_aCD4_3rd.rda")
seu.cd4 <- seu

conditions <- c("Control", "aPDL1", "aCD4")
seu.list <- list(seu.cont, seu.pdl1, seu.cd4)

In [44]:
###Run Seurat pipeline (dimentional reduction ~ determining resolution) with iterative process

###Input layer 
sample.basename <- "ICIrev"
dir.name <- "Seurat_plot_resolution"
red.use <- "umap"
resolutions <- c(0.1, 0.15, 0.2, 0.4, 0.6, 0.8)

########################## Processing layer #############################
dir.create(dir.name)

for(i in 1:length(conditions)){
    #Call seurat object and condition name
    seu <- seu.list[[i]]
    condition <- conditions[i]
    sample.name <- str_c(sample.basename, condition, sep = "_")

    #Calculate Silhouette scores
    Silhouette(seu, dir.name, sample.name, resolutions)
}

“'Seurat_plot_resolution' already exists”


Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 7635
Number of edges: 225647

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9218
Number of communities: 3
Elapsed time: 1 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 7635
Number of edges: 225647

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8982
Number of communities: 4
Elapsed time: 1 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 7635
Number of edges: 225647

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8780
Number of communities: 5
Elapsed time: 1 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 7635
Number of edges: 225647

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8021
Number of communities: 5
Elapsed time: 1 seconds
Modularity Optimizer

In [9]:
###Visualize clusters

###Input layer 
sample.basename <- "ICIrev"
dir.basename <- "Seurat_plot_Clustering"
red.use <- "umap"
conditions <- c("Control", "aPDL1", "aCD4")
resolutions <- c(0.5, 0.2, 0.4)

########################## Processing layer #############################
dir.create(dir.basename)

for(i in 1:length(conditions)){
    #Call seurat object and condition name
    seu <- seu.list[[i]]
    condition <- conditions[i]
    resol <- resolutions[i]
    sample.name <- str_c(sample.basename, condition, sep = "_")
    dir.name <- str_c(dir.basename, condition, sep = "/")
    
    #Find clusters
    seu <- FindClusters(object = seu, resolution =resol)
    
    #DimPlot with sample origin
    DimOrigin(seu, dir.name, sample.name, red.use, resol)
    
    #Marker gene extraction and create marker gene heatmap
    MarkerHeatmap(seu, dir.name, sample.name, resol)
    
    #Output Seurat object in 2nd analysis
    file.name=paste(sample.name, "_clust.rda", sep='')
    save(seu, file=file.name)
}

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 7635
Number of edges: 225647

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7715
Number of communities: 6
Elapsed time: 2 seconds


“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostSc

“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostSc

“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostSc

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 12803
Number of edges: 369847

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9009
Number of communities: 4
Elapsed time: 2 seconds


“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostSc

“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostSc

“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostSc

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 12447
Number of edges: 310287

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8149
Number of communities: 7
Elapsed time: 2 seconds


“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostSc

“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostScript font database”
“font family 'Arial' not found in PostSc

### Integration of scTCRseq results into SCT Seurat object

In [10]:
###Define function
#Create histogram was showing the total number of TCR reads detected in each cell (top left) 
#and the percentage of TCR reads that were the most common among them (bottom left).
#Scatter plot representing these two metrices of cells were also generated
Histogram <- function(d, file.name, sample.name, dir.output){

  d_count_total <- as.numeric(d$count.total)
  
  #Count histogram
  ppi <- 300
  image.file <- str_c(dir.output, file.name, sep = "/") %>% str_c(sample.name, "histogram.count.tiff", sep = '.') 
  tiff(image.file, width=1.2*ppi, height=0.8*ppi, res=ppi)
  p <- ggplot(NULL, aes(x=d_count_total)) +
    geom_histogram(binwidth=0.1) +
    theme_bw(base_size = 6) +
    labs(x = "Read counts of TCR") +
    theme(
      axis.title.y=element_blank(),
      axis.text.x = element_text(family="Arial"),
      axis.text.y = element_text(family="Arial"),
      axis.title=element_text(size=4)) +
    scale_x_continuous(trans=scales::log2_trans(),
                   breaks=scales::trans_breaks("log2",function(x) 2^x),
                   labels=scales::trans_format("log2",scales::math_format(2^.x)))
  print(p)
  dev.off()

  #Frequency histogram  
  d_freq <- as.numeric(d$freq)
  med_f <- 0.3
  ppi <- 300
  image.file <- str_c(dir.output, file.name, sep = "/") %>% str_c(sample.name, "histogram.freq.tiff", sep = '.')
  tiff(image.file, width=1.2*ppi, height=0.8*ppi, res=ppi)
  p <- ggplot(NULL, aes(x=d_freq)) +
    geom_histogram(binwidth=0.05) +
    theme_bw(base_size = 6) +
    labs(x = "Proportion of the largest TCR read") +
    theme(
      axis.title.y=element_blank(),
      axis.text.x = element_text(family="Arial"),
      axis.text.y = element_text(family="Arial"),
      axis.title=element_text(size=4)) 
  print(p)
  dev.off()

  df <- data.frame(d_count_total, d_freq)
  
  #Scatter plot    
  total <- nrow(subset(df, df$d_freq >= 0))
  lb <- paste(round(100*nrow( subset(df, df$d_freq < 0.6 & df$d_count_total < 2^5 )) / total, digits = 1), "%", sep="" ) 
  rb <- paste(round(100*nrow( subset(df, df$d_freq >= 0.6 & df$d_count_total < 2^5 )) / total, digits = 1), "%", sep="" ) 
  lt <- paste(round(100*nrow( subset(df, df$d_freq < 0.6 & df$d_count_total >= 2^5 )) / total, digits = 1), "%", sep="" ) 
  rt <- paste(round(100*nrow( subset(df, df$d_freq >= 0.6 & df$d_count_total >= 2^5 )) / total, digits = 1), "%", sep="" ) 
  
  ppi <- 300
  image.file <- str_c(dir.output, file.name, sep = "/") %>% str_c(sample.name, "Scatter.tiff", sep = '.')                     
  tiff(image.file, width=1.2*ppi, height=1.2*ppi, res=ppi)
  p <- ggplot(d, aes(x=d_freq, y=d_count_total)) +  
    stat_bin2d(bins=60) +
    scale_fill_gradient(low="lightblue", high="red") +
    theme_bw(base_size = 6) +
    geom_vline(aes(xintercept = 0.6), size=0.25, colour="black") +
    geom_hline(aes(yintercept = 2^5), size=0.25, colour="black") +
    labs(x = "Proportion of the largest TCR read", y = "Read counts of TCR") +
    theme(
      axis.text.x = element_text(family="Arial"),
      axis.text.y = element_text(family="Arial"),
      axis.title=element_text(size=4)) +
    guides(fill=FALSE) +
    scale_y_continuous(trans=scales::log2_trans(),
                   breaks=scales::trans_breaks("log2",function(x) 2^x),
                   labels=scales::trans_format("log2",scales::math_format(2^.x)))
    p <- p + #xlim(0,1) +
    annotate("text", x=-Inf, y=0, hjust=-0.1, vjust=-0.4, label=lb,
             family="Arial",colour="black",size=1.5) +
    annotate("text", x=Inf, y=0, hjust=1.1, vjust=-0.4, label=rb,
             family="Arial",colour="black",size=1.5) +
    annotate("text", x=-Inf, y=Inf, hjust=-0.1, vjust=1.3, label=lt,
             family="Arial",colour="black",size=1.5) +
    annotate("text", x=Inf, y=Inf, hjust=1.1, vjust=1.3, label=rt,
             family="Arial",colour="black",size=1.5) 
  print(p)
  dev.off()  
}

In [11]:
###Define function
#Create TCRa/TCRb combine table for cell barcodes
CombineTable <- function(dir.input, sample.name, dir.output, count_th, freq_th, clonotype, cores){
    combined.tables <- data.frame()
    #Unzip files
    files <- list.files(dir.input, ".tar.bz2")
    for(file in files){
        name.input <- str_c(dir.input, file, sep = "/")
        bunzip2(name.input, remove=FALSE)
        name.input <- str_remove(name.input, pattern = ".bz2")
        untar(name.input)
        file.remove(name.input)
    }
    
    ###Define function
    tableread_fast = function(i, header=TRUE, quote="", sep=","){
      tmp = fread(i, header=header, sep=sep, quote=quote, nThread=32)
      tmp = as.data.frame(tmp)
      return(tmp)
    }
    
    #Convert MiXCR output into VDJtools format
    Convert <- function(file.name, name.TRA){
        name.input <- str_c(name.TRA, file.name, sep = "/")
        data <- tableread_fast(name.input, header = TRUE, sep = '\t')

        #Extract TCR information from mixcr output
        count.total<-data$cloneCount / data$cloneFraction
        freq<-data$cloneFraction
        cdr3nt<-data$nSeqImputedCDR3
        cdr3aa<-data$aaSeqImputedCDR3
        v<-str_sub(data$bestVHit, end=-4)
        d<-str_sub(data$bestDHit, end=-4)
        j<-str_sub(data$bestJHit, end=-4)
        data3 <- rbind(count.total,freq,cdr3nt,cdr3aa,v,d,j)
        data3 <- as.data.frame(t(data3))
        names(data3) <- c("count.total","freq","cdr3nt","cdr3aa","v","d","j")

        #Extract largest clone in cell barcode and append cell barcode information
        name_out <- str_split(file.name, "_")
        CB <- name_out[[1]][[4]]
        CB_out <- str_split(CB, "\\.")
        CB <- CB_out[[1]][[1]]
        d_out <- data3[1,]
        d_out$CB <- CB

        return(d_out)
    }

    #Convert MiXCR output into VDJtools format
    name.TRA <- str_c(sample.name, "TRAC1_mixcr", sep = "_")
    files  <- list.files(name.TRA, pattern=".txt")
    cl <- makeCluster(cores)
    registerDoParallel(cl)
    TRA.table <- invisible(foreach(file.name = files,
            .combine = rbind, .packages=c("ggplot2", "extrafont", "stringr", "dplyr", "data.table")) %dopar% {Convert(file.name, name.TRA)})
    stopCluster(cl)
    unlink(name.TRA, recursive=TRUE)

    name.TRB <- str_c(sample.name, "TRBC1_mixcr", sep = "_")
    files  <- list.files(name.TRB, pattern=".txt")
    cl <- makeCluster(cores)
    registerDoParallel(cl)
    TRB.table <- invisible(foreach(file.name = files,
            .combine = rbind, .packages=c("ggplot2", "extrafont", "stringr", "dplyr", "data.table")) %dopar% {Convert(file.name, name.TRB)})
    stopCluster(cl)
    unlink(name.TRB, recursive=TRUE)

    #Output histogram and scatter plot for summarizing scTCR status
    Histogram(TRA.table, "TRA", sample.name, dir.output)
    Histogram(TRB.table, "TRB", sample.name, dir.output)

    #thresholding
    TRA.table$count.total <- as.numeric(TRA.table$count.total)
    TRA.table$freq <- as.numeric(TRA.table$freq)
    TRB.table$count.total <- as.numeric(TRB.table$count.total)
    TRB.table$freq <- as.numeric(TRB.table$freq)
    TCRa_th <- subset(TRA.table, count.total >= 2^(count_th) & freq >= freq_th)
    TCRb_th <- subset(TRB.table, count.total >= 2^(count_th) & freq >= freq_th)

    #Paring TCRa and TCRb by cell barcode
    names(TCRa_th) <- c("count.total.A","freq.A","cdr3nt.A","cdr3aa.A","v.A","d.A","j.A", "CB")
    names(TCRb_th) <- c("count.total.B","freq.B","cdr3nt.B","cdr3aa.B","v.B","d.B","j.B", "CB")
    combined <- merge(TCRa_th, TCRb_th, all=T, by ="CB")
    combined$CB <- str_c(sample.name, combined$CB, sep = "_")
    #Exclude cells in which neither TCRa nor TCRb sequence were detected
    combined <- subset(combined, combined$cdr3nt.A != "UD" | combined$cdr3nt.B != "UD")

    #Generate clone id for further analysis
    #Definition of clones can be changed between "ABnt", "ABaa", "Bnt", and "Baa"
    if(clonotype=="nt"){
        combined$clone.id.TCRa <- str_c(combined$cdr3nt.A, combined$v.A, combined$j.A, sep="_")
        combined$clone.id.TCRb <- str_c(combined$cdr3nt.B, combined$v.B, combined$j.B, sep="_")
    }
    if(clonotype=="aa"){
        combined$clone.id.TCRa <- str_c(combined$cdr3aa.A, combined$v.A, combined$j.A, sep="_")
        combined$clone.id.TCRb <- str_c(combined$cdr3aa.B, combined$v.B, combined$j.B, sep="_")
    }
    combined$clone.id.TCRab <- str_c(combined$clone.id.TCRa, combined$clone.id.TCRb, sep = "_")
    name.output <- str_c(dir.output, sample.name, sep = "/") %>% str_c(clonotype, "table", "count_th", count_th, "freq_th", freq_th, clonotype, "csv", sep = ".")
    write.csv(combined, name.output, row.names = FALSE)
    
    combined.tables <- rbind(combined.tables, combined)
    
    return(combined.tables)
}

In [12]:
#Extract TCR sequence for each cel barcode (aCD4)
#iteration process for dataset (CD4/CD8 Seurat object) and clonotype (Ant/Bnt)
set.seed(seed = 42)
seurat.names <- c("ICIrev_aCD4_clust")
sample.name <- "aCD4"
dir.input <- "scTCR_rawdata_aCD4"
cores <- 12
dir.output <- "scTCR_processing"
#Threshold for valid cell barcode with single TCR sequence
count_th <- 5 #read count threshold for all TCR sequences per cell barcode
freq_th <- 0.6 #proportion threshold for largest TCR
clonotype <- "nt" #Definition of clones. nt: nucleotide sequence / aa: amino acid sequnece 
hashtag <- TRUE #Whether hashtags are used or not.
batch <- TRUE #Whether multiple chips are used or not.

################################ Processing layer #################################################
dir.create(dir.output)

##Create TCRa/TCRb combine table for cell barcodes
combined.tables <- CombineTable(dir.input, sample.name, dir.output, count_th, freq_th, clonotype, cores)

##Integrate to the Seurat object; iteration for dataset (CD4/CD8 Seurat object))
#Change batch name in combine.table
combined.tables$CB <- str_replace(combined.tables$CB, "Ex124TCRBD", "Aoki")

for(seurat.name in seurat.names){
    #load Seurat object
    name.input <- str_c(seurat.name, "rda", sep = ".")
    load(name.input)
    
    #Extract meta.data and cell BC information
    meta.data <- seu@meta.data
    meta.data$names <- row.names(meta.data)
    if(hashtag == TRUE){
        if(batch == FALSE){
            CB.info <- str_split(meta.data$names, pattern = "_", simplify = TRUE)
            meta.data$CB <- CB.info[,ncol(CB.info)]
        } else {
            CB.info <- str_split(meta.data$names, pattern = "_", simplify = TRUE)
            meta.data$CB <- str_c(CB.info[,2], CB.info[,3], sep = "_")
        }
    } else {
            meta.data$CB <- meta.data$names
    }

    #Merge to Seurat object of SCT data
    meta.data <- merge(combined.tables, meta.data, all.y = T, by ="CB")
    clone.ids <- c("clone.id.TCRa", "clone.id.TCRb", "clone.id.TCRab")
    for(clone.id in clone.ids){
        clone.id.list <- dplyr::select(meta.data, clone.id)
        row.names(clone.id.list)=as.character(meta.data$names)
        seu <- AddMetaData(object = seu, metadata = clone.id.list, col.name = clone.id)
    }
    
    #Output Seurat object
    name.output <- str_c(seurat.name, "scTCRmerged", clonotype, "rda", sep = ".")
    save(seu, file=name.output)
}

“Removed 170 rows containing non-finite values (stat_bin).”
“Removed 170 rows containing non-finite values (stat_bin).”
“`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.”
“Transformation introduced infinite values in continuous y-axis”
“Transformation introduced infinite values in continuous y-axis”
“Removed 170 rows containing non-finite values (stat_bin2d).”
“Removed 156 rows containing non-finite values (stat_bin).”
“Removed 156 rows containing non-finite values (stat_bin).”
“`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.”
“Transformation introduced infinite values in continuous y-axis”
“Transformation introduced infinite values in continuous y-axis”
“Removed 156 rows containing non-finite values (stat_bin2d).”
Note: Using an external vector in selections is ambiguous.
[34mℹ[39m Use `all_of(clone.id)` instead of `clone.id` to silence this message.
[34mℹ[39m See <https://tidyselect.r-lib.org/reference

In [13]:
#Extract TCR sequence for each cel barcode (aCD4)
#iteration process for dataset (CD4/CD8 Seurat object) and clonotype (Ant/Bnt)
set.seed(seed = 42)
seurat.names <- c("ICIrev_aPDL1_clust", "ICIrev_Control_clust")
sample.name <- "Cont_aPDL1"
dir.input <- "scTCR_rawdata_ContPDL1"
cores <- 12
dir.output <- "scTCR_processing"
#Threshold for valid cell barcode with single TCR sequence
count_th <- 5 #read count threshold for all TCR sequences per cell barcode
freq_th <- 0.6 #proportion threshold for largest TCR
clonotype <- "nt" #Definition of clones. nt: nucleotide sequence / aa: amino acid sequnece 
hashtag <- TRUE #Whether hashtags are used or not.
batch <- TRUE #Whether multiple chips are used or not.

################################ Processing layer #################################################
dir.create(dir.output)

##Create TCRa/TCRb combine table for cell barcodes
combined.tables <- CombineTable(dir.input, sample.name, dir.output, count_th, freq_th, clonotype, cores)

##Integrate to the Seurat object; iteration for dataset (CD4/CD8 Seurat object))
#Change batch name in combine.table
combined.tables$CB <- str_replace(combined.tables$CB, "TCR", "Target")

for(seurat.name in seurat.names){
    #load Seurat object
    name.input <- str_c(seurat.name, "rda", sep = ".")
    load(name.input)
    
    #Extract meta.data and cell BC information
    meta.data <- seu@meta.data
    meta.data$names <- row.names(meta.data)
    if(hashtag == TRUE){
        if(batch == FALSE){
            CB.info <- str_split(meta.data$names, pattern = "_", simplify = TRUE)
            meta.data$CB <- CB.info[,ncol(CB.info)]
        } else {
            CB.info <- str_split(meta.data$names, pattern = "_", simplify = TRUE)
            meta.data$CB <- str_c(CB.info[,2], CB.info[,3], sep = "_")
        }
    } else {
            meta.data$CB <- meta.data$names
    }

    #Merge to Seurat object of SCT data
    meta.data <- merge(combined.tables, meta.data, all.y = T, by ="CB")
    clone.ids <- c("clone.id.TCRa", "clone.id.TCRb", "clone.id.TCRab")
    for(clone.id in clone.ids){
        clone.id.list <- dplyr::select(meta.data, clone.id)
        row.names(clone.id.list)=as.character(meta.data$names)
        seu <- AddMetaData(object = seu, metadata = clone.id.list, col.name = clone.id)
    }
    
    #Output Seurat object
    name.output <- str_c(seurat.name, "scTCRmerged", clonotype, "rda", sep = ".")
    save(seu, file=name.output)
}

“'scTCR_processing' already exists”
“Removed 2721 rows containing non-finite values (stat_bin).”
“Removed 2721 rows containing non-finite values (stat_bin).”
“`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.”
“Transformation introduced infinite values in continuous y-axis”
“Transformation introduced infinite values in continuous y-axis”
“Removed 2721 rows containing non-finite values (stat_bin2d).”
“Removed 1549 rows containing non-finite values (stat_bin).”
“Removed 1549 rows containing non-finite values (stat_bin).”
“`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.”
“Transformation introduced infinite values in continuous y-axis”
“Transformation introduced infinite values in continuous y-axis”
“Removed 1549 rows containing non-finite values (stat_bin2d).”


### SCT analysis after integrating scTCRseq results

In [14]:
###Function: Introduce Signature scores into seurat object
##Input: seurat object, directory for gene_list.txt for calculating signatures
##Output: Seurat object with signature scores

Signature <- function(seu, dir.input){
    #load data
    files  <- list.files(dir.input, pattern="Sig.")

    for(i in files){
      i <- str_c(dir.input, i, sep = "/")
      gene_list <- read.table(i, header = TRUE)
      gene_list <- list(as.vector(gene_list$Genes))
      sig_name <- str_remove(i, dir.input)
      sig_name <- str_remove(sig_name, "/Sig.")
      sig_name <- str_remove(sig_name, ".txt")
      seu <- AddModuleScore(object = seu, 
                            ctrl=10,
                            features = gene_list,
                            name=sig_name)
    } 
    return(seu)
}

In [15]:
###Define function

##Make clone table by assembling clones from scTCR results 
CloneSummary <- function(seu, dir.name, sample.name, clonotype){
    #seu: Seurat object for analyze
    #dir.name: directory name for output
    dir.create(dir.name)

    #load metadata
    meta.data <- as.data.frame(seu@meta.data)
    meta.data$CellBC <- row.names(meta.data)
    
    #Define clone by TCRa / TCRb/ TCRa&b
    if(clonotype == "TCRa"){
       meta.data$clone.id <- meta.data$clone.id.TCRa 
    }
    if(clonotype == "TCRb"){
       meta.data$clone.id <- meta.data$clone.id.TCRb 
    }
    if(clonotype == "TCRab"){
       meta.data$clone.id <- meta.data$clone.id.TCRab 
    } 

    #Summarize cluster distribution of each clone
    tmp_out = table(meta.data$seurat_clusters, meta.data$clone.id)
    tmp_out2 <- as.data.frame(tmp_out, row.names = NULL,
                  responseName = "Freq", stringsAsFactors = TRUE,
                  sep = "", base = list(LETTERS))
    names(tmp_out2) <- c("Clust", "names", "Freq")
    tmp_out2 <- dcast(tmp_out2, names ~ Clust)

    #Summarize the count and frequency of clones 
    tmp = meta.data %>% group_by(`clone.id`) %>%
      dplyr::summarise(count = n()) %>%
      dplyr::arrange(desc(count))
    tmp = as.data.frame(tmp)
    tmp <- tmp[!is.na(tmp$clone.id), ] #NAである細胞を除く
    tmp[,3]=tmp[,2]/sum(tmp[,2])
    colnames(tmp)=c("ntSeq_TRA_TRB_freq", "CloneCount", "CloneFreq")
    tmp <- tmp[order(tmp$ntSeq_TRA_TRB_freq),]
    temp_count <- as.vector(tmp$CloneCount)
    temp_freq <- as.vector(tmp$CloneFreq)
    #Combine to the cluster distribution
    tmp_out3 <- cbind(temp_count, temp_freq, tmp_out2)

    #Assign ranks to clones
    tmp_out3 <- tmp_out3[order(tmp_out3$temp_freq, decreasing=T),]
    rank <-  1:nrow(tmp_out3)
    tmp_out3 <- cbind(rank, tmp_out3)
    tmp_out3 <- data.frame(tmp_out3)
    tmp_out3$rank <- paste("Top",tmp_out3$rank, sep="")
    
    ###Summarize Gene score for each TCR clone
    meta.data.calc <- dplyr::select(meta.data, c("clone.id.TCRb", "Cytotoxicity1", "Tumor.Prog1", "Tumor.Term1"))
    out_score <- aggregate(x=meta.data.calc[c("Cytotoxicity1", "Tumor.Prog1", "Tumor.Term1")],
                     by=list(meta.data.calc$clone.id.TCRb), FUN=mean)
    #Integration
    out <- merge(tmp_out3, out_score, by.x = "names", by.y = "Group.1", all.x = T)
    
    #Output
    name.out <- str_c(dir.name, sample.name, sep = "/") %>% str_c(clonotype, "clone_within_cluster.txt", sep = ".")
    write.table(out, name.out, row.names=F, col.names=T, sep="\t", quote=F) 
}

In [16]:
#Call Seurat object after merging scTCR data
load("ICIrev_Control_clust.scTCRmerged.nt.rda")
seu.cont <- seu
load("ICIrev_aPDL1_clust.scTCRmerged.nt.rda")
seu.pdl1 <- seu
load("ICIrev_aCD4_clust.scTCRmerged.nt.rda")
seu.cd4 <- seu

conditions <- c("Control", "aPDL1", "aCD4")
seu.list <- list(seu.cont, seu.pdl1, seu.cd4)

In [17]:
# Main module

###Input layer 
sample.basename <- "ICIrev"
dir.name <- "scTCR_analysis"
conditions <- c("Control", "aPDL1", "aCD4")
dir.input <- "Signatures"
clonotype <- "nt" #Definition of clones. nt: nucleotide sequence / aa: amino acid sequnece 

########################## Processing layer #############################
for(i in 1:length(conditions)){
    #Call seurat object and condition name
    seu <- seu.list[[i]]
    condition <- conditions[i]
    sample.name <- str_c(sample.basename, condition, sep = "_")

    #Put signature gene set into "Signatures" directory
    seu <- Signature(seu, dir.input)
    #Make clone table by assembling clones from scTCR results 
    CloneSummary(seu, dir.name, sample.name, "TCRb") 

    #Output Seurat object
    name.output <- str_c(sample.name, "scTCRmerged", clonotype, "AddSig", "rda", sep = ".")
    save(seu, file=name.output)
}

“The following features are not present in the object: Ifna1, not searching for symbol synonyms”
“The following features are not present in the object: Cd244, not searching for symbol synonyms”
Using Freq as value column: use value.var to override.

“The following features are not present in the object: Ifna1, not searching for symbol synonyms”
“The following features are not present in the object: Cd244, not searching for symbol synonyms”
“'scTCR_analysis' already exists”
Using Freq as value column: use value.var to override.

“The following features are not present in the object: Ifna1, not searching for symbol synonyms”
“The following features are not present in the object: Cd244, not searching for symbol synonyms”
“'scTCR_analysis' already exists”
Using Freq as value column: use value.var to override.



In [18]:
### Detect dLN-Tumor OL clones

###Input layer
#Specify query files: frequency and cluster distribution table of scTCR clones
dir.query <- "scTCR_analysis"
conditions <- c("aCD4", "aPDL1", "Control")

#Specify the repertoire data of dLN
dir.bulk <- "BulkTCR_rawdata"

#Output directory
dir.output <- "scTCR_analysis"

cores <- 4

########################## Processing layer #############################
for(condition in conditions){
    name.bulk <- str_c(dir.bulk, "CD8_dLN_SCT_", sep = "/") %>% str_c(condition, ".txt", sep = "")
    name.query <- str_c(dir.query, "ICIrev_", sep = "/") %>%
    str_c(condition, sep = "") %>%
    str_c("TCRb.clone_within_cluster.txt", sep = ".") 
    
    #load query files
    clone.q <- read.table(name.query, header = TRUE)
    clone.q$TCRb <- clone.q$names
    
    #load dLN reprtoire files
    d <- tableread_fast(name.bulk, header = TRUE, sep="\t", quote = "\"")
    #Reconstruct the information of clones for searching overlap
    d$TCRb <- str_c(d$cdr3nt, d$v, d$j, sep = "_")
    d <- dplyr::select(d, c("TCRb", "freq"))
    names(d) <- c("TCRb", "freq_dLN")
    
    #Search clones overlapped with Tumor scTCR repertoire
    d_output <- merge(clone.q, d, by = "TCRb", all.x = T)
    
    ###Define Oligoclonal / Polyclonal fraction of dLN-tumor OL
    d_output$OL <- "nonOL"
    #Extract OL clones
    data_LT <- dplyr::filter(d_output, !is.na(freq_dLN))
    data_nonOL <- dplyr::filter(d_output, is.na(freq_dLN))
    #Determine Oligoclonal / Polyclonal fraction
    data_LT <- data_LT[order(data_LT$temp_freq, decreasing = T),]
    count.poly <- nrow(data_LT)-10
    data_LT$OL <- c(rep("Oligo", times = 10), rep("Poly", times = count.poly))
    d_output <- rbind(data_LT, data_nonOL)
    
    name.out <- str_c(dir.output, condition, sep = "/") %>% str_c("scTCR.dLNOL.csv", sep = ".")
    write.csv(d_output, name.out, row.names = FALSE)
}

ERROR: Error in fread(i, header = header, sep = sep, quote = quote, nThread = 32): File 'BulkTCR_rawdata/CD8_dLN_SCT_aCD4.txt' does not exist or is non-readable. getwd()=='/work'
