In [4]:
# Print the directory the session started in, then switch to the analysis
# directory; the relative paths used further down ("./data/...",
# "results/...") are resolved against the directory set here.
getwd()
setwd("/liulab/galib/dlbcl_manuscript/")
# tidyverse supplies the %>% pipe and ggplot2 (ggsave, labs, theme) used below.
library(tidyverse)
library(Seurat)
# harmony supplies RunHarmony(), used inside preprocess_seurat().
library(harmony)
library(dplyr)
# NOTE(review): no DoubletFinder function is called in the cells visible in
# this notebook -- confirm whether this load is still required.
library(DoubletFinder)
# rBCS supplies ExportSeurat(), used to write the .bcs files.
library(rBCS)

In [2]:
#' Normalise, integrate and cluster a Seurat object
#'
#' Runs, in this order: log-normalisation (scale factor 10000), selection of
#' 2000 variable features ("vst"), scaling while regressing out `percent.mt`,
#' PCA with 50 components, Harmony integration grouped by `pool_id`, UMAP and
#' neighbour-graph construction on Harmony dimensions 1:50, and clustering at
#' resolution 1.5.
#'
#' @param obj A Seurat object. `percent.mt` and `pool_id` must be available on
#'   it (presumably as metadata columns -- confirm against the upstream QC
#'   step).
#' @return The processed Seurat object.
preprocess_seurat <- function(obj) {
  obj %>%
    NormalizeData(
      normalization.method = "LogNormalize",
      scale.factor = 10000
    ) %>%
    FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
    ScaleData(vars.to.regress = "percent.mt") %>%
    # `npcs` is the full argument name; the previous `npc` only worked through
    # partial argument matching.
    RunPCA(npcs = 50) %>%
    RunHarmony(group.by.vars = "pool_id") %>%
    RunUMAP(reduction = "harmony", dims = 1:50) %>%
    FindNeighbors(reduction = "harmony", dims = 1:50) %>%
    FindClusters(resolution = 1.5)
}

In [None]:
##############
### merged ###
##############
# Load the merged object (the file name suggests it is already QC-filtered and
# doublet-removed), re-run the full preprocessing/clustering pipeline on it and
# save the result.
merged_final <- readRDS("./data/objects/merged_qc_doublet_rm.obj")

print("Start to recluster merged object after doublets removal...")

merged <- preprocess_seurat(merged_final)
# A stray closing parenthesis on this call previously made the cell fail to
# parse.
saveRDS(merged, "./data/objects/merged_res1.5_obj.rds")

merged %>% dim()
# 32285 genes x 376307 cells
print("merged space done!")

In [None]:
##############
## CD3+ CD8- #
##############
# Split out cells with at least one detected CD3 transcript (Cd3d, Cd3g or
# Cd3e raw count != 0) and no Cd8a / Cd8b1 counts. The "strict" mask
# additionally requires zero counts for the B-cell markers Cd79b, Cd19 and
# Pax5; only the strict mask is used for subsetting.

print("Start to split cd3_pos_cd8_neg obj... ")

rna_counts <- merged[["RNA"]]@counts

cd3_detected <- rna_counts["Cd3d", ] != 0 |
  rna_counts["Cd3g", ] != 0 |
  rna_counts["Cd3e", ] != 0
cd8_absent <- rna_counts["Cd8a", ] == 0 &
  rna_counts["Cd8b1", ] == 0
b_markers_absent <- rna_counts["Cd79b", ] == 0 &
  rna_counts["Cd19", ] == 0 &
  rna_counts["Pax5", ] == 0

cd3_pos_cd8_neg_index_loose <- cd3_detected & cd8_absent
cd3_pos_cd8_neg_index_strict <- cd3_pos_cd8_neg_index_loose & b_markers_absent

# How many cells pass each mask.
cd3_pos_cd8_neg_index_loose %>% table()
cd3_pos_cd8_neg_index_strict %>% table()

cd3_pos_cd8_neg <- merged[, cd3_pos_cd8_neg_index_strict]

# Sanity checks on the subset, read from its own raw-count matrix.
cd8_neg_counts <- cd3_pos_cd8_neg[["RNA"]]@counts

print("Check markers == 0")
cd8_neg_counts["Pax5", ] %>% table()
cd8_neg_counts["Cd79b", ] %>% table()
cd8_neg_counts["Cd19", ] %>% table()
cd8_neg_counts["Cd8b1", ] %>% table()
cd8_neg_counts["Cd8a", ] %>% table()

print("Check markers != 0")
cd8_neg_counts["Cd4", ] %>% table()

# Re-cluster the subset on its own, then persist it as .rds and .bcs.
cd3_pos_cd8_neg <- preprocess_seurat(cd3_pos_cd8_neg)
saveRDS(cd3_pos_cd8_neg, "./data/objects/cd3_pos_cd8_neg_res1.5_obj.rds")
ExportSeurat(
  cd3_pos_cd8_neg,
  "./data/objects/cd3_pos_cd8_neg_res1.5_obj.bcs",
  overwrite = TRUE
)

dim(cd3_pos_cd8_neg)
# 32285 genes x 66275 cells
print("cd3_pos_cd8_neg space done!")

In [None]:
##############
## CD3+ CD4- #
##############
# Split out cells with at least one detected CD3 transcript (Cd3d, Cd3g or
# Cd3e raw count != 0) and no Cd4 counts. The "strict" mask additionally
# requires zero counts for Cd79b, Cd19 and Pax5. Barcodes that are also in
# the cd3_pos_cd8_neg object are then dropped, so the two spaces share no
# cell.
print("Start to split out cd3_pos_cd4_neg obj... ")

rna_counts <- merged[["RNA"]]@counts

cd3_detected <- rna_counts["Cd3d", ] != 0 |
  rna_counts["Cd3g", ] != 0 |
  rna_counts["Cd3e", ] != 0
cd4_absent <- rna_counts["Cd4", ] == 0
b_markers_absent <- rna_counts["Cd79b", ] == 0 &
  rna_counts["Cd19", ] == 0 &
  rna_counts["Pax5", ] == 0

cd3_pos_cd4_neg_index_loose <- cd3_detected & cd4_absent
cd3_pos_cd4_neg_index_strict <- cd3_pos_cd4_neg_index_loose & b_markers_absent

cd3_pos_cd4_neg <- merged[, cd3_pos_cd4_neg_index_strict]
dim(cd3_pos_cd4_neg)

print("Get shared cell barcodes of cd3_pos_cd4_neg and cd3_pos_cd8_neg space...")
common_bc <- intersect(
  colnames(cd3_pos_cd4_neg),
  colnames(cd3_pos_cd8_neg)
)

length(common_bc)

print("Remove shared cell barcodes from cd3_pos_cd4_neg space...")

# Despite its name, common_index is TRUE for the barcodes to KEEP (those that
# are not shared with cd3_pos_cd8_neg).
common_index <- !(colnames(cd3_pos_cd4_neg) %in% common_bc)
common_index %>% table()
cd3_pos_cd4_neg <- cd3_pos_cd4_neg[, common_index]

print("Dim after removing shared cells...")

dim(cd3_pos_cd4_neg)

# Sanity checks on the final subset, read from its own raw-count matrix.
cd4_neg_counts <- cd3_pos_cd4_neg[["RNA"]]@counts

print("Check markers == 0")
cd4_neg_counts["Pax5", ] %>% table()
cd4_neg_counts["Cd79b", ] %>% table()
cd4_neg_counts["Cd19", ] %>% table()
cd4_neg_counts["Cd4", ] %>% table()

print("Check markers != 0")
cd4_neg_counts["Cd3e", ] %>% table()
cd4_neg_counts["Cd8b1", ] %>% table()
cd4_neg_counts["Cd8a", ] %>% table()

# Re-cluster the subset on its own, then persist it as .rds and .bcs.
cd3_pos_cd4_neg <- preprocess_seurat(cd3_pos_cd4_neg)
saveRDS(cd3_pos_cd4_neg, "./data/objects/cd3_pos_cd4_neg_res1.5_obj.rds")

ExportSeurat(
  cd3_pos_cd4_neg,
  "./data/objects/cd3_pos_cd4_neg_res1.5_obj.bcs",
  overwrite = TRUE
)
dim(cd3_pos_cd4_neg)
# 32285 genes x 38842 cells
print("cd3_pos_cd4_neg space done! ")

In [None]:
############
## B cell ##
############
# B-cell space: cells with a non-zero raw count for at least one of Cd79b,
# Cd19 or Pax5 ("loose"), restricted to cells with zero counts for every
# T-cell marker listed in T_cell_index ("strict"). B_cell_index_loose and
# T_cell_index are also read by the non-B non-T cell further down.

print("Start to split B-cell space... ")

rna_counts <- merged[["RNA"]]@counts

B_cell_index_loose <- rna_counts["Cd79b", ] != 0 |
  rna_counts["Cd19", ] != 0 |
  rna_counts["Pax5", ] != 0

# TRUE when any of Cd4, Cd8a, Cd8b1, Cd3e, Cd3d or Cd3g is detected.
T_cell_index <- rna_counts["Cd4", ] != 0 |
  rna_counts["Cd8a", ] != 0 |
  rna_counts["Cd8b1", ] != 0 |
  rna_counts["Cd3e", ] != 0 |
  rna_counts["Cd3d", ] != 0 |
  rna_counts["Cd3g", ] != 0

T_cell_index %>% table()
table(!T_cell_index & B_cell_index_loose)
B_cell_index_strict <- B_cell_index_loose & !T_cell_index

B_cell_index_loose %>% table()
B_cell_index_strict %>% table()

B_obj <- merged[, B_cell_index_strict]

# Sanity checks on the subset, read from its own raw-count matrix.
b_counts <- B_obj[["RNA"]]@counts

print("Check markers ==0")
b_counts["Cd4", ] %>% table()
b_counts["Cd8a", ] %>% table()
b_counts["Cd8b1", ] %>% table()
b_counts["Cd3e", ] %>% table()
b_counts["Cd3d", ] %>% table()
b_counts["Cd3g", ] %>% table()

print("Check markers !=0")
b_counts["Pax5", ] %>% table()
b_counts["Cd19", ] %>% table()
b_counts["Cd79b", ] %>% table()

# Re-cluster the subset on its own, then persist it as .rds and .bcs.
B_obj <- preprocess_seurat(B_obj)
saveRDS(B_obj, "./data/objects/B_cell_res1.5_obj.rds")
ExportSeurat(
  B_obj,
  "./data/objects/B_cell_res1.5_obj.bcs",
  overwrite = TRUE
)

dim(B_obj)
# 32285 genes x 187259 cells
print("B-cell space done! ")

In [None]:
#################
## non B non T ##
#################

# Cells with no detected B-cell marker (B_cell_index_loose) and no detected
# T-cell marker (T_cell_index); both masks come from the B-cell cell above.
# NOTE(review): the previous comment here said this space was re-split "to
# include cd4+/cd8+ cd3- B- cells", but T_cell_index is TRUE for any cell with
# Cd4, Cd8a or Cd8b1 counts, so those cells are excluded by this mask --
# confirm which behaviour is intended.

print("Start to splitting non_B_non_T...")
non_B_non_T_obj_index <- !B_cell_index_loose & !T_cell_index
non_B_non_T_obj_index %>% table()


# non_B_non_T + B_strict + T_strict == sum ? Yes
# NOTE(review): the per-space cell counts recorded in this notebook add up to
# fewer cells than the merged object -- re-check this statement.
dim(merged)


non_B_non_T_obj <- merged[, non_B_non_T_obj_index]

# Sanity checks on the subset, read from its own raw-count matrix.
other_counts <- non_B_non_T_obj[["RNA"]]@counts

# Check marker  == 0
other_counts["Cd79b", ] %>% table()
other_counts["Cd4", ] %>% table()
other_counts["Cd19", ] %>% table()
other_counts["Cd3d", ] %>% table()
other_counts["Cd3e", ] %>% table()
other_counts["Cd3g", ] %>% table()

# Check marker  != 0
other_counts["Cd68", ] %>% table()
other_counts["Fcgr3", ] %>% table()
other_counts["Cd14", ] %>% table()

# Re-cluster the subset on its own, then persist it as .rds and .bcs.
non_B_non_T_obj <- preprocess_seurat(non_B_non_T_obj)
saveRDS(non_B_non_T_obj, "./data/objects/non_B_non_T_res1.5_obj.rds")
ExportSeurat(
  non_B_non_T_obj,
  "./data/objects/non_B_non_T_res1.5_obj.bcs",
  overwrite = TRUE
)

dim(non_B_non_T_obj)
# 32285 genes x 55084 cells
print("non_B_non_T space done! ")

In [15]:
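# 55084 (non-B non-T) + 187259 (B cell) + 38842 (CD3+ CD4-) + 66275 (CD3+ CD8-)
# = 347460 cells assigned to a space, out of the 376307 cells recorded for the
# merged object above.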

In [None]:
##################
## Feature Plot ##
##################
# Draw the same marker panel on the merged object and on each of the four
# sub-spaces, then write one PDF per object.

B_cell_features <- c("Pax5", "Cd19", "Cd79b")
B_Ig_features <- c("Ighm", "Ighd", "Igkc")

cd3_features <- c("Cd3e", "Cd3g", "Cd3d")
cd8_features <- c("Cd8b1", "Cd8a")
cd4_features <- c("Cd4")

mono_macro_features <- c("Cd68", "Fcgr3", "Cd14")

# Full panel, in the order the genes appear in the plot grid.
marker_panel <- c(
  B_cell_features,
  B_Ig_features,
  cd3_features,
  cd8_features,
  cd4_features,
  mono_macro_features
)

# FeaturePlot of the marker panel for one object, laid out four plots per row.
plot_marker_panel <- function(obj, features = marker_panel) {
  FeaturePlot(obj, features = features, keep.scale = NULL, ncol = 4)
}

merged_featureplot <- plot_marker_panel(merged)
cd3_pos_cd4_neg_featureplot <- plot_marker_panel(cd3_pos_cd4_neg)
cd3_pos_cd8_neg_featureplot <- plot_marker_panel(cd3_pos_cd8_neg)
B_cell_featureplot <- plot_marker_panel(B_obj)
non_B_non_T_featureplot <- plot_marker_panel(non_B_non_T_obj)


ggsave(
  filename = "results/figures/1_merged_featureplot.pdf",
  plot = merged_featureplot,
  width = 12, height = 10
)
ggsave(
  filename = "results/figures/1_cd3_pos_cd4_neg_featureplot.pdf",
  plot = cd3_pos_cd4_neg_featureplot,
  width = 12, height = 10
)
ggsave(
  filename = "results/figures/1_cd3_pos_cd8_neg_featureplot.pdf",
  plot = cd3_pos_cd8_neg_featureplot,
  width = 12, height = 10
)
ggsave(
  filename = "results/figures/1_B_cell_featureplot.pdf",
  plot = B_cell_featureplot,
  width = 12, height = 10
)
ggsave(
  filename = "results/figures/1_non_B_non_T_featureplot.pdf",
  plot = non_B_non_T_featureplot,
  width = 12, height = 10
)

### UMAP on new data objects

In [None]:
# UMAP figures: the merged object coloured by pool_id (legend hidden), then
# one cluster-labelled UMAP per sub-space. Each plot is kept in a variable,
# displayed, and passed to ggsave() explicitly so the saved figure does not
# depend on which plot happens to be ggplot2's "last plot".

# Cluster-labelled UMAP for one object with the styling shared by all spaces.
plot_cluster_umap <- function(obj, title) {
  DimPlot(obj, reduction = "umap", label = TRUE, pt.size = 0.2) +
    labs(title = title, y = NULL, x = NULL) +
    theme(text = element_text(size = 20))
}

merged_umap <- DimPlot(
  merged,
  reduction = "umap",
  label = FALSE, pt.size = 0.2, group.by = "pool_id"
) +
  labs(title = "merged umap by samples res=1.5", y = NULL, x = NULL) +
  theme(text = element_text(size = 20)) +
  NoLegend()
merged_umap
# NOTE(review): the file name spells "uamp"; it is left unchanged so that any
# existing reference to this output file keeps working.
ggsave(
  "./results/figures/1_merged_all_clusters_uamp_by_samples.pdf",
  plot = merged_umap, width = 10, height = 8
)

cd3_pos_cd4_neg_umap <- plot_cluster_umap(
  cd3_pos_cd4_neg,
  "CD3+ CD4- space umap by clusters res=1.5"
)
cd3_pos_cd4_neg_umap
ggsave(
  "./results/figures/1_cd3_pos_cd4_neg_all_clusters_umap.pdf",
  plot = cd3_pos_cd4_neg_umap, width = 10, height = 8
)

cd3_pos_cd8_neg_umap <- plot_cluster_umap(
  cd3_pos_cd8_neg,
  "CD3+ CD8- space umap by clusters res=1.5"
)
cd3_pos_cd8_neg_umap
ggsave(
  "./results/figures/1_cd3_pos_cd8_neg_all_clusters_umap.pdf",
  plot = cd3_pos_cd8_neg_umap, width = 10, height = 8
)

# The B-cell object is named B_obj in this notebook; `B_cell` was never
# defined.
B_cell_umap <- plot_cluster_umap(
  B_obj,
  "B cell space umap by clusters res=1.5"
)
B_cell_umap
ggsave(
  "./results/figures/1_B_cell_all_clusters_umap.pdf",
  plot = B_cell_umap, width = 10, height = 8
)

# The non-B non-T object is named non_B_non_T_obj in this notebook;
# `non_B_non_T` was never defined.
non_B_non_T_umap <- plot_cluster_umap(
  non_B_non_T_obj,
  "Non B non T space umap by clusters res=1.5"
)
non_B_non_T_umap
ggsave(
  "./results/figures/1_non_B_non_T_all_clusters_umap.pdf",
  plot = non_B_non_T_umap, width = 10, height = 8
)

In [None]:
# Marker-panel feature plots for the merged object and the four sub-spaces,
# written to the "*_all_clusters_featureplot.pdf" files under the project
# root resolved by here::here().

B_cell_features <- c("Pax5", "Cd19", "Cd79b")
B_Ig_features <- c("Ighm", "Ighd", "Igkc")

cd3_features <- c("Cd3e", "Cd3g", "Cd3d")
cd8_features <- c("Cd8b1", "Cd8a")
cd4_features <- c("Cd4")

mono_macro_features <- c("Cd68", "Fcgr3", "Cd14")

# Full panel, in the order the genes appear in the plot grid.
marker_panel <- c(
  B_cell_features,
  B_Ig_features,
  cd3_features,
  cd8_features,
  cd4_features,
  mono_macro_features
)

# FeaturePlot of the marker panel for one object, laid out four plots per row.
plot_marker_panel <- function(obj, features = marker_panel) {
  FeaturePlot(obj, features = features, keep.scale = NULL, ncol = 4)
}

merged_featureplot <- plot_marker_panel(merged)
cd3_pos_cd4_neg_featureplot <- plot_marker_panel(cd3_pos_cd4_neg)
cd3_pos_cd8_neg_featureplot <- plot_marker_panel(cd3_pos_cd8_neg)
# The objects created earlier in this notebook are B_obj and non_B_non_T_obj;
# `B_cell` and `non_B_non_T` were never defined.
B_cell_featureplot <- plot_marker_panel(B_obj)
non_B_non_T_featureplot <- plot_marker_panel(non_B_non_T_obj)


# here is not attached by a library() call in this notebook, so it is called
# through its namespace.
ggsave(
  here::here("results/figures/1_merged_all_clusters_featureplot.pdf"),
  plot = merged_featureplot, width = 12, height = 10
)
ggsave(
  here::here("results/figures/1_cd3_pos_cd4_neg_all_clusters_featureplot.pdf"),
  plot = cd3_pos_cd4_neg_featureplot, width = 12, height = 10
)
ggsave(
  here::here("results/figures/1_cd3_pos_cd8_neg_all_clusters_featureplot.pdf"),
  plot = cd3_pos_cd8_neg_featureplot, width = 12, height = 10
)
ggsave(
  here::here("results/figures/1_B_cell_all_clusters_featureplot.pdf"),
  plot = B_cell_featureplot, width = 12, height = 10
)
ggsave(
  here::here("results/figures/1_non_B_non_T_all_clusters_featureplot.pdf"),
  plot = non_B_non_T_featureplot, width = 12, height = 10
)