In [None]:
# Attach a package without printing its start-up messages.
# All arguments are handed to library() unchanged, so a bare package name
# (e.g. quiet_library(Seurat)) works exactly as it does with library().
quiet_library <- function(...) {
    suppressPackageStartupMessages(
        expr = library(...)
    )
}
quiet_library(Seurat)
quiet_library(ggplot2)
quiet_library(Matrix)
quiet_library(H5weaver)
quiet_library(dplyr)
quiet_library(viridis)
quiet_library(harmony)
quiet_library(Nebulosa)

# Load Data

In [None]:
# Load the CD4 subset object from its .rds file (path is relative to the notebook).
cd4 <- readRDS(file = '../Seurat_Objects/cd4_subset.rds')

In [None]:
# Print the object summary.
cd4

In [None]:
# Cell counts per pbmc_sample_id.
table(cd4$pbmc_sample_id)

In [None]:
# One object per batch_id value; normalisation and gating below are done per batch.
cd4.batch <- SplitObject(cd4, split.by = 'batch_id')

In [None]:
# Re-normalise the ADT assay with CLR inside each batch object.
# NOTE(review): margin = 2 is documented by Seurat as normalising across cells
# rather than across features — confirm against the installed Seurat version.
cd4.batch <- lapply(cd4.batch, function(x){
    NormalizeData(x, assay = 'ADT', normalization.method = 'CLR', margin = 2)})

In [None]:
# Build a per-cell data frame for plotting: the object's metadata plus one
# column per gating marker read from the ADT assay.
#
# seurat_object: an object exposing @meta.data and @assays$ADT, where the ADT
#                assay can be indexed by feature name.
# Returns the metadata data frame with the columns TCRValpha7.2, CD161, CD197,
# CD45RA, CD27, CD25 and CD127 appended, in that order.
df_build <- function(seurat_object) {
    # Output column name -> ADT feature name. Only the TCR entry differs,
    # because the feature name contains a hyphen.
    marker_rows <- c(
        TCRValpha7.2 = 'TCR-Valpha7.2',
        CD161 = 'CD161',
        CD197 = 'CD197',
        CD45RA = 'CD45RA',
        CD27 = 'CD27',
        CD25 = 'CD25',
        CD127 = 'CD127'
    )

    cell_table <- seurat_object@meta.data
    for (column in names(marker_rows)) {
        # as.vector() flattens the single-feature row into a plain vector.
        cell_table[[column]] <- as.vector(
            seurat_object@assays$ADT[marker_rows[[column]], ]
        )
    }
    cell_table
}

# Treg Analysis

In [None]:
# Pull the three per-batch objects out of the split list by batch ID.
# NOTE(review): the "nonmait_" prefix is not explained in the visible part of
# this notebook — presumably carried over from an earlier gating step; confirm.
nonmait_b065 <- cd4.batch$`B065`
nonmait_b069 <- cd4.batch$`B069`
nonmait_b076 <- cd4.batch$`B076`

In [None]:
# Wide canvas so the three batch panels sit side by side.
options(repr.plot.width = 24, repr.plot.height = 8)

# CD127 vs CD25 (ADT) per batch, grouped by the predicted.t_celltype.l2 column.
# The vertical / horizontal lines use the same per-batch CD127 / CD25 cut-offs
# as the Treg subsets taken further down.
p1 <- FeatureScatter(
    object = nonmait_b065,
    feature1 = "adt_CD127",
    feature2 = "adt_CD25",
    group.by = "predicted.t_celltype.l2"
) &
    geom_vline(xintercept = 1.75) &
    geom_hline(yintercept = 1) &
    ggtitle("B065")

p2 <- FeatureScatter(
    object = nonmait_b069,
    feature1 = "adt_CD127",
    feature2 = "adt_CD25",
    group.by = "predicted.t_celltype.l2"
) &
    geom_vline(xintercept = 1.5) &
    geom_hline(yintercept = 0.8) &
    ggtitle("B069")

p3 <- FeatureScatter(
    object = nonmait_b076,
    feature1 = "adt_CD127",
    feature2 = "adt_CD25",
    group.by = "predicted.t_celltype.l2"
) &
    geom_vline(xintercept = 1.4) &
    geom_hline(yintercept = 0.7) &
    ggtitle("B076")

# Show the three panels in one row.
p1 + p2 + p3

In [None]:
# Wide canvas so the three batch panels sit side by side.
options(repr.plot.width = 24, repr.plot.height = 8)

# Same CD127 vs CD25 scatter, but grouped by batch_id and drawn in black,
# without any gate lines.
p1 <- FeatureScatter(
    object = nonmait_b065,
    feature1 = "adt_CD127",
    feature2 = "adt_CD25",
    group.by = "batch_id",
    cols = "black"
) &
    ggtitle("B065")

p2 <- FeatureScatter(
    object = nonmait_b069,
    feature1 = "adt_CD127",
    feature2 = "adt_CD25",
    group.by = "batch_id",
    cols = "black"
) &
    ggtitle("B069")

p3 <- FeatureScatter(
    object = nonmait_b076,
    feature1 = "adt_CD127",
    feature2 = "adt_CD25",
    group.by = "batch_id",
    cols = "black"
) &
    ggtitle("B076")

# Show the three panels in one row.
p1 + p2 + p3

In [None]:
# Square canvas for a single panel.
options(repr.plot.width = 8, repr.plot.height = 8)

# Metadata + ADT marker table for all B065 cells (built before any Treg gating).
treg_b065_df <- df_build(nonmait_b065)

# Plain ggplot point cloud of CD127 against CD25.
ggplot(
    treg_b065_df,
    aes(x = CD127, y = CD25)
) +
    geom_point(size = 0.5)

In [None]:
# Treg gate per batch: CD25 above and CD127 below the batch-specific cut-offs
# (the same values as the lines drawn in the first scatter plots of this section).
treg_b065 <- subset(nonmait_b065, subset = adt_CD25 > 1 & adt_CD127 < 1.75)
treg_b069 <- subset(nonmait_b069, subset = adt_CD25 > 0.8 & adt_CD127 < 1.5)
treg_b076 <- subset(nonmait_b076, subset = adt_CD25 > 0.7 & adt_CD127 < 1.4)

In [None]:
# Combine the gated cells from the three batches into one object.
treg_combined <- merge(treg_b065, c(treg_b069, treg_b076))

In [None]:
# Gated cell counts per sample.
table(treg_combined$pbmc_sample_id)

In [None]:
# Label every cell of the merged object (one entry per RNA-assay column) as "Treg".
treg_combined$gating_celltype <- rep("Treg", length(colnames(treg_combined[['RNA']])))

In [None]:
# NOTE(review): this save uses an absolute path, while the other saves visible
# in this notebook write to '../Seurat_Objects/' — confirm both resolve to the
# same directory.
saveRDS(treg_combined, file = '/home/jupyter/Organized_Gating_Analysis/Seurat_Objects/treg.rds')

# Naive vs Non-naive

In [None]:
# Complement of the Treg gate: the same per-batch CD25 / CD127 expressions as
# the Treg subsets, with invert = TRUE so the cells that did NOT match are kept.
# TRUE is spelled out because T is an ordinary variable that can be reassigned,
# which would silently turn this into the Treg selection.
nontreg_b065 <- subset(nonmait_b065, subset = adt_CD25 > 1 & adt_CD127 < 1.75, invert = TRUE)
nontreg_b069 <- subset(nonmait_b069, subset = adt_CD25 > 0.8 & adt_CD127 < 1.5, invert = TRUE)
nontreg_b076 <- subset(nonmait_b076, subset = adt_CD25 > 0.7 & adt_CD127 < 1.4, invert = TRUE)

In [None]:
# Metadata + ADT marker tables for the non-Treg cells of each batch,
# used by the ggplot density and histogram cells below.
nontreg_b065_df <- df_build(nontreg_b065)
nontreg_b069_df <- df_build(nontreg_b069)
nontreg_b076_df <- df_build(nontreg_b076)

In [None]:
# Wide canvas so the three batch panels sit side by side.
options(repr.plot.width = 24, repr.plot.height = 8)

# Filled 2-D density of CD197 vs CD45RA for non-Treg cells, one density per
# predicted.t_celltype.l2 value, with candidate cut-off lines overlaid.
# NOTE(review): the CD45RA lines drawn here (3.2 / 3.2 / 3.5) differ from the
# CD45RA cut-offs used by the naive gate further down (3 / 3.1 / 3.35).
p1 <- ggplot(
    nontreg_b065_df,
    aes(x = CD197, y = CD45RA, color = predicted.t_celltype.l2)
) +
    stat_density_2d(
        geom = "polygon",
        aes(alpha = ..level.., fill = predicted.t_celltype.l2),
        bins = 10
    ) +
    geom_vline(xintercept = 1.1) &
    geom_hline(yintercept = 3.2) &
    ggtitle("B065")

p2 <- ggplot(
    nontreg_b069_df,
    aes(x = CD197, y = CD45RA, color = predicted.t_celltype.l2)
) +
    stat_density_2d(
        geom = "polygon",
        aes(alpha = ..level.., fill = predicted.t_celltype.l2),
        bins = 10
    ) +
    geom_vline(xintercept = 1.1) &
    geom_hline(yintercept = 3.2) &
    ggtitle("B069")

p3 <- ggplot(
    nontreg_b076_df,
    aes(x = CD197, y = CD45RA, color = predicted.t_celltype.l2)
) +
    stat_density_2d(
        geom = "polygon",
        aes(alpha = ..level.., fill = predicted.t_celltype.l2),
        bins = 10
    ) +
    geom_vline(xintercept = 1) &
    geom_hline(yintercept = 3.5) &
    ggtitle("B076")

# Show the three panels in one row.
p1 + p2 + p3

In [None]:
# Wide canvas so the three batch panels sit side by side.
options(repr.plot.width = 24, repr.plot.height = 8)

# Filled 2-D density of CD27 vs CD45RA for non-Treg cells, one density per
# predicted.t_celltype.l2 value, with candidate cut-off lines overlaid.
# NOTE(review): the CD45RA lines drawn here (3.2 / 3.2 / 3.5) differ from the
# CD45RA cut-offs used by the naive gate further down (3 / 3.1 / 3.35).
p1 <- ggplot(
    nontreg_b065_df,
    aes(x = CD27, y = CD45RA, color = predicted.t_celltype.l2)
) +
    stat_density_2d(
        geom = "polygon",
        aes(alpha = ..level.., fill = predicted.t_celltype.l2),
        bins = 10
    ) +
    geom_vline(xintercept = 2.75) &
    geom_hline(yintercept = 3.2) &
    ggtitle("B065")

p2 <- ggplot(
    nontreg_b069_df,
    aes(x = CD27, y = CD45RA, color = predicted.t_celltype.l2)
) +
    stat_density_2d(
        geom = "polygon",
        aes(alpha = ..level.., fill = predicted.t_celltype.l2),
        bins = 10
    ) +
    geom_vline(xintercept = 2.75) &
    geom_hline(yintercept = 3.2) &
    ggtitle("B069")

p3 <- ggplot(
    nontreg_b076_df,
    aes(x = CD27, y = CD45RA, color = predicted.t_celltype.l2)
) +
    stat_density_2d(
        geom = "polygon",
        aes(alpha = ..level.., fill = predicted.t_celltype.l2),
        bins = 10
    ) +
    geom_vline(xintercept = 2.3) &
    geom_hline(yintercept = 3.5) &
    ggtitle("B076")

# Show the three panels in one row.
p1 + p2 + p3

In [None]:
# Wide canvas so the three batch panels sit side by side.
options(repr.plot.width = 24, repr.plot.height = 8)

# CD197 vs CD45RA for non-Treg cells, grouped by predicted.t_celltype.l2, with
# density contours. The lines use the CD197 / CD45RA cut-offs of the naive gate.
p1 <- FeatureScatter(
    object = nontreg_b065,
    feature1 = "adt_CD197",
    feature2 = "adt_CD45RA",
    group.by = "predicted.t_celltype.l2",
    pt.size = 0.0001
) &
    geom_density_2d() &
    geom_vline(xintercept = 1.1) &
    geom_hline(yintercept = 3) &
    ggtitle("B065")

p2 <- FeatureScatter(
    object = nontreg_b069,
    feature1 = "adt_CD197",
    feature2 = "adt_CD45RA",
    group.by = "predicted.t_celltype.l2",
    pt.size = 0.0001
) &
    geom_density_2d() &
    geom_vline(xintercept = 1.1) &
    geom_hline(yintercept = 3.1) &
    ggtitle("B069")

p3 <- FeatureScatter(
    object = nontreg_b076,
    feature1 = "adt_CD197",
    feature2 = "adt_CD45RA",
    group.by = "predicted.t_celltype.l2",
    pt.size = 0.0001
) &
    geom_density_2d() &
    geom_vline(xintercept = 1) &
    geom_hline(yintercept = 3.35) &
    ggtitle("B076")

# Show the three panels in one row.
p1 + p2 + p3

In [None]:
# Wide canvas so the three batch panels sit side by side.
options(repr.plot.width = 24, repr.plot.height = 8)

# CD27 vs CD45RA for non-Treg cells, grouped by predicted.t_celltype.l2, with
# density contours. The lines use the CD27 / CD45RA cut-offs of the naive gate.
p1 <- FeatureScatter(
    object = nontreg_b065,
    feature1 = "adt_CD27",
    feature2 = "adt_CD45RA",
    group.by = "predicted.t_celltype.l2",
    pt.size = 0.0001
) &
    geom_density_2d() &
    geom_vline(xintercept = 2.75) &
    geom_hline(yintercept = 3) &
    ggtitle("B065")

p2 <- FeatureScatter(
    object = nontreg_b069,
    feature1 = "adt_CD27",
    feature2 = "adt_CD45RA",
    group.by = "predicted.t_celltype.l2",
    pt.size = 0.0001
) &
    geom_density_2d() &
    geom_vline(xintercept = 2.75) &
    geom_hline(yintercept = 3.1) &
    ggtitle("B069")

p3 <- FeatureScatter(
    object = nontreg_b076,
    feature1 = "adt_CD27",
    feature2 = "adt_CD45RA",
    group.by = "predicted.t_celltype.l2",
    pt.size = 0.0001
) &
    geom_density_2d() &
    geom_vline(xintercept = 2.3) &
    geom_hline(yintercept = 3.35) &
    ggtitle("B076")

# Show the three panels in one row.
p1 + p2 + p3

In [None]:
# Wide canvas so the three batch panels sit side by side.
options(repr.plot.width = 24, repr.plot.height = 8)

# CD197 vs CD45RA for non-Treg cells as a single black group (grouped by
# batch_id) with thicker density contours and the naive-gate cut-off lines.
p1 <- FeatureScatter(
    object = nontreg_b065,
    feature1 = "adt_CD197",
    feature2 = "adt_CD45RA",
    group.by = "batch_id",
    cols = "black",
    pt.size = 0.0001
) &
    geom_density_2d(size = 1) &
    geom_vline(xintercept = 1.1) &
    geom_hline(yintercept = 3) &
    ggtitle("B065")

p2 <- FeatureScatter(
    object = nontreg_b069,
    feature1 = "adt_CD197",
    feature2 = "adt_CD45RA",
    group.by = "batch_id",
    cols = "black",
    pt.size = 0.0001
) &
    geom_density_2d(size = 1) &
    geom_vline(xintercept = 1.1) &
    geom_hline(yintercept = 3.1) &
    ggtitle("B069")

p3 <- FeatureScatter(
    object = nontreg_b076,
    feature1 = "adt_CD197",
    feature2 = "adt_CD45RA",
    group.by = "batch_id",
    cols = "black",
    pt.size = 0.0001
) &
    geom_density_2d(size = 1) &
    geom_vline(xintercept = 1) &
    geom_hline(yintercept = 3.35) &
    ggtitle("B076")

# Show the three panels in one row.
p1 + p2 + p3

In [None]:
# Wide canvas so the three batch panels sit side by side.
options(repr.plot.width = 24, repr.plot.height = 8)

# CD27 vs CD45RA for non-Treg cells as a single black group (grouped by
# batch_id) with thicker density contours and the naive-gate cut-off lines.
p1 <- FeatureScatter(
    object = nontreg_b065,
    feature1 = "adt_CD27",
    feature2 = "adt_CD45RA",
    group.by = "batch_id",
    cols = "black",
    pt.size = 0.0001
) &
    geom_density_2d(size = 1) &
    geom_vline(xintercept = 2.75) &
    geom_hline(yintercept = 3) &
    ggtitle("B065")

p2 <- FeatureScatter(
    object = nontreg_b069,
    feature1 = "adt_CD27",
    feature2 = "adt_CD45RA",
    group.by = "batch_id",
    cols = "black",
    pt.size = 0.0001
) &
    geom_density_2d(size = 1) &
    geom_vline(xintercept = 2.75) &
    geom_hline(yintercept = 3.1) &
    ggtitle("B069")

p3 <- FeatureScatter(
    object = nontreg_b076,
    feature1 = "adt_CD27",
    feature2 = "adt_CD45RA",
    group.by = "batch_id",
    cols = "black",
    pt.size = 0.0001
) &
    geom_density_2d(size = 1) &
    geom_vline(xintercept = 2.3) &
    geom_hline(yintercept = 3.35) &
    ggtitle("B076")

# Show the three panels in one row.
p1 + p2 + p3

In [None]:
# Wide canvas so the three batch panels sit side by side.
options(repr.plot.width = 24, repr.plot.height = 8)

# CD197 histograms for non-Treg cells with the per-batch CD197 cut-off marked.
p1 <- ggplot(
    nontreg_b065_df,
    aes(x = CD197)
) +
    geom_histogram() +
    geom_vline(xintercept = 1.1) &
    ggtitle("B065")

p2 <- ggplot(
    nontreg_b069_df,
    aes(x = CD197)
) +
    geom_histogram() +
    geom_vline(xintercept = 1.1) &
    ggtitle("B069")

p3 <- ggplot(
    nontreg_b076_df,
    aes(x = CD197)
) +
    geom_histogram() +
    geom_vline(xintercept = 1) &
    ggtitle("B076")

# Show the three panels in one row.
p1 + p2 + p3

In [None]:
# Naive gate per batch: CD197, CD45RA and CD27 all above their batch-specific
# cut-offs, applied to the non-Treg cells.
naive_b065 <- subset(nontreg_b065, subset = adt_CD197 > 1.1 & adt_CD45RA > 3 & adt_CD27 > 2.75)
naive_b069 <- subset(nontreg_b069, subset = adt_CD197 > 1.1 & adt_CD45RA > 3.1 & adt_CD27 > 2.75)
naive_b076 <- subset(nontreg_b076, subset = adt_CD197 > 1 & adt_CD45RA > 3.35 & adt_CD27 > 2.3)

## Naive Subset Combined

In [None]:
# Combine the gated cells from the three batches into one object.
naive_merge <- merge(naive_b065, c(naive_b069, naive_b076))

In [None]:
# Label every cell of the merged object (one entry per RNA-assay column) as "CD4 Naive".
naive_merge$gating_celltype <- rep("CD4 Naive", length(colnames(naive_merge[['RNA']])))

In [None]:
# Gated cell counts per sample.
table(naive_merge$pbmc_sample_id)

In [None]:
# Persist the labelled naive subset.
saveRDS(naive_merge, file = '../Seurat_Objects/cd4_naive.rds')

# Memory Analysis

In [None]:
# Non-naive ("memory") cells per batch: the same expressions as the naive gate,
# with invert = TRUE so the cells that did NOT match are kept. Each object is
# printed right after it is created to show its summary.
# TRUE is spelled out because T is an ordinary variable that can be reassigned,
# which would silently turn this into the naive selection.
mem_b065 <- subset(nontreg_b065, subset = adt_CD197 > 1.1 & adt_CD45RA > 3 & adt_CD27 > 2.75, invert = TRUE)
mem_b065
mem_b069 <- subset(nontreg_b069, subset = adt_CD197 > 1.1 & adt_CD45RA > 3.1 & adt_CD27 > 2.75, invert = TRUE)
mem_b069
mem_b076 <- subset(nontreg_b076, subset = adt_CD197 > 1 & adt_CD45RA > 3.35 & adt_CD27 > 2.3, invert = TRUE)
mem_b076

In [None]:
# Wide canvas so the three batch panels sit side by side.
options(repr.plot.width = 24, repr.plot.height = 8)

# CD197 vs CD45RA for the non-naive cells, grouped by predicted.t_celltype.l2,
# with 25-bin density contours and candidate cut-off lines.
# NOTE(review): the CD197 lines drawn here (1.2 / 1.2 / 1.1) differ from the
# CD197 cut-offs used by the CM / EM gates further down (1.3 / 1.3 / 1.2).
p1 <- FeatureScatter(
    object = mem_b065,
    feature1 = "adt_CD197",
    feature2 = "adt_CD45RA",
    group.by = "predicted.t_celltype.l2",
    pt.size = 0.0001
) &
    geom_density_2d(bins = 25) &
    geom_vline(xintercept = 1.2) &
    geom_hline(yintercept = 3) &
    ggtitle("B065")

p2 <- FeatureScatter(
    object = mem_b069,
    feature1 = "adt_CD197",
    feature2 = "adt_CD45RA",
    group.by = "predicted.t_celltype.l2",
    pt.size = 0.0001
) &
    geom_density_2d(bins = 25) &
    geom_vline(xintercept = 1.2) &
    geom_hline(yintercept = 3.1) &
    ggtitle("B069")

p3 <- FeatureScatter(
    object = mem_b076,
    feature1 = "adt_CD197",
    feature2 = "adt_CD45RA",
    group.by = "predicted.t_celltype.l2",
    pt.size = 0.0001
) &
    geom_density_2d(bins = 25) &
    geom_vline(xintercept = 1.1) &
    geom_hline(yintercept = 3.35) &
    ggtitle("B076")

# Show the three panels in one row.
p1 + p2 + p3

In [None]:
# Wide canvas so the three batch panels sit side by side.
options(repr.plot.width = 24, repr.plot.height = 8)

# CD27 vs CD45RA for the non-naive cells, grouped by predicted.t_celltype.l2,
# with 25-bin density contours. The lines use the CD27 / CD45RA cut-offs of
# the CM / EM gates that follow.
p1 <- FeatureScatter(
    object = mem_b065,
    feature1 = "adt_CD27",
    feature2 = "adt_CD45RA",
    group.by = "predicted.t_celltype.l2",
    pt.size = 0.0001
) &
    geom_density_2d(bins = 25) &
    geom_vline(xintercept = 2.8) &
    geom_hline(yintercept = 3) &
    ggtitle("B065")

p2 <- FeatureScatter(
    object = mem_b069,
    feature1 = "adt_CD27",
    feature2 = "adt_CD45RA",
    group.by = "predicted.t_celltype.l2",
    pt.size = 0.0001
) &
    geom_density_2d(bins = 25) &
    geom_vline(xintercept = 2.8) &
    geom_hline(yintercept = 3.1) &
    ggtitle("B069")

p3 <- FeatureScatter(
    object = mem_b076,
    feature1 = "adt_CD27",
    feature2 = "adt_CD45RA",
    group.by = "predicted.t_celltype.l2",
    pt.size = 0.0001
) &
    geom_density_2d(bins = 25) &
    geom_vline(xintercept = 2.6) &
    geom_hline(yintercept = 3.35) &
    ggtitle("B076")

# Show the three panels in one row.
p1 + p2 + p3

## CM Subset

In [None]:
# CM gate per batch, applied to the non-naive cells:
# CD27 high, CD45RA low, CD197 high (batch-specific cut-offs).
cm_b065 <- subset(mem_b065, subset = adt_CD27 > 2.8 & adt_CD45RA < 3 & adt_CD197 > 1.3)
cm_b069 <- subset(mem_b069, subset = adt_CD27 > 2.8 & adt_CD45RA < 3.1 & adt_CD197 > 1.3)
cm_b076 <- subset(mem_b076, subset = adt_CD27 > 2.6 & adt_CD45RA < 3.35 & adt_CD197 > 1.2)

In [None]:
# Combine the gated cells from the three batches into one object.
cm_merge <- merge(cm_b065, c(cm_b069, cm_b076))

In [None]:
# Label every cell of the merged object (one entry per RNA-assay column) as "CD4 CM".
cm_merge$gating_celltype <- rep("CD4 CM", length(colnames(cm_merge[['RNA']])))

In [None]:
# Gated cell counts per sample.
table(cm_merge$pbmc_sample_id)

In [None]:
# Persist the labelled CM subset.
saveRDS(cm_merge, file = '../Seurat_Objects/cd4_cm.rds')

## EM Subset

In [None]:
# EM1 gate per batch: CD27 high, CD45RA low, CD197 low.
em1_b065 <- subset(mem_b065, subset = adt_CD27 > 2.8 & adt_CD45RA < 3 & adt_CD197 < 1.3)
em1_b069 <- subset(mem_b069, subset = adt_CD27 > 2.8 & adt_CD45RA < 3.1 & adt_CD197 < 1.3)
em1_b076 <- subset(mem_b076, subset = adt_CD27 > 2.6 & adt_CD45RA < 3.35 & adt_CD197 < 1.2)

In [None]:
# EM2 gate per batch: same as EM1 but CD27 low.
# Both gates use strict inequalities, so a cell whose CD27 value equals the
# cut-off exactly falls into neither EM1 nor EM2.
em2_b065 <- subset(mem_b065, subset = adt_CD27 < 2.8 & adt_CD45RA < 3 & adt_CD197 < 1.3)
em2_b069 <- subset(mem_b069, subset = adt_CD27 < 2.8 & adt_CD45RA < 3.1 & adt_CD197 < 1.3)
em2_b076 <- subset(mem_b076, subset = adt_CD27 < 2.6 & adt_CD45RA < 3.35 & adt_CD197 < 1.2)

In [None]:
# Combine each EM population across the three batches.
em1_merge <- merge(em1_b065, c(em1_b069, em1_b076))
em2_merge <- merge(em2_b065, c(em2_b069, em2_b076))

In [None]:
# Gated cell counts per sample for each EM population.
table(em1_merge$pbmc_sample_id)
table(em2_merge$pbmc_sample_id)

In [None]:
# Label the two populations separately before they are merged.
em1_merge$gating_celltype <- rep("CD4 EM1", length(colnames(em1_merge[['RNA']])))
em2_merge$gating_celltype <- rep("CD4 EM2", length(colnames(em2_merge[['RNA']])))

In [None]:
# Single EM object holding both the EM1 and EM2 labels.
em_merge <- merge(em1_merge, em2_merge)

In [None]:
# Persist the combined EM subset.
saveRDS(em_merge, file = '../Seurat_Objects/cd4_em.rds')

## EMRA Subset

In [None]:
# EMRA gate per batch: CD27 low, CD45RA high, CD197 low.
# NOTE(review): the CD27 cut-offs here (2.5 / 2.5 / 2.3) are lower than the
# ones used for EM2 (2.8 / 2.8 / 2.6) — confirm this is intentional.
emra_b065 <- subset(mem_b065, subset = adt_CD27 < 2.5 & adt_CD45RA > 3 & adt_CD197 < 1.3)
emra_b069 <- subset(mem_b069, subset = adt_CD27 < 2.5 & adt_CD45RA > 3.1 & adt_CD197 < 1.3)
emra_b076 <- subset(mem_b076, subset = adt_CD27 < 2.3 & adt_CD45RA > 3.35 & adt_CD197 < 1.2)

In [None]:
# Combine the gated cells from the three batches into one object.
emra_merge <- merge(emra_b065, c(emra_b069, emra_b076))

In [None]:
# Label every cell of the merged object (one entry per RNA-assay column) as "CD4 TEMRA".
emra_merge$gating_celltype <- rep("CD4 TEMRA", length(colnames(emra_merge[['RNA']])))

In [None]:
# Gated cell counts per sample.
table(emra_merge$pbmc_sample_id)

In [None]:
# Persist the labelled EMRA subset.
saveRDS(emra_merge, file = '../Seurat_Objects/cd4_emra.rds')

# Recombine

In [None]:
# One object holding every gated population (naive, CM, EM, EMRA, Treg); print its summary.
merge_all <- merge(naive_merge, c(cm_merge, em_merge, emra_merge, treg_combined))
merge_all

In [None]:
# Cell counts per gating label.
table(merge_all$gating_celltype)

In [None]:
# One object per gating label.
merge_all.split <- SplitObject(merge_all, split.by = 'gating_celltype')

In [None]:
# Per-sample cell counts within each gating label.
lapply(merge_all.split, function(x){
    table(x$pbmc_sample_id)})

## Subset Remaining cells

In [None]:
# Cell names present in the original cd4 RNA assay but absent from the gated
# object, i.e. cells that fell through every gate.
bcs <- setdiff(colnames(cd4[['RNA']]), colnames(merge_all[['RNA']]))

In [None]:
# Pull those cells out of the original (un-split) object.
unknowns <- subset(cd4, cells = bcs)

In [None]:
# Label every ungated cell as "CD4 Unk".
unknowns$gating_celltype <- rep("CD4 Unk", length(colnames(unknowns[['RNA']])))

In [None]:
# Persist the ungated cells.
saveRDS(unknowns, file = '../Seurat_Objects/cd4_unk.rds')

In [None]:
# Gated + ungated cells together; print the summary.
# NOTE(review): merge_all_final is not saved in the visible part of this notebook.
merge_all_final <- merge(merge_all, unknowns)
merge_all_final

## Major Populations Plots

In [None]:
# Naive, CM, EM and EMRA cells only (Treg and ungated cells are left out).
main_subset <- merge(naive_merge, c(cm_merge, em_merge, emra_merge))

In [None]:
# Split back into one object per batch_id for per-batch plotting.
main.batch <- SplitObject(main_subset, 'batch_id')

In [None]:
# Metadata + ADT marker tables per batch, used by the density plots below.
df_main_b065 <- df_build(main.batch$`B065`)
df_main_b069 <- df_build(main.batch$`B069`)
df_main_b076 <- df_build(main.batch$`B076`)

In [None]:
# B065: filled 2-D densities coloured by gating label,
# first CD27 vs CD45RA, then CD197 vs CD45RA.
ggplot(
    df_main_b065,
    aes(x = CD27, y = CD45RA, color = gating_celltype)
) +
    stat_density_2d(
        geom = "polygon",
        aes(alpha = ..level.., fill = gating_celltype),
        bins = 10
    )

ggplot(
    df_main_b065,
    aes(x = CD197, y = CD45RA, color = gating_celltype)
) +
    stat_density_2d(
        geom = "polygon",
        aes(alpha = ..level.., fill = gating_celltype),
        bins = 10
    )

In [None]:
# B069: filled 2-D densities coloured by gating label,
# first CD27 vs CD45RA, then CD197 vs CD45RA.
ggplot(
    df_main_b069,
    aes(x = CD27, y = CD45RA, color = gating_celltype)
) +
    stat_density_2d(
        geom = "polygon",
        aes(alpha = ..level.., fill = gating_celltype),
        bins = 10
    )

ggplot(
    df_main_b069,
    aes(x = CD197, y = CD45RA, color = gating_celltype)
) +
    stat_density_2d(
        geom = "polygon",
        aes(alpha = ..level.., fill = gating_celltype),
        bins = 10
    )

In [None]:
# B076: filled 2-D densities coloured by gating label,
# first CD27 vs CD45RA, then CD197 vs CD45RA.
ggplot(
    df_main_b076,
    aes(x = CD27, y = CD45RA, color = gating_celltype)
) +
    stat_density_2d(
        geom = "polygon",
        aes(alpha = ..level.., fill = gating_celltype),
        bins = 10
    )

ggplot(
    df_main_b076,
    aes(x = CD197, y = CD45RA, color = gating_celltype)
) +
    stat_density_2d(
        geom = "polygon",
        aes(alpha = ..level.., fill = gating_celltype),
        bins = 10
    )