In [None]:
library(Seurat)
library(dplyr)
library(ggplot2)

# Import CD8 Stim Experiment

In [None]:
# Recursively collect every filtered feature-barcode matrix (.h5) below
# '../cellranger_output/'. The dot is escaped so that only a literal '.h5'
# suffix matches, not any character followed by 'h5'.
h5_pattern <- 'filtered_feature_bc_matrix\\.h5$'
in_h5s <- list.files(path = '../cellranger_output/', pattern = h5_pattern, full.names = TRUE, recursive = TRUE)

In [None]:
# Number of .h5 files collected in in_h5s.
length(in_h5s)

In [None]:
# Build one Seurat object per .h5 file: the 'Gene Expression' matrix becomes
# the object's initial assay, the 'Antibody Capture' matrix is attached as an
# 'ADT' assay, and a libraryID is derived from the file path.
so_list <- lapply(in_h5s, function(h5_path) {
    counts <- Read10X_h5(h5_path)
    seurat_obj <- CreateSeuratObject(counts$`Gene Expression`)
    seurat_obj[['ADT']] <- CreateAssayObject(counts$`Antibody Capture`)
    # NOTE(review): the library ID is cut from fixed character positions
    # 44-54 of the path, so it depends on the exact directory layout and
    # name lengths -- confirm before moving or renaming the input files.
    seurat_obj$libraryID <- substr(h5_path, start = 44, stop = 54)
    seurat_obj
})

In [None]:
# Fold the per-library objects into one Seurat object by merging them
# pairwise, left to right.
exp618_merge <- Reduce(merge, so_list)

# Add Metadata

In [None]:
# List the distinct library IDs present in the merged object.
unique(exp618_merge$libraryID)

In [None]:
# Derive a per-cell sample ID from the library ID: identities are set to the
# library, renamed through the lookup below (old name = new name), and the
# result is stored in the 'sampleID' metadata column.
library_to_sample <- c(
    'LB00172-001' = 'UP1002',
    'LB00172-002' = 'UP1002',
    'LB00172-003' = 'UP1002',
    'LB00173-001' = 'UP1006',
    'LB00173-002' = 'UP1006',
    'LB00173-003' = 'UP1006',
    'LB00182-001' = 'UP1003',
    'LB00182-002' = 'UP1003',
    'LB00182-003' = 'UP1003',
    'LB00197-001' = 'UP1010',
    'LB00197-002' = 'UP1010',
    'LB00197-003' = 'UP1010',
    'LB02232/out' = 'Pooled Peds',
    'LB04480-011' = 'CB009',
    'LB04480-012' = 'CB009',
    'LB04480-013' = 'CB009'
)
Idents(exp618_merge) <- 'libraryID'
exp618_merge <- RenameIdents(exp618_merge, library_to_sample)
exp618_merge$sampleID <- Idents(exp618_merge)

In [None]:
# Derive the stimulation condition of each cell from its library ID and store
# it in the 'stim' metadata column.
# NOTE(review): 'LB04480-011' is labelled 'Unstim' whereas the first library
# of every other sample is 'TCR' -- confirm this is intended.
library_to_stim <- c(
    'LB00172-001' = 'TCR',
    'LB00172-002' = 'PMA',
    'LB00172-003' = 'IL21',
    'LB00173-001' = 'TCR',
    'LB00173-002' = 'PMA',
    'LB00173-003' = 'IL21',
    'LB00182-001' = 'TCR',
    'LB00182-002' = 'PMA',
    'LB00182-003' = 'IL21',
    'LB00197-001' = 'TCR',
    'LB00197-002' = 'PMA',
    'LB00197-003' = 'IL21',
    'LB02232/out' = 'Unstim',
    'LB04480-011' = 'Unstim',
    'LB04480-012' = 'PMA',
    'LB04480-013' = 'IL21'
)
Idents(exp618_merge) <- 'libraryID'
exp618_merge <- RenameIdents(exp618_merge, library_to_stim)
exp618_merge$stim <- Idents(exp618_merge)

In [None]:
# Label every cell with an age group ('Ped' or 'CB') based on its library ID
# and store the label in the 'age' metadata column.
ped_libraries <- c(
    'LB00172-001', 'LB00172-002', 'LB00172-003',
    'LB00173-001', 'LB00173-002', 'LB00173-003',
    'LB00182-001', 'LB00182-002', 'LB00182-003',
    'LB00197-001', 'LB00197-002', 'LB00197-003',
    'LB02232/out'
)
cb_libraries <- c('LB04480-011', 'LB04480-012', 'LB04480-013')
# Old name = new name pairs, in the same order as the libraries above.
library_to_age <- c(
    setNames(rep('Ped', length(ped_libraries)), ped_libraries),
    setNames(rep('CB', length(cb_libraries)), cb_libraries)
)
Idents(exp618_merge) <- 'libraryID'
exp618_merge <- RenameIdents(exp618_merge, library_to_age)
exp618_merge$age <- Idents(exp618_merge)

In [None]:
# Tag every listed library with the experiment ID 'EXP-00618' and store it in
# the 'exp' metadata column.
exp618_libraries <- c(
    'LB00172-001', 'LB00172-002', 'LB00172-003',
    'LB00173-001', 'LB00173-002', 'LB00173-003',
    'LB00182-001', 'LB00182-002', 'LB00182-003',
    'LB00197-001', 'LB00197-002', 'LB00197-003',
    'LB02232/out',
    'LB04480-011', 'LB04480-012', 'LB04480-013'
)
library_to_exp <- setNames(rep('EXP-00618', length(exp618_libraries)), exp618_libraries)
Idents(exp618_merge) <- 'libraryID'
exp618_merge <- RenameIdents(exp618_merge, library_to_exp)
exp618_merge$exp <- Idents(exp618_merge)

In [None]:
# Keep only the cells whose 'age' identity is 'Ped'. The `idents` argument is
# written out in full rather than relying on R's partial argument matching.
exp618_merge <- SetIdent(exp618_merge, value = 'age')
ped_only <- subset(exp618_merge, idents = 'Ped')

In [None]:
# Remove the full merged object and the per-library list from the workspace;
# the analysis continues with ped_only.
rm(exp618_merge, so_list)

## QC EXP-00618

In [None]:
# Store, per cell, the percentage of counts coming from features whose names
# start with "MT-" in the 'percent.mt' metadata column.
ped_only[["percent.mt"]] <- PercentageFeatureSet(ped_only, pattern = "^MT-")

In [None]:
# Per-library QC violin plots with horizontal guide lines.
# `+` binds tighter than `&`, so NoLegend() is added to the plot first and
# each geom_hline() is then applied with `&`.
# NOTE(review): the nCount_RNA / nFeature_RNA guide lines are only drawn here;
# confirm whether they were also meant to be applied as filters.
options(repr.plot.width = 20, repr.plot.height = 5)
VlnPlot(ped_only, features = 'nCount_RNA', group.by = 'libraryID', pt.size = 0, log = TRUE) + NoLegend() & geom_hline(yintercept = 25000) & geom_hline(yintercept = 300)
VlnPlot(ped_only, features = 'nFeature_RNA', group.by = 'libraryID', pt.size = 0, log = FALSE) + NoLegend() & geom_hline(yintercept = 5000) & geom_hline(yintercept = 200)
VlnPlot(ped_only, features = 'nCount_ADT', group.by = 'libraryID', pt.size = 0, log = TRUE) + NoLegend()
VlnPlot(ped_only, features = 'percent.mt', group.by = 'libraryID', pt.size = 0.1, log = TRUE) + NoLegend() & geom_hline(yintercept = 10)

In [None]:
# Print the object summary before the percent.mt filter is applied.
ped_only

In [None]:
# Keep only cells whose percent.mt is below 10, then print the filtered
# object's summary.
ped_only <- subset(x = ped_only, subset = percent.mt < 10)
ped_only

# Standard Clustering

In [None]:
# Standard preprocessing, each step with the function's default settings:
# normalise, select variable features, scale, then run PCA.
ped_only <- NormalizeData(ped_only)
ped_only <- FindVariableFeatures(ped_only)
ped_only <- ScaleData(ped_only)
ped_only <- RunPCA(ped_only)

In [None]:
# Elbow plot over the first 50 principal components, used to choose how many
# dimensions to carry forward.
ElbowPlot(ped_only, ndims = 50)

In [None]:
# Embed and cluster on the first 20 dimensions: UMAP, neighbour graph, then
# clustering at resolution 0.5.
ped_only <- RunUMAP(ped_only, dims = 1:20)
ped_only <- FindNeighbors(ped_only, dims = 1:20)
ped_only <- FindClusters(ped_only, resolution = 0.5)

In [None]:
# Side-by-side DimPlots coloured by 'sampleID' (left) and 'stim' (right).
options(repr.plot.width = 20, repr.plot.height = 10)
p1 <- DimPlot(ped_only, group.by = 'sampleID')
p2 <- DimPlot(ped_only, group.by = 'stim')
p1 + p2

In [None]:
# Side-by-side DimPlots: default grouping (no group.by given) on the left,
# 'stim' on the right.
options(repr.plot.width = 20, repr.plot.height = 10)
p1 <- DimPlot(ped_only)
p2 <- DimPlot(ped_only, group.by = 'stim')
p1 + p2

In [None]:
# Side-by-side DimPlots coloured by 'exp' (shuffle = TRUE) and by 'stim'.
options(repr.plot.width = 20, repr.plot.height = 10)
p1 <- DimPlot(ped_only, group.by = 'exp', shuffle = TRUE)
p2 <- DimPlot(ped_only, group.by = 'stim')
p1 + p2

# Integrated Version

In [None]:
# Split the object into a list with one Seurat object per value of 'stim'.
stim.list <- SplitObject(ped_only, split.by = "stim")

# Normalise each subset and select its 2000 variable features ("vst")
# independently of the other subsets.
stim.list <- lapply(X = stim.list, FUN = function(obj) {
    obj <- NormalizeData(obj)
    FindVariableFeatures(obj, selection.method = "vst", nfeatures = 2000)
})

# Features that are repeatedly variable across the subsets; these are passed
# on as the anchor features for integration.
features <- SelectIntegrationFeatures(object.list = stim.list)

In [None]:
# Find integration anchors between the per-stim objects using the selected
# features.
immune.anchors <- FindIntegrationAnchors(object.list = stim.list, anchor.features = features)

In [None]:
# Integrate the per-stim objects from the anchor set; this command creates an
# 'integrated' data assay.
immune.combined <- IntegrateData(anchorset = immune.anchors)

In [None]:
# Downstream analysis runs on the corrected 'integrated' assay; the original
# unmodified data still resides in the 'RNA' assay.
DefaultAssay(immune.combined) <- "integrated"

# Standard workflow for visualisation and clustering: scale, PCA (30 PCs),
# then UMAP, neighbour graph and clustering (resolution 0.5) on PCs 1-25.
immune.combined <- immune.combined %>%
    ScaleData(verbose = FALSE) %>%
    RunPCA(npcs = 30, verbose = FALSE) %>%
    RunUMAP(reduction = "pca", dims = 1:25) %>%
    FindNeighbors(reduction = "pca", dims = 1:25) %>%
    FindClusters(resolution = 0.5)

In [None]:
# Cluster again on the 'integrated' assay, this time at resolution 1.
DefaultAssay(immune.combined) <- "integrated"
immune.combined <- FindClusters(immune.combined, resolution = 1)

In [None]:
# Visualization: UMAP coloured by 'stim' (left) next to the same UMAP with
# default grouping and repelled labels (right).
options(repr.plot.width = 20, repr.plot.height = 10)
p1 <- DimPlot(immune.combined, reduction = "umap", group.by = "stim")
p2 <- DimPlot(immune.combined, reduction = "umap", label = TRUE, repel = TRUE)
p1 + p2

In [None]:
# Integrated object: DimPlots coloured by 'sampleID' (left) and 'stim' (right).
options(repr.plot.width = 20, repr.plot.height = 10)
p1 <- DimPlot(immune.combined, group.by = 'sampleID')
p2 <- DimPlot(immune.combined, group.by = 'stim')
p1 + p2

In [None]:
# One DimPlot panel per 'stim' value, laid out two panels per row.
options(repr.plot.width = 20, repr.plot.height = 10)
DimPlot(immune.combined, split.by = 'stim', ncol = 2)

In [None]:
# FeaturePlot of 'percent.mt' next to a DimPlot coloured by 'stim'.
options(repr.plot.width = 20, repr.plot.height = 10)
p1 <- FeaturePlot(immune.combined, features = 'percent.mt')
p2 <- DimPlot(immune.combined, group.by = 'stim')
p1 + p2

In [None]:
# Normalise the 'ADT' assay with the CLR method (margin = 2), then switch the
# default assay to 'RNA' for the steps that follow.
immune.combined <- NormalizeData(immune.combined, assay = 'ADT', normalization.method = "CLR", margin = 2)
DefaultAssay(immune.combined) <- 'RNA'

# DEGs

In [None]:
# Find markers for every identity class on the 'RNA' assay, with
# max.cells.per.ident capped at 500.
degs <- FindAllMarkers(immune.combined, max.cells.per.ident = 500, assay = 'RNA')

In [None]:
# Downsampled copy of the object (downsample = 100), used for the marker
# heatmap.
so_small <- subset(immune.combined, downsample = 100)

In [None]:
# Scale every feature of the 'RNA' assay in the downsampled object, not only
# a subset of them.
so_small <- ScaleData(so_small, features = rownames(so_small[['RNA']]))

In [None]:
options(repr.plot.width = 20, repr.plot.height = 20)
# Top 10 markers per cluster ranked by avg_log2FC. The result is assigned with
# `<-` and ungrouped so that no grouping leaks into later uses of top10.
top10 <- degs %>%
    group_by(cluster) %>%
    top_n(n = 10, wt = avg_log2FC) %>%
    ungroup()
# Heatmap of those markers on the downsampled object.
DoHeatmap(so_small, features = top10$gene) + NoLegend()

In [None]:
# First 15 marker rows reported for cluster 17.
head(subset(degs, cluster == 17), n=15)

# Cell Type Identification

In [None]:
# DimPlot coloured by 'stim' and split into one panel per 'stim' value.
options(repr.plot.width = 20, repr.plot.height = 10)
DimPlot(immune.combined, group.by = 'stim', split.by = 'stim', shuffle = TRUE)

In [None]:
# Set identities to the 'integrated_snn_res.1' column, then draw violin plots
# of four adt_-prefixed features per identity.
options(repr.plot.width = 20, repr.plot.height = 10)
immune.combined <- SetIdent(immune.combined, value = 'integrated_snn_res.1')
VlnPlot(immune.combined, features = c('adt_CD3','adt_CD14.1','adt_CD19.1','adt_CD8'), pt.size = 0, ncol = 2)

In [None]:
# Violin plots of five adt_-prefixed features followed by five rna_-prefixed
# features, four panels per row.
options(repr.plot.width = 30, repr.plot.height = 10)
VlnPlot(immune.combined, features = c('adt_CD103','adt_CD25','adt_CD161','adt_CD194','adt_CD196',
                                      'rna_ITGAE','rna_IL2RA','rna_KLRB1','rna_CCR4','rna_CCR6'), pt.size = 0, ncol = 4)

In [None]:
# Alphabetically sorted feature names of the 'ADT' assay.
sort(rownames(immune.combined[['ADT']]))

In [None]:
# FeaturePlots of rna_TRDC, rna_TRGC1, rna_TRGC2 and adt_TCR-Vdelta2 in a
# two-column layout.
options(repr.plot.width = 20, repr.plot.height = 16)
FeaturePlot(immune.combined, features = c('rna_TRDC','rna_TRGC1','rna_TRGC2','adt_TCR-Vdelta2'), ncol = 2)

In [None]:
# FeaturePlot of adt_CD161.
options(repr.plot.width = 10, repr.plot.height = 10)
FeaturePlot(immune.combined, features = 'adt_CD161')

In [None]:
# Starting from the resolution-1 identities, sub-cluster cluster 4 on the
# 'integrated_snn' graph at resolution 0.3, then make the 'sub.cluster'
# column the active identities.
immune.combined <- SetIdent(immune.combined, value = 'integrated_snn_res.1')
immune.combined <- FindSubCluster(object = immune.combined, cluster = 4, resolution = 0.3, graph.name = 'integrated_snn')
Idents(immune.combined) <- immune.combined$sub.cluster

In [None]:
# Labelled DimPlot of the current identities.
options(repr.plot.width = 10, repr.plot.height = 10)
DimPlot(immune.combined, label = TRUE)

In [None]:
# Markers for each sub-cluster of cluster 4 (ident.1 only, no ident.2 given),
# computed on the 'RNA' assay with max.cells.per.ident = 500.
subcluster_markers <- function(subcluster_id) {
    FindMarkers(immune.combined, ident.1 = subcluster_id, assay = 'RNA', max.cells.per.ident = 500)
}
degs_4_0 <- subcluster_markers('4_0')
degs_4_1 <- subcluster_markers('4_1')
degs_4_2 <- subcluster_markers('4_2')
degs_4_3 <- subcluster_markers('4_3')
degs_4_4 <- subcluster_markers('4_4')

In [None]:
# For each sub-cluster's marker table, show only the rows for TRDC, TRGC2 and
# TRGC1 (rows are present only if the gene is in that table).
subset(degs_4_0, rownames(degs_4_0) %in% c('TRDC','TRGC2','TRGC1'))
subset(degs_4_1, rownames(degs_4_1) %in% c('TRDC','TRGC2','TRGC1'))
subset(degs_4_2, rownames(degs_4_2) %in% c('TRDC','TRGC2','TRGC1'))
subset(degs_4_3, rownames(degs_4_3) %in% c('TRDC','TRGC2','TRGC1'))
subset(degs_4_4, rownames(degs_4_4) %in% c('TRDC','TRGC2','TRGC1'))

In [None]:
# Annotate clusters with cell-type labels: identities are set to
# 'sub.cluster', renamed through the lookup below (cluster = label), and the
# result is stored in the 'cell.type' metadata column.
cluster_to_cell_type <- c(
    '0' = 'Naive',
    '1' = 'Naive',
    '2' = 'Naive',
    '3' = 'Naive',
    '4_0' = 'MNP-2',
    '4_1' = 'MNP-2',
    '4_2' = 'gdT',
    '4_3' = 'MNP-2',
    '4_4' = 'MNP-2',
    '5' = 'Naive',
    '6' = 'Naive',
    '7' = 'Memory',
    '8' = 'Naive',
    '9' = 'Naive',
    '10' = 'Memory',
    '11' = 'Naive',
    '12' = 'Naive',
    '13' = 'MAIT',
    '14' = 'Memory',
    '15' = 'Naive',
    '16' = 'MAIT',
    '17' = 'Memory',
    '18' = 'CD3_Neg',
    '19' = 'Naive',
    '20' = 'CD3_Neg',
    '21' = 'B_Cells',
    '22' = 'CD3_Neg'
)
Idents(immune.combined) <- 'sub.cluster'
immune.combined <- RenameIdents(immune.combined, cluster_to_cell_type)
immune.combined$cell.type <- Idents(immune.combined)

In [None]:
# Labelled DimPlot coloured by 'cell.type'.
options(repr.plot.width = 20, repr.plot.height = 10)
DimPlot(immune.combined, group.by = 'cell.type', shuffle = TRUE, label = TRUE, label.size = 10)

In [None]:
# DimPlot coloured by 'cell.type', one panel per 'sampleID', three per row.
options(repr.plot.width = 20, repr.plot.height = 15)
DimPlot(immune.combined, group.by = 'cell.type', split.by = 'sampleID', shuffle = TRUE, label = FALSE, label.size = 10, ncol = 3)

In [None]:
# DimPlot coloured by 'cell.type', one panel per 'stim' value, three per row.
options(repr.plot.width = 20, repr.plot.height = 15)
DimPlot(immune.combined, group.by = 'cell.type', split.by = 'stim', shuffle = TRUE, label = FALSE, label.size = 10, ncol = 3)

# Remove Non-T cell populations

In [None]:
# Drop the 'CD3_Neg' and 'B_Cells' identities (invert = TRUE keeps everything
# except the listed idents); the result is stored separately as immune.clean.
immune.combined <- SetIdent(immune.combined, value = 'cell.type')
immune.clean <- subset(immune.combined, idents = c('CD3_Neg','B_Cells'), invert = TRUE)

In [None]:
# Work on the 'integrated' assay of the cleaned object.
DefaultAssay(immune.clean) <- "integrated"

# Re-run scaling, PCA (30 PCs) and UMAP (PCs 1-25) on the remaining cells.
immune.clean <- immune.clean %>%
    ScaleData(verbose = FALSE) %>%
    RunPCA(npcs = 30, verbose = FALSE) %>%
    RunUMAP(reduction = "pca", dims = 1:25)

In [None]:
# Labelled DimPlot of the cleaned object coloured by 'cell.type'.
options(repr.plot.width = 15, repr.plot.height = 10)
DimPlot(immune.clean, group.by = 'cell.type', shuffle = TRUE, label = TRUE, label.size = 10)

## Cell Type Distribution

In [None]:
# Count cells per (cell.type, stim) pair and express each count as a fraction
# of all cells with the same stim.
#
# metadata_df: data frame with 'cell.type' and 'stim' columns.
# Returns a data frame with columns cell.type, stim, n.x (cells in the pair),
# n.y (cells in that stim) and perc (n.x / n.y, a 0-1 fraction).
l1_count_function <- function(metadata_df){
    per_type_and_stim <- metadata_df %>%
        dplyr::count(cell.type, stim)
    per_stim <- dplyr::count(metadata_df, stim)
    # Both tables carry a column 'n'; the join suffixes them as n.x and n.y.
    per_type_and_stim %>%
        left_join(per_stim, by = 'stim') %>%
        mutate(perc = n.x / n.y)
    }

In [None]:
# Per-stim cell-type fractions computed from the cleaned object's metadata.
l1_counts <- l1_count_function(immune.clean@meta.data)

In [None]:
# Dot plot of the per-stim cell-type fraction on a log10 y axis, with cell
# types ordered by decreasing perc, dots dodged by stim and each dot labelled
# with perc rounded to two digits.
# NOTE(review): perc is a 0-1 fraction (n.x / n.y in l1_count_function) while
# the axis is labelled "Percent" -- confirm which is intended.
options(repr.plot.width = 24, repr.plot.height = 10)
l1_counts %>%
    mutate(stim=factor(stim, levels = c('Unstim','PMA','TCR','IL21'))) %>%
    ggplot(aes(x = reorder(cell.type,-perc), y = perc, fill = stim, label = round(perc,digits = 2))) + 
        scale_y_log10() + 
        geom_dotplot(binaxis='y', stackdir='center',
                     position=position_dodge(width = 0.6), binwidth = 1/10) + 
        geom_text(position = position_dodge(width = 0.6), size = 6) +
        theme_bw() + 
        theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 20),
              axis.text.y = element_text(size = 20),
              axis.title = element_text(size = 20)) + 
        xlab("Cell Type Level 1") + 
        ylab("Percent")

In [None]:
# DimPlot of the cleaned object coloured by 'sampleID'.
options(repr.plot.width = 10, repr.plot.height = 10)
DimPlot(immune.clean, group.by = 'sampleID')

# Save

In [None]:
# Write the annotated integrated object to 'immune_combined_ped_only.rds' in
# the working directory.
saveRDS(immune.combined, file = 'immune_combined_ped_only.rds')

In [None]:
# Reload the saved integrated object from 'immune_combined_ped_only.rds'.
immune.combined <- readRDS(file = 'immune_combined_ped_only.rds')

In [None]:
# Write the cleaned object to 'immune_clean_ped_only.rds' in the working
# directory.
saveRDS(immune.clean, file = 'immune_clean_ped_only.rds')