# Basic 3WNN Clustering w/ Gated Labels

In [None]:
# Attach a package while muffling its startup messages.
# All arguments are forwarded unchanged to library().
quiet_library <- function(...) {
    suppressPackageStartupMessages(
        expr = library(...)
    )
}
quiet_library(Seurat)
quiet_library(ggplot2)
quiet_library(Matrix)
quiet_library(H5weaver)
quiet_library(dplyr)
quiet_library(viridis)
quiet_library(harmony)
quiet_library(Nebulosa)
quiet_library(ArchR)

In [None]:
# Global ArchR settings: worker thread count and reference genome
addArchRThreads(threads = 32)
addArchRGenome(genome = "hg38")

# Load SP Object

In [None]:
# Read the merged Seurat object from disk
sp_merge <- readRDS('../Seurat_Objects/sp_merge_only.rds')

In [None]:
# Display a summary of the loaded object
sp_merge

# RNA Clustering

In [None]:
# Genes to drop from the counts matrix before RNA clustering
genes_to_remove <- c(
    'AC105402.3',
    'MTRNR2L8'
)
# Counts matrix of the RNA assay
rna_mtx <- sp_merge[['RNA']]@counts
# Number of genes before filtering
length(rownames(rna_mtx))

In [None]:
# Drop the genes listed in genes_to_remove from the RNA counts matrix.
# A logical mask is used instead of negative indices: when none of the
# listed genes is present, `rna_mtx[-integer(0), ]` would select zero
# rows instead of keeping every gene.
drop_mask <- rownames(rna_mtx) %in% genes_to_remove
idx <- which(drop_mask)  # row positions of the removed genes (kept for inspection)
clean_rna_mtx <- rna_mtx[!drop_mask, , drop = FALSE]
# Number of genes after filtering
length(rownames(clean_rna_mtx))

In [None]:
# Store the filtered counts as a new assay and make it the default
sp_merge[['clean_gex']] <- CreateAssayObject(counts = clean_rna_mtx)
DefaultAssay(sp_merge) <- 'clean_gex'

In [None]:
# SCTransform on the filtered assay (warnings muffled), then PCA
sp_merge <- suppressWarnings(SCTransform(sp_merge, assay = 'clean_gex'))
sp_merge <- RunPCA(sp_merge)

In [None]:
# Plot size for the Harmony convergence plot
options(repr.plot.width = 8, repr.plot.height = 4)
# Harmony on the RNA PCA, grouping by batch_id; the result is stored as
# 'rna_harmony'. TRUE is spelled out because `T` is an ordinary variable
# that can be rebound.
sp_merge <- suppressWarnings(
    RunHarmony(
        object = sp_merge,
        group.by.vars = 'batch_id',
        reduction = 'pca',
        assay.use = 'SCT',
        reduction.save = 'rna_harmony',
        plot_convergence = TRUE
    )
)

In [None]:
# Elbow plot over the first 50 dimensions
ElbowPlot(
    sp_merge,
    ndims = 50
)

In [None]:
# UMAP on the first 25 dimensions of the Harmony-corrected RNA reduction
sp_merge <- RunUMAP(
    sp_merge,
    reduction = 'rna_harmony',
    dims = 1:25
)

## RNA UMAPs

In [None]:
# Four RNA UMAP panels, each coloured by a different metadata column
options(
    repr.plot.width = 20,
    repr.plot.height = 10
)
p1 <- DimPlot(sp_merge, group.by = 'gating_celltype', reduction = 'umap')
p2 <- DimPlot(sp_merge, group.by = 'pediatric_senior', reduction = 'umap')
p3 <- DimPlot(sp_merge, group.by = 'age_cmv', reduction = 'umap')
p4 <- DimPlot(sp_merge, group.by = 'pbmc_sample_id', reduction = 'umap')
p1 + p2 + p3 + p4

In [None]:
# RNA UMAP with one facet per gated cell type, four facets per row
options(
    repr.plot.width = 20,
    repr.plot.height = 20
)
DimPlot(
    sp_merge,
    reduction = 'umap',
    group.by = 'gating_celltype',
    split.by = 'gating_celltype',
    ncol = 4
)

## Age DEG Analysis on Gated cell types

In [None]:
# One Seurat object per gated cell type
sp_merge_split <- SplitObject(
    sp_merge,
    split.by = 'gating_celltype'
)

In [None]:
# For each gated cell type, set identities to the pediatric_senior column
# and run FindAllMarkers on the SCT assay.
age_deg_list <- lapply(sp_merge_split, function(celltype_obj) {
    celltype_obj <- SetIdent(celltype_obj, value = 'pediatric_senior')
    FindAllMarkers(celltype_obj, assay = 'SCT')
})

In [None]:
# Persist the per-cell-type marker tables
saveRDS(
    object = age_deg_list,
    file = '../gated_age_deg_list.rds'
)

## DEG Analysis

In [None]:
# Markers for each gated cell type, using at most 500 cells per identity
sp_merge <- SetIdent(
    sp_merge,
    value = 'gating_celltype'
)
gating_markers <- FindAllMarkers(
    sp_merge,
    max.cells.per.ident = 500
)

In [None]:
# Top 5 markers per cluster, ranked by avg_log2FC
top5 <- gating_markers %>%
    group_by(cluster) %>%
    top_n(n = 5, wt = avg_log2FC)

# ADT Clustering

In [None]:
# ADT features to drop before ADT clustering
adts_to_remove <- c(
    'CD134', 'CD172a', 'CD183', 'CD366', 'CX3CR1',
    'TCRab', 'TCRgd', 'CD137', 'CD24', 'CD294',
    'CD304', 'CD40', 'CD80', 'CD86',
    'IgG1-K-Isotype-Control'
)
# Counts matrix of the ADT assay
adt_mtx <- sp_merge[['ADT']]@counts
# Number of ADT features before filtering
length(rownames(adt_mtx))

In [None]:
# Drop the features listed in adts_to_remove from the ADT counts matrix.
# A logical mask is used instead of negative indices: when none of the
# listed features is present, `adt_mtx[-integer(0), ]` would select zero
# rows instead of keeping every feature.
drop_mask <- rownames(adt_mtx) %in% adts_to_remove
idx <- which(drop_mask)  # row positions of the removed features (kept for inspection)
clean_adt_mtx <- adt_mtx[!drop_mask, , drop = FALSE]
# Number of ADT features after filtering
length(rownames(clean_adt_mtx))

In [None]:
# Store the filtered ADT counts as a new assay and make it the default
sp_merge[['clean_adt']] <- CreateAssayObject(counts = clean_adt_mtx)
DefaultAssay(sp_merge) <- 'clean_adt'

In [None]:
# ADT clustering: every retained ADT is a variable feature. Results go
# under their own reduction name ('apca') and key ('APC_') so they do not
# overwrite the RNA reductions.
VariableFeatures(sp_merge) <- rownames(sp_merge[["clean_adt"]])
sp_merge <- NormalizeData(sp_merge, normalization.method = 'CLR', margin = 2)
sp_merge <- ScaleData(sp_merge)
sp_merge <- RunPCA(
    sp_merge,
    reduction.name = 'apca',
    reduction.key = 'APC_',
    approx = FALSE
)

In [None]:
# Plot size for the Harmony convergence plot
options(repr.plot.width = 8, repr.plot.height = 4)
# Harmony on the ADT PCA ('apca'), grouping by batch_id; the result is
# stored as 'adt_harmony'. TRUE is spelled out because `T` is an ordinary
# variable that can be rebound.
sp_merge <- suppressWarnings(
    RunHarmony(
        object = sp_merge,
        group.by.vars = 'batch_id',
        reduction = 'apca',
        assay.use = 'clean_adt',
        reduction.save = 'adt_harmony',
        plot_convergence = TRUE
    )
)

In [None]:
# UMAP on the first 20 dimensions of the Harmony-corrected ADT reduction.
# The key is set explicitly: without it RunUMAP uses its default key,
# which is the same one the RNA 'umap' reduction was created with.
sp_merge <- RunUMAP(
    sp_merge,
    reduction = 'adt_harmony',
    dims = 1:20,
    reduction.name = 'adtumap',
    reduction.key = 'adtumap_'
)

## ADT UMAPs

In [None]:
# Four ADT UMAP panels, each coloured by a different metadata column
options(
    repr.plot.width = 20,
    repr.plot.height = 10
)
p1 <- DimPlot(sp_merge, group.by = 'gating_celltype', reduction = 'adtumap')
p2 <- DimPlot(sp_merge, group.by = 'pediatric_senior', reduction = 'adtumap')
p3 <- DimPlot(sp_merge, group.by = 'age_cmv', reduction = 'adtumap')
p4 <- DimPlot(sp_merge, group.by = 'pbmc_sample_id', reduction = 'adtumap')
p1 + p2 + p3 + p4

In [None]:
# ADT UMAP with one facet per gated cell type, four facets per row
options(
    repr.plot.width = 20,
    repr.plot.height = 20
)
DimPlot(
    sp_merge,
    reduction = 'adtumap',
    group.by = 'gating_celltype',
    split.by = 'gating_celltype',
    ncol = 4
)

# ATAC Clustering

## Load ArchR Project

In [None]:
# Load the saved ArchR project and display it
proj <- loadArchRProject('../PedSen_ATAC/')
proj

## Subset ArchR Project

In [None]:
# Cell barcodes of the RNA assay, with a preview of the first few
bcs <- colnames(x = sp_merge[['RNA']])
head(x = bcs)

In [None]:
# Positions of ArchR cells whose cellNames_clean value is a Seurat barcode
idx_new <- which(
    proj$cellNames_clean %in% bcs
)
head(x = idx_new)

In [None]:
# Restrict the ArchR project to the matched cells and display the result
cells_subset <- proj$cellNames[idx_new]
proj_subset <- proj[cells_subset, ]
proj_subset

## ATAC Clustering

In [None]:
# Iterative LSI on the subset project with 75000 variable features;
# force = TRUE is passed so an existing 'IterativeLSI' can be replaced.
proj_subset <- addIterativeLSI(
    proj_subset,
    name = 'IterativeLSI',
    varFeatures = 75000,
    force = TRUE
)

In [None]:
# Harmony on the LSI dimensions, grouping by batch_id
proj_subset <- addHarmony(
    proj_subset,
    reducedDims = 'IterativeLSI',
    groupBy = 'batch_id',
    force = TRUE
)

In [None]:
# Cluster on the Harmony dimensions at resolution 0.2.
# Alternative kept for reference (uses the uncorrected LSI):
# proj_subset <- addClusters(proj_subset, reducedDims = 'IterativeLSI', name = 'Clusters', force = TRUE, resolution = 0.2)
proj_subset <- addClusters(
    proj_subset,
    reducedDims = 'Harmony',
    name = 'Clusters',
    resolution = 0.2,
    force = TRUE
)

In [None]:
# UMAP embedding on the Harmony dimensions.
# Alternative kept for reference (uses the uncorrected LSI):
# proj_subset <- addUMAP(proj_subset, reducedDims = 'IterativeLSI', name = 'UMAP', force = TRUE)
proj_subset <- addUMAP(
    proj_subset,
    reducedDims = 'Harmony',
    name = 'UMAP',
    force = TRUE
)

In [None]:
# Plot size for the ArchR embeddings. The option name was previously the
# corrupted `repr.plosp_mergeidth`, so the width was never applied.
options(repr.plot.width = 8, repr.plot.height = 8)
# ATAC UMAP coloured by cluster, by pediatric_senior and by gated cell type
plotEmbedding(ArchRProj = proj_subset, colorBy = "cellColData", name = "Clusters", embedding = "UMAP")
plotEmbedding(ArchRProj = proj_subset, colorBy = "cellColData", name = "pediatric_senior", embedding = "UMAP")
plotEmbedding(ArchRProj = proj_subset, colorBy = "cellColData", name = "gating_celltype", embedding = "UMAP")

## Export LSI

In [None]:
# Extract the Harmony reduced dimensions from the ArchR project.
# Alternative kept for reference (uncorrected LSI):
# lsi <- getReducedDims(proj_subset, reducedDims = 'IterativeLSI')
lsi <- getReducedDims(
    proj_subset,
    reducedDims = 'Harmony'
)

## Import LSI to Seurat Object

In [None]:
# Build "<batch_id>-P1_<pbmc_sample_id>#<barcode>" strings from the Seurat
# metadata so they can be compared with the row names of `lsi`.
rna_metadata <- sp_merge@meta.data
archr_style_barcodes <- paste0(
    rna_metadata$batch_id,
    "-P1_",
    rna_metadata$pbmc_sample_id,
    "#",
    rownames(rna_metadata)
)
head(x = archr_style_barcodes)

In [None]:
# Lookup table pairing each Seurat barcode with its ArchR-style barcode
bc_df <- data.frame(
    seurat_bcs = colnames(sp_merge[['RNA']]),
    archr_style = archr_style_barcodes
)

In [None]:
# Barcodes present both in the LSI row names and in the Seurat object
bc_idx <- intersect(
    x = rownames(lsi),
    y = archr_style_barcodes
)

In [None]:
# Keep only the shared barcodes in the LSI matrix and the lookup table,
# then report how many rows each one has.
lsi_subset <- lsi[rownames(lsi) %in% bc_idx, , drop = FALSE]
bc_df_subset <- bc_df[bc_df$archr_style %in% bc_idx, ]
nrow(lsi_subset)
nrow(bc_df_subset)

In [None]:
# Count positions where the two barcode orderings agree (before reordering)
table(rownames(lsi_subset) == bc_df_subset$archr_style)

In [None]:
# Reorder the LSI rows to follow the order of the lookup table.
# drop = FALSE keeps the result a matrix even if only one row remains,
# so its row names can still be reassigned afterwards.
lsi_subset <- lsi_subset[match(bc_df_subset$archr_style, rownames(lsi_subset)), , drop = FALSE]

In [None]:
# Count positions where the two barcode orderings agree (after reordering)
table(rownames(lsi_subset) == bc_df_subset$archr_style)

In [None]:
# Swap the ArchR-style row names of the LSI matrix for the Seurat barcodes
rownames(x = lsi_subset) <- bc_df_subset$seurat_bcs

In [None]:
# Keep only the Seurat cells that have an LSI row
sp_merge <- subset(
    sp_merge,
    cells = bc_df_subset$seurat_bcs
)

In [None]:
# Count positions where the Seurat cell order and the LSI row order agree
table(rownames(lsi_subset) == colnames(sp_merge[['RNA']]))

In [None]:
# Attach the LSI matrix as reduction 'lsit', associated with the 'Tiles' assay
sp_merge[["lsit"]] <- CreateDimReducObject(
    embeddings = lsi_subset,
    assay = "Tiles",
    key = "lsit_"
)

## ATAC clustering in Seurat Object

In [None]:
# Switch to the Tiles assay and compute a UMAP from dimensions 1:29 of
# the imported 'lsit' reduction. TRUE is spelled out because `T` is an
# ordinary variable that can be rebound.
DefaultAssay(sp_merge) <- 'Tiles'
sp_merge <- RunUMAP(
    sp_merge,
    reduction = 'lsit',
    dims = 1:29,
    reduction.name = 'atac_umap',
    reduction.key = 'atacumap_',
    verbose = TRUE
)

In [None]:
# Four ATAC UMAP panels, each coloured by a different metadata column
options(
    repr.plot.width = 20,
    repr.plot.height = 10
)
p1 <- DimPlot(sp_merge, group.by = 'gating_celltype', reduction = 'atac_umap')
p2 <- DimPlot(sp_merge, group.by = 'pediatric_senior', reduction = 'atac_umap')
p3 <- DimPlot(sp_merge, group.by = 'age_cmv', reduction = 'atac_umap')
p4 <- DimPlot(sp_merge, group.by = 'pbmc_sample_id', reduction = 'atac_umap')
p1 + p2 + p3 + p4

In [None]:
# ATAC UMAP with one facet per gated cell type, four facets per row
options(
    repr.plot.width = 20,
    repr.plot.height = 20
)
DimPlot(
    sp_merge,
    reduction = 'atac_umap',
    group.by = 'gating_celltype',
    split.by = 'gating_celltype',
    ncol = 4
)

# 3WNN Clustering

In [None]:
# Weighted nearest-neighbour graph over the three reductions (RNA, ADT,
# ATAC); each entry of dims.list pairs with the reduction at the same
# position in reduction.list.
sp_merge <- FindMultiModalNeighbors(
    sp_merge,
    reduction.list = list("rna_harmony","adt_harmony",'lsit'),
    dims.list = list(1:25,1:20,1:29),
    weighted.nn.name = "X3way.weighted.nn",
    k.nn = 30,
    knn.range = 500,
    prune.SNN = 1/20
)

In [None]:
# UMAP from the 3-way weighted neighbour object, stored as 'wnn.3.umap'.
# The key is written without dots: Seurat keys must be alphanumeric
# characters followed by an underscore, and "wnn.3.umap_" would be
# rewritten to this value with a warning.
sp_merge <- RunUMAP(
    sp_merge,
    nn.name = "X3way.weighted.nn",
    reduction.name = "wnn.3.umap",
    reduction.key = "wnn3umap_"
)

In [None]:
# Cluster on the 'wsnn' graph with algorithm 3 at resolution 0.5
sp_merge <- FindClusters(
    sp_merge,
    graph.name = "wsnn",
    resolution = 0.5,
    algorithm = 3,
    verbose = FALSE
)

In [None]:
# WNN UMAP coloured by the current identities, with labels drawn on the
# plot. TRUE is spelled out because `T` is an ordinary variable that can
# be rebound.
options(repr.plot.width = 20, repr.plot.height = 10)
DimPlot(sp_merge, reduction = 'wnn.3.umap', label = TRUE)

In [None]:
# Four WNN UMAP panels, each coloured by a different metadata column;
# the first panel is drawn unrasterised with shuffled point order.
options(
    repr.plot.width = 20,
    repr.plot.height = 20
)
p1 <- DimPlot(
    sp_merge,
    group.by = 'gating_celltype',
    reduction = 'wnn.3.umap',
    shuffle = TRUE,
    raster = FALSE
)
p2 <- DimPlot(sp_merge, group.by = 'pediatric_senior', reduction = 'wnn.3.umap')
p3 <- DimPlot(sp_merge, group.by = 'age_cmv', reduction = 'wnn.3.umap')
p4 <- DimPlot(sp_merge, group.by = 'pbmc_sample_id', reduction = 'wnn.3.umap')
p1 + p2 + p3 + p4

In [None]:
# WNN UMAP coloured by pediatric_senior, one facet per gated cell type
options(
    repr.plot.width = 20,
    repr.plot.height = 20
)
DimPlot(
    sp_merge,
    reduction = 'wnn.3.umap',
    group.by = 'pediatric_senior',
    split.by = 'gating_celltype',
    ncol = 4,
    shuffle = TRUE
)

# Save

In [None]:
# Write the updated Seurat object to disk
saveRDS(
    object = sp_merge,
    file = '../Seurat_Objects/sp_merge_gating_celltype_updated.rds'
)

In [None]:
# Print R version and attached package versions for this session
sessionInfo()