# D0 WNN Clustering

### Load Libraries

In [None]:
# load libraries
# Attach a package while hiding its startup messages.
# All arguments are forwarded unchanged to library(), so a package can be
# given as a bare name exactly as in a direct library() call.
quiet_library <- function(...) {
    suppressPackageStartupMessages(library(...))
}
quiet_library(Seurat)
quiet_library(ggplot2)
quiet_library(Matrix)
quiet_library(H5weaver)
quiet_library(dplyr)
quiet_library(viridis)
quiet_library(harmony)
quiet_library(Nebulosa)

### Helper Functions

In [None]:
# Pull the ADT matrix out of a single h5 file
# path: location of the h5 file to read
# returns: the `ADT_dgCMatrix` element produced by the dgCMatrix conversion
adt_info_func <- function(path) {
  converted <- h5_list_convert_to_dgCMatrix(h5dump(path), target = 'ADT')
  converted$ADT_dgCMatrix
}

# Seurat RNA Pipeline
# Runs SCTransform, PCA, UMAP, neighbor finding and clustering with the
# default assay set to 'RNA'.
# x: Seurat object containing an 'RNA' assay
# returns: the processed Seurat object
seurat_proc_func <- function(x) {
  DefaultAssay(x) <- 'RNA'
  # SCTransform is used here in place of an earlier
  # NormalizeData / FindVariableFeatures / ScaleData / RunPCA chain
  x <- suppressWarnings(SCTransform(x, verbose = TRUE))
  x <- RunPCA(x, verbose = TRUE)
  x <- RunUMAP(x, dims = 1:30, verbose = TRUE)
  x <- FindNeighbors(x, dims = 1:30, verbose = TRUE)
  x <- FindClusters(x, resolution = 0.5, verbose = TRUE)
  x
}

# Seurat RNA Label transfer
#
# Finds transfer anchors between reference and query (SCT normalization,
# reference reduction "spca", dims 1:50), then transfers the three cell type
# label levels and the 'ADT' assay from the reference onto the query.
#
# reference: annotated reference Seurat object
# query: Seurat object to annotate
# recompute.residuals: forwarded to FindTransferAnchors(); the default TRUE
#   matches that function's own default, so existing two-argument calls
#   behave exactly as before
# returns: the query object with the transferred data added
label_transfer_func <- function(reference, query, recompute.residuals = TRUE){
  anchors <- FindTransferAnchors(
    reference = reference,
    query = query,
    normalization.method = "SCT",
    reference.reduction = "spca",
    dims = 1:50,
    recompute.residuals = recompute.residuals
  )
  TransferData(
    anchorset = anchors, reference = reference, query = query,
    refdata = list(
      celltype.l1 = "celltype.l1",
      celltype.l2 = "celltype.l2",
      celltype.l3 = "celltype.l3",
      predicted_ADT = 'ADT'
    )
  )
}

# Variant of label_transfer_func() that does not recompute residuals when
# finding anchors; everything else is shared with the main function.
label_transfer_func_alt <- function(reference, query){
  label_transfer_func(reference, query, recompute.residuals = FALSE)
}

## Load Data & Create Seurat Objects

In [None]:
# list.files() interprets `pattern` as a regular expression, not a shell glob,
# so match the literal ".h5" extension at the end of the file name
all_h5 <- list.files(path = '../ped_vs_senior_h5', pattern = '\\.h5$', full.names = TRUE)
all_h5

In [None]:
# Read every h5 file with read_h5_seurat; warnings raised while reading are hidden
all_so_list <- suppressWarnings(
  lapply(X = all_h5, FUN = read_h5_seurat)
)

In [None]:
# Extract the ADT matrix from every h5 file, one list element per file
all_adt_mtx_list <- lapply(X = all_h5, FUN = adt_info_func)

In [None]:
# Fold the per-file Seurat objects into one object by merging them pairwise
all_so <- Reduce(f = merge, x = all_so_list)

In [None]:
# Print the merged object
all_so

In [None]:
# Column-bind the per-file ADT matrices into a single matrix
all_adt_mtx <- do.call(what = cbind, args = all_adt_mtx_list)

In [None]:
# Sanity check: element-wise comparison of the RNA assay column names with the
# merged ADT matrix column names, tabulated as TRUE/FALSE counts
table(colnames(all_so[['RNA']]) == colnames(all_adt_mtx))

In [None]:
# Store the merged ADT matrix as the counts of a new 'ADT' assay
all_so[['ADT']] <- CreateAssayObject(counts = all_adt_mtx)

### Subset only D0 samples

In [None]:
# Use the sample ID as the active identity, then keep only the listed samples
all_so <- SetIdent(all_so, value = 'pbmc_sample_id')
d0_so <- subset(
  all_so,
  idents = c(
    'PB00593-04', 'PB00323-02', 'PB00368-04', 'PB00353-03',
    'PB00334-03', 'PB00377-03', 'PB00545-02', 'PB00599-02',
    'PB00124-02', 'PB00172-02', 'PB00182-02', 'PB00127-02',
    'PB00807-02', 'PB00173-02', 'PB00192-02', 'PB00197-02'
  )
)

In [None]:
# Print the subsetted object
d0_so

### Import LSI & subset cells to align w/ ATAC data

In [None]:
# Load the saved LSI object from disk
lsi <- readRDS(file = "../PedSen_d0_LSI.RDS")

In [None]:
# Number of row names in the LSI object
length(rownames(lsi))

#### Align barcodes

In [None]:
# Build "<well_id>#<original_barcodes>-1" strings from the cell metadata
d0_metadata <- d0_so@meta.data
archr_style_barcodes <- paste0(
  d0_metadata$well_id, "#", d0_metadata$original_barcodes, "-1"
)

In [None]:
# Lookup table pairing each Seurat barcode with its ArchR-style barcode
bc_df <- data.frame(
  seurat_bcs = colnames(d0_so@assays$RNA),
  archr_style = archr_style_barcodes
)

In [None]:
# Barcodes present both as LSI row names and among the ArchR-style barcodes
bc_idx <- intersect(x = rownames(lsi), y = archr_style_barcodes)

In [None]:
# Keep only the shared barcodes. The LSI rows are selected by name in the
# order of the barcode table, so row i of lsi_subset is guaranteed to belong
# to row i of bc_df_subset instead of relying on both inputs happening to be
# sorted the same way.
bc_df_subset <- bc_df[bc_df$archr_style %in% bc_idx, , drop = FALSE]
lsi_subset <- lsi[as.character(bc_df_subset$archr_style), , drop = FALSE]

In [None]:
# Sanity check: element-wise comparison of the ArchR-style barcodes with the
# LSI row names, tabulated as TRUE/FALSE counts
table(bc_df_subset$archr_style == rownames(lsi_subset))

In [None]:
# replace LSI archr barcodes for seurat barcodes
# Enforce that both tables list the same cells in the same order first;
# otherwise the assignment would silently attach embeddings to the wrong cells
stopifnot(identical(as.character(bc_df_subset$archr_style), rownames(lsi_subset)))
rownames(lsi_subset) <- bc_df_subset$seurat_bcs

In [None]:
# Keep only the cells whose barcodes were matched to the LSI object
d0_so <- subset(x = d0_so, cells = bc_df_subset$seurat_bcs)

In [None]:
# Print the object after restricting it to the matched barcodes
d0_so

#### Create placeholder matrix for LSI import

In [None]:
# Placeholder matrix of ones: five dummy rows and one column per retained cell.
# The column count is taken from the barcode table instead of a hard-coded
# number, so the matrix always fits the column names assigned below.
tile_mtx <- matrix(data = 1, nrow = 5, ncol = nrow(bc_df_subset))
colnames(tile_mtx) <- bc_df_subset$seurat_bcs
rownames(tile_mtx) <- c("A","B","C",'D','E')

In [None]:
# Register the placeholder matrix as a 'Tiles' assay, then attach the LSI
# embedding as a reduction associated with that assay
d0_so[['Tiles']] <- CreateAssayObject(counts = tile_mtx)
d0_so[["lsit"]] <- CreateDimReducObject(
  embeddings = lsi_subset,
  key = "lsit_",
  assay = "Tiles"
)

## QC Filtering

In [None]:
options(repr.plot.width = 20, repr.plot.height = 6)
# Per-cell percentage computed over features matching "^MT-"
d0_so[["percent.mt"]] <- PercentageFeatureSet(d0_so, pattern = "^MT-")
# QC metrics per sample, with the median overlaid as a black point
VlnPlot(
  d0_so,
  features = c('nFeature_RNA','nCount_RNA','nCount_ADT','percent.mt'),
  log = FALSE,
  pt.size = 0,
  group.by = 'pbmc_sample_id',
  ncol = 2
) & stat_summary(fun = median, geom = "point", color = "black")

### Moderate QC Filtering

In [None]:
# Keep cells that pass every threshold on feature count, RNA/ADT counts and percent.mt
filtered_d0 <- subset(
  d0_so,
  subset = nFeature_RNA > 250 &
    nCount_RNA > 500 &
    nCount_ADT < 10000 &
    percent.mt < 35 &
    nCount_RNA < 20000
)
filtered_d0

In [None]:
options(repr.plot.width = 20, repr.plot.height = 6)
# Same QC violin plots as before, now on the filtered object
VlnPlot(
  filtered_d0,
  features = c('nFeature_RNA','nCount_RNA','nCount_ADT','percent.mt'),
  log = FALSE,
  pt.size = 0,
  group.by = 'pbmc_sample_id',
  ncol = 2
) & stat_summary(fun = median, geom = "point", color = "black")

### Remove old objects to save memory

In [None]:
# Drop the intermediate objects that are no longer referenced
rm(d0_so, all_so, all_so_list, all_adt_mtx, all_adt_mtx_list)

## Clustering of RNA (SCTransform)

In [None]:
# Run the RNA processing helper on the filtered object
filtered_d0 <- seurat_proc_func(x = filtered_d0)

In [None]:
options(repr.plot.width = 20, repr.plot.height = 8)
# Left: labelled by active identity; right: colored by sample
p1 <- DimPlot(filtered_d0, reduction = 'umap', label = TRUE, label.size = 6) +
  NoLegend()
p2 <- DimPlot(filtered_d0, reduction = 'umap', group.by = 'pbmc_sample_id')
p1 + p2

### V4 Label Transfer

In [None]:
options(repr.plot.width = 12, repr.plot.height = 6)
# Load the reference object and plot its 'wnn.umap' reduction by celltype.l2
reference <- readRDS(
  file = "../Seurat_Objects/Hao-2021_PBMC-Multimodal-Reference_SeuratObject.rds"
)
DimPlot(
  reference,
  reduction = 'wnn.umap',
  group.by = 'celltype.l2',
  label = TRUE,
  repel = TRUE
) + NoLegend()

In [None]:
# Transfer reference labels onto the filtered object using the alt helper
filtered_d0 <- label_transfer_func_alt(
  reference = reference,
  query = filtered_d0
)

In [None]:
options(repr.plot.width = 20, repr.plot.height = 8)
# Left: active identities; right: predicted celltype.l2 labels
p3 <- DimPlot(filtered_d0, label = TRUE) + NoLegend()
p4 <- DimPlot(filtered_d0, label = TRUE, group.by = 'predicted.celltype.l2') +
  NoLegend()
p3 + p4

#### Remove Contaminating Cells

In [None]:
options(repr.plot.width = 20, repr.plot.height = 8)
# Marker features per cluster (SCT_snn_res.0.5), three panels per row
VlnPlot(
  filtered_d0,
  features = c(
    'sct_CD3G', 'sct_CD3E', 'sct_CD19',
    'sct_CD14', 'sct_ITGAM', 'sct_ITGAX',
    'sct_IL3RA', 'sct_NCAM1', 'sct_SLAMF7'
  ),
  group.by = 'SCT_snn_res.0.5',
  ncol = 3
)

In [None]:
options(repr.plot.width = 20, repr.plot.height = 8)
# Density plots of the same marker features on the UMAP, three per row
nebulosa_p1 <- plot_density(
  filtered_d0,
  c(
    'sct_CD3G', 'sct_CD3E', 'sct_CD19',
    'sct_CD14', 'sct_ITGAM', 'sct_ITGAX',
    'sct_IL3RA', 'sct_NCAM1', 'sct_SLAMF7'
  ),
  reduction = 'umap'
)
nebulosa_p1 + plot_layout(ncol = 3)

In [None]:
# Cell counts per predicted celltype.l1 label
table(filtered_d0$predicted.celltype.l1)

In [None]:
# Clusters to drop: 23 = B, 25 = mono/dc
# Switch the active identity to the SCT clustering, then exclude those clusters
filtered_d0 <- SetIdent(filtered_d0, value = 'SCT_snn_res.0.5')
filtered_d0 <- subset(
  filtered_d0,
  idents = c(23, 25),
  invert = TRUE
)

### Recluster RNA

In [None]:
# Re-run SCTransform (regressing out percent.mt, warnings hidden), then PCA
filtered_d0 <- suppressWarnings(
  SCTransform(filtered_d0, vars.to.regress = 'percent.mt')
)
filtered_d0 <- RunPCA(filtered_d0)

In [None]:
# UMAP, neighbor graph and clustering on the first 30 dimensions
filtered_d0 <- RunUMAP(filtered_d0, dims = 1:30, verbose = FALSE)
filtered_d0 <- FindNeighbors(filtered_d0, dims = 1:30, verbose = TRUE)
filtered_d0 <- FindClusters(filtered_d0, resolution = 0.5, verbose = TRUE)

In [None]:
options(repr.plot.width = 20, repr.plot.height = 10)
# Four UMAP views: clusters, predicted celltype.l2, batch and sample
p1_rna <- DimPlot(
  filtered_d0, reduction = 'umap', group.by = 'SCT_snn_res.0.5', label = TRUE
) + NoLegend()
p2_rna <- DimPlot(
  filtered_d0, reduction = 'umap', group.by = 'predicted.celltype.l2',
  label = TRUE, repel = TRUE
) + NoLegend()
p3_rna <- DimPlot(filtered_d0, reduction = 'umap', group.by = 'batch_id')
p4_rna <- DimPlot(filtered_d0, reduction = 'umap', group.by = 'pbmc_sample_id')
p1_rna + p2_rna + p3_rna + p4_rna

### Re-label Transfer w/ only T cells

In [None]:
options(repr.plot.width = 8, repr.plot.height = 8)
# Make celltype.l1 the active identity of the reference and plot it
reference <- SetIdent(reference, value = 'celltype.l1')
DimPlot(object = reference)

In [None]:
# Restrict the reference to the three T cell identities
t_ref <- subset(
  reference,
  idents = c('CD4 T','CD8 T','other T')
)

In [None]:
# Label transfer against the T-cell-only reference, run on the SCT assay
DefaultAssay(filtered_d0) <- 'SCT'
t_anchors <- FindTransferAnchors(
  reference = t_ref,
  query = filtered_d0,
  normalization.method = "SCT",
  reference.reduction = "spca",
  dims = 1:50,
  recompute.residuals = FALSE
)
# The three label levels are stored under t_-prefixed names
filtered_d0 <- TransferData(
  anchorset = t_anchors,
  reference = t_ref,
  query = filtered_d0,
  refdata = list(
    t_celltype.l1 = "celltype.l1",
    t_celltype.l2 = "celltype.l2",
    t_celltype.l3 = "celltype.l3"
  )
)
filtered_d0

In [None]:
options(repr.plot.width = 20, repr.plot.height = 10)
# Four UMAP views: clusters, predicted T cell l2 labels, batch and sample
p1_rna <- DimPlot(
  filtered_d0, reduction = 'umap', group.by = 'SCT_snn_res.0.5', label = TRUE
) + NoLegend()
p2_rna <- DimPlot(filtered_d0, reduction = 'umap', group.by = 'predicted.t_celltype.l2')
p3_rna <- DimPlot(filtered_d0, reduction = 'umap', group.by = 'batch_id')
p4_rna <- DimPlot(filtered_d0, reduction = 'umap', group.by = 'pbmc_sample_id')
p1_rna + p2_rna + p3_rna + p4_rna

In [None]:
# Write the labelled object to disk
saveRDS(
  object = filtered_d0,
  file = '../Seurat_Objects/filtered_d0_label_transfer.rds'
)

In [None]:
# Print R version and package versions for this session
sessionInfo()