# Setup

In [None]:
# Attach a package via library() while muting its package startup messages.
# Everything passed in `...` is forwarded untouched to library().
quiet_library <- function(...) {
  suppressPackageStartupMessages(
    library(...)
  )
}
quiet_library(Seurat)
quiet_library(ggplot2)
quiet_library(Matrix)
quiet_library(H5weaver)
quiet_library(dplyr)
quiet_library(viridis)
quiet_library(harmony)
quiet_library(Nebulosa)
quiet_library(tidyr)

## Load Data

In [None]:
# Read the saved query object; later cells use its `gating_celltype` metadata
# and its 'SCT' assay.
sp_merge <- readRDS(
  file = "../Seurat_Objects/sp_merge_gating_celltype_updated.rds"
)

In [None]:
# Read the saved reference object that the label transfers below draw from.
ref <- readRDS(
  "/home/jupyter/reference/Hao-2021_PBMC-Multimodal-Reference_SeuratObject.rds"
)

## Setup Reference for Single Positive Label Transfer

In [None]:
# Use the level-1 labels as identities, then keep only the three T cell groups.
ref <- SetIdent(ref, value = "celltype.l1")
t_cells_ref <- subset(
  ref,
  idents = c("CD4 T", "CD8 T", "other T")
)

In [None]:
# Count the reference T cells per level-2 label.
table(t_cells_ref$celltype.l2)

In [None]:
# Switch identities to the level-2 labels, remove the 'dnT' population from
# the T cell reference, and tabulate the level-2 labels that remain.
t_cells_ref <- SetIdent(t_cells_ref, value = "celltype.l2")
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
sp_t_ref <- subset(t_cells_ref, idents = "dnT", invert = TRUE)
table(sp_t_ref$celltype.l2)

In [None]:
# Plot the filtered reference with identity labels drawn on the plot;
# NoLegend() removes the legend.
DimPlot(sp_t_ref, label = TRUE) & NoLegend()

# Label Transfer for Single Positive (SP) Cells

In [None]:
# Find transfer anchors between the filtered T cell reference and the query,
# with the query's 'SCT' assay active and the reference's "spca" reduction.
DefaultAssay(sp_merge) <- "SCT"
anchors <- FindTransferAnchors(
  reference = sp_t_ref,
  query = sp_merge,
  normalization.method = "SCT",
  reference.reduction = "spca",
  dims = 1:50,
  recompute.residuals = FALSE
)

In [None]:
# Transfer the three reference label levels onto the query cells. Later cells
# read the level-2 result from the `predicted.sp_celltype.l2` metadata column.
sp_merge <- TransferData(
  anchorset = anchors,
  reference = sp_t_ref,
  query = sp_merge,
  refdata = list(
    sp_celltype.l1 = "celltype.l1",
    sp_celltype.l2 = "celltype.l2",
    sp_celltype.l3 = "celltype.l3"
  )
)

In [None]:
# Count the query cells per transferred level-2 label.
table(sp_merge$predicted.sp_celltype.l2)

# RNA vs ADT Labels

## Align cell types

In [None]:
# List the distinct transferred level-2 labels present in the query.
unique(sp_merge$predicted.sp_celltype.l2)

In [None]:
# List the distinct gating cell type labels present in the query.
unique(sp_merge$gating_celltype)

In [None]:
# Use the gating labels as identities and drop the cells gated as
# 'CD4 Unk' or 'CD8 Unk'.
sp_merge <- SetIdent(sp_merge, value = "gating_celltype")
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
sp_subset <- subset(
  sp_merge,
  idents = c("CD4 Unk", "CD8 Unk"),
  invert = TRUE
)

In [None]:
# rm(sp_merge)

Rename the gating subsets so that they align with the cell type labels produced by label transfer.

In [None]:
# Collapse the gating labels onto the label-transfer vocabulary
# (CM -> TCM; EM1, EM2 and TEMRA -> TEM) and keep the result in the
# `gating_consensus` metadata column.
sp_subset <- SetIdent(sp_subset, value = "gating_celltype")
sp_subset <- RenameIdents(
  sp_subset,
  # CD8 compartment
  "CD8 Naive" = "CD8 Naive",
  "CD8 CM" = "CD8 TCM",
  "CD8 EM1" = "CD8 TEM",
  "CD8 EM2" = "CD8 TEM",
  "CD8 TEMRA" = "CD8 TEM",
  # 'MAIT' = 'MAIT',
  # CD4 compartment
  "CD4 Naive" = "CD4 Naive",
  "CD4 CM" = "CD4 TCM",
  "CD4 EM1" = "CD4 TEM",
  "CD4 EM2" = "CD4 TEM",
  "CD4 TEMRA" = "CD4 TEM",
  "Treg" = "Treg"
)
sp_subset$gating_consensus <- Idents(sp_subset)

Remove any predicted cell types that are not part of our gating scheme.

In [None]:
# Use the RNA label-transfer predictions as identities and discard the
# predicted cell types that have no counterpart in the gating scheme.
sp_subset <- SetIdent(sp_subset, value = "predicted.sp_celltype.l2")
# Only request identities that are actually present in the predictions;
# NOTE(review): subset() is expected to raise an error when asked for an
# identity that does not exist in the object - confirm against the Seurat docs.
ungated_types <- intersect(
  c("CD8 Proliferating", "CD4 Proliferating", "gdT", "MAIT"),
  levels(Idents(sp_subset))
)
if (length(ungated_types) > 0) {
  sp_subset <- subset(sp_subset, idents = ungated_types, invert = TRUE)
}

In [None]:
# Map the RNA-predicted level-2 labels onto the shared vocabulary
# ('CD4 CTL' is folded into 'CD4 TEM') and keep the result in the
# `predicted_consensus` metadata column.
sp_subset <- SetIdent(sp_subset, value = "predicted.sp_celltype.l2")
sp_subset <- RenameIdents(
  sp_subset,
  "CD8 Naive" = "CD8 Naive",
  "CD4 Naive" = "CD4 Naive",
  "CD8 TEM" = "CD8 TEM",
  "CD8 TCM" = "CD8 TCM",
  "CD4 TEM" = "CD4 TEM",
  "Treg" = "Treg",
  "CD4 TCM" = "CD4 TCM",
  # 'MAIT' = 'MAIT',
  "CD4 CTL" = "CD4 TEM"
)
sp_subset$predicted_consensus <- Idents(sp_subset)

## Export labels

In [None]:
# One row per retained cell: cell name, consensus RNA label-transfer label and
# consensus gating label. Show the first rows as a sanity check.
adt_rna_df <- data.frame(
  cellnames = rownames(sp_subset@meta.data),
  rna_l2 = sp_subset$predicted_consensus,
  gating = sp_subset$gating_consensus
)
head(adt_rna_df)

In [None]:
# Save the RNA-vs-gating label table. The path previously contained a stray
# double slash ('..//'); it now matches the form of the other saved tables.
saveRDS(adt_rna_df, file = "../02_Gating_Subsets/adt_rna_label_df.rds")

# ATAC vs ADT

In [None]:
quiet_library(ArchR)

In [None]:
# Configure ArchR: 8 threads and the hg38 genome.
addArchRThreads(threads = 8)
addArchRGenome(genome = "hg38")

In [None]:
# Create an 'RNA' assay on the reference from the counts stored in its
# SCT assay.
sp_t_ref[["RNA"]] <- CreateAssayObject(
  counts = sp_t_ref@assays$SCT@counts
)

In [None]:
# Make the new 'RNA' assay active, then normalize, pick variable features and
# scale, one step at a time.
DefaultAssay(sp_t_ref) <- "RNA"
sp_t_ref <- NormalizeData(sp_t_ref)
sp_t_ref <- FindVariableFeatures(sp_t_ref)
sp_t_ref <- ScaleData(sp_t_ref)

## Open project

In [None]:
# Load the saved ArchR project and display it.
proj <- loadArchRProject(
  path = "../PedSen_ATAC/"
)
proj

## Do level 1 Transfer

In [None]:
# Unconstrained (level-1) integration of the ATAC cells with the RNA reference.
#   useMatrix  - ATAC matrix used for the integration (can be changed)
#   matrixName - name of the matrix generated by this function; it contains
#                RNA expression data from scATAC cell to RNA cell
#   addToArrow - FALSE is used here to avoid HDF5 errors
#   nameGroup  - metadata column created with the ATAC cell labels
#   nameScore  - metadata column created with the ATAC cell label scores
proj <- addGeneIntegrationMatrix(
  ArchRProj = proj,
  useMatrix = "GeneScoreMatrix",
  matrixName = "GeneIntegrationMatrix",
  reducedDims = "IterativeLSI",
  seRNA = sp_t_ref,
  addToArrow = FALSE,
  groupRNA = "celltype.l1",
  nameCell = "predictedCell_Un",
  nameGroup = "predictedGroup_Un",
  nameScore = "predictedScore_Un",
  force = TRUE
)

In [None]:
# UMAP colored by the level-1 predicted group, then by the gating cell type.
plotEmbedding(
  proj,
  embedding = "UMAP",
  colorBy = "cellColData",
  name = "predictedGroup_Un"
)
plotEmbedding(
  proj,
  embedding = "UMAP",
  colorBy = "cellColData",
  name = "gating_celltype"
)

## Do Level 2 Transfer

In [None]:
# Count the ATAC cells per level-1 predicted group.
table(proj$predictedGroup_Un)

In [None]:
# Split the level-1 groups predicted for the ATAC cells into CD4, CD8 and
# "other" sets; these sets constrain the level-2 transfer.
totalList <- names(table(proj$predictedGroup_Un))

# The patterns are literal text, so match them as fixed strings.
cd4 <- totalList[grepl("CD4", totalList, fixed = TRUE)]
cd8 <- totalList[grepl("CD8", totalList, fixed = TRUE)]
other <- totalList[grepl("other", totalList, fixed = TRUE)]
# Double check your lists: the two numbers displayed should be equal
length(totalList)
length(cd4) + length(cd8) + length(other)

# Make a mismatch visible instead of relying on reading the two numbers.
unassigned <- setdiff(totalList, c(cd4, cd8, other))
if (length(unassigned) > 0) {
  warning(
    "Predicted level-1 groups not matched by CD4/CD8/other: ",
    paste(unassigned, collapse = ", "),
    call. = FALSE
  )
}

# Drop sets with no predicted label so that no group with zero ATAC and zero
# RNA cells is built from them downstream.
id_clusters <- Filter(
  function(labels) length(labels) > 0,
  list(cd4, cd8, other)
)

In [None]:
# Constrained labeling: addGeneIntegrationMatrix() needs to be told which RNA
# cells generally align to which ATAC cells, which reduces the search space.
# For each set of level-1 labels, pair the ATAC cells predicted into that set
# with the reference RNA cells carrying one of those level-1 labels.
groupList <- lapply(unname(id_clusters), function(group_labels) {
  ref_meta <- sp_t_ref@meta.data
  rna_in_group <- ref_meta$celltype.l1 %in% group_labels
  atac_in_group <- proj$predictedGroup_Un %in% group_labels

  list(
    ATAC = proj$cellNames[atac_in_group],
    RNA = rownames(ref_meta)[rna_in_group]
  )
})

In [None]:
# Constrained (level-2) integration: each ATAC group is matched only against
# the RNA cells listed for it in `groupList`, and cells are labeled with the
# reference `celltype.l2` labels.
#   addToArrow - FALSE is used here to avoid HDF5 errors
proj <- addGeneIntegrationMatrix(
  ArchRProj = proj,
  useMatrix = "GeneScoreMatrix",
  matrixName = "GeneIntegrationMatrix",
  reducedDims = "IterativeLSI",
  seRNA = sp_t_ref,
  addToArrow = FALSE,
  groupList = groupList,
  groupRNA = "celltype.l2",
  nameCell = "predictedCell_l2",
  nameGroup = "predictedGroup_l2",
  nameScore = "predictedScore_l2",
  force = TRUE
)

In [None]:
# UMAP colored by the level-2 predicted group, then by the gating cell type.
plotEmbedding(
  proj,
  embedding = "UMAP",
  colorBy = "cellColData",
  name = "predictedGroup_l2"
)
plotEmbedding(
  proj,
  embedding = "UMAP",
  colorBy = "cellColData",
  name = "gating_celltype"
)

In [None]:
# Write the project, now carrying the label-transfer columns, back to its
# directory. Spell out FALSE: `F` is an ordinary variable that can be reassigned.
saveArchRProject(
  ArchRProj = proj,
  outputDirectory = "../PedSen_ATAC/",
  load = FALSE
)

In [None]:
# Table of ATAC level-2 predictions, with `proj$cellNames_clean` as row names.
# NOTE(review): presumably these names match the Seurat cell names - the
# overlap is checked in the next cell.
l2_df <- data.frame(
  atac_l2 = proj$predictedGroup_l2,
  row.names = proj$cellNames_clean
)
head(l2_df)

In [None]:
# Overlap check in both directions: Seurat cells found in the ATAC table,
# then ATAC table rows found among the Seurat cells.
table(
  colnames(sp_merge[["RNA"]]) %in% rownames(l2_df)
)
table(
  rownames(l2_df) %in% colnames(sp_merge[["RNA"]])
)

In [None]:
# Attach the ATAC level-2 predictions to the Seurat object as `atac_l2`.
sp_merge <- AddMetaData(
  object = sp_merge,
  metadata = l2_df,
  col.name = "atac_l2"
)

In [None]:
# Count the cells per ATAC-derived level-2 label.
table(sp_merge$atac_l2)

In [None]:
# Use the gating labels as identities and drop the cells gated as
# 'CD4 Unk' or 'CD8 Unk'.
sp_merge <- SetIdent(sp_merge, value = "gating_celltype")
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
sp_subset <- subset(
  sp_merge,
  idents = c("CD4 Unk", "CD8 Unk"),
  invert = TRUE
)

In [None]:
# Collapse the gating labels onto the label-transfer vocabulary
# (CM -> TCM; EM1, EM2 and TEMRA -> TEM) and keep the result in the
# `gating_consensus` metadata column.
sp_subset <- SetIdent(sp_subset, value = "gating_celltype")
sp_subset <- RenameIdents(
  sp_subset,
  # CD8 compartment
  "CD8 Naive" = "CD8 Naive",
  "CD8 CM" = "CD8 TCM",
  "CD8 EM1" = "CD8 TEM",
  "CD8 EM2" = "CD8 TEM",
  "CD8 TEMRA" = "CD8 TEM",
  # 'MAIT' = 'MAIT',
  # CD4 compartment
  "CD4 Naive" = "CD4 Naive",
  "CD4 CM" = "CD4 TCM",
  "CD4 EM1" = "CD4 TEM",
  "CD4 EM2" = "CD4 TEM",
  "CD4 TEMRA" = "CD4 TEM",
  "Treg" = "Treg"
)
sp_subset$gating_consensus <- Idents(sp_subset)

In [None]:
# Use the ATAC label-transfer predictions as identities and discard the
# predicted cell types that have no counterpart in the gating scheme.
sp_subset <- SetIdent(sp_subset, value = "atac_l2")
# Only request identities that are actually present in the predictions;
# NOTE(review): subset() is expected to raise an error when asked for an
# identity that does not exist in the object - confirm against the Seurat docs.
ungated_types <- intersect(
  c("CD8 Proliferating", "CD4 Proliferating", "MAIT", "gdT"),
  levels(Idents(sp_subset))
)
if (length(ungated_types) > 0) {
  sp_subset <- subset(sp_subset, idents = ungated_types, invert = TRUE)
}

In [None]:
# Map the ATAC-predicted level-2 labels onto the shared vocabulary
# ('CD4 CTL' is folded into 'CD4 TEM') and keep the result in the
# `predicted_consensus` metadata column.
sp_subset <- SetIdent(sp_subset, value = "atac_l2")
sp_subset <- RenameIdents(
  sp_subset,
  "CD8 Naive" = "CD8 Naive",
  "CD4 Naive" = "CD4 Naive",
  "CD8 TEM" = "CD8 TEM",
  "CD8 TCM" = "CD8 TCM",
  "CD4 TEM" = "CD4 TEM",
  "Treg" = "Treg",
  "CD4 TCM" = "CD4 TCM",
  # 'MAIT' = 'MAIT',
  "CD4 CTL" = "CD4 TEM"
)
sp_subset$predicted_consensus <- Idents(sp_subset)

In [None]:
# One row per retained cell: cell name, consensus ATAC label-transfer label
# and consensus gating label. Show the first rows as a sanity check.
adt_atac_df <- data.frame(
  cellnames = rownames(sp_subset@meta.data),
  atac_l2 = sp_subset$predicted_consensus,
  gating = sp_subset$gating_consensus
)
head(adt_atac_df)

In [None]:
# Save the ATAC-vs-gating label table.
saveRDS(
  adt_atac_df,
  file = "../02_Gating_Subsets/adt_atac_label_df.rds"
)

# RNA vs ATAC Label Transfer

In [None]:
# One row per cell of the full object: cell name with its ATAC-derived and
# RNA-derived level-2 labels (no consensus renaming applied here).
rna_atac_df <- data.frame(
  cellnames = rownames(sp_merge@meta.data),
  atac = sp_merge$atac_l2,
  rna = sp_merge$predicted.sp_celltype.l2
)

In [None]:
# Save the RNA-vs-ATAC label table.
saveRDS(
  rna_atac_df,
  file = "../02_Gating_Subsets/rna_atac_df.rds"
)