# Pre-RA teaseq analysis - object creation
- create Seurat object from H5
- perform initial qc

### Load Libraries

In [None]:
# load libraries
# Attach a package while muting its start-up messages.
# Arguments are forwarded unchanged to library().
quiet_library <- function(...) {
    suppressPackageStartupMessages({
        library(...)
    })
}
quiet_library(tidyverse)
quiet_library(Seurat)
quiet_library(ggplot2)
quiet_library(Matrix)
quiet_library(H5weaver)
quiet_library(dplyr)
quiet_library(viridis)
quiet_library(harmony)
quiet_library(Nebulosa)
quiet_library(future)
quiet_library(future.apply)
quiet_library(H5weaver)
quiet_library(SingleCellExperiment)


In [None]:
# define file paths
fig_path <- "/home/jupyter/figures/preRA_teaseq/qc_filtering"
data_path <- "/home/jupyter/data/preRA_teaseq/EXP-00243"
meta_path <- "/home/jupyter/data/preRA_teaseq/meta_data"
output_path <- "/home/jupyter/data/preRA_teaseq/output_results"
# create the figure and output directories (including parents) when missing
if (!dir.exists(fig_path)) {
    dir.create(fig_path, recursive = TRUE)
}
if (!dir.exists(output_path)) {
    dir.create(output_path, recursive = TRUE)
}
# define a project name (used as prefix for saved figures and objects)
proj_name <- "PreRA_teaseq"


In [None]:
# Check number of cores
future::availableCores()
# Set up parallel processing to run when using 'future' functions.
# Three cores are left free, but at least one worker is always requested so
# the plan stays valid on machines with three or fewer cores.
future::plan(strategy = "multicore", workers = max(1L, future::availableCores() - 3L))
# raise the future.globals.maxSize option to 1000 * 1024^5 bytes
options(future.globals.maxSize = 1000 * 1024^5)
# to turn off parallel processing run line below
# future::plan(strategy = "sequential")


In [None]:
# colour palettes used for plotting (hex strings)
npg_color <- c(
    "#E64B35FF", "#4DBBD5FF", "#00A087FF", "#3C5488FF", "#F39B7FFF",
    "#8491B4FF", "#91D1C2FF", "#DC0000FF", "#7E6148FF", "#B09C85FF"
)
nejm_color <- c(
    "#BC3C29FF", "#0072B5FF", "#E18727FF", "#20854EFF",
    "#7876B1FF", "#6F99ADFF", "#FFDC91FF", "#EE4C97FF"
)
jama_color <- c(
    "#374E55FF", "#DF8F44FF", "#00A1D5FF", "#B24745FF",
    "#79AF97FF", "#6A6599FF", "#80796BFF"
)
jco_color <- c(
    "#0073C2FF", "#EFC000FF", "#868686FF", "#CD534CFF",
    "#7AA6DCFF", "#003C67FF", "#8F7700FF"
)
# 30 colours for cluster labels
cluster_colors <- c(
    "#DC050C", "#FB8072", "#1965B0", "#7BAFDE", "#882E72", "#B17BA6",
    "#FF7F00", "#FDB462", "#E7298A", "#E78AC3", "#33A02C", "#B2DF8A",
    "#55A1B1", "#8DD3C7", "#A6761D", "#E6AB02", "#7570B3", "#BEAED4",
    "#666666", "#999999", "#aa8282", "#d4b7b7", "#8600bf", "#ba5ce3",
    "#808000", "#aeae5c", "#1e90ff", "#00bfff", "#56ff0d", "#ffff00"
)

### Helper Functions

In [None]:
# Read the ADT count matrix from an H5 file.
#   path: location of the H5 file
# Returns the `ADT_dgCMatrix` element produced by
# h5_list_convert_to_dgCMatrix(target = "ADT").
adt_info_func <- function(path) {
  converted <- h5_list_convert_to_dgCMatrix(h5dump(path), target = "ADT")
  converted[["ADT_dgCMatrix"]]
}

# Seurat RNA Pipeline (SCTransform -> PCA -> UMAP -> neighbors -> clusters)
#   x:          Seurat object with an "RNA" assay and a "percent.mt" metadata
#               column (regressed out during SCTransform)
#   resolution: clustering resolution passed to FindClusters()
#   dims:       PCA dimensions used for UMAP and the neighbor graph
# Returns the processed Seurat object.
seurat_proc_func <- function(x, resolution = 0.5, dims = 1:30) {
  DefaultAssay(x) <- "RNA"
  # x <- NormalizeData(x) %>% FindVariableFeatures() %>% ScaleData() %>% RunPCA()
  x <- suppressWarnings(SCTransform(x, verbose = TRUE, vars.to.regress = "percent.mt"))
  x <- RunPCA(x, verbose = TRUE)
  x <- RunUMAP(x, dims = dims, verbose = TRUE) %>%
    FindNeighbors(dims = dims, verbose = TRUE) %>%
    FindClusters(resolution = resolution, verbose = TRUE, future.seed = TRUE)
  x
}

# Quick RNA clustering without SCTransform:
# NormalizeData -> FindVariableFeatures -> ScaleData -> PCA -> UMAP ->
# neighbors -> clusters, all on the "RNA" assay with dims 1:30.
#   x:          Seurat object
#   resolution: clustering resolution passed to FindClusters()
# Returns the processed Seurat object.
quick_rna_clustering <- function(x, resolution = 0.5) {
  DefaultAssay(x) <- "RNA"
  x <- NormalizeData(x, future.seed = TRUE)
  x <- FindVariableFeatures(x)
  x <- ScaleData(x)
  x <- RunPCA(x, verbose = FALSE)
  # x <- suppressWarnings(SCTransform(x, verbose = FALSE))
  # x <- RunPCA(x, verbose = FALSE)
  x <- RunUMAP(x, dims = 1:30, verbose = FALSE)
  x <- FindNeighbors(x, dims = 1:30, verbose = FALSE)
  x <- FindClusters(x, resolution = resolution, verbose = FALSE, future.seed = TRUE)
  x
}

# ADT clustering only
#   x:          Seurat object with "ADT" and "SCT" assays
#   resolution: clustering resolution passed to FindClusters()
#               (default 0.5, the value previously hard-coded)
# All ADT features are used as variable features; the PCA is stored as "apca"
# and the UMAP as "adt_umap". The default assay is set to "SCT" before the
# object is returned.
adt_clustering_func <- function(x, resolution = 0.5) {
  DefaultAssay(x) <- "ADT"
  VariableFeatures(x) <- rownames(x[["ADT"]])
  x <- NormalizeData(x, normalization.method = "CLR", margin = 2) %>%
    ScaleData() %>%
    RunPCA(reduction.name = "apca")
  x <- RunUMAP(x, dims = 1:25, reduction = "apca", reduction.name = "adt_umap", future.seed = TRUE) %>%
    FindNeighbors(dims = 1:25, reduction = "apca", future.seed = TRUE) %>%
    FindClusters(resolution = resolution, future.seed = TRUE)
  DefaultAssay(x) <- "SCT"
  x
}

# ADT clustering by harmony
#   x:          Seurat object with an "ADT" assay and a "dataset" metadata
#               column (used as the harmony grouping variable)
#   resolution: clustering resolution passed to FindClusters()
# The ADT PCA ("apca") is corrected with harmony and stored as "harmony_adt";
# UMAP ("harmonyadt_umap"), neighbors and clusters are computed on it.
# The default assay is left as "ADT". Returns the processed Seurat object.
adt_clustering_by_harmony <- function(x, resolution = 0.5) {
  DefaultAssay(x) <- "ADT"
  VariableFeatures(x) <- rownames(x[["ADT"]])
  x <- NormalizeData(x, normalization.method = "CLR", margin = 2) %>%
    ScaleData() %>%
    RunPCA(reduction.name = "apca")
  x <- RunHarmony(
    object = x, group.by.vars = "dataset", reduction = "apca",
    assay.use = "ADT", reduction.save = "harmony_adt"
  )
  x <- RunUMAP(x, dims = 1:25, reduction = "harmony_adt", reduction.name = "harmonyadt_umap", verbose = FALSE) %>%
    FindNeighbors(dims = 1:25, reduction = "harmony_adt") %>%
    FindClusters(resolution = resolution)
  # return the object explicitly instead of relying on the invisible value
  # of the assignment above
  x
}

# ADT & RNA WNN clustering
#   x: Seurat object that already holds the "pca" and "apca" reductions
# Builds the weighted nearest-neighbor graph from pca (dims 1:30) and apca
# (dims 1:25), computes a UMAP stored as "wnn.umap" and clusters on the
# "wsnn" graph (algorithm 3, resolution 1).
# Returns the processed Seurat object.
wnn_clustering_func <- function(x) {
  x <- FindMultiModalNeighbors(
    x,
    reduction.list = list("pca", "apca"),
    dims.list = list(1:30, 1:25), modality.weight.name = "RNA.weight"
  )
  x <- RunUMAP(x, nn.name = "weighted.nn", reduction.name = "wnn.umap", reduction.key = "wnnUMAP_")
  x <- FindClusters(x, graph.name = "wsnn", algorithm = 3, resolution = 1, verbose = FALSE)
  # return the object explicitly instead of relying on the invisible value
  # of the assignment above
  x
}

# Seurat RNA Label transfer
#   reference:           reference Seurat object providing the "spca"
#                        reduction, the celltype.l1/l2/l3 annotations and an
#                        "ADT" assay
#   query:               Seurat object to annotate
#   recompute.residuals: forwarded to FindTransferAnchors(); TRUE matches the
#                        previous behaviour of this function, which did not
#                        set the argument
# Returns the query object with the transferred labels and predicted ADT.
label_transfer_func <- function(reference, query, recompute.residuals = TRUE) {
  anchors <- FindTransferAnchors(
    reference = reference,
    query = query,
    normalization.method = "SCT",
    reference.reduction = "spca",
    dims = 1:50,
    recompute.residuals = recompute.residuals
  )
  TransferData(
    anchorset = anchors, reference = reference, query = query,
    refdata = list(
      celltype.l1 = "celltype.l1",
      celltype.l2 = "celltype.l2",
      celltype.l3 = "celltype.l3",
      predicted_ADT = "ADT"
    )
  )
}

# Same label transfer, but without recomputing the residuals when finding
# the transfer anchors (recompute.residuals = FALSE).
label_transfer_func_alt <- function(reference, query) {
  label_transfer_func(
    reference = reference, query = query,
    recompute.residuals = FALSE
  )
}

# Three-way WNN clustering on the "pca", "harmony_adt" and "lsit" reductions.
#   x: Seurat object that already holds these three reductions
# The UMAP is stored as "wnn.3.umap"; clusters are found on the "wsnn" graph
# (algorithm 1, resolution 0.5). Returns the processed Seurat object.
wnn_3way_clustering_func <- function(x) {
  reductions_used <- list("pca", "harmony_adt", "lsit")
  dims_used <- list(1:40, 1:30, 1:29)
  x <- FindMultiModalNeighbors(
    x,
    reduction.list = reductions_used,
    dims.list = dims_used,
    modality.weight.name = "RNA.weight"
  )
  x <- RunUMAP(
    x,
    nn.name = "weighted.nn",
    reduction.name = "wnn.3.umap",
    reduction.key = "Uw3_"
  )
  FindClusters(
    x,
    graph.name = "wsnn", algorithm = 1,
    resolution = 0.5, verbose = TRUE
  )
}


## Load Data & Create Seurat Objects

In [None]:
# collect every H5 file below data_path (full paths).
# `pattern` is a regular expression, not a glob, so match the ".h5"
# extension at the end of the file name explicitly.
all_h5 <- list.files(
    path = data_path, recursive = TRUE,
    pattern = "\\.h5$",
    full.names = TRUE
)


In [None]:
# show the sample id found in each H5 file path;
# this only displays the ids, all_h5 itself is not filtered
str_extract(all_h5, 'PB\\d\\d\\d\\d\\d.\\d\\d|IMM\\d\\d(_|-)\\d\\d\\d')

In [None]:
# read each H5 file with read_h5_seurat() (one list element per file);
# warnings raised while reading are suppressed
all_so_list <- suppressWarnings(lapply(all_h5, read_h5_seurat))

In [None]:
# read the ADT matrix of every H5 file (same order as all_h5)
all_adt_mtx_list <- lapply(all_h5, adt_info_func)

In [None]:
# number and names of the ADT features in the first H5 dataset
length(rownames(all_adt_mtx_list[[1]]))
rownames(all_adt_mtx_list[[1]])

In [None]:
# merge all RNA seurat objects into a single object
# (pairwise merge, left to right over the list)
all_so <- Reduce(merge,all_so_list)

In [None]:
# column-bind all ADT matrices into one matrix
# NOTE(review): cbind assumes every matrix has the same ADT rows in the same order — confirm
all_adt_mtx <- do.call(cbind, all_adt_mtx_list)

In [None]:
# check that the barcodes match, position by position, between RNA and ADT
# (expect only TRUE in the table)
table(all_so$original_barcodes == colnames(all_adt_mtx))

In [None]:
# rename the ADT columns to the RNA cell names so both assays share them.
# The rename is positional, so stop first if the two sets of cells do not
# line up one-to-one.
stopifnot(
    ncol(all_adt_mtx) == ncol(all_so[['RNA']]),
    all(all_so$original_barcodes == colnames(all_adt_mtx))
)
colnames(all_adt_mtx) <- colnames(all_so[['RNA']])

In [None]:
# add the merged ADT matrix to the object as the "ADT" assay
all_so[['ADT']] <- CreateAssayObject(all_adt_mtx)

### Save Seurat object

In [None]:
# use the well id as the active identity and display the object summary
all_so <- SetIdent(all_so, value = 'well_id')
all_so

In [None]:
# display the object summary
all_so

In [None]:
# display the directory the object is saved to
data_path

In [None]:
# write the merged (unfiltered) object to data_path
saveRDS(
    all_so,
    file.path(data_path, paste0(proj_name, '_seurat_all_cells.rds'))
)

In [None]:
# reload the merged (unfiltered) seurat object from data_path
all_so <- readRDS(file.path(data_path, 'PreRA_teaseq_seurat_all_cells.rds'))

In [None]:
# ATAC information is added to the Seurat object in 101b_ATAC_Initial_Object_Creation.ipynb.ipynb
# note: some cells did not pass ATAC QC
# 138209 - 135995 

## QC Filtering

In [None]:
# load the seurat object with ATAC information
# NOTE(review): this is the same file name the merged object is saved under
# in this notebook — confirm that the ATAC notebook overwrites that file
all_so <- readRDS(file.path(data_path, "PreRA_teaseq_seurat_all_cells.rds"))


In [None]:
# run the QC analysis on the RNA data: make "RNA" the default assay
DefaultAssay(all_so) <- "RNA"

In [None]:
options(repr.plot.width = 20, repr.plot.height = 15)

# percentage of counts per cell from features whose name starts with "MT-"
all_so[["percent.mt"]] <- PercentageFeatureSet(all_so, pattern = "^MT-")
# violin plots of the QC metrics per well; the black point marks the median
VlnPlot(all_so, features = c('nFeature_RNA','nCount_RNA','nCount_ADT','percent.mt'), log = FALSE,
        pt.size = 0, group.by = 'well_id', ncol = 2) &
    stat_summary(fun=median, geom = "point", color="black")
# ggsave() without `plot` writes the last plot displayed above
ggsave(file.path(fig_path, paste0(proj_name, '_qc_plots.pdf')), width = 12, height = 8)

In [None]:
# number of cells per well / PBMC sample combination
well_sample_counts <- tally(group_by(all_so@meta.data, well_id, pbmc_sample_id))
# one box per well, one jittered point per sample
ggplot(well_sample_counts, aes(x=well_id, y=n)) +
    geom_boxplot() +
    geom_jitter(aes(colour=pbmc_sample_id)) +
    theme(
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
        text = element_text(size = 16)
    )
ggsave(
    file.path(fig_path, paste0(proj_name, '_well_counts.pdf')),
    width = 8, height = 6
)

In [None]:
# mitochondrial percentage per well
VlnPlot(all_so, features = 'percent.mt', group.by = 'well_id', log = FALSE)
ggsave(file.path(fig_path, paste0(proj_name, '_mt_genes_qc.pdf')))
# mitochondrial percentage against RNA counts, RNA features and ADT counts
p1 <- FeatureScatter(all_so, feature1 = "nCount_RNA", feature2 = "percent.mt")
p2 <- FeatureScatter(all_so, feature1 = "nFeature_RNA", feature2 = "percent.mt")
p3 <- FeatureScatter(all_so, feature1 = "nCount_ADT", feature2 = "percent.mt")
p1+p2+p3
# the scatter plots get their own file name; previously they were written
# to the same file as the violin plot above and overwrote it
ggsave(file.path(fig_path, paste0(proj_name, '_mt_genes_scatter_qc.pdf')))

In [None]:
# ADT counts per well, y axis limited to 0-20000
VlnPlot(all_so, features = 'nCount_ADT', group.by = 'well_id', log = FALSE) + ylim(0, 20000)

In [None]:
# check the cells in well EXP-00243-P1C2W6:
# subset that well and run the SCTransform clustering pipeline on it
p1c2w6 <- subset(all_so, well_id == "EXP-00243-P1C2W6")
p1c2w6 <- seurat_proc_func(p1c2w6, resolution = 0.5)
p1c2w6


In [None]:
options(repr.plot.width = 20, repr.plot.height = 15)
# UMAPs of the P1C2W6 well: clusters, RNA counts, cohort and subject
p1 <- DimPlot(p1c2w6, group.by = "SCT_snn_res.0.5", label = TRUE, reduction = "umap")
# nCount_RNA is continuous, so show it as a colour gradient with
# FeaturePlot instead of grouping cells by every distinct count value
p2 <- FeaturePlot(p1c2w6, features = "nCount_RNA", reduction = "umap")
p3 <- DimPlot(p1c2w6, group.by = "cohort", label = TRUE, reduction = "umap")
p4 <- DimPlot(p1c2w6, group.by = "subject_id", label = TRUE, reduction = "umap")
p1 + p2 + p3 + p4
ggsave(file.path(fig_path, paste0(proj_name, "_filtered_cells_p1c2w6_l2_seurat_label_rnaumap.pdf")),
       width = 12, height = 6
)


In [None]:
# QC metrics of the P1C2W6 well per subject; the black point marks the median
VlnPlot(p1c2w6,
    features = c("nFeature_RNA", "nCount_RNA", "nCount_ADT", "percent.mt"), log = FALSE,
    pt.size = 0, group.by = "subject_id", ncol = 2
) &
    stat_summary(fun = median, geom = "point", color = "black")


### Moderate QC Filtering

In [None]:
# moderate QC filter (nCount_ADT < 10000 is the adt cleaning step):
# keep cells with more than 250 RNA features, 500 < nCount_RNA < 20000,
# nCount_ADT < 10000 and less than 30 percent mitochondrial counts
all_so_fl <- subset(all_so, subset = nFeature_RNA > 250 & nCount_RNA > 500 &
    nCount_ADT < 10000 & percent.mt < 30 & nCount_RNA < 20000)

In [None]:
# list the distinct PBMC sample ids in the object metadata
# NOTE(review): this inspects the unfiltered all_so rather than all_so_fl, and
# in file order the well has not been removed yet — confirm the intended object
all_so[[]] %>% distinct(pbmc_sample_id)

In [None]:
# cells per well / PBMC sample combination after the QC filter
well_sample_counts <- tally(group_by(all_so_fl@meta.data, well_id, pbmc_sample_id))
# one box per well, one jittered point per sample
ggplot(well_sample_counts, aes(x = well_id, y = n)) +
    geom_boxplot() +
    geom_jitter(aes(colour = pbmc_sample_id)) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
          text = element_text(size = 16))


In [None]:
# cells per ADT QC flag, then keep only the cells flagged 'Good'
tally(group_by(all_so_fl@meta.data, adt_qc_flag))
all_so_fl <- subset(all_so_fl, subset = adt_qc_flag=='Good')

In [None]:
# cells per ADT QC flag in the current all_so_fl
all_so_fl@meta.data %>% group_by(adt_qc_flag) %>% tally()

In [None]:
# check what cells are removed from atac labels
# (atac_labels is not created in this notebook and must already be loaded)
# barcodes present in atac_labels but absent from the filtered object
atac_rm_barcodes <- atac_labels %>%
    dplyr::filter((!barcodes %in% all_so_fl$barcodes)) %>%
    pull(barcodes)
# metadata of those cells, taken from the unfiltered object
atac_rm <- all_so@meta.data %>%
    as_tibble() %>%
    dplyr::filter(barcodes %in% atac_rm_barcodes)
# atac_rm %>% select(nFeature_RNA, nCount_ADT,nCount_RNA, percent.mt)
# number of removed cells that meet the RNA / ADT count thresholds
# (no percent.mt condition here); dplyr::filter is spelled out like the
# calls above so a masking filter() from another package is never used
atac_rm %>%
    dplyr::filter(nFeature_RNA > 250 & nCount_RNA > 500 &
        nCount_ADT < 10000 & nCount_RNA < 20000) %>%
    nrow()


In [None]:
# fraction of cells kept after filtering
ncol(all_so_fl)/ncol(all_so)

In [None]:
# load clinical metadata
# dplyr::rename is called explicitly: packages attached after dplyr
# (e.g. S4Vectors, pulled in by SingleCellExperiment) can mask rename()
metadata <- read_csv(file.path(meta_path, "PreRA_teaseq_metadata.csv"), show_col_types = FALSE) %>%
    dplyr::rename("subject_id" = "Subject") %>%
    mutate(cohort = if_else(str_detect(subject_id, "CU"), "pre-RA", "Healthy"))
# which metadata samples are present in the filtered object
metadata$pbmc_sample_id %in% all_so_fl@meta.data$pbmc_sample_id
# every sample may appear only once, otherwise the join below would
# duplicate cells and the row names could no longer be restored
stopifnot(anyDuplicated(metadata$pbmc_sample_id) == 0L)
# add the clinical metadata to the object; the cell names are saved
# before the join and put back as row names afterwards
cell_id <- all_so_fl@meta.data %>% rownames()
all_so_fl@meta.data <- all_so_fl@meta.data %>% left_join(metadata, by = "pbmc_sample_id")
rownames(all_so_fl@meta.data) <- cell_id


In [None]:
options(repr.plot.width = 20, repr.plot.height = 15)
# QC metrics per well after filtering; the black point marks the median
VlnPlot(all_so_fl,
    features = c("nFeature_RNA", "nCount_RNA", "nCount_ADT", "percent.mt"), log = FALSE,
    pt.size = 0, group.by = "well_id", ncol = 2
) &
    stat_summary(fun = median, geom = "point", color = "black")
ggsave(file.path(fig_path, paste0(proj_name, "_filtered_cells_qc_plots.pdf")))


In [None]:
# mitochondrial percentage per well after filtering
VlnPlot(all_so_fl, features = "percent.mt", group.by = "well_id", log = FALSE)
ggsave(file.path(fig_path, paste0(proj_name, "_filtered_cells_mt_genes_qc.pdf")))


In [None]:
# cells per well before removing the well
tally(group_by(all_so_fl@meta.data, well_id))
# drop the well EXP-00243-P1C2W6
all_so_fl <- subset(all_so_fl, well_id != "EXP-00243-P1C2W6")
# cells per well after removal
tally(group_by(all_so_fl@meta.data, well_id))


In [None]:
# cells after filtering, cells before filtering, and the fraction kept
ncol(all_so_fl)
ncol(all_so)
ncol(all_so_fl)/ncol(all_so)

### Remove old objects to save memory

In [None]:
# drop the unfiltered object and the intermediate lists / matrices
rm(all_so, all_so_list, all_adt_mtx, all_adt_mtx_list)

## Clustering of RNA (SCTransform)

In [None]:
# load the QC-filtered cells object
# NOTE(review): the previous comment called this "myeloid" data, but the file
# name refers to all QC-filtered cells — confirm
all_so_fl <- readRDS(file.path(data_path,
                                'PreRA_teaseq_seurat_qc_filtered_cells.rds'))

In [None]:
# run the SCTransform clustering pipeline with its default resolution
all_so_fl <- seurat_proc_func(all_so_fl)

In [None]:
# display the object summary
all_so_fl

In [None]:
# make a umap based on atac data
# all_so_fl <- RunUMAP(all_so_fl, reduction = "lsit", dims = 2:30, 
#                      reduction.name = "umap.atac", reduction.key = "atacUMAP_")

In [None]:
options(repr.plot.width = 20, repr.plot.height = 8)
# RNA UMAP coloured by the active identities (left) and by well (right)
p1 <- DimPlot(all_so_fl, label = TRUE, label.size = 6, reduction = "umap") + NoLegend()
p2 <- DimPlot(all_so_fl, group.by = "well_id", label = TRUE, shuffle = TRUE, reduction = "umap")
p1 + p2
ggsave(file.path(fig_path, paste0(proj_name, "_filtered_cells_rna_umap_cluster_well_id.pdf")),
       width = 12, height = 6
)


In [None]:
# list the metadata columns of the object
colnames(all_so_fl[[]])

In [None]:
# plot the percentage of mitochondrial counts on the RNA UMAP
p1 <- FeaturePlot(all_so_fl, features = "percent.mt", reduction = "umap")
p1
ggsave(file.path(fig_path, paste0(proj_name, "_filtered_cells_mt_genes_umap.pdf")),
       width = 12, height = 6
)


In [None]:
# plot doublet enrichment and fragment count on the ATAC UMAP
# NOTE(review): the "umap.atac" reduction is not computed in this notebook (the
# RunUMAP call for it is commented out) — presumably it comes with the loaded object
p1 <- FeaturePlot(all_so_fl,
    features = "DoubletEnrichment", reduction = "umap.atac",
    min.cutoff = "q5", max.cutoff = "q95"
)
p2 <- FeaturePlot(all_so_fl,
    features = "n_fragments", reduction = "umap.atac",
    min.cutoff = "q5", max.cutoff = "q95"
)
p1 + p2 + plot_layout(ncol = 2)
ggsave(file.path(fig_path, paste0(proj_name, "_filtered_cells_atacumap_doublet_n_frag.pdf")), width = 12, height = 6)
# plot the same two features on the RNA UMAP
p1 <- FeaturePlot(all_so_fl,
    features = "DoubletEnrichment", reduction = "umap",
    min.cutoff = "q5", max.cutoff = "q95"
)
p2 <- FeaturePlot(all_so_fl,
    features = "n_fragments", reduction = "umap",
    min.cutoff = "q5", max.cutoff = "q95"
)
p1 + p2 + plot_layout(ncol = 2)
ggsave(file.path(fig_path, paste0(proj_name, "_filtered_cells_rnaumap_doublet_n_frag.pdf")), width = 12, height = 6)


In [None]:
options(repr.plot.width = 20, repr.plot.height = 8)
# UMAP coloured and labelled by the active identities
p3 <- DimPlot(all_so_fl, label = TRUE) + NoLegend()
p3

In [None]:
options(repr.plot.width = 20, repr.plot.height = 15)
# QC metrics per well; the black point marks the median.
# This writes to the same file name as the earlier filtered-cells QC plot.
VlnPlot(all_so_fl,
    features = c("nFeature_RNA", "nCount_RNA", "nCount_ADT", "percent.mt"), log = FALSE,
    pt.size = 0, group.by = "well_id", ncol = 2
) &
    stat_summary(fun = median, geom = "point", color = "black")
ggsave(file.path(fig_path, paste0(proj_name, "_filtered_cells_qc_plots.pdf")))


### Seurat V4 Label Transfer

In [None]:
options(repr.plot.width = 12, repr.plot.height = 6)
# load the PBMC multimodal reference object and show its level-2 cell types
reference <- readRDS("/home/jupyter/data/Reference/Hao-2021_PBMC-Multimodal-Reference_SeuratObject.rds")
DimPlot(reference, group.by = "celltype.l2", label = TRUE, repel = TRUE, reduction = "wnn.umap") + NoLegend()


In [None]:
# transfer the reference cell type labels onto the filtered object.
# The previous call used `obj.rna`, which is not defined in this notebook;
# label_transfer_func() is the helper that takes `reference` and `query`.
all_so_fl <- label_transfer_func(reference = reference, query = all_so_fl)

In [None]:
options(repr.plot.width = 20, repr.plot.height = 8)
# UMAP by active identities (left) and by predicted level-2 cell type (right)
p3 <- DimPlot(all_so_fl, label = TRUE) + NoLegend()
p4 <- DimPlot(all_so_fl, group.by = "predicted.celltype.l2", label = TRUE) + NoLegend()
p3 + p4
ggsave(file.path(fig_path, paste0(proj_name, "_filtered_cells_rna_umap_l2_labels.pdf")), width = 12, height = 6)


In [None]:
# list the metadata columns of the object
colnames(all_so_fl[[]])

In [None]:
# plot umap from atac
p1 <- DimPlot(all_so_fl, group.by = "predicted.celltype.l2", reduction = "umap.atac", label = TRUE)
# the previous call passed group.by = "", which names no metadata column;
# without group.by the cells are coloured by the active identities
# NOTE(review): confirm that the active identities are the intended grouping
p2 <- DimPlot(all_so_fl, reduction = "umap.atac", label = TRUE)
p1 + p2
ggsave(file.path(fig_path, paste0(proj_name, "_filtered_cells_atac_umap_l2_labels.pdf")), width = 12, height = 6)


In [None]:
options(repr.plot.width = 20, repr.plot.height = 8)
# mitochondrial percentage on the RNA UMAP next to the predicted cell types
p3 <- FeaturePlot(all_so_fl, features = "percent.mt", reduction = "umap")
p4 <- DimPlot(all_so_fl, group.by = "predicted.celltype.l2", label = TRUE) + NoLegend()
p3 + p4 + plot_layout(ncol = 2)
# plot rna umap and mt.gene
# ggsave(file.path(fig_path, paste0(proj_name, '_filtered_cells_rnaumap_doublet_n_frag.pdf')), width = 12, height = 6)


In [None]:
options(repr.plot.width = 20, repr.plot.height = 8)
# UMAP by active identities (left) and by predicted level-2 cell type (right)
p5 <- DimPlot(all_so_fl, label = TRUE) + NoLegend()
p6 <- DimPlot(all_so_fl, group.by = "predicted.celltype.l2", label = TRUE) + NoLegend()
p5 + p6
ggsave(file.path(fig_path, paste0(proj_name, "_filtered_cells_l2_seurat_label_umap.pdf")), width = 12, height = 6)


In [None]:
# mt qc with predicted cell types
FeatureScatter(all_so_fl, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by = "predicted.celltype.l2")
ggsave(file.path(fig_path, paste0(proj_name, "_filtered_cells_mt_genes_nCount_rna.pdf")))
FeatureScatter(all_so_fl, feature1 = "nFeature_RNA", feature2 = "percent.mt", group.by = "predicted.celltype.l2")
ggsave(file.path(fig_path, paste0(proj_name, "_filtered_cells_mt_genes_nFeature_rna.pdf")))


In [None]:
# do clustering on adt data
# (adt_clustering_func also sets the default assay to "ADT" itself and
# switches it to "SCT" before returning)
DefaultAssay(all_so_fl) <- "ADT"
all_so_fl <- adt_clustering_func(all_so_fl)


In [None]:
options(repr.plot.width = 20, repr.plot.height = 8)
# ADT UMAP by active identities (left) and by predicted level-2 cell type (right)
p5 <- DimPlot(all_so_fl, label = TRUE, reduction = "adt_umap") + NoLegend()
p6 <- DimPlot(all_so_fl, group.by = "predicted.celltype.l2", label = TRUE, reduction = "adt_umap")
p5 + p6
ggsave(file.path(fig_path, paste0(proj_name, "_filtered_cells_l2_seurat_label_adtumap.pdf")), width = 12, height = 6)


In [None]:
# list the metadata columns, then export the cell metadata as csv
colnames(all_so_fl@meta.data)
write_csv(
    all_so_fl@meta.data,
    file.path(output_path, paste0(proj_name, "_seurat_qc_filtered_cells_metadata.csv"))
)


In [None]:
# write the QC-filtered object to data_path
saveRDS(
    all_so_fl,
    file.path(data_path, paste0(proj_name, '_seurat_qc_filtered_cells.rds'))
)

## load dataset with atac reduced dimension in it
- LSI from ArchR is imported into the atac slot in the seurat object

In [None]:
# load the QC-filtered seurat object that includes the LSI reduction
all_so_fl <- readRDS(file.path(data_path, 'PreRA_teaseq_seurat_qc_filtered_cells_lsi.rds'))

In [None]:
# object summary and its metadata columns
all_so_fl
colnames(all_so_fl@meta.data)

In [None]:
# export the cell metadata of the LSI object as csv
write_csv(
    all_so_fl@meta.data,
    file.path(output_path, 'PreRA_teaseq_seurat_qc_filtered_cells_lsi_metadata.csv')
)

In [None]:
# create a clean adt assay removing the isotype control adt
# adts_to_remove <- c('Rat-IgG1-I-Isotype-Ctrl','Rat-IgG1-K-isotype-Ctrl','Rat-IgG2a-K-Isotype-Ctrl','Rat-IgG2c-K-Isotype-Ctrl',
#                     'Armenian-Hamster-IgG-Isotype-Ctrl', 'Mouse-IgG1--K-isotype-Ctrl','Mouse-IgG2a--K-isotype-Ctrl',
#                     'Mouse-IgG2b--K-isotype-Ctrl','Rat-IgG2b-K-Isotype-Ctrl')
adt_mtx <- all_so_fl@assays$ADT@counts
length(rownames(adt_mtx))
# match "isotype" case-insensitively: the control names listed above use
# both "isotype" and "Isotype", and a case-sensitive match keeps the latter
adts_to_remove <- rownames(adt_mtx) %>% str_subset(regex('isotype', ignore_case = TRUE))
all(adts_to_remove %in% rownames(adt_mtx))

idx <- which(rownames(adt_mtx) %in% adts_to_remove)
# drop the controls with a logical mask: negative indexing with an empty
# idx (no control found) would remove every row instead of none
keep_adt <- !rownames(adt_mtx) %in% adts_to_remove
clean_adt_mtx <- adt_mtx[keep_adt, , drop = FALSE]
length(rownames(clean_adt_mtx))

In [None]:
# list all ADT feature names (including the isotype controls)
rownames(adt_mtx) 

In [None]:
# store the control-free ADT matrix as the 'cleanadt' assay, make it the
# default assay and show how many features it has
all_so_fl[['cleanadt']] <- CreateAssayObject(clean_adt_mtx)
DefaultAssay(all_so_fl) <- 'cleanadt'
length(rownames(all_so_fl))

In [None]:
# plot_cluster_counts(all_so, 'seurat_pbmc_type', color.by='seurat_pbmc_type', figname = '')
# plot_cluster_freq(all_so, 'seurat_pbmc_type', color.by='seurat_pbmc_type', figname = '')

In [None]:
# display the object summary
all_so_fl

In [None]:
options(repr.plot.width = 20, repr.plot.height = 8)
# predicted level-1 / level-2 cell types on the RNA UMAP (p1, p2)
# and on the ADT UMAP (p3, p4)
p1 <- DimPlot(all_so_fl, group.by = 'predicted.celltype.l1', label = TRUE, reduction = 'umap')
p2 <- DimPlot(all_so_fl, group.by = 'predicted.celltype.l2', label = TRUE, reduction = 'umap')
p3 <- DimPlot(all_so_fl, group.by = 'predicted.celltype.l1', label = TRUE, reduction = 'adt_umap') + NoLegend()
p4 <- DimPlot(all_so_fl, group.by = 'predicted.celltype.l2', label = TRUE, reduction = 'adt_umap') + NoLegend()
p1 + p2 + p3 + p4
# ggsave(file.path(fig_path, paste0(proj_name, '_filtered_cells_l2_seurat_label_adtumap.pdf')), width = 12, height = 6)