# Erythroid clean

In [None]:
# Seurat 
library(Seurat)

# Data 
library(dplyr)

# Plotting
library(ggplot2)
library(RColorBrewer)
library(patchwork)
library(gridExtra)
library(grid)
library(viridis)
library(ComplexHeatmap)

In [None]:
# Set working directory to project root
# NOTE(review): absolute, machine-specific path; every relative path used below
# (plotting_global.R, cc_kowalczyk.csv, data/object/...) resolves against it.
setwd("/research/peer/fdeckert/FD20200109SPLENO")

In [None]:
# Source files
# NOTE(review): presumably defines theme_global_set() and the `color` palette list
# that the plotting cells below rely on — confirm in plotting_global.R.
source("plotting_global.R")

# Parameter settings

In [None]:
# Seurat files 
# so_file is read with readRDS() below; so_out_file is the target of the final saveRDS().
so_file <- "data/object/seurat.rds"
so_out_file <- "data/object/seurat_clean.rds"

# Plotting Theme
# Applies the project theme as the ggplot2 default for every plot in this session.
ggplot2::theme_set(theme_global_set()) # From project global source()

# Import Seurat object

In [None]:
# Load the input Seurat object from so_file
so <- readRDS(so_file)

In [None]:
# Create clean Seurat object
# Rebuilds the object from the raw RNA counts only and carries over the existing
# cell metadata minus the `rna_snn_res.0.8` column.
# NOTE(review): the dropped column is spelled lower-case here, whereas the cluster
# column used throughout the rest of the notebook is `RNA_snn_res.0.8`;
# dplyr::select() fails if the named column does not exist — confirm the spelling
# against the input metadata.
so <- CreateSeuratObject(counts = GetAssayData(so, assay = "RNA", slot = "counts"), meta.data = dplyr::select(so@meta.data, -rna_snn_res.0.8))

# Subset by treatment group

In [None]:
# One Seurat object per treatment; keep only the NaCl and CpG groups, in that order
so_l <- SplitObject(so, split.by = "treatment")[c("NaCl", "CpG")]

# Normalize, Scale and Dim reduction of replicate groups

In [None]:
# Normalise each treatment group separately (NormalizeData with its defaults)
so_l <- lapply(so_l, NormalizeData)

In [None]:
# Select 3000 variable features per treatment group
so_l <- lapply(so_l, FindVariableFeatures, nfeatures = 3000)

In [None]:
# Gene table read from cc_kowalczyk.csv; only rows with sig_population >= 5 are kept.
# Its `gene` column is used further down to drop these genes from the DEG tables.
# NOTE(review): presumably a cell-cycle gene signature — confirm the source of the file.
cc_kowalczyk <- read.csv("cc_kowalczyk.csv")
cc_kowalczyk <- cc_kowalczyk[cc_kowalczyk$sig_population >= 5, ]

# Disabled: would remove the cc_kowalczyk genes from each group's variable features.
# variable_features_reduce <- function(so, genes) {
#     variable_features <- VariableFeatures(so)
#     VariableFeatures(so) <- variable_features[!variable_features %in% genes]
#     return(so)
# }
# so_l <- lapply(so_l, variable_features_reduce, genes = cc_kowalczyk$gene)

In [None]:
# Scale each group while regressing out the per-cell total count (nCount_RNA)
so_l <- lapply(so_l, ScaleData, vars.to.regress = c("nCount_RNA"))

In [None]:
# PCA, SNN graph, clustering and UMAP per treatment group.
# Warnings are silenced only for these calls; the previous `warn` setting is
# restored afterwards so that later cells still report warnings.
old_warn <- options(warn = -1)
so_l <- lapply(so_l, RunPCA, npcs = 90, verbose = FALSE)
# NOTE(review): the neighbour graph (and hence the clustering) uses PCs 1:10 while
# the UMAP below uses PCs 1:90 — confirm this asymmetry is intended.
so_l <- lapply(so_l, FindNeighbors, dims = 1:10, verbose = FALSE)
so_l <- lapply(so_l, FindClusters, verbose = FALSE)
so_l <- lapply(so_l, RunUMAP, dims = 1:90, verbose = FALSE)
options(old_warn)

In [None]:
# Name of the reduction read by the plotting functions below
reduction <- "umap"

In [None]:
options(repr.plot.width = 25, repr.plot.height = 10)

# Five-panel embedding overview of one Seurat object: clusters, FACS gate (tissue),
# replicate, main labels and fine labels, titled with the object's treatment.
#
# Reads the globals `reduction` (name of the reduction to draw) and `color`
# (palette list with $tissue, $main_labels and $fine_labels).
# NOTE(review): the name masks graphics::plot() for the rest of the session.
#
# so: Seurat object with RNA_snn_res.0.8, tissue, sample_rep, main_labels,
#     fine_labels and treatment metadata columns.
# Returns a patchwork object.
plot <- function(so) {
    
    dplot_1 <- DimPlot(so, reduction = reduction, group.by = "RNA_snn_res.0.8", label = TRUE) & 
        ggtitle("Cluster") & xlab("UMAP 1") & ylab("UMAP 2") &
        theme(aspect.ratio = 1, legend.position = "none") & 
        guides(color = guide_legend(ncol = 3, override.aes = list(size = 2)))
    
    dplot_2 <- DimPlot(so, reduction = reduction, group.by = "tissue", label = FALSE) & 
        ggtitle("Tissue") & xlab("UMAP 1") & ylab("UMAP 2") &
        scale_color_manual(values = color$tissue, na.value = "dark gray") & 
        theme(aspect.ratio = 1, legend.position = "bottom") & 
        guides(color = guide_legend(ncol = 1, override.aes = list(size = 2)))
    
    dplot_3 <- DimPlot(so, reduction = reduction, group.by = "sample_rep", label = FALSE) & 
        ggtitle("Replicate") & xlab("UMAP 1") & ylab("UMAP 2") &
        theme(aspect.ratio = 1, legend.position = "bottom") & 
        guides(color = guide_legend(ncol = 1, override.aes = list(size = 2)))

    dplot_4 <- DimPlot(so, reduction = reduction, group.by = "main_labels", label = FALSE) &
        theme(aspect.ratio = 1, legend.position = "bottom") &
        scale_color_manual(values = color$main_labels, na.value = "dark gray") &
        guides(color = guide_legend(ncol = 3, override.aes = list(size = 2)))

    dplot_5 <- DimPlot(so, reduction = reduction, group.by = "fine_labels", label = FALSE) &
        theme(aspect.ratio = 1, legend.position = "bottom") &
        scale_color_manual(values = color$fine_labels, na.value = "dark gray") &
        guides(color = guide_legend(ncol = 3, override.aes = list(size = 2)))
    
    # so$treatment holds one entry per cell; collapse it to a single title string
    # instead of handing the whole per-cell vector to plot_annotation().
    treatment_title <- paste(unique(as.character(so$treatment)), collapse = " / ")
    
    dplot_1 + dplot_2 + dplot_3 + dplot_4 + dplot_5 + plot_layout(ncol = 5) + plot_annotation(title = treatment_title, theme = theme(plot.title = element_text(size = 18)))
    
}

lapply(so_l, plot)

In [None]:
# Pull the two treatment groups out of the list by name
so_nacl <- so_l[["NaCl"]]
so_cpg <- so_l[["CpG"]]

# FACS sort overlap 

In [None]:
options(repr.plot.width = 10, repr.plot.height = 5)

# Share of each FACS gate (tissue) within every cluster, NaCl group
tissue_nacl_cluster <- ggplot(so_nacl@meta.data, aes(x = seurat_clusters, fill = tissue)) +
  geom_bar(position = "fill") +
  scale_fill_manual(values = color$tissue) +
  labs(title = "NaCl FACS sort frequency", x = "Cluster", y = "Cell frequency") +
  theme(legend.position = "bottom")

# Same view for the CpG group
tissue_cpg_cluster <- ggplot(so_cpg@meta.data, aes(x = seurat_clusters, fill = tissue)) +
  geom_bar(position = "fill") +
  scale_fill_manual(values = color$tissue) +
  labs(title = "CpG FACS sort frequency", x = "Cluster", y = "Cell frequency") +
  theme(legend.position = "bottom")

# Side-by-side layout
tissue_nacl_cluster + tissue_cpg_cluster + plot_layout(ncol = 2)

## Sort out cells which overlap FACS sort in low frequency (N<=2)

clov: cluster overlap   
clpu: cluster pure   
clmi: cluster mixed 

In [None]:
# Cell count per (cluster, FACS gate) combination; one row per combination
nacl_clov <- dplyr::group_by(so_nacl@meta.data, RNA_snn_res.0.8) %>% dplyr::count(tissue)

# Get all pure cluster with no FACS mixture (cluster id occurs in exactly one row)
nacl_clpu <- nacl_clov[!nacl_clov$RNA_snn_res.0.8 %in% nacl_clov$RNA_snn_res.0.8[duplicated(nacl_clov$RNA_snn_res.0.8)], ]

# Get all mixed cluster (everything that is not pure)
nacl_clmi <- nacl_clov[!nacl_clov$RNA_snn_res.0.8 %in% nacl_clpu$RNA_snn_res.0.8, ]

# Remove cells of low frequency (n <= 2) cluster/gate combinations from the Seurat object
nacl_clmi_lf <- nacl_clmi[nacl_clmi$n <= 2, ] # Get all low frequency mixed cluster
# `%in%` already yields TRUE/FALSE, so no ifelse() wrapper is needed
so_nacl$clmi_lf <- paste0(so_nacl$`RNA_snn_res.0.8`, so_nacl$tissue) %in% paste0(nacl_clmi_lf$RNA_snn_res.0.8, nacl_clmi_lf$tissue)
so_nacl <- subset(so_nacl, subset = clmi_lf == FALSE)

# Get all mixed cluster with high frequency for myeloid dominant and progenitor dominant
# Clusters that had any low frequency combination are excluded here entirely.
nacl_clmi_hf <- nacl_clmi[!nacl_clmi$RNA_snn_res.0.8 %in%  nacl_clmi_lf$RNA_snn_res.0.8, ]
# Keep, per cluster, the row of the gate with the fewest cells (the minority gate)
nacl_clmi_hf <- dplyr::group_by(nacl_clmi_hf, RNA_snn_res.0.8) %>% dplyr::filter(n == min(n))

# Clusters in which Myeloid / Progenitor is the minority gate
nacl_clmi_hf_m <- nacl_clmi_hf[nacl_clmi_hf$tissue == "Myeloid", ]
nacl_clmi_hf_p <- nacl_clmi_hf[nacl_clmi_hf$tissue == "Progenitor", ]

In [None]:
# Cell count per (cluster, FACS gate) combination; one row per combination
cpg_clov <- dplyr::group_by(so_cpg@meta.data, RNA_snn_res.0.8) %>% dplyr::count(tissue)

# Get all pure cluster with no FACS mixture (cluster id occurs in exactly one row)
cpg_clpu <- cpg_clov[!cpg_clov$RNA_snn_res.0.8 %in% cpg_clov$RNA_snn_res.0.8[duplicated(cpg_clov$RNA_snn_res.0.8)], ]

# Get all mixed cluster (everything that is not pure)
cpg_clmi <- cpg_clov[!cpg_clov$RNA_snn_res.0.8 %in% cpg_clpu$RNA_snn_res.0.8, ]

# Remove cells of low frequency (n <= 2) cluster/gate combinations from the Seurat object
cpg_clmi_lf <- cpg_clmi[cpg_clmi$n <= 2, ] # Get all low frequency mixed cluster
# `%in%` already yields TRUE/FALSE, so no ifelse() wrapper is needed
so_cpg$clmi_lf <- paste0(so_cpg$`RNA_snn_res.0.8`, so_cpg$tissue) %in% paste0(cpg_clmi_lf$RNA_snn_res.0.8, cpg_clmi_lf$tissue)
so_cpg <- subset(so_cpg, subset = clmi_lf == FALSE)

# Get all mixed cluster with high frequency for myeloid dominant and progenitor dominant
# Clusters that had any low frequency combination are excluded here entirely.
cpg_clmi_hf <- cpg_clmi[!cpg_clmi$RNA_snn_res.0.8 %in%  cpg_clmi_lf$RNA_snn_res.0.8, ]
# Keep, per cluster, the row of the gate with the fewest cells (the minority gate)
cpg_clmi_hf <- dplyr::group_by(cpg_clmi_hf, RNA_snn_res.0.8) %>% dplyr::filter(n == min(n))

# Clusters in which Myeloid / Progenitor is the minority gate
cpg_clmi_hf_m <- cpg_clmi_hf[cpg_clmi_hf$tissue == "Myeloid", ]
cpg_clmi_hf_p <- cpg_clmi_hf[cpg_clmi_hf$tissue == "Progenitor", ]

## Find DEG in cluster with mixed Myeloid / Progenitor population 

In [None]:
# Differential expression between two FACS gates inside each of the given clusters.
#
# so:              Seurat object with `RNA_snn_res.0.8` and `tissue` metadata.
# cluster:         cluster ids to test; the object is subset to these first.
# low_freq:        tissue label used as ident.1 (genes reported are higher in this gate,
#                  because only.pos = TRUE).
# high_freq:       tissue label used as ident.2.
# min_cells_group: a cluster is tested only if both gates have MORE than this many
#                  cells; also forwarded to FindMarkers(min.cells.group = ).
# exclude_genes:   gene symbols removed from the result; defaults to the `gene` column
#                  of the global `cc_kowalczyk` table (looked up when the function runs).
#
# Returns a list named by cluster id. Each element is the FindMarkers() table filtered
# to p_val_adj <= 0.05 with mt-/Rps/Rpl/Gm<digit> genes and `exclude_genes` removed.
# Clusters that fail the size check get no entry.
find_tissue_markers <- function(so, cluster, low_freq, high_freq, min_cells_group = 3, exclude_genes = cc_kowalczyk$gene) {

    so <- subset(so, subset = RNA_snn_res.0.8 %in% cluster)

    # Set idents to cluster tissue combination
    so$cluster_tissue <- paste0(so$`RNA_snn_res.0.8`, "_", so$tissue)
    Idents(so) <- "cluster_tissue"

    # List to store results from looping through cluster
    deg_l <- list()

    for (cluster_idx in unique(as.character(so$`RNA_snn_res.0.8`))) {
        # Check if enough cells are present in both idents.
        # as.character() keeps empty factor levels out of the table, so a gate with
        # zero cells cannot silently fail the all() test.
        so_cluster_idx <- subset(so, RNA_snn_res.0.8 == cluster_idx)
        tissue_n <- table(as.character(so_cluster_idx$tissue))
        sample_size_check <- length(tissue_n) == 2 &&
            all(c(low_freq, high_freq) %in% names(tissue_n)) &&
            all(tissue_n > min_cells_group)

        # Clusters without two sufficiently populated gates are skipped
        if (!sample_size_check) {
            next
        }

        # DEG
        deg <- FindMarkers(so, ident.1 = paste0(cluster_idx, "_", low_freq), ident.2 = paste0(cluster_idx, "_", high_freq), min.cells.group = min_cells_group, only.pos = TRUE, verbose = TRUE)
        deg <- deg[deg$p_val_adj <= 0.05, ]

        # Filter deg list (both filters are no-ops on an empty table)
        deg <- deg[!grepl("^(mt-|Rp(s|l)|Gm\\d)", rownames(deg)), ]
        deg <- deg[!rownames(deg) %in% exclude_genes, ]

        deg_l[[cluster_idx]] <- deg
    }

    deg_l
}

# NaCl
# Myeloid-minority clusters: genes higher in the Myeloid-gated cells than in the
# Progenitor-gated cells of the same cluster.
deg_nacl_clmi_hf_m <- find_tissue_markers(so_nacl, nacl_clmi_hf_m$RNA_snn_res.0.8, low_freq = "Myeloid", high_freq = "Progenitor")
# Disabled: Progenitor-minority clusters for NaCl.
# deg_nacl_clmi_hf_p <- find_tissue_markers(so_nacl, nacl_clmi_hf_p$RNA_snn_res.0.8, low_freq = "Progenitor", high_freq = "Myeloid")
# CpG
# Both directions are computed for the CpG group.
deg_cpg_clmi_hf_m <- find_tissue_markers(so_cpg, cpg_clmi_hf_m$RNA_snn_res.0.8, low_freq = "Myeloid", high_freq = "Progenitor")
deg_cpg_clmi_hf_p <- find_tissue_markers(so_cpg, cpg_clmi_hf_p$RNA_snn_res.0.8, low_freq = "Progenitor", high_freq = "Myeloid")

In [None]:
# Heat map of the mean normalised expression of the top DEGs of one cluster,
# with one column per FACS gate (Myeloid, Progenitor).
#
# cluster_idx: cluster id; also the name of the element to take from `deg`.
# so:          Seurat object with `RNA_snn_res.0.8` and `tissue` metadata.
# deg:         named list of FindMarkers() tables (as built by find_tissue_markers()).
# top:         maximum number of up- and of down-regulated genes to show.
#
# Reads the global `color` palette list ($tissue) for the column annotation.
# Returns a grob: the captured heat map, or a text placeholder when the cluster
# has no DEG to show.
top_hm <- function(cluster_idx, so, deg, top = 25) {

    # Filter deg list
    deg <- deg[[cluster_idx]]

    # Nothing to draw: return a placeholder so that the surrounding lapply() and
    # arrangeGrob() keep working for the remaining clusters.
    if (is.null(deg) || nrow(deg) == 0) {
        return(grid::textGrob(paste0("Cluster ", cluster_idx, "\nno significant DEG")))
    }

    # Select top hits by adjusted p value among up- and down-regulated genes.
    # head() returns at most `top` rows, whereas `[1:top, ]` would pad short tables
    # with NA rows.
    deg <- deg[order(deg$p_val_adj), , drop = FALSE]
    deg_up <- head(deg[which(deg$avg_log2FC > 0), , drop = FALSE], top)
    deg_down <- head(deg[which(deg$avg_log2FC < 0), , drop = FALSE], top)

    # Combine top hits
    deg <- rbind(deg_up, deg_down)

    # Extract normalized count matrix for top hits.
    # drop = FALSE keeps a matrix even for a single gene; the few selected rows are
    # converted to a dense matrix so that base rowMeans() can be used.
    so <- subset(so, subset = RNA_snn_res.0.8 == cluster_idx)
    cnt_norm <- GetAssayData(so, assay = "RNA", slot = "data")
    cnt_norm <- as.matrix(cnt_norm[rownames(cnt_norm) %in% rownames(deg), , drop = FALSE])

    # Compute mean per group; cells are assigned through their tissue label
    cell_tissue <- as.character(so@meta.data[colnames(cnt_norm), "tissue"])
    cnt_norm <- data.frame(
        Myeloid = rowMeans(cnt_norm[, grepl("Myeloid", cell_tissue, fixed = TRUE), drop = FALSE]),
        Progenitor = rowMeans(cnt_norm[, grepl("Progenitor", cell_tissue, fixed = TRUE), drop = FALSE]))

    # ComplexHeatmap color ramp from 0 to the largest group mean
    color_range <- max(abs(cnt_norm))
    color_break <- seq(0, color_range, 0.01)
    color_ramp <- viridis(length(color_break), option = "magma")

    # Top annotation
    annotation_col <- data.frame("FACS" = colnames(cnt_norm))
    rownames(annotation_col) <- colnames(cnt_norm)
    annotation_colors <- list("FACS" = color$tissue)

    # Heat map
    hm <- grid.grabExpr(draw(ComplexHeatmap::pheatmap(
        mat           = as.matrix(cnt_norm),
        main          = paste("Cluster", cluster_idx),
        fontsize_row  =  10,
        scale         = "none",
        # Row clustering needs at least two genes
        cluster_rows  = nrow(cnt_norm) > 1,
        cluster_cols  = FALSE,
        cellwidth      = 12,
        cellheight     = 12,
        clustering_distance_rows = "euclidean",
        clustering_distance_cols = "euclidean",
        clustering_method        = "complete",
        show_row_dend = FALSE,
        show_rownames = TRUE,
        show_colnames = FALSE,
        annotation_col = annotation_col,
        annotation_colors = annotation_colors,
        color         = color_ramp,
        breaks        = color_break,
        border_color  = NA,
        use_raster = TRUE)))

    hm
}

# NaCl
# One heat map grob per cluster that has an entry in the DEG list
hm_nacl_clmi_hf_m <- lapply(names(deg_nacl_clmi_hf_m), top_hm, so = so_nacl, deg = deg_nacl_clmi_hf_m)
# Disabled together with the matching DEG call for NaCl above.
# hm_nacl_clmi_hf_p <- lapply(names(deg_nacl_clmi_hf_p), top_hm, so = so_nacl, deg = deg_nacl_clmi_hf_p)
# CpG
hm_cpg_clmi_hf_m <- lapply(names(deg_cpg_clmi_hf_m), top_hm, so = so_cpg, deg = deg_cpg_clmi_hf_m)
hm_cpg_clmi_hf_p <- lapply(names(deg_cpg_clmi_hf_p), top_hm, so = so_cpg, deg = deg_cpg_clmi_hf_p)

## NaCl Myeloid cells mixed in Progenitor dominant cluster

In [None]:
options(repr.plot.width = 15, repr.plot.height = 10)
# Draw the per-cluster heat maps in two rows
gridExtra::arrangeGrob(grobs = hm_nacl_clmi_hf_m, nrow = 2) %>% grid::grid.draw()

# Drop the Myeloid-gated cells of the listed clusters.
# NOTE(review): the cluster ids are hard-coded; they are only valid for the exact
# clustering result they were read from — re-check after any upstream change.
# `%in%` already yields TRUE/FALSE, so no ifelse() wrapper is needed.
so_nacl$clmi_hf <- paste0(so_nacl$`RNA_snn_res.0.8`, so_nacl$tissue) %in% paste0(c(0, 1, 2, 5, 6, 9, 10, 18, 19), "Myeloid")
so_nacl <- subset(so_nacl, subset = clmi_hf == FALSE)

## NaCl Progenitor cells mixed in Myeloid dominant cluster

In [None]:
# options(repr.plot.width = 5, repr.plot.height = 5)
# gridExtra::arrangeGrob(grobs = hm_nacl_clmi_hf_p, nrow = 1) %>% grid::grid.draw()

## CpG Myeloid cells mixed in Progenitor dominant cluster

In [None]:
options(repr.plot.width = 25, repr.plot.height = 10)
# Draw the per-cluster heat maps in two rows
gridExtra::arrangeGrob(grobs = hm_cpg_clmi_hf_m, nrow = 2) %>% grid::grid.draw()

# Drop the Myeloid-gated cells of the listed clusters.
# NOTE(review): the cluster ids are hard-coded; they are only valid for the exact
# clustering result they were read from — re-check after any upstream change.
# `%in%` already yields TRUE/FALSE, so no ifelse() wrapper is needed.
so_cpg$remove <- paste0(so_cpg$`RNA_snn_res.0.8`, so_cpg$tissue) %in% paste0(c(0, 1, 2, 3, 4, 5, 6, 11, 16), "Myeloid")
so_cpg <- subset(so_cpg, subset = remove == FALSE)

## CpG Progenitor cells mixed in Myeloid dominant cluster

In [None]:
options(repr.plot.width = 20, repr.plot.height = 5)
# Draw the CpG Progenitor-minority heat maps in a single row (inspection only;
# no cells are removed in this cell).
gridExtra::arrangeGrob(grobs = hm_cpg_clmi_hf_p, nrow = 1) %>% grid::grid.draw()

# Remove cells by marker genes 

## B-cell cluster 

Plasmablast: Can proliferate, short half-life, high number of mitochondria, low level of B220  
Plasma cells cannot switch antibody classes, cannot act as antigen-presenting cells because they no longer display MHC-II, and do not take up antigen because they no longer display significant quantities of immunoglobulin on the cell surface

In [None]:
options(repr.plot.width = 25, repr.plot.height = 10)

# Cluster map plus nine feature plots of B-cell / plasma-cell related features for
# one Seurat object, titled with the object's treatment.
#
# Reads the global `reduction`. Redefines the `plot` function from the earlier cell.
# NOTE(review): the ms*_RNA1 features are presumably pre-computed metadata columns
# (e.g. module scores) — confirm they exist in the object.
#
# so: Seurat object. Returns a patchwork object.
plot <- function(so) {
    dplot_1 <- DimPlot(so, reduction = reduction, group.by = "RNA_snn_res.0.8", label = TRUE) & 
        ggtitle("Cluster") & xlab("UMAP 1") & ylab("UMAP 2") &
        theme(aspect.ratio = 1, legend.position = "none") & 
        guides(color = guide_legend(ncol = 3, override.aes = list(size = 2)))
    fplot_1 <- FeaturePlot(so, reduction = reduction, features = "Tnfrsf13b") & theme(aspect.ratio = 1) & ggtitle("Plasma cell (Tnfrsf13b)")
    fplot_2 <- FeaturePlot(so, reduction = reduction, features = "Sdc1") & theme(aspect.ratio = 1) & ggtitle("Plasmablast (Sdc1)")
    fplot_3 <- FeaturePlot(so, reduction = reduction, features = "msIgkc_RNA1") & theme(aspect.ratio = 1) & ggtitle("Light chain kappa")
    fplot_4 <- FeaturePlot(so, reduction = reduction, features = "msIglc_RNA1") & theme(aspect.ratio = 1) & ggtitle("Light chain lambda")
    fplot_5 <- FeaturePlot(so, reduction = reduction, features = "msIghm_RNA1") & theme(aspect.ratio = 1) & ggtitle("Ighm")
    fplot_6 <- FeaturePlot(so, reduction = reduction, features = "msIghd_RNA1") & theme(aspect.ratio = 1) & ggtitle("Ighd")
    fplot_7 <- FeaturePlot(so, reduction = reduction, features = "msIghg_RNA1") & theme(aspect.ratio = 1) & ggtitle("Ighg")
    fplot_8 <- FeaturePlot(so, reduction = reduction, features = "msIgha_RNA1") & theme(aspect.ratio = 1) & ggtitle("Igha")
    fplot_9 <- FeaturePlot(so, reduction = reduction, features = "msMHCII_RNA1") & theme(aspect.ratio = 1) & ggtitle("H2 (MHC II)")

    # so$treatment holds one entry per cell; collapse it to a single title string
    # instead of handing the whole per-cell vector to plot_annotation().
    treatment_title <- paste(unique(as.character(so$treatment)), collapse = " / ")

    dplot_1 + fplot_1 + fplot_2 + fplot_3 + fplot_4 +  fplot_5 + fplot_6 + fplot_7 + fplot_8 + fplot_9 + plot_layout(ncol = 5) + plot_annotation(title = treatment_title, theme = theme(plot.title = element_text(size = 18)))

}

# NOTE(review): so_l still holds the objects from before the FACS-overlap filtering
# (so_nacl / so_cpg were filtered separately) — confirm that plotting the unfiltered
# objects is intended here.
lapply(so_l, plot)

Plasma cells and plasmablasts, which express TACI (Tnfrsf13b) and Cd138 (Sdc1) respectively, can be present in the B220- gate. However, Tnfrsf13b is also expressed by plasmacytoid dendritic cells.

Igkc - Immunoglobulin kappa constant (light chain)  
Iglc - Immunoglobulin lambda constant (light chain)

In [None]:
# Marker gene sets
genes_plasma <- c("Tnfrsf13b")
genes_plasmablast <- c("Sdc1")
genes_iglc <- c("Igkc", "Iglc1", "Iglc2", "Iglc3")
genes_ighm <- c("Ighm")
genes_ighd <- c("Ighd")
genes_ighg <- c("Ighg1", "Ighg2a", "Ighg2b", "Ighg2c", "Ighg3")
genes_igha <- c("Igha")
genes_ighc <- c(genes_ighm, genes_ighd, genes_ighg, genes_igha)

# Metadata column name -> gene set whose per-cell mean expression it stores
mean_expr_sets <- list(
    meanPlasma_RNA     = genes_plasma,
    meanPlasmablst_RNA = genes_plasmablast,
    meanIglc_RNA       = genes_iglc,
    meanIghc_RNA       = genes_ighc,
    meanIghm_RNA       = genes_ighm,
    meanIghd_RNA       = genes_ighd,
    meanIghg_RNA       = genes_ighg,
    meanIgha_RNA       = genes_igha
)

# Add one metadata column per gene set holding the per-cell mean of the RNA `data`
# slot over the genes of that set. Genes absent from the object are ignored; a set
# with no gene present yields NaN for every cell (same as the per-line version).
#
# so:        Seurat object.
# gene_sets: named list; names become the metadata column names.
# Returns the Seurat object with the added columns.
add_mean_expression <- function(so, gene_sets) {
    expr <- GetAssayData(so, assay = "RNA", slot = "data")
    for (col_name in names(gene_sets)) {
        genes <- intersect(gene_sets[[col_name]], rownames(expr))
        # Only the few selected rows are made dense; the column means are named by cell
        cell_means <- colMeans(as.matrix(expr[genes, , drop = FALSE]))
        so <- AddMetaData(so, metadata = cell_means, col.name = col_name)
    }
    so
}

so_cpg <- add_mean_expression(so_cpg, mean_expr_sets)
so_nacl <- add_mean_expression(so_nacl, mean_expr_sets)

## Plasmablast Sdc1 expression vs heavy and light constant chain 

In [None]:
options(repr.plot.width = 10, repr.plot.height = 21)

# Density of one metadata score, coloured by FACS gate (tissue).
# meta: cell metadata data frame; x: column name; title / x_lab: plot labels.
tissue_density <- function(meta, x, title, x_lab) {
    ggplot(meta, aes(x = .data[[x]], color = tissue)) + geom_density() +
        scale_color_manual(values = color$tissue) + ggtitle(title) + xlab(x_lab)
}

# Scatter of two metadata scores, coloured by FACS gate (tissue).
tissue_scatter <- function(meta, x, y, title, x_lab, y_lab) {
    ggplot(meta, aes(x = .data[[x]], y = .data[[y]], color = tissue)) + geom_point() +
        scale_color_manual(values = color$tissue) + ggtitle(title) + xlab(x_lab) + ylab(y_lab)
}

# Plasmablast score (meanPlasmablst_RNA, i.e. Sdc1). The density previously plotted
# meanPlasma_RNA (Tnfrsf13b) under the "Plasmablast" axis label.
nacl_dens_plasmablast <- tissue_density(so_nacl@meta.data, "meanPlasmablst_RNA", "NaCl", "Plasmablast [LogNorm]")
cpg_dens_plasmablast <- tissue_density(so_cpg@meta.data, "meanPlasmablst_RNA", "CpG", "Plasmablast [LogNorm]")

nacl_dot_plasmablast_iglc <- tissue_scatter(so_nacl@meta.data, "meanPlasmablst_RNA", "meanIglc_RNA", "NaCl", "Plasmablast [LogNorm]", "Iglc [LogNorm]")
cpg_dot_plasmablast_iglc <- tissue_scatter(so_cpg@meta.data, "meanPlasmablst_RNA", "meanIglc_RNA", "CpG", "Plasmablast [LogNorm]", "Iglc [LogNorm]")

nacl_dot_plasmablast_ighc <- tissue_scatter(so_nacl@meta.data, "meanPlasmablst_RNA", "meanIghc_RNA", "NaCl", "Plasmablast [LogNorm]", "Ighc [LogNorm]")
cpg_dot_plasmablast_ighc <- tissue_scatter(so_cpg@meta.data, "meanPlasmablst_RNA", "meanIghc_RNA", "CpG", "Plasmablast [LogNorm]", "Ighc [LogNorm]")

nacl_dot_plasmablast_ighm <- tissue_scatter(so_nacl@meta.data, "meanPlasmablst_RNA", "meanIghm_RNA", "NaCl", "Plasmablast [LogNorm]", "Ighm [LogNorm]")
cpg_dot_plasmablast_ighm <- tissue_scatter(so_cpg@meta.data, "meanPlasmablst_RNA", "meanIghm_RNA", "CpG", "Plasmablast [LogNorm]", "Ighm [LogNorm]")

nacl_dot_plasmablast_ighd <- tissue_scatter(so_nacl@meta.data, "meanPlasmablst_RNA", "meanIghd_RNA", "NaCl", "Plasmablast [LogNorm]", "Ighd [LogNorm]")
cpg_dot_plasmablast_ighd <- tissue_scatter(so_cpg@meta.data, "meanPlasmablst_RNA", "meanIghd_RNA", "CpG", "Plasmablast [LogNorm]", "Ighd [LogNorm]")

nacl_dot_plasmablast_ighg <- tissue_scatter(so_nacl@meta.data, "meanPlasmablst_RNA", "meanIghg_RNA", "NaCl", "Plasmablast [LogNorm]", "Ighg [LogNorm]")
cpg_dot_plasmablast_ighg <- tissue_scatter(so_cpg@meta.data, "meanPlasmablst_RNA", "meanIghg_RNA", "CpG", "Plasmablast [LogNorm]", "Ighg [LogNorm]")

nacl_dot_plasmablast_igha <- tissue_scatter(so_nacl@meta.data, "meanPlasmablst_RNA", "meanIgha_RNA", "NaCl", "Plasmablast [LogNorm]", "Igha [LogNorm]")
cpg_dot_plasmablast_igha <- tissue_scatter(so_cpg@meta.data, "meanPlasmablst_RNA", "meanIgha_RNA", "CpG", "Plasmablast [LogNorm]", "Igha [LogNorm]")

# One row per feature, NaCl left and CpG right. The Ighc row now pairs the two Ighc
# plots (it previously showed the NaCl Ighm plot next to the CpG Ighc plot).
nacl_dens_plasmablast + cpg_dens_plasmablast +
nacl_dot_plasmablast_iglc + cpg_dot_plasmablast_iglc +
nacl_dot_plasmablast_ighc + cpg_dot_plasmablast_ighc +
nacl_dot_plasmablast_ighm + cpg_dot_plasmablast_ighm +
nacl_dot_plasmablast_ighd + cpg_dot_plasmablast_ighd +
nacl_dot_plasmablast_ighg + cpg_dot_plasmablast_ighg +
nacl_dot_plasmablast_igha + cpg_dot_plasmablast_igha +
plot_layout(ncol = 2)

## Plasma cell Tnfrsf13b expression vs heavy and light constant chain 

In [None]:
options(repr.plot.width = 10, repr.plot.height = 21)

# Density of one metadata score, coloured by FACS gate (tissue).
# meta: cell metadata data frame; x: column name; title / x_lab: plot labels.
tissue_density <- function(meta, x, title, x_lab) {
    ggplot(meta, aes(x = .data[[x]], color = tissue)) + geom_density() +
        scale_color_manual(values = color$tissue) + ggtitle(title) + xlab(x_lab)
}

# Scatter of two metadata scores, coloured by FACS gate (tissue).
tissue_scatter <- function(meta, x, y, title, x_lab, y_lab) {
    ggplot(meta, aes(x = .data[[x]], y = .data[[y]], color = tissue)) + geom_point() +
        scale_color_manual(values = color$tissue) + ggtitle(title) + xlab(x_lab) + ylab(y_lab)
}

# Plasma cell score (meanPlasma_RNA, i.e. Tnfrsf13b)
nacl_dens_plasma <- tissue_density(so_nacl@meta.data, "meanPlasma_RNA", "NaCl", "Plasma cell [LogNorm]")
cpg_dens_plasma <- tissue_density(so_cpg@meta.data, "meanPlasma_RNA", "CpG", "Plasma cell [LogNorm]")

nacl_dot_plasma_iglc <- tissue_scatter(so_nacl@meta.data, "meanPlasma_RNA", "meanIglc_RNA", "NaCl", "Plasma cell [LogNorm]", "Iglc [LogNorm]")
cpg_dot_plasma_iglc <- tissue_scatter(so_cpg@meta.data, "meanPlasma_RNA", "meanIglc_RNA", "CpG", "Plasma cell [LogNorm]", "Iglc [LogNorm]")

nacl_dot_plasma_ighc <- tissue_scatter(so_nacl@meta.data, "meanPlasma_RNA", "meanIghc_RNA", "NaCl", "Plasma cell [LogNorm]", "Ighc [LogNorm]")
cpg_dot_plasma_ighc <- tissue_scatter(so_cpg@meta.data, "meanPlasma_RNA", "meanIghc_RNA", "CpG", "Plasma cell [LogNorm]", "Ighc [LogNorm]")

nacl_dot_plasma_ighm <- tissue_scatter(so_nacl@meta.data, "meanPlasma_RNA", "meanIghm_RNA", "NaCl", "Plasma cell [LogNorm]", "Ighm [LogNorm]")
cpg_dot_plasma_ighm <- tissue_scatter(so_cpg@meta.data, "meanPlasma_RNA", "meanIghm_RNA", "CpG", "Plasma cell [LogNorm]", "Ighm [LogNorm]")

nacl_dot_plasma_ighd <- tissue_scatter(so_nacl@meta.data, "meanPlasma_RNA", "meanIghd_RNA", "NaCl", "Plasma cell [LogNorm]", "Ighd [LogNorm]")
cpg_dot_plasma_ighd <- tissue_scatter(so_cpg@meta.data, "meanPlasma_RNA", "meanIghd_RNA", "CpG", "Plasma cell [LogNorm]", "Ighd [LogNorm]")

nacl_dot_plasma_ighg <- tissue_scatter(so_nacl@meta.data, "meanPlasma_RNA", "meanIghg_RNA", "NaCl", "Plasma cell [LogNorm]", "Ighg [LogNorm]")
cpg_dot_plasma_ighg <- tissue_scatter(so_cpg@meta.data, "meanPlasma_RNA", "meanIghg_RNA", "CpG", "Plasma cell [LogNorm]", "Ighg [LogNorm]")

nacl_dot_plasma_igha <- tissue_scatter(so_nacl@meta.data, "meanPlasma_RNA", "meanIgha_RNA", "NaCl", "Plasma cell [LogNorm]", "Igha [LogNorm]")
cpg_dot_plasma_igha <- tissue_scatter(so_cpg@meta.data, "meanPlasma_RNA", "meanIgha_RNA", "CpG", "Plasma cell [LogNorm]", "Igha [LogNorm]")

# One row per feature, NaCl left and CpG right. The Ighc row now pairs the two Ighc
# plots (it previously showed the NaCl Ighm plot next to the CpG Ighc plot).
nacl_dens_plasma + cpg_dens_plasma +
nacl_dot_plasma_iglc + cpg_dot_plasma_iglc +
nacl_dot_plasma_ighc + cpg_dot_plasma_ighc +
nacl_dot_plasma_ighm + cpg_dot_plasma_ighm +
nacl_dot_plasma_ighd + cpg_dot_plasma_ighd +
nacl_dot_plasma_ighg + cpg_dot_plasma_ighg +
nacl_dot_plasma_igha + cpg_dot_plasma_igha +
plot_layout(ncol = 2)

## Remove

In [None]:
# Flag cells that express a plasmablast or plasma cell marker together with an
# immunoglobulin light chain. The comparison already yields TRUE/FALSE, so the flag
# is stored directly and tabulated from the stored column.
so_nacl$remove <- (so_nacl$meanPlasmablst_RNA > 0 | so_nacl$meanPlasma_RNA > 0) & so_nacl$meanIglc_RNA > 0
so_cpg$remove <- (so_cpg$meanPlasmablst_RNA > 0 | so_cpg$meanPlasma_RNA > 0) & so_cpg$meanIglc_RNA > 0

# Number of cells kept (FALSE) / removed (TRUE) per group
table(so_nacl$remove)
table(so_cpg$remove)

so_cpg <- subset(so_cpg, subset = remove == FALSE)
so_nacl <- subset(so_nacl, subset = remove == FALSE)

## T-Cell cluster 

Trac - T cell receptor alpha constant  
Trbc1 - T cell receptor beta constant 1  
Trbc2 - T cell receptor beta constant 2  
Trdc - T cell receptor delta constant  
Trgc1 - T cell receptor gamma constant 1  
Trgc2 - T cell receptor gamma constant 2  

Cd247 - T-cell surface glycoprotein Cd3 zeta chain (Cd3z)  
Cd3g - T-cell surface glycoprotein Cd3 gamma chain  
Cd3e - T-cell surface glycoprotein Cd3 epsilon chain  
Cd3d - T-cell surface glycoprotein Cd3 delta chain  

In [None]:
options(repr.plot.width = 10, repr.plot.height = 2.5)

genes_cd3 <- c("Cd247", "Cd3g", "Cd3e", "Cd3d")
genes_tcell <- c("Trac", "Trbc1", "Trbc2", "Trdc", "Trgc1", "Trgc2")

# Per-cell mean of the RNA `data` slot over a gene set; genes absent from the object
# are ignored. Returns a numeric vector named by cell.
mean_expression <- function(so, genes) {
    expr <- GetAssayData(so, assay = "RNA", slot = "data")
    colMeans(as.matrix(expr[intersect(genes, rownames(expr)), , drop = FALSE]))
}

so_nacl$meanTcell_RNA <- mean_expression(so_nacl, genes_tcell)
so_cpg$meanTcell_RNA <- mean_expression(so_cpg, genes_tcell)

so_nacl$meanCd3_RNA <- mean_expression(so_nacl, genes_cd3)
so_cpg$meanCd3_RNA <- mean_expression(so_cpg, genes_cd3)

# TCR constant chain score vs Cd3 score per cell (scatter plots, despite the
# `dens` in the variable names)
nacl_dens_tcell <- ggplot(so_nacl@meta.data, aes(x = meanTcell_RNA, y = meanCd3_RNA, color = tissue)) + geom_point() + scale_color_manual(values = color$tissue) + ggtitle("NaCl")
cpg_dens_tcell <- ggplot(so_cpg@meta.data, aes(x = meanTcell_RNA, y = meanCd3_RNA, color = tissue)) + geom_point() + scale_color_manual(values = color$tissue) + ggtitle("CpG")

nacl_dens_tcell + cpg_dens_tcell  

# Flag every cell with any TCR constant chain or Cd3 expression. The comparison
# already yields TRUE/FALSE, so the flag is stored directly.
so_nacl$remove <- so_nacl$meanTcell_RNA > 0 | so_nacl$meanCd3_RNA > 0
so_cpg$remove <- so_cpg$meanTcell_RNA > 0 | so_cpg$meanCd3_RNA > 0

# Number of cells kept (FALSE) / removed (TRUE) per group
table(so_nacl$remove)
table(so_cpg$remove)

so_nacl <- subset(so_nacl, subset = remove == FALSE)
so_cpg <- subset(so_cpg, subset = remove == FALSE)

## Check Myeloid Hb contamination 
Those might be myeloid cells which phagocytosed erythroid cells and therefore carry hemoglobin transcripts.

In [None]:
options(repr.plot.width = 10, repr.plot.height = 2.5)

# pHb_RNA per FACS gate.
# NOTE(review): pHb_RNA is presumably the percentage of hemoglobin transcripts per
# cell, computed upstream — confirm.
nacl_hb <- ggplot(so_nacl@meta.data, aes(x = tissue, y = pHb_RNA, color = tissue)) + geom_boxplot() + scale_color_manual(values = color$tissue) + ggtitle("NaCl")
cpg_hb <- ggplot(so_cpg@meta.data, aes(x = tissue, y = pHb_RNA, color = tissue)) + geom_boxplot() + scale_color_manual(values = color$tissue) + ggtitle("CpG")
nacl_hb + cpg_hb + plot_layout(ncol = 2)

# Myeloid-gated cells above / below the pHb_RNA > 1 cut-off. Read straight from the
# metadata instead of building a full Seurat subset just for counting.
table(so_cpg$pHb_RNA[which(so_cpg$tissue == "Myeloid")] > 1)
table(so_nacl$pHb_RNA[which(so_nacl$tissue == "Myeloid")] > 1)

# Remove 
# The comparison already yields TRUE/FALSE, so the flag is stored directly.
so_cpg$remove <- so_cpg$tissue == "Myeloid" & so_cpg$pHb_RNA > 1
so_nacl$remove <- so_nacl$tissue == "Myeloid" & so_nacl$pHb_RNA > 1

so_cpg <- subset(so_cpg, subset = remove == FALSE)
so_nacl <- subset(so_nacl, subset = remove == FALSE)

# Recompute dimensional reduction

In [None]:
# Cleaned objects, NaCl first and CpG second. The elements are named so the group
# stays identifiable in lapply() output; positional access ([[1]], [[2]]) is unchanged.
so_l <- list(NaCl = so_nacl, CpG = so_cpg)

In [None]:
# Re-select 3000 variable features on the cleaned cells of each group
so_l <- lapply(so_l, FindVariableFeatures, nfeatures = 3000)

In [None]:
# Disabled: removal of the cc_kowalczyk genes from the variable features.
# so_l <- lapply(so_l, variable_features_reduce, genes = cc_kowalczyk$gene)

In [None]:
# Re-scale while regressing out the per-cell total count (nCount_RNA)
so_l <- lapply(so_l, ScaleData, vars.to.regress = c("nCount_RNA"))

In [None]:
# PCA, SNN graph, clustering, UMAP and t-SNE on the cleaned groups.
# Warnings are silenced only for these calls; the previous `warn` setting is
# restored afterwards so that later cells still report warnings.
old_warn <- options(warn = -1)
so_l <- lapply(so_l, RunPCA, npcs = 100, verbose = FALSE)
# NOTE(review): graph and t-SNE use PCs 1:30 while UMAP uses PCs 1:90 — confirm intended.
so_l <- lapply(so_l, FindNeighbors, dims = 1:30, verbose = FALSE)
so_l <- lapply(so_l, FindClusters, verbose = FALSE)
so_l <- lapply(so_l, RunUMAP, dims = 1:90, verbose = FALSE)
so_l <- lapply(so_l, RunTSNE, dims = 1:30, verbose = FALSE)
options(old_warn)

In [None]:
# Name of the reduction read by the plotting function below
reduction <- "umap"

In [None]:
options(repr.plot.width = 25, repr.plot.height = 10)

# Five-panel embedding overview (cluster, tissue, replicate, main labels, fine
# labels) of one Seurat object; no overall title. Uses the globals `reduction`
# and `color`. Returns a patchwork object.
plot <- function(so) {

    # Shared pieces of the panels
    square_legend_bottom <- theme(aspect.ratio = 1, legend.position = "bottom")
    legend_one_col <- guides(color = guide_legend(ncol = 1, override.aes = list(size = 2)))
    legend_three_col <- guides(color = guide_legend(ncol = 3, override.aes = list(size = 2)))

    # Cluster ids, labelled on the embedding, legend hidden
    panel_cluster <- DimPlot(so, reduction = reduction, group.by = "RNA_snn_res.0.8", label = TRUE) &
        labs(title = "Cluster", x = "UMAP 1", y = "UMAP 2") &
        theme(aspect.ratio = 1, legend.position = "none") &
        legend_three_col

    # FACS gate
    panel_tissue <- DimPlot(so, reduction = reduction, group.by = "tissue", label = FALSE) &
        labs(title = "Tissue", x = "UMAP 1", y = "UMAP 2") &
        scale_color_manual(values = color$tissue, na.value = "dark gray") &
        square_legend_bottom &
        legend_one_col

    # Replicate
    panel_replicate <- DimPlot(so, reduction = reduction, group.by = "sample_rep", label = FALSE) &
        labs(title = "Replicate", x = "UMAP 1", y = "UMAP 2") &
        square_legend_bottom &
        legend_one_col

    # Main cell type labels
    panel_main <- DimPlot(so, reduction = reduction, group.by = "main_labels", label = FALSE) &
        square_legend_bottom &
        scale_color_manual(values = color$main_labels, na.value = "dark gray") &
        legend_three_col

    # Fine cell type labels
    panel_fine <- DimPlot(so, reduction = reduction, group.by = "fine_labels", label = FALSE) &
        square_legend_bottom &
        scale_color_manual(values = color$fine_labels, na.value = "dark gray") &
        legend_three_col

    panel_cluster + panel_tissue + panel_replicate + panel_main + panel_fine + plot_layout(ncol = 5)

}

lapply(so_l, plot)

In [None]:
options(repr.plot.width = 10, repr.plot.height = 5)

# so_l was rebuilt as list(so_nacl, so_cpg): element 1 is NaCl and element 2 is CpG.
# The indices were previously swapped, so each plot showed the other group's data
# under its title.
tissue_nacl_cluster <- ggplot(so_l[[1]]@meta.data, aes(x = seurat_clusters, fill = tissue)) + 
  geom_bar(stat = "count", position = "fill") + 
  scale_fill_manual(values = color$tissue) +
  ggtitle("NaCl FACS sort frequency") + xlab("Cluster") + ylab("Cell frequency") + 
  theme(legend.position = "bottom")

tissue_cpg_cluster <- ggplot(so_l[[2]]@meta.data, aes(x = seurat_clusters, fill = tissue)) + 
  geom_bar(stat = "count", position = "fill") + 
  scale_fill_manual(values = color$tissue) +
  ggtitle("CpG FACS sort frequency") + xlab("Cluster") + ylab("Cell frequency") + 
  theme(legend.position = "bottom")

tissue_nacl_cluster + tissue_cpg_cluster + plot_layout(ncol = 2) 

# Compute on merged data 

In [None]:
# Combine the two cleaned treatment groups into one object (overwrites `so`)
so <- merge(so_l[[1]], so_l[[2]])

In [None]:
# Normalise the merged object (NormalizeData with its defaults)
so <- NormalizeData(so )

In [None]:
# Select 3000 variable features on the merged data
so <- FindVariableFeatures(so, nfeatures = 3000)

In [None]:
# Disabled: removal of the cc_kowalczyk genes from the variable features.
# variable_features <- VariableFeatures(so)
# VariableFeatures(so) <- variable_features[!variable_features %in% c(cc_kowalczyk$gene)]

In [None]:
# Scale while regressing out the per-cell total count (nCount_RNA)
so <- ScaleData(so, vars.to.regress = c("nCount_RNA"))

In [None]:
# PCA, SNN graph, clustering, UMAP and t-SNE on the merged object.
# Warnings are silenced only for these calls; the previous `warn` setting is
# restored afterwards so that later cells still report warnings.
old_warn <- options(warn = -1)
so <- RunPCA(so, npcs = 100, verbose = FALSE)
# NOTE(review): graph and t-SNE use PCs 1:30 while UMAP uses PCs 1:90 — confirm intended.
so <- FindNeighbors(so, dims = 1:30, verbose = FALSE)
so <- FindClusters(so, verbose = FALSE)
so <- RunUMAP(so, dims = 1:90, verbose = FALSE)
so <- RunTSNE(so, dims = 1:30, verbose = FALSE)
options(old_warn)

In [None]:
# Name of the reduction read by the plotting cell below
reduction <- "umap"

In [None]:
options(repr.plot.width = 25, repr.plot.height = 10)

# Five-panel embedding overview of the merged object

# Cluster ids, labelled on the embedding, legend hidden
dplot_1 <- DimPlot(so, reduction = reduction, group.by = "RNA_snn_res.0.8", label = TRUE) &
    labs(title = "Cluster", x = "UMAP 1", y = "UMAP 2") &
    theme(aspect.ratio = 1, legend.position = "none") &
    guides(color = guide_legend(ncol = 3, override.aes = list(size = 2)))

# FACS gate
dplot_2 <- DimPlot(so, reduction = reduction, group.by = "tissue", label = FALSE) &
    labs(title = "Tissue", x = "UMAP 1", y = "UMAP 2") &
    scale_color_manual(values = color$tissue, na.value = "dark gray") &
    theme(aspect.ratio = 1, legend.position = "bottom") &
    guides(color = guide_legend(ncol = 1, override.aes = list(size = 2)))

# Replicate
dplot_3 <- DimPlot(so, reduction = reduction, group.by = "sample_rep", label = FALSE) &
    labs(title = "Replicate", x = "UMAP 1", y = "UMAP 2") &
    theme(aspect.ratio = 1, legend.position = "bottom") &
    guides(color = guide_legend(ncol = 1, override.aes = list(size = 2)))

# Main cell type labels
dplot_4 <- DimPlot(so, reduction = reduction, group.by = "main_labels", label = FALSE) &
    theme(aspect.ratio = 1, legend.position = "bottom") &
    scale_color_manual(values = color$main_labels, na.value = "dark gray") &
    guides(color = guide_legend(ncol = 3, override.aes = list(size = 2)))

# Fine cell type labels
dplot_5 <- DimPlot(so, reduction = reduction, group.by = "fine_labels", label = FALSE) &
    theme(aspect.ratio = 1, legend.position = "bottom") &
    scale_color_manual(values = color$fine_labels, na.value = "dark gray") &
    guides(color = guide_legend(ncol = 3, override.aes = list(size = 2)))

# All five panels in one row
dplot_1 + dplot_2 + dplot_3 + dplot_4 + dplot_5 + plot_layout(ncol = 5)

# Save output 

In [None]:
# Write the merged, cleaned Seurat object to so_out_file
saveRDS(so, so_out_file)

# Session info

In [None]:
# Record R and package versions used for this run
sessionInfo()