# QC and filter Seurat object

In [1]:
library(Seurat)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(patchwork)

Attaching SeuratObject


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [None]:
# Set working directory to project root.
# Only a trailing "/script/seurat" is stripped (anchored with `$`), so a project
# path that happens to contain "/script/seurat" elsewhere is left intact and
# re-running this cell from the project root is a no-op.
setwd(sub("/script/seurat$", "", getwd()))

In [None]:
# Load the project-wide plotting helpers (path is relative to the working directory)
source(file = "plotting_global.R")

# Parameter settings

In [None]:
# QC thresholds. Every threshold exists twice: the `_m` value is used for cells
# whose tissue is "Myeloid", the `_p` value for all other cells.

# Number of detected features per cell (lower / upper bound)
nFeature_RNA_min_m <- 250
nFeature_RNA_min_p <- 500
nFeature_RNA_max_m <- 4000
nFeature_RNA_max_p <- 4000

# UMI count per cell (lower / upper bound)
nCount_RNA_min_m <- 1000
nCount_RNA_min_p <- 1500
nCount_RNA_max_m <- 20000
nCount_RNA_max_p <- 20000

# Maximum mitochondrial percentage per cell
pMt_RNA_max_m <- 5
pMt_RNA_max_p <- 5

# Input (raw) and output (QC-filtered) Seurat object files
so_raw_file <- "data/seurat_object/so_raw.rds"
so_qc_file <- "data/seurat_object/so_qc.rds"

# Default ggplot2 theme for all plots below; theme_global_set() comes from the
# project global source()
ggplot2::theme_set(new = theme_global_set())

# Import Seurat object

In [None]:
# Read the unfiltered Seurat object from the path configured in `so_raw_file`
so_raw <- readRDS(file = so_raw_file)

# Add filter metadata to Seurat object

In [None]:
# Per-cell thresholds: cells with tissue "Myeloid" receive the `_m` value,
# every other cell the `_p` value.
so_raw$nFeature_RNA_max <- ifelse(
  test = so_raw$tissue == "Myeloid",
  yes = nFeature_RNA_max_m,
  no = nFeature_RNA_max_p
)
so_raw$nFeature_RNA_min <- ifelse(
  test = so_raw$tissue == "Myeloid",
  yes = nFeature_RNA_min_m,
  no = nFeature_RNA_min_p
)

so_raw$nCount_RNA_max <- ifelse(
  test = so_raw$tissue == "Myeloid",
  yes = nCount_RNA_max_m,
  no = nCount_RNA_max_p
)
so_raw$nCount_RNA_min <- ifelse(
  test = so_raw$tissue == "Myeloid",
  yes = nCount_RNA_min_m,
  no = nCount_RNA_min_p
)

so_raw$pMt_RNA_max <- ifelse(
  test = so_raw$tissue == "Myeloid",
  yes = pMt_RNA_max_m,
  no = pMt_RNA_max_p
)

# QC class: "pass" only when the barcode is a CellRanger "Cell" and every
# feature / UMI / mitochondrial threshold is met, otherwise "fail".
so_raw$qc_class <- ifelse(
  test = Reduce(
    `&`,
    list(
      so_raw$cellranger_class == "Cell",
      so_raw$nFeature_RNA <= so_raw$nFeature_RNA_max,
      so_raw$nFeature_RNA >= so_raw$nFeature_RNA_min,
      so_raw$nCount_RNA <= so_raw$nCount_RNA_max,
      so_raw$nCount_RNA >= so_raw$nCount_RNA_min,
      so_raw$pMt_RNA <= so_raw$pMt_RNA_max
    )
  ),
  yes = "pass",
  no = "fail"
)

# Filter cells by rank plot

Empty droplets were identified with the EmptyDrops heuristic (Lun et al., 2019) as implemented in CellRanger v3.0.2. Because of their RNase activity, granulocytes might be wrongly classified as empty droplets by CellRanger.

**Typical Sample** A steep drop-off is indicative of good separation between the cell-associated barcodes and the barcodes associated with empty GEMs. An ideal barcode rank plot has a distinctive shape, which is referred to as a "cliff and knee".

**Heterogeneous Sample** Heterogeneous populations of cells in a sample result in two "cliff and knee" distributions. However, there should still be clear separation between the barcodes.

**Compromised Sample** Round curve and lack of steep cliff may indicate low sample quality or loss of single-cell behavior. This can be due to a wetting failure, premature cell lysis, or low cell viability. 

**Compromised Sample** Defined cliff and knee, but the total number of barcodes detected may be lower than expected. This can be caused by a sample clog or inaccurate cell count. 

In [None]:
# Barcode rank plot: log10 UMI count against log10 barcode rank, coloured by the
# CellRanger call, one panel per tissue x treatment. The dashed red line marks
# the per-cell minimum UMI threshold (nCount_RNA_min).
rank_plot <- ggplot(
  data = so_raw@meta.data,
  mapping = aes(
    x = log10(nCount_RNA_rank),
    y = log10(nCount_RNA),
    color = cellranger_class
  )
) +
  geom_point() +
  geom_hline(
    mapping = aes(yintercept = log10(nCount_RNA_min)),
    linetype = "longdash",
    color = "red"
  ) +
  scale_color_manual(values = so_color$cellranger_class) +
  labs(
    title = "Barcode rank plot",
    x = "log10(cell barcode rank)",
    y = "log10(cell UMI counts)"
  ) +
  facet_grid(tissue ~ treatment) +
  theme(legend.position = "bottom", aspect.ratio = 1)

rank_plot
ggsave(
  filename = "result/plot/seurat/rank_plot.png",
  plot = rank_plot,
  width = 4,
  height = 4
)

# Filter by cell_ranger class

In [None]:
# Keep only the barcodes that CellRanger classified as "Cell"
so_qc <- subset(x = so_raw, subset = cellranger_class == "Cell")

In [None]:
# Filter by UMI and Feature count 

In [None]:
# Density plots of the QC metrics for CellRanger-called cells, one panel per
# tissue x treatment. The dashed red line in each plot marks the per-cell
# threshold stored in the metadata.

# UMI count (log10) with the minimum UMI threshold
qc_1 <- ggplot(so_qc@meta.data, aes(x = log10(nCount_RNA), fill = tissue)) +
  geom_density() +
  ggtitle("Density plot UMI count") + xlab("log10(UMI count)") + ylab("Density") +
  geom_vline(aes(xintercept = log10(nCount_RNA_min)), color = "red", linetype = "longdash") +
  scale_x_continuous(breaks = integer_breaks()) +
  scale_fill_manual(values = so_color$tissue) +
  facet_grid(tissue ~ treatment) +
  theme(legend.position = "bottom", aspect.ratio = 1)

# Feature count (log10) with the minimum feature threshold
qc_2 <- ggplot(so_qc@meta.data, aes(x = log10(nFeature_RNA), fill = tissue)) +
  geom_density() +
  ggtitle("Density plot Feature count") + xlab("log10(Feature count)") + ylab("Density") +
  geom_vline(aes(xintercept = log10(nFeature_RNA_min)), color = "red", linetype = "longdash") +
  scale_x_continuous(breaks = integer_breaks()) +
  scale_fill_manual(values = so_color$tissue) +
  facet_grid(tissue ~ treatment) +
  theme(legend.position = "bottom", aspect.ratio = 1)

# Mitochondrial percentage with the maximum threshold, x axis limited to 0-20 %.
# Breaks and limits are set on ONE x scale: a separate xlim() would replace the
# scale_x_continuous() call and silently discard the integer breaks.
qc_3 <- ggplot(so_qc@meta.data, aes(x = pMt_RNA, fill = tissue)) +
  geom_density() +
  ggtitle("Density plot Mt %") + xlab("Mt [%]") + ylab("Density") +
  geom_vline(aes(xintercept = pMt_RNA_max), color = "red", linetype = "longdash") +
  scale_x_continuous(breaks = integer_breaks(), limits = c(0, 20)) +
  scale_fill_manual(values = so_color$tissue) +
  facet_grid(tissue ~ treatment) +
  theme(legend.position = "bottom", aspect.ratio = 1)

qc_1
qc_2
qc_3

ggsave(qc_1, filename = "result/plot/seurat/density_umi.png", width = 4, height = 4)
ggsave(qc_2, filename = "result/plot/seurat/density_feature.png", width = 4, height = 4)
ggsave(qc_3, filename = "result/plot/seurat/density_mt.png", width = 4, height = 4)


In [None]:
#' Scatter plot of feature count vs. UMI count, coloured by a QC metric
#'
#' Draws log10(nFeature_RNA) against log10(nCount_RNA), one panel per
#' tissue x treatment, with dashed red lines at the per-cell thresholds
#' nCount_RNA_min, nFeature_RNA_min and nFeature_RNA_max.
#'
#' @param data Cell metadata data frame; must contain nCount_RNA, nFeature_RNA,
#'   nCount_RNA_min, nFeature_RNA_min, nFeature_RNA_max, tissue, treatment and
#'   the column passed as `color`.
#' @param color Unquoted name of the metadata column mapped to point colour.
#' @param title Plot title.
#' @return A ggplot object.
qc_scatter_FUN <- function(data, color, title) {
  ggplot(data, aes(x = log10(nCount_RNA), y = log10(nFeature_RNA), color = {{ color }})) +
    geom_point() +
    ggtitle(title) + ylab("log10(feature count)") + xlab("log10(umi count)") +
    geom_vline(aes(xintercept = log10(nCount_RNA_min)), color = "red", linetype = "longdash") +
    # The upper UMI threshold is intentionally not drawn:
    #geom_vline(aes(xintercept = log10(nCount_RNA_max)), color = "red", linetype = "longdash") +
    geom_hline(aes(yintercept = log10(nFeature_RNA_min)), color = "red", linetype = "longdash") +
    geom_hline(aes(yintercept = log10(nFeature_RNA_max)), color = "red", linetype = "longdash") +
    facet_grid(tissue ~ treatment) +
    # No size aesthetic is mapped, so the former scale_size() call had no
    # effect and is omitted.
    theme(aspect.ratio = 1, legend.position = "bottom")
}

sc_1 <- qc_scatter_FUN(so_qc@meta.data, pMt_RNA, "Mitochondrial gene percentage")
sc_2 <- qc_scatter_FUN(so_qc@meta.data, pHb_RNA, "Hemoglobin gene percentage")
sc_3 <- qc_scatter_FUN(so_qc@meta.data, pRpl_RNA, "Ribosomal gene percentage")

sc_1
sc_2
sc_3

ggsave(sc_1, filename = "result/plot/seurat/sc_mt.png", width = 4, height = 4)
ggsave(sc_2, filename = "result/plot/seurat/sc_hg.png", width = 4, height = 4)
ggsave(sc_3, filename = "result/plot/seurat/sc_rb.png", width = 4, height = 4)

In [None]:
# Apply the per-cell QC thresholds stored in the metadata.
# The lower UMI bound (nCount_RNA >= nCount_RNA_min) is included so that the
# filtered object matches the `qc_class == "pass"` definition and the threshold
# lines drawn in the diagnostic plots above.
so_qc <- subset(
  so_qc,
  subset = nFeature_RNA >= nFeature_RNA_min &
    nFeature_RNA <= nFeature_RNA_max &
    nCount_RNA >= nCount_RNA_min &
    nCount_RNA <= nCount_RNA_max &
    pMt_RNA <= pMt_RNA_max
)

# Violin plots

In [None]:
#' Side-by-side QC plots before and after filtering
#'
#' Builds two jitter + box plot panels of one QC metric per treatment: the left
#' panel ("Cell containing GEM") uses all rows of `data`, the right panel
#' ("Filtered") only rows with `qc_class == "pass"`. Both panels are combined
#' with patchwork, drawn as a side effect, and returned.
#'
#' @param data Cell metadata data frame; must contain `treatment`, `tissue`,
#'   `qc_class` and the columns passed as `y` and `fill`.
#' @param y Unquoted name of the metric column plotted on the y axis.
#' @param fill Unquoted name of the grouping column. It is mapped to the
#'   colour aesthetic; colours are taken from the global `so_color$tissue`.
#' @param ylab Y axis label.
#' @param scale_y_log10 If TRUE the y axis is log10-scaled with lower limit
#'   `ymin` (and `ymax` is ignored); otherwise the axis is linear and limited
#'   to `ymin`..`ymax`.
#' @param ymin,ymax Y axis limits.
#' @return The combined patchwork object.
qc_vln_FUN <- function(data, y, fill, ylab = "", scale_y_log10, ymin, ymax) {

  # Exactly one y scale per panel. Adding ylim() and then scale_y_log10() (or
  # ylim() twice) makes ggplot2 replace the earlier scale with a message.
  y_scale <- if (scale_y_log10) {
    ggplot2::scale_y_log10(limits = c(ymin, NA))
  } else {
    ggplot2::ylim(ymin, ymax)
  }

  # Layers shared by both panels; adding a list to a ggplot adds its elements
  # in order.
  shared_layers <- list(
    geom_jitter(alpha = 0.2, shape = 16, color = "gray"),
    geom_boxplot(alpha = 1.0),
    xlab(""),
    ggplot2::ylab(ylab),
    y_scale,
    scale_color_manual(values = so_color$tissue),
    facet_wrap(~tissue, scales = "free_x"),
    theme(
      plot.title = element_text(size = 12, face = "bold", margin = margin(t = 0, r = 0, b = 5, l = 0)),
      axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
      strip.text = element_blank()
    )
  )

  # Left panel: every row of `data`
  vln_plot_1 <- ggplot(data, aes(x = treatment, y = {{ y }}, color = {{ fill }})) +
    shared_layers +
    ggtitle("Cell containing GEM")

  # Right panel: only rows that passed QC
  vln_plot_2 <- ggplot(data[data$qc_class == "pass", ], aes(x = treatment, y = {{ y }}, color = {{ fill }})) +
    shared_layers +
    ggtitle("Filtered")

  # `&` applies the legend position to every panel of the patchwork
  vln_plot <- vln_plot_1 + vln_plot_2 + plot_layout(ncol = 2, guides = "collect") & theme(legend.position = "bottom")

  # Draw immediately (kept for backward compatibility with existing callers)
  plot(vln_plot)

  return(vln_plot)

}

# Temporary object holding only CellRanger-called cells; its metadata feeds the
# before/after QC panels below.
so_cell <- subset(x = so_raw, subset = cellranger_class == "Cell")

# UMI and feature counts: linear axis from 0 to the observed maximum
qc_vln_1 <- qc_vln_FUN(
  data = so_cell@meta.data, y = nCount_RNA, fill = tissue,
  ylab = "UMI [count]", scale_y_log10 = FALSE,
  ymin = 0, ymax = max(so_cell$nCount_RNA)
)
qc_vln_2 <- qc_vln_FUN(
  data = so_cell@meta.data, y = nFeature_RNA, fill = tissue,
  ylab = "Feature [count]", scale_y_log10 = FALSE,
  ymin = 0, ymax = max(so_cell$nFeature_RNA)
)

# Percentage metrics: linear axis from 0 to 100
qc_vln_3 <- qc_vln_FUN(
  data = so_cell@meta.data, y = pMt_RNA, fill = tissue,
  ylab = "Mt [%]", scale_y_log10 = FALSE,
  ymin = 0, ymax = 100
)
qc_vln_4 <- qc_vln_FUN(
  data = so_cell@meta.data, y = pHb_RNA, fill = tissue,
  ylab = "Hb [%]", scale_y_log10 = FALSE,
  ymin = 0, ymax = 100
)
qc_vln_5 <- qc_vln_FUN(
  data = so_cell@meta.data, y = pRpl_RNA, fill = tissue,
  ylab = "Rbl [%]", scale_y_log10 = FALSE,
  ymin = 0, ymax = 100
)

qc_vln_1
qc_vln_2
qc_vln_3
qc_vln_4
qc_vln_5

# Write each combined plot to disk
ggsave(filename = "result/plot/seurat/qc_vln_1.png", plot = qc_vln_1, width = 5, height = 2.5)
ggsave(filename = "result/plot/seurat/qc_vln_2.png", plot = qc_vln_2, width = 5, height = 2.5)
ggsave(filename = "result/plot/seurat/qc_vln_3.png", plot = qc_vln_3, width = 5, height = 2.5)
ggsave(filename = "result/plot/seurat/qc_vln_4.png", plot = qc_vln_4, width = 5, height = 2.5)
ggsave(filename = "result/plot/seurat/qc_vln_5.png", plot = qc_vln_5, width = 5, height = 2.5)

# Drop the temporary object
rm(so_cell)