# Quality control

In [1]:
# Attach every package used by this notebook. Messages and warnings raised
# while attaching are silenced to keep the cell output clean; the values
# returned by the library() calls are collected in `library_load`.
library_load <- suppressMessages(suppressWarnings(list(
    library(Seurat),     # Seurat
    library(tidyverse),  # Data
    library(ggplot2),    # Plotting
    library(patchwork),  # Plotting
    library(reticulate)  # Python compatibility
)))

In [2]:
# Point reticulate at the project's conda environment, then print the Python
# configuration that was picked up.
use_condaenv(
    condaenv = "p.3.9.19-FD20200109SPLENO",
    conda = "/nobackup/peer/fdeckert/miniconda3/bin/conda",
    required = NULL
)
py_config()

python:         /nobackup/peer/fdeckert/miniconda3/envs/p.3.9.19-FD20200109SPLENO/bin/python
libpython:      /nobackup/peer/fdeckert/miniconda3/envs/p.3.9.19-FD20200109SPLENO/lib/libpython3.9.so
pythonhome:     /nobackup/peer/fdeckert/miniconda3/envs/p.3.9.19-FD20200109SPLENO:/nobackup/peer/fdeckert/miniconda3/envs/p.3.9.19-FD20200109SPLENO
version:        3.9.19 | packaged by conda-forge | (main, Mar 20 2024, 12:50:21)  [GCC 12.3.0]
numpy:          /nobackup/peer/fdeckert/miniconda3/envs/p.3.9.19-FD20200109SPLENO/lib/python3.9/site-packages/numpy
numpy_version:  1.26.4

NOTE: Python version was forced by use_python() function

In [3]:
# A negative `warn` value makes R ignore all warnings for the rest of the session
options(warn = -1)

In [4]:
# Fix the random number generator seed; the value is kept in `random_seed`
random_seed <- 42
set.seed(seed = random_seed)

In [5]:
# Work from the project root so the relative paths used below resolve
setwd(dir = "/research/peer/fdeckert/FD20200109SPLENO")

In [6]:
# Load the project's helper scripts (paths are relative to the working directory)
source(file = "plotting_global.R")
source(file = "bin/so_pl.R")

# Parameter settings

In [7]:
# Input (raw) and output (QC-filtered) file locations
raw_rds_file  <- "data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/raw.rds"
qc_rds_file   <- "data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/qc.rds"
qc_h5ad_file  <- "data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/qc.h5ad"

# Make the project theme the ggplot2 default
# (theme_global_set() comes from the project files loaded with source())
ggplot2::theme_set(new = theme_global_set(size_select = 1))

# Import Seurat object

In [8]:
# Load the object stored at `raw_rds_file`
so <- readRDS(file = raw_rds_file)

# Log-normalize

In [9]:
# Log-normalize without progress output
so <- NormalizeData(
    object = so,
    normalization.method = "LogNormalize",
    verbose = FALSE
)

# Cell cycle

In [10]:
# Read the pre-computed cell cycle table (first column becomes the row names)
# and attach its columns to the object's metadata.
cellcycle <- read.csv(file = "result/cellcycle/cellcycle.csv", row.names = 1)
so <- AddMetaData(object = so, metadata = cellcycle)

# Annotation with SingleR

In [11]:
# Read the pre-computed SingleR table (first column becomes the row names)
# and attach its columns to the object's metadata.
singler <- read.csv(file = "result/singler/singler.csv", row.names = 1)
so <- AddMetaData(object = so, metadata = singler)

# Doublet detection (solo)

In [None]:
# Read the doublet calls stored under result/solo (first column becomes the
# row names) and attach them to the object's metadata. The QC step below reads
# the `solo_label` column, presumably supplied by this table.
doubletfinder <- read.csv(file = "result/solo/solo.csv", row.names = 1)
so <- AddMetaData(object = so, metadata = doubletfinder)

# Set Seurat object QC class

In [None]:
#' Classify every cell of one sample as QC "pass" or "fail"
#'
#' Stores the thresholds that were applied as per-cell columns
#' (`nCount_RNA_min/max`, `nFeature_RNA_min/max`, `pMt_RNA_min/max`) and
#' writes the verdict to `qc_class`. The upper UMI and feature bounds are the
#' maxima observed in `so`, so they never exclude a cell by themselves.
#'
#' A cell passes when all of the following hold:
#'   * `nCount_RNA`   is in (nCount_RNA_min,   max(nCount_RNA)]
#'   * `nFeature_RNA` is in (nFeature_RNA_min, max(nFeature_RNA)]
#'   * `pMt_RNA`      is in (pMt_RNA_min,      pMt_RNA_max]
#'   * it is not a "Myeloid" cell (`facs`) with `pHb_RNA >= pHb_RNA_max`
#'   * `solo_label` is "singlet"
#' Cells for which any criterion cannot be evaluated (missing metadata) are
#' classed as "fail" instead of receiving an `NA` class.
#'
#' @param so Object supporting `$` / `$<-` on per-cell columns (this notebook
#'   passes one Seurat object per sample) with `nCount_RNA`, `nFeature_RNA`,
#'   `pMt_RNA`, `pHb_RNA`, `facs` and `solo_label`.
#' @param nCount_RNA_min Exclusive lower bound on the UMI count.
#' @param nFeature_RNA_min Exclusive lower bound on the feature count.
#' @param pMt_RNA_min Exclusive lower bound on the mitochondrial percentage.
#' @param pMt_RNA_max Inclusive upper bound on the mitochondrial percentage.
#' @param pHb_RNA_max Hemoglobin percentage at or above which "Myeloid" cells
#'   are rejected.
#' @return `so` with the threshold columns and `qc_class` added.
qc_class_set <- function(
    so,
    nCount_RNA_min=1000,
    nFeature_RNA_min=600,
    pMt_RNA_min=0,
    pMt_RNA_max=7.5,
    pHb_RNA_max=7.5
) {

    # Record the thresholds alongside the cells (the plotting cells read them)
    so$nCount_RNA_min <- nCount_RNA_min
    so$nCount_RNA_max <- max(so$nCount_RNA, na.rm=TRUE)

    so$nFeature_RNA_min <- nFeature_RNA_min
    so$nFeature_RNA_max <- max(so$nFeature_RNA, na.rm=TRUE)

    so$pMt_RNA_min <- pMt_RNA_min
    so$pMt_RNA_max <- pMt_RNA_max

    # Half-open interval test: lower bound exclusive, upper bound inclusive
    in_range <- function(x, lower, upper) x > lower & x <= upper

    hb_rejected <- so$facs == "Myeloid" & so$pHb_RNA >= pHb_RNA_max

    pass <-
        in_range(so$nCount_RNA, so$nCount_RNA_min, so$nCount_RNA_max) &
        in_range(so$nFeature_RNA, so$nFeature_RNA_min, so$nFeature_RNA_max) &
        in_range(so$pMt_RNA, so$pMt_RNA_min, so$pMt_RNA_max) &
        !hb_rejected &
        so$solo_label == "singlet"

    # An NA verdict means a criterion could not be checked; treat it as a fail
    so$qc_class <- ifelse(!is.na(pass) & pass, "pass", "fail")

    return(so)

}

In [None]:
# Classify cells sample by sample: split on `sample_name`, then run
# qc_class_set() on each piece (the result is a list of objects).
so <- Seurat::SplitObject(object = so, split.by = "sample_name")
so <- lapply(X = so, FUN = qc_class_set)

In [None]:
# Merge the per-sample Seurat objects back into one object.
# `so[-1]` is used instead of `so[2:length(so)]`: with a single sample `2:1`
# counts down and would index an element that does not exist, so that case
# simply returns the one object unchanged.
so <- if (length(so) > 1) merge(x=so[[1]], y=so[-1]) else so[[1]]

In [None]:
# Join the "counts" layers of the merged object
so <- JoinLayers(object = so, layers = "counts")

# UMI and Feature count 

## Density plot

In [None]:
# One density plot per QC metric, faceted by sample; `min`/`max` name the
# threshold columns written by qc_class_set() (the Mt plot uses a fixed 0 as
# its lower bound and is not log10-transformed).
density_plot_qc_1 <- density_plot_qc(
    so = so,
    title = "Density plot UMI count",
    x = nCount_RNA,
    xlab = "log10(UMI count)",
    min = nCount_RNA_min,
    max = nCount_RNA_max,
    formular = ~sample_name,
    nrow = 2
)
density_plot_qc_2 <- density_plot_qc(
    so = so,
    title = "Density plot Feature count",
    x = nFeature_RNA,
    xlab = "log10(Feature count)",
    min = nFeature_RNA_min,
    max = nFeature_RNA_max,
    formular = ~sample_name,
    nrow = 2
)
density_plot_qc_3 <- density_plot_qc(
    so = so,
    title = "Density plot Mt %",
    x = pMt_RNA,
    xlab = "Mt [%]",
    min = 0,
    max = pMt_RNA_max,
    xlim = c(0, 25),
    log10 = FALSE,
    formular = ~sample_name,
    nrow = 2
)

In [None]:
# Stack the three density plots in one column and hide every legend
options(repr.plot.width = 20, repr.plot.height = 3 * 10)
density_plot_qc_1 +
    density_plot_qc_2 +
    density_plot_qc_3 +
    plot_layout(ncol = 1) &
    theme(legend.position = "none")

## Scatter plot

In [None]:
# One scatter plot per QC variable, faceted by sample. The two categorical
# variables (doublet class, QC class) get a manual two-colour scale.
# Fix: the title of plot 3 read "Ribsonmal"; it now reads "Ribosomal".
scattern_plot_qc_1 <- scattern_plot_qc(so=so, title="Mitochondrial gene percentage", fill=pMt_RNA, formular=~sample_name, nrow=2)
scattern_plot_qc_2 <- scattern_plot_qc(so=so, title="Hemoglobin gene percentage", fill=pHb_RNA, formular=~sample_name, nrow=2)
scattern_plot_qc_3 <- scattern_plot_qc(so=so, title="Ribosomal gene percentage", fill=pRb_RNA, formular=~sample_name, nrow=2)
scattern_plot_qc_4 <- scattern_plot_qc(so=so, title="Doublet class", fill=solo_label, formular=~sample_name, nrow=2) +
    scale_color_manual(values=c("doublet"="#132B43", "singlet"="#56B1F7"))
scattern_plot_qc_5 <- scattern_plot_qc(so=so, title="QC class", fill=qc_class, formular=~sample_name, nrow=2) +
    scale_color_manual(values=c("fail"="#132B43", "pass"="#56B1F7"))

In [None]:
# Stack the five scatter plots in one column with legends at the bottom
options(repr.plot.width = 20, repr.plot.height = 6 * 10)
scattern_plot_qc_1 +
    scattern_plot_qc_2 +
    scattern_plot_qc_3 +
    scattern_plot_qc_4 +
    scattern_plot_qc_5 +
    plot_layout(ncol = 1) &
    theme(legend.position = "bottom")

## Box plots 

In [None]:
# One box plot result per QC metric, grouped and coloured by sample.
# The percentage metrics are limited to the 0-100 range.
box_plot_qc_1 <- box_plot_qc(
    so = so, y = nCount_RNA, fill = sample_name,
    ylab = "UMI [count]", ymin = 0,
    formular = ~sample_name
)
box_plot_qc_2 <- box_plot_qc(
    so = so, y = nFeature_RNA, fill = sample_name,
    ylab = "Feature [count]", ymin = 0,
    formular = ~sample_name
)
box_plot_qc_3 <- box_plot_qc(
    so = so, y = pMt_RNA, fill = sample_name,
    ylab = "Mt [%]", ymin = 0, ymax = 100,
    formular = ~sample_name
)
box_plot_qc_4 <- box_plot_qc(
    so = so, y = pHb_RNA, fill = sample_name,
    ylab = "Hb [%]", ymin = 0, ymax = 100,
    formular = ~sample_name
)
box_plot_qc_5 <- box_plot_qc(
    so = so, y = pRb_RNA, fill = sample_name,
    ylab = "Rb [%]", ymin = 0, ymax = 100,
    formular = ~sample_name
)

In [None]:
# Each box_plot_qc() result is indexed with [[1]] and [[2]]; all ten panels
# are stacked in a single column.
options(repr.plot.width = 20, repr.plot.height = 5 * 9)
box_plot_qc_1[[1]] +
    box_plot_qc_1[[2]] +
    box_plot_qc_2[[1]] +
    box_plot_qc_2[[2]] +
    box_plot_qc_3[[1]] +
    box_plot_qc_3[[2]] +
    box_plot_qc_4[[1]] +
    box_plot_qc_4[[2]] +
    box_plot_qc_5[[1]] +
    box_plot_qc_5[[2]] +
    plot_layout(ncol = 1)

# Filter cells by QC class

In [None]:
# Keep only the cells whose QC class is "pass"
so <- subset(x = so, subset = qc_class == "pass")

# Filter genes 

In [None]:
# Keep only genes with a count >= 1 in at least one of the remaining cells.
# `layer` replaces the `slot` argument, which is deprecated in SeuratObject v5
# (the notebook already relies on v5 via JoinLayers()).
cnt <- GetAssayData(so, assay="RNA", layer="counts")

# drop=FALSE keeps the result a matrix even if a single gene survives
cnt <- cnt[rowSums(cnt >= 1) >= 1, , drop=FALSE]

In [None]:
# Rebuild the object from the gene-filtered counts, carrying over the
# existing per-cell metadata.
so <- CreateSeuratObject(
    counts = cnt,
    meta.data = so@meta.data,
    project = "FD20200109SPLENO"
)

# Save results

In [None]:
# Store as rds
saveRDS(so, qc_rds_file) 

In [None]:
# Store data as h5ad 
adata <- import("anndata", as="adata", convert=FALSE)
pd <- import("pandas", as="pd", convert=FALSE)
np <- import("numpy", as="np", convert=FALSE)
    
# Transform dgCMatrix to sparse sc_sparse matrix
X <- GetAssayData(so, assay="RNA", slot="counts")    
X <- adata$AnnData(X=X)$X$T

adata <- adata$AnnData(X=X, obs=so@meta.data)
adata$var_names <- rownames(GetAssayData(so, assay="RNA", slot="counts"))

adata$raw <- adata
adata$write_h5ad(qc_h5ad_file)

# Session info

In [None]:
# Print the R version and the attached/loaded package versions
sessionInfo()