# BSF cell ranger output to Seurat Object
Build Seurat objects annotated with the BSF sample metadata and save them, so that downstream analyses can load them quickly.

In [None]:
library_load <- suppressMessages(
    list(
        # Seurat 
        library(Seurat),
        # Data 
        library(dplyr),
        # Plot 
        library(ggplot2)
    )
)

In [None]:
# Set working directory to project root.
# Every relative path used below (plotting_global.R, data/..., meta.csv) is
# resolved against this directory. The absolute path is machine-specific.
setwd("/research/peer/fdeckert/FD20200109SPLENO")

In [None]:
# Source project-level helper file (path is relative to the working directory).
# NOTE(review): presumably this defines theme_global_set(), which is used when
# the plotting theme is set - confirm against plotting_global.R.
source("plotting_global.R")

# Parameter settings

In [None]:
# Seurat parameters (passed to CreateSeuratObject() inside import_cnt())
min_cells    <- 5 # Drop features that are present in fewer than min_cells cells (original note: default 3)
min_features <- 1 # Drop cells that have fewer than min_features features (original note: default 200)

# Directories
# BSF delivery folder, relative to the working directory. The trailing slash is
# required because sub-paths are appended with paste0().
bsf_dir <- "data/BSA_0355_SM01_10x_SPLENO/"

# Files
# Destination of the Seurat object written with saveRDS() at the end
so_file <- "data/object/raw.rds"

# Plotting Theme
ggplot2::theme_set(theme_global_set()) # theme_global_set() comes from the sourced project file (presumably plotting_global.R)

# Info 
Single-cell 3' v3 10x project BSA_0355_SM01_10x_SPLENO with libraries T0030_1_2_3_4_L3914 and T0030_5_6_7_8_L3915. Processed with CellRanger 3.0.2 and the mm10-2020-A 3.0.0 transcriptome.

# Create meta file 

In [None]:
# Sample sheet with one row per CellRanger count directory found under
# <bsf_dir>OUT/COUNT.
#
# Columns: sample_path (directory incl. parent path), sample_dir (directory
# name), tissue, treatment, sample_rep and sample_name
# (<tissue>_<treatment>_<sample_rep>).
#
# NOTE(review): tissue / treatment / replicate are assigned by position, so
# this assumes list.files() returns the sample directories in an order that
# matches the pattern below - confirm against the directory names.
meta <- local({

    # List the directories once so that path and name always describe the same entry
    sample_path <- list.files(paste0(bsf_dir, "OUT/COUNT"), full.names = TRUE)
    sample_dir  <- basename(sample_path)

    tissue     <- rep(c("Myeloid", "Progenitor"), 4)
    treatment  <- rep(c("CpG", "NaCl"), each = 4)
    sample_rep <- rep(c("Rep1", "Rep1", "Rep2", "Rep2"), 2)

    # data.frame() silently recycles shorter columns when the lengths divide
    # evenly, which would mislabel samples - fail loudly instead.
    if (length(sample_path) != length(tissue)) {
        stop(
            "Expected ", length(tissue), " sample directories in ",
            paste0(bsf_dir, "OUT/COUNT"), " but found ", length(sample_path),
            call. = FALSE
        )
    }

    data.frame(
        sample_path = sample_path,
        sample_dir  = sample_dir,
        tissue      = tissue,
        treatment   = treatment,
        sample_rep  = sample_rep,
        sample_name = paste0(tissue, "_", treatment, "_", sample_rep)
    )

})

# Import count matrix as Seurat object

In [None]:
# Import one CellRanger count matrix as an annotated Seurat object.
#
# sample_meta: one-row data frame with the columns sample_path, sample_dir,
#              sample_name, sample_rep, tissue and treatment.
# cnt_type:    name of the matrix sub-directory inside sample_path
#              (e.g. "filtered_feature_bc_matrix").
#
# Uses the global filter settings min_cells and min_features.
# Returns the Seurat object with the sample annotation stored as cell meta
# data and sample_name set as identity.
import_cnt <- function(sample_meta, cnt_type) {

  # Location of the requested matrix for this sample
  matrix_dir <- paste0(sample_meta$sample_path, "/", cnt_type)
  print(paste("import:", matrix_dir))

  # 10x output -> sparse count matrix -> Seurat object
  counts <- Read10X(data.dir = matrix_dir)
  obj <- CreateSeuratObject(counts = counts, min.cells = min_cells, min.features = min_features)

  # Copy the sample-level annotation onto every cell
  for (field in c("sample_name", "sample_rep", "tissue", "treatment")) {
    obj[[field]] <- sample_meta[[field]]
  }
  obj$sample_group <- paste0(obj$tissue, "_", obj$treatment)
  for (field in c("sample_path", "sample_dir")) {
    obj[[field]] <- sample_meta[[field]]
  }

  # Replace the default identity column with the sample name
  obj$orig.ident <- NULL
  SetIdent(obj, value = "sample_name")

}

# Import the filtered (cell-called) and the raw (all barcodes) matrix of every
# sample; one list element per sample, in the order of split() on sample_path.
meta_by_sample <- split(meta, f = meta$sample_path)
so <- lapply(meta_by_sample, import_cnt, cnt_type = "filtered_feature_bc_matrix")
so_raw <- lapply(meta_by_sample, import_cnt, cnt_type = "raw_feature_bc_matrix")

# Merge a list of Seurat objects into one object.
#
# Every cell name gets the suffix "_<position in list>" before merging. The
# downstream steps (cell_id_idx in the meta csv, matching raw against filtered
# barcodes) rely on that suffix. merge() only adds it by itself when it
# detects duplicated barcodes across objects, so it is set explicitly here to
# make the naming independent of barcode collisions.
merge_samples <- function(so_list) {

  stopifnot(length(so_list) > 0)

  so_list <- lapply(seq_along(so_list), function(i) {
    RenameCells(so_list[[i]], new.names = paste0(colnames(so_list[[i]]), "_", i))
  })

  # so_list[2:length(so_list)] would yield c(2, 1) for a single sample
  if (length(so_list) == 1) {
    return(so_list[[1]])
  }

  merge(x = so_list[[1]], y = so_list[-1])

}

# Merge Seurat objects
so <- merge_samples(so)
so_raw <- merge_samples(so_raw)

# Write meta csv 
Update the sample metadata with the cell ID suffix index (`cell_id_idx`) that the merge appended to the cell names, then write it to `meta.csv`.

In [None]:
library(stringr)

# Start from the per-cell meta data of the merged filtered object, drop the
# per-cell count columns and keep the part of the cell name after the first
# "_" as cell_id_idx.
meta <- so@meta.data %>%
  dplyr::select(-nCount_RNA, -nFeature_RNA) %>%
  mutate(cell_id_idx = str_split_fixed(rownames(.), "_", 2)[, 2])

# Without the cell names as row names, identical rows collapse
rownames(meta) <- NULL
meta <- unique(meta)

write.csv(meta, file = "meta.csv", row.names = FALSE)

# Annotate cellRanger classification and barcode rank 

In [None]:
# Barcodes that are also present in the filtered object are labelled "Cell",
# all remaining barcodes "Background"
so_raw$cellranger_class <- if_else(colnames(so_raw) %in% colnames(so), "Cell", "Background")

## Barcode rank for UMI and Feature count
# Rank barcodes within each sample by decreasing UMI count (rank 1 = highest).
# The cell name is carried along as a column because grouping drops row names.
nCount_RNA_rank <- so_raw@meta.data %>%
  mutate(cell_id = rownames(.)) %>%
  arrange(sample_name, desc(nCount_RNA)) %>%
  group_by(sample_name) %>%
  mutate(nCount_RNA_rank = seq_len(n())) %>%
  ungroup() %>%
  dplyr::select(cell_id, nCount_RNA_rank) %>%
  data.frame()

# AddMetaData() matches rows to cells by row name
rownames(nCount_RNA_rank) <- nCount_RNA_rank$cell_id
so_raw <- AddMetaData(object = so_raw, metadata = nCount_RNA_rank)

# Log normalize data 

In [None]:
# Normalise the "RNA" assay of the merged raw-barcode object with Seurat's
# "LogNormalize" method and a scale factor of 10000. The result overwrites
# so_raw.
so_raw <- NormalizeData(
    object               = so_raw, 
    assay                = "RNA", 
    normalization.method = "LogNormalize", 
    scale.factor         = 10000
    )

# Compute feature content

In [None]:
# Each section selects genes by name pattern and stores the value returned by
# PercentageFeatureSet() for that gene set as a meta data column of so_raw.

# Mitochondrial genes (prefix "mt-") -> pMt_RNA
mt_genes <- grep("^mt-", rownames(so_raw), value = TRUE)
so_raw[["pMt_RNA"]] <- PercentageFeatureSet(so_raw, features = mt_genes)

# Hemoglobin genes -> pHb_RNA
# NOTE(review): unlike the other two patterns this one is not anchored with
# "^", so it matches these strings anywhere in a gene name - confirm intended.
hb_genes <- grep("Hba-|Hbb-|Hbq1b|Hbq1a", rownames(so_raw), value = TRUE)
so_raw[["pHb_RNA"]] <- PercentageFeatureSet(so_raw, features = hb_genes)

# Ribosomal genes (prefix "Rpl" or "Rps") -> pRp_RNA
rb_genes <- grep("^Rpl|^Rps", rownames(so_raw), value = TRUE)
so_raw[["pRp_RNA"]] <- PercentageFeatureSet(so_raw, features = rb_genes)

# Save output file

In [None]:
# Write the annotated raw-barcode object to so_file.
# Create the target directory first so that the save at the very end of the
# notebook does not fail on a missing folder. An existing directory is left
# untouched.
dir.create(dirname(so_file), recursive = TRUE, showWarnings = FALSE)
saveRDS(so_raw, file = so_file)

# Session info 

In [None]:
# Print the R session information (R version and packages) for this run
sessionInfo()