# BSF cell ranger output to Seurat Object  

Single cell 3' v3 10x project BSA_0355_SM01_10x_SPLENO with libraries T0030_1_2_3_4_L3914, T0030_5_6_7_8_L3915. CellRanger 3.0.2 with mm10-2020-A 3.0.0 transcriptome. 

In [None]:
library_load <- suppressMessages(
    
    suppressWarnings(
    
        list(
            
            # Seurat 
            library(Seurat),
            
            # Data 
            library(dplyr),
            library(stringr), 
            
            # Plot 
            library(ggplot2), 
            
            # Python compatibility
            library(reticulate)
            
        )
    )
)

In [None]:
packageVersion("Seurat")

In [None]:
# Configure reticulate
# Bind reticulate to the project conda environment, using the conda binary at
# the given absolute path, then print the Python configuration that reticulate
# resolved so the environment in use is recorded in the notebook output.
# NOTE(review): both the environment name and the conda path are hard-coded
# for one machine/user; adjust when running elsewhere.
use_condaenv(condaenv='p.3.9.19-FD20200109SPLENO', conda="/nobackup/peer/fdeckert/miniconda3/bin/conda", required=NULL)
py_config()

In [None]:
# Suppress all warnings for the rest of the session (warn=-1).
# NOTE(review): this also hides warnings from Seurat/reticulate calls below;
# consider removing it while debugging.
options(warn=-1)

In [None]:
random_seed <- 42
set.seed(random_seed)

In [None]:
# Set working directory to project root
setwd("/research/peer/fdeckert/FD20200109SPLENO")

In [None]:
# Source files
source("plotting_global.R")

# Parameter settings

In [None]:
# Seurat filtering thresholds passed to CreateSeuratObject() at import
min_cells    <- 0 # Drop features that are present in fewer than min_cells cells - default 3
min_features <- 0 # Drop cells that have fewer than min_features features - default 200

# Root directory of the BSF delivery (keep the trailing slash, paths are pasted)
bsf_dir <- "data/BSA_0355_SM01_10x_SPLENO/"

# Output files, both located in the ANALYSIS folder below bsf_dir
so_file <- paste0(bsf_dir, "ANALYSIS/raw.rds")
h5ad_file <- paste0(bsf_dir, "ANALYSIS/raw.h5ad")

# Use the project-wide ggplot2 theme defined in the sourced plotting_global.R
ggplot2::theme_set(theme_global_set())

# Create meta data 

In [None]:
# Sample-level meta data: one row per Cell Ranger count directory.
#
# The design vectors are assigned by position, so they rely on list.files()
# returning the sample directories in alphabetical order and on there being
# exactly eight of them. The count is checked explicitly because data.frame()
# would otherwise silently recycle the design vectors (e.g. for 4 or 16
# directories) and mislabel samples.
#
# NOTE(review): `infection` uses "Baseline" while `sample_name` uses "NaCl" for
# the same four samples; kept as in the original design - confirm this is
# intended.
meta <- local({

    n_samples <- 8L
    count_dir <- paste0(bsf_dir, "OUT/COUNT")

    # List the directory once; sample_dir is derived from the same listing
    sample_paths <- list.files(count_dir, full.names=TRUE)

    if (length(sample_paths) != n_samples) {
        stop(
            "Expected ", n_samples, " sample directories in ", count_dir,
            " but found ", length(sample_paths),
            call.=FALSE
        )
    }

    # Shared design vectors (also used to assemble sample_name)
    facs <- rep(c("Myeloid", "Progenitor"), 4)
    sample_rep <- rep(c("Rep1", "Rep1", "Rep2", "Rep2"), 2)

    data.frame(

        sample_path=sample_paths,
        sample_dir=basename(sample_paths),
        facs=facs,
        infection=rep(c("CpG", "Baseline"), each=4),
        dpi=rep(c("D6", "D0"), each=4),
        sample_rep=sample_rep,
        sample_name=paste0(facs, "_", rep(c("CpG", "NaCl"), each=4), "_", sample_rep)

    )

})

# Import count matrix as Seurat object

In [None]:
# Import one Cell Ranger count matrix as an annotated Seurat object.
#
# sample_meta: sample-level meta data providing sample_path, sample_dir,
#              sample_name, sample_rep, facs, infection and dpi
#              (assumes a single row per call - confirm against caller).
# cnt_type:    name of the sub-directory of sample_path that holds the 10x
#              matrix files.
#
# Reads the global thresholds min_cells and min_features.
# Returns the Seurat object with the sample annotation added as cell-level
# meta data, orig.ident removed and the identity set to sample_name.
import_cnt <- function(sample_meta, cnt_type){

    # Directory holding the 10x matrix of this sample
    cnt_dir <- paste0(sample_meta$sample_path, "/", cnt_type)
    print(paste("import:", cnt_dir))

    # Sparse count matrix -> Seurat object
    counts_10x <- Read10X(data.dir=cnt_dir)
    obj <- CreateSeuratObject(counts=counts_10x, min.cells=min_cells, min.features=min_features)

    # Annotate every cell with the sample-level information
    obj$sample_name <- sample_meta$sample_name
    obj$sample_rep <- sample_meta$sample_rep
    obj$facs <- sample_meta$facs
    obj$infection <- sample_meta$infection
    obj$dpi <- sample_meta$dpi

    # Grouping label built from the two columns assigned above
    obj$sample_group <- paste0(obj$infection, "_", obj$sample_rep)

    obj$sample_path <- sample_meta$sample_path
    obj$sample_dir <- sample_meta$sample_dir

    # Drop the default identity column and use the sample name instead
    obj$orig.ident <- NULL
    obj <- SetIdent(obj, value="sample_name")

    obj

}

# Import the filtered matrix of every sample; split() yields one meta data
# chunk per sample_path, so `so` becomes a list with one Seurat object each.
so <- lapply(
    X=split(meta, f=meta$sample_path),
    FUN=import_cnt,
    cnt_type="filtered_feature_bc_matrix"
)

In [None]:
# Merge Seurat objects
# Combine the per-sample objects into one: the first list element is x, all
# remaining elements are passed as y.
# NOTE(review): no add.cell.ids are given; the "_"-based split of the cell
# names further below presumably relies on merge() appending a per-object
# suffix to duplicated barcodes - confirm.
so <- merge(x=so[[1]], y=so[2:length(so)])

In [None]:
# Merge layers
# Join the "counts" layers of the merged object.
# NOTE(review): presumably merge() left one counts layer per sample and this
# collapses them into a single matrix - confirm for the installed Seurat version.
so <- JoinLayers(object=so, layers="counts")

In [None]:
# Set cell_id
# Store the column (cell) names of the merged object as a meta data column
so$cell_id <- colnames(so)

# Save meta data with cell id idx

In [None]:
# Start from the cell-level meta data without the per-cell columns
meta <- so@meta.data %>%
    dplyr::select(-c(nCount_RNA, nFeature_RNA, cell_id))

# Everything after the first "_" of the cell name becomes cell_id_idx
meta$cell_id_idx <- str_split_fixed(rownames(meta), "_", 2)[, 2]

# Without cell names as row names, identical rows collapse to unique
# combinations of the remaining columns
rownames(meta) <- NULL
meta <- dplyr::distinct(meta)

In [None]:
# Write the de-duplicated meta data table to the ANALYSIS folder.
# write.csv() keeps its default row.names=TRUE here, so the file gets a leading
# column with the running row index.
write.csv(meta, "data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/meta.csv")

# Log normalize data 

In [None]:
# Log-normalize the RNA assay with a scale factor of 10,000.
# NOTE(review): the object is rebuilt from the "counts" layer only before it is
# saved (see the "Save output file" section), so this normalized layer does not
# appear to reach the saved output - confirm whether this step is still needed.
so <- NormalizeData(
    
    object=so, 
    assay="RNA", 
    normalization.method="LogNormalize", 
    scale.factor=10000
    
)

# Feature percentage 

In [None]:
# Each gene set is selected by pattern matching on the feature names of `so`;
# the percentage per cell is stored as a new meta data column.

# Mitochondrial content (features starting with "mt-")
mt_genes <- grep("^mt-", rownames(so), value=TRUE)
so[["pMt_RNA"]] <- PercentageFeatureSet(so, features=mt_genes)

# Hemoglobin content (features containing Hba-, Hbb-, Hbq1b or Hbq1a)
hb_genes <- grep("Hba-|Hbb-|Hbq1b|Hbq1a", rownames(so), value=TRUE)
so[["pHb_RNA"]] <- PercentageFeatureSet(so, features=hb_genes)

# Ribosomal content (features starting with Rpl or Rps)
rb_genes <- grep("^Rpl|^Rps", rownames(so), value=TRUE)
so[["pRb_RNA"]] <- PercentageFeatureSet(so, features=rb_genes)

# Save output file

In [None]:
# Rebuild the Seurat object from the raw "counts" layer and the accumulated
# cell-level meta data, so that only raw counts are carried into the saved
# object.
# `project` is an argument of CreateSeuratObject(), not of GetAssayData();
# it has to be passed here for the project name to be set on the new object.
so <- CreateSeuratObject(
    counts=GetAssayData(so, assay="RNA", layer="counts"),
    project="FD20200109SPLENO",
    meta.data=so@meta.data
)

In [None]:
# Serialize the rebuilt Seurat object to the path defined in so_file
saveRDS(so, file=so_file)

In [None]:
# Store data as h5ad 
# Import the Python modules through reticulate; convert=FALSE keeps the results
# of Python calls as Python objects instead of converting them back to R.
# NOTE(review): pd and np are not used in the visible code.
adata <- import("anndata", as="adata", convert=FALSE)
pd <- import("pandas", as="pd", convert=FALSE)
np <- import("numpy", as="np", convert=FALSE)
    
# Pass the raw count matrix through a temporary AnnData object and take the
# transpose of its X.
# NOTE(review): presumably this converts the features x cells sparse matrix
# into a cells x features Python sparse matrix matching `obs` - confirm.
X <- GetAssayData(so, assay="RNA", layer="counts")    
X <- adata$AnnData(X=X)$X$T

# Combine counts and cell meta data. From here on `adata` refers to the AnnData
# object and no longer to the anndata module. Variable names are taken from the
# row names of the counts layer.
adata <- adata$AnnData(X=X, obs=so@meta.data)
adata$var_names <- rownames(GetAssayData(so, assay="RNA", layer="counts"))

# Assign the object itself to its raw attribute
# NOTE(review): presumably to keep the unprocessed counts available under
# .raw for downstream Python analysis - confirm.
adata$raw <- adata

# Save
adata$write_h5ad(h5ad_file)

# Session info 

In [None]:
sessionInfo()