# BSF and VBC CellRanger import 

In [None]:
library_load <- suppressMessages(
    
    suppressWarnings(
    
        list(
            
            # Seurat 
            library(Seurat),
            
            # HTO demultiplex
            library(demuxmix), 
            library(DropletUtils), 

            # Module score 
            library(UCell), 

            # SingleR
            library(SingleR), 
            library(SingleCellExperiment), 
            
            # Data 
            library(tidyverse),
            
            # Plot 
            library(ggplot2), 
            
            # Python compatibility
            library(reticulate)
            
        )
    )
)

In [None]:
# Bind reticulate to the project's conda environment and print the Python configuration it resolved
use_condaenv(
    condaenv="p.3.10.16-FD20200109SPLENO",
    conda="/nobackup/peer/fdeckert/miniconda3/bin/conda",
    required=NULL
)
py_config()

In [None]:
# Silence all warnings.
# NOTE(review): warn=-1 is a global option, so warnings raised by every later cell are hidden as well - confirm this is intended.
options(warn=-1)

In [None]:
# Fixed seed for the random number generator of this session
random_seed <- 42
set.seed(random_seed)

In [None]:
# Set working directory to project root; relative paths used in this notebook are resolved against it
setwd("/research/peer/fdeckert/FD20200109SPLENO")

In [None]:
# Source files (relative path, so it depends on the working directory set above)
source("plotting_global.R")

# Parameter settings

In [None]:
# Seurat parameters (both set to 0, i.e. below the Seurat defaults quoted in the comments)
min_cells    <- 0 # Filter out features which are only present in less than min_cells cells - default 3
min_features <- 0 # Filter out cells which have fewer than min_features features - default 200

# Directories of the two sequencing runs; the trailing "/" is needed because sample paths are built with paste0()
run_path_bsf <- "/nobackup/peer/fdeckert/FD20200109SPLENO/data/BSA_0355_SM01_10x_SPLENO/"
run_path_vbc <- "/nobackup/peer/fdeckert/FD20200109SPLENO/data/VBC_R18518_22WYJ2LT3/"

# Files: output path of the exported AnnData object, relative to the working directory
h5ad_file <- "data/scRNAseq/object/raw.h5ad"

# BSF import 

In [None]:
# Sample sheet of the BSF run: one row per CellRanger sample directory.
# Constant columns are given as scalars and recycled by data.frame() to the eight samples.
meta_bsf <- data.frame(
    run_path=run_path_bsf,
    sample_dir=c(
        "M6_control_Mac", "M6_control_Prog",
        "M8_control_Mac", "M8_control_Prog",
        "M1_CpG_Mac", "M1_CpG_Prog",
        "M2_CpG_Mac", "M2_CpG_Prog"
    ),
    facility="BSF",
    lib_assay="3_prime",
    facs=rep(c("Myeloid", "Progenitor"), times=4),
    infection=rep(c("NaCl", "CpG"), each=4),
    dpi="D6",
    sample_rep=rep(rep(c("Rep1", "Rep2"), each=2), times=2),
    genotype="Bl6",
    sex="female",
    age="11 wks"
)

# Derived labels: group, group + replicate, and the full sample name including the FACS fraction
meta_bsf <- dplyr::mutate(
    meta_bsf,
    sample_group=paste(genotype, infection, dpi, sep="_"),
    sample_group_rep=paste(sample_group, sample_rep, sep="_"),
    sample_name=paste(genotype, infection, dpi, facs, sample_rep, sep="_")
)

In [None]:
# Import every BSF sample as its own Seurat object.
# meta_bsf is split by sample_dir, so each call receives the metadata of one sample directory
# and the resulting list is named by sample_dir.
so_bsf <- lapply(split(meta_bsf, f=meta_bsf$sample_dir), function(sample_meta) {

    # Directory holding the filtered CellRanger matrix of this sample
    matrix_dir <- paste0(sample_meta$run_path, sample_meta$sample_dir, "/outs/filtered_feature_bc_matrix")
    message(paste("import:", matrix_dir))

    # 10x sparse matrix -> Seurat object, filtered with the global min_cells / min_features
    seurat_obj <- CreateSeuratObject(
        counts=Read10X(data.dir=matrix_dir),
        min.cells=min_cells,
        min.features=min_features
    )

    # Append the sample metadata columns to every cell and restore the cell barcodes as row names
    cell_meta <- dplyr::cross_join(seurat_obj@meta.data, sample_meta)
    rownames(cell_meta) <- colnames(seurat_obj)
    seurat_obj@meta.data <- cell_meta

    # Use the sample name as orig.ident
    seurat_obj$orig.ident <- seurat_obj$sample_name

    seurat_obj

})

# VBC import and HTO demultiplexing

In [None]:
# Read the filtered CellRanger matrix of the VBC run
cnt_vbc <- Read10X(paste0(run_path_vbc, "22WYJ2LT3_R18518/outs/filtered_feature_bc_matrix/"))

In [None]:
# Split the result into gene expression and HTO counts.
# NOTE(review): positional access assumes element 1 is gene expression and element 2 the hashtag counts - confirm with names(cnt_vbc).
gex_vbc <- cnt_vbc[[1]]
hto_vbc <- cnt_vbc[[2]]

## Sample HTO demultiplexing 

In [None]:
# Demuxmix: fit the naive mixture model on the dense HTO matrix and classify every cell
dm <- demuxmix(hto=as.matrix(hto_vbc), model="naive") %>%
    dmmClassify()

In [None]:
# Prefix the three classification columns with dm_ and move the cell barcodes into a column
names(dm) <- c("dm_hto", "dm_prob", "dm_class")
dm <- tibble::rownames_to_column(dm, var="cell_id")

In [None]:
# Number of demuxmix singlets per assigned HTO
table(dm$dm_hto[dm$dm_class=="singlet"])

In [None]:
# HashedDrops
hd <- as.data.frame(hashedDrops(hto_vbc, confident.min=0.5))
# Turn the HTO indices into labels of the form "Sample<index>"
# NOTE(review): assumes the row order of hto_vbc corresponds to Sample1..SampleN - confirm.
hd$Best <- paste0("Sample", hd$Best)
hd$Second <- paste0("Sample", hd$Second)

In [None]:
# Collapse the Doublet / Confident flags into one class and keep only barcode, best HTO, logFC and class
hd <- hd %>%
    tibble::rownames_to_column("cell_id") %>%
    dplyr::mutate(hd_class=ifelse(Doublet, "multiplet", ifelse(Confident, "singlet", "uncertain"))) %>%
    dplyr::select(cell_id, hd_hto=Best, hd_logfc=LogFC, hd_class)

In [None]:
# Number of hashedDrops singlets per assigned HTO
table(hd$hd_hto[hd$hd_class=="singlet"])

In [None]:
# Combine both demultiplexing results per cell; the hashedDrops assignment becomes the hto_label used for joining
hto_demux <- dm %>%
    dplyr::left_join(hd, by="cell_id") %>%
    dplyr::rename(hto_label=hd_hto)

In [None]:
# Sample sheet of the VBC run: one row per hashtag (Sample1..Sample6) of the single multiplexed library.
# Constant columns are given as scalars and recycled by data.frame() to the six samples.
meta_vbc <- data.frame(
    run_path=run_path_vbc,
    sample_dir="22WYJ2LT3_R18518",
    facility="VBC",
    lib_assay="5_prime",
    facs="Mix",
    infection=rep(c("CpG", "Baseline"), times=c(4, 2)),
    dpi=rep(c("D3", "D1", "D0"), each=2),
    sample_rep="Rep1",
    genotype=rep(c("IFNAR_fl", "IFNAR_fl_LysM_cre"), times=3),
    sex="female",
    age="3 wks",
    hto_label=paste0("Sample", 1:6)
)

# Derived labels: group, group + replicate, and the full sample name including the FACS fraction
meta_vbc <- dplyr::mutate(
    meta_vbc,
    sample_group=paste(genotype, infection, dpi, sep="_"),
    sample_group_rep=paste(sample_group, sample_rep, sep="_"),
    sample_name=paste(genotype, infection, dpi, facs, sample_rep, sep="_")
)

In [None]:
# Expand the sample sheet to one row per cell by matching on hto_label, keep only cells that
# hashedDrops classified as singlet, and use the cell barcode as row name.
# - inner_join: a sample without any assigned cell would otherwise yield a row with cell_id = NA,
#   which column_to_rownames() cannot use as a row name.
# - relationship="one-to-many": fail loudly if an hto_label occurs more than once in the sample sheet.
# - the filter runs before the row names are set so that only retained cells become row names.
meta_vbc <- dplyr::inner_join(meta_vbc, hto_demux, by=join_by(hto_label), relationship="one-to-many") %>%
    dplyr::filter(hd_class=="singlet") %>%
    tibble::column_to_rownames("cell_id")

In [None]:
# Keep only the expression columns of the cells present in meta_vbc, in the order of its row names
gex_vbc <- gex_vbc[, rownames(meta_vbc)]

In [None]:
# One Seurat object for the whole VBC run with the per-cell metadata attached
so_vbc <- Seurat::CreateSeuratObject(counts=gex_vbc, assay="RNA", meta.data=meta_vbc)

In [None]:
# Split into a list of Seurat objects, one per sample_name
so_vbc <- Seurat::SplitObject(so_vbc, split.by="sample_name")

# Merge data 

In [None]:
# Collect the per-sample objects of both facilities in one list
so <- c(so_bsf, so_vbc)

In [None]:
# Merge Seurat objects: the first object is merged with all remaining ones
so <- merge(x=so[[1]], y=so[-1])

In [None]:
# Merge layers: combine the per-sample count layers into one
so <- JoinLayers(object=so, layers="counts")

In [None]:
# Set cell_id to the cell names of the merged object
so$cell_id <- colnames(so)

In [None]:
# Drop the per-cell count and feature totals from the metadata
so$nCount_RNA <- NULL
so$nFeature_RNA <- NULL

# Automated cell type annotation 

## ImmGen 

In [None]:
# Load reference data (ImmGen, via celldex)
sce_ref <- celldex::ImmGenData()

# Seurat object to SingleCellExperiment: wrap the raw RNA counts and log-normalise them
sce_qry <- SingleCellExperiment::SingleCellExperiment(
    assays=list(counts=Seurat::GetAssayData(so, assay="RNA", layer="counts"))
) %>%
    scuttle::logNormCounts()

# Predict labels with the main ImmGen labels; keep the pruned label and the delta.next score
label_main_immgen <- SingleR::SingleR(
    test=sce_qry,
    ref=sce_ref,
    labels=sce_ref$label.main,
    assay.type.test="logcounts",
    assay.type.ref="logcounts",
    de.method="classic",
    fine.tune=FALSE
) %>%
    as.data.frame() %>%
    dplyr::select(label_main_immgen=pruned.labels, delta_score_main_immgen=delta.next)

# Same prediction with the fine ImmGen labels
label_fine_immgen <- SingleR::SingleR(
    test=sce_qry,
    ref=sce_ref,
    labels=sce_ref$label.fine,
    assay.type.test="logcounts",
    assay.type.ref="logcounts",
    de.method="classic",
    fine.tune=FALSE
) %>%
    as.data.frame() %>%
    dplyr::select(label_fine_immgen=pruned.labels, delta_score_fine_immgen=delta.next)

## Haemopedia Mouse RNAseq from Haemosphere

In [None]:
# Load reference data: sample table (row names = sampleId) and raw expression table
# (row names = first column), with the expression columns restricted to and ordered by the samples in meta_ref
meta_ref <- read.delim("/research/peer/fdeckert/reference/bulkRNAseq/haemosphere/data/Haemopedia-Mouse-RNASeq_samples.txt") %>% tibble::column_to_rownames("sampleId")
mat_ref <- read.delim("/research/peer/fdeckert/reference/bulkRNAseq/haemosphere/data/Haemopedia-Mouse-RNASeq_raw.txt", row.names=1)[, rownames(meta_ref)]

# Annotate Ensembl id with gene symbol
gtf <- rtracklayer::import("/research/peer/fdeckert/reference/genome/GRCm38/Mus_musculus.GRCm38.102.gtf") %>% as.data.frame() 

# gene_id -> gene_name lookup (row names = gene_id); gene names that occur for more than one gene_id are dropped
convert <- gtf[, c("gene_id", "gene_name")] %>% dplyr::distinct() %>% dplyr::group_by(gene_name) %>% dplyr::mutate(n=n()) %>% dplyr::filter(n==1) %>% tibble::column_to_rownames("gene_id") %>% dplyr::select(-n)
# Restrict both tables to the gene ids they share, then replace the row names of the reference by gene names
gene_id <- intersect(rownames(mat_ref), rownames(convert)) %>% unique()
convert <- convert[which(rownames(convert) %in% gene_id), , drop=FALSE]
mat_ref <- mat_ref[which(rownames(mat_ref) %in% gene_id), , drop=FALSE]
rownames(mat_ref) <- convert[rownames(mat_ref), ]

# Build the reference SingleCellExperiment and log-normalise it
# NOTE(review): mat_ref comes from read.delim and is therefore a data.frame, not a matrix - confirm the assay is handled as intended or convert with as.matrix().
sce_ref <- SingleCellExperiment::SingleCellExperiment(list(counts=mat_ref), colData=meta_ref)
sce_ref <- scuttle::logNormCounts(sce_ref)

# Seurat object to SingleCellExperiment (raw RNA counts, log-normalised)
sce_qry <- SingleCellExperiment::SingleCellExperiment(assays=list(counts=Seurat::GetAssayData(so, assay="RNA", layer="counts")))
sce_qry <- scuttle::logNormCounts(sce_qry)

# Predict labels using the celltype column of the reference; keep the pruned label and the delta.next score
label_main_haemopedia <- SingleR::SingleR(test=sce_qry, ref=sce_ref, labels=sce_ref$celltype, assay.type.test="logcounts", assay.type.ref="logcounts", de.method="classic", fine.tune=FALSE) %>%
    as.data.frame() %>% dplyr::select(pruned.labels, delta.next) %>% dplyr::rename(label_main_haemopedia=pruned.labels, delta_score_main_haemopedia=delta.next)

## Add cell type annotation to meta data

In [None]:
# Attach the three SingleR annotation tables to the cell metadata.
# cbind() combines them by position, so all three must list the cells in the same order.
so <- AddMetaData(
    so,
    cbind(
        label_main_immgen,
        label_fine_immgen,
        label_main_haemopedia
    )
)

# Cell cycle scoring 

## Human to mouse gene symbol convert

In [None]:
# Seurat cell cycle genes (human symbols): S-phase and G2/M-phase sets
cc_genes_s <- cc.genes.updated.2019$s.genes
cc_genes_g2m <- cc.genes.updated.2019$g2m.genes

# Get mouse orthologs from human gene symbols
# NOTE(review): this disables SSL peer verification for all subsequent httr requests in the session -
# presumably a workaround for certificate problems with the Ensembl archive; confirm it is still required.
httr::set_config(httr::config(ssl_verifypeer=FALSE))

# Human and mouse Ensembl marts, both pinned to the Dec 2021 archive host
hgnc_mart <- biomaRt::useMart("ensembl", dataset="hsapiens_gene_ensembl", host="https://dec2021.archive.ensembl.org/")
mm_mart <- biomaRt::useMart("ensembl", dataset="mmusculus_gene_ensembl", host="https://dec2021.archive.ensembl.org/")

## Seurat gene set based on Tirosh et al, 2015

In [None]:
# Convert genes: query the linked human/mouse marts and keep the second result column (the mouse MGI symbols).
# Both variables are overwritten in place with the mouse symbols.
cc_genes_s <- biomaRt::getLDS(
    attributes="hgnc_symbol",
    filters="hgnc_symbol",
    values=cc_genes_s,
    mart=hgnc_mart,
    attributesL="mgi_symbol",
    martL=mm_mart,
    uniqueRows=TRUE
)[[2]]
cc_genes_g2m <- biomaRt::getLDS(
    attributes="hgnc_symbol",
    filters="hgnc_symbol",
    values=cc_genes_g2m,
    mart=hgnc_mart,
    attributesL="mgi_symbol",
    martL=mm_mart,
    uniqueRows=TRUE
)[[2]]

## Compute Module score with UCell 

In [None]:
# Remove cell cycle score columns left over from a previous run so that re-running the scoring
# cells cannot produce duplicated column names. Besides the legacy names (msS, msG2M) this covers
# the names that the scoring step creates (msS_RNA, msG2M_RNA) and the names they are renamed to
# afterwards (S_score, G2M_score). Columns that are not present are simply ignored.
so@meta.data <- so@meta.data[, !colnames(so@meta.data) %in% c("msS", "msG2M", "msS_RNA", "msG2M_RNA", "S_score", "G2M_score"), drop=FALSE]

In [None]:
# Cell cycle scoring: UCell module scores for the S and G2M gene sets, computed on the RNA counts
so <- UCell::AddModuleScore_UCell(
    so,
    features=list(msS_RNA=cc_genes_s, msG2M_RNA=cc_genes_g2m),
    assay="RNA",
    slot="counts",
    name=""
)

In [None]:
# Rename the UCell score columns to S_score / G2M_score
colnames(so@meta.data) <- colnames(so@meta.data) %>%
    gsub(pattern="msS_RNA", replacement="S_score", x=.) %>%
    gsub(pattern="msG2M_RNA", replacement="G2M_score", x=.)

# Save output file

In [None]:
# Store data as h5ad
# Python modules are imported without automatic conversion, so all objects below stay Python objects.
adata <- import("anndata", as="adata", convert=FALSE)
pd <- import("pandas", as="pd", convert=FALSE)
np <- import("numpy", as="np", convert=FALSE)

# Make sure the output directory exists; otherwise the write at the very end of the pipeline fails
dir.create(dirname(h5ad_file), recursive=TRUE, showWarnings=FALSE)

# Transform dgCMatrix to a Python sparse matrix and transpose it (Seurat: features x cells -> AnnData: cells x features)
X <- GetAssayData(so, assay="RNA", layer="counts")
X <- adata$AnnData(X=X)$X$T

# Combine counts and cell metadata; from here on `adata` is the AnnData object, no longer the module
# NOTE(review): obs names rely on reticulate carrying the row names of so@meta.data over as the pandas index - confirm.
adata <- adata$AnnData(X=X, obs=so@meta.data)
adata$var_names <- rownames(GetAssayData(so, assay="RNA", layer="counts"))

# Keep the unprocessed counts in .raw as well
adata$raw <- adata

# Save
adata$write_h5ad(h5ad_file)

# Session info 

In [None]:
# Record R version and loaded package versions for reproducibility
sessionInfo()