# Cell Type Assignment with SingleR

This notebook follows the SingleR package tutorial and, unless otherwise specified, uses the `celldex` package to obtain reference datasets. See https://bioconductor.org/packages/release/bioc/vignettes/SingleR/inst/doc/SingleR.html

In [None]:
# Read the run configuration from environment variables.
reference <- Sys.getenv("SNAKEMAKE_REFERENCE")  # Reference dataset name (or comma-separated fetchReference arguments)
input_file <- Sys.getenv("SNAKEMAKE_INPUT_FILE")  # Input h5ad file
output_file <- Sys.getenv("SNAKEMAKE_OUTPUT_FILE")  # Output csv mapping barcodes to cell type data
# as.integer() yields NA (plus a warning) for non-numeric text; the NA is
# rejected below with a clearer message, so the coercion warning is silenced.
num_threads <- suppressWarnings(
  as.integer(Sys.getenv("SNAKEMAKE_NUM_THREADS", "1"))
)  # Number of threads to use

# Fail early on missing configuration instead of erroring later in the notebook
if (!nzchar(reference)) {
  stop("SNAKEMAKE_REFERENCE is not set")
}
# An empty file name would make write.csv() print to the console instead of
# writing a file, so require an explicit output path.
if (!nzchar(output_file)) {
  stop("SNAKEMAKE_OUTPUT_FILE is not set")
}
if (is.na(num_threads) || num_threads < 1L) {
  stop("SNAKEMAKE_NUM_THREADS must be a positive integer")
}

# Check that the file exists
if (!file.exists(input_file)) {
  stop(paste("Input file does not exist:", input_file))
}

cat(paste("Reference:", reference, "\n"))
cat(paste("Input file:", input_file, "\n"))
cat(paste("Output file:", output_file, "\n"))
cat(paste("Number of threads:", num_threads, "\n"))

In [None]:
# Load the reference dataset
library(celldex)
# Dispatch on the reference name with a single assignment, so every branch is
# guaranteed to populate `reference_dataset`.
reference_dataset <- switch(
    reference,
    BlueprintEncodeData = BlueprintEncodeData(),
    DatabaseImmuneCellExpressionData = DatabaseImmuneCellExpressionData(),
    MouseRNAseqData = MouseRNAseqData(),
    MonacoImmuneData = MonacoImmuneData(),
    ImmGenData = ImmGenData(),
    HumanPrimaryCellAtlasData = HumanPrimaryCellAtlasData(),
    {
        # Default: try to fetch from gypsum:
        # https://rdrr.io/github/LTLA/celldex/man/fetchReference.html
        cat(paste("Fetching other gypsum reference dataset:", reference, "\n"))
        # Split the reference string by commas and pass the pieces, in order,
        # as positional arguments to fetchReference
        reference_args <- strsplit(reference, ",")[[1]]
        do.call(fetchReference, as.list(reference_args))
    }
)
reference_dataset

In [None]:
# Load the input data as a SingleCellExperiment object
library(zellkonverter)
setZellkonverterVerbose(TRUE)
# Read the h5ad file whose path was taken from SNAKEMAKE_INPUT_FILE
adata <- readH5AD(input_file)
adata

In [None]:
# Make sure the input has a "logcounts" assay; when it is missing, compute it
# with logNormCounts() from the "X" assay.
if (!is.element("logcounts", assayNames(adata))) {
    library(scuttle)
    adata <- logNormCounts(adata, assay.type = "X")
}

In [None]:
# Annotate with SingleR, using the main labels of the reference dataset
library(SingleR)
singleR_results <- SingleR(
    test = adata,
    ref = reference_dataset,
    labels = reference_dataset$label.main,
    de.method = "wilcox",
    num.threads = num_threads
)
# Summarise how many cells were assigned to each label
table(singleR_results$labels)

In [None]:
# Draw a heatmap of the scores stored in the SingleR results
plotScoreHeatmap(singleR_results)

In [None]:
# Plot the delta distribution of the SingleR results
# (ncol = 3: presumably three panels per row - confirm against the SingleR docs)
plotDeltaDistribution(singleR_results, ncol = 3)

In [None]:
# Export the annotation results as CSV: convert them to a plain data frame,
# copy the row names into a trailing "barcode" column, and write the table
# without row names.
singleR_df <- as.data.frame(singleR_results)
singleR_df[["barcode"]] <- rownames(singleR_df)
write.csv(x = singleR_df, file = output_file, row.names = FALSE)

In [None]:
# Print the R version and the attached/loaded package versions for reproducibility
sessionInfo()