# Cell Type Assignment with Garnett

Garnett performs hierarchical cell type assignment. Documentation is available at: https://cole-trapnell-lab.github.io/garnett/docs_m3/

In [None]:
# Read the run configuration from environment variables.
# Sys.getenv() returns "" for a variable that is not set.
marker.list <- Sys.getenv("SNAKEMAKE_MARKER_LIST")  # Marker file passed to Garnett
data <- Sys.getenv("SNAKEMAKE_H5AD_INPUT")  # Input .h5ad file with the single-cell data
output_file <- Sys.getenv("SNAKEMAKE_OUTPUT_FILE")  # RDS file the trained classifier is saved to
organsim <- Sys.getenv("SNAKEMAKE_ORGANISM")  # Must be either "human" or "mouse"
celltype_column <- Sys.getenv("SNAKEMAKE_CELLTYPE_COLUMN")  # Cell metadata column that contains the cell type information

# Fail fast on unset variables: an empty output path would otherwise only be
# detected when the classifier is saved, i.e. after training has finished.
if (!nzchar(marker.list)) {
  stop("Environment variable SNAKEMAKE_MARKER_LIST is not set.")
}
if (!nzchar(data)) {
  stop("Environment variable SNAKEMAKE_H5AD_INPUT is not set.")
}
if (!nzchar(output_file)) {
  stop("Environment variable SNAKEMAKE_OUTPUT_FILE is not set.")
}

# Check that the input files exist
if (!file.exists(marker.list)) {
  stop(paste("Marker list file does not exist:", marker.list))
}

if (!file.exists(data)) {
  stop(paste("Input data file does not exist:", data))
}

# Echo the resolved configuration so it is visible in the notebook output
cat(paste("Marker list file:", marker.list, "\n"))
cat(paste("Input data file:", data, "\n"))
cat(paste("Output file:", output_file, "\n"))
cat(paste("Organism:", organsim, "\n"))
cat(paste("Cell type column:", celltype_column, "\n"))

In [None]:
library(garnett)
# Select the organism-specific annotation database used for gene ID handling.
# Reject unsupported organisms up front, then attach the matching package.
if (!(organsim %in% c("human", "mouse"))) {
  stop("Organism must be either 'human' or 'mouse'.")
}
if (organsim == "human") {
  library(org.Hs.eg.db)
  gene_db <- org.Hs.eg.db
} else {
  library(org.Mm.eg.db)
  gene_db <- org.Mm.eg.db
}

Load the single-cell data as a SingleCellExperiment object and then convert it to a cell_data_set (CDS) object.

In [None]:
library(zellkonverter)
# Enable zellkonverter's verbose output while the file is being read
setZellkonverterVerbose(verbose = TRUE)
# Read the .h5ad input file
adata <- readH5AD(file = data)
# Display a summary of the loaded object
adata

In [None]:
# Expose the configured cell type annotation under the column name "cell_type".
# Nothing is done when no column is configured or it is already "cell_type".
if (nzchar(celltype_column) && celltype_column != "cell_type") {
  if (celltype_column %in% colnames(colData(adata))) {
    # Only now is it safe to drop a pre-existing cell_type column: the rename
    # below is guaranteed to put a replacement in its place.
    if ("cell_type" %in% colnames(colData(adata))) {
      colData(adata)$cell_type <- NULL
    }
    colnames(colData(adata))[colnames(colData(adata)) == celltype_column] <- "cell_type"
  } else {
    # Leave the metadata untouched rather than deleting cell_type for nothing
    warning(
      "Cell type column '", celltype_column,
      "' not found in the cell metadata; leaving columns unchanged.",
      call. = FALSE
    )
  }
}

In [None]:
# Gene-level metadata taken from the rows of the loaded object
gene_meta <- as.data.frame(rowData(adata))
# Fall back to the row names when there is no gene_short_name column yet
if (is.null(gene_meta[["gene_short_name"]])) {
  gene_meta[["gene_short_name"]] <- rownames(gene_meta)
}
# Assemble the cell_data_set from the X assay plus cell and gene metadata
cds <- new_cell_data_set(
  expression_data = assays(adata)$X,
  gene_metadata = gene_meta,
  cell_metadata = as.data.frame(colData(adata))
)
head(cds)

Evaluate the marker list

In [None]:
# Evaluate the marker file against the data set. Both the CDS and the marker
# file are declared to use gene symbols ("SYMBOL") as identifiers.
checked <- check_markers(
  cds,
  marker_file = marker.list,
  marker_file_gene_id_type = "SYMBOL",
  cds_gene_id_type = "SYMBOL",
  db = gene_db
)
# Plot the result of the marker check
plot_markers(checked)

Train the Garnett classifier using the marker list

In [None]:
# Train the classifier from the marker file, using the same gene ID settings
# ("SYMBOL" for both the CDS and the marker file) as the marker check.
new_classifier <- train_cell_classifier(
  cds,
  marker_file = marker.list,
  marker_file_gene_id_type = "SYMBOL",
  cds_gene_id_type = "SYMBOL",
  db = gene_db
)
# Display the trained classifier
new_classifier

In [None]:
# Retrieve the feature genes of the classifier at its "root" node
feature_genes <- get_feature_genes(
  new_classifier,
  db = gene_db,
  node = "root"
)
feature_genes

Save the classifier to an RDS file

In [None]:
# Make sure the destination directory exists so the trained classifier is not
# lost to a missing-directory error; an already existing directory is fine.
dir.create(dirname(output_file), recursive = TRUE, showWarnings = FALSE)
# Serialize the trained classifier to the configured output path
saveRDS(new_classifier, file = output_file)

Session Information

In [None]:
# Record the R version and loaded package versions for reproducibility
sessionInfo()