In [None]:
########################################################################
# Author    : A. Alsema
# Date      : August 2021
# Dataset   : Visium Spatial Transcriptomics for MS lesions
# Purpose   : Calculate marker genes for each cluster
# Output    : csv files with cluster markers 

# Inputs: 
# - indir : directory containing the "<sampleID>_sce_q<q>.rds" files with the final clustering results.
# - outpath : directory in which the differentially expressed markers are stored
# - sampleIDs: a vector of samples to be processed
# - sampleQs: optimized number of clusters (q) per sample, in the same order as sampleIDs


# Note: http://www.ezstatconsulting.com/BayesSpace/articles/thrane_melanoma.html#differential-expression-analysis-of-spatial-clusters-1
#########################################################################

In [None]:
# Input and output locations; replace the placeholders before running.
# The workspace is intentionally not wiped with rm(list = ls()): that call only
# removes global variables (attached packages and options survive), so it does
# not give a clean session. Restart the kernel for a truly fresh run instead.
indir <- "<your_indir>" 
outpath <- "<your_outdir>"
# Create the output directory (including parents). showWarnings = FALSE keeps
# re-runs quiet when the directory already exists.
dir.create(outpath, recursive = TRUE, showWarnings = FALSE)

In [None]:
# load packages
library(dplyr)
library(BayesSpace)
library(Seurat)
library(tidyverse)
library(BiocSingular)
library(scater)
library(scran)
library(future)

# Samples to be processed.
# ST32 is skipped because it has only one cluster.
sampleIDs <- c(
    "ST31", "ST33", "ST34",
    "ST37", "ST38", "ST67", "ST68",
    "ST69", "ST70", "ST71", "ST72",
    "ST73", "ST74", "ST79",
    "ST55", "ST56", "ST57", "ST58",
    "ST59", "ST60", "ST61", "ST62",
    "ST63", "ST64", "ST65"
)

# Number of clusters (q) for each sample, in the same order as sampleIDs.
# A range of q values was tested and optimized beforehand.
sampleQs <- c(
    2, 10, 2,
    2, 3, 9, 8,
    6, 8, 8, 10,
    6, 7, 5,
    4, 5, 10, 10,
    8, 9, 10, 4,
    8, 8, 8
)

# The two vectors are paired by position only, so fail early if they get out
# of sync or if a sample is listed twice (its output would be overwritten).
stopifnot(
    length(sampleIDs) == length(sampleQs),
    anyDuplicated(sampleIDs) == 0L
)

# Per-sample marker detection.
# For each sample: read the clustered SCE object, wrap it in a temporary Seurat
# object, test every spatial cluster against the rest with MAST, and write the
# complete and the filtered marker tables to csv files in outpath.
for (i in seq_along(sampleIDs)) {
    sampleID <- sampleIDs[i]
    print(sampleID)
    q <- sampleQs[i]

    ### Load data  ###
    # file.path() supplies the separator, so indir works with or without a trailing slash
    sce <- readRDS(file = file.path(indir, paste0(sampleID, "_sce_q", q, ".rds")))
    print(paste('read in sce',sampleID))

    ### Convert BayesSpace to seurat  ###
    print("converting SCE to seurat...")
    # NOTE: the log-normalized values (logcounts) are passed in as counts, so this
    # is not a regular Seurat object. It is a temporary intermediate that is
    # neither saved nor used for anything other than the marker test below.
    sobj <- Seurat::CreateSeuratObject(counts = logcounts(sce),
                                       assay = 'Spatial',
                                       meta.data = as.data.frame(colData(sce)))
    sobj <- Seurat::SetIdent(sobj, value = "spatial.cluster")
    # standardized number of detected genes per spot, used as covariate below
    sobj$ngenes_rate <- scale(sobj$nFeature_Spatial)

    ### calculate cluster markers ###
    # The best results were obtained with ngenes_rate as covariate.
    # It performs better than using no covariate or sizeFactor as covariate/latent.var
    markers <- Seurat::FindAllMarkers(sobj, assay = 'Spatial', slot = 'data',
                                      test.use = "MAST",
                                      group.by = 'spatial.cluster',
                                      latent.vars = "ngenes_rate", # MAST paper shows this latent variable improves the results. https://doi.org/10.1186/s13059-015-0844-5
                                      logfc.threshold = 0.25,
                                      verbose = FALSE,
                                      only.pos = FALSE)

    ### write out cluster markers ###
    out_prefix <- file.path(outpath, paste0(sampleID, "_q", q))
    write.csv(markers, file = paste0(out_prefix, "_all_markers.csv"))

    ### optional: write out subsets ###
    # which() drops rows whose value is NA instead of turning them into all-NA rows
    markers <- markers[which(markers$p_val_adj < 0.05), ]
    markers_up <- markers[which(markers$avg_log2FC > 0.25), ]
    markers_down <- markers[which(markers$avg_log2FC < -0.25), ]
    write.csv(markers_up, file = paste0(out_prefix, "_sign_up_markers.csv"))
    write.csv(markers_down, file = paste0(out_prefix, "_sign_down_markers.csv"))

    ### create a message ###
    # Both tables always exist at this point, so "no markers" is detected
    # through the row count.
    if (nrow(markers_up) > 0) {
        print(nrow(markers_up))
    } else {
        print("there are no enriched cluster markers")
    }

    if (nrow(markers_down) > 0) {
        print(nrow(markers_down))
    } else {
        print("there are no depleted cluster markers")
    }
}

In [None]:
# Print the R version and the attached/loaded package versions for reproducibility
sessionInfo()