# In [2]:
#########################################################################################################
# Date: July 2021
# Author: Marion Wijering for GM neuronal layer annotation; Astrid Alsema for custom functions.
# Dataset: Visium Spatial Transcriptomics for MS lesions, 10 GM samples
# Purpose:  Pseudobulk per sample_identity. Requires that clusters have been manually annotated with neuronal layer information
# Input: SCE object
# Output: csv file with the pseudobulk counts per identity
#########################################################################################################

# load libraries
library(BayesSpace)
library(forcats)

# In [2]:
########################################## CUSTOM FUNCTIONS ###############################################

#' Annotate a Clustered BayesSpace Object Before Pseudobulking
#'
#' Draws the cluster plot of the object exactly as it was passed in, then
#' stores the lesion type and the sample ID on the object and turns the
#' spatial cluster assignment into a factor.
#'
#' @param sce SingleCellExperiment object, clustered with BayesSpace.
#' @param lesiontype Group the sample belongs to (e.g., "subpial", "CGM",
#'   "NAGM"). Stored as character in `sce$Group`.
#' @param sample_ID Sample ID (e.g., "ST55"). Stored in `sce$SampleID`.
#'
#' @return The input object with `Group` and `SampleID` added and
#'   `spatial.cluster` converted to a factor.
#' @examples
#' sce <- readRDS(file = "path_to_rds_file.rds")
#' sce <- prep_sce(sce, lesiontype = "CGM", sample_ID = "ST55")
#'
prep_sce <- function(sce, lesiontype, sample_ID) {
  # Show the clusters as delivered, before any column is touched.
  original_plot <- clusterPlot(sce)
  print(original_plot)

  # Sample-level metadata used downstream.
  group_label <- as.character(lesiontype)
  sce$Group <- group_label
  sce$SampleID <- sample_ID

  # Cluster labels become a factor so they can be collapsed into identities.
  cluster_factor <- factor(sce$spatial.cluster)
  sce$spatial.cluster <- cluster_factor

  sce
}

#' Create a Pseudobulk DataFrame from a List of SCE Objects
#'
#' For every sample in `mylist`, the spots are grouped by the value of the
#' `identity` column and the raw counts of each group are summed per gene.
#' Each group becomes one column of the returned data frame, named after the
#' identity. Columns appear in sample order and, within a sample, in order of
#' first appearance of the identity.
#'
#' All samples must contain the same genes in the same order, because the
#' per-identity columns are bound side by side without any matching on gene
#' name. A mismatch in gene count or gene names stops with an error instead of
#' silently producing misaligned rows.
#'
#' @param mylist List of SingleCellExperiment objects. Each element is one
#'   sample. Each sample's colData must contain the variable 'identity'
#'   indicating the group to pseudobulk on. Spots whose identity is `NA` are
#'   skipped.
#'
#' @return A data frame with one row per gene and one column per unique
#'   identity group, holding the summed counts. Row names are the gene names
#'   of the summed counts when those are available. An empty data frame is
#'   returned when `mylist` is empty or contains no spots.
#' @examples
#' sce_list <- list(sce1, sce2, sce3)
#' pseudobulk_df <- CreatePseudobulk(sce_list)
#'
CreatePseudobulk <- function(mylist) {
  if (length(mylist) == 0L) {
    return(data.frame())
  }

  summed <- list()       # one named element (per-gene sums) per identity
  ref_genes <- NULL      # gene names of the first column, if any
  ref_ngenes <- NA_integer_

  for (i in seq_along(mylist)) { # loop through samples
    sce_i <- mylist[[i]]
    if (is.null(sce_i$identity)) {
      stop("Sample ", i, " has no 'identity' column.", call. = FALSE)
    }
    spot_identity <- as.character(sce_i$identity)
    identities <- unique(spot_identity[!is.na(spot_identity)])

    for (id in identities) { # sum up spots for each sample identity
      keep <- !is.na(spot_identity) & spot_identity == id
      sce_tmp <- sce_i[, keep]
      count_tmp <- as.matrix(counts(sce_tmp))
      message(
        "Summing counts for ", id, " (", nrow(count_tmp), " genes, ",
        ncol(count_tmp), " spots)"
      )
      gene_sums <- rowSums(count_tmp) # sum counts per gene

      if (length(summed) == 0L) {
        # The first column defines the gene set every later column must match.
        ref_ngenes <- length(gene_sums)
        ref_genes <- names(gene_sums)
      } else {
        if (length(gene_sums) != ref_ngenes) {
          stop(
            "Sample ", i, " (identity '", id, "') has ", length(gene_sums),
            " genes but the first pseudobulk column has ", ref_ngenes,
            "; all samples must contain the same genes.",
            call. = FALSE
          )
        }
        if (!is.null(ref_genes) && !is.null(names(gene_sums)) &&
          !identical(names(gene_sums), ref_genes)) {
          stop(
            "Sample ", i, " (identity '", id, "') has gene names or gene ",
            "order that differ from the first pseudobulk column.",
            call. = FALSE
          )
        }
      }

      # Append (rather than assign by name) so repeated identities are kept
      # as separate columns instead of overwriting each other.
      summed <- c(summed, stats::setNames(list(gene_sums), id))
    }
  }

  if (length(summed) == 0L) {
    return(data.frame())
  }

  # Bind all columns at once; column names are the identities, unmodified.
  as.data.frame(do.call(cbind, summed))
}

# In [3]:
########################################## Prepare 10 GM samples ########################################## 

# Load all samples

# Every sample follows the same recipe:
#   1. read the clustered object and attach lesion type / sample ID,
#   2. collapse the BayesSpace cluster numbers into annotated regions,
#   3. prefix the region with the sample ID so identities are unique
#      across samples,
#   4. plot the spots coloured by the resulting identity.

# Sample 1 -- ST55, CGM
sce.1 <- prep_sce(
  readRDS(file = "ST55_sce_q4.rds"),
  lesiontype = "CGM",
  sample_ID = "ST55"
)
sce.1$identity <- fct_collapse(
  sce.1$spatial.cluster,
  WM = c("2", "4"),
  layer_6 = "1",
  layer_5 = "3"
)
sce.1$identity <- paste0("ST55_", sce.1$identity)
clusterPlot(sce.1, label = "identity")

# Sample 2 -- ST56, NAGM
sce.2 <- prep_sce(
  readRDS(file = "ST56_sce_q5.rds"),
  lesiontype = "NAGM",
  sample_ID = "ST56"
)
sce.2$identity <- fct_collapse(
  sce.2$spatial.cluster,
  WM = "1",
  layer_6 = "3",
  layer_5 = "4",
  layer_4 = "5",
  meninges = "2"
)
sce.2$identity <- paste0("ST56_", sce.2$identity)
clusterPlot(sce.2, label = "identity")

# Sample 3 -- ST57, subpial
sce.3 <- prep_sce(
  readRDS(file = "ST57_sce_q10.rds"),
  lesiontype = "subpial",
  sample_ID = "ST57"
)
sce.3$identity <- fct_collapse(
  sce.3$spatial.cluster,
  WM = c("5", "6", "10"),
  layer_6 = "4",
  layer_5 = "8",
  layer_4 = "2",
  layer_3 = "7",
  layer_2 = "3",
  layer_1 = "9",
  technical = "1"
)
sce.3$identity <- paste0("ST57_", sce.3$identity)
clusterPlot(sce.3, label = "identity")

# Sample 4 -- ST58, subpial
# NOTE(review): the file name suggests q = 10 clusters, but cluster "4" is not
# assigned below, so it would keep its numeric label ("ST58_4") -- confirm.
sce.4 <- prep_sce(
  readRDS(file = "ST58_sce_q10.rds"),
  lesiontype = "subpial",
  sample_ID = "ST58"
)
sce.4$identity <- fct_collapse(
  sce.4$spatial.cluster,
  WM = c("2", "8", "10"),
  WM_lesion = "9",
  layer_6 = "3",
  layer_5 = "1",
  layer_4 = "5",
  layer_3 = "6",
  meninges = "7"
)
sce.4$identity <- paste0("ST58_", sce.4$identity)
clusterPlot(sce.4, label = "identity")

# Sample 5 -- ST59, CGM
sce.5 <- prep_sce(
  readRDS(file = "ST59_sce_q8.rds"),
  lesiontype = "CGM",
  sample_ID = "ST59"
)
sce.5$identity <- fct_collapse(
  sce.5$spatial.cluster,
  WM = c("4", "6"),
  layer_1 = "8",
  layer_2 = "1",
  layer_3 = "2",
  layer_4_5_6 = c("3", "5"),
  technical = "7"
)
sce.5$identity <- paste0("ST59_", sce.5$identity)
clusterPlot(sce.5, label = "identity")

# Sample 6 -- ST60, NAGM
sce.6 <- prep_sce(
  readRDS(file = "ST60_sce_q9.rds"),
  lesiontype = "NAGM",
  sample_ID = "ST60"
)
sce.6$identity <- fct_collapse(
  sce.6$spatial.cluster,
  WM = c("2", "3", "8"),
  layer_1 = "9",
  layer_6 = "1",
  technical = "4",
  not_clear = c("5", "6", "7")
)
sce.6$identity <- paste0("ST60_", sce.6$identity)
clusterPlot(sce.6, label = "identity")

# Sample 7 -- ST61, subpial
sce.7 <- prep_sce(
  readRDS(file = "ST61_sce_q10.rds"),
  lesiontype = "subpial",
  sample_ID = "ST61"
)
sce.7$identity <- fct_collapse(
  sce.7$spatial.cluster,
  WM = c("3", "4", "8"),
  not_clear = c("1", "6", "5", "2", "10"),
  meninges = c("7", "9")
)
sce.7$identity <- paste0("ST61_", sce.7$identity)
clusterPlot(sce.7, label = "identity")

# Sample 8 -- ST63, CGM
# NOTE(review): the file name suggests q = 8 clusters, but cluster "7" is not
# assigned below, so it would keep its numeric label ("ST63_7") -- confirm.
sce.8 <- prep_sce(
  readRDS(file = "ST63_sce_q8.rds"),
  lesiontype = "CGM",
  sample_ID = "ST63"
)
sce.8$identity <- fct_collapse(
  sce.8$spatial.cluster,
  WM = "5",
  layer_6 = "3",
  layer_5 = "6",
  layer_4 = "2",
  layer_3 = "4",
  layer_2 = "1",
  layer_1 = "8"
)
sce.8$identity <- paste0("ST63_", sce.8$identity)
clusterPlot(sce.8, label = "identity")

# Sample 9 -- ST64, NAGM
sce.9 <- prep_sce(
  readRDS(file = "ST64_sce_q8.rds"),
  lesiontype = "NAGM",
  sample_ID = "ST64"
)
sce.9$identity <- fct_collapse(
  sce.9$spatial.cluster,
  WM = "8",
  layer_6 = "4",
  layer_5 = "3",
  layer_4 = "7",
  layer_3 = "1",
  layer_2 = "6",
  layer_1 = "2",
  technical = "5"
)
sce.9$identity <- paste0("ST64_", sce.9$identity)
clusterPlot(sce.9, label = "identity")

# Sample 10 -- ST65, subpial
sce.10 <- prep_sce(
  readRDS(file = "ST65_sce_q8.rds"),
  lesiontype = "subpial",
  sample_ID = "ST65"
)
sce.10$identity <- fct_collapse(
  sce.10$spatial.cluster,
  WM = "3",
  layer_6 = "4",
  layer_5 = "5",
  layer_4 = "8",
  layer_3 = "1",
  layer_2 = "2",
  layer_1 = "7",
  technical = "6"
)
sce.10$identity <- paste0("ST65_", sce.10$identity)
clusterPlot(sce.10, label = "identity")


##########################################  pseudobulk  ###############################################

# When all samples are annotated, collect them in one list.
# sce.8 (ST63) is included here: it is prepared above like the other samples
# and this dataset is described as 10 GM samples, but it was absent from the
# list, so its identities never reached the pseudobulk table.
# NOTE(review): if ST63 was left out on purpose, remove it again and record
# the reason here.
sample_list <- list(
  sce.1, sce.2, sce.3, sce.4, sce.5,
  sce.6, sce.7, sce.8, sce.9, sce.10
)

# aggregate per unique identity
Pseudobulk_GM <- CreatePseudobulk(sample_list)

# save pseudobulk file (genes as row names, one column per sample identity)
write.csv(Pseudobulk_GM, file = "Pseudobulk_GM_v4.csv", row.names = TRUE)

