In [None]:
########################################################################
# Author    : A. Alsema, J. Kotah
# Date      : October 2023
# Dataset   : Visium Spatial Transcriptomics for MS lesions, 14 WM samples
# Purpose   : run first half of SpaceX analysis (until post-processing)
# Output    : RDS SpaceX analyzed object per sample that needs post-processing
# Input     : SpaceRanger output data files per sample, list of spatial coordinates, list of spatially variable genes per sample
########################################################################

In [1]:
# Remove all objects from the workspace before starting. The same notebook is
# re-run for each sample, so this presumably prevents objects from a previous
# sample from being reused by accident.
rm(list = ls())
# SpaceX supplies the SpaceX() model-fitting function called further down.
library(SpaceX)
# SingleCellExperiment makes counts() available for the sample object.
library(SingleCellExperiment)
# dplyr supplies the %>% pipe used when printing the end time of the fit.
library(dplyr)

# Log the time at which this run started.
print(Sys.time())

Loading required package: PQLseq

Loading required package: SummarizedExperiment

Loading required package: MatrixGenerics

Loading required package: matrixStats


Attaching package: ‘MatrixGenerics’


The following objects are masked from ‘package:matrixStats’:

    colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
    colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
    colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
    colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
    colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
    colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
    colWeightedMeans, colWeightedMedians, colWeightedSds,
    colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
    rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
    rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
    rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
    rowOrderStats,

[1] "2024-02-28 14:35:56 CET"


In [None]:
# Per-sample configuration: the same code is run for every sample. Look up the
# sample in the "SpaceX_sample_ID_sheet_file" and change the two values below
# by hand before each run.
sample_ID <- "ST37" # corresponds to the "ST_ID" column; used in the sample, HVG and output file names
sample_name <- "N2" # corresponds to the "ST_name" column; used in the coordinates file name

In [None]:
# ---- Paths and sample loading ----

# Input directory holding the per-sample "<sample_ID>_group.rds" objects.
# Use an absolute path: the working directory is changed to this directory
# below, so a relative path would no longer resolve when the file is read.
indir <- "YOUR INPUT DIRECTORY HERE"

# Output directory for every file written by this notebook. A trailing "/" is
# harmless with file.path() and is required wherever a file name is appended
# to this directory with paste0().
outdir <- "YOUR OUTPUT DIRECTORY HERE"

setwd(indir)

# Directory with spatial coordinates; corresponds to the
# "Coordinates_per_sample" folder in the upload. Keep the trailing "/":
# file names are appended to this path with paste0().
coordir <- "YOUR INPUT DIRECTORY HERE/Coordinates_per_sample/"

# Directory with variable genes; corresponds to the
# "BayesSpace_highly_variable_genes" folder in the upload. Keep the trailing
# "/" for the same reason.
SVGdir <- "YOUR INPUT DIRECTORY HERE/BayesSpace_highly_variable_genes/"

# Load the sample object. file.path() inserts the separator between directory
# and file name; pasting them directly glues the file name onto the directory
# name whenever indir has no trailing "/".
sample_path <- file.path(indir, paste0(sample_ID, "_group.rds"))
if (!file.exists(sample_path)) {
  stop("Sample object not found: ", sample_path, call. = FALSE)
}
print(paste0("Loading sample ", sample_ID))
sample <- readRDS(sample_path)

In [None]:
# ---- Prepare SpaceX inputs: spot locations, expression matrix, cluster labels ----

# Spot locations: one row per spot, row names taken from the first CSV column.
print(paste0("Loading coordinates"))
coordinates <- read.csv(
  paste0(coordir, sample_name, "distance_to_lesioncore.csv"),
  row.names = 1
)
# Drop the distance column so that only the remaining columns are rounded.
coordinates$distance <- NULL
BC_loc <- round(coordinates)
# The first 18 characters of each row name are used as the barcode for matching
# against the expression matrix (presumably this strips a suffix - confirm
# against the coordinates CSV).
BC_loc$barcodes <- substr(row.names(BC_loc), 1, 18)

print("prepare gene expression matrix")
SVG.Selection <- read.csv(
  paste0(SVGdir, sample_ID, "HVG_top500_fromBS.csv"),
  row.names = 1
)
# Transpose the counts so that rows are spots and columns are genes.
expr_mat <- t(as.matrix(counts(sample)))
# Filter for only the highly variable genes. drop = FALSE keeps the result a
# matrix even if a single gene matches.
expr_mat <- expr_mat[, colnames(expr_mat) %in% SVG.Selection$x, drop = FALSE]
if (ncol(expr_mat) == 0) {
  stop("None of the selected variable genes are present in the sample.",
       call. = FALSE)
}

print("removing empty spots that do not express the HVGs")
print(dim(expr_mat)[1])
# Failsafe: remove spots with 0 counts over the selected genes. drop = FALSE
# keeps the matrix shape (and its row names) if only one spot remains.
expr_mat <- expr_mat[rowSums(expr_mat) != 0, , drop = FALSE]
print(dim(expr_mat)[1])
G <- ncol(expr_mat) # number of genes to model
N <- nrow(expr_mat) # number of locations

print(paste0("Number of genes is ", G, "; Number of spots is ", N))

matching_loc <- intersect(BC_loc$barcodes, rownames(expr_mat))
print(paste0("Overlap barcodes in location and expression matrix is: ", length(matching_loc)))
if (length(matching_loc) == 0) {
  stop("No barcodes are shared between the location table and the ",
       "expression matrix.", call. = FALSE)
}

# Spots with expression data but no location; this can be due to excluded
# spatial clusters.
setdiff_loc <- setdiff(rownames(expr_mat), BC_loc$barcodes)
print(paste0("Non-overlap barcodes in location and expression matrix is: ", length(setdiff_loc)))

# Restrict both tables to the shared barcodes, then put the location rows in
# the same order as the rows of the expression matrix.
BC_loc_test <- BC_loc[BC_loc$barcodes %in% matching_loc, , drop = FALSE]
expr_mat_test <- expr_mat[rownames(expr_mat) %in% matching_loc, , drop = FALSE]
BC_loc_test <- BC_loc_test[match(row.names(expr_mat_test), BC_loc_test$barcodes), ]

# Cluster annotations come from the 'identity' column of the sample object
# (lesion core, rim, plwm) instead of 'spatial.cluster'. Subset the sample to
# the retained spots, in the same order as the expression matrix.
sample <- sample[, rownames(expr_mat_test)]

# Check before adding info. The annotations are copied by position, so a
# mismatch in barcode order would label spots with the wrong cluster; stop
# instead of continuing with misaligned data.
print('this should print TRUE, if FALSE investigate the filter step above')
barcodes_aligned <- identical(colnames(sample), BC_loc_test$barcodes)
print(barcodes_aligned)
if (!barcodes_aligned) {
  stop("Barcodes of the sample object and the location table are not ",
       "identical; investigate the filter step above.", call. = FALSE)
}

# Store the order of the cluster levels: this is the order in which the
# networks per spatial cluster are created, needed for later analyses.
BC_loc_test$cluster_ann <- sample$identity
BC_loc_test$cluster_ann <- factor(BC_loc_test$cluster_ann)
order_groups <- levels(BC_loc_test$cluster_ann)
head(BC_loc_test)
write.csv(order_groups,
          file = file.path(outdir, paste0(sample_ID, "_orderofgroups.csv")))

# Output files to be used in the SpaceX function for convenience and in later
# post-processing. file.path() adds the separator that is missing when outdir
# has no trailing "/".
saveRDS(expr_mat_test, file.path(outdir, paste0(sample_ID, "_expr_mat_spacex.rds")))
saveRDS(BC_loc_test, file.path(outdir, paste0(sample_ID, "_loc_spacex.rds")))

In [None]:
# ---- Fit SpaceX without post-processing ----
# Post-processing is not performed in this part (it is run separately) due to
# hardware limitations.
print(paste0("Starting SpaceX function, no postProcessing"))
Sys.time()
# Arguments: expression matrix (spots x genes), spot coordinates, cluster
# annotation per spot. The annotation is selected by name instead of by column
# position, so a coordinates file with extra columns cannot silently shift it.
# NOTE(review): columns 1:2 are assumed to be the two spatial coordinates;
# the layout of the coordinates CSV is not visible here - confirm.
BC_fit <- SpaceX(expr_mat_test,
                 BC_loc_test[, 1:2],
                 BC_loc_test$cluster_ann,
                 sPMM = FALSE,
                 Post_process = FALSE,
                 numCore = 1)
Sys.time() %>% print()

# file.path() adds the separator that is missing when outdir has no
# trailing "/".
saveRDS(BC_fit,
        file.path(outdir, paste0(sample_ID, "_spaceX_not_postProcessed.rds")))

In [3]:
# Record the R version, platform and attached package versions used for this
# run, for reproducibility.
sessionInfo()

R version 4.1.2 (2021-11-01)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 20.04.5 LTS

Matrix products: default
BLAS/LAPACK: /data/bcn/p283607/anaconda3/envs/spacex/lib/libopenblasp-r0.3.21.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] dplyr_1.1.3                 SingleCellExperiment_1.16.0
 [3] SummarizedExperiment_1.24.0 Biobase_2.54.0             
 [5] GenomicRanges_1.46.1        GenomeInfoDb_1.30.1        
 [7] IRanges_2.28.0              S4Vectors_0.32.4           
 [9] BiocGenerics_0.40.0         MatrixGen