In [None]:
########################################################################
# Author    : A. Alsema, J. Kotah
# Date      : October 2023
# Dataset   : Visium Spatial Transcriptomics for MS lesions, 14 WM samples
# Purpose   : select highly variable genes (HVGs, via scran::getTopHVGs) to be used as inputs in SpaceX analysis
# Output    : a list of the top 500 highly variable genes per sample
# Input     : SpaceRanger output data files per sample
########################################################################

In [1]:
#2023-10-3 done for sample ST37
# Start from an empty workspace: rm(list = ls()) deletes every object in the
# current environment. It does not detach packages or reset options.
rm(list =ls())
# BayesSpace supplies readVisium(), used below to load the SpaceRanger output.
library(BayesSpace)
# scran supplies getTopHVGs(), used below to rank genes by variability.
library(scran)

Loading required package: SingleCellExperiment

Loading required package: SummarizedExperiment

Loading required package: MatrixGenerics

Loading required package: matrixStats


Attaching package: ‘MatrixGenerics’


The following objects are masked from ‘package:matrixStats’:

    colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
    colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
    colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
    colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
    colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
    colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
    colWeightedMeans, colWeightedMedians, colWeightedSds,
    colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
    rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
    rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
    rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
    

In [2]:
# Directory prefixes for this run; replace both placeholders before executing.
#   indir  : location of the per-sample "<sampleID>-out/outs" SpaceRanger folders
#   outdir : location where the per-sample gene list is written
# Include the trailing "/" in both values.
outdir <- "YOUR OUTPUT DIRECTORY HERE"
indir <- "YOUR INPUT DIRECTORY HERE"

In [3]:
sampleID <- "ST67" #change as needed

# Build the SpaceRanger "outs" path with file.path() so it is correct whether
# or not `indir` ends with a path separator (plain paste0() silently produced
# "<indir><sampleID>-out/outs" with no separator in that case).
visium_dir <- file.path(indir, paste0(sampleID, "-out"), "outs")
if (!dir.exists(visium_dir)) {
    stop("SpaceRanger output directory not found: ", visium_dir)
}

# Load the Visium sample as a SingleCellExperiment (genes x spots).
sce <- readVisium(visium_dir)
print("dimensions of your sample")
print(dim(sce))

# Remove spots with zero total counts: a spot with no counts has a library
# size of zero and cannot be normalised.
sce <- sce[, colSums(counts(sce)) > 0]
print(dim(sce))

# Add log-normalised expression values, which the HVG selection step uses.
sce <- logNormCounts(sce)

[1] "dimensions of your sample"
[1] 33538  4208
[1] 33538  4208


In [4]:
# Extract the top 500 highly variable genes (HVGs), following the BayesSpace
# workflow, as inputs for SpaceX. Given the SingleCellExperiment directly,
# getTopHVGs() models the per-gene variance itself before ranking.
top500 <- scran::getTopHVGs(sce, n = 500)

# file.path() inserts the directory separator, so the CSV is written inside
# `outdir` even when `outdir` has no trailing "/". The file name itself is
# unchanged: "<sampleID>HVG_top500_fromBS.csv".
write.csv(top500, file.path(outdir, paste0(sampleID, "HVG_top500_fromBS.csv")))

In [5]:
# Preview the first six entries of the selected gene list.
head(top500, n = 6L)

In [2]:
# Record the R version, platform and attached package versions used for this run.
sessionInfo()

R version 4.1.2 (2021-11-01)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 20.04.5 LTS

Matrix products: default
BLAS/LAPACK: /data/bcn/p283607/anaconda3/envs/spacex/lib/libopenblasp-r0.3.21.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] scran_1.22.1                scuttle_1.4.0              
 [3] BayesSpace_1.4.1            SingleCellExperiment_1.16.0
 [5] SummarizedExperiment_1.24.0 Biobase_2.54.0             
 [7] GenomicRanges_1.46.1        GenomeInfoDb_1.30.1        
 [9] IRanges_2.28.0              S4Vectors