In [None]:
# install.packages('hdf5r')  # uncomment and run once if hdf5r is not installed

In [None]:
########################################################################
# Author    : A. Alsema
# Date      : 2021
# Dataset   : Visium Spatial Transcriptomics for MS lesions
# Purpose   : Load individual samples and combine with the metadata
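# Required inputs:
# - path: root directory of the project (set in the first code cell)
# - WM_Targetfile.csv: sample metadata, located directly in `path`
# - spaceranger_output/<sample_ID>-out/outs: spaceranger outputs, under `path`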
########################################################################
#########################################################################

In [7]:
# Start from an empty global environment so that objects left over from an
# earlier session cannot leak into this run.
# NOTE: this deletes every object in the current workspace.
rm(list = ls())

library(Seurat)
library(hdf5r)
library(ggplot2)
library(patchwork)
library(future)
library(dplyr)
library(RColorBrewer)

# Root directory that holds WM_Targetfile.csv, spaceranger_output/ and RData/.
path <- "SET_THIS_TO_YOUR_SPACERANGER_OUTDIR"

# Every file location in this notebook is built as paste0(path, ...), so the
# root must end in a path separator; append one when it was left out.
if (!grepl("[/\\\\]$", path)) {
  path <- paste0(path, "/")
}

# Fail early with a readable message instead of a bare setwd() error.
if (!dir.exists(path)) {
  stop(
    "Directory not found: ", path,
    " - set `path` to your spaceranger output directory.",
    call. = FALSE
  )
}
setwd(path)

# Sample sheet with one row per sample (sample_ID, slide, donor_ID, ...).
meta <- read.csv(paste0(path, "WM_Targetfile.csv"), stringsAsFactors = FALSE)
meta

sample_ID,slide,sample_name,donor_ID,lesiontype,age,sex,RIN,brain_pH,PMI,Group,PMI_min,manuscript_ID
<chr>,<chr>,<chr>,<chr>,<chr>,<int>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<int>,<chr>
ST31,ST_1,CWM_2012_070,2012-070,CNT,79,M,7.7,6.38,5.45,CWM,345,C1
ST32,ST_1,CWM_2012_088,2012-088,CNT,85,F,7.2,6.6,6.25,CWM,385,C2
ST33,ST_1,activeWML_2011_077,2011-077,activeWML,66,F,8.6,6.45,9.35,activeWML,575,A1
ST34,ST_1,NAWM_2018_115,2018-115,NAWM,56,M,7.5,6.6,6.15,NAWM,375,N1
ST37,ST_2,NAWM_2011_048,2011-048,NAWM,53,M,6.7,6.38,10.0,NAWM,600,N2
ST38,ST_2,NAWM_2018_116,2018-116,NAWM,66,F,8.0,6.7,9.3,NAWM,570,N3
ST67,ST_10,Mixed_2014_038,2014-038,Mixed_active-inactive,35,F,7.4,6.37,10.2,Mixed_active-inactive,620,M1
ST68,ST_10,Mixed_2014_001,2014-001,Mixed_active-inactive,57,M,7.4,6.8,10.15,Mixed_active-inactive,615,M2
ST69,ST_10,ActiveWML_2015-064,2015-064,activeWML,50,M,8.7,6.55,10.5,activeWML,650,A2
ST70,ST_10,activeWML_2011_077,2011-077,activeWML,66,F,6.7,6.45,9.35,activeWML,575,A3


In [9]:
# Sample identifiers, in the row order of the sample sheet; they are used
# further down to locate each sample's spaceranger output folder.
donors <- meta[["sample_ID"]]
donors

# Number of samples that will be loaded.
length(donors)

In [10]:
# Read in the count files and attach the sample metadata.
# The warning "Some cells in meta.data not present in provided counts matrix."
# can be safely ignored: abacus has already filtered out cell barcodes with no
# reads, whereas raw_feature_bc_matrix still includes them.

# Resume from a previous run when a cached list exists; otherwise start empty.
cache_file <- paste0(path, "RData/seurat/1.datasets.rds")
if (file.exists(cache_file)) {
  datasets <- readRDS(cache_file)
} else {
  datasets <- list()
}

# Give the list one slot per sample. This also covers a cache that was written
# for fewer samples, where datasets[[k]] would otherwise stop with
# "subscript out of bounds" for the samples added since.
if (length(datasets) < length(donors)) {
  length(datasets) <- length(donors)
}

# The sample sheet is the same for every sample, so read it once up front
# rather than once per loop iteration.
meta <- read.csv(paste0(path, "WM_Targetfile.csv"), stringsAsFactors = FALSE)

for (k in seq_along(donors)) {
  # Slots that were filled by an earlier run are left untouched.
  if (!is.null(datasets[[k]])) {
    print(paste0("Dataset already loaded: Skipping ", donors[k]))
    next
  }

  print(paste0("Reading data from: ", donors[k]))

  # Find the sample's metadata by ID rather than by position, so a reordered
  # or subsetted `donors` vector cannot pick up another sample's annotations.
  meta_row <- match(donors[k], meta$sample_ID)
  if (is.na(meta_row)) {
    stop(
      "No row for sample ", donors[k], " in WM_Targetfile.csv",
      call. = FALSE
    )
  }

  seu <- Load10X_Spatial(
    data.dir = paste0(path, "spaceranger_output/", donors[k], "-out/outs"),
    filename = "filtered_feature_bc_matrix.h5",
    assay = "Spatial",
    slice = paste0("section", k),
    filter.matrix = TRUE,
    to.upper = FALSE
  )

  # Each scalar below is recycled over all spots of the sample.
  seu$orig.ident <- donors[k]
  seu$sample_ID <- meta$sample_ID[meta_row]
  seu$manuscript_ID <- meta$manuscript_ID[meta_row]
  seu$slide <- meta$slide[meta_row]
  seu$sample_name <- meta$sample_name[meta_row]
  seu$donor_ID <- meta$donor_ID[meta_row]
  seu$lesiontype <- meta$lesiontype[meta_row]
  seu$Age <- meta$age[meta_row]
  seu$Sex <- meta$sex[meta_row]
  seu$RIN <- meta$RIN[meta_row]
  seu$PMI_min <- meta$PMI_min[meta_row]
  seu$brain_pH <- meta$brain_pH[meta_row]
  seu$Group <- meta$Group[meta_row]

  # Features x spots, as a quick sanity check on what was loaded.
  print(dim(seu))
  datasets[[k]] <- seu
}

[1] "Reading data from: ST31"
[1] 33538  3550
[1] "Reading data from: ST32"
[1] 33538  3067
[1] "Reading data from: ST33"
[1] 33538  2961
[1] "Reading data from: ST34"
[1] 33538  4284
[1] "Reading data from: ST37"
[1] 33538  4436
[1] "Reading data from: ST38"
[1] 33538  3797
[1] "Reading data from: ST67"
[1] 33538  4208
[1] "Reading data from: ST68"
[1] 33538  3649
[1] "Reading data from: ST69"
[1] 33538  4789
[1] "Reading data from: ST70"
[1] 33538  3627
[1] "Reading data from: ST71"
[1] 33538  3657
[1] "Reading data from: ST72"
[1] 33538  4895
[1] "Reading data from: ST73"
[1] 33538  3751
[1] "Reading data from: ST74"
[1] 33538  4702
[1] "Reading data from: ST79"
[1] 33538  4729


In [11]:
# Sanity checks: how many samples were loaded, and what the per-spot metadata
# of the first sample looks like.
n_loaded <- length(datasets)
print(n_loaded)
slot(datasets[[1]], "meta.data")

[1] 15


Unnamed: 0_level_0,orig.ident,nCount_Spatial,nFeature_Spatial,sample_ID,manuscript_ID,slide,sample_name,donor_ID,lesiontype,Age,Sex,RIN,PMI_min,brain_pH,Group
Unnamed: 0_level_1,<chr>,<dbl>,<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<int>,<chr>,<dbl>,<int>,<dbl>,<chr>
AAACAAGTATCTCCCA-1,ST31,977,684,ST31,C1,ST_1,CWM_2012_070,2012-070,CNT,79,M,7.7,345,6.38,CWM
AAACACCAATAACTGC-1,ST31,1526,937,ST31,C1,ST_1,CWM_2012_070,2012-070,CNT,79,M,7.7,345,6.38,CWM
AAACAGAGCGACTCCT-1,ST31,4381,2010,ST31,C1,ST_1,CWM_2012_070,2012-070,CNT,79,M,7.7,345,6.38,CWM
AAACAGCTTTCAGAAG-1,ST31,2517,1427,ST31,C1,ST_1,CWM_2012_070,2012-070,CNT,79,M,7.7,345,6.38,CWM
AAACAGGGTCTATATT-1,ST31,1727,1073,ST31,C1,ST_1,CWM_2012_070,2012-070,CNT,79,M,7.7,345,6.38,CWM
AAACAGTGTTCCTGGG-1,ST31,2671,1482,ST31,C1,ST_1,CWM_2012_070,2012-070,CNT,79,M,7.7,345,6.38,CWM
AAACATTTCCCGGATT-1,ST31,507,401,ST31,C1,ST_1,CWM_2012_070,2012-070,CNT,79,M,7.7,345,6.38,CWM
AAACCGGGTAGGTACC-1,ST31,1445,847,ST31,C1,ST_1,CWM_2012_070,2012-070,CNT,79,M,7.7,345,6.38,CWM
AAACCGTTCGTCCAGG-1,ST31,1388,875,ST31,C1,ST_1,CWM_2012_070,2012-070,CNT,79,M,7.7,345,6.38,CWM
AAACCTAAGCAGCCGG-1,ST31,2513,1403,ST31,C1,ST_1,CWM_2012_070,2012-070,CNT,79,M,7.7,345,6.38,CWM


In [12]:
# Cache the list of loaded samples so a later run can skip re-reading them.
# saveRDS() does not create missing folders, so make sure the target exists.
out_dir <- paste0(path, "RData/seurat/")
if (!dir.exists(out_dir)) {
  dir.create(out_dir, recursive = TRUE)
}
saveRDS(datasets, paste0(out_dir, "1.datasets.rds"))

In [13]:
# Record the R version, platform and package versions used for this run.
sessionInfo()

R version 4.2.0 (2022-04-22)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 20.04.5 LTS

Matrix products: default
BLAS/LAPACK: /data/bcn/p283607/anaconda3/envs/R4.2/lib/libopenblasp-r0.3.21.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] RColorBrewer_1.1-3 dplyr_1.1.0        future_1.31.0      patchwork_1.1.2   
[5] ggplot2_3.4.1      hdf5r_1.3.7        SeuratObject_4.1.3 Seurat_4.3.0      

loaded via a namespace (and not attached):
  [1] Rtsne_0.16             colorspace_2.1-0       deldir_1.0-6          
  [4] ellipsis_0.3.2   