In [1]:
########################################################################
# Author    : A. Alsema
# Date      : April 2022
# Dataset   : Visium Spatial Transcriptomics for MS lesions, 15 slices with WM 
# Purpose   : compute differentially expressed genes per selected subtrajectory

# Required Inputs     : 
# - cds_objects: collection of CellDataSet (CDS) objects corresponding to various extracted subtrajectories. 
            # Each CDS object represents a subset of the overall trajectory focusing on specific paths,
            # e.g. "4-4.startnode_to_rims-microglia.rds" contains a subset of spots belonging to subtrajectory 1, figure 6c-f.
# - root_node: This variable represents the common starting point.

# Outputs   : - new cds (.rds) per subtrajectory with recalculated pseudotime,
#             - csv file listing the spot barcodes contained in each subtrajectory,
#             - csv file containing differential expression analysis per subtrajectory
########################################################################

# Start from an empty global environment so no object from an earlier session leaks in.
# Note: rm() only removes objects; it does not detach packages or reset options(),
# so a fresh kernel is still the only true clean start.
rm(list = ls())
# monocle3 provides order_cells() and graph_test(), both used in the analysis below.
library(monocle3)
# NOTE(review): no ggplot2 function is called in the visible part of this notebook — confirm it is still needed.
library(ggplot2)

Loading required package: Biobase

Loading required package: BiocGenerics


Attaching package: ‘BiocGenerics’


The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs


The following objects are masked from ‘package:base’:

    anyDuplicated, aperm, append, as.data.frame, basename, cbind,
    colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
    get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
    match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
    Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
    table, tapply, union, unique, unsplit, which.max, which.min


Welcome to Bioconductor

    Vignettes contain introductory material; view with
    'browseVignettes()'. To cite Bioconductor, see
    'citation("Biobase")', and for packages 'citation("pkgname")'.


Loading required package: SingleCellExperiment

Loading required package: SummarizedExperiment

Loading required package: MatrixGenerics

Loading

In [6]:
# Purpose: for every extracted subtrajectory, recompute pseudotime from the common
# root, save the re-ordered CDS and its spot barcodes, then run monocle3's graph_test
# and write the per-gene results to csv.

# This is the starting point of our trajectory, see figure 5 script 1.
root_node = readRDS("RData/monocle3/rootnode.rds")

# Each of these cds paths corresponds to one extracted subtrajectory, see figure 6 script 1.
cds_objects <- c("RData/monocle3/4-4.startnode_to_rims-microglia.rds", 
                 "RData/monocle3/4-2.startnode_to_active_mixed.rds",
                 "RData/monocle3/4-1.startnode_to_infl.rds")

# Output locations: re-ordered CDS objects and barcodes go next to the inputs,
# differential expression tables go to the trajectory_DE folder.
rdata_dir <- "RData/monocle3"
de_dir <- "Routput/monocle3/trajectory_DE"

# Fail before any expensive computation if an input is missing, instead of
# discovering it after graph_test has already run on the preceding subtrajectories.
missing_inputs <- cds_objects[!file.exists(cds_objects)]
if (length(missing_inputs) > 0) {
  stop("Subtrajectory file(s) not found: ", paste(missing_inputs, collapse = ", "))
}

# Make sure the output folders exist so saveRDS()/write.csv() cannot fail on a
# missing directory after the computation has finished.
dir.create(rdata_dir, recursive = TRUE, showWarnings = FALSE)
dir.create(de_dir, recursive = TRUE, showWarnings = FALSE)

# Loop over the selected subtrajectories to calculate differential expression of
# genes using graph_test. Iterating over the paths directly (rather than
# 1:length(...)) is also safe when the vector is empty.
for (cds_path in cds_objects) {
  # Derive the name from the file name itself ("<dir>/<name>.rds" -> "subtraj_<name>")
  # instead of hard-coded character offsets, so it stays correct if the folder
  # or the extension ever changes.
  cds_name <- paste0("subtraj_", tools::file_path_sans_ext(basename(cds_path)))
  print(cds_name)

  cds_full <- readRDS(file = cds_path)
  # Drop the sample_name column from the cell metadata before further processing.
  cds_full$sample_name <- NULL

  # Recalculate pseudotime within the subtrajectory, starting from the common root.
  # TODO(review): the original author was unsure whether re-ordering is required here.
  # NOTE(review): root_node is passed as `root_cells`; if the saved object holds a
  # principal-graph node name rather than a spot barcode it belongs in
  # `root_pr_nodes` instead — confirm against script 1.
  cds_full <- order_cells(cds_full, root_cells = root_node)

  # Save the re-ordered subtrajectory and the barcodes of the spots it contains.
  saveRDS(cds_full, file = file.path(rdata_dir, paste0(cds_name, "_graph.rds")))
  barcodes <- colnames(cds_full)
  write.csv(barcodes, file.path(rdata_dir, paste0(cds_name, "_barcodes.csv")))

  # Compute differential expression.
  print("subtraj data is saved, starting differential expression")
  ## bug in monocle3 when setting neighbor_graph = "principal_graph"
  ## here, opted for graph test with neighbor_graph = "knn" (is also the default)
  cds_pr_test_res <- graph_test(cds_full, neighbor_graph = "knn",
                                verbose = FALSE, reduction_method = "UMAP", cores = 1)
  write.csv(cds_pr_test_res,
            file = file.path(de_dir, paste0("1.DE_results_", cds_name, "_allgenes.csv")))
}

[1] "subtraj_4-4.startnode_to_rims-microglia"
[1] "subtraj data is saved, starting differential expression"
[1] "subtraj_4-2.startnode_to_active_mixed"
[1] "subtraj data is saved, starting differential expression"
[1] "subtraj_4-1.startnode_to_infl"
[1] "subtraj data is saved, starting differential expression"


In [4]:
# Record the R version, platform and attached package versions used for this run (reproducibility).
sessionInfo()

R version 4.2.0 (2022-04-22)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 20.04.5 LTS

Matrix products: default
BLAS/LAPACK: /data/bcn/p283607/anaconda3/envs/R4.2/lib/libopenblasp-r0.3.21.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] ggplot2_3.4.4               monocle3_1.3.1             
 [3] SingleCellExperiment_1.20.1 SummarizedExperiment_1.28.0
 [5] GenomicRanges_1.50.2        GenomeInfoDb_1.34.9        
 [7] IRanges_2.32.0              S4Vectors_0.36.2           
 [9] MatrixGenerics_1.10.0       matrixStats