# Load libraries and Themes

In [1]:
suppressPackageStartupMessages({
    suppressWarnings({
        library(Seurat)
        library(ggplot2)
        library(tidyverse)
        library(presto)
        library(SeuratDisk)
        library(DESeq2)
        library(Rsamtools)
        })})

In [5]:
# Set the project root so that the relative paths used below (the RDS input
# and the DEG output directory) resolve against it.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
setwd("/media/daten/dmeral/scseq_analysis/2024_LV_CTRL_ALDO_REC")

In [6]:
# Load the saved object (path is relative to the working directory set above).
# It is used below as a Seurat object: its meta.data is read and it is passed
# to AggregateExpression().
obj <- readRDS("seurat_objects/2025_MR_HFpEF_Meral.rds")

# Pseudo-bulk differential expression with DESeq2


In [None]:
# Build one pseudo-bulk count matrix (genes x samples) per cell type.
# Objects created here and used by later cells: `cluster_annotations`
# (cell-type keys) and `cts.split.modified` (named list of count matrices).

# Work on a copy so the extra `samples` column is not added to `obj`.
seurat_object <- obj

# Cell-type labels with "_" replaced by "-". The aggregated column names are
# split on "_" below, so the lookup keys must not contain that character.
# NOTE(review): this relies on AggregateExpression() applying the same
# "_" -> "-" replacement to the group labels - confirm for the installed
# Seurat version.
cluster_annotations <- gsub("_", "-",
                            unique(seurat_object@meta.data$cell_type_comb))

# One label per biological sample: treatment immediately followed by sample id.
seurat_object$samples <- paste0(seurat_object$treatment,
                                seurat_object$sample_id)

# Sum raw counts over all cells sharing the same (cell type, sample) pair.
cts <- AggregateExpression(seurat_object,
                           group.by = c("cell_type_comb", "samples"),
                           assays = "RNA",
                           slot = "counts",
                           return.seurat = FALSE)
cts <- cts$RNA

# Transpose to (cell type x sample) rows by gene columns and convert to a
# data.frame so the rows can be split by cell type. as.matrix() makes the
# conversion work whether the aggregate is a dense or a sparse matrix; it is
# a no-op for a dense matrix.
cts.t <- as.data.frame(as.matrix(t(cts)))

# Cell-type part of each row name: everything before the first "_".
splitRows <- sub("_.*", "", rownames(cts.t))
unique(splitRows)

# One data.frame per cell type.
cts.split <- split.data.frame(cts.t,
                              f = factor(splitRows))

# Keep only the sample part of each row name and transpose back to
# genes x samples. The prefix is removed up to the FIRST "_", mirroring how
# `splitRows` was derived, so a sample label that itself contains "_" is kept
# whole instead of being cut down to its last fragment.
cts.split.modified <- lapply(cts.split, function(x) {
  rownames(x) <- sub("^[^_]*_", "", rownames(x))
  t(x)
})

In [None]:
# Pseudo-bulk differential expression: one DESeq2 fit per cell type and
# pairwise treatment comparison. Results are collected in `results_list`
# (keyed "<cell type>_<comparison>") and then written to one CSV per fit.
results_list <- list()

# Treatment comparisons. Each entry is c(reference, tested): the vector is
# used as the factor level order, so the first element is the baseline and the
# reported coefficient is "condition_<tested>_vs_<reference>".
comparisons <- list(
  ALDO_vs_CTRL = c("Control", "Aldosterone"),
  REC_vs_CTRL = c("Control", "Recovery"),
  REC_vs_ALDO = c("Aldosterone", "Recovery")
)

# Loop over each cell type.
# Using predefined cluster_annotations, change to cluster_annotations_CMcomb
# for one combined CM cluster.
for (cell_type_comb in cluster_annotations) {
  if (!cell_type_comb %in% names(cts.split.modified)) {
    message(paste("Cell type not found in split data:", cell_type_comb))
    next
  }

  counts_cell <- cts.split.modified[[cell_type_comb]]
  if (is.null(counts_cell) || ncol(counts_cell) == 0) {
    message(paste("No counts for cell type:", cell_type_comb))
    next
  }

  # Sample annotation does not depend on the comparison, so build it once per
  # cell type. The treatment is recognised from a tag inside the sample name;
  # samples matching none of the tags get NA and are dropped by the
  # per-comparison filter below.
  sample_info <- data.frame(samples = colnames(counts_cell)) %>%
    mutate(condition = case_when(
      grepl("CTRL", samples) ~ "Control",
      grepl("ALDO", samples) ~ "Aldosterone",
      grepl("REC", samples) ~ "Recovery",
      TRUE ~ NA_character_
    ))

  for (comparison in names(comparisons)) {
    conditions <- comparisons[[comparison]]

    # Keep only the samples of the two conditions being compared; the
    # reference level comes first.
    colData <- sample_info %>% filter(condition %in% conditions)
    colData$condition <- factor(colData$condition, levels = conditions)
    colData <- column_to_rownames(colData, var = "samples")

    # Subset the counts so columns match the rows of colData, in order.
    counts_subset <- counts_cell[, rownames(colData), drop = FALSE]

    # DESeq2 cannot fit `~ condition` when one condition has no samples or
    # when there are only two samples in total (no residual degrees of
    # freedom), so skip instead of letting the error abort the whole run.
    n_per_condition <- table(colData$condition)
    if (ncol(counts_subset) < 3 || any(n_per_condition == 0)) {
      message(paste("Skipping cell type", cell_type_comb,
                    "for comparison", comparison, "- not enough samples"))
      next
    }

    dds <- DESeqDataSetFromMatrix(countData = counts_subset,
                                  colData = colData,
                                  design = ~ condition)

    # Filter low-count genes: keep genes with at least 5 counts in at least
    # 2 samples.
    keep <- rowSums(counts(dds) >= 5) >= 2
    if (!any(keep)) {
      message(paste("Skipping cell type", cell_type_comb,
                    "for comparison", comparison,
                    "- no genes pass the count filter"))
      next
    }
    dds <- dds[keep, ]

    # Fit the model and extract the tested-vs-reference coefficient. A failure
    # in one fit is reported as a warning and must not discard the fits that
    # already succeeded or stop the remaining ones.
    res <- tryCatch({
      dds <- DESeq(dds, quiet = TRUE)
      results(dds, name = paste0("condition_", conditions[2],
                                 "_vs_", conditions[1]))
    }, error = function(e) {
      warning("DESeq2 failed for cell type ", cell_type_comb,
              ", comparison ", comparison, ": ", conditionMessage(e),
              call. = FALSE, immediate. = TRUE)
      NULL
    })
    if (is.null(res)) {
      next
    }

    # Store results keyed by cell type and comparison.
    results_list[[paste(cell_type_comb, comparison, sep = "_")]] <- res
  }
}

# Save results for all comparisons and cell types, one CSV per fit.
output_dir <- "DEGs/DESeq2_pseudo-bulk/final/"
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)

for (result_name in names(results_list)) {
  res_df <- as.data.frame(results_list[[result_name]])
  file_name <- paste0(output_dir, "degs_DESeq2_LV_", result_name, ".csv")
  write.csv(res_df, file = file_name)
}


In [2]:
# Print the R version and attached package versions for reproducibility.
sessionInfo()

R version 4.3.3 (2024-02-29)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 24.04.1 LTS

Matrix products: default
BLAS/LAPACK: /media/daten/dmeral/micromamba/envs/scrna_dm/lib/libopenblasp-r0.3.27.so;  LAPACK version 3.12.0

locale:
 [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
 [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
 [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
[10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   

time zone: Etc/UTC
tzcode source: system (glibc)

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] Rsamtools_2.18.0            Biostrings_2.70.3          
 [3] XVector_0.42.0              DESeq2_1.42.1              
 [5] SummarizedExperiment_1.32.0 Biobase_2.62.0             
 [7] MatrixGenerics_1.14.0       matrixStats_1.5.0          
 [9] GenomicRanges_1.