# Load libraries and Themes

In [1]:
suppressPackageStartupMessages({
    suppressWarnings({
        library(Seurat)
        library(ggplot2)
        library(tidyverse)
        library(presto)
        library(SeuratDisk)
        library(Rsamtools)
        library(svglite)
        library(scuttle)
        library(limma)
        library(dplyr)
            })})

In [None]:
# Make the project folder the working directory; the relative paths used
# in the cells below (input .rds, DEG output folder) resolve against it.
setwd(dir = "/media/daten/dmeral/scseq_analysis/2024_LV_CTRL_ALDO_REC")

## pseudo-bulk Limma-Voom

In [6]:
# Load the serialized object that the pseudo-bulk cells below subset by
# cell type (path is relative to the working directory).
obj <- readRDS(file = "seurat_objects/2025_MR_HFpEF_Meral.rds")

In [None]:
# Design with batch, sex and treatment taken from the sample table of `dge`.
# NOTE(review): `dge` is only created in the contrast cells further down, and
# this formula needs a `batch` column in dge$samples -- confirm that this cell
# is still wanted and that it is run after `dge` exists.
design <- model.matrix(object = ~ batch + sex + treatment, data = dge$samples)
# Turn column names such as "(Intercept)" into syntactically valid names.
colnames(design) <- make.names(names = colnames(design))

## ALDO vs. CTRL

**Run this cell once per cell type, setting `celltype` to CM, FB, and EC-cap in turn.**

In [None]:
# Pseudo-bulk limma-voom differential expression for one cell type:
# test_level vs ref_level (here ALDO vs CTRL).
# Inputs : `obj` (loaded above), `celltype`, `ref_level`, `test_level`.
# Outputs: `res` (topTable for the treatment coefficient) and a CSV on disk.
celltype <- "CM"
ref_level <- "CTRL"    # baseline condition of the contrast
test_level <- "ALDO"   # condition compared against the baseline

# edgeR is not part of the notebook's library cell, so attach it here.
library(edgeR)

# Subset Seurat object to chosen cell type
obj_sub <- subset(obj, subset = cell_type_comb == celltype)

# Convert to SingleCellExperiment
sce_sub <- as.SingleCellExperiment(obj_sub)

# Aggregate to pseudo-bulk: one column per sample x treatment combination
pb <- aggregateAcrossCells(
    sce_sub,
    ids = DataFrame(
        sample = sce_sub$sample_id,
        treatment = sce_sub$treatment
    )
)

# Keep only the two conditions being compared; the reference level goes
# first so the model coefficient is test_level relative to ref_level.
keep <- colData(pb)$treatment %in% c(ref_level, test_level)
pb_sub <- pb[, keep]
colData(pb_sub)$treatment <- droplevels(
    factor(colData(pb_sub)$treatment, levels = c(ref_level, test_level))
)
table(colData(pb_sub)$treatment)

# Fail early with a clear message when one of the two groups has no
# pseudo-bulk sample; otherwise the design below has no treatment column.
if (nlevels(colData(pb_sub)$treatment) < 2) {
    stop(
        "Cell type '", celltype, "' does not have pseudo-bulk samples for both '",
        ref_level, "' and '", test_level, "'.",
        call. = FALSE
    )
}

# Create DGEList
counts_mat <- assay(pb_sub, "counts")
dge <- edgeR::DGEList(counts = counts_mat)
dge$samples$sample <- colData(pb_sub)$sample
dge$samples$treatment <- colData(pb_sub)$treatment
# if ("batch" %in% colnames(colData(pb_sub))) dge$samples$batch <- colData(pb_sub)$batch
if ("sex" %in% colnames(colData(pb_sub))) dge$samples$sex <- colData(pb_sub)$sex

# Filter lowly-expressed genes: CPM > 1 in at least max(2, n_samples / 4) samples
cpm_mat <- edgeR::cpm(dge)
min_samps <- max(2, floor(ncol(dge) / 4))
keep_genes <- rowSums(cpm_mat > 1) >= min_samps
table(keep_genes)
dge <- dge[keep_genes, , keep.lib.sizes = FALSE]

# TMM normalization
dge <- edgeR::calcNormFactors(dge, method = "TMM")

# Design matrix: treatment, plus sex when it is usable as a covariate
design_terms <- "treatment"
# if ("batch" %in% colnames(dge$samples)) design_terms <- paste("batch +", design_terms)
if ("sex" %in% colnames(dge$samples)) {
    sex_values <- as.character(dge$samples$sex)
    if (anyNA(sex_values)) {
        # model.matrix() would silently drop these rows and the design would no
        # longer line up with the columns of `dge`.
        # NOTE(review): NA presumably means sex was not constant within a
        # pseudo-bulk sample -- verify against the aggregation step.
        stop(
            "'sex' is NA for sample(s): ",
            paste(dge$samples$sample[is.na(sex_values)], collapse = ", "),
            call. = FALSE
        )
    }
    if (length(unique(sex_values)) > 1) {
        design_terms <- paste(design_terms, "+ sex")
    } else {
        # A single-level covariate cannot be estimated (model.matrix() errors).
        message("'sex' has a single level in this subset; omitting it from the design.")
    }
}
design <- model.matrix(as.formula(paste("~", design_terms)), data = dge$samples)
colnames(design) <- make.names(colnames(design))
design

# Voom transformation
v <- voom(dge, design = design, plot = TRUE)

# Fit linear model
fit <- lmFit(v, design)
fit <- eBayes(fit)

# Extract test_level vs ref_level results by exact coefficient name
# (a substring search could pick up an unrelated column or nothing at all).
coef_to_use <- make.names(paste0("treatment", test_level))
if (!coef_to_use %in% colnames(design)) {
    stop(
        "Coefficient '", coef_to_use, "' not found in design; available columns: ",
        paste(colnames(design), collapse = ", "),
        call. = FALSE
    )
}
res <- topTable(fit, coef = coef_to_use, number = Inf, sort.by = "P")
res$gene <- rownames(res)

# Save results (create the output folder on first use)
out_file <- paste0(
    "DEGs/limma_pseudo-bulk/degs_", test_level, "_vs_", ref_level,
    "_limma_LV_", celltype, "_wobatch.csv"
)
dir.create(dirname(out_file), recursive = TRUE, showWarnings = FALSE)
write.csv(res, out_file, row.names = FALSE)


## REC vs. ALDO

In [None]:
# Pseudo-bulk limma-voom differential expression for one cell type:
# test_level vs ref_level (here REC vs ALDO).
# Inputs : `obj` (loaded above), `celltype`, `ref_level`, `test_level`.
# Outputs: `res` (topTable for the treatment coefficient) and a CSV on disk.
celltype <- "CM"
ref_level <- "ALDO"   # baseline condition of the contrast
test_level <- "REC"   # condition compared against the baseline

# Subset Seurat object to chosen cell type
obj_sub <- subset(obj, subset = cell_type_comb == celltype)

# Convert to SingleCellExperiment
sce_sub <- as.SingleCellExperiment(obj_sub)

# Aggregate to pseudo-bulk: one column per sample x treatment combination
pb <- aggregateAcrossCells(
    sce_sub,
    ids = DataFrame(
        sample = sce_sub$sample_id,
        treatment = sce_sub$treatment
    )
)

# Keep only REC and ALDO; the reference level goes first so the model
# coefficient is test_level relative to ref_level.
keep <- colData(pb)$treatment %in% c(test_level, ref_level)
pb_sub <- pb[, keep]
colData(pb_sub)$treatment <- droplevels(
    factor(colData(pb_sub)$treatment, levels = c(ref_level, test_level))
)
table(colData(pb_sub)$treatment)

# Fail early with a clear message when one of the two groups has no
# pseudo-bulk sample; otherwise the design below has no treatment column.
if (nlevels(colData(pb_sub)$treatment) < 2) {
    stop(
        "Cell type '", celltype, "' does not have pseudo-bulk samples for both '",
        ref_level, "' and '", test_level, "'.",
        call. = FALSE
    )
}

# Create DGEList (namespace-qualified so this cell does not depend on another
# cell having attached edgeR)
counts_mat <- assay(pb_sub, "counts")
dge <- edgeR::DGEList(counts = counts_mat)
dge$samples$sample <- colData(pb_sub)$sample
dge$samples$treatment <- colData(pb_sub)$treatment
if ("sex" %in% colnames(colData(pb_sub))) dge$samples$sex <- colData(pb_sub)$sex

# Filter lowly-expressed genes: CPM > 1 in at least max(2, n_samples / 4) samples
cpm_mat <- edgeR::cpm(dge)
min_samps <- max(2, floor(ncol(dge) / 4))
keep_genes <- rowSums(cpm_mat > 1) >= min_samps
table(keep_genes)
dge <- dge[keep_genes, , keep.lib.sizes = FALSE]

# TMM normalization
dge <- edgeR::calcNormFactors(dge, method = "TMM")

# Design matrix: treatment, plus sex when it is usable as a covariate
design_terms <- "treatment"
if ("sex" %in% colnames(dge$samples)) {
    sex_values <- as.character(dge$samples$sex)
    if (anyNA(sex_values)) {
        # model.matrix() would silently drop these rows and the design would no
        # longer line up with the columns of `dge`.
        # NOTE(review): NA presumably means sex was not constant within a
        # pseudo-bulk sample -- verify against the aggregation step.
        stop(
            "'sex' is NA for sample(s): ",
            paste(dge$samples$sample[is.na(sex_values)], collapse = ", "),
            call. = FALSE
        )
    }
    if (length(unique(sex_values)) > 1) {
        design_terms <- paste(design_terms, "+ sex")
    } else {
        # A single-level covariate cannot be estimated (model.matrix() errors).
        message("'sex' has a single level in this subset; omitting it from the design.")
    }
}
design <- model.matrix(as.formula(paste("~", design_terms)), data = dge$samples)
colnames(design) <- make.names(colnames(design))
design

# Voom transformation
v <- limma::voom(dge, design = design, plot = TRUE)

# Fit linear model
fit <- limma::lmFit(v, design)
fit <- limma::eBayes(fit)

# Extract REC vs ALDO results by exact coefficient name
# (a substring search could pick up an unrelated column or nothing at all).
coef_to_use <- make.names(paste0("treatment", test_level))
if (!coef_to_use %in% colnames(design)) {
    stop(
        "Coefficient '", coef_to_use, "' not found in design; available columns: ",
        paste(colnames(design), collapse = ", "),
        call. = FALSE
    )
}
res <- limma::topTable(fit, coef = coef_to_use, number = Inf, sort.by = "P")
res$gene <- rownames(res)

# Save results (create the output folder on first use)
out_file <- paste0(
    "DEGs/limma_pseudo-bulk/degs_", test_level, "_vs_", ref_level,
    "_limma_LV_", celltype, "_wobatch.csv"
)
dir.create(dirname(out_file), recursive = TRUE, showWarnings = FALSE)
write.csv(res, out_file, row.names = FALSE)


In [2]:
# Print the R version, platform and attached packages used for this run.
utils::sessionInfo()

R version 4.3.3 (2024-02-29)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 24.04.1 LTS

Matrix products: default
BLAS/LAPACK: /media/daten/dmeral/micromamba/envs/scrna_dm/lib/libopenblasp-r0.3.27.so;  LAPACK version 3.12.0

locale:
 [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
 [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
 [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
[10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   

time zone: Etc/UTC
tzcode source: system (glibc)

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] limma_3.58.1                scuttle_1.12.0             
 [3] SingleCellExperiment_1.24.0 SummarizedExperiment_1.32.0
 [5] Biobase_2.62.0              MatrixGenerics_1.14.0      
 [7] matrixStats_1.5.0           svglite_2.1.3              
 [9] Rsamtools_2.18.0