# Load libraries and Themes

In [1]:
suppressPackageStartupMessages({
    suppressWarnings({
        library(Seurat)
        library(ggplot2)
        library(tidyverse)
        library(presto)
        library(SeuratDisk)
        library(DESeq2)
        library(edgeR)
        library(EnhancedVolcano)
        library(Rsamtools)
        library(svglite)
        library(viridis)
        library(pals)
        library(scales)
        library(sva)
        })})

In [None]:
# Colour palettes
# Treatment colours, listed in the order ALDO (red), CTRL (grey), REC (teal).
# The vector is unnamed, so colours are matched to groups by position.
palette.treatment <- c("#AF0000", "#C1C1C1", "#007D7D")

In [None]:
# Minimal ggplot2 theme for UMAP plots: every listed element (axis line, axis
# text, ticks and titles, panel background, border and grid lines) is set to
# element_blank().
umap_theme <- do.call(
  theme,
  setNames(
    rep(list(element_blank()), 10),
    c(
      "axis.line",
      "axis.text.x",
      "axis.text.y",
      "axis.ticks",
      "axis.title.x",
      "axis.title.y",
      "panel.background",
      "panel.border",
      "panel.grid.major",
      "panel.grid.minor"
    )
  )
)

In [None]:
# Set the project root; the relative paths used below (seurat_objects/, DEGs/)
# are resolved against this directory.
setwd("/media/daten/dmeral/scseq_analysis/2024_LV_CTRL_ALDO_REC")

In [None]:
# Fix the random number generator seed so that stochastic steps are repeatable.
set.seed(1234)

In [None]:
# Load the Seurat object from its h5Seurat file. `obj` is read as a global by
# doAnalysis(), doCMAnalysis() and the MR target gene score cell below.
obj <- LoadH5Seurat("seurat_objects/obj_seu_merge_harmony_sgl_addmodule_rename_CMcomb_onlyprotcod_ccscore.h5seurat")

# DEG analysis

In [None]:
# Named list mapping cluster IDs (character keys "0" to "14") to cell-type
# labels. The three cardiomyocyte clusters keep distinct labels (CM_0/1/2).
cluster_annotations <- setNames(
    as.list(c(
        "CM_0",    # 0
        "EC-cap",  # 1
        "FB",      # 2
        "PER",     # 3
        "MΦ",      # 4
        "EC-art",  # 5
        "EC-end",  # 6
        "SMC",     # 7
        "CM_1",    # 8
        "EC-lym",  # 9
        "CYC",     # 10
        "CM_2",    # 11
        "BC",      # 12
        "SC",      # 13
        "TC"       # 14
    )),
    as.character(0:14)
)

In [None]:
# Same cluster-ID-to-label mapping as cluster_annotations, except that clusters
# "0", "8" and "11" all carry the single label "CM".
cluster_annotations_CMcomb <- setNames(
    as.list(c(
        "CM",      # 0
        "EC-cap",  # 1
        "FB",      # 2
        "PER",     # 3
        "MΦ",      # 4
        "EC-art",  # 5
        "EC-end",  # 6
        "SMC",     # 7
        "CM",      # 8
        "EC-lym",  # 9
        "CYC",     # 10
        "CM",      # 11
        "BC",      # 12
        "SC",      # 13
        "TC"       # 14
    )),
    as.character(0:14)
)

## Wilcoxon Rank Sum test all combinations

In [None]:
# Pairwise Wilcoxon rank-sum DE tests between treatments, run per cluster.
#
# Args:
#   LA_LV: value of the `chamber` metadata column to analyse (e.g. "LV").
#   cluster_annotations: named list mapping `seurat_clusters` IDs to labels.
#     Labels are used in the output file names and as keys of the result, so
#     they should be unique; duplicated labels overwrite each other's files
#     and result entries.
#
# Returns:
#   Nested list, results[[label]][[comparison]] = FindMarkers data frame.
#   Clusters or comparisons that were skipped (see below) are absent.
#
# Side effects:
#   Reads the global `obj`. Writes one CSV per cluster and comparison to
#   DEGs/nothreshold/<comparison>_WRSum/ (created if missing).
#
# Clusters with no cells in the chamber, and comparisons in which a treatment
# group has too few cells, are skipped with a warning instead of aborting the
# whole run.
doAnalysis <- function(LA_LV, cluster_annotations) {
    # Pairwise contrasts as c(ident.1, ident.2); invariant across clusters
    comparisons <- list(
        "ALDO_vs_CTRL" = c("ALDO", "CTRL"),
        "REC_vs_CTRL" = c("REC", "CTRL"),
        "ALDO_vs_REC" = c("ALDO", "REC")
    )
    # FindMarkers' default min.cells.group; smaller groups make it error
    min_cells <- 3

    # Subset based on chamber and use treatment as identity class
    seurat_table <- subset(x = obj, subset = chamber == LA_LV)
    Idents(seurat_table) <- seurat_table@meta.data$"treatment"

    # write.csv() does not create directories, so do it up front
    out_dirs <- file.path("DEGs", "nothreshold", paste0(names(comparisons), "_WRSum"))
    names(out_dirs) <- names(comparisons)
    for (out_dir in out_dirs) {
        dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
    }

    results <- list()
    for (cl in names(cluster_annotations)) {
        name_of_clust <- cluster_annotations[[cl]]

        # Check the metadata first so an absent cluster is skipped, not fatal
        n_cells <- sum(seurat_table@meta.data$seurat_clusters == cl, na.rm = TRUE)
        if (n_cells == 0) {
            warning("Skipping cluster ", cl, " (", name_of_clust, "): no cells in chamber ", LA_LV, call. = FALSE)
            next
        }

        my_subseur <- subset(x = seurat_table, subset = seurat_clusters == cl)
        my_subseur <- NormalizeData(my_subseur)
        # NOTE(review): the Wilcoxon test presumably reads the normalized data,
        # not the scaled data, so this step may be unnecessary - confirm.
        my_subseur <- ScaleData(my_subseur)

        cluster_comparison_results <- list()
        for (comparison_name in names(comparisons)) {
            ident.1 <- comparisons[[comparison_name]][1]
            ident.2 <- comparisons[[comparison_name]][2]

            n1 <- sum(Idents(my_subseur) == ident.1)
            n2 <- sum(Idents(my_subseur) == ident.2)
            if (n1 < min_cells || n2 < min_cells) {
                warning(
                    "Skipping ", comparison_name, " in ", name_of_clust, ": ",
                    ident.1, " n=", n1, ", ", ident.2, " n=", n2,
                    call. = FALSE
                )
                next
            }

            # logfc.threshold = 0 keeps all tested genes (hence "nothreshold")
            my_markers <- FindMarkers(
                my_subseur,
                ident.1 = ident.1,
                ident.2 = ident.2,
                assay = "RNA",
                test.use = "wilcox",
                logfc.threshold = 0
            )

            file_name <- file.path(
                out_dirs[[comparison_name]],
                paste0("degs_wilcox_", LA_LV, "_", name_of_clust, ".csv")
            )
            write.csv(my_markers, file = file_name)

            cluster_comparison_results[[comparison_name]] <- my_markers
        }

        results[[name_of_clust]] <- cluster_comparison_results
    }

    results
}

In [None]:
# Run the per-cluster DE analysis for chamber "LV". The returned nested list is
# not assigned, so it is auto-printed as cell output; the CSVs are written by
# the function itself.
doAnalysis("LV", cluster_annotations)

In [None]:
# For combined CM cluster
#
# Pairwise Wilcoxon rank-sum DE tests between treatments on all cells whose
# `Cardiomyocyte` metadata value is "CM".
#
# Args:
#   LA_LV: value of the `chamber` metadata column to analyse (e.g. "LV").
#
# Returns:
#   List with elements ALDO_vs_CTRL, REC_vs_CTRL and ALDO_vs_REC, each the
#   FindMarkers data frame for that contrast.
#
# Side effects:
#   Reads the global `obj`. Writes
#   DEGs/nothreshold/<comparison>_WRSum/degs_wilcox_<LA_LV>_CM.csv for each
#   contrast (directories are created if missing).
doCMAnalysis <- function(LA_LV) {
    # Pairwise contrasts as c(ident.1, ident.2)
    comparisons <- list(
        ALDO_vs_CTRL = c("ALDO", "CTRL"),
        REC_vs_CTRL = c("REC", "CTRL"),
        ALDO_vs_REC = c("ALDO", "REC")
    )

    # Subset the Seurat object for the selected chamber
    seurat_table <- subset(obj, subset = chamber == LA_LV)

    # Assign "treatment" as the active identity class
    Idents(seurat_table) <- seurat_table@meta.data$"treatment"

    # Check for CM cells before subsetting: subset() is expected to error on an
    # empty selection, so a check placed after it would never be reached.
    if (!any(seurat_table@meta.data$Cardiomyocyte == "CM", na.rm = TRUE)) {
        stop("No cells found in CM cluster.")
    }
    my_subseur <- subset(seurat_table, subset = Cardiomyocyte == "CM")

    # Preprocess data for the combined CM cluster
    my_subseur <- NormalizeData(my_subseur)
    my_subseur <- ScaleData(my_subseur)

    # Run each contrast, write it to disk and collect the result
    results <- lapply(names(comparisons), function(comparison_name) {
        idents <- comparisons[[comparison_name]]
        markers <- FindMarkers(
            my_subseur,
            ident.1 = idents[1],
            ident.2 = idents[2],
            assay = "RNA",
            test.use = "wilcox",
            logfc.threshold = 0
        )

        # write.csv() does not create directories
        out_dir <- file.path("DEGs", "nothreshold", paste0(comparison_name, "_WRSum"))
        dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
        write.csv(markers, file = file.path(out_dir, paste0("degs_wilcox_", LA_LV, "_CM.csv")))

        markers
    })
    names(results) <- names(comparisons)

    results
}

In [None]:
# Run the combined-CM DE analysis for chamber "LV". The returned list is not
# assigned, so it is auto-printed as cell output.
doCMAnalysis("LV")

### Filter DEGs: remove mt-genes, keep p_val_adj < 0.05 and |avg_log2FC| > 0.58

In [None]:
# mt_genes.csv is a list of mt-genes with >0 counts in the dataset

# Load mt-genes
mt_genes <- unlist(read.csv("DEGs/mt_genes.csv", header = FALSE))

# List of DEG files ALDO vs. CTRL
# (`pattern` is a regular expression, not a glob; match the .csv extension)
file_names <- list.files("DEGs/ALDO_vs_CTRL_WRSum", pattern = "\\.csv$", full.names = TRUE)

# Load each DEG table (gene names become row names) and keep only rows that
#   - are not mt-genes,
#   - have |avg_log2FC| > 0.58, and
#   - have p_val_adj < 0.05.
# Base indexing keeps the gene names as row names; which() drops NA rows.
listDEG <- lapply(file_names, function(deg_file) {
    deg_list <- read.csv(deg_file, header = TRUE, row.names = 1)
    keep <- !(rownames(deg_list) %in% mt_genes) &
        abs(deg_list$avg_log2FC) > 0.58 &
        deg_list$p_val_adj < 0.05
    deg_list[which(keep), , drop = FALSE]
})

# Name each table after its file: drop the directory, the extension and
# everything up to and including "wilcox_" (e.g. "LV_CM")
listDEG_names <- sub(".*wilcox_", "", tools::file_path_sans_ext(basename(file_names)))
names(listDEG) <- listDEG_names

# Save filtered DEG files (write.csv() does not create the directory)
dir.create("DEGs/ALDO_vs_CTRL_WRSum/filtered", recursive = TRUE, showWarnings = FALSE)
invisible(lapply(names(listDEG), function(name) {
    write.csv(
        listDEG[[name]],
        file = paste0("DEGs/ALDO_vs_CTRL_WRSum/filtered/degs_wilcox_", name, "_filtered.csv")
    )
}))

In [None]:
# List of DEG files ALDO vs. REC
# (`pattern` is a regular expression, not a glob; match the .csv extension)
file_names <- list.files("DEGs/ALDO_vs_REC_WRSum", pattern = "\\.csv$", full.names = TRUE)

# Load each DEG table (gene names become row names) and keep only rows that
#   - are not mt-genes (`mt_genes` is loaded in the ALDO vs. CTRL cell),
#   - have |avg_log2FC| > 0.58, and
#   - have p_val_adj < 0.05.
# Base indexing keeps the gene names as row names; which() drops NA rows.
listDEG <- lapply(file_names, function(deg_file) {
    deg_list <- read.csv(deg_file, header = TRUE, row.names = 1)
    keep <- !(rownames(deg_list) %in% mt_genes) &
        abs(deg_list$avg_log2FC) > 0.58 &
        deg_list$p_val_adj < 0.05
    deg_list[which(keep), , drop = FALSE]
})

# Name each table after its file: drop the directory, the extension and
# everything up to and including "wilcox_" (e.g. "LV_CM")
listDEG_names <- sub(".*wilcox_", "", tools::file_path_sans_ext(basename(file_names)))
names(listDEG) <- listDEG_names

# Save filtered DEG files (write.csv() does not create the directory)
dir.create("DEGs/ALDO_vs_REC_WRSum/filtered", recursive = TRUE, showWarnings = FALSE)
invisible(lapply(names(listDEG), function(name) {
    write.csv(
        listDEG[[name]],
        file = paste0("DEGs/ALDO_vs_REC_WRSum/filtered/degs_wilcox_", name, "_filtered.csv")
    )
}))

In [None]:
# List of DEG files REC vs. CTRL
# (`pattern` is a regular expression, not a glob; match the .csv extension)
file_names <- list.files("DEGs/REC_vs_CTRL_WRSum", pattern = "\\.csv$", full.names = TRUE)

# Load each DEG table (gene names become row names) and keep only rows that
#   - are not mt-genes (`mt_genes` is loaded in the ALDO vs. CTRL cell),
#   - have |avg_log2FC| > 0.58, and
#   - have p_val_adj < 0.05.
# Base indexing keeps the gene names as row names; which() drops NA rows.
listDEG <- lapply(file_names, function(deg_file) {
    deg_list <- read.csv(deg_file, header = TRUE, row.names = 1)
    keep <- !(rownames(deg_list) %in% mt_genes) &
        abs(deg_list$avg_log2FC) > 0.58 &
        deg_list$p_val_adj < 0.05
    deg_list[which(keep), , drop = FALSE]
})

# Name each table after its file: drop the directory, the extension and
# everything up to and including "wilcox_" (e.g. "LV_CM")
listDEG_names <- sub(".*wilcox_", "", tools::file_path_sans_ext(basename(file_names)))
names(listDEG) <- listDEG_names

# Save filtered DEG files (write.csv() does not create the directory)
dir.create("DEGs/REC_vs_CTRL_WRSum/filtered", recursive = TRUE, showWarnings = FALSE)
invisible(lapply(names(listDEG), function(name) {
    write.csv(
        listDEG[[name]],
        file = paste0("DEGs/REC_vs_CTRL_WRSum/filtered/degs_wilcox_", name, "_filtered.csv")
    )
}))

In [None]:
# Create REC_vs_ALDO from ALDO_vs_REC by reversing the direction of the
# comparison: negate avg_log2FC and swap pct.1 / pct.2 so that group 1 is REC.
# Directory containing the original files
input_dir <- "DEGs/ALDO_vs_REC_WRSum/filtered/"

# Directory to save modified files
output_dir <- "DEGs/REC_vs_ALDO_WRSum/filtered/"

# Create output directory if it doesn't exist
if (!dir.exists(output_dir)) {
  dir.create(output_dir, recursive = TRUE)
}

# List all CSV files in the input directory
file_names <- list.files(input_dir, pattern = "\\.csv$", full.names = TRUE)

# Process each file
invisible(lapply(file_names, function(file) {
  # Read the CSV file (the gene-name column is read as a regular column)
  data <- read.csv(file, header = TRUE)

  if (!"avg_log2FC" %in% colnames(data)) {
    stop(paste("Column 'avg_log2FC' not found in file:", file))
  }

  # Switch the sign of the avg_log2FC column
  data$avg_log2FC <- -data$avg_log2FC

  # pct.1 / pct.2 describe the first / second group of the comparison, so they
  # have to trade places when the comparison is reversed
  if (all(c("pct.1", "pct.2") %in% colnames(data))) {
    pct_first <- data$pct.1
    data$pct.1 <- data$pct.2
    data$pct.2 <- pct_first
  }

  # Construct the output file path
  output_file <- file.path(output_dir, basename(file))

  # Save the modified data to the output directory
  write.csv(data, output_file, row.names = FALSE)
}))

In [None]:
# Create REC_vs_ALDO from ALDO_vs_REC (unfiltered files) by reversing the
# direction of the comparison: negate avg_log2FC and swap pct.1 / pct.2 so
# that group 1 is REC.
input_dir <- "DEGs/ALDO_vs_REC_WRSum/"

output_dir <- "DEGs/REC_vs_ALDO_WRSum/"

if (!dir.exists(output_dir)) {
  dir.create(output_dir, recursive = TRUE)
}

# Only the CSVs directly inside input_dir are listed (not the filtered/ subfolder)
file_names <- list.files(input_dir, pattern = "\\.csv$", full.names = TRUE)

invisible(lapply(file_names, function(file) {
  # The gene-name column is read as a regular column
  data <- read.csv(file, header = TRUE)

  if (!"avg_log2FC" %in% colnames(data)) {
    stop(paste("Column 'avg_log2FC' not found in file:", file))
  }

  data$avg_log2FC <- -data$avg_log2FC

  # pct.1 / pct.2 describe the first / second group of the comparison, so they
  # have to trade places when the comparison is reversed
  if (all(c("pct.1", "pct.2") %in% colnames(data))) {
    pct_first <- data$pct.1
    data$pct.1 <- data$pct.2
    data$pct.2 <- pct_first
  }

  output_file <- file.path(output_dir, basename(file))

  write.csv(data, output_file, row.names = FALSE)
}))

### Keep only protein-coding, non-mt genes (removes GM... and ...Rik genes)

In [None]:
# Load the list of protein-coding genes
# NOTE(review): the heading also mentions excluding mt-genes; this cell only
# filters against this list, so the list presumably already omits them - confirm.
protein_coding_genes <- unlist(read.csv("DEGs/nothreshold/protein_coding_gene_names_filtered.txt", header = TRUE, stringsAsFactors = FALSE))

# List of DEG files ALDO vs. CTRL
# (`pattern` is a regular expression, not a glob; match the .csv extension)
file_names <- list.files("DEGs/nothreshold/ALDO_vs_CTRL_WRSum", pattern = "\\.csv$", full.names = TRUE)

# Load each DEG table (gene names become row names) and keep only the
# protein-coding genes. Base indexing keeps the gene names as row names.
listDEG <- lapply(file_names, function(deg_file) {
    deg_list <- read.csv(deg_file, header = TRUE, row.names = 1)
    deg_list[rownames(deg_list) %in% protein_coding_genes, , drop = FALSE]
})

# Name each table after its file: drop the directory, the extension and
# everything up to and including "wilcox_" (e.g. "LV_CM")
listDEG_names <- sub(".*wilcox_", "", tools::file_path_sans_ext(basename(file_names)))
names(listDEG) <- listDEG_names

# Save filtered DEG files (write.csv() does not create the directory)
dir.create("DEGs/nothreshold/ALDO_vs_CTRL_WRSum/filtered", recursive = TRUE, showWarnings = FALSE)
invisible(lapply(names(listDEG), function(name) {
    write.csv(
        listDEG[[name]],
        file = paste0("DEGs/nothreshold/ALDO_vs_CTRL_WRSum/filtered/degs_wilcox_", name, "_filtered.csv")
    )
}))

In [None]:
# Keep only protein-coding genes (removes GM... and ...Rik genes)
# NOTE(review): mt-genes are not filtered separately here; the list presumably
# already omits them - confirm.

# Load the list of protein-coding genes
protein_coding_genes <- unlist(read.csv("DEGs/nothreshold/protein_coding_gene_names_filtered.txt", header = TRUE, stringsAsFactors = FALSE))

# List of DEG files for ALDO vs. REC
# (`pattern` is a regular expression, not a glob; match the .csv extension)
file_names <- list.files("DEGs/nothreshold/ALDO_vs_REC_WRSum", pattern = "\\.csv$", full.names = TRUE)

# Load each DEG table (gene names become row names) and keep only the
# protein-coding genes. Base indexing keeps the gene names as row names.
listDEG <- lapply(file_names, function(deg_file) {
    deg_list <- read.csv(deg_file, header = TRUE, row.names = 1)
    deg_list[rownames(deg_list) %in% protein_coding_genes, , drop = FALSE]
})

# Name each table after its file: drop the directory, the extension and
# everything up to and including "wilcox_" (e.g. "LV_CM")
listDEG_names <- sub(".*wilcox_", "", tools::file_path_sans_ext(basename(file_names)))
names(listDEG) <- listDEG_names

# Save filtered DEG files (write.csv() does not create the directory)
dir.create("DEGs/nothreshold/ALDO_vs_REC_WRSum/filtered", recursive = TRUE, showWarnings = FALSE)
invisible(lapply(names(listDEG), function(name) {
    write.csv(
        listDEG[[name]],
        file = paste0("DEGs/nothreshold/ALDO_vs_REC_WRSum/filtered/degs_wilcox_", name, "_filtered.csv")
    )
}))

In [None]:
# Keep only protein-coding genes (removes GM... and ...Rik genes)
# NOTE(review): mt-genes are not filtered separately here; the list presumably
# already omits them - confirm.

# Load the list of protein-coding genes
protein_coding_genes <- unlist(read.csv("DEGs/nothreshold/protein_coding_gene_names_filtered.txt", header = TRUE, stringsAsFactors = FALSE))

# List of DEG files for REC vs. CTRL
# (`pattern` is a regular expression, not a glob; match the .csv extension)
file_names <- list.files("DEGs/nothreshold/REC_vs_CTRL_WRSum", pattern = "\\.csv$", full.names = TRUE)

# Load each DEG table (gene names become row names) and keep only the
# protein-coding genes. Base indexing keeps the gene names as row names.
listDEG <- lapply(file_names, function(deg_file) {
    deg_list <- read.csv(deg_file, header = TRUE, row.names = 1)
    deg_list[rownames(deg_list) %in% protein_coding_genes, , drop = FALSE]
})

# Name each table after its file: drop the directory, the extension and
# everything up to and including "wilcox_" (e.g. "LV_CM")
listDEG_names <- sub(".*wilcox_", "", tools::file_path_sans_ext(basename(file_names)))
names(listDEG) <- listDEG_names

# Save filtered DEG files (write.csv() does not create the directory)
dir.create("DEGs/nothreshold/REC_vs_CTRL_WRSum/filtered", recursive = TRUE, showWarnings = FALSE)
invisible(lapply(names(listDEG), function(name) {
    write.csv(
        listDEG[[name]],
        file = paste0("DEGs/nothreshold/REC_vs_CTRL_WRSum/filtered/degs_wilcox_", name, "_filtered.csv")
    )
}))

In [None]:
# Create REC_vs_ALDO from ALDO_vs_REC (no-threshold, protein-coding-filtered
# files) by reversing the direction of the comparison: negate avg_log2FC and
# swap pct.1 / pct.2 so that group 1 is REC.
input_dir <- "DEGs/nothreshold/ALDO_vs_REC_WRSum/filtered/"

output_dir <- "DEGs/nothreshold/REC_vs_ALDO_WRSum/filtered/"

if (!dir.exists(output_dir)) {
  dir.create(output_dir, recursive = TRUE)
}

file_names <- list.files(input_dir, pattern = "\\.csv$", full.names = TRUE)

invisible(lapply(file_names, function(file) {
  # The gene-name column is read as a regular column
  data <- read.csv(file, header = TRUE)

  if (!"avg_log2FC" %in% colnames(data)) {
    stop(paste("Column 'avg_log2FC' not found in file:", file))
  }

  data$avg_log2FC <- -data$avg_log2FC

  # pct.1 / pct.2 describe the first / second group of the comparison, so they
  # have to trade places when the comparison is reversed
  if (all(c("pct.1", "pct.2") %in% colnames(data))) {
    pct_first <- data$pct.1
    data$pct.1 <- data$pct.2
    data$pct.2 <- pct_first
  }

  output_file <- file.path(output_dir, basename(file))

  write.csv(data, output_file, row.names = FALSE)
}))

# MR target gene signature

In [None]:
# Run for MR target genes and use normalized counts directly, no weighting by log2FC.
# Per sample: mean expression of each key gene, min-max scaled across samples
# per gene, summed over genes and min-max scaled again.

# Subset Seurat object by cluster
cluster_ID <- "CM"
obj_cluster <- subset(obj, subset = cell_type_CMcomb == cluster_ID)

# Per-cell treatment labels
sample_metadata <- obj_cluster@meta.data$treatment

# Key genes
key_genes <- c("Per1", "Per3", "Dbp", "Zbtb16", "Hlf")

# Get per-cell expression data for the key genes from the Seurat object
expr_data <- FetchData(obj_cluster, vars = key_genes)

# Fail early and clearly if a key gene did not come back, rather than later
# with an obscure error while building the score table
missing_genes <- setdiff(key_genes, colnames(expr_data))
if (length(missing_genes) > 0) {
  stop(
    "Key genes not found in expression data: ",
    paste(missing_genes, collapse = ", "),
    call. = FALSE
  )
}

# Get sample IDs and add them to expression data
expr_data$sample_id <- obj_cluster@meta.data$sample_id

# Reshape the data to long format and calculate mean expression per sample
expr_data_long <- expr_data %>%
  pivot_longer(cols = -sample_id, names_to = "gene", values_to = "expression") %>%
  group_by(sample_id, gene) %>%
  summarize(mean_expression = mean(expression), .groups = "drop")

# Min-max scale the mean expression for each gene to be between 0 and 1
scaled_expr_data <- expr_data_long %>%
  group_by(gene) %>%
  mutate(scaled_expression = scales::rescale(mean_expression)) %>%
  ungroup()

# Create table with one "score_XXX" column per entry "XXX" of key_genes.
# Pivoting joins the values on sample_id instead of relying on row order.
score_cols <- paste0("score_", key_genes)
scores <- scaled_expr_data %>%
  pivot_wider(
    id_cols = sample_id,
    names_from = gene,
    values_from = scaled_expression,
    names_prefix = "score_"
  ) %>%
  as.data.frame()

# Keep the columns in key_genes order
scores <- scores[, c("sample_id", score_cols), drop = FALSE]

# Sum scores across key genes to get total score for each sample
scores$total_score <- rowSums(scores[, score_cols, drop = FALSE], na.rm = TRUE)

# Normalize total score across samples
scores$normalized_score <- scales::rescale(scores$total_score)

# Add the treatment of each sample (taken from the first matching cell)
scores$treatment <- obj_cluster@meta.data$treatment[match(scores$sample_id, obj_cluster@meta.data$sample_id)]

# View results
scores

# Save results as CSV (write.csv() does not create the directory)
dir.create("DEGs/nothreshold/recovery_score", recursive = TRUE, showWarnings = FALSE)
write.csv(scores, paste0("DEGs/nothreshold/recovery_score/GOI_polygenic_recovery_scores_scaled_", cluster_ID, ".csv"), row.names = FALSE)

In [2]:
# Print the R version, platform and attached package versions for reproducibility.
sessionInfo()

R version 4.3.3 (2024-02-29)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 22.04.2 LTS

Matrix products: default
BLAS/LAPACK: /media/daten/dmeral/micromamba/envs/scrna_dm/lib/libopenblasp-r0.3.27.so;  LAPACK version 3.12.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: Etc/UTC
tzcode source: system (glibc)

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] sva_3.50.0                  BiocParallel_1.36.0        
 [3] genefilter_1.84.0           mgcv_1.9-1                 
 [5] nlme_3.1-166                scales_1.3.0               
 [7] pals_1.9           