# Load libraries and Themes

In [3]:
suppressPackageStartupMessages({
    suppressWarnings({
        library(Seurat)
        library(ggplot2)
        library(tidyverse)
        library(SeuratDisk)
        library(SingleCellExperiment)
        library(DESeq2)
        library(dplyr)
        library(EnhancedVolcano)
        library(Rsamtools)
        library(svglite)
        library(viridis)
        library(harmony)
        library(patchwork)
        library(ggpubr)
        library(pals)
        library(RCurl)
        library(AnnotationHub)
        library(ensembldb)
        library(networkD3)
            })})

In [None]:
# Colour palettes ----

# Cell-cycle phases, listed as G1, S, G2M
palette.3 <- c("#440154", "#21908d", "#fde725")

# Shuffled "stepped" palettes, generated in the order 12, 15, 17 colours
palette.12 <- DiscretePalette(n = 12, palette = "stepped", shuffle = TRUE)
palette.15 <- DiscretePalette(n = 15, palette = "stepped", shuffle = TRUE)
palette.17 <- DiscretePalette(n = 17, palette = "stepped", shuffle = TRUE)

# One colour per combined cell type (label follows each hex code)
palette.CMcomb <- c(
  "#aad902", # BC
  "#990902", # CM
  "#aba790", # CYC
  "#005f99", # ECart
  "#05b1eb", # ECcap
  "#0303a3", # ECend
  "#003ae8", # EClym
  "#ebe6c7", # FB
  "#077a01", # MP
  "#422c00", # PER
  "#FFD100", # SC
  "#ffab00", # SMC
  "#56B400"  # TC
)

# Treatment groups, listed as CTRL, ALDO, REC
palette.treatment <- c("#C1C1C1", "#AF0000", "#006AF3")

# Per-sample colours: 2 x CTRL, 4 x ALDO, 2 x CTRL, 4 x REC
# NOTE(review): this order must match the plotting order of sample_id - confirm.
palette.treatmentid <- c(
  rep("#C1C1C1", 2),
  rep("#AF0000", 4),
  rep("#C1C1C1", 2),
  rep("#006AF3", 4)
)

# Sex colours; the second entry is the one annotated as female
palette.sex <- c("#006AF3", "#AF0000")

In [None]:
# Bare theme for embeddings: blanks every axis, panel background/border and
# grid element listed below.
umap_blank_elements <- c(
  "axis.line", "axis.text.x", "axis.text.y", "axis.ticks",
  "axis.title.x", "axis.title.y",
  "panel.background", "panel.border",
  "panel.grid.major", "panel.grid.minor"
)

umap_theme <- do.call(
  theme,
  setNames(
    rep(list(element_blank()), length(umap_blank_elements)),
    umap_blank_elements
  )
)

In [None]:
# All relative paths used below (seurat_objects/, DEGs/, Plots/, cellnumbers/,
# nonzerocounts/) are resolved against this project directory.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
setwd("/media/daten/dmeral/scseq_analysis/2024_LV_CTRL_ALDO_REC")

In [None]:
# Fix the R random number generator state for the steps that follow.
set.seed(1234)

# Identify viable clusters and cell types

In [None]:
# Load the previously saved Seurat object from its h5Seurat file.
# NOTE(review): judging by the file name this is the merged, Harmony-integrated
# object with module scores added - confirm against the upstream notebook.
obj_harmony_addmodule <- LoadH5Seurat("seurat_objects/obj_seu_merge_harmony_sgl_addmodule.h5seurat")

In [None]:
# Inspect the doublet score per cluster (motivates removing cluster 15 below)
options(repr.plot.width = 5, repr.plot.height = 4, repr.plot.res = 300)

# Plot the object that was actually loaded above; `obj_harmony` and
# `palette.21` are not defined anywhere in this notebook. palette.17 is the
# largest palette defined here.
# NOTE(review): assumes at most 17 clusters before filtering - confirm.
VlnPlot1 <- VlnPlot(
  obj_harmony_addmodule,
  group.by = "seurat_clusters",
  features = c("scDblFinder.weighted"),
  pt.size = 0,
  raster = TRUE,
  ncol = 2,
  cols = palette.17
) & NoLegend()

VlnPlot1

In [None]:
# Manually exclude cluster 15 because of its high doublet score and uneven
# sample composition; every other cluster is kept.
# NOTE(review): keep the space-separated form of this expression. Seurat's
# subset() appears to find metadata columns by splitting the expression text on
# spaces, so a form such as `!(seurat_clusters %in% 15)` may not be recognised -
# verify before restyling.
obj <- subset(obj_harmony_addmodule, seurat_clusters %in% c(15) == FALSE)

## FindAllMarkers

In [None]:
# Sanity check: clusters and treatment side by side on the current UMAP
options(repr.plot.width = 8, repr.plot.height = 3, repr.plot.res = 300)

small_text <- theme(text = element_text(size = 10))

sanity_clusters <- DimPlot(
  obj,
  reduction = "umap", group.by = "seurat_clusters",
  label = TRUE, label.size = 5, shuffle = TRUE, cols = palette.15
) + umap_theme + NoLegend() + small_text

sanity_treatment <- DimPlot(
  obj,
  reduction = "umap", group.by = "treatment",
  label = FALSE, label.size = 5, shuffle = TRUE, cols = palette.treatment
) + umap_theme + small_text

# patchwork: place both panels in one row (auto-printed by the notebook)
sanity_clusters | sanity_treatment

In [None]:
# Positive markers per identity class (Wilcoxon test); a gene must be detected
# in at least 50 % of cells and pass the 0.58 log fold-change threshold.
all.markers <- FindAllMarkers(
  object = obj,
  test.use = "wilcox",
  only.pos = TRUE,
  min.pct = 0.50,
  logfc.threshold = 0.58
)

# Persist the marker table for downstream inspection
write.csv(x = all.markers, file = "DEGs/all.markers_wilcox.csv")

## Rename Cluster annotations

In [None]:
# Cluster ID -> cell-type label, one entry per cluster 0..14 (same order as
# the IDs); cardiomyocyte clusters keep distinct suffixes here.
cluster_annotations <- setNames(
  as.list(c(
    "CM_0", "EC-cap", "FB", "PER", "MΦ",
    "EC-art", "EC-end", "SMC", "CM_1", "EC-lym",
    "CYC", "CM_2", "BC", "SC", "TC"
  )),
  as.character(0:14)
)

In [None]:
# Flatten the annotation list to a character vector, keeping its cluster-ID
# names so labels are matched by ID rather than by position.
cluster_annotations <- unlist(cluster_annotations)

# Keep the numeric cluster identity so it can be restored afterwards
obj@meta.data$Ident_numerical <- Idents(obj)

# Fail loudly if the annotation keys and the identity levels disagree instead
# of silently assigning labels to the wrong clusters.
if (!setequal(names(cluster_annotations), levels(obj))) {
  stop(
    "cluster_annotations keys do not match the identity levels of obj",
    call. = FALSE
  )
}
cluster_annotations <- cluster_annotations[levels(obj)]

# Store the annotated identity as `cell_type`, then switch back to the
# numeric identity as the active one.
obj <- RenameIdents(obj, cluster_annotations)
obj@meta.data$cell_type <- Idents(obj)
Idents(obj) <- obj@meta.data$"Ident_numerical"

SaveH5Seurat(obj, "seurat_objects/obj_seu_merge_harmony_sgl_addmodule_rename")

In [None]:
# Cell numbers per cluster after renaming, written as CSV contingency tables
cluster_column <- obj@meta.data[["seurat_clusters"]]

# ... broken down by sample
number <- table(obj@meta.data[["sample_id"]], cluster_column[TRUE])
write.csv(
  x = number,
  file = "cellnumbers/number_perCluster_sample_id_sgl_rename.csv"
)

# ... broken down by treatment
# NOTE(review): "teatment" in the file name looks like a typo of "treatment";
# left unchanged because other code may read this exact path.
number <- table(obj@meta.data[["treatment"]], cluster_column[TRUE])
write.csv(
  x = number,
  file = "cellnumbers/number_perCluster_teatment_sgl_rename.csv"
)

In [None]:
# Combine CM clusters 
cluster_annotations_CMcomb <- list(
    "0" = "CM",
    "1" = "EC-cap",
    "2" = "FB",
    "3" = "PER",
    "4" = "MΦ",
    "5" = "EC-art",
    "6" = "EC-end",
    "7" = "SMC",
    "8" = "CM",
    "9" = "EC-lym",
    "10" = "CYC",
    "11" = "CM",
    "12" = "BC",
    "13" = "SC",
    "14" = "TC")

In [None]:
# The annotation list is already keyed by cluster ID. Check that the keys cover
# exactly the current identity levels rather than overwriting them by position,
# which would silently mislabel clusters if the level order ever differed.
if (!setequal(names(cluster_annotations_CMcomb), levels(obj))) {
  stop(
    "cluster_annotations_CMcomb keys do not match the identity levels of obj",
    call. = FALSE
  )
}
cluster_annotations_CMcomb <- cluster_annotations_CMcomb[levels(obj)]

# Rename identities (clusters sharing a label are merged) and store the result
obj <- RenameIdents(obj, cluster_annotations_CMcomb)

obj$"cell_type_CMcomb" <- Idents(obj)

In [None]:
# Desired display order of the combined cell types
new_levels <- c(
  "BC", "CM", "CYC",
  "EC-art", "EC-cap", "EC-end", "EC-lym",
  "FB", "MΦ", "PER", "SC", "SMC", "TC"
)

# Re-level cell_type_CMcomb; labels absent from new_levels would become NA
obj@meta.data[["cell_type_CMcomb"]] <- factor(
  x = obj@meta.data[["cell_type_CMcomb"]],
  levels = new_levels
)

# Show the resulting level order
levels(obj@meta.data[["cell_type_CMcomb"]])

# Snapshot of the object at this stage; its seurat_clusters are copied back
# after re-clustering later in the notebook.
obj_CMcomb <- obj

In [None]:
# Save the object carrying the combined-CM annotation (cell_type_CMcomb).
SaveH5Seurat(obj, "seurat_objects/obj_seu_merge_harmony_sgl_addmodule_rename_CMcomb")

## Plots of markers

In [None]:
# Five marker genes per cluster, keyed by cluster ID. Entries are listed in the
# display order of the combined cell types (BC, CM, CYC, ...), not by ID.
Top5_markers_CMcomb <- list(
  "12" = c("Bank1", "Aff3", "Dock2", "Prkcb", "Pax5"),
  "0"  = c("Ttn", "Rbm20", "Fhod3", "Ryr2", "Pde4dip"),
  "10" = c("Diaph3", "Rad51b", "Top2a", "Mki67", "Knl1"),
  "5"  = c("St6galnac3", "Ldb2", "Vegfc", "Mecom", "Rasgrf2"),
  "1"  = c("Flt1", "Cyyr1", "Adgrl4", "Plcb1", "Ptprb"),
  "6"  = c("Tmem108", "Chrm3", "Npr3", "Cgnl1", "Cdh11"),
  "9"  = c("Reln", "Mmrn1", "Flt4", "Wdr17", "Pard6g"),
  "2"  = c("Pcdh9", "Bicc1", "Abca8a", "Gpc6", "Kcnt2"),
  "4"  = c("Slc9a9", "Mrc1", "Mctp1", "Arhgap15", "F13a1"),
  "3"  = c("Trpc3", "Pdgfrb", "Pde8b", "Ano1", "Notch3"),
  "13" = c("Csmd1", "Cadm2", "Slc35f1", "Adam23", "Nkain2"),
  "7"  = c("Myh11", "Dgkb", "Kcnab1", "Mrvi1", "Mylk"),
  "14" = c("Skap1", "Ptprc", "Gm2682", "Ikzf1", "Grap2")
)

# Long table (one row per marker) used to detect genes listed more than once
genes_per_group <- lengths(Top5_markers_CMcomb)
marker_df <- data.frame(
  Group = rep(names(Top5_markers_CMcomb), times = genes_per_group),
  Marker = unlist(Top5_markers_CMcomb)
)

# Genes that occur again after their first appearance
is_repeat <- duplicated(marker_df$Marker)
duplicated_markers <- marker_df$Marker[is_repeat]

# All rows (including the first occurrence) of any repeated gene
duplicated_info <- marker_df[marker_df$Marker %in% duplicated_markers, ]

print(duplicated_info)

In [None]:
options(repr.plot.width = 15, repr.plot.height = 4, repr.plot.res = 300)

# Marker genes in display order (cluster names dropped)
marker_genes <- unlist(Top5_markers_CMcomb, use.names = FALSE)

# Dot plot of the marker genes per combined cell type. The extra geom_point
# layer draws a black outline around every dot.
DOT_Top5_markers <- DotPlot(
  obj,
  features = marker_genes,
  group.by = "cell_type_CMcomb"
) +
  geom_point(aes(size = pct.exp), shape = 21, colour = "black", stroke = 0.5) +
  guides(
    size = guide_legend(
      override.aes = list(shape = 21, colour = "black", fill = "white")
    ),
    colour = guide_colorbar(frame.colour = "black", ticks.colour = "black")
  ) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    panel.border = element_rect(color = "black", linewidth = 0.5)
  ) +
  # direction must be 1 or -1; 1 keeps the palette in its native light-to-dark
  # order, which is how the previous value of 0 was effectively treated.
  scale_colour_distiller(palette = "OrRd", direction = 1) +
  #scale_colour_viridis_c(option = "D", direction = 1) +
  scale_y_discrete(limits = rev)

# Separator after every marker group except the last, derived from the actual
# group sizes instead of assuming five genes per group.
group_ends <- cumsum(lengths(Top5_markers_CMcomb))
vline_positions <- head(group_ends, -1) + 0.5

DOT_Top5_markers <- DOT_Top5_markers +
  geom_vline(
    xintercept = vline_positions,
    color = "black", linetype = "solid", linewidth = 0.5
  )

# Pass the plot explicitly rather than relying on ggplot2's "last plot" state
ggsave(
  "Plots/DOT_Top5_markers_CMcomb.svg",
  plot = DOT_Top5_markers,
  width = 38, height = 10, units = "cm", dpi = 300
)

DOT_Top5_markers

## Visualize QC and UMAP

In [None]:
# QC metrics per combined cell type, arranged as a 2 x 2 grid of violins
options(repr.plot.width = 20, repr.plot.height = 10, repr.plot.res = 300)

VlnPlot_QC <- VlnPlot(
  obj,
  features = c("nCount_RNA", "nFeature_RNA", "percent.mt", "scDblFinder.weighted"),
  group.by = "cell_type_CMcomb",
  cols = palette.CMcomb,
  pt.size = 0, raster = TRUE, ncol = 2
)

# Settings shared by all four panels
qc_shared_theme <- theme(
  axis.text.y = element_text(size = 15),
  axis.title.x = element_blank(),
  plot.title = element_text(size = 15)
)
# x-axis labels and ticks are drawn on the fourth panel only
qc_x_shown <- theme(
  axis.text.x = element_text(size = 15),
  axis.ticks.x = element_line()
)
qc_x_hidden <- theme(
  axis.text.x = element_blank(),
  axis.ticks.x = element_blank()
)

plots <- lapply(seq_len(4), function(panel) {
  x_axis_theme <- if (panel == 4) qc_x_shown else qc_x_hidden
  VlnPlot_QC[[panel]] + qc_shared_theme + x_axis_theme
})

combined_plot <- wrap_plots(plots, ncol = 2)

ggsave(
  "Plots/VlnPlot_QC_by_cell_type_CMcomb.svg", combined_plot,
  width = 30, height = 20, units = "cm", dpi = 300
)

suppressWarnings(print(combined_plot))

In [None]:
# QC metrics per sample, arranged as a 2 x 2 grid of violins
options(repr.plot.width = 20, repr.plot.height = 10, repr.plot.res = 300)

VlnPlot_QC <- VlnPlot(
  obj,
  features = c("nCount_RNA", "nFeature_RNA", "percent.mt", "scDblFinder.weighted"),
  group.by = "sample_id",
  cols = palette.treatmentid,
  pt.size = 0, raster = TRUE, ncol = 2
)

# Build the theme for one panel; only the fourth panel keeps x-axis text/ticks
sample_panel_theme <- function(show_x_axis) {
  x_text <- if (show_x_axis) element_text(size = 15) else element_blank()
  x_ticks <- if (show_x_axis) element_line() else element_blank()
  theme(
    axis.text.x = x_text,
    axis.text.y = element_text(size = 15),
    axis.ticks.x = x_ticks,
    axis.title.x = element_blank(),
    plot.title = element_text(size = 15)
  )
}

plots <- lapply(seq_len(4), function(panel) {
  VlnPlot_QC[[panel]] + sample_panel_theme(show_x_axis = panel == 4)
})

combined_plot <- wrap_plots(plots, ncol = 2)

ggsave(
  "Plots/VlnPlot_QC_by_sample_id.svg", combined_plot,
  width = 30, height = 20, units = "cm", dpi = 300
)

suppressWarnings(print(combined_plot))

In [None]:
# UMAP plots with all genes
options(repr.plot.width = 3, repr.plot.height = 2, repr.plot.res = 300)

# The all-genes embedding is only copied to "umap_all" further down the
# notebook. Use "umap_all" when it exists and fall back to "umap" otherwise, so
# the cell works when run in notebook order and still picks the all-genes
# embedding when re-run after "umap" has been replaced.
umap_all_genes <- if ("umap_all" %in% names(obj@reductions)) "umap_all" else "umap"

# Make sure the output folder exists before saving into it
dir.create("Plots/UMAP_all_genes", showWarnings = FALSE, recursive = TRUE)

UMAP_rename_seurat_clusters <- DimPlot(obj, reduction = umap_all_genes, group.by = "seurat_clusters", label = TRUE, shuffle = TRUE, cols = palette.15) +
  umap_theme + ggtitle("UMAP seurat clusters") + theme(text = element_text(size = 15)) & NoLegend()

UMAP_rename_cell_type <- DimPlot(obj, reduction = umap_all_genes, group.by = "cell_type_CMcomb", label = TRUE, shuffle = TRUE, cols = palette.CMcomb) +
  umap_theme + ggtitle("UMAP cell types") + theme(text = element_text(size = 15)) & NoLegend()

UMAP_rename_treatment <- DimPlot(obj, reduction = umap_all_genes, label = FALSE, group.by = "treatment", shuffle = TRUE, cols = palette.treatment) +
  umap_theme + ggtitle("UMAP treatment") + theme(text = element_text(size = 15)) & NoLegend()

UMAP_rename_sample_id <- DimPlot(obj, reduction = umap_all_genes, label = FALSE, group.by = "sample_id", shuffle = TRUE, cols = palette.12) +
  umap_theme + ggtitle("UMAP sample_id") + theme(text = element_text(size = 15)) & NoLegend()

UMAP_sex <- DimPlot(obj, reduction = umap_all_genes, label = FALSE, group.by = "sex", shuffle = TRUE, cols = palette.sex) +
  umap_theme + ggtitle("UMAP sex") + theme(text = element_text(size = 15)) & NoLegend()

ggsave("Plots/UMAP_all_genes/UMAP_rename_seurat_clusters.svg", UMAP_rename_seurat_clusters, units = "cm", dpi = 300, width = 30, height = 20)
ggsave("Plots/UMAP_all_genes/UMAP_rename_cell_type.svg", UMAP_rename_cell_type, units = "cm", dpi = 300, width = 30, height = 20)
ggsave("Plots/UMAP_all_genes/UMAP_rename_treatment.svg", UMAP_rename_treatment, units = "cm", dpi = 300, width = 30, height = 20)
ggsave("Plots/UMAP_all_genes/UMAP_rename_sample_id.svg", UMAP_rename_sample_id, units = "cm", dpi = 300, width = 30, height = 20)
ggsave("Plots/UMAP_all_genes/UMAP_rename_sex.svg", UMAP_sex, units = "cm", dpi = 300, width = 30, height = 20)

UMAP_rename_seurat_clusters
suppressWarnings(print(UMAP_rename_cell_type))
UMAP_rename_treatment
UMAP_rename_sample_id
UMAP_sex

## Do PCA analysis with only protein coding genes

In [None]:
# Keep a copy of each all-genes reduction under "<name>_all" before the
# protein-coding-only versions are computed.
# NOTE(review): the copies keep the key of the reduction they were copied from.
for (reduction_name in c("pca", "umap", "harmony")) {
  obj@reductions[[paste0(reduction_name, "_all")]] <- obj@reductions[[reduction_name]]
}

names(obj@reductions)

In [None]:
# Gene names from the protein-coding list (per the original note, derived from
# the reference annotation refdata-gex-mm10-2020-A). Every column of the file
# is flattened into one vector.
protein_coding_genes <- "DEGs/nothreshold/protein_coding_gene_names_filtered.txt" %>%
  read.csv(header = TRUE, stringsAsFactors = FALSE) %>%
  unlist()

In [None]:
# Keep only the protein-coding genes that exist as features of the object
gene_in_object <- protein_coding_genes %in% rownames(obj)
selected_genes <- protein_coding_genes[gene_in_object]

# Scale the selected genes, then compute a 35-component PCA restricted to them
# and store it as "pca_protein_coding".
obj <- obj %>%
  ScaleData(features = selected_genes, verbose = FALSE) %>%
  RunPCA(
    features = selected_genes,
    npcs = 35,
    reduction.name = "pca_protein_coding",
    verbose = FALSE
  )

In [None]:
options(repr.plot.width = 8, repr.plot.height = 4, repr.plot.res = 300)

# Batch structure in PCA space: all genes (PC 1 vs 5) next to the
# protein-coding-only PCA (PC 1 vs 6).
plot1 <- DimPlot(
  obj,
  reduction = "pca_all", dims = c(1, 5),
  group.by = "batch", shuffle = TRUE, cols = palette.treatment
) + ggtitle("all genes")

plot2 <- DimPlot(
  obj,
  reduction = "pca_protein_coding", dims = c(1, 6),
  group.by = "batch", shuffle = TRUE, cols = palette.treatment
) + ggtitle("only protein_coding")

# Side-by-side layout (patchwork)
combined_plot <- plot1 | plot2

# Show the figure in the notebook
print(combined_plot)

# Write the figure to disk
ggsave(
  "Plots/PCA_Batch_Comparison_protein_coding.svg",
  plot = combined_plot,
  width = 20, height = 9, scale = 1, units = "cm", dpi = 300
)

In [None]:
# Integrate over "batch" with Harmony, starting from the protein-coding PCA
obj$batch <- as.factor(obj$batch)

obj <- RunHarmony(
  obj,
  group.by.vars = c("batch"),
  plot_convergence = TRUE,
  assay.use = "RNA",
  reduction = "pca_protein_coding",
  reduction.save = "harmony_protein_coding",
  verbose = FALSE
)

# UMAP, neighbour graph and clustering on the 35 Harmony dimensions.
# FindClusters replaces obj$seurat_clusters and the active identities.
obj <- RunUMAP(
  obj,
  dims = 1:35,
  reduction = "harmony_protein_coding",
  reduction.name = "umap_protein_coding",
  verbose = FALSE
)
obj <- FindNeighbors(
  obj,
  reduction = "harmony_protein_coding", dims = 1:35, verbose = FALSE
)
obj <- FindClusters(obj, resolution = 0.25)

In [None]:
# Make the protein-coding-only reductions the default ones by copying each
# "<name>_protein_coding" reduction over "<name>".
for (reduction_name in c("pca", "umap", "harmony")) {
  obj@reductions[[reduction_name]] <-
    obj@reductions[[paste0(reduction_name, "_protein_coding")]]
}

names(obj@reductions)

In [None]:
# Reassign initial seurat_clusters back to object.
# Order matters: first keep the clustering from the protein-coding run under
# its own column, then overwrite seurat_clusters with the values stored in the
# obj_CMcomb snapshot.
obj$seurat_clusters_protein_coding <- obj$seurat_clusters
obj$seurat_clusters <- obj_CMcomb$seurat_clusters

In [None]:
# Save the object now holding both the all-genes and protein-coding reductions.
SaveH5Seurat(obj, "seurat_objects/obj_seu_merge_harmony_sgl_addmodule_rename_CMcomb_onlyprotcod")

In [None]:
# UMAP plots with only protein coding genes
options(repr.plot.width = 3, repr.plot.height = 2, repr.plot.res = 300)

# One legend-free panel on the protein-coding UMAP, coloured by `group`
umap_protein_coding_panel <- function(group, title, palette, label = FALSE) {
  panel <- DimPlot(
    obj,
    reduction = "umap_protein_coding", group.by = group,
    label = label, shuffle = TRUE, cols = palette
  ) +
    umap_theme +
    ggtitle(title) +
    theme(text = element_text(size = 15))
  panel & NoLegend()
}

UMAP_rename_seurat_clusters <- umap_protein_coding_panel(
  "seurat_clusters", "UMAP seurat clusters", palette.17, label = TRUE
)
UMAP_rename_cell_type <- umap_protein_coding_panel(
  "cell_type_CMcomb", "UMAP cell types", palette.CMcomb, label = TRUE
)
UMAP_rename_treatment <- umap_protein_coding_panel(
  "treatment", "UMAP treatment", palette.treatment
)
UMAP_rename_sample_id <- umap_protein_coding_panel(
  "sample_id", "UMAP sample_id", palette.12
)
UMAP_sex <- umap_protein_coding_panel("sex", "UMAP sex", palette.sex)

# Save every panel as a 30 x 20 cm SVG
ggsave("Plots/UMAP_rename_seurat_clusters.svg", UMAP_rename_seurat_clusters, width = 30, height = 20, units = "cm", dpi = 300)
ggsave("Plots/UMAP_rename_cell_type.svg", UMAP_rename_cell_type, width = 30, height = 20, units = "cm", dpi = 300)
ggsave("Plots/UMAP_rename_treatment.svg", UMAP_rename_treatment, width = 30, height = 20, units = "cm", dpi = 300)
ggsave("Plots/UMAP_rename_sample_id.svg", UMAP_rename_sample_id, width = 30, height = 20, units = "cm", dpi = 300)
ggsave("Plots/UMAP_rename_sex.svg", UMAP_sex, width = 30, height = 20, units = "cm", dpi = 300)

# Display the panels
UMAP_rename_seurat_clusters
suppressWarnings(print(UMAP_rename_cell_type))
UMAP_rename_treatment
UMAP_rename_sample_id
UMAP_sex

In [None]:
# Density plot UMAP
options(repr.plot.width = 10, repr.plot.height = 3, repr.plot.res = 300)

Idents(obj) <- "cell_type_CMcomb"

# Base UMAP; combine = FALSE returns a list of plain ggplot objects
pl1 <- UMAPPlot(obj, cols = palette.CMcomb, combine = FALSE)

# Custom colour scale. cet_pal() is provided by the cetcolor package, which is
# not attached at the top of this notebook, so it is called by namespace.
scale.col <- cetcolor::cet_pal(16, name = "fire")

# Axis columns of the plotted data are "<key>1" and "<key>2" of the "umap"
# reduction; read the key from the object instead of hard-coding it.
umap_axes <- paste0(Key(obj[["umap"]]), 1:2)

# Overlay filled 2D density contours on the UMAP
umap_density_plot <- pl1[[1]] +
  stat_density_2d(
    aes(
      x = .data[[umap_axes[1]]],
      y = .data[[umap_axes[2]]],
      fill = after_stat(level)
    ),
    linewidth = 0.2, geom = "density_2d_filled",
    colour = "ivory", alpha = 0.4, n = 300, h = c(1.3, 1.3)
  ) +
  scale_fill_gradientn(colours = scale.col)

ggsave("Plots/UMAP_density_plot.png", plot = umap_density_plot, width = 8, height = 4, dpi = 300)

In [None]:
# Restrict to CTRL + ALDO or ALDO + REC nuclei and report, for each subset,
# the percentage of nuclei per combined cell type.
obj_CTRL_ALDO <- subset(obj, subset = treatment %in% c("CTRL", "ALDO"))
obj_ALDO_REC <- subset(obj, subset = treatment %in% c("ALDO", "REC"))

treatment_subsets <- list(
  "CTRL and ALDO" = obj_CTRL_ALDO,
  "ALDO and REC" = obj_ALDO_REC
)

for (subset_label in names(treatment_subsets)) {
  # Metadata of the current subset; one row per nucleus
  metadata <- treatment_subsets[[subset_label]]@meta.data
  total_nuclei <- nrow(metadata)

  # Nuclei per cell type, as a percentage of all nuclei (one decimal place)
  cell_type_counts <- table(metadata$cell_type_CMcomb)
  cell_type_percentage <- round((cell_type_counts / total_nuclei) * 100, 1)

  print(paste("cell_type_percentage", subset_label))
  print(cell_type_percentage)
}

In [None]:
# UMAPs restricted to CTRL and ALDO nuclei
options(repr.plot.width = 3, repr.plot.height = 2, repr.plot.res = 300)

title_text_size <- theme(text = element_text(size = 15))

# Coloured by combined cell type
UMAP_rename_cell_type <- DimPlot(
  obj_CTRL_ALDO,
  reduction = "umap_protein_coding", group.by = "cell_type_CMcomb",
  label = TRUE, shuffle = TRUE, cols = palette.CMcomb
) + umap_theme + ggtitle("UMAP cell types") + title_text_size & NoLegend()

# Coloured by treatment (two colours for the two remaining groups)
UMAP_rename_treatment <- DimPlot(
  obj_CTRL_ALDO,
  reduction = "umap_protein_coding", group.by = "treatment",
  label = FALSE, shuffle = TRUE, cols = c("#C1C1C1", "#AF0000")
) + umap_theme + ggtitle("UMAP treatment") + title_text_size & NoLegend()

ggsave("Plots/UMAP_rename_cell_type_CTRL_ALDO.svg", UMAP_rename_cell_type, width = 30, height = 20, units = "cm", dpi = 300)
ggsave("Plots/UMAP_rename_treatment_CTRL_ALDO.svg", UMAP_rename_treatment, width = 30, height = 20, units = "cm", dpi = 300)

suppressWarnings(print(UMAP_rename_cell_type))
UMAP_rename_treatment

In [None]:
# UMAPs restricted to ALDO and REC nuclei
options(repr.plot.width = 3, repr.plot.height = 2, repr.plot.res = 300)

title_text_size <- theme(text = element_text(size = 15))

# Coloured by combined cell type
UMAP_rename_cell_type <- DimPlot(
  obj_ALDO_REC,
  reduction = "umap_protein_coding", group.by = "cell_type_CMcomb",
  label = TRUE, shuffle = TRUE, cols = palette.CMcomb
) + umap_theme + ggtitle("UMAP cell types") + title_text_size & NoLegend()

# Coloured by treatment (two colours for the two remaining groups)
UMAP_rename_treatment <- DimPlot(
  obj_ALDO_REC,
  reduction = "umap_protein_coding", group.by = "treatment",
  label = FALSE, shuffle = TRUE, cols = c("#AF0000", "#006AF3")
) + umap_theme + ggtitle("UMAP treatment") + title_text_size & NoLegend()

ggsave("Plots/UMAP_rename_cell_type_ALDO_REC.svg", UMAP_rename_cell_type, width = 30, height = 20, units = "cm", dpi = 300)
ggsave("Plots/UMAP_rename_treatment_ALDO_REC.svg", UMAP_rename_treatment, width = 30, height = 20, units = "cm", dpi = 300)

suppressWarnings(print(UMAP_rename_cell_type))
UMAP_rename_treatment

# Features with non-zero counts 

In [None]:
# Write, for every annotated cell type, the genes with at least one count.
#
# seurat_object       Seurat object with `chamber` and `cell_type` metadata.
# LA_LV               Value of `chamber` to keep (also used in the file name).
# cluster_annotations Named list/vector whose values are `cell_type` labels.
#
# Side effect: one CSV per cell type at
# "nonzerocounts/nonzerocounts_<LA_LV>_<cell type>.csv" with a single column
# `names`. Returns NULL invisibly.
export_non_zero_counts <- function(seurat_object, LA_LV, cluster_annotations) {
    # Make sure the output folder exists before writing into it
    dir.create("nonzerocounts", showWarnings = FALSE, recursive = TRUE)

    # Keep only the requested chamber
    seurat_table <- subset(x = seurat_object, subset = chamber == LA_LV)

    for (cluster_id in names(cluster_annotations)) {
        cell <- cluster_annotations[[cluster_id]]
        seurat_table_filtered <- subset(x = seurat_table, subset = cell_type == cell)

        # Raw counts of the RNA assay for this cell type
        counts <- GetAssayData(seurat_table_filtered, assay = "RNA", layer = "counts")

        # Genes detected in at least one nucleus. Plain indexing avoids a bare
        # filter() call, which other attached packages can mask, and avoids
        # renaming a column by its deparsed expression.
        detected <- Matrix::rowSums(counts) > 0
        nonzero <- data.frame(names = rownames(counts)[detected])

        # Export to CSV
        filename <- paste0("nonzerocounts/nonzerocounts_", LA_LV, "_", cell, ".csv")
        write.csv(nonzero, file = filename, row.names = FALSE)
    }

    invisible(NULL)
}

In [None]:
# Export the detected genes per cell type for the "LV" chamber, using the
# labels held in cluster_annotations.
export_non_zero_counts(obj, "LV", cluster_annotations)

In [None]:
# Load the list of protein-coding genes (column `protein_coding` of the file)
protein_coding_genes <- unlist(read.csv(
  "DEGs/nothreshold/protein_coding_gene_names_filtered.txt",
  header = TRUE,
  stringsAsFactors = FALSE
)$protein_coding)

# Non-zero count files. `pattern` is a regular expression, so anchor on the
# ".csv" extension; the listing is not recursive, so files already written to
# the "filtered" subfolder are not picked up again.
file_names <- list.files(
  "nonzerocounts",
  pattern = "\\.csv$",
  full.names = TRUE
)

# Read each file and keep only rows whose gene is protein coding. Base
# indexing is used because a bare filter() can be masked by other attached
# packages.
filtered_files <- lapply(file_names, function(file_path) {
  nonzero_data <- read.csv(file_path, header = TRUE, stringsAsFactors = FALSE)
  nonzero_data[nonzero_data$names %in% protein_coding_genes, , drop = FALSE]
})

# Save each filtered table as "<original name>_filtered.csv"
dir.create("nonzerocounts/filtered", showWarnings = FALSE, recursive = TRUE)

for (i in seq_along(file_names)) {
  original_name <- basename(file_names[i])
  new_name <- paste0(sub("\\.csv$", "", original_name), "_filtered.csv")

  write.csv(
    filtered_files[[i]],
    file = file.path("nonzerocounts/filtered", new_name),
    row.names = FALSE
  )
}

# Cell-cycle score

In [None]:
# Cell-cycle gene list for mouse, taken from the hbc/tinyatlas repository
# (https://github.com/hbc/tinyatlas/tree/master/cell_cycle).
cc_url <- "https://raw.githubusercontent.com/hbc/tinyatlas/master/cell_cycle/Mus_musculus.csv"

# Download the raw CSV text, then parse it into a data frame
cc_file <- getURL(cc_url)
cell_cycle_genes <- read.csv(text = cc_file)

In [None]:
# Open AnnotationHub and look up the mouse EnsDb resources
ah <- AnnotationHub()
ahDb <- query(ah, pattern = c("Mus musculus", "EnsDb"), ignore.case = TRUE)

# Take the last record returned by the query
# NOTE(review): "last" is assumed to be the most recent release and will change
# over time - pin an ID if the annotation has to be reproducible.
id <- tail(rownames(mcols(ahDb)), n = 1)

# Fetch that EnsDb database
edb <- ah[[id]]

# Gene-level table, reduced to the columns used downstream
annotations <- genes(edb, return.type = "data.frame")
annotations <- annotations[
  ,
  c("gene_id", "gene_name", "seq_name", "gene_biotype", "description")
]

In [None]:
# Attach gene names to the cell-cycle genes by matching their Ensembl IDs
cell_cycle_markers <- dplyr::left_join(
  cell_cycle_genes,
  annotations,
  by = c("geneID" = "gene_id")
)

# Gene names of the S-phase markers
is_s_phase <- cell_cycle_markers$phase %in% "S"
s_genes <- cell_cycle_markers$gene_name[is_s_phase]

# Gene names of the G2/M-phase markers
is_g2m_phase <- cell_cycle_markers$phase %in% "G2/M"
g2m_genes <- cell_cycle_markers$gene_name[is_g2m_phase]

In [None]:
# Perform cell cycle scoring with the S and G2/M gene sets built above. Later
# cells read the resulting `Phase` column from the object's metadata.
obj <- CellCycleScoring(obj,
                        g2m.features = g2m_genes,
                        s.features = s_genes)

In [None]:
# Working copy of the cell metadata with fixed factor orders: Phase as
# G1, S, G2M and treatment as CTRL, ALDO, REC (left to right in plots).
# Only this copy is changed; the metadata inside `obj` is left as it is.
metadata <- obj@meta.data

metadata[["Phase"]] <- factor(
  metadata[["Phase"]],
  levels = c("G1", "S", "G2M")
)
metadata[["treatment"]] <- factor(
  metadata[["treatment"]],
  levels = c("CTRL", "ALDO", "REC")
)

In [None]:
# Save final objects: the same Seurat object is written twice, once as
# h5Seurat and once as an RDS file.
SaveH5Seurat(obj, "seurat_objects/obj_seu_merge_harmony_sgl_addmodule_rename_CMcomb_onlyprotcod_ccscore")
saveRDS(obj, file = "seurat_objects/obj_seu_merge_harmony_sgl_addmodule_rename_CMcomb_onlyprotcod_ccscore.rds")

In [None]:
options(repr.plot.width = 3, repr.plot.height = 2, repr.plot.res = 300)

# palette.3 is ordered G1, S, G2M. Name the colours so each phase gets its
# intended colour whatever order the Phase values are plotted in; an unnamed
# palette is assigned by level order, which is alphabetical (G1, G2M, S) when
# Phase is not already an ordered factor.
phase_colours <- setNames(palette.3, c("G1", "S", "G2M"))

# Visualize the PCA (components 10 and 11), grouping by cell cycle phase
DimPlot_cellcycle <- DimPlot(
  obj,
  reduction = "pca_protein_coding",
  dims = c(10, 11),
  group.by = "Phase",
  cols = phase_colours
)

ggsave("Plots/DimPlot_cellcycle.svg", DimPlot_cellcycle, width = 11, height = 9, units = "cm", dpi = 300)

DimPlot_cellcycle

In [None]:
# Average expression of the CYC-cluster marker genes, shown on the PCA
options(repr.plot.width = 3, repr.plot.height = 2, repr.plot.res = 300)

# Marker genes of actively cycling cells
active_cell_cycle_markers <- c(
  "Top2a", "Mki67", "Kif11", "Cenpp", "E2f7", "Smc2", "Cep192"
)

# Split the markers into those found in the object and those missing from it
marker_found <- active_cell_cycle_markers %in% rownames(obj)
present_genes <- active_cell_cycle_markers[marker_found]
missing_genes <- setdiff(active_cell_cycle_markers, present_genes)
cat("Missing genes:", missing_genes, "\n")

# Per-cell mean over the present markers, taken from the "data" layer
marker_expression <- GetAssayData(obj, assay = "RNA", layer = "data")
average_expression <- Matrix::colMeans(
  marker_expression[present_genes, , drop = FALSE]
)

# Store the score as a metadata column
obj$active_cell_cycle_marker_score <- average_expression

# Feature plot of the score on PCA components 10 and 11
FeatureCC <- FeaturePlot(
  obj,
  features = "active_cell_cycle_marker_score",
  reduction = "pca",
  dims = c(10, 11)
) +
  ggplot2::scale_color_gradient(low = "#002ed4", high = "#AF0000") +
  ggplot2::labs(title = "active CC Score on PCA", color = "Activation Score") +
  ggplot2::theme_minimal()

ggsave("Plots/PCA_cellcycle_score.svg", FeatureCC, width = 12, height = 9, units = "cm", dpi = 300)

FeatureCC

In [None]:
options(repr.plot.width = 3, repr.plot.height = 2, repr.plot.res = 300)

UMAP_cycle <- DimPlot(obj, reduction = "umap_protein_coding", label = FALSE, group.by = "Phase", shuffle = TRUE, cols = palette.3) +
  umap_theme + ggtitle("UMAP cellcycle") + theme(text = element_text(size = 15)) 

ggsave("Plots/UMAP_cellcycle.svg", UMAP_cycle, units = "cm", dpi = 300, width = 30, height = 20)

UMAP_cycle

In [None]:
options(repr.plot.width = 5, repr.plot.height = 10, repr.plot.res = 300)

# Combine CM_0, CM_1, and CM_2 into a single category 'CM'
metadata <- metadata %>%
  mutate(cell_type_combined = case_when(
    cell_type %in% c("CM_0", "CM_1", "CM_2") ~ "CM",  # Combine CM_0, CM_1, CM_2
    TRUE ~ cell_type  # Keep other cell types as they are
  ))

# Reorder cell_type_combined based on decreasing number of entries
cell_type_order <- metadata %>%
  group_by(cell_type_combined) %>%
  summarize(count = n()) %>%
  arrange(desc(count)) %>%
  pull(cell_type_combined)

# Reorder cell_type_combined factor in metadata
metadata$cell_type_combined <- factor(metadata$cell_type_combined, levels = cell_type_order)

# Calculate phase proportions by combined cell type and treatment
phase_counts <- metadata %>%
  group_by(cell_type_combined, treatment, Phase) %>%
  summarize(count = n(), .groups = "drop") %>%
  group_by(cell_type_combined, treatment) %>%
  mutate(proportion = count / sum(count),
         percentage = round(proportion * 100, 1))  # Add percentage column

# Plot pie charts with black borders on the slices and percentage labels
Piechart_cellcycle <- ggplot(phase_counts, aes(x = "", y = proportion, fill = Phase)) +
  geom_bar(stat = "identity", width = 0.2, color = "black") +  # Add black border to each slice
  coord_polar("y") +
  facet_grid(cell_type_combined ~ treatment) +  # Use the new combined column for faceting
  scale_fill_manual(values = palette.3) +  # Apply custom colors
  theme_void() +
  theme(strip.text = element_text(size = 10)) +
  labs(title = "Cell Cycle Phase Distribution by Treatment") +
  geom_text(aes(label = paste(percentage)), 
            position = position_stack(vjust = 0.5),  # Position text in the center of each slice
            color = "darkgrey", size = 2)  # Customize the text appearance

ggsave("Plots/Piechart_cellcycle.svg", Piechart_cellcycle, units = "cm", dpi = 300, width = 15, height = 30)

suppressWarnings(print(Piechart_cellcycle))

In [None]:
# Nuclei per cell type x treatment, summed over the cell cycle phases
total_nuclei <- dplyr::summarize(
  dplyr::group_by(phase_counts, cell_type_combined, treatment),
  total_nuclei = sum(count),
  .groups = "drop"
)

# Wide layout: one row per cell type, one column per treatment; combinations
# without nuclei are filled with 0.
total_nuclei_table <- tidyr::pivot_wider(
  total_nuclei,
  names_from = treatment,
  values_from = total_nuclei,
  values_fill = list(total_nuclei = 0)
)

# Express every treatment column (CTRL through REC) as a percentage of its
# column total, rounded to one decimal place.
to_column_percent <- function(values) round(values / sum(values) * 100, 1)

total_nuclei_table_percent <- dplyr::mutate(
  total_nuclei_table,
  dplyr::across(CTRL:REC, to_column_percent)
)

# Show the percentage table
total_nuclei_table_percent

In [4]:
# Print the R version, platform and attached packages of this session.
sessionInfo()

R version 4.3.3 (2024-02-29)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 22.04.2 LTS

Matrix products: default
BLAS/LAPACK: /media/daten/dmeral/micromamba/envs/scrna_dm/lib/libopenblasp-r0.3.27.so;  LAPACK version 3.12.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: Etc/UTC
tzcode source: system (glibc)

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] networkD3_0.4               ensembldb_2.26.0           
 [3] AnnotationFilter_1.26.0     GenomicFeatures_1.54.4     
 [5] AnnotationDbi_1.64.1        AnnotationHub_3.10.1       
 [7] BiocFileCache_2.10.