# Load libraries and Themes

In [1]:
suppressPackageStartupMessages({
  suppressWarnings({
    library(Seurat)
    library(SoupX)
    library(ggplot2)
    library(tidyverse)
    library(harmony)
    library(SeuratDisk)
    library(SingleCellExperiment)
    library(DESeq2)
    library(dplyr)
    library(EnhancedVolcano)
    library(Rsamtools)
    library(svglite)
    library(viridis)
    library(gridExtra)
    library(dplyr)
    library(pals)
            })})

In [None]:
# Color palettes

# Cluster palette: 21 colours drawn from the "stepped" palette in shuffled
# order. The shuffle uses the RNG, and the notebook only calls set.seed()
# further down, so seed here to get the same cluster colours in every session.
set.seed(1234)
palette.21 <- DiscretePalette(21, palette = "stepped", shuffle = TRUE)

# Doublet classification palette
palette.ex <- c(
    "#000000",#doublet
    "#bababa"#singlet
)

# One colour per treatment group
palette.treatment <- c(
    "#C1C1C1", # CTRL
    "#AF0000", # ALDO
    "#006AF3"  # REC
)

# One colour per sample (12 entries), coloured by the sample's treatment
palette.treatmentid <- c(
    "#C1C1C1", # CTRL
    "#C1C1C1", # CTRL
    "#AF0000", # ALDO
    "#AF0000", # ALDO
    "#AF0000", # ALDO
    "#AF0000", # ALDO
    "#C1C1C1", # CTRL
    "#C1C1C1", # CTRL
    "#006AF3",  # REC
    "#006AF3",  # REC
    "#006AF3",  # REC
    "#006AF3"  # REC
)

In [None]:
# Bare theme for UMAP panels: no axes, no panel decoration
umap_theme <- theme(
  # axes
  axis.line = element_blank(),
  axis.text.x = element_blank(),
  axis.text.y = element_blank(),
  axis.ticks = element_blank(),
  axis.title.x = element_blank(),
  axis.title.y = element_blank(),
  # panel
  panel.background = element_blank(),
  panel.border = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank()
)

In [None]:
# Project root: the relative paths used below (seurat_objects/, Plots/, DEGs/, cellnumbers/) resolve against it
setwd("/media/daten/dmeral/scseq_analysis/2024_LV_CTRL_ALDO_REC")

In [None]:
# Fix the RNG seed for the steps that follow
set.seed(1234)

# Split to even out contamination fraction across samples and merge

**Up to this point, the code was executed repeatedly while iterating `setContaminationFraction` in SoupX.** After integration with Harmony, CM gene expression was visualized in the UMAP and marker genes were determined across clusters. Based on this, sample-specific contamination fractions were determined. Here, the non-integrated Seurat objects are subset to their respective samples and merged into the final object.

In [None]:
# Sample identifiers, in the column order used by every metadata row below
samples <- c(
  "LV_2108", "LV_2109", "LV_2110", "LV_2111",
  "LV_2112", "LV_2113", "LV_2114", "LV_2115",
  "LV_2310", "LV_2311", "LV_2312", "LV_2313"
)

# One named row per annotation. Numbers and strings are mixed, so the result
# is a character matrix.
meta.data_tab <- rbind(
  replicate = rep(1, 12),
  treatment = c(rep("CTRL", 2), rep("ALDO", 4), rep("CTRL", 2), rep("REC", 4)),
  sex = c("f", "f", "f", "f", "m", "m", "m", "m", "f", "f", "m", "m"),
  batch = c(2, 1, 2, 1, 1, 2, 2, 1, 3, 3, 3, 3),
  chamber = rep("LV", 12),
  unique = 1:12,
  ContaminationFraction = c(25, 30, 30, 20, 20, 25, 35, 30, 30, 15, 10, 15)
)
colnames(meta.data_tab) <- samples

# Convert to a table and display it
meta.data_tab <- as.table(meta.data_tab)

meta.data_tab

In [None]:
# Load one saved object per contamination fraction (0.10 to 0.35 in steps of 0.05).
# E.g. setContaminationFraction_0.1 reflects 10% of reads removed as contamination.
setContaminationFraction_0.10_dbl <- LoadH5Seurat("seurat_objects/setContaminationFraction/setContaminationFraction_0.1_dbl.h5seurat")
setContaminationFraction_0.15_dbl <- LoadH5Seurat("seurat_objects/setContaminationFraction/setContaminationFraction_0.15_dbl.h5seurat")
setContaminationFraction_0.20_dbl <- LoadH5Seurat("seurat_objects/setContaminationFraction/setContaminationFraction_0.2_dbl.h5seurat")
setContaminationFraction_0.25_dbl <- LoadH5Seurat("seurat_objects/setContaminationFraction/setContaminationFraction_0.25_dbl.h5seurat")
setContaminationFraction_0.30_dbl <- LoadH5Seurat("seurat_objects/setContaminationFraction/setContaminationFraction_0.3_dbl.h5seurat")
setContaminationFraction_0.35_dbl <- LoadH5Seurat("seurat_objects/setContaminationFraction/setContaminationFraction_0.35_dbl.h5seurat")

In [None]:
# From each per-fraction object keep only the samples assigned to that
# contamination fraction (same assignment as the ContaminationFraction row of
# meta.data_tab).
subset_010 <- subset(setContaminationFraction_0.10_dbl, subset = sample_id %in% c("LV_2312"))
subset_015 <- subset(setContaminationFraction_0.15_dbl, subset = sample_id %in% c("LV_2311", "LV_2313"))
subset_020 <- subset(setContaminationFraction_0.20_dbl, subset = sample_id %in% c("LV_2111", "LV_2112"))
subset_025 <- subset(setContaminationFraction_0.25_dbl, subset = sample_id %in% c("LV_2108", "LV_2113"))
subset_030 <- subset(setContaminationFraction_0.30_dbl, subset = sample_id %in% c("LV_2110", "LV_2310", "LV_2109", "LV_2115"))
subset_035 <- subset(setContaminationFraction_0.35_dbl, subset = sample_id %in% c("LV_2114"))

In [None]:
# Merge the six per-fraction subsets into a single object
obj_seu_merge_dbl <- merge(
  subset_010,
  y = c(subset_015, subset_020, subset_025, subset_030, subset_035),
  merge.data = TRUE,
  project = "LV_C57Bl6_Aldosterone"
)

In [None]:
# Persist the merged object; it is read back further down from seurat_objects/obj_seu_merge_dbl.h5seurat
SaveH5Seurat(obj_seu_merge_dbl, "seurat_objects/obj_seu_merge_dbl")

In [None]:
# Filter singlets: keep only cells whose scDblFinder.class is "singlet"
obj_seu_merge <- subset(obj_seu_merge_dbl, subset = scDblFinder.class == "singlet")

# Visualize doublets in UMAP

# Read the merged object back from disk (NOTE(review): presumably the file written by SaveH5Seurat above, i.e. still including doublets; confirm)
obj_seu_merge_dbl <- LoadH5Seurat("seurat_objects/obj_seu_merge_dbl.h5seurat")

In [None]:
# Make RNA the default assay of the merged object
DefaultAssay(object = obj_seu_merge_dbl) <- "RNA"

In [None]:
# QC filter: keep cells with 300 < nFeature_RNA < 5000,
# 500 < nCount_RNA < 15000 and percent.mt < 5
obj <- subset(obj_seu_merge_dbl, subset = nFeature_RNA > 300 & nFeature_RNA < 5000 & 
                        nCount_RNA > 500 & nCount_RNA < 15000 &
                        percent.mt < 5)

# Preprocessing: normalise, pick variable features, scale, then PCA with 35 components
obj <- NormalizeData(obj, verbose = FALSE)
obj <- FindVariableFeatures(obj, verbose = FALSE)
obj <- ScaleData(obj, verbose = FALSE)
obj <- RunPCA(obj, assay = "RNA", npcs = 35, verbose = FALSE)

In [None]:
# Notebook display size for the elbow plot
options(repr.plot.width = 3, repr.plot.height = 2)

# RunPCA was run with npcs = 35, so plot exactly those 35 components.
# Requesting 50 only raises a warning and is clamped to the PCs available.
ElbowPlot(obj, ndims = 35)

In [None]:
# Store batch as a factor; it is the grouping variable passed to RunHarmony below
obj$batch <- as.factor(obj$batch)

In [None]:
# Run Harmony on "batch". n = 2 is too low for a multifactorial analysis;
# obj_umap is processed similarly but without this step.
obj <- obj %>%
  RunHarmony(group.by.vars = c("batch"), plot_convergence = FALSE, assay.use = "RNA", verbose = FALSE)

In [None]:
# UMAP, neighbour graph and clustering, all on the "harmony" reduction (dims 1-35)
obj_harmony_umap <- obj %>%
  RunUMAP(reduction = "harmony", dims = 1:35, min.dist = 0.4, spread = 1.0, verbose = FALSE) %>%
  FindNeighbors(reduction = "harmony", dims = 1:35, verbose = FALSE) %>%
  FindClusters(resolution = 0.25)

# Save the clustered object
SaveH5Seurat(obj_harmony_umap, "seurat_objects/obj_seu_merge_harmony_dbl")

In [None]:
# Cells per cluster, broken down by sample
number <- table(
  obj_harmony_umap@meta.data[["sample_id"]],
  obj_harmony_umap@meta.data[["seurat_clusters"]]
)
write.csv(number, file = "cellnumbers/number_perCluster_sample_id_dbl.csv")

# Cells per cluster, broken down by treatment
# NOTE(review): "Teatment" in the file name looks like a typo; kept unchanged so existing paths still match
number <- table(
  obj_harmony_umap@meta.data[["treatment"]],
  obj_harmony_umap@meta.data[["seurat_clusters"]]
)
write.csv(number, file = "cellnumbers/number_perCluster_Teatment_dbl.csv")

In [None]:
# Notebook display size
options(repr.plot.width = 5, repr.plot.height = 4, repr.plot.res = 300)

# UMAP coloured by doublet classification
UMAP_dbl <- DimPlot(
  obj_harmony_umap,
  group.by = "scDblFinder.class",
  label = FALSE,
  shuffle = TRUE,
  raster = FALSE,
  cols = palette.ex
) +
  umap_theme +
  ggtitle("UMAP excluded doublets") +
  theme(text = element_text(size = 15), legend.position = "none")

ggsave(
  "Plots/UMAP_dbl.svg", UMAP_dbl,
  units = "cm", dpi = 300, width = 30, height = 20
)

UMAP_dbl

In [None]:
# Notebook display size
options(repr.plot.width = 5, repr.plot.height = 4, repr.plot.res = 300)

# Same UMAP, coloured and labelled by cluster
UMAP_dbl_seurat_clusters <- DimPlot(
  obj_harmony_umap,
  group.by = "seurat_clusters",
  label = TRUE,
  shuffle = TRUE,
  raster = FALSE,
  cols = palette.21
) +
  umap_theme +
  ggtitle("UMAP excluded doublets") +
  theme(text = element_text(size = 15), legend.position = "none")

ggsave(
  "Plots/UMAP_dbl_seurat_clusters.svg", UMAP_dbl_seurat_clusters,
  units = "cm", dpi = 300, width = 30, height = 20
)

UMAP_dbl_seurat_clusters

In [None]:
# Notebook display size
options(repr.plot.width = 5, repr.plot.height = 4, repr.plot.res = 300)

# Distribution of scDblFinder.weighted per cluster, without points or legend
VlnPlot_dbl <- VlnPlot(
  obj_harmony_umap,
  group.by = "seurat_clusters",
  features = "scDblFinder.weighted",
  pt.size = 0,
  raster = TRUE,
  ncol = 2,
  cols = palette.21
) & NoLegend()

ggsave(
  "Plots/VlnPlot_dbl_scDblFinder.weighted.svg", VlnPlot_dbl,
  units = "cm", dpi = 300, width = 30, height = 20
)

VlnPlot_dbl

In [None]:
# Positive markers per cluster (Wilcoxon test, min.pct 0.50, log fold-change threshold 0.58)
all.markers <- FindAllMarkers(obj_harmony_umap, only.pos = TRUE, min.pct = 0.50, logfc.threshold = 0.58, test.use = "wilcox")

# Show the five strongest markers of each cluster. FindAllMarkers() stores the
# cluster in the `cluster` column. Grouping by the string "seurat_clusters"
# created a single constant group, so only five rows were shown overall.
all.markers %>%
  group_by(cluster) %>%
  slice_max(n = 5, order_by = avg_log2FC)

write.csv(all.markers, file = "DEGs/all.markers_wilcox_harmony_dbl.csv")

# Harmony (integration)

**Run Harmony, then compute the UMAP from the Harmony embeddings instead of PCA and visualize**
Resolution can dramatically affect your clustering. While the overall shape of the UMAP will not change, the number of clusters will be altered. 
This was dynamically adjusted. Too high a resolution leads to ill-defined clusters that lack unique markers. Too low a resolution leads to clusters that are too large and miss meaningful information.

In [None]:
# Switch default assay of the singlet object to RNA
DefaultAssay(object = obj_seu_merge) <- "RNA"
# Echo the default assay to confirm the switch
DefaultAssay(object = obj_seu_merge)

In [None]:
# Filter based on nFeature_RNA, nCount_RNA and sanity check for percent.mt:
# keep cells with 300 < nFeature_RNA < 5000, 500 < nCount_RNA < 15000, percent.mt < 5
obj <- subset(obj_seu_merge, subset = nFeature_RNA > 300 & nFeature_RNA < 5000 & 
                        nCount_RNA > 500 & nCount_RNA < 15000 &
                        percent.mt < 5)

# Preprocessing: normalise, pick variable features, scale, then PCA with 35 components
obj <- NormalizeData(obj, verbose = FALSE)
obj <- FindVariableFeatures(obj, verbose = FALSE)
obj <- ScaleData(obj, verbose = FALSE)
obj <- RunPCA(obj, assay = "RNA", npcs = 35, verbose = FALSE)

In [None]:
# Notebook display size for the elbow plot
options(repr.plot.width = 3, repr.plot.height = 2)

# RunPCA was run with npcs = 35, so plot exactly those 35 components.
# Requesting 50 only raises a warning and is clamped to the PCs available.
ElbowPlot(obj, ndims = 35)

In [None]:
# Process UMAP before harmony for comparison. No `reduction` argument is
# passed, so each function uses its default reduction (dims 1-35).
obj_seu_merge_UMAP <- obj %>%
  RunUMAP(dims = 1:35, min.dist = 0.4, spread = 1.0, verbose = FALSE) %>%
  FindNeighbors(dims = 1:35, verbose = FALSE) %>%
  FindClusters(resolution = 0.25)

# Save the pre-Harmony object
SaveH5Seurat(obj_seu_merge_UMAP, "seurat_objects/obj_seu_merge_beforeharmony_sgl")

In [None]:
# Integrate "batch" to minimize batch-specific clustering
# (display size applies to the convergence plot requested below)
options(repr.plot.width = 3, repr.plot.height = 2)

# Harmony groups by this column; store it as a factor
obj$batch <- as.factor(obj$batch)

obj <- obj %>%
  RunHarmony(group.by.vars = c("batch"), plot_convergence = TRUE, assay.use = "RNA", verbose = FALSE)

In [None]:
# Run UMAP, neighbour graph and clustering on the "harmony" reduction (dims 1-35)
# NOTE(review): min.dist / spread are not set here, unlike the pre-Harmony
# comparison and the doublet UMAP (min.dist = 0.4, spread = 1.0); confirm intended
obj_harmony <- obj %>%
  RunUMAP(reduction = "harmony", dims = 1:35, verbose = FALSE) %>%
  FindNeighbors(reduction = "harmony", dims = 1:35, verbose = FALSE) %>%
  FindClusters(resolution = 0.25)

# Save the integrated singlet object
SaveH5Seurat(obj_harmony, "seurat_objects/obj_seu_merge_harmony_sgl")

In [None]:
# Sanity check: clusters next to doublet classification on the same UMAP
options(repr.plot.width = 10, repr.plot.height = 4, repr.plot.res = 300)

DimPlot(
  obj_harmony,
  reduction = "umap", label = TRUE, label.size = 3,
  group.by = "seurat_clusters"
) |
  DimPlot(
    obj_harmony,
    reduction = "umap", label = TRUE, label.size = 3,
    group.by = "scDblFinder.class"
  )

# Visualize CM contamination

In [None]:
# Top 20 marker genes defining cluster_0 (main CM cluster), wrapped in a
# one-element list as passed to AddModuleScore(features = ...)
CMgenes <- list(
  c(
    "Malat1", "Ttn", "Ryr2", "Cacna1c", "Pde4dip",
    "Rbm20", "Pde4d", "Myh6", "Ctnna3", "Slc8a1",
    "Sorbs1", "Fhl2", "Palld", "Fhod3", "Atp2a2",
    "Pcdh7", "Ldb3", "Tnnt2", "Ank2", "Pln"
  )
)

In [None]:
# Reference object: processed the same way but with the contamination fraction set to 0.00
obj_woSoupX <- LoadH5Seurat("seurat_objects/setContaminationFraction/setContaminationFraction_0_harmony_sgl.h5seurat")

## For comparison: data set w/o SoupX

In [None]:
# Label cells in clusters 0, 5 and 10 as "CM", all others as "non-CM"
obj_woSoupX$Cardiomyocyte <- ifelse(test = obj_woSoupX$seurat_clusters %in% c(0, 5, 10), yes = "CM", no = "non-CM")

In [None]:
# Score the CMgenes set per cell; the plots below read the score as "CMgenes1"
obj_woSoupX_addmodule <- AddModuleScore(obj_woSoupX,
                  features = CMgenes,
                  name = "CMgenes")

In [None]:
# Treatment-wise plot of module expression of CM marker genes without SoupX
options(repr.plot.width = 10, repr.plot.height = 5, repr.plot.res = 300)

# CM module score, CM vs non-CM, split by treatment, white boxplot overlay
Vln_CMgenes_wo <- VlnPlot(
  obj_woSoupX_addmodule,
  features = "CMgenes1",
  group.by = "Cardiomyocyte",
  pt.size = NULL,
  split.by = "treatment",
  cols = palette.treatment
) +
  geom_boxplot(
    position = position_dodge(0.9), notch = FALSE, colour = "white",
    outlier.shape = NA, linewidth = 1, width = 0.5
  )

# Same layout for Myh6
Vln_Myh6_wo <- VlnPlot(
  obj_woSoupX_addmodule,
  features = "Myh6",
  group.by = "Cardiomyocyte",
  pt.size = NULL,
  split.by = "treatment",
  cols = palette.treatment
) +
  geom_boxplot(
    position = position_dodge(0.9), notch = FALSE, colour = "white",
    outlier.shape = NA, linewidth = 1, width = 0.5
  )

ggsave(
  "Plots/VlnPlot_CMcontamination_woSoupX_CMgenes.svg", Vln_CMgenes_wo,
  units = "cm", dpi = 300, width = 15, height = 20
)
ggsave(
  "Plots/VlnPlot_CMcontamination_woSoupX_Myh6.svg", Vln_Myh6_wo,
  units = "cm", dpi = 300, width = 15, height = 20
)

Vln_CMgenes_wo | Vln_Myh6_wo

In [None]:
# Sample-wise plot of module expression of CM marker genes without SoupX
options(repr.plot.width = 20, repr.plot.height = 10, repr.plot.res = 300)

# CM module score, CM vs non-CM, split by sample, white boxplot overlay
Vln_CMgenes_wo <- VlnPlot(
  obj_woSoupX_addmodule,
  features = "CMgenes1",
  group.by = "Cardiomyocyte",
  pt.size = NULL,
  split.by = "sample_id",
  cols = palette.treatmentid
) +
  geom_boxplot(
    position = position_dodge(0.9), notch = FALSE, colour = "white",
    outlier.shape = NA, linewidth = 1, width = 0.5
  )

# Same layout for Myh6
Vln_Myh6_wo <- VlnPlot(
  obj_woSoupX_addmodule,
  features = "Myh6",
  group.by = "Cardiomyocyte",
  pt.size = NULL,
  split.by = "sample_id",
  cols = palette.treatmentid
) +
  geom_boxplot(
    position = position_dodge(0.9), notch = FALSE, colour = "white",
    outlier.shape = NA, linewidth = 1, width = 0.5
  )

ggsave(
  "Plots/VlnPlot_CMcontamination_woSoupX_CMgenes_splitbyid.svg", Vln_CMgenes_wo,
  units = "cm", dpi = 300, width = 40, height = 20
)
ggsave(
  "Plots/VlnPlot_CMcontamination_woSoupX_Myh6_splitbyid.svg", Vln_Myh6_wo,
  units = "cm", dpi = 300, width = 40, height = 20
)

Vln_CMgenes_wo
Vln_Myh6_wo

## Data Set w/ SoupX

In [None]:
# Label cells in clusters 0, 8 and 11 as "CM", all others as "non-CM"
obj_harmony$Cardiomyocyte <- ifelse(test = obj_harmony$seurat_clusters %in% c(0, 8, 11), yes = "CM", no = "non-CM")

In [None]:
# Score the CMgenes set per cell; the plots below read the score as "CMgenes1"
obj_harmony_addmodule <- AddModuleScore(obj_harmony,
                  features = CMgenes,
                  name = "CMgenes")

In [None]:
# Save the object including the Cardiomyocyte label and the module score
SaveH5Seurat(obj_harmony_addmodule, "seurat_objects/obj_seu_merge_harmony_sgl_addmodule")

In [None]:
# Treatment-wise plot of module expression of CM marker genes with SoupX
options(repr.plot.width = 20, repr.plot.height = 10, repr.plot.res = 300)

# CM module score, CM vs non-CM, split by treatment, white boxplot overlay
Vln_CMgenes <- VlnPlot(
  obj_harmony_addmodule,
  features = "CMgenes1",
  group.by = "Cardiomyocyte",
  pt.size = NULL,
  split.by = "treatment",
  cols = palette.treatment
) +
  geom_boxplot(
    position = position_dodge(0.9), notch = FALSE, colour = "white",
    outlier.shape = NA, linewidth = 1, width = 0.5
  )

# Same layout for Myh6
Vln_Myh6 <- VlnPlot(
  obj_harmony_addmodule,
  features = "Myh6",
  group.by = "Cardiomyocyte",
  pt.size = NULL,
  split.by = "treatment",
  cols = palette.treatment
) +
  geom_boxplot(
    position = position_dodge(0.9), notch = FALSE, colour = "white",
    outlier.shape = NA, linewidth = 1, width = 0.5
  )

ggsave(
  "Plots/VlnPlot_CMcontamination_afterSoupX_CMgenes.svg", Vln_CMgenes,
  units = "cm", dpi = 300, width = 15, height = 20
)
ggsave(
  "Plots/VlnPlot_CMcontamination_afterSoupX_Myh6.svg", Vln_Myh6,
  units = "cm", dpi = 300, width = 15, height = 20
)

Vln_CMgenes | Vln_Myh6

In [None]:
# Sample-wise plot of module expression of CM marker genes with SoupX

options(repr.plot.width = 20, repr.plot.height = 10, repr.plot.res = 300)

# CM module score, CM vs non-CM, split by sample, white boxplot overlay
Vln_CMgenes <- VlnPlot(
  obj_harmony_addmodule,
  features = "CMgenes1",
  group.by = "Cardiomyocyte",
  pt.size = NULL,
  split.by = "sample_id",
  cols = palette.treatmentid
) +
  geom_boxplot(
    position = position_dodge(0.9), notch = FALSE, colour = "white",
    outlier.shape = NA, linewidth = 1, width = 0.5
  )

# Same layout for Myh6
Vln_Myh6 <- VlnPlot(
  obj_harmony_addmodule,
  features = "Myh6",
  group.by = "Cardiomyocyte",
  pt.size = NULL,
  split.by = "sample_id",
  cols = palette.treatmentid
) +
  geom_boxplot(
    position = position_dodge(0.9), notch = FALSE, colour = "white",
    outlier.shape = NA, linewidth = 1, width = 0.5
  )

ggsave(
  "Plots/VlnPlot_CMcontamination_afterSoupX_CMgenes_splitbyid.svg", Vln_CMgenes,
  units = "cm", dpi = 300, width = 40, height = 20
)
ggsave(
  "Plots/VlnPlot_CMcontamination_afterSoupX_Myh6_splitbyid.svg", Vln_Myh6,
  units = "cm", dpi = 300, width = 40, height = 20
)

Vln_CMgenes
Vln_Myh6

## Plot UMAP grids and histograms with iterating setContaminationFraction

In [None]:
# Load one saved object per contamination fraction (0.00 to 0.35 in steps of 0.05).
# E.g. setContaminationFraction_0.1 reflects 10% of reads removed as contamination.
setContaminationFraction_0.00_sgl <- LoadH5Seurat("seurat_objects/setContaminationFraction/setContaminationFraction_0_harmony_sgl.h5seurat")
setContaminationFraction_0.05_sgl <- LoadH5Seurat("seurat_objects/setContaminationFraction/setContaminationFraction_0.05_harmony_sgl.h5seurat")
setContaminationFraction_0.10_sgl <- LoadH5Seurat("seurat_objects/setContaminationFraction/setContaminationFraction_0.1_harmony_sgl.h5seurat")
setContaminationFraction_0.15_sgl <- LoadH5Seurat("seurat_objects/setContaminationFraction/setContaminationFraction_0.15_harmony_sgl.h5seurat")
setContaminationFraction_0.20_sgl <- LoadH5Seurat("seurat_objects/setContaminationFraction/setContaminationFraction_0.2_harmony_sgl.h5seurat")
setContaminationFraction_0.25_sgl <- LoadH5Seurat("seurat_objects/setContaminationFraction/setContaminationFraction_0.25_harmony_sgl.h5seurat")
setContaminationFraction_0.30_sgl <- LoadH5Seurat("seurat_objects/setContaminationFraction/setContaminationFraction_0.3_harmony_sgl.h5seurat")
setContaminationFraction_0.35_sgl <- LoadH5Seurat("seurat_objects/setContaminationFraction/setContaminationFraction_0.35_harmony_sgl.h5seurat")

In [None]:
# Variable names of the per-fraction objects to score
object_names <- c("setContaminationFraction_0.00_sgl",
                  "setContaminationFraction_0.05_sgl",
                  "setContaminationFraction_0.10_sgl",
                  "setContaminationFraction_0.15_sgl",
                  "setContaminationFraction_0.20_sgl",
                  "setContaminationFraction_0.25_sgl",
                  "setContaminationFraction_0.30_sgl",
                  "setContaminationFraction_0.35_sgl")

# Run AddModuleScore on each object and store the result as
# "<name>_addmodule". The scored object is passed straight to assign(), so the
# loop no longer overwrites the global `obj` used by the pipeline cells above
# and leaves no scratch variables behind.
for (object_name in object_names) {
  assign(
    paste0(object_name, "_addmodule"),
    AddModuleScore(get(object_name), features = CMgenes, name = "CMgenes")
  )
}


In [None]:
# CM module score on the UMAP, one panel per sample, for every contamination
# fraction. max.cutoff = "q25" caps the colour scale at the 25th percentile of
# the score. All rows use the same 256-step viridis palette.
UMAP_000 <- FeaturePlot(setContaminationFraction_0.00_sgl_addmodule,
    features = c("CMgenes1"), split.by = "sample_id", pt.size = 0, cols = viridis(256), max.cutoff = "q25")
UMAP_005 <- FeaturePlot(setContaminationFraction_0.05_sgl_addmodule,
    features = c("CMgenes1"), split.by = "sample_id", pt.size = 0, cols = viridis(256), max.cutoff = "q25")
UMAP_010 <- FeaturePlot(setContaminationFraction_0.10_sgl_addmodule,
    features = c("CMgenes1"), split.by = "sample_id", pt.size = 0, cols = viridis(256), max.cutoff = "q25")
UMAP_015 <- FeaturePlot(setContaminationFraction_0.15_sgl_addmodule,
    features = c("CMgenes1"), split.by = "sample_id", pt.size = 0, cols = viridis(256), max.cutoff = "q25")
UMAP_020 <- FeaturePlot(setContaminationFraction_0.20_sgl_addmodule,
    features = c("CMgenes1"), split.by = "sample_id", pt.size = 0, cols = viridis(256), max.cutoff = "q25")
UMAP_025 <- FeaturePlot(setContaminationFraction_0.25_sgl_addmodule,
    features = c("CMgenes1"), split.by = "sample_id", pt.size = 0, cols = viridis(256), max.cutoff = "q25")
UMAP_030 <- FeaturePlot(setContaminationFraction_0.30_sgl_addmodule,
    features = c("CMgenes1"), split.by = "sample_id", pt.size = 0, cols = viridis(256), max.cutoff = "q25")
UMAP_035 <- FeaturePlot(setContaminationFraction_0.35_sgl_addmodule,
    features = c("CMgenes1"), split.by = "sample_id", pt.size = 0, cols = viridis(256), max.cutoff = "q25")

In [None]:
# Notebook display size for the stacked grid
options(repr.plot.width = 30, repr.plot.height = 18, repr.plot.res = 100)

# One FeaturePlot row per contamination fraction, ordered 0.00 -> 0.35
UMAP_list <- list(UMAP_000, UMAP_005, UMAP_010, UMAP_015,
                  UMAP_020, UMAP_025, UMAP_030, UMAP_035)

# Stack the rows and strip axes and legends from every panel
UMAP_grid <- patchwork::wrap_plots(UMAP_list, nrow = 8, ncol = 1) &
  NoAxes() &
  NoLegend()

ggsave(
  "Plots/UMAP_grid_setContamination_CMgenes_q25.svg",
  plot = UMAP_grid, units = "cm", dpi = 300, width = 35, height = 21
)

UMAP_grid

In [None]:
# Cluster-labelled UMAP for every contamination fraction. This reuses (and
# overwrites) the UMAP_0xx names of the module-score FeaturePlots above.
UMAP_000 <- DimPlot(setContaminationFraction_0.00_sgl, reduction = "umap", label = TRUE, label.size = 3, group.by = "seurat_clusters")
UMAP_005 <- DimPlot(setContaminationFraction_0.05_sgl, reduction = "umap", label = TRUE, label.size = 3, group.by = "seurat_clusters")
UMAP_010 <- DimPlot(setContaminationFraction_0.10_sgl, reduction = "umap", label = TRUE, label.size = 3, group.by = "seurat_clusters")
UMAP_015 <- DimPlot(setContaminationFraction_0.15_sgl, reduction = "umap", label = TRUE, label.size = 3, group.by = "seurat_clusters")
UMAP_020 <- DimPlot(setContaminationFraction_0.20_sgl, reduction = "umap", label = TRUE, label.size = 3, group.by = "seurat_clusters")
UMAP_025 <- DimPlot(setContaminationFraction_0.25_sgl, reduction = "umap", label = TRUE, label.size = 3, group.by = "seurat_clusters")
UMAP_030 <- DimPlot(setContaminationFraction_0.30_sgl, reduction = "umap", label = TRUE, label.size = 3, group.by = "seurat_clusters")
UMAP_035 <- DimPlot(setContaminationFraction_0.35_sgl, reduction = "umap", label = TRUE, label.size = 3, group.by = "seurat_clusters")

In [None]:
# One cluster UMAP per contamination fraction, ordered 0.00 -> 0.35
UMAP_list <- list(UMAP_000, UMAP_005, UMAP_010, UMAP_015,
                  UMAP_020, UMAP_025, UMAP_030, UMAP_035)

# Stack into a single column and strip axes and legends from every panel
UMAP_grid <- patchwork::wrap_plots(UMAP_list, nrow = 8, ncol = 1) &
  NoAxes() &
  NoLegend()

ggsave(
  "Plots/UMAP_setContaminationFraction_clusters.svg",
  plot = UMAP_grid, units = "cm", dpi = 300, width = 5, height = 40
)

In [None]:
# Histogram of AddModuleScores from gene set "CMgenes"
#
# Builds one histogram of a module-score metadata column per (object, sample)
# pair and arranges them in a grid: one row per object, one column per sample.
#
# Arguments:
#   object_names  Character vector of variable names. Each name is resolved
#                 with get() and must refer to an object whose @meta.data has
#                 a `sample_id` column and a column named `feature_name`.
#   feature_name  Name of the metadata column to plot.
#   fixed_y       Upper limit of the y-axis, shared by all panels.
#
# Returns, invisibly, the object produced by gridExtra::grid.arrange().
plot_histograms_per_sample <- function(object_names, feature_name = "CMgenes1", fixed_y = 1500) {
  stopifnot(is.character(object_names), length(object_names) > 0)

  plots_list <- list()
  n_objects <- length(object_names)
  n_cols <- 0L

  for (obj_idx in seq_along(object_names)) {
    obj_name <- object_names[obj_idx]
    obj <- get(obj_name)

    # Characters 26-29 of the name carry the contamination fraction, e.g.
    # "setContaminationFraction_0.05_sgl_addmodule" -> "0.05"
    truncated_obj_name <- substr(obj_name, 26, 29)

    # Loop over the sample_id
    sample_ids <- unique(obj@meta.data$sample_id)
    n_cols <- max(n_cols, length(sample_ids))

    for (sample_id_idx in seq_along(sample_ids)) {
      sample_id <- sample_ids[sample_id_idx]
      column_data <- obj@meta.data[obj@meta.data$sample_id == sample_id, feature_name]

      # Create data frame for plotting
      plot_data <- data.frame(score = column_data)

      # Generate the histogram; `linewidth` replaces the deprecated `size`
      # argument for line geoms
      p <- ggplot(plot_data, aes(x = score)) +
        geom_histogram(bins = 75, fill = "grey", color = "grey", alpha = 1) +
        geom_vline(xintercept = 0, color = "black", linetype = "solid", linewidth = 0.5) + # Add vertical line
        geom_hline(yintercept = 0, color = "black", linetype = "solid", linewidth = 0.5) + # Add horizontal line
        scale_y_continuous(limits = c(0, fixed_y)) + # Fixed y-axis limit
        theme_minimal() +
        labs(title = paste("cF =", truncated_obj_name, ",", sample_id)) +
        theme(
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          axis.text.x = element_blank(),
          axis.text.y = element_blank(),
          axis.ticks = element_blank()
        )

      # Add axis numbers only to the bottom-left corner plot, i.e. the first
      # sample of the last object. Using the actual number of objects instead
      # of a fixed index 8 keeps this working for any number of objects.
      if (obj_idx == n_objects && sample_id_idx == 1) {
        p <- p + theme(
          axis.text.x = element_text(size = 10),
          axis.text.y = element_text(size = 10),
          axis.ticks = element_line()
        )
      }

      # Add plot to list
      plots_list[[paste(obj_name, sample_id, sep = "_")]] <- p
    }
  }

  # Arrange plots into a grid (each row for one object, each column for one sample_id)
  grid_plots <- gridExtra::grid.arrange(grobs = plots_list, ncol = n_cols)

  # Suppress printing the output
  invisible(grid_plots)
}

# Plot: histogram grid of the CM module score for all eight scored objects
# (rows, 0.00 -> 0.35), with one column per sample
histogram_setContamination <- plot_histograms_per_sample(c(
                             "setContaminationFraction_0.00_sgl_addmodule", 
                             "setContaminationFraction_0.05_sgl_addmodule", 
                             "setContaminationFraction_0.10_sgl_addmodule", 
                             "setContaminationFraction_0.15_sgl_addmodule", 
                             "setContaminationFraction_0.20_sgl_addmodule",
                             "setContaminationFraction_0.25_sgl_addmodule",
                             "setContaminationFraction_0.30_sgl_addmodule",
                             "setContaminationFraction_0.35_sgl_addmodule"))


# Write the arranged grid to disk
ggsave("Plots/histogram_setContamination_CMgenes.svg", plot = histogram_setContamination, units = "cm", dpi = 300, width = 60, height = 45)

# Keep the notebook from printing the returned object
invisible(histogram_setContamination)

In [2]:
sessionInfo()

R version 4.3.3 (2024-02-29)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 22.04.2 LTS

Matrix products: default
BLAS/LAPACK: /media/daten/dmeral/micromamba/envs/scrna_dm/lib/libopenblasp-r0.3.27.so;  LAPACK version 3.12.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: Etc/UTC
tzcode source: system (glibc)

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] pals_1.9                    gridExtra_2.3              
 [3] viridis_0.6.5               viridisLite_0.4.2          
 [5] svglite_2.1.3               Rsamtools_2.18.0           
 [7] Biostrings_2.70.3  