In [None]:
########################################################################
# Author    : A. Alsema
# Date      : May-July 2021
# Dataset   : Visium Spatial Transcriptomics for MS lesions
# Purpose   : boxplots with the cluster distribution per lesion type
# Required input: 
#  - "3.WM.clustered.res0.2.rds"
#  - "WM_Targetfile.csv"
# Output    : figure 1e, custom boxplots. PDF.
#########################################################################

In [1]:
rm(list = ls())

library(Seurat)
library(hdf5r)
library(ggplot2)
library(patchwork)
library(future)
library(dplyr)
library(RColorBrewer)


In [1]:
# Clustering resolution; it selects the input file here and is reused
# further down when naming output files.
res <- 0.2

# Load the clustered object saved for this resolution.
rds_path <- paste0("./RData/seurat/3.WM.clustered.res", res, ".rds")
datasets <- readRDS(file = rds_path)

# Print the levels of the Group column of the loaded object.
levels(datasets$Group)

Attaching SeuratObject


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


“package ‘RColorBrewer’ was built under R version 4.2.1”


In [138]:
# Cross-tabulate clusters (rows) against samples (columns) and save the raw
# counts before they are converted to percentages.
rel_counts <- table(datasets$seurat_clusters, datasets$sample_ID)
write.csv(rel_counts, file = "Routput/Seurat/Clustering/WM_ST_cluster_counts.csv")

# Convert counts to within-sample percentages: every column sums to 100.
# prop.table(margin = 2) normalises all columns at once, replacing the
# previous 1:ncol() loop (which would also misbehave on a zero-column table).
rel_counts <- prop.table(rel_counts, margin = 2) * 100

# Long format: one row per cluster/sample combination.
dg <- as.data.frame(rel_counts)
colnames(dg) <- c("cluster", "variable", "percentage")

# add donor group info
group_info <- read.csv("WM_Targetfile.csv")
group_info <- group_info[, c("sample_ID", "Group")]

# Quick visual checks of both tables.
head(group_info)
head(dg)
# First position of every target-file sample in dg; an NA here means the
# sample is listed in the target file but absent from the clustered object.
match(group_info$sample_ID, dg$variable)

Unnamed: 0_level_0,sample_ID,Group
Unnamed: 0_level_1,<chr>,<chr>
1,ST31,CWM
2,ST32,CWM
3,ST33,activeWML
4,ST34,NAWM
5,ST37,NAWM
6,ST38,NAWM


Unnamed: 0_level_0,cluster,variable,percentage
Unnamed: 0_level_1,<fct>,<fct>,<dbl>
1,0,ST31,66.90483
2,1,ST31,12.060589
3,2,ST31,9.374107
4,3,ST31,4.401258
5,4,ST31,5.230066
6,5,ST31,2.029151


In [90]:
# Look up the target-file row of every sample in dg and stop if any sample
# has no match (previously this was only printed via identical(), so a
# mismatch would silently produce NA groups).
sample_idx <- match(as.character(dg$variable), group_info$sample_ID)
stopifnot(!anyNA(sample_idx))
dg$group <- as.character(group_info$Group[sample_idx])

# Map the raw group names to the display labels BY NAME. Using
# levels = unique(dg$group) ties the labels to the order in which groups
# happen to appear in the data (the target file lists an activeWML sample
# before the first NAWM sample), which can attach the wrong label to a group.
# NOTE(review): "CWM", "NAWM" and "activeWML" are the names visible in the
# target file; the one remaining group is assumed to be the mixed
# active/inactive lesion type shown as 'act/inact' - confirm against the csv.
known_groups <- c("CWM", "NAWM", "activeWML")
raw_groups <- unique(dg$group)
group_levels <- c(known_groups, setdiff(raw_groups, known_groups))
stopifnot(all(known_groups %in% raw_groups), length(group_levels) == 4)

dg$group <- factor(dg$group,
                   levels = group_levels,
                   labels = c('CWM', 'NAWM', 'active', 'act/inact'))

In [122]:
# plotting proportions: is there a CWM/NAWM enriched cluster? Is there a lesion-enriched cluster?
# Boxplot panel (without legend) of the spot percentage per lesion group.
#
# Arguments:
#   df        data frame with the columns `group` and `percentage`
#   var       value used as the facet variable; callers pass the cluster id,
#             which then appears as the strip title of the panel
#   my_breaks positions of the y-axis breaks
#   my_lims   limits of the y-axis
# Returns the ggplot object.
# Note: the fill colours come from `group_cols`, which is not an argument and
# must exist in the calling environment.
my_plot <- function(df, var, my_breaks, my_lims) {
  font_size <- 16

  # Plain look: no x-axis labels/ticks, no grid, black axis lines.
  panel_theme <- theme(
    text = element_text(hjust = 0.5, face = "plain", size = font_size),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    plot.title = element_text(hjust = 0.5, face = "plain", size = font_size),
    axis.title.y = element_text(face = "plain", size = font_size),
    axis.text = element_text(size = font_size, colour = "black"),
    plot.subtitle = element_text(hjust = 0.5),
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid = element_blank(),
    axis.line = element_line(color = "black")
  )

  canvas <- ggplot(data = df, aes(x = group, y = percentage, fill = group))

  canvas +
    # Inside a formula the double braces are ordinary braces, so the facet
    # variable is simply the value of `var`.
    facet_grid(~ {{var}}) +
    geom_boxplot(alpha = 1) +
    # Individual samples drawn on top of the boxes.
    geom_point(shape = 21, size = 4) +
    scale_fill_manual(values = group_cols) +
    theme_classic() +
    panel_theme +
    ylab("spots (%)") +
    NoLegend() +
    scale_y_continuous(breaks = my_breaks, limits = my_lims) +
    expand_limits(y = 0)
}


# Build the panel for one cluster: keep only that cluster's rows of dg and
# hand them to my_plot() with cluster-specific y-axis breaks and limits.
cluster_panel <- function(cluster_id, breaks, lims) {
  my_plot(filter(dg, cluster == cluster_id),
          var = cluster_id, my_breaks = breaks, my_lims = lims)
}

p0 <- cluster_panel("0", breaks = c(0, 20, 40, 60), lims = c(0, 75))
p1 <- cluster_panel("1", breaks = c(0, 20, 40, 60), lims = c(0, 75))
p2 <- cluster_panel("2", breaks = c(0, 20, 40), lims = c(0, 40))
p3 <- cluster_panel("3", breaks = c(0, 5, 10, 15, 20), lims = c(0, 20))
p4 <- cluster_panel("4", breaks = c(0, 5, 10, 15, 20), lims = c(0, 20))
p5 <- cluster_panel("5", breaks = c(0, 1, 2, 3), lims = c(0, 3))

# Write all six panels to one PDF, arranged in three columns.
pdf("Routput/Seurat/Clustering/boxplot_proportions.pdf")
p0 + p1 + p2 + p3 + p4 + p5 + plot_layout(ncol = 3)
dev.off()

In [17]:
# for the legend 

# Variant of my_plot() that keeps the legend; it replaces the earlier
# definition of the same name from this point on.
#
# Arguments:
#   df        data frame with the columns `group` and `percentage`
#   var       value used as the facet variable (shown as the strip title)
#   my_breaks positions of the y-axis breaks
#   my_lims   limits of the y-axis (NULL lets ggplot choose them)
# Returns the ggplot object.
# Note: the fill colours come from `group_cols`, which is not an argument and
# must exist in the calling environment.
my_plot <- function(df, var, my_breaks, my_lims) {
  font_size <- 16

  # Same plain look as the no-legend version, plus a larger strip title and
  # a square panel.
  legend_theme <- theme(
    text = element_text(hjust = 0.5, face = "plain", size = font_size),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    plot.title = element_text(hjust = 0.5, face = "plain", size = font_size),
    axis.title.y = element_text(face = "plain", size = font_size),
    axis.text = element_text(size = font_size, colour = "black"),
    plot.subtitle = element_text(hjust = 0.5),
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid = element_blank(),
    axis.line = element_line(color = "black"),
    strip.text = element_text(size = 24, color = "black"),
    aspect.ratio = 1
  )

  canvas <- ggplot(data = df, aes(x = group, y = percentage, fill = group))

  canvas +
    # Inside a formula the double braces are ordinary braces, so the facet
    # variable is simply the value of `var`.
    facet_grid(~ {{var}}) +
    geom_boxplot(alpha = 1) +
    geom_point(shape = 21, size = 4) +
    scale_fill_manual(values = group_cols) +
    theme_classic() +
    legend_theme +
    # Both axis titles are blanked; the legend is intentionally left on.
    ylab("") +
    xlab("") +
    scale_y_continuous(breaks = my_breaks, limits = my_lims) +
    expand_limits(y = 0)
}


# Single panel for cluster 0, drawn with the legend-keeping my_plot() defined
# in this cell; only used to obtain the legend for the figure.
# Note: this overwrites the p1 panel created for the proportions figure.
p1 <- dg %>%
  filter(cluster == "0") %>%
  my_plot(var = "0", my_breaks = seq(0, 70, 10), my_lims = NULL)

# paste0() instead of paste(): paste() inserts spaces between its arguments,
# which produced the file name "Boxplot_res 0.2 _landscape_LEGEND.pdf".
legend_file <- paste0("Routput/Seurat/Figures/Boxplot_res", res, "_landscape_LEGEND.pdf")

pdf(legend_file, width = 12, height = 12)
# Explicit print() so the plot is also drawn when this code is source()'d
# rather than run interactively (auto-printing only happens at top level).
print(p1)
dev.off()

In [None]:
# Record the R version and the versions of the attached/loaded packages
# so the figure can be reproduced.
sessionInfo()