# Cluster differentially expressed genes

In [None]:
library_load <- suppressMessages(
    
    list(
        
        # Seurat 
        library(Seurat), 
        
        # GSEA
        library(GSEABase), 
        library(fgsea), 
        library(msigdbr), 
        
        # Data 
        library(tidyverse), 
        
        # Plotting 
        library(ggplot2), 
        library(patchwork), 
        library(ComplexHeatmap)
        
    )
)

In [None]:
# Set working directory to project root so that the relative paths used below
# (sourced scripts, data/ and result/ files) resolve.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
setwd("/research/peer/fdeckert/FD20200109SPLENO")

In [None]:
# Source project helper files (paths are relative to the working directory).
# NOTE(review): theme_global_set() and the `color` palette used further down are
# presumably defined in plotting_global.R, and dea_volcano_plot() in
# bin/plotDEA.R — confirm against those files.
source("plotting_global.R")
source("bin/plotDEA.R")

In [None]:
# Plotting Theme: make the project theme the default for all ggplot2 plots
# created after this point
ggplot2::theme_set(theme_global_set()) # From project global source()

In [None]:
# Suppress all warnings for the rest of the session (a negative `warn` value
# makes R ignore every warning).
# NOTE(review): this also hides informative warnings, e.g. ggplot2 reporting
# rows removed by axis limits — consider scoping it more narrowly.
options(warn=-1)

# Parameter settings

In [None]:
# Seurat object (RDS file, path relative to the working directory)
so_file <- "data/object/int.rds"

# Grouping to compare treatment groups: metadata column used to split the
# object; the treatment comparison is run within each level of this column
group <- "leiden_annotation"

# DEA: statistical test passed to FindMarkers() and the file the results are
# saved to
test_use <- "MAST"
dea_file <- "result/dea/treatment_leiden.rds"

# GSEA: file the fgsea results are written to / read from
gsea_file <- "result/gsea/treatment_leiden.rds"

# Import Objects

In [None]:
# Load the Seurat object from the RDS file given by `so_file`
so <- readRDS(so_file)

In [None]:
# Work on the RNA assay and use the "treatment" metadata column as the active
# cell identity, so that treatments can be compared by identity class
DefaultAssay(so) <- "RNA"
Idents(so) <- "treatment"

# Split data by group

In [None]:
# Split the Seurat object into a list with one object per level of `group`.
# SplitObject() already names the list elements by the group value, so no
# separate renaming step is needed (the former renaming line re-assigned the
# existing names and therefore had no effect).
so <- SplitObject(so, split.by=group)

# Differential expression analysis (DEA)

In [None]:
# Differential expression between treatments (CpG vs NaCl), computed separately
# for every group-specific Seurat object in `so`. The per-group results are
# collected in the named list `dea` and saved to `dea_file`.
dea <- list()
for(i in names(so)) {

    # Remove genes with 0 expression after splitting the data.
    # The feature names are taken directly from the row sums so that no
    # sub-matrix has to be built (and cannot collapse to a vector when only a
    # single gene remains).
    cnt <- GetAssayData(so[[i]], assay="RNA", slot="counts")
    expressed <- rownames(cnt)[rowSums(cnt) > 0]
    so[[i]] <- subset(so[[i]], features=expressed)

    # No log fold-change / detection-rate pre-filtering (both thresholds are 0):
    # every remaining gene is tested, which gives a complete result table for
    # the ranked gene lists built further down.
    dea[[i]] <- FindMarkers(
        so[[i]],
        ident.1="CpG",
        ident.2="NaCl",
        logfc.threshold=0,
        min.pct=0,
        assay="RNA",
        test.use=test_use,
        verbose=TRUE
    )

}

# Make sure the output directory exists; otherwise saveRDS() fails and the
# computed results are lost.
dir.create(dirname(dea_file), recursive=TRUE, showWarnings=FALSE)
saveRDS(dea, dea_file)

## Volcano plots

In [None]:
# Add group: store the position of each result in `dea` in a `cluster` column.
# Base assignment is used instead of dplyr::mutate() so that the row names of
# each result table are guaranteed to be kept (they are read with rownames()
# when the ranked gene lists are built). seq_along() also copes with an empty
# `dea` list, and rep() with a result table that has no rows.
for(i in seq_along(dea)) {
    dea[[i]]$cluster <- rep(i, nrow(dea[[i]]))
}

In [None]:
# Keep only the first four DEA results for the steps below.
# head() never pads the list, whereas dea[1:4] adds NULL entries with NA names
# when fewer than four results are available.
dea <- head(dea, 4)

In [None]:
# One plot per DEA result, produced by the project helper dea_volcano_plot()
# with a log2 fold-change threshold of 0 and an adjusted p-value threshold of
# 0.05
vp <- lapply(
    dea,
    function(res) dea_volcano_plot(res, log2_thold=0, adjpvalue_thold=0.05)
)

In [None]:
# Set the size of the rendered plot output (repr options), then arrange all
# plots in `vp` in one patchwork layout with five columns
options(repr.plot.width=20, repr.plot.height=5)
wrap_plots(vp, ncol=5)

# Gene set

In [None]:
# MSigDB gene sets of category "H" for mouse, reshaped into a named list with
# one character vector of gene symbols per gene set name
gs <- msigdbr(species="Mus musculus", category="H")
gs_list <- split(x=gs$gene_symbol, f=gs$gs_name)

# Ranked gene list by Signed -log10 adjusted p-values

In [None]:
# Build one ranked gene vector per DEA result: the signed -log10 adjusted
# p-value (sign taken from avg_log2FC), named by the row names of the result
# table and sorted in decreasing order.
ranks <- lapply(setNames(nm=names(dea)), function(cluster) {

    res <- dea[[cluster]]

    # Adjusted p-values of exactly 0 would become Inf on the log scale, so they
    # are replaced by the smallest positive double first
    adj_p <- res$p_val_adj
    adj_p <- replace(adj_p, which(adj_p == 0), .Machine$double.xmin)

    score <- -log10(adj_p) * sign(res$avg_log2FC)
    names(score) <- rownames(res)

    score[order(-score)]

})

## Plot ranked gene lists

In [None]:
# Long-format table of the ranked scores for plotting: one row per gene with
# its score, its rank within the cluster, the cluster name and a treatment
# label ("CpG" for positive scores, "NaCl" for negative scores, NA for 0).
data <- list()
for(i in names(ranks)) {

    score_i <- ranks[[i]]

    data[[i]] <- data.frame(

        score=score_i,
        # seq_along() instead of 1:length(): an empty ranked list gives an
        # empty rank column rather than c(1, 0)
        rank=seq_along(score_i),
        # Repeated explicitly so that an empty ranked list yields a 0-row
        # data frame instead of a "differing number of rows" error
        cluster=rep(i, length(score_i)),
        treatment=ifelse(score_i>0, "CpG", ifelse(score_i<0, "NaCl", NA))

    )

}

# Combine list and set factor level for cluster
data <- do.call("rbind", data)
data$cluster <- factor(data$cluster, levels=names(ranks))

In [None]:
# Bar plot of the ranked scores: one panel per cluster (four panels per row),
# bars filled by treatment label using the `color$treatment` palette, NA labels
# drawn in gray, legend hidden
options(repr.plot.width=20, repr.plot.height=9)
ggplot(data, aes(x=rank, y=score, fill=treatment)) +
    geom_col() +
    facet_wrap(~cluster, ncol=4) +
    scale_fill_manual(values=color$treatment, na.value="gray") +
    theme(legend.position="none")

# GSEA 

In [None]:
# Keep only ranked lists with at least 10 genes whose adjusted p-value is
# <= 0.05 (10^-|score| recovers the adjusted p-value from the signed -log10
# score).
# vapply() always returns a logical vector (sapply() returns list() for an
# empty input, which is not a valid subscript), and na.rm=TRUE keeps NA scores
# from turning the filter itself into NA.
ranks <- ranks[vapply(
    ranks,
    function(x) {sum(10^-abs(x) <= 0.05, na.rm=TRUE) >= 10},
    logical(1)
)]

In [None]:
# Run fgsea for every ranked gene list, or load previously saved results.
# `gsea_compute` (TRUE = compute and save, FALSE = load) is expected to be set
# beforehand; if it is not defined anywhere, fall back to computing only when
# no saved result file exists.
if(!exists("gsea_compute")) {
    gsea_compute <- !file.exists(gsea_file)
}

if(gsea_compute) {

    # lapply() returns exactly one entry per ranked list (named like `ranks`),
    # also for an empty `ranks`, so results cannot get out of step with the
    # ranked lists they were computed from
    gsea <- lapply(ranks, function(ranked) {

        fgsea(

            pathways=gs_list,
            stats=ranked,
            minSize=15,
            maxSize=500

        )

    })

    # Make sure the output directory exists before saving
    dir.create(dirname(gsea_file), recursive=TRUE, showWarnings=FALSE)
    saveRDS(gsea, gsea_file)

} else {

    gsea <- readRDS(gsea_file)

}

## Filter and add cluster information to GSEA results

In [None]:
# Label each GSEA result with its cluster and drop missing (NULL) results.
# The result list (possibly read from a saved file) must have exactly one entry
# per ranked list; otherwise the names assigned below would silently label
# results with the wrong cluster.
if(length(gsea) != length(ranks)) {
    stop(
        "Number of GSEA results (", length(gsea),
        ") does not match number of ranked lists (", length(ranks), ")",
        call.=FALSE
    )
}

# Set gsea list names to cluster
names(gsea) <- names(ranks)

# vapply() returns logical(0) for an empty list, where sapply() returns list()
# and the negation fails
gsea <- gsea[!vapply(gsea, is.null, logical(1))]

# Store the cluster name in each result table
for(i in names(gsea)) {gsea[[i]]$group <- i}

# GSEA plotting 

In [None]:
# Build one dot plot per GSEA result table.
#
# Arguments of the plotting function:
#   res       GSEA result table with the columns pathway, ES, NES and padj used
#             below, plus a `group` column used for the title.
#   padj_thr  Adjusted p-value threshold; pathways above it are drawn in gray
#             and the threshold is marked by two vertical lines.
#   top       Maximum number of pathways (smallest adjusted p-values) to show.
# Returns a ggplot object with the signed -log10 adjusted p-value on the x axis,
# the pathway on the y axis and the point size given by |NES|.
gsea_plot <- lapply(gsea, function(res, padj_thr=0.25, top=20) {

    # Correct pathway names: drop the "HALLMARK_" prefix and replace
    # underscores by spaces (names without the prefix are kept as they are)
    res$names <- gsub("_", " ", sub("^HALLMARK_", "", res$pathway))

    # Add color: label by sign of the enrichment score, only for pathways
    # passing the adjusted p-value threshold (others become NA -> gray)
    res$treatment <- ifelse(sign(res$ES)==1, "CpG", "NaCl")
    res$treatment <- ifelse(res$padj<=padj_thr, res$treatment, NA)

    # Set 0 pvalues to min value so that the log transform stays finite
    res$padj <- ifelse(res$padj == 0, .Machine$double.xmin, res$padj)

    # Signed -log10 adjusted p-values
    res$sign_log_adj_p_values <- -log10(res$padj) * sign(res$ES)

    # Take top hits by adjusted pvalue. head() returns fewer rows when fewer
    # than `top` pathways are available; indexing with 1:top would pad the
    # table with NA rows and break the factor levels below.
    res <- res[order(res$padj), ]
    res <- head(res, top)

    # Order by signed score so that the y axis runs from most negative to
    # most positive
    res <- res[order(res$sign_log_adj_p_values), ]

    # Set names to factor to fix the plotting order
    res$names <- factor(res$names, levels=res$names)

    # Set axis limits: at least +/- 2.5, widened when needed so that strongly
    # significant pathways are not removed from the plot by xlim()
    x_max <- max(c(2.5, abs(res$sign_log_adj_p_values)), na.rm=TRUE)

    # Plot
    ggplot(res, aes(x=sign_log_adj_p_values, y=names, color=treatment)) +
        geom_point(aes(size=abs(NES))) +
        geom_vline(xintercept=-log10(padj_thr)) +
        geom_vline(xintercept=log10(padj_thr)) +
        ggtitle(res$group[1]) +
        xlab("Signed -log10 adj. p-value") + ylab("") +
        xlim(-x_max, x_max) +
        scale_color_manual(values=color$treatment, na.value="gray") +
        guides(
            color=guide_legend(order=1),
            size=guide_legend(order=2, title="Abs. (NES)")
        ) +
        theme(
            legend.position="right",
            legend.justification="top"
        )

}
                   )

In [None]:
# options(repr.plot.width=25, repr.plot.height=floor(length(gsea_plot)/4) * 7.5)
# wrap_plots(gsea_plot, ncol=4)

In [None]:
# gsea_plot