# Cluster differentially expressed genes

In [6]:
library_load <- suppressMessages(
    
    list(
        
        # Seurat 
        library(Seurat), 
        
        # GSEA
        library(GSEABase), 
        library(fgsea), 
        library(msigdbr), 
        
        # Data 
        library(tidyverse), 
        
        # Plotting 
        library(ggplot2), 
        library(patchwork), 
        library(ComplexHeatmap)
        
    )
)

In [7]:
# Set working directory to project root
setwd("/research/peer/fdeckert/FD20200109SPLENO")

In [8]:
# Source files
source("plotting_global.R")
source("bin/plotDEA.R")

In [9]:
# Plotting Theme
ggplot2::theme_set(theme_global_set()) # From project global source()

In [10]:
# Silence all warnings for the rest of the session (a negative `warn` makes R ignore warnings)
options(warn=-1)

# Parameter settings

In [11]:
# Path of the Seurat object (RDS) that is read with readRDS() below
so_file <- "data/object/so_sct_int_hvg4000.rds"

# Metadata column used to set the cell identities and to split the object;
# the CpG vs. NaCl comparison is then run separately for each of its levels
group <- "seurat_clusters"

# Assay that is set as default and passed to FindMarkers();
# the value "SCT" selects PrepSCTFindMarkers() instead of NormalizeData() in the normalization step
assay <- "RNA"

# DEA: test handed to FindMarkers(test.use=...) and path the list of results is saved to
test_use <- "wilcox"
dea_file <- "result/dea/treatment_louvain_wilcox.rds"

# GSEA: path the list of fgsea results is saved to
gsea_file <- "result/gsea/treatment_louvain_wilcox.rds"

# Import Objects

In [12]:
# Read the Seurat object from the path configured in so_file
so <- readRDS(so_file)

In [14]:
# DEV: recompute neighbors, clusters and UMAP with the "integrated" assay as default.
# FindClusters() rewrites the cluster assignment that `group` ("seurat_clusters") refers to.
# NOTE(review): FindNeighbors() uses dims=1:10 but RunUMAP() uses dims=1:100 — confirm that the
# reduction actually holds 100 dimensions and that the different ranges are intended
DefaultAssay(so) <- "integrated"
so <- FindNeighbors(so, dims=1:10, k.param=20, verbose=FALSE)
so <- FindClusters(so, verbose=FALSE, resolution=0.5, algorithm=1, group.singletons=TRUE)
so <- RunUMAP(so, dims=1:100, n.neighbors=50, verbose=FALSE)

In [15]:
# Switch the default assay to the configured analysis assay and
# set the cell identities from the grouping column named in `group`
DefaultAssay(so) <- assay
so <- SetIdent(so, value=group)

# Normalize data 

In [16]:
# Prepare the expression values for marker testing:
# the "SCT" assay goes through PrepSCTFindMarkers(), any other assay through NormalizeData()
so <- if (assay == "SCT") PrepSCTFindMarkers(so) else NormalizeData(so)

# Split data by group

In [17]:
# Split the Seurat object by the grouping column.
# From here on `so` is a list of Seurat objects whose names are the group levels
so <- SplitObject(so, split.by=group)

# Differential expression analysis (DEA)

In [None]:
# Run the CpG vs. NaCl comparison within every group and collect the
# marker tables in a list named by group, then store the list on disk
dea <- list()
for(group_name in names(so)) {

    group_so <- so[[group_name]]

    dea[[group_name]] <- FindMarkers(
        group_so,
        ident.1="CpG",
        ident.2="NaCl",
        group.by="treatment",
        subset.ident=group_name,
        assay=assay,
        slot="data",
        test.use=test_use,
        logfc.threshold=0,
        min.pct=0,
        recorrect_umi=FALSE,
        verbose=TRUE
    )

}
saveRDS(dea, dea_file)

# To reuse a previous run instead of recomputing:
# dea <- readRDS(dea_file)

## Rename 

In [None]:
# names(dea) <- lapply(so, function(x) x$leiden_annotation[1])

## Volcano plots

In [None]:
# One volcano plot per DEA result, titled with the group name
vp <- lapply(names(dea), function(group_name) {
    dea_volcano_plot(
        dea[[group_name]],
        log2_thold=0.25,
        adjpvalue_thold=0.01,
        title=group_name
    )
})

In [None]:
# Size the notebook canvas for a grid of 5 columns with a height of 6.5 per row.
# The row count is rounded up: rounding down gave a height of 0 for fewer than
# 5 plots and left no room for a partially filled last row
vp_ncol <- 5
vp_nrow <- max(1, ceiling(length(vp)/vp_ncol))
options(repr.plot.width=30, repr.plot.height=vp_nrow * 6.5)
wrap_plots(vp, ncol=vp_ncol)

# Gene set enrichment analysis (GSEA)

# Ranked gene list by Signed -log10 adjusted p-values

In [None]:
# Build one named ranking vector per group, sorted in decreasing order:
# score = -log10(adjusted p-value) carrying the sign of the log2 fold change
ranks <- lapply(dea, function(markers) {

    # An adjusted p-value of 0 would give an infinite score;
    # replace it by the smallest positive double
    padj <- markers$p_val_adj
    padj[which(padj==0)] <- .Machine$double.xmin

    score <- -log10(padj) * sign(markers$avg_log2FC)
    names(score) <- rownames(markers)

    score[order(-score)]

})

# GSEA 

In [None]:
# Keep only groups with at least 10 genes at an adjusted p-value <= 0.05
# (|rank| is -log10 of the adjusted p-value, so 10^-|rank| recovers it).
# vapply() always yields a logical mask; sapply() returns a list for an empty
# `ranks`, which cannot be used as a subscript
has_enough_hits <- vapply(ranks, function(x) sum(10^-abs(x)<=0.05)>=10, logical(1))
ranks <- ranks[has_enough_hits]

In [None]:
# Fetch the mouse gene sets of MSigDB category "H" and
# turn the table into a list of gene symbols per gene set name
gs <- msigdbr(
    species="Mus musculus",
    category="H"
)
gs_list <- split(gs[["gene_symbol"]], gs[["gs_name"]])

In [None]:
# Run fgsea on the ranking of every remaining group, collect the
# results in a list named by group and store the list on disk
gsea <- list()
for(group_name in names(ranks)) {

    group_stats <- ranks[[group_name]]

    gsea[[group_name]] <- fgsea(pathways=gs_list, stats=group_stats, minSize=15, maxSize=600, nperm=10000)

}
saveRDS(gsea, gsea_file)

In [None]:
# Drop groups without a GSEA result (NULL entries).
# vapply() always yields a logical mask; negating the list that sapply()
# returns for an empty `gsea` would raise an error
gsea <- gsea[!vapply(gsea, is.null, logical(1))]

# GSEA plotting 

In [None]:
#' Dot plot of the top GSEA pathways on a signed -log10 adjusted p-value axis
#'
#' Pathways are placed at -log10(padj) multiplied by the sign of their enrichment
#' score, sized by |NES| and colored by direction ("CpG" for positive, "NaCl" for
#' negative enrichment scores). Pathways above `padj_thr` are drawn in gray.
#' The colors are read from `color$treatment`, which must exist outside this
#' function (presumably defined by the sourced plotting globals — confirm).
#'
#' @param gsea Result table of one fgsea run; the columns pathway, pval, padj,
#'   ES and NES are used.
#' @param padj_thr Adjusted p-value threshold for coloring; also marked by two
#'   vertical lines.
#' @param top Maximum number of pathways to show (smallest adjusted p-values first).
#' @param category_pattern Pattern the pathway names are split on; the part
#'   following it becomes the axis label.
#' @param title Plot title.
#' @return A ggplot object.
gsea_dot_plot <- function(gsea, padj_thr=0.25, top=20, category_pattern="HALLMARK_", title=NULL) {

    # Readable pathway labels: part after the category pattern, underscores replaced by spaces
    category <- str_split(gsea$pathway, pattern=category_pattern)
    category <- vapply(category, "[[", character(1), 2)
    gsea$category <- gsub("_", " ", category, fixed=TRUE)

    # Set 0 or Inf p-values to the smallest positive double so that the log stays finite.
    # Each column is cleaned in place; the cleaned raw p-values must not overwrite padj,
    # otherwise everything below would work on unadjusted p-values
    gsea$padj <- ifelse(gsea$padj==0 | is.infinite(gsea$padj), .Machine$double.xmin, gsea$padj)
    gsea$pval <- ifelse(gsea$pval==0 | is.infinite(gsea$pval), .Machine$double.xmin, gsea$pval)

    # Color by enrichment direction; NA (drawn in gray) above the threshold
    gsea$treatment <- ifelse(sign(gsea$ES)==1, "CpG", "NaCl")
    gsea$treatment <- ifelse(gsea$padj<=padj_thr, gsea$treatment, NA)

    # Signed -log10 adjusted p-values
    gsea$sign_log_adj_p_values <- -log10(gsea$padj) * sign(gsea$ES)

    # Symmetric axis limits, computed over all pathways before the top selection
    x_max <- max(abs(gsea$sign_log_adj_p_values)) + 0.5

    # Take the top hits by adjusted p-value; head() returns all rows if there are
    # fewer than `top`, whereas indexing 1:top would append NA rows
    gsea <- gsea[order(gsea$padj), ]
    gsea <- head(gsea, top)

    # Order the rows by signed score so that the y axis runs from negative to positive enrichment
    gsea <- gsea[order(gsea$sign_log_adj_p_values), ]

    # Fix the row order on the y axis
    gsea$category <- factor(gsea$category, levels=gsea$category)

    # Plot
    plot <- ggplot(gsea, aes(x=sign_log_adj_p_values, y=category, color=treatment)) +

        # Threshold lines on both sides of the signed axis
        geom_vline(xintercept=-log10(padj_thr)) +
        geom_vline(xintercept=log10(padj_thr)) +
        geom_point(aes(size=abs(NES))) +

        ggtitle(title) +
        xlab("Signed -log10 adj. p-value") + ylab("") +
        xlim(-x_max, x_max) +
        scale_color_manual(values=color$treatment, na.value="gray") +
        guides(
            color=guide_legend(order=1),
            size=guide_legend(order=2, title="Abs. (NES)")
        ) +

        theme(
            legend.position="right",
            legend.justification="top"
        )

    return(plot)

}

In [None]:
# Size the notebook canvas for a grid of 4 columns with a height of 7.5 per row.
# The row count is rounded up: rounding down gave a height of 0 for fewer than
# 4 plots and left no room for a partially filled last row
dp_ncol <- 4
dp_nrow <- max(1, ceiling(length(gsea)/dp_ncol))
options(repr.plot.width=25, repr.plot.height=dp_nrow * 7.5)

# One dot plot per GSEA result, titled with the group name
dp <- lapply(names(gsea), function(i) gsea_dot_plot(gsea[[i]], title=i))
wrap_plots(dp, ncol=dp_ncol)