# Cluster differentially expressed genes

In [167]:
library_load <- suppressMessages(
    
    list(
        
        # Seurat 
        library(Seurat), 
        
        # GSEA
        library(GSEABase), 
        library(fgsea), 
        library(msigdbr), 
        
        # Data 
        library(tidyverse), 
        
        # Plotting 
        library(ggplot2), 
        library(patchwork), 
        library(ComplexHeatmap)
        
    )
)

In [168]:
# Make the project root the working directory; the relative paths used below
# (sourced scripts, data/, result/) are resolved against it.
setwd(dir="/research/peer/fdeckert/FD20200109SPLENO")

In [169]:
# Load the project helper scripts (global plotting definitions, DEG helpers)
# into the global environment, in this order.
invisible(lapply(c("plotting_global.R", "bin/SeuratDEG.R"), source))

In [170]:
# Make the project theme the ggplot2 default for every plot in this notebook.
# theme_global_set() is provided by the sourced project scripts.
ggplot2::theme_set(new=theme_global_set())

In [171]:
# Silence all warnings for the rest of the session (a negative `warn` makes R ignore them)
options(warn = -1)

# Parameter settings

In [176]:
# Input: Seurat object read at the start of the analysis
so_file <- "data/object/seurat_int.rds"

# Meta data column that holds the cluster assignment of each cell
cluster <- "integrated_snn_res.0.8"

# DEG: statistical test handed to FindMarkers, plus the cache file.
# With deg_compute=FALSE the cached result in deg_file is read instead of recomputed.
test_use <- "wilcox"
deg_file <- "result/DEG/treatment_int_deg_wilcox_logfc_0_minpct_0.rds"
deg_compute <- FALSE

# GSEA: same compute-or-load switch and cache file as for the DEG step
gsea_file <- "result/GSEA/treatment_int_gsea_wilcox_logfc_0_minpct_0.rds"
gsea_compute <- FALSE

# Import Objects

In [173]:
# Read the Seurat object from disk
so <- readRDS(file=so_file)

# All downstream steps work on the RNA assay
DefaultAssay(so) <- "RNA"

### For SCVI the data slot needs to be reset to count normalized data

In [174]:
# Only the SCVI clustering needs the data slot re-normalised; any other
# clustering leaves the object untouched.
if(cluster=="SCVI_snn_res.0.8") {
    so <- NormalizeData(so)
}

### Remove assays except RNA 

In [175]:
# To be absolutely certain that the downstream analysis runs on the RNA assay,
# every other assay is removed from the object.
assays <- setdiff(names(so@assays), "RNA")
for(assay in assays) {
    so[[assay]] <- NULL
}

# Type I interferon response (MSigDB; GO:0034340)

In [None]:
# Mouse MSigDB category C5, restricted to the gene set whose exact source id
# is GO:0034340.
gs <- msigdbr(species="Mus musculus", category="C5")
gs_inf <- gs[which(gs$gs_exact_source %in% "GO:0034340"), ]

In [None]:
# Score every cell for the GO:0034340 gene set on the RNA assay. The score is
# stored in the meta data column "msGO0034340_RNA1" (name + index of the set).
#
# Fixes:
# - the argument is `assay`, not `assays`; the misspelled name was silently
#   absorbed by `...` and the assay was taken from DefaultAssay() instead.
# - gene symbols are de-duplicated so that a symbol listed more than once in
#   the msigdbr table is not counted repeatedly in the score.
so <- AddModuleScore(
    so,
    features=list(unique(gs_inf$gene_symbol)),
    assay="RNA",
    slot="data",
    ctrl=100,
    nbin=25,
    name="msGO0034340_RNA"
)

## Plot module score 

In [None]:
# Summarise the module score per cluster as log2 fold change CpG vs NaCl.
# Result: a one-row matrix (row "fc") with one column per cluster, named
# "Cluster <id>", as expected by the heatmap below.
#
# Fixes compared to the previous version:
# - the cluster column is dropped with all_of(cluster) (the value of the
#   `cluster` parameter) instead of -as.symbol(cluster), which is not a valid
#   tidyselect expression
# - labels are built with paste0() so they read "Cluster 1", not "Cluster  1"
# - grouping is removed after summarise() and the cluster column is addressed
#   by name instead of by position
data <- so@meta.data[, c("treatment", "msGO0034340_RNA1", cluster)] %>%
    dplyr::group_by_at(c("treatment", cluster)) %>%
    # Negative module scores are floored at 0 before averaging
    dplyr::mutate(msGO0034340_RNA1=pmax(msGO0034340_RNA1, 0)) %>%
    dplyr::summarise(mean_ms=mean(msGO0034340_RNA1)) %>%
    dplyr::ungroup() %>%
    pivot_wider(names_from=treatment, values_from=mean_ms) %>%
    # `cluster` inside .data[[ ]] is the parameter string, i.e. the name of the cluster column
    dplyr::mutate(cluster=paste0("Cluster ", .data[[cluster]])) %>%
    dplyr::mutate(fc=log2(CpG/NaCl)) %>%
    # A mean of 0 in one treatment gives +/-Inf; show those clusters as "no change"
    dplyr::mutate(fc=ifelse(is.infinite(fc), 0, fc)) %>%
    dplyr::select(-CpG, -NaCl, -all_of(cluster)) %>%
    column_to_rownames(var="cluster") %>% t()

In [None]:
options(repr.plot.width=10, repr.plot.height=3)

# Largest absolute log2FC (NA/NaN ignored); used to centre the colour scale on 0
fc_limit <- max(abs(na.omit(data[1, ])))

# One-row heatmap of the per-cluster log2FC of the module score.
# Fix: the title now carries the correct GO id (GO:0034340, was "GO:00343402").
ComplexHeatmap::pheatmap(
    main="Module score for interferon genes GO:0034340", 
    name="log2FC(MS)", 
    mat=data,  
    cellwidth=20, 
    cellheight=20, 
    scale="none",
    cluster_rows=FALSE,
    cluster_cols=FALSE,
    show_rownames=FALSE,
    show_colnames=TRUE, 
    # NaCl colour for negative, CpG colour for positive fold changes
    color=colorRampPalette(c(color$treatment["NaCl"], "white", color$treatment["CpG"]))(50), 
    breaks=seq(-fc_limit, fc_limit, 0.001), 
    border_color=NA
)

# Differential gene expression (DGE) analysis

In [None]:
# Treatment becomes the active identity (it is the contrast tested below) and
# the object is split into a list with one Seurat object per cluster.
Idents(so) <- "treatment"
so <- SplitObject(so, split.by=cluster)

# Sort objects by cluster id.
# Later cells label results by position as "Cluster 1" ... "Cluster n", which
# is only correct when the cluster ids are exactly 1..n. Indexing with ids that
# do not exist would silently insert NULL entries, so fail early instead.
cluster_ids <- as.character(seq_along(so))
if(!setequal(names(so), cluster_ids)) {
    stop(
        "Expected cluster ids 1..", length(so), " in '", cluster, "' but found: ",
        paste(names(so), collapse=", "),
        call.=FALSE
    )
}
so <- so[cluster_ids]

In [None]:
# Within each cluster object keep only genes that have at least one RNA count
so <- lapply(so, function(cluster_obj) {

    counts <- GetAssayData(cluster_obj, assay="RNA", slot="counts")
    counts <- counts[rowSums(counts) > 0, ]

    subset(cluster_obj, features=rownames(counts))

})

In [177]:
# Per-cluster differential expression, CpG (ident.1) vs NaCl (ident.2).
# Either read the cached result or recompute it for every cluster and cache it.
# No logFC / min.pct filter is applied, so the result holds all tested genes.
if(!deg_compute) {

    deg <- readRDS(deg_file)

} else {

    deg <- lapply(setNames(nm=names(so)), function(cluster_id) {

        FindMarkers(
            so[[cluster_id]],
            ident.1="CpG",
            ident.2="NaCl",
            logfc.threshold=0,
            min.pct=0,
            assay="RNA",
            test.use=test_use,
            verbose=TRUE
        )

    })

    saveRDS(deg, deg_file)

}

In [None]:
# Label the DEG tables "Cluster 1" ... "Cluster n" by their position in the list.
# seq_along() keeps this valid for an empty list, where 1:length(deg) would
# produce c(1, 0) and fail.
names(deg) <- paste0("Cluster ", seq_along(deg))

## Volcano plots 

In [None]:
# Add cluster information to deg for plotting.
# The column is assigned with base R rather than dplyr::mutate(): the gene names
# live in the row names of each table (they are read via rownames() when the
# ranked lists are built) and, depending on the dplyr version, mutate() may not
# keep data.frame row names. seq_along() also keeps the loop safe for an empty list.
for(i in seq_along(deg)) {
    deg[[i]]$cluster <- i
}

In [None]:
# Load additional project helpers, then draw one volcano plot per cluster with
# thresholds of 0.25 (log2_thold) and 0.05 (adjpvalue_thold).
source("bin/SeuratQC.R")
vp <- lapply(deg, function(deg_i) {
    deg_volcano_plot(deg_i, log2_thold=0.25, adjpvalue_thold=0.05)
})

In [None]:
# 20 x 30 notebook canvas; the volcano plots are laid out four per row
options(repr.plot.width=20, repr.plot.height=30)
patchwork::wrap_plots(vp, ncol=4)

# Gene set

In [None]:
# Mouse MSigDB category H, reshaped into a named list: one character vector of
# gene symbols per gene set name (the pathway input used for fgsea below).
gs <- msigdbr(species="Mus musculus", category="H")
gs_list <- split(gs$gene_symbol, gs$gs_name)

# Ranked gene list by Signed -log10 adjusted p-values

In [None]:
# Build one ranked gene list per DEG table: the score of a gene is
# -log10(adjusted p-value) carrying the sign of avg_log2FC, sorted from the
# highest to the lowest score and named by gene (row names of the DEG table).
# The list is unnamed at this point; names are assigned in the next cell.
#
# lapply() replaces the 1:length(deg) index loop, which would run with the
# indices c(1, 0) on an empty list.
ranks <- lapply(unname(deg), function(deg_i) {

    # An adjusted p-value of exactly 0 would give an infinite score, so it is
    # replaced by the smallest positive normalised double
    p_val_adj <- ifelse(deg_i$p_val_adj == 0, .Machine$double.xmin, deg_i$p_val_adj)

    ranks_i <- -log10(p_val_adj) * sign(deg_i$avg_log2FC)
    names(ranks_i) <- rownames(deg_i)

    ranks_i[order(-ranks_i)]

})

In [None]:
# Label the ranked lists "Cluster 1" ... "Cluster n" by position.
# seq_along() stays valid for an empty list, unlike 1:length(ranks).
names(ranks) <- paste0("Cluster ", seq_along(ranks))

## Plot ranked gene lists

In [None]:
# One data frame per cluster: score, position in the ranked list, cluster label
# and the treatment the score points to (positive = CpG, negative = NaCl,
# exactly 0 = NA).
data <- Map(
    function(scores, cluster_name) {

        data.frame(
            score=scores,
            rank=seq_along(scores),
            cluster=cluster_name,
            treatment=ifelse(scores>0, "CpG", ifelse(scores<0, "NaCl", NA))
        )

    },
    ranks,
    names(ranks)
)

# Stack the per-cluster frames; factor levels keep the clusters in list order
data <- do.call("rbind", data)
data$cluster <- factor(data$cluster, levels=names(ranks))

In [None]:
# Bar per gene (x = rank, y = signed score), coloured by treatment, one panel
# per cluster with four panels per row.
options(repr.plot.width=20, repr.plot.height=9)
ggplot(data, aes(x=rank, y=score, fill=treatment)) +
    geom_col() +
    facet_wrap(~cluster, ncol=4) +
    scale_fill_manual(values=color$treatment, na.value="gray") +
    theme(legend.position="none")

# GSEA 

In [None]:
# Keep only clusters with at least 10 genes whose adjusted p-value is <= 0.05.
# 10^-abs(score) converts the signed -log10 score back to the adjusted p-value.
# vapply() always returns a logical vector; sapply() returns list() for an
# empty input, which cannot be used for subsetting.
ranks <- ranks[vapply(ranks, function(x) {sum(10^-abs(x) <= 0.05) >= 10}, logical(1))]

In [None]:
# GSEA of the gene sets in gs_list on every ranked list that passed the filter:
# either recompute and cache the result, or read the cached result.
# The list is unnamed and positionally aligned with `ranks`.
#
# lapply() replaces the 1:length(ranks) index loop: that loop failed when no
# cluster passed the filter, and assigning a NULL result by index would have
# removed the element and shifted all following positions.
if(gsea_compute) {

    gsea <- lapply(unname(ranks), function(ranks_i) {

        fgsea(

            pathways=gs_list,
            stats=ranks_i,
            minSize=15,
            maxSize=500

        )

    })

    saveRDS(gsea, gsea_file)

} else {

    gsea <- readRDS(gsea_file)

}

## Filter and add cluster information to gsea

In [None]:
# Set gsea list names to cluster.
# The results are matched to the ranked lists by position, so both lists must
# have the same length; a shorter `ranks` would otherwise label results with NA
# (this can happen when a cached gsea file does not match the current filter).
if(length(gsea) != length(ranks)) {
    stop(
        "gsea holds ", length(gsea), " result(s) but ", length(ranks),
        " ranked list(s) are available; recompute the GSEA or check gsea_file",
        call.=FALSE
    )
}
names(gsea) <- names(ranks)

# Drop clusters without a result; vapply() keeps this valid for an empty list
gsea <- gsea[!vapply(gsea, is.null, logical(1))]

# Store the cluster label in every result table (used as plot title)
for(i in names(gsea)) {gsea[[i]]$cluster <- i}

# GSEA plotting 

In [None]:
# One dot plot per cluster showing the `top` pathways with the smallest
# adjusted p-value.
#
# Arguments of the plotting function:
#   gsea      result table of one cluster (expected to be the data.table
#             returned by fgsea, with the `cluster` column added above)
#   padj_thr  adjusted p-value threshold; pathways above it are drawn in gray
#             and the threshold is marked by vertical lines
#   top       maximum number of pathways shown
# Returns a ggplot object.
#
# Fixes compared to the previous version:
# - head() instead of gsea[1:top, ]: with fewer than `top` pathways the index
#   created all-NA rows and therefore duplicated factor levels
# - NA adjusted p-values no longer break the axis limit computation
gsea_plot <- lapply(gsea, function(gsea, padj_thr=0.25, top=20) {
    
    # Readable pathway labels: drop the "HALLMARK_" prefix, underscores become spaces
    gsea$names <- gsub("_", " ", sub("^HALLMARK_", "", gsea$pathway))

    # Colour by the treatment the enrichment points to (positive ES = CpG);
    # pathways that miss the threshold get NA and are drawn in gray
    gsea$treatment <- ifelse(sign(gsea$ES)==1, "CpG", "NaCl")
    gsea$treatment <- ifelse(gsea$padj<=padj_thr, gsea$treatment, NA)
    
    # Set 0 pvalues to min value so that -log10 stays finite
    gsea$padj <- ifelse(gsea$padj == 0, .Machine$double.xmin, gsea$padj)

    # Signed -log10 adjusted p-values 
    gsea$sign_log_adj_p_values <- -log10(gsea$padj) * sign(gsea$ES)
    
    # Symmetric x axis: at least +/- 2.5, wider if any pathway (not only the
    # plotted ones) needs it
    abs_score <- abs(gsea$sign_log_adj_p_values)
    abs_score <- abs_score[is.finite(abs_score)]
    if(length(abs_score) > 0 && max(abs_score) >= 2) {
        
        x_max <- max(abs_score) + 0.5
                     
    } else {
        
        x_max <- 2.5
        
    }
    
    # Take the top hits by adjusted pvalue (all of them if there are fewer than `top`)
    gsea <- gsea[order(padj)]
    gsea <- head(gsea, top)
    
    # Order by signed score; the factor levels fix this order on the y axis
    gsea <- gsea[order(sign_log_adj_p_values)]
    gsea$names <- factor(gsea$names, levels=gsea$names)
    
    # Plot 
    ggplot(gsea, aes(x=sign_log_adj_p_values, y=names, color=treatment)) + 
        geom_point(aes(size=abs(NES))) +
        geom_vline(xintercept=-log10(padj_thr)) + 
        geom_vline(xintercept=log10(padj_thr)) +
        ggtitle(gsea$cluster[1]) +
        xlab("Signed -log10 adj. p-value") + ylab("") + 
        xlim(-x_max, x_max) + 
        scale_color_manual(values=color$treatment, na.value="gray") +
        guides(
            color=guide_legend(order=1), 
            size=guide_legend(order=2, title="Abs. (NES)")
        ) + 
        theme(
            legend.position="right", 
            legend.justification="top"
        )
    
})

In [None]:
# Plots are arranged four per row, so the grid has ceiling(n / 4) rows.
# floor() undercounted the rows whenever n was not a multiple of 4 and gave a
# plot height of 0 for fewer than four plots.
gsea_plot_rows <- ceiling(length(gsea_plot)/4)
options(repr.plot.width=25, repr.plot.height=gsea_plot_rows * 7.5)
wrap_plots(gsea_plot, ncol=4)