# Limma - Differential expression analysis (DEA)

https://support.bioconductor.org/p/p133524/

In [None]:
# Attach every package the notebook uses in one step, silencing start-up
# messages and warnings. The attach order is kept as-is because packages
# attached later can mask names exported by earlier ones.
library_load <- suppressMessages(suppressWarnings(list(

    # Seurat
    library(Seurat),

    # RNA seq
    library(edgeR),

    # GSEA
    library(msigdbr),
    library(fgsea),

    # Data
    library(tidyverse),
    library(openxlsx),

    # Plotting
    library(ComplexHeatmap),
    library(circlize),
    library(viridis),
    library(ggplotify),
    library(patchwork),
    library(RColorBrewer),

    # Python
    library(reticulate)

)))

In [None]:
# Bind reticulate to the project's conda environment, then show the Python
# configuration that was picked up
use_condaenv(
    condaenv="p.3.9.19-FD20200109SPLENO",
    conda="/nobackup/peer/fdeckert/miniconda3/bin/conda",
    required=NULL
)
py_config()

In [None]:
# Session-wide settings: warn=-1 silences ALL warnings for the rest of the
# notebook; the dplyr switch turns off the summarise() grouping message
options(
    warn=-1,
    dplyr.summarise.inform=FALSE
)

In [None]:
# Fix the RNG state so stochastic steps further down are repeatable
random_seed <- 42
set.seed(random_seed)

In [None]:
# Set working directory to project root
# Every relative path used below (the sourced helper file, data/..., result/...)
# is resolved against this directory.
setwd("/research/peer/fdeckert/FD20200109SPLENO")

In [None]:
# Source files
# Project-wide plotting helpers; theme_global_set() comes from here.
# NOTE(review): the `color` palette list used by the plotting cells is not
# defined anywhere in this notebook and presumably comes from this file too — confirm.
source("plotting_global.R")

In [None]:
# Plotting Theme
# Make the project theme the ggplot2 default for all plots created afterwards
ggplot2::theme_set(theme_global_set(size_select=1)) # From project global source()

# Parameter

In [None]:
# Cache switch for the voom fit: FALSE recomputes and saves, TRUE reloads from disk
cache_test <- FALSE

# Gene filter passed to feature_select(): a gene is kept when at least
# `cell_min` cells carry a count of at least `cnt_min`
cnt_min <- 1
cell_min <- 3

# Significance cut-offs passed to the plotting helper vp()
adj_pval_thr <- 0.05
log2_thr <- 0

# Helper function

In [None]:
# Drop lowly detected genes from a Seurat object.
#
# A gene is kept when at least `cell_min` cells have a raw RNA count of at
# least `cnt_min`. The returned object is rebuilt from the filtered count
# matrix and the original cell metadata only; nothing else stored on `so`
# is passed on to the new object.
#
# so        Seurat object with an "RNA" assay that has a "counts" layer.
# cnt_min   Minimum count for a gene to count as detected in a cell.
# cell_min  Minimum number of cells in which the gene must be detected.
#
# Returns a new Seurat object restricted to the retained genes.
#
# NOTE(review): the defaults (cnt_min=3, cell_min=1) mirror the notebook-level
# parameters (cnt_min=1, cell_min=3) the wrong way round. The notebook always
# passes both explicitly, so the defaults are left untouched — confirm intent.
feature_select <- function(so, cnt_min=3, cell_min=1) {

    cnt <- GetAssayData(so, assay="RNA", layer="counts")

    # Number of cells per gene that reach the count threshold
    keep <- rowSums(cnt >= cnt_min) >= cell_min

    # drop=FALSE keeps a genes x cells matrix even when a single gene passes;
    # without it the subset would collapse to a plain vector
    cnt <- cnt[keep, , drop=FALSE]

    so <- CreateSeuratObject(counts=cnt, meta.data=so@meta.data)

    return(so)

}

In [None]:
# Fit a voom-weighted limma linear model on the raw counts of one Seurat object.
#
# Every cell is one column of the model; the design is a no-intercept
# (group-means) model on the `infection` metadata column, so each coefficient
# is one infection group and contrasts between groups can be formed afterwards.
#
# so  Seurat object with an "RNA" assay ("counts" layer) and an `infection`
#     metadata column.
#
# Returns the fit object produced by edgeR::voomLmFit().
limma_fit <- function(so) {

    # Raw counts; `layer=` matches feature_select() (`slot=` is the deprecated spelling)
    cnt <- GetAssayData(so, assay="RNA", layer="counts")

    # voomLmFit is given a dense matrix
    cnt <- as.matrix(cnt)

    # Grouping variable, taken from the cell metadata
    infection <- as.character(so[["infection", drop=TRUE]])

    # Group-means design; strip the "infection" prefix so the coefficient
    # names are the bare group labels used when building contrasts
    design <- model.matrix(~0+infection)
    colnames(design) <- gsub("infection", "", colnames(design))

    # Run voomLmFit: no blocking factor, no extra normalization, no sample weights
    fit <- edgeR::voomLmFit(

        counts=cnt,
        design=design,
        block=NULL,
        normalize.method="none",
        lib.size=NULL,
        sample.weights=FALSE,
        plot=FALSE

    )

    return(fit)

}

In [None]:
# Test one pairwise contrast on a fitted limma model and return the full gene table.
#
# fit            Fit object whose `coefficients` matrix has one column per group
#                (as returned by limma_fit()).
# contrasts_vec  Character vector of exactly two group names; the contrast is
#                first minus second, so logFC > 0 means higher in the first group.
#
# Returns the limma::topTable() data frame for all genes, sorted by p-value,
# with an extra `gene` column holding the row names; or NULL when either group
# is not a coefficient of `fit`.
contrasts_fit <- function(fit, contrasts_vec) {

    # Exactly two groups are needed; extra entries used to be silently ignored
    if (length(contrasts_vec) != 2) {
        stop("`contrasts_vec` must name exactly two groups", call.=FALSE)
    }

    # Look the coefficients up by name instead of by list position
    group_names <- colnames(fit$coefficients)

    # A group missing from this fit means the contrast cannot be formed
    if (!all(contrasts_vec %in% group_names)) {
        return(NULL)
    }

    # Contrast fit
    contrast_formula <- paste0(contrasts_vec[1], "-", contrasts_vec[2])
    contrast_matrix <- limma::makeContrasts(contrasts=contrast_formula, levels=group_names)
    fit_contrast <- limma::contrasts.fit(fit, contrasts=contrast_matrix)

    # eBayes fit
    efit <- limma::eBayes(fit_contrast)

    # Result table for every gene (`number` spelled out rather than partially matched via `n`)
    res <- limma::topTable(efit, coef=1, number=Inf, sort.by="P", p.value=1, lfc=0)
    res$gene <- rownames(res)

    return(res)

}

In [None]:
# Plot log2 fold change against average expression for one DEA result table.
#
# Points are filled by significance and direction, and the top genes per
# direction (by fold change and by adjusted p-value) are labelled.
# Note that the axes are AveExpr (x) and logFC (y), not p-value.
#
# dea           Data frame with columns `AveExpr`, `logFC`, `adj.P.Val`; row
#               names are used as gene labels.
# log2_thr      Minimum absolute logFC for a gene to be called significant.
# adj_pval_thr  Maximum adjusted p-value for a gene to be called significant.
# top_label     Number of genes labelled per direction and per ranking.
# title         Plot title.
# color_pos     Named length-one colour for significant genes with logFC > 0.
# color_neg     Named length-one colour for significant genes with logFC < 0.
#
# Returns a ggplot object.
vp <- function(dea, log2_thr=0, adj_pval_thr=0.1, top_label=10, title=NULL, color_pos=c("pos"="#0000ffff"), color_neg=c("neg"="#fd8008ff")) {

    # Annotate entries significance by log2_thr and adj_pval_thr.
    # The former "replace adj.P.Val == 0 by min(adj.P.Val)" step was removed:
    # the minimum is itself 0 whenever a zero is present, so it never changed a
    # value on NA-free input (and turned the zeros into NA when an NA was
    # present). The p-values are only compared against a threshold here.
    dea$sig <- ifelse((abs(dea$logFC) >= log2_thr) & (dea$adj.P.Val <= adj_pval_thr), "s", "ns")

    # Set color based on significance and direction of dea e.g. positive and negative
    dea$color <- ifelse(dea$sig == "s" & dea$logFC > 0, names(color_pos), "n.s.")
    dea$color <- ifelse(dea$sig == "s" & dea$logFC < 0, names(color_neg), dea$color)

    color <- c(color_pos, "gray", "black", color_neg)
    names(color) <- c(names(color_pos), "n.s.", "black", names(color_neg))

    # Significant genes per direction; which() drops rows with NA conditions
    dea_pos <- dea[which(dea$logFC > 0 & dea$sig == "s"), ]
    dea_neg <- dea[which(dea$logFC < 0 & dea$sig == "s"), ]

    # First `top_label` row names in the given order. head() returns fewer
    # names when fewer rows exist instead of padding with NA rows.
    top_names <- function(tbl, ord) head(rownames(tbl)[ord], top_label)

    pos_labels <- c(
        top_names(dea_pos, rev(order(dea_pos$logFC))),
        top_names(dea_pos, order(dea_pos$adj.P.Val))
    )
    neg_labels <- c(
        top_names(dea_neg, order(dea_neg$logFC)),
        top_names(dea_neg, order(dea_neg$adj.P.Val))
    )

    # Set labels
    dea$label <- ifelse(rownames(dea) %in% c(pos_labels, neg_labels), rownames(dea), NA)

    # Symmetric y-range around zero
    y_max <- max(abs(dea$logFC))

    # Plot (aesthetics refer to columns of `dea`, not to `dea$...` vectors)
    pl <- ggplot(dea, aes(x=AveExpr, y=logFC, fill=color, label=label)) +

        geom_point(size=4, shape=21, color="white") +
        geom_hline(aes(yintercept=0), linetype="dotted", colour="black") +
        ggrepel::geom_text_repel(segment.color="black", force=20, force_pull=1, max.overlaps=getOption("ggrepel.max.overlaps", default=100), size=5, alpha=1, guide="none", segment.size=0.1, color='black') +
        ylim(-y_max, y_max) +
        ggtitle(title) + xlab("average expression [ratio]") + ylab("average log2FC") +
        scale_fill_manual(values=color, name="DEA") +

        guides(

            color=guide_legend(order=1, title="Group", size=2, keywidth=0.75, keyheight=0.75),
            alpha="none"

        ) +

        theme(

            legend.position="right",
            aspect.ratio=1

        )

    return(pl)

}

In [None]:
# Run fgsea on one DEA table, ranking genes by signed -log10 adjusted p-value.
#
# res_dea   Data frame with `adj.P.Val` and `logFC`; row names are gene names.
# gene_set  Named list of character vectors (one gene vector per pathway).
#
# Returns the fgsea result table.
#
# NOTE(review): `nperm` is a legacy fgsea argument; confirm how the installed
# fgsea version handles it before changing anything here.
gsea <- function(res_dea, gene_set) {

    # Adjusted p-values, with exact zeros lifted to the smallest positive value
    # so the log transform stays finite
    padj <- res_dea$adj.P.Val
    padj <- ifelse(padj==0, min(padj[padj>0]), padj)

    # Signed score per gene, named by gene and sorted from highest to lowest
    score <- -log10(padj) * sign(res_dea$logFC)
    names(score) <- rownames(res_dea)
    ranks <- rev(score[order(score)])

    # Retain only pathways that overlap with the DEA gene list
    has_overlap <- vapply(gene_set, function(genes) any(names(ranks) %in% genes), logical(1))
    gene_set <- gene_set[has_overlap]

    enrichment <- fgsea(
        pathways=gene_set,
        stats=ranks,
        nperm=100000,
        minSize=5,
        maxSize=Inf
    )

    return(enrichment)

}

In [None]:
# Dot plot of the top enriched pathways of one fgsea result.
#
# The x axis is -log10(pval) signed by the enrichment score, the y axis lists
# the pathways, point size is |NES|; points passing `pval_thr` are coloured by
# direction, the others are drawn in black.
#
# gsea            fgsea result table (needs `pathway`, `pval`, `ES`, `NES`).
# pval_thr        Nominal p-value threshold, drawn as dashed lines and used for colouring.
# title           Plot title.
# size_range      Upper end of the point size range.
# pathway_suffix  Pattern removed from the pathway names wherever it occurs
#                 (NULL: names are left as they are).
# top             Number of pathways kept per direction, by smallest `pval`.
#
# Returns a ggplot object.
#
# NOTE(review): the palette is read from a variable `color` (element
# `infection`) that is not an argument and must exist in the calling
# environment. Its first entry is used for negative ES and its second for
# positive ES — confirm the order matches the contrast direction.
gsea_pl <- function(gsea, pval_thr=0.1, title=NULL, size_range=5, pathway_suffix=NULL, top=20) {

    # Set GSEA data frame
    gsea <- as.data.frame(gsea)
    gsea <- na.omit(gsea)

    # Set color names
    color <- color$infection

    # Fix pathway names
    if(!is.null(pathway_suffix)) {gsea$pathway <- gsub(pathway_suffix, "", gsea$pathway)}
    gsea$pathway <- gsub("_", " ", gsea$pathway)

    # Top hits per direction; head() returns fewer rows when fewer exist
    # instead of padding with NA rows
    gsea_up <- gsea[sign(gsea$NES)==+1, ]
    gsea_down <- gsea[sign(gsea$NES)==-1, ]

    gsea_up <- head(gsea_up[order(gsea_up$pval), ], top)
    gsea_down <- head(gsea_down[order(gsea_down$pval), ], top)

    gsea <- rbind(gsea_up, gsea_down)
    gsea <- distinct(gsea)

    # Add color
    gsea$color <- ifelse(sign(gsea$ES)==-1, names(color)[1], names(color)[2])
    gsea$color <- ifelse(gsea$pval<=pval_thr, gsea$color, NA)

    gsea$sign_log_pval_values <- -log10(gsea$pval) * sign(gsea$ES)

    # Short pathway names. Truncation can make two names identical, which would
    # make factor() fail on duplicated levels, so duplicates get a numeric suffix.
    gsea$pathway <- make.unique(str_sub(gsea$pathway, 1, 40))

    # Order hits
    gsea <- gsea[order(gsea$sign_log_pval_values), ]
    gsea$pathway <- factor(gsea$pathway, levels=gsea$pathway)

    # Symmetric x-range that always shows the threshold lines
    x_max <- max(abs(gsea$sign_log_pval_values)) + 0.1
    if(x_max<abs(log10(pval_thr))) {x_max <- abs(log10(pval_thr)) + 0.1}

    # Plot
    pl <- ggplot(gsea, aes(x=sign_log_pval_values, y=pathway, color=color)) +

        geom_vline(xintercept=-log10(pval_thr), linetype="dashed") +
        geom_vline(xintercept=log10(pval_thr), linetype="dashed") +

        geom_point(aes(size=abs(NES))) +

        ggtitle(title) +
        # The plotted values come from the nominal `pval` column, not `padj`,
        # so the axis label says so
        xlab("Signed -log10 p-value") + ylab("") +
        xlim(-x_max, x_max) +
        scale_color_manual(values=color, na.value="black", drop=FALSE) +
        scale_size(range=c(0, size_range)) +
        guides(

            color=guide_legend(order=1, title="Group", size=5, keywidth=0.75, keyheight=0.75),
            size=guide_legend(order=2, title="Abs. (NES)", keywidth=0.75, keyheight=0.75)

        ) +

        theme(

            legend.position="right",
            legend.justification="top",
            axis.text.y=element_text(hjust=1, vjust=0.5)

        )

    return(pl)

}

# Run DEA

In [None]:
# Load the Seurat object from the project's analysis folder
# (path is relative to the working directory set above)
so <- readRDS("data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/pp.rds")

In [None]:
# Remember the factor level order of the cell type annotation; it is used to
# order the per-cell-type list after splitting.
# levels() returns NULL if the column is not a factor.
cell_type_order <- levels(so$leiden_cell_type_main)

In [None]:
# Split into a list with one Seurat object per cell type; from here on `so`
# is that list, not a single object
so <- SplitObject(so, split.by="leiden_cell_type_main")

In [None]:
# Order cell types by the factor level order recorded before splitting.
# Only levels that actually exist in the split list are used: indexing by a
# level without cells would add a NULL entry and break the lapply() calls
# below. If no level order is available (the column was not a factor), the
# split order is kept instead of emptying the list.
if (!is.null(cell_type_order)) {
    so <- so[intersect(cell_type_order, names(so))]
}

In [None]:
# Subset features: filter genes separately within every cell type, using the
# notebook-level cnt_min / cell_min thresholds
so <- lapply(so, feature_select, cnt_min=cnt_min, cell_min=cell_min)

In [None]:
# DEA voom fit (one fit per cell type), cached on disk.
# The cache has its own file name: "baseline_vs_cpg.rds" is also the path the
# DEA result list is saved to further down, so sharing it would overwrite the
# cached fits with result tables and a later cache_test <- TRUE run would load
# the wrong object.
fit_cache_path <- "result/dea/scRNAseq/limma/voom_fit.rds"

if (cache_test && file.exists(fit_cache_path)) {

    fit <- readRDS(fit_cache_path)

} else {

    # Also reached when caching is requested but no cache file exists yet
    fit <- lapply(so, limma_fit)
    saveRDS(fit, fit_cache_path)

}

## Baseline vs CpG

In [None]:
# Contrast is built as first minus second (CpG - Baseline), so a positive
# logFC means higher expression in CpG
contrasts_vec <- c("CpG", "Baseline")

In [None]:
# Test the contrast in every cell type, then discard the cell types for which
# contrasts_fit() returned NULL (a contrast group is missing from that fit)
res_dea_1 <- lapply(fit, contrasts_fit, contrasts_vec=contrasts_vec)
res_dea_1 <- Filter(Negate(is.null), res_dea_1)

In [None]:
# Persist the DEA tables twice: as a workbook and as an RDS file holding the list
openxlsx::write.xlsx(res_dea_1, file="result/dea/scRNAseq/limma/baseline_vs_cpg.xlsx")
saveRDS(res_dea_1, file="result/dea/scRNAseq/limma/baseline_vs_cpg.rds")

## Volcano 

In [None]:
# Wide canvas for a row of four panels
options(repr.plot.width=4*6, repr.plot.height=6)

# One plot per cell type, coloured with the project's infection palette
vp_1 <- lapply(names(res_dea_1), function(cell_type) {

    vp(
        res_dea_1[[cell_type]],
        title=cell_type,
        log2_thr=log2_thr,
        adj_pval_thr=adj_pval_thr,
        color_pos=unlist(color$infection["CpG"]),
        color_neg=unlist(color$infection["Baseline"])
    ) + theme_global_set(size_select=1)

})

print(ggpubr::ggarrange(plotlist=vp_1, ncol=4, common.legend=TRUE, legend="bottom"))

In [None]:
# Write every per-cell-type plot to its own page of one PDF
pdf("result/dea/scRNAseq/limma/baseline_vs_cpg.pdf", onefile=TRUE, width=6, height=6)

# Iterate over the list itself: seq(length(x)) counts 1, 0 for an empty list,
# which would raise an error and leave the PDF device open
for (p in vp_1) {print(p)}

dev.off()

## Gene set enrichment analysis (Hallmark)

In [None]:
# Mouse gene sets of MSigDB category "H", reshaped into a named list:
# one character vector of gene symbols per gene set name
gs_mm <- msigdbr(species="Mus musculus", category="H", subcategory=NULL)
gs_mm <- split(x=gs_mm$gene_symbol, f=gs_mm$gs_name)

In [None]:
# Run the enrichment once per cell type, against whatever gene sets are
# currently stored in gs_mm
res_gsea_1 <- lapply(res_dea_1, function(res_dea) gsea(res_dea=res_dea, gene_set=gs_mm))

In [None]:
# Wide, tall canvas for a row of four pathway panels
options(repr.plot.width=4*7, repr.plot.height=12)

# One enrichment dot plot per cell type; "HALLMARK" is stripped from the pathway names
gsea_pl_1 <- lapply(names(res_gsea_1), function(cell_type) {

    gsea_pl(
        res_gsea_1[[cell_type]],
        pval_thr=0.1,
        title=cell_type,
        size_range=5,
        pathway_suffix="HALLMARK",
        top=25
    )

})

print(ggpubr::ggarrange(plotlist=gsea_pl_1, ncol=4, common.legend=TRUE, legend="bottom"))

## Gene set enrichment analysis (WikiPathways)

In [None]:
# Mouse WikiPathways gene sets. msigdbr labels this sub-collection
# "CP:WIKIPATHWAYS" (canonical pathways prefix included); the bare
# "WIKIPATHWAYS" label matches no gene set.
gs_mm <- msigdbr(species="Mus musculus", category="C2", subcategory="CP:WIKIPATHWAYS")

# Named list: one character vector of gene symbols per gene set name
gs_mm <- split(x=gs_mm$gene_symbol, f=gs_mm$gs_name)

# Fail loudly instead of running the enrichment on an empty collection
if (length(gs_mm) == 0) {
    stop("msigdbr returned no WikiPathways gene sets; check the sub-collection label", call.=FALSE)
}

In [None]:
# Run the enrichment once per cell type against the gene sets currently stored
# in gs_mm; this reassigns res_gsea_1, replacing the result of the previous run
res_gsea_1 <- lapply(res_dea_1, function(res_dea) gsea(res_dea=res_dea, gene_set=gs_mm))

In [None]:
# Wide, tall canvas for a row of four pathway panels
options(repr.plot.width=4*7, repr.plot.height=12)

# One enrichment dot plot per cell type; "WP" is stripped from the pathway names
gsea_pl_1 <- lapply(names(res_gsea_1), function(cell_type) {

    gsea_pl(
        res_gsea_1[[cell_type]],
        pval_thr=0.1,
        title=cell_type,
        size_range=5,
        pathway_suffix="WP",
        top=25
    )

})

print(ggpubr::ggarrange(plotlist=gsea_pl_1, ncol=4, common.legend=TRUE, legend="bottom"))

# Session info 

In [None]:
# Record the R version and the attached/loaded package versions for reproducibility
sessionInfo()