# Limma - Differential expression analysis (DEA)

https://support.bioconductor.org/p/p133524/

In [None]:
# Silence all warnings for the rest of the session (a negative `warn` level makes R ignore them)
options(warn=-1)

In [None]:
library_load <- suppressMessages(
    
    suppressWarnings(
    
            list(
                
                # Seurat 
                library(Seurat), 
                
                # RNA seq
                library(edgeR), 

                # Data 
                library(tidyverse), 
                library(openxlsx), 

                # Plotting 
                library(ComplexHeatmap), 
                library(circlize), 
                library(viridis), 
                library(ggplotify), 
                library(patchwork), 
                library(RColorBrewer), 

                # Python 
                library(reticulate)
            
            )
    )
)

In [None]:
# Configure reticulate: select the project conda environment through the given conda binary,
# then print the Python configuration that reticulate resolved
use_condaenv(condaenv='p.3.10.16-FD20200109SPLENO', conda="/nobackup/peer/fdeckert/miniconda3/bin/conda", required=NULL)
py_config()

In [None]:
# Silence warnings and dplyr's summarise() grouping messages
options(warn=-1)
options(dplyr.summarise.inform=FALSE)

In [None]:
# Fixed seed for R's random number generator
random_seed <- 42
set.seed(random_seed)

In [None]:
# Set working directory to project root; all relative paths below are resolved against it
setwd("/research/peer/fdeckert/FD20200109SPLENO")

In [None]:
# Source project-wide plotting helpers
# NOTE(review): theme_global_set() and the `color` palette list used further down are not defined
# in this notebook and presumably come from this file - confirm
source("plotting_global.R")

In [None]:
# Plotting theme: make the project theme the ggplot2 default
ggplot2::theme_set(theme_global_set(size_select=1)) # From project global source()

# Parameters

In [None]:
# Gene filter thresholds handed to limma_fit() / feature_select()
pct_min <- 5     # minimum percentage of cells with a non-zero count
cnt_min <- 3     # count a cell has to reach for the gene to count as expressed
cell_min <- 3    # minimum number of cells reaching cnt_min

# Significance thresholds for the differential expression results
adj_pval_thr <- 0.01
log2_thr <- 0.50

# Significance threshold for the gene set enrichment results
adj_pval_gsea_thr <- 0.1

# Switch read by the "DEA voom fit" cells: TRUE loads the saved fit from disk, FALSE recomputes
# and saves it. Only a default is set here so a value defined earlier is not overwritten.
if (!exists("cache_fit")) cache_fit <- FALSE

# Import data

In [None]:
# Load Seurat object from a path relative to the working directory
so <- readRDS("data/scRNAseq/object/pp.rds")

# Helper functions

In [None]:
# Select genes that pass a detection-rate filter followed by an expression-level filter.
#
# so:       Seurat object; counts are read from the "counts" layer of the "RNA" assay
# pct_min:  minimum percentage (0-100) of cells in which the gene has a non-zero count
# cnt_min:  count a cell has to reach for the gene to be considered expressed in that cell
# cell_min: minimum number of cells that have to reach cnt_min
#
# Returns the row names (genes) of the count matrix that pass both filters.
feature_select <- function(so, pct_min=0, cnt_min=3, cell_min=1) {

    # Get count matrix
    cnt <- GetAssayData(so, assay="RNA", layer="counts")

    # Filter by percentage of cells with a non-zero count.
    # drop=FALSE keeps a matrix even when a single gene is left; without it the subset
    # collapses to a plain vector and the rowSums()/rownames() calls below break.
    pct_detected <- 100 * rowSums(cnt > 0) / ncol(cnt)
    cnt <- cnt[pct_detected >= pct_min, , drop=FALSE]

    # Filter by expression: at least cell_min cells with a count of cnt_min or more
    n_expressing <- rowSums(cnt >= cnt_min)
    cnt <- cnt[n_expressing >= cell_min, , drop=FALSE]

    # Return genes
    return(rownames(cnt))

}

In [None]:
# Fit a voom/limma group-means model on the single-cell counts of one Seurat object.
#
# so:                         Seurat object; reads the metadata columns `celltype_low` (message only),
#                             `sample_group` (gene filtering) and `group` (design matrix)
# pct_min, cnt_min, cell_min: thresholds forwarded to feature_select()
#
# Genes are filtered separately within every `sample_group`; the union of the genes passing
# in any group is kept. Every cell is one column of the model; the design has one coefficient
# per level of `group` and no intercept.
#
# Returns the fit object produced by edgeR::voomLmFit().
limma_fit <- function(so, pct_min=0, cnt_min=0, cell_min=0) {

    # collapse keeps this a single readable line if more than one cell type is present
    message(paste0("Limma fit ", paste(unique(so$celltype_low), collapse=", "), "(", ncol(so), ")"))

    # Per-group gene filter, then union across groups
    genes <- lapply(unique(so$sample_group), function(i) {

        feature_select(so[, so$sample_group==i], pct_min=pct_min, cnt_min=cnt_min, cell_min=cell_min)

    })

    genes <- unique(unlist(genes))

    message(paste0("Using ", length(genes), " genes after filtering"))
    so <- so[genes, ]

    # Get counts (layer= as in feature_select(); slot= is the deprecated spelling)
    cnt <- GetAssayData(so, assay="RNA", layer="counts")

    # Dense matrix for voomLmFit
    cnt <- as.matrix(cnt)

    # Grouping variable, one entry per cell
    group <- as.character(so[["group", drop=TRUE]])

    # Design matrix without intercept: one indicator column per group
    design <- model.matrix(~0+group)

    # Strip only the "group" prefix added by model.matrix(); an unanchored gsub() would also
    # remove "group" from inside a level name
    colnames(design) <- sub("^group", "", colnames(design))

    # Run voomLmFit
    fit <- edgeR::voomLmFit(

        counts=cnt,
        design=design,
        normalize.method="none",
        lib.size=NULL,
        sample.weights=FALSE,
        plot=FALSE

    )

    return(fit)

}

In [None]:
# Test one pairwise contrast on a fit returned by limma_fit().
#
# fit:           limma fit object with one coefficient per group
# contrasts_vec: character vector with two coefficient names; the contrast is
#                contrasts_vec[1] - contrasts_vec[2], so a positive logFC means higher in the
#                first group. Only the first two elements are used.
#
# Returns the full limma::topTable() result (all genes, sorted by p-value) with the extra
# columns `gene`, `n_cells_1` and `n_cells_2`, or NULL when one of the groups is not a
# coefficient of the fit.
contrasts_fit <- function(fit, contrasts_vec) {

    coef_names <- colnames(fit$coefficients)

    # Nothing to test when a group is absent from this fit
    if (!all(contrasts_vec %in% coef_names)) {

        return(NULL)

    }

    # Store cell numbers: column sums of the design, which count the cells per group when the
    # design columns are 0/1 indicators as built by limma_fit()
    n_cells_1 <- sum(fit$design[, contrasts_vec[1]])
    n_cells_2 <- sum(fit$design[, contrasts_vec[2]])

    # Contrast fit
    contrast_str <- paste0(contrasts_vec[1], "-", contrasts_vec[2])
    contrast_mat <- limma::makeContrasts(contrasts=contrast_str, levels=coef_names)
    cfit <- limma::contrasts.fit(fit, contrasts=contrast_mat)

    # eBayes fit
    efit <- limma::eBayes(cfit)

    # Get result table; number= is spelled out instead of relying on partial matching of n=
    res <- limma::topTable(efit, coef=1, number=Inf, sort.by="P", p.value=1, lfc=0)
    res$gene <- rownames(res)

    # Add cell number
    res$n_cells_1 <- n_cells_1
    res$n_cells_2 <- n_cells_2

    return(res)

}

In [None]:
# Plot a limma result table as average expression (x) against log2 fold change (y).
#
# dea:          data frame with columns AveExpr, logFC and adj.P.Val and gene names as row
#               names; n_cells_1 / n_cells_2 (first row) are printed in the corners
# log2_thr:     absolute logFC needed to call a gene significant; also drawn as dotted lines
# adj_pval_thr: adjusted p-value cutoff for significance
# top_label:    number of genes labelled per direction and per ranking (by logFC and by adj.P.Val)
# title:        plot title
# color_pos:    named colour for significant genes with logFC > 0 (the name is the legend key)
# color_neg:    named colour for significant genes with logFC < 0
#
# The y axis is fixed to [-2, 2]; genes outside that range are not drawn.
# Returns a ggplot object.
vp <- function(dea, log2_thr=0, adj_pval_thr=0.1, top_label=10, title=NULL, color_pos=c("pos"="#0000ffff"), color_neg=c("neg"="#fd8008ff")) {

    # Replace adjusted p-values of exactly zero by the smallest positive one
    # (the overall minimum would be zero again and change nothing)
    pval_pos <- dea$adj.P.Val[!is.na(dea$adj.P.Val) & dea$adj.P.Val > 0]
    if (length(pval_pos) > 0) {
        dea$adj.P.Val <- ifelse(dea$adj.P.Val == 0, min(pval_pos), dea$adj.P.Val)
    }

    # Annotate significance by log2_thr and adj_pval_thr
    dea$sig <- ifelse((abs(dea$logFC) >= log2_thr) & (dea$adj.P.Val <= adj_pval_thr), "s", "ns")

    # Fill key by significance and direction; "n.s." has no entry in the manual fill scale below
    # and is therefore drawn in the scale's NA colour
    dea$color <- ifelse(dea$sig == "s" & dea$logFC > 0, names(color_pos), "n.s.")
    dea$color <- ifelse(dea$sig == "s" & dea$logFC < 0, names(color_neg), dea$color)

    # Significant genes per direction; which() keeps NA comparisons from creating NA rows
    dea_pos <- dea[which(dea$logFC > 0 & dea$sig == "s"), ]
    dea_neg <- dea[which(dea$logFC < 0 & dea$sig == "s"), ]

    # head() returns at most top_label rows; indexing with 1:top_label pads short tables with
    # NA rows and picks a row for top_label=0
    top_genes <- function(df, idx) rownames(head(df[idx, , drop=FALSE], top_label))

    labels <- unique(c(
        top_genes(dea_pos, rev(order(dea_pos$logFC))),
        top_genes(dea_neg, order(dea_neg$logFC)),
        top_genes(dea_pos, order(dea_pos$adj.P.Val)),
        top_genes(dea_neg, order(dea_neg$adj.P.Val))
    ))

    # Set labels
    dea$label <- ifelse(rownames(dea) %in% labels, rownames(dea), NA)

    # Plot
    plot <- ggplot(dea, aes(x=AveExpr, y=logFC, fill=color, label=label)) +

        geom_point(size=4, shape=21, color="white") +
        # yintercept as a parameter draws one line; mapped through aes() it is repeated per row
        geom_hline(yintercept=-log2_thr, linetype="dotted", colour="black") +
        geom_hline(yintercept=+log2_thr, linetype="dotted", colour="black") +
        ggrepel::geom_text_repel(segment.color="black", force=20, force_pull=1, max.overlaps=getOption("ggrepel.max.overlaps", default=100), size=5, alpha=1, show.legend=FALSE, segment.size=0.1, color='black') +
        scale_y_continuous(limits=c(-2, +2)) +
        ggtitle(title) + xlab("average expression") + ylab("average log2FC") +
        scale_fill_manual(values=c(color_pos, color_neg), name="DEA") +

        guides(

            color=guide_legend(order=1, title="Group", size=2, keywidth=0.75, keyheight=0.75),
            alpha="none"

        ) +

        theme(

            legend.position="right",
            aspect.ratio=1

        ) +

        # Cell numbers of the two contrasted groups in the upper and lower right corner
        annotate("text", x=Inf, y=Inf, label=paste("n =", dea$n_cells_1[1]), hjust=1.1, vjust=1.5, size=6) +
        annotate("text", x=Inf, y=-Inf, label=paste("n =", dea$n_cells_2[1]), hjust=1.1, vjust=-0.5, size=6)

    return(plot)

}

In [None]:
# Run fgsea on a limma result table against mouse MSigDB gene sets.
#
# result:      data frame with columns P.Value and logFC and gene symbols as row names
# category:    MSigDB category passed to msigdbr::msigdbr()
# subcategory: MSigDB subcategory passed to msigdbr::msigdbr()
#
# Genes are ranked by -log10(P.Value) * sign(logFC), i.e. by the raw (not adjusted) p-value.
# Returns the table produced by fgsea::fgsea().
gsea <- function(result, category="H", subcategory=NULL) {

    # Set mgi symbols
    result$mgi_symbol <- rownames(result)

    # Get gene sets as a named list: gene set name -> gene symbols.
    # Only x and f are passed to split(); a third positional argument would be bound to `drop`.
    msig <- msigdbr::msigdbr(species="Mus musculus", category=category, subcategory=subcategory)
    gene_set <- split(x=msig$gene_symbol, f=msig$gs_name)

    # Drop rows with missing values
    result <- na.omit(result)

    # Make ranks; p-values of exactly zero are set to the smallest positive p-value so that
    # the log transform stays finite
    result$P.Value <- ifelse(result$P.Value==0, min(result$P.Value[result$P.Value>0]), result$P.Value)
    result$sign_log_p_values <- -log10(result$P.Value) * sign(result$logFC)

    ranks <- setNames(result$sign_log_p_values, result$mgi_symbol)

    # Decreasing order
    ranks <- rev(ranks[order(ranks)])

    # Retain only pathways that overlap with the result list
    has_overlap <- vapply(gene_set, function(x) any(result$mgi_symbol %in% x), logical(1))
    gene_set <- gene_set[has_overlap]

    res <- fgsea::fgsea(

        pathways=gene_set,
        stats=ranks,
        nperm=100000,
        minSize=5,
        maxSize=500

    )

    return(res)

}

In [None]:
# Dot plot of the top enriched and depleted pathways of an fgsea result.
#
# gsea:           fgsea result table (columns pathway, pval, padj, ES, NES are used)
# adj_pval_thr:   adjusted p-value cutoff; drawn as dashed lines, and only pathways at or below
#                 it are coloured (the others use the scale's NA colour, black)
# title:          plot title
# color:          two colours, for positive and negative enrichment scores
# color_names:    legend keys for the two colours
# size_range:     upper end of the point size range (size encodes |NES|)
# pathway_suffix: optional pattern removed from pathway names via gsub(); underscores are
#                 always replaced by spaces
# top:            number of pathways kept per direction, ranked by p-value
#
# Returns a ggplot object.
gsea_plot <- function(gsea, adj_pval_thr=0.1, title=NULL, color=c(RColorBrewer::brewer.pal(8, "Set1")[1], RColorBrewer::brewer.pal(8, "Set1")[2]), color_names=c("Pos", "Neg"), size_range=5, pathway_suffix=NULL, top=20) {

    # Set GSEA data frame
    gsea <- as.data.frame(gsea)
    gsea <- na.omit(gsea)

    # Set color names
    color <- setNames(color, color_names)

    # Fix pathway names
    if(!is.null(pathway_suffix)) {gsea$pathway <- gsub(pathway_suffix, "", gsea$pathway)}
    gsea$pathway <- gsub("_", " ", gsea$pathway)

    # Top hits per direction. head() returns at most `top` rows; indexing with 1:top pads short
    # tables with NA rows and selects a row for top=0.
    gsea_up <- gsea[sign(gsea$NES)==+1, ]
    gsea_down <- gsea[sign(gsea$NES)==-1, ]

    gsea_up <- head(gsea_up[order(gsea_up$pval), ], top)
    gsea_down <- head(gsea_down[order(gsea_down$pval), ], top)

    gsea <- rbind(gsea_up, gsea_down)
    gsea <- na.omit(gsea)
    gsea <- distinct(gsea)

    # Add color: direction of the enrichment score, NA when not significant
    gsea$color <- ifelse(sign(gsea$ES)==1, names(color)[1], names(color)[2])
    gsea$color <- ifelse(gsea$padj<=adj_pval_thr, gsea$color, NA)

    gsea$sign_log_pval_values <- -log10(gsea$padj) * sign(gsea$ES)

    # Order hits
    gsea <- gsea[rev(order(gsea$sign_log_pval_values)), ]
    gsea$pathway <- factor(gsea$pathway, levels=rev(gsea$pathway))

    # Symmetric x range that always contains the threshold lines; including the threshold in
    # max() also covers an empty table
    x_max <- ceiling(max(c(abs(gsea$sign_log_pval_values), abs(log10(adj_pval_thr)))))

    # Plot
    plot <- ggplot(gsea, aes(x=sign_log_pval_values, y=pathway, color=color)) +

        geom_vline(xintercept=-log10(adj_pval_thr), linetype="dashed") +
        geom_vline(xintercept=log10(adj_pval_thr), linetype="dashed") +

        geom_point(aes(size=abs(NES))) +

        ggtitle(title) +
        xlab("Signed -log10 adj. p-value") + ylab("") +
        scale_x_continuous(breaks=c(-x_max, 0, x_max), limits=c(-x_max, x_max)) +
        scale_color_manual(values=color, na.value="black", drop=FALSE) +
        scale_size(range=c(0, size_range)) +
        guides(

            color=guide_legend(order=1, title="Agent", keywidth=0.75, keyheight=0.75, override.aes=list(size=4)),
            size=guide_legend(order=2, title="Abs. (NES)", keywidth=0.75, keyheight=0.75)

        ) +

        theme(

            legend.position="bottom",
            legend.justification="top",
            axis.text.y=element_text(size=14, hjust=1, vjust=0.5, face="plain", margin=margin(t=0, r=2, b=0, l=0), color="black")

        )

    return(plot)

}

# Run DEA

In [None]:
# Set group variable: limma_fit() builds its design matrix from the `group` metadata column
so$group <- so$sample_group

# Fit BSF model 

In [None]:
# Keep only the cells whose `facility` metadata equals "BSF"
so_0 <- subset(so, subset=facility=="BSF")

In [None]:
# Prepare data: one Seurat object per value of `celltype_low`
so_0 <- SplitObject(so_0, split.by="celltype_low")

In [None]:
# Order: cell types present in the data, in the order of the names of color$celltype_low
# NOTE(review): `color` is not defined in this notebook; presumably a palette list from plotting_global.R - confirm
celltype_order <- names(color$celltype_low)[names(color$celltype_low) %in% names(so_0)]

In [None]:
# Order cell types (cell types without an entry in color$celltype_low are dropped here)
so_0 <- so_0[celltype_order]

In [None]:
# DEA voom fit, one model per cell type
# `cache_fit` must be defined before this cell runs: FALSE recomputes and saves the fits,
# TRUE loads the previously saved fits from disk
if(!cache_fit) {

    fit <- lapply(so_0, function(x) limma_fit(x, pct_min=pct_min, cnt_min=cnt_min, cell_min=cell_min))
    saveRDS(fit, "result/dea/scRNAseq/limma/fit_0.rds")
    
} else {
    
    fit <- readRDS("result/dea/scRNAseq/limma/fit_0.rds")
    
}

# Bl6 CpG D6 vs Bl6 NaCl D6

In [None]:
# Contrast is first minus second element (see contrasts_fit()): positive logFC = higher in Bl6_CpG_D6
contrasts_vec <- c("Bl6_CpG_D6", "Bl6_NaCl_D6")

In [None]:
# Test the contrast per cell type and drop cell types for which contrasts_fit() returned NULL
res_1 <- lapply(fit, contrasts_fit, contrasts_vec=contrasts_vec)
res_1 <- Filter(Negate(is.null), res_1)

In [None]:
options(repr.plot.width=6*6, repr.plot.height=6)

# One plot per cell type, shown side by side with a shared legend
vp_1 <- lapply(names(res_1), function(i) vp(res_1[[i]], title=i, log2_thr=log2_thr, adj_pval_thr=adj_pval_thr, color_pos=unlist(color$infection["CpG"]), color_neg=unlist(color$infection["NaCl"])) + theme_global_set(size_select=1))
ggpubr::ggarrange(plotlist=vp_1, ncol=6, common.legend=TRUE, legend="bottom") %>% print()

In [None]:
# One plot per PDF page; seq_along() yields no iterations for an empty list, whereas
# seq(length(x)) would yield c(1, 0) and fail on the first index
pdf(paste0("result/dea/scRNAseq/limma/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".pdf"), onefile=TRUE, width=6, height=6)
for (i in seq_along(vp_1)) {plot(vp_1[[i]])}
dev.off()

In [None]:
# Export the result tables (one sheet per cell type) and the list itself
names(res_1) <- make.names(names(res_1))
openxlsx::write.xlsx(res_1, paste0("result/dea/scRNAseq/limma/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".xlsx"))
saveRDS(res_1, paste0("result/dea/scRNAseq/limma/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".rds"))

# Fit VBC model 

In [None]:
# Keep only the cells whose `facility` metadata equals "VBC"
so_1 <- subset(so, subset=facility=="VBC")

In [None]:
# Prepare data: one Seurat object per value of `celltype_low`
so_1 <- SplitObject(so_1, split.by="celltype_low")

In [None]:
# Order: cell types present in the data, in the order of the names of color$celltype_low
# NOTE(review): `color` is not defined in this notebook; presumably a palette list from plotting_global.R - confirm
celltype_order <- names(color$celltype_low)[names(color$celltype_low) %in% names(so_1)]

In [None]:
# Order cell types (cell types without an entry in color$celltype_low are dropped here)
so_1 <- so_1[celltype_order]

In [None]:
# DEA voom fit, one model per cell type; this overwrites the `fit` object of the BSF section
# `cache_fit` must be defined before this cell runs: FALSE recomputes and saves the fits,
# TRUE loads the previously saved fits from disk
if(!cache_fit) {

    fit <- lapply(so_1, function(x) limma_fit(x, pct_min=pct_min, cnt_min=cnt_min, cell_min=cell_min))
    saveRDS(fit, "result/dea/scRNAseq/limma/fit_1.rds")
    
} else {
    
    fit <- readRDS("result/dea/scRNAseq/limma/fit_1.rds")
    
}

# IFNAR fl/fl LysM cre baseline vs IFNAR fl/fl baseline 

In [None]:
# Contrast is first minus second element (see contrasts_fit()): positive logFC = higher in IFNAR_fl_LysM_cre_Baseline_D0
contrasts_vec <- c("IFNAR_fl_LysM_cre_Baseline_D0", "IFNAR_fl_Baseline_D0")

In [None]:
# Test the contrast per cell type and drop cell types for which contrasts_fit() returned NULL
res_1 <- lapply(fit, contrasts_fit, contrasts_vec=contrasts_vec)
res_1 <- Filter(Negate(is.null), res_1)

In [None]:
# Show the result tables inline
res_1

In [None]:
options(repr.plot.width=6*6, repr.plot.height=6)

# One plot per cell type, shown side by side with a shared legend
vp_1 <- lapply(names(res_1), function(i) vp(res_1[[i]], title=i, log2_thr=log2_thr, adj_pval_thr=adj_pval_thr, color_pos=unlist(color$genotype["IFNAR_fl_LysM_cre"]), color_neg=unlist(color$genotype["IFNAR_fl"])) + theme_global_set(size_select=1))
ggpubr::ggarrange(plotlist=vp_1, ncol=4, common.legend=TRUE, legend="bottom") %>% print()

In [None]:
# Show the individual plots inline
vp_1

In [None]:
# One plot per PDF page; seq_along() yields no iterations for an empty list, whereas
# seq(length(x)) would yield c(1, 0) and fail on the first index
pdf(paste0("result/dea/scRNAseq/limma/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".pdf"), onefile=TRUE, width=6, height=6)
for (i in seq_along(vp_1)) {plot(vp_1[[i]])}
dev.off()

In [None]:
# Export the result tables (one sheet per cell type) and the list itself
names(res_1) <- make.names(names(res_1))
openxlsx::write.xlsx(res_1, paste0("result/dea/scRNAseq/limma/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".xlsx"))
saveRDS(res_1, paste0("result/dea/scRNAseq/limma/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".rds"))

# IFNAR fl/fl LysM cre D1 vs IFNAR fl/fl D1 

In [None]:
# Contrast is first minus second element (see contrasts_fit()): positive logFC = higher in IFNAR_fl_LysM_cre_CpG_D1
contrasts_vec <- c("IFNAR_fl_LysM_cre_CpG_D1", "IFNAR_fl_CpG_D1")

In [None]:
# Test the contrast per cell type and drop cell types for which contrasts_fit() returned NULL
res_1 <- lapply(fit, contrasts_fit, contrasts_vec=contrasts_vec)
res_1 <- Filter(Negate(is.null), res_1)

In [None]:
options(repr.plot.width=6*6, repr.plot.height=6)

# One plot per cell type, shown side by side with a shared legend
vp_1 <- lapply(names(res_1), function(i) vp(res_1[[i]], title=i, log2_thr=log2_thr, adj_pval_thr=adj_pval_thr, color_pos=unlist(color$genotype["IFNAR_fl_LysM_cre"]), color_neg=unlist(color$genotype["IFNAR_fl"])) + theme_global_set(size_select=1))
ggpubr::ggarrange(plotlist=vp_1, ncol=4, common.legend=TRUE, legend="bottom") %>% print()

In [None]:
# One plot per PDF page; seq_along() yields no iterations for an empty list, whereas
# seq(length(x)) would yield c(1, 0) and fail on the first index
pdf(paste0("result/dea/scRNAseq/limma/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".pdf"), onefile=TRUE, width=6, height=6)
for (i in seq_along(vp_1)) {plot(vp_1[[i]])}
dev.off()

In [None]:
# Export the result tables (one sheet per cell type) and the list itself
names(res_1) <- make.names(names(res_1))
openxlsx::write.xlsx(res_1, paste0("result/dea/scRNAseq/limma/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".xlsx"))
saveRDS(res_1, paste0("result/dea/scRNAseq/limma/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".rds"))

# IFNAR fl/fl LysM cre D3 vs IFNAR fl/fl D3 

In [None]:
# Contrast is first minus second element (see contrasts_fit()): positive logFC = higher in IFNAR_fl_LysM_cre_CpG_D3
contrasts_vec <- c("IFNAR_fl_LysM_cre_CpG_D3", "IFNAR_fl_CpG_D3")

In [None]:
# Test the contrast per cell type and drop cell types for which contrasts_fit() returned NULL
res_1 <- lapply(fit, contrasts_fit, contrasts_vec=contrasts_vec)
res_1 <- Filter(Negate(is.null), res_1)

In [None]:
options(repr.plot.width=6*6, repr.plot.height=6)

# One plot per cell type, shown side by side with a shared legend
vp_1 <- lapply(names(res_1), function(i) vp(res_1[[i]], title=i, log2_thr=log2_thr, adj_pval_thr=adj_pval_thr, color_pos=unlist(color$genotype["IFNAR_fl_LysM_cre"]), color_neg=unlist(color$genotype["IFNAR_fl"])) + theme_global_set(size_select=1))
ggpubr::ggarrange(plotlist=vp_1, ncol=4, common.legend=TRUE, legend="bottom") %>% print()

In [None]:
# One plot per PDF page; seq_along() yields no iterations for an empty list, whereas
# seq(length(x)) would yield c(1, 0) and fail on the first index
pdf(paste0("result/dea/scRNAseq/limma/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".pdf"), onefile=TRUE, width=6, height=6)
for (i in seq_along(vp_1)) {plot(vp_1[[i]])}
dev.off()

In [None]:
# Export the result tables (one sheet per cell type) and the list itself
names(res_1) <- make.names(names(res_1))
openxlsx::write.xlsx(res_1, paste0("result/dea/scRNAseq/limma/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".xlsx"))
saveRDS(res_1, paste0("result/dea/scRNAseq/limma/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".rds"))

# Session info 

In [None]:
# Record the R version and the attached / loaded package versions for this run
sessionInfo()