# Wilcox - Differential expression analysis (DEA)

In [None]:
# Silence all warnings for the rest of the session
options(warn = -1)

In [None]:
library_load <- suppressMessages(
    
    list(
        
        # Seurat 
        library(Seurat), 
        library(SeuratWrappers), 
    
        # IHW 
        library(IHW), 
        
        # Data 
        library(tidyverse), 
        library(openxlsx), 
        
        # Plotting 
        library(ComplexHeatmap), 
        library(circlize), 
        library(viridis), 
        library(ggplotify), 
        library(patchwork), 
        
        # Python 
        library(reticulate)
        
    )
)

In [None]:
# Bind reticulate to the project conda environment and show the configuration
use_condaenv(
    condaenv="p.3.10.16-FD20200109SPLENO", 
    conda="/nobackup/peer/fdeckert/miniconda3/bin/conda", 
    required=NULL
)
py_config()

In [None]:
# Silence warnings and the dplyr summarise() grouping message
options(warn=-1, dplyr.summarise.inform=FALSE)

In [None]:
# Fixed seed for reproducibility
random_seed <- 42
set.seed(random_seed)

In [None]:
# All relative paths below are resolved against the project root
setwd("/research/peer/fdeckert/FD20200109SPLENO")

In [None]:
# Project helper scripts (plotting globals and DEA plotting functions)
invisible(lapply(c("plotting_global.R", "bin/dea_pl.R"), source))

In [None]:
# Default ggplot2 theme; theme_global_set() comes from the sourced project files
ggplot2::theme_set(theme_global_set(size_select=1))

# Parameter

In [None]:
# Gene pre-filter thresholds forwarded to wilcox() / feature_select()
pct_min <- 5
cnt_min <- 3
cell_min <- 3

# Significance thresholds handed to the volcano plots (v_pl)
adj_pval_thr <- 0.01
log2_thr <- 0.50

# Adjusted p-value threshold for GSEA (not used in the cells shown here)
adj_pval_gsea_thr <- 0.1

# Import data

In [None]:
# Read the Seurat object from disk (path is relative to the project root)
so <- readRDS(file="data/scRNAseq/object/pp.rds")

# Helper functions

In [None]:
#' Select genes that pass a detection-rate and an expression-level filter
#'
#' @param so Seurat object; raw counts are read from the "RNA" assay.
#' @param pct_min Minimum percentage (0-100) of cells with a non-zero count.
#' @param cnt_min Count a cell has to reach for the gene to be counted as expressed.
#' @param cell_min Minimum number of cells that have to reach `cnt_min`.
#' @return Names of the genes passing both filters, in count-matrix row order.
feature_select <- function(so, pct_min=0, cnt_min=3, cell_min=1) {

    # Get count matrix 
    cnt <- GetAssayData(so, assay="RNA", layer="counts")

    # Filter by percentage of cells with a non-zero count.
    # drop=FALSE keeps a matrix even if a single gene remains; without it the
    # result collapses to a vector and the next rowSums() call fails.
    pct_expr <- 100*rowSums(cnt>0)/ncol(cnt)
    cnt <- cnt[pct_expr>=pct_min, , drop=FALSE]

    # Filter by expression: at least cell_min cells with >= cnt_min counts
    n_expr <- rowSums(cnt>=cnt_min)
    cnt <- cnt[n_expr>=cell_min, , drop=FALSE]

    # Return genes 
    genes <- rownames(cnt)
        
    return(genes)
    
}

In [None]:
#' Two-group differential expression test within one Seurat object
#'
#' Genes are pre-filtered per group with feature_select(), the object is
#' re-normalized, RunPresto() compares ident_1 against ident_2 and the raw
#' p-values are adjusted with IHW using the mean normalized expression of each
#' gene as covariate.
#'
#' @param so Seurat object.
#' @param ident Name of the metadata column that holds the group labels.
#' @param ident_1,ident_2 Group labels to compare.
#' @param only_pos Passed to RunPresto() as `only.pos`.
#' @param avg_log2FC_threshold Passed to RunPresto() as `logfc.threshold`.
#' @param pct_min,cnt_min,cell_min Gene filter thresholds forwarded to feature_select().
#' @param test_use Passed to RunPresto() as `test.use`.
#' @param verbose If TRUE, report the number of selected genes via message().
#' @return The RunPresto() result with the added columns `mean_exp`,
#'   `p_val_adj` (IHW), `gene`, `n_cells_1` and `n_cells_2`; NULL if either
#'   group has fewer than 3 cells or no gene passes the filter.
wilcox <- function(so, ident, ident_1=NULL, ident_2=NULL, only_pos=FALSE, avg_log2FC_threshold=0, pct_min=0, cnt_min=0, cell_min=0, test_use="wilcox", verbose=TRUE) {

    # Check number of cells before anything else: subsetting the object to a
    # group without cells fails, so the guard has to come first.
    # %in% (instead of ==) keeps NA labels from turning the counts into NA.
    group_label <- so@meta.data[[ident]]
    in_1 <- group_label %in% ident_1
    in_2 <- group_label %in% ident_2

    n_cells_1 <- sum(in_1)
    n_cells_2 <- sum(in_2)

    if(n_cells_1 < 3 || n_cells_2 < 3) {

        if(verbose) message(paste0(so$celltype_low[1], " skipped: fewer than 3 cells in at least one group"))

        return(NULL)

    }

    # Select genes per group (groups are taken from the `ident` column)
    genes_ident_1 <- feature_select(so[, colnames(so)[in_1]], pct_min=pct_min, cnt_min=cnt_min, cell_min=cell_min)
    genes_ident_2 <- feature_select(so[, colnames(so)[in_2]], pct_min=pct_min, cnt_min=cnt_min, cell_min=cell_min)

    genes <- union(genes_ident_1, genes_ident_2)

    # Exclude these genes from testing
    genes <- genes[!genes %in% c("Igha", "Igkc", "Tcrg-C1")]

    if(verbose) message(paste0(so$celltype_low[1], " genes selected: ", length(genes)))

    # Nothing to test
    if(length(genes) == 0) {

        return(NULL)

    }

    so <- so[genes, ]
    
    # Drop empty levels 
    so@meta.data <- droplevels(so@meta.data)

    so <- SetIdent(so, value=ident)

    # NOTE(review): normalization is run on the gene-filtered object - confirm
    # that this is intended.
    so <- NormalizeData(so)
    res <- RunPresto(so, ident.1=ident_1, ident.2=ident_2, logfc.threshold=avg_log2FC_threshold, min.pct=0, only.pos=only_pos, test.use=test_use)

    # Adjusted p-value with IHW.
    # The covariate is looked up by rownames(res) so that every p-value is
    # paired with the mean expression of its own gene, whatever order or
    # subset of genes the test returns.
    norm_exp <- GetAssayData(so, assay="RNA", layer="data")
    res$mean_exp <- rowMeans(norm_exp[rownames(res), , drop=FALSE])
    res$p_val_adj <- IHW::adj_pvalues(IHW::ihw(p_val ~ mean_exp, data=res, alpha=0.05))

    # Annotate results 
    res$gene <- rownames(res)

    # N cells per group 
    res$n_cells_1 <- n_cells_1
    res$n_cells_2 <- n_cells_2

    return(res)

}

# Run DEA

In [None]:
# Set group variable
so$group <- so$sample_group

In [None]:
# Remember the factor level order of the cell types before splitting
celltype_order <- levels(so$celltype_low)

In [None]:
# Prepare data: one Seurat object per cell type
so <- SplitObject(so, split.by="celltype_low")

In [None]:
# Order cell types.
# Only levels that are present after the split are indexed: indexing with an
# absent level would insert a NULL entry, and a NULL order (celltype_low not
# being a factor) would empty the list.
if (!is.null(celltype_order)) {
    so <- so[intersect(celltype_order, names(so))]
}

# Bl6 CpG D6 vs Bl6 NaCl D6

In [None]:
# Contrast given as c(ident_1, ident_2) for wilcox()
contrasts_vec <- c("Bl6_CpG_D6", "Bl6_NaCl_D6")

In [None]:
# Restrict every cell type to the two contrast groups. subset() can fail
# (e.g. when no cell matches); such cell types are reported and dropped,
# in line with the other contrasts in this notebook.
so_1 <- lapply(so, function(x) {
    tryCatch(
        subset(x, subset=group %in% contrasts_vec), 
        error=function(e) {
            message("Cell type skipped: ", conditionMessage(e))
            NULL
        }
    )
})
so_1 <- Filter(Negate(is.null), so_1)

In [None]:
# Run the test per cell type
res_1 <- lapply(so_1, function(x) {
    wilcox(
        so=x, 
        ident="group", 
        ident_1=contrasts_vec[1], 
        ident_2=contrasts_vec[2], 
        only_pos=FALSE, 
        avg_log2FC_threshold=0, 
        pct_min=pct_min, 
        cnt_min=cnt_min, 
        cell_min=cell_min, 
        test_use="wilcox"
    )
})

In [None]:
# wilcox() returns NULL for cell types it could not test
res_1 <- Filter(Negate(is.null), res_1)

In [None]:
options(repr.plot.width=6*6, repr.plot.height=6)

# One volcano plot per cell type
vp_1 <- lapply(names(res_1), function(i) {
    v_pl(
        res_1[[i]], 
        title=i, 
        log2_thr=log2_thr, 
        adj_pval_thr=adj_pval_thr, 
        point_size=2, 
        color_pos=unlist(color$infection["CpG"]), 
        color_neg=unlist(color$infection["NaCl"]), 
        top_label=20
    ) + theme_global_set(size_select=1)
})
ggpubr::ggarrange(plotlist=vp_1, ncol=6, common.legend=TRUE, legend="bottom") %>% print()

In [None]:
# One page per plot. Iterating over the list itself is safe for an empty
# list, and the device is closed even if a plot fails to render.
pdf(paste0("result/dea/scRNAseq/wilcox/sample_group/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".pdf"), onefile=TRUE, width=12, height=6)
invisible(tryCatch({for (p in vp_1) plot(p)}, finally=dev.off()))

In [None]:
# Make the cell-type names syntactically valid, then write the result list to xlsx and rds
names(res_1) <- make.names(names(res_1))
openxlsx::write.xlsx(res_1, paste0("result/dea/scRNAseq/wilcox/sample_group/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".xlsx"))
saveRDS(res_1, paste0("result/dea/scRNAseq/wilcox/sample_group/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".rds"))

# IFNAR fl/fl CpG D1 vs IFNAR fl/fl baseline

In [None]:
# Contrast given as c(ident_1, ident_2) for wilcox()
contrasts_vec <- c("IFNAR_fl_CpG_D1", "IFNAR_fl_Baseline_D0")

In [None]:
# Restrict every cell type to the two contrast groups. subset() can fail
# (e.g. when no cell matches); such cell types are reported and dropped.
so_1 <- lapply(so, function(x) {
    tryCatch(
        subset(x, subset=group %in% contrasts_vec), 
        error=function(e) {
            message("Cell type skipped: ", conditionMessage(e))
            NULL
        }
    )
})
so_1 <- Filter(Negate(is.null), so_1)

In [None]:
# Run the test per cell type
res_1 <- lapply(so_1, function(x) {
    wilcox(
        so=x, 
        ident="group", 
        ident_1=contrasts_vec[1], 
        ident_2=contrasts_vec[2], 
        only_pos=FALSE, 
        avg_log2FC_threshold=0, 
        pct_min=pct_min, 
        cnt_min=cnt_min, 
        cell_min=cell_min, 
        test_use="wilcox"
    )
})

In [None]:
# wilcox() returns NULL for cell types it could not test
res_1 <- Filter(Negate(is.null), res_1)

In [None]:
options(repr.plot.width=6*6, repr.plot.height=6)

# One volcano plot per cell type
vp_1 <- lapply(names(res_1), function(i) {
    v_pl(
        res_1[[i]], 
        title=i, 
        log2_thr=log2_thr, 
        adj_pval_thr=adj_pval_thr, 
        point_size=2, 
        color_pos=unlist(color$sample_group[contrasts_vec[1]]), 
        color_neg=unlist(color$sample_group[contrasts_vec[2]]), 
        top_label=20
    ) + theme_global_set(size_select=1)
})
ggpubr::ggarrange(plotlist=vp_1, ncol=4, common.legend=TRUE, legend="bottom") %>% print()

In [None]:
# One page per plot. Iterating over the list itself is safe for an empty
# list, and the device is closed even if a plot fails to render.
pdf(paste0("result/dea/scRNAseq/wilcox/sample_group/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".pdf"), onefile=TRUE, width=12, height=6)
invisible(tryCatch({for (p in vp_1) plot(p)}, finally=dev.off()))

In [None]:
# Make the cell-type names syntactically valid, then write the result list to xlsx and rds
names(res_1) <- make.names(names(res_1))
openxlsx::write.xlsx(res_1, paste0("result/dea/scRNAseq/wilcox/sample_group/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".xlsx"))
saveRDS(res_1, paste0("result/dea/scRNAseq/wilcox/sample_group/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".rds"))

# IFNAR fl/fl CpG D3 vs IFNAR fl/fl baseline

In [None]:
# Contrast given as c(ident_1, ident_2) for wilcox()
contrasts_vec <- c("IFNAR_fl_CpG_D3", "IFNAR_fl_Baseline_D0")

In [None]:
# Restrict every cell type to the two contrast groups. subset() can fail
# (e.g. when no cell matches); such cell types are reported and dropped.
so_1 <- lapply(so, function(x) {
    tryCatch(
        subset(x, subset=group %in% contrasts_vec), 
        error=function(e) {
            message("Cell type skipped: ", conditionMessage(e))
            NULL
        }
    )
})
so_1 <- Filter(Negate(is.null), so_1)

In [None]:
# Run the test per cell type
res_1 <- lapply(so_1, function(x) {
    wilcox(
        so=x, 
        ident="group", 
        ident_1=contrasts_vec[1], 
        ident_2=contrasts_vec[2], 
        only_pos=FALSE, 
        avg_log2FC_threshold=0, 
        pct_min=pct_min, 
        cnt_min=cnt_min, 
        cell_min=cell_min, 
        test_use="wilcox"
    )
})

In [None]:
# wilcox() returns NULL for cell types it could not test
res_1 <- Filter(Negate(is.null), res_1)

In [None]:
options(repr.plot.width=6*6, repr.plot.height=6)

# One volcano plot per cell type
vp_1 <- lapply(names(res_1), function(i) {
    v_pl(
        res_1[[i]], 
        title=i, 
        log2_thr=log2_thr, 
        adj_pval_thr=adj_pval_thr, 
        point_size=2, 
        color_pos=unlist(color$sample_group[contrasts_vec[1]]), 
        color_neg=unlist(color$sample_group[contrasts_vec[2]]), 
        top_label=20
    ) + theme_global_set(size_select=1)
})
ggpubr::ggarrange(plotlist=vp_1, ncol=4, common.legend=TRUE, legend="bottom") %>% print()

In [None]:
# One page per plot. Iterating over the list itself is safe for an empty
# list, and the device is closed even if a plot fails to render.
pdf(paste0("result/dea/scRNAseq/wilcox/sample_group/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".pdf"), onefile=TRUE, width=12, height=6)
invisible(tryCatch({for (p in vp_1) plot(p)}, finally=dev.off()))

In [None]:
# Make the cell-type names syntactically valid, then write the result list to xlsx and rds
names(res_1) <- make.names(names(res_1))
openxlsx::write.xlsx(res_1, paste0("result/dea/scRNAseq/wilcox/sample_group/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".xlsx"))
saveRDS(res_1, paste0("result/dea/scRNAseq/wilcox/sample_group/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".rds"))

# IFNAR fl/fl LysM cre CpG D1 vs IFNAR fl/fl LysM cre baseline

In [None]:
# Contrast given as c(ident_1, ident_2) for wilcox()
contrasts_vec <- c("IFNAR_fl_LysM_cre_CpG_D1", "IFNAR_fl_LysM_cre_Baseline_D0")

In [None]:
# Restrict every cell type to the two contrast groups. subset() can fail
# (e.g. when no cell matches); such cell types are reported and dropped.
so_1 <- lapply(so, function(x) {
    tryCatch(
        subset(x, subset=group %in% contrasts_vec), 
        error=function(e) {
            message("Cell type skipped: ", conditionMessage(e))
            NULL
        }
    )
})
so_1 <- Filter(Negate(is.null), so_1)

In [None]:
# Run the test per cell type
res_1 <- lapply(so_1, function(x) {
    wilcox(
        so=x, 
        ident="group", 
        ident_1=contrasts_vec[1], 
        ident_2=contrasts_vec[2], 
        only_pos=FALSE, 
        avg_log2FC_threshold=0, 
        pct_min=pct_min, 
        cnt_min=cnt_min, 
        cell_min=cell_min, 
        test_use="wilcox"
    )
})

In [None]:
# wilcox() returns NULL for cell types it could not test
res_1 <- Filter(Negate(is.null), res_1)

In [None]:
options(repr.plot.width=6*6, repr.plot.height=6)

# One volcano plot per cell type
vp_1 <- lapply(names(res_1), function(i) {
    v_pl(
        res_1[[i]], 
        title=i, 
        log2_thr=log2_thr, 
        adj_pval_thr=adj_pval_thr, 
        point_size=2, 
        color_pos=unlist(color$sample_group[contrasts_vec[1]]), 
        color_neg=unlist(color$sample_group[contrasts_vec[2]]), 
        top_label=20
    ) + theme_global_set(size_select=1)
})
ggpubr::ggarrange(plotlist=vp_1, ncol=4, common.legend=TRUE, legend="bottom") %>% print()

In [None]:
# One page per plot. Iterating over the list itself is safe for an empty
# list, and the device is closed even if a plot fails to render.
pdf(paste0("result/dea/scRNAseq/wilcox/sample_group/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".pdf"), onefile=TRUE, width=12, height=6)
invisible(tryCatch({for (p in vp_1) plot(p)}, finally=dev.off()))

In [None]:
# Make the cell-type names syntactically valid, then write the result list to xlsx and rds
names(res_1) <- make.names(names(res_1))
openxlsx::write.xlsx(res_1, paste0("result/dea/scRNAseq/wilcox/sample_group/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".xlsx"))
saveRDS(res_1, paste0("result/dea/scRNAseq/wilcox/sample_group/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".rds"))

# IFNAR fl/fl LysM cre CpG D3 vs IFNAR fl/fl LysM cre baseline

In [None]:
# Contrast given as c(ident_1, ident_2) for wilcox()
contrasts_vec <- c("IFNAR_fl_LysM_cre_CpG_D3", "IFNAR_fl_LysM_cre_Baseline_D0")

In [None]:
# Restrict every cell type to the two contrast groups. subset() can fail
# (e.g. when no cell matches); such cell types are reported and dropped.
so_1 <- lapply(so, function(x) {
    tryCatch(
        subset(x, subset=group %in% contrasts_vec), 
        error=function(e) {
            message("Cell type skipped: ", conditionMessage(e))
            NULL
        }
    )
})
so_1 <- Filter(Negate(is.null), so_1)

In [None]:
# Run the test per cell type
res_1 <- lapply(so_1, function(x) {
    wilcox(
        so=x, 
        ident="group", 
        ident_1=contrasts_vec[1], 
        ident_2=contrasts_vec[2], 
        only_pos=FALSE, 
        avg_log2FC_threshold=0, 
        pct_min=pct_min, 
        cnt_min=cnt_min, 
        cell_min=cell_min, 
        test_use="wilcox"
    )
})

In [None]:
# wilcox() returns NULL for cell types it could not test
res_1 <- Filter(Negate(is.null), res_1)

In [None]:
options(repr.plot.width=6*6, repr.plot.height=6)

# One volcano plot per cell type
vp_1 <- lapply(names(res_1), function(i) {
    v_pl(
        res_1[[i]], 
        title=i, 
        log2_thr=log2_thr, 
        adj_pval_thr=adj_pval_thr, 
        point_size=2, 
        color_pos=unlist(color$sample_group[contrasts_vec[1]]), 
        color_neg=unlist(color$sample_group[contrasts_vec[2]]), 
        top_label=20
    ) + theme_global_set(size_select=1)
})
ggpubr::ggarrange(plotlist=vp_1, ncol=4, common.legend=TRUE, legend="bottom") %>% print()

In [None]:
# One page per plot. Iterating over the list itself is safe for an empty
# list, and the device is closed even if a plot fails to render.
pdf(paste0("result/dea/scRNAseq/wilcox/sample_group/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".pdf"), onefile=TRUE, width=12, height=6)
invisible(tryCatch({for (p in vp_1) plot(p)}, finally=dev.off()))

In [None]:
# Make the cell-type names syntactically valid, then write the result list to xlsx and rds
names(res_1) <- make.names(names(res_1))
openxlsx::write.xlsx(res_1, paste0("result/dea/scRNAseq/wilcox/sample_group/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".xlsx"))
saveRDS(res_1, paste0("result/dea/scRNAseq/wilcox/sample_group/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".rds"))

# IFNAR fl/fl LysM cre baseline vs IFNAR fl/fl baseline 

In [None]:
# Contrast given as c(ident_1, ident_2) for wilcox()
contrasts_vec <- c("IFNAR_fl_LysM_cre_Baseline_D0", "IFNAR_fl_Baseline_D0")

In [None]:
# Restrict every cell type to the two contrast groups. subset() can fail
# (e.g. when no cell matches); such cell types are reported and dropped.
so_1 <- lapply(so, function(x) {
    tryCatch(
        subset(x, subset=group %in% contrasts_vec), 
        error=function(e) {
            message("Cell type skipped: ", conditionMessage(e))
            NULL
        }
    )
})
so_1 <- Filter(Negate(is.null), so_1)

In [None]:
# Run the test per cell type
res_1 <- lapply(so_1, function(x) {
    wilcox(
        so=x, 
        ident="group", 
        ident_1=contrasts_vec[1], 
        ident_2=contrasts_vec[2], 
        only_pos=FALSE, 
        avg_log2FC_threshold=0, 
        pct_min=pct_min, 
        cnt_min=cnt_min, 
        cell_min=cell_min, 
        test_use="wilcox"
    )
})

In [None]:
# wilcox() returns NULL for cell types it could not test
res_1 <- Filter(Negate(is.null), res_1)

In [None]:
options(repr.plot.width=6*6, repr.plot.height=6)

# One volcano plot per cell type
vp_1 <- lapply(names(res_1), function(i) {
    v_pl(
        res_1[[i]], 
        title=i, 
        log2_thr=log2_thr, 
        adj_pval_thr=adj_pval_thr, 
        point_size=2, 
        color_pos=unlist(color$sample_group[contrasts_vec[1]]), 
        color_neg=unlist(color$sample_group[contrasts_vec[2]]), 
        top_label=20
    ) + theme_global_set(size_select=1)
})
ggpubr::ggarrange(plotlist=vp_1, ncol=4, common.legend=TRUE, legend="bottom") %>% print()

In [None]:
# One page per plot. Iterating over the list itself is safe for an empty
# list, and the device is closed even if a plot fails to render.
pdf(paste0("result/dea/scRNAseq/wilcox/sample_group/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".pdf"), onefile=TRUE, width=12, height=6)
invisible(tryCatch({for (p in vp_1) plot(p)}, finally=dev.off()))

In [None]:
# Make the cell-type names syntactically valid, then write the result list to xlsx and rds
names(res_1) <- make.names(names(res_1))
openxlsx::write.xlsx(res_1, paste0("result/dea/scRNAseq/wilcox/sample_group/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".xlsx"))
saveRDS(res_1, paste0("result/dea/scRNAseq/wilcox/sample_group/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".rds"))

# IFNAR fl/fl LysM cre D1 vs IFNAR fl/fl D1 

In [None]:
# Contrast given as c(ident_1, ident_2) for wilcox()
contrasts_vec <- c("IFNAR_fl_LysM_cre_CpG_D1", "IFNAR_fl_CpG_D1")

In [None]:
# Restrict every cell type to the two contrast groups. subset() can fail
# (e.g. when no cell matches); such cell types are reported and dropped.
so_1 <- lapply(so, function(x) {
    tryCatch(
        subset(x, subset=group %in% contrasts_vec), 
        error=function(e) {
            message("Cell type skipped: ", conditionMessage(e))
            NULL
        }
    )
})
so_1 <- Filter(Negate(is.null), so_1)

In [None]:
# Run the test per cell type
res_1 <- lapply(so_1, function(x) {
    wilcox(
        so=x, 
        ident="group", 
        ident_1=contrasts_vec[1], 
        ident_2=contrasts_vec[2], 
        only_pos=FALSE, 
        avg_log2FC_threshold=0, 
        pct_min=pct_min, 
        cnt_min=cnt_min, 
        cell_min=cell_min, 
        test_use="wilcox"
    )
})

In [None]:
# wilcox() returns NULL for cell types it could not test
res_1 <- Filter(Negate(is.null), res_1)

In [None]:
options(repr.plot.width=6*6, repr.plot.height=6)

# One volcano plot per cell type
vp_1 <- lapply(names(res_1), function(i) {
    v_pl(
        res_1[[i]], 
        title=i, 
        log2_thr=log2_thr, 
        adj_pval_thr=adj_pval_thr, 
        point_size=2, 
        color_pos=unlist(color$sample_group[contrasts_vec[1]]), 
        color_neg=unlist(color$sample_group[contrasts_vec[2]]), 
        top_label=20
    ) + theme_global_set(size_select=1)
})
ggpubr::ggarrange(plotlist=vp_1, ncol=4, common.legend=TRUE, legend="bottom") %>% print()

In [None]:
# One page per plot. Iterating over the list itself is safe for an empty
# list, and the device is closed even if a plot fails to render.
pdf(paste0("result/dea/scRNAseq/wilcox/sample_group/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".pdf"), onefile=TRUE, width=12, height=6)
invisible(tryCatch({for (p in vp_1) plot(p)}, finally=dev.off()))

In [None]:
# Make the cell-type names syntactically valid, then write the result list to xlsx and rds
names(res_1) <- make.names(names(res_1))
openxlsx::write.xlsx(res_1, paste0("result/dea/scRNAseq/wilcox/sample_group/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".xlsx"))
saveRDS(res_1, paste0("result/dea/scRNAseq/wilcox/sample_group/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".rds"))

# IFNAR fl/fl LysM cre D3 vs IFNAR fl/fl D3 

In [None]:
# Contrast given as c(ident_1, ident_2) for wilcox()
contrasts_vec <- c("IFNAR_fl_LysM_cre_CpG_D3", "IFNAR_fl_CpG_D3")

In [None]:
# Restrict every cell type to the two contrast groups. subset() can fail
# (e.g. when no cell matches); such cell types are reported and dropped.
so_1 <- lapply(so, function(x) {
    tryCatch(
        subset(x, subset=group %in% contrasts_vec), 
        error=function(e) {
            message("Cell type skipped: ", conditionMessage(e))
            NULL
        }
    )
})
so_1 <- Filter(Negate(is.null), so_1)

In [None]:
# Run the test per cell type
res_1 <- lapply(so_1, function(x) {
    wilcox(
        so=x, 
        ident="group", 
        ident_1=contrasts_vec[1], 
        ident_2=contrasts_vec[2], 
        only_pos=FALSE, 
        avg_log2FC_threshold=0, 
        pct_min=pct_min, 
        cnt_min=cnt_min, 
        cell_min=cell_min, 
        test_use="wilcox"
    )
})

In [None]:
# wilcox() returns NULL for cell types it could not test
res_1 <- Filter(Negate(is.null), res_1)

In [None]:
options(repr.plot.width=6*6, repr.plot.height=6)

# One volcano plot per cell type
vp_1 <- lapply(names(res_1), function(i) {
    v_pl(
        res_1[[i]], 
        title=i, 
        log2_thr=log2_thr, 
        adj_pval_thr=adj_pval_thr, 
        point_size=2, 
        color_pos=unlist(color$sample_group[contrasts_vec[1]]), 
        color_neg=unlist(color$sample_group[contrasts_vec[2]]), 
        top_label=20
    ) + theme_global_set(size_select=1)
})
ggpubr::ggarrange(plotlist=vp_1, ncol=4, common.legend=TRUE, legend="bottom") %>% print()

In [None]:
# One page per plot. Iterating over the list itself is safe for an empty
# list, and the device is closed even if a plot fails to render.
pdf(paste0("result/dea/scRNAseq/wilcox/sample_group/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".pdf"), onefile=TRUE, width=12, height=6)
invisible(tryCatch({for (p in vp_1) plot(p)}, finally=dev.off()))

In [None]:
# Make the cell-type names syntactically valid, then write the result list to xlsx and rds
names(res_1) <- make.names(names(res_1))
openxlsx::write.xlsx(res_1, paste0("result/dea/scRNAseq/wilcox/sample_group/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".xlsx"))
saveRDS(res_1, paste0("result/dea/scRNAseq/wilcox/sample_group/", contrasts_vec[1], "_vs_", contrasts_vec[2], ".rds"))

# Session info 

In [None]:
# Record R version and loaded package versions for reproducibility
utils::sessionInfo()