# Marker gene differential expression analysis (DEA)

In [None]:
# Silence all warnings for the rest of the session (a negative `warn` makes R ignore them)
options(warn=-1)

In [None]:
# Attach every package the notebook relies on while hiding start-up messages.
# `library_load` holds the value returned by each `library()` call, in order.
library_load <- suppressMessages(
    lapply(
        c(
            "Seurat", "presto",    # Seurat
            "tidyverse",           # Data
            "ggplot2", "ggpubr",   # Plot
            "reticulate"           # Python
        ),
        library,
        character.only=TRUE
    )
)

In [None]:
# Fix the RNG seed so that stochastic steps give the same result on every run
random_seed <- 42
set.seed(random_seed)

In [None]:
# Configure reticulate: bind the session to a named conda environment,
# then report the Python configuration that was resolved.
# NOTE(review): environment name and conda binary path are machine-specific.
use_condaenv(condaenv='p.3.9.19-FD20200109SPLENO', conda="/nobackup/peer/fdeckert/miniconda3/bin/conda", required=NULL)
py_config()

In [None]:
# Set working directory to project root; relative paths in this notebook
# (sourced scripts, data files) are resolved against it
setwd("/research/peer/fdeckert/FD20200109SPLENO")

In [None]:
# Source project helper scripts (paths relative to the working directory).
# NOTE(review): `dplot`, `color` and `theme_global_set` used in this notebook
# are presumably defined in these files — confirm.
source("bin/so_pl.R")
source("plotting_global.R")

In [None]:
# Plotting theme: make the project theme the ggplot2 default for all later plots
ggplot2::theme_set(theme_global_set(1)) # From project global source()

# Parameter settings

In [None]:
# Path of the serialized Seurat object (.rds), relative to the working directory
# NOTE(review): "pp" presumably stands for pre-processed — confirm
so_file <- "data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/pp.rds"

# Import Seurat object

In [None]:
# Deserialize the object stored at `so_file`
so <- readRDS(so_file)

In [None]:
# Square notebook canvas for the embedding
options(repr.plot.width=10, repr.plot.height=10)

# UMAP grouped and labelled by main cell type, using the project palette for
# both colour and fill.
# NOTE(review): `dplot` and `color` are not defined in this notebook; presumably
# they come from the sourced project scripts — confirm.
dplot(
    so,
    reduction="umap",
    group_by="leiden_cell_type_main",
    label=TRUE,
    label_box=TRUE
) +
    scale_color_manual(values=color$leiden_cell_type_main) +
    scale_fill_manual(values=color$leiden_cell_type_main)

In [None]:
# Wide notebook canvas for two side-by-side panels
options(repr.plot.width=20, repr.plot.height=6)

# Horizontal bar chart: metadata rows per main cell type, stacked by sample group
p_1 <- ggplot(so@meta.data, aes(x=leiden_cell_type_main, fill=sample_group)) +
    geom_bar() +
    coord_flip() +
    theme(axis.text.y=element_text(vjust=0.5, hjust=1))

# Same chart, stacked by infection status
p_2 <- ggplot(so@meta.data, aes(x=leiden_cell_type_main, fill=infection)) +
    geom_bar() +
    coord_flip() +
    theme(axis.text.y=element_text(vjust=0.5, hjust=1))

# NOTE(review): adding two ggplot objects with `+` needs a plot-composition
# package (e.g. patchwork) to be attached; none is loaded in this notebook
# itself, so it is presumably attached by a sourced script — confirm.
p_1 + p_2

# Helper functions

In [None]:
# One-vs-rest marker test (presto::wilcoxauc) for every level of a metadata column.
#
# Args:
#   so:    Seurat object.
#   ident: name of the metadata column that defines the groups to compare.
#   assay: assay that is normalised and tested.
#   layer: layer of `assay` handed to the test.
#
# Returns:
#   Named list with one data.frame per group. presto's columns are renamed to
#   Seurat-style names (gene, avg_log2FC, p_val, p_val_adj, pct.1, pct.2) and
#   pct.1 / pct.2 are rescaled from percent to fractions.
#
# NOTE(review): `avg_log2FC` is presto's `logFC` column unchanged. It is reported
# on the scale of the input matrix, so it may not be a base-2 fold change —
# confirm before interpreting the magnitude as log2.
wilcox_marker <- function(so, ident, assay="RNA", layer="data") {

    # Normalise the assay that is tested below. Without `assay=` Seurat would
    # normalise the object's default assay, which may be a different one.
    so <- NormalizeData(so, assay=assay, verbose=FALSE)

    data <- GetAssayData(so, assay=assay, layer=layer)

    # Group label per cell, pulled from the metadata as a plain vector
    group <- so[[ident, drop=TRUE]]

    res <- presto::wilcoxauc(data, group)

    # Namespaced call so that another attached package cannot mask `rename`
    res <- dplyr::rename(
        res,
        gene=feature,
        avg_log2FC=logFC,
        p_val=pval,
        p_val_adj=padj,
        pct.1=pct_in,
        pct.2=pct_out
    )

    # Percent -> fraction
    res$pct.1 <- res$pct.1/100
    res$pct.2 <- res$pct.2/100

    # One table per group, keyed by group name
    res <- split(res, f=res$group)

    return(res)
}

In [None]:
# Scatter plot of one differential-expression table: `pct.1` on the x axis,
# `avg_log2FC` on the y axis, points filled by the direction of significant
# change, and the top-ranked genes labelled.
#
# Args:
#   dea:          data.frame with columns `avg_log2FC`, `p_val_adj` and `pct.1`;
#                 a `gene` column, if present, becomes the row names / labels.
#   log2_thr:     minimum |avg_log2FC| for a gene to count as significant.
#   adj_pval_thr: maximum adjusted p-value for a gene to count as significant.
#   top_label:    number of genes labelled per direction and per ranking
#                 (largest fold change, smallest adjusted p-value).
#   title:        plot title.
#   conserved:    not used by the function body; kept so existing calls still work.
#   color_pos:    named length-one colour for significant positive changes;
#                 the name is the legend key.
#   color_neg:    same, for significant negative changes.
#
# Returns:
#   A ggplot object.
vp <- function(dea, log2_thr=1, adj_pval_thr=0.05, top_label=10, title=NULL, conserved=FALSE, color_pos=c("pos"="#0000ffff"), color_neg=c("neg"="#fd8008ff")) {

    # Set rownames to genes
    if("gene" %in% colnames(dea)) {rownames(dea) <- dea$gene}

    # Annotate entries significance by log2_thr and adj_pval_thr
    dea$sig <- ifelse((abs(dea$avg_log2FC) >= log2_thr) & (dea$p_val_adj <= adj_pval_thr), "s", "ns")

    # Set fill key based on significance and direction of change
    dea$color <- ifelse(dea$sig == "s" & dea$avg_log2FC > 0, names(color_pos), "n.s.")
    dea$color <- ifelse(dea$sig == "s" & dea$avg_log2FC < 0, names(color_neg), dea$color)

    # Named palette for scale_fill_manual (local name chosen so it does not
    # shadow a global `color` object)
    fill_values <- c(color_pos, "gray", "black", color_neg)
    names(fill_values) <- c(names(color_pos), "n.s.", "black", names(color_neg))

    # Significant genes per direction; which() drops rows whose test is NA
    dea_pos <- dea[which(dea$avg_log2FC > 0 & dea$sig == "s"), ]
    dea_neg <- dea[which(dea$avg_log2FC < 0 & dea$sig == "s"), ]

    # Top genes by effect size and by adjusted p-value. head() returns at most
    # `top_label` rows, so short tables never yield NA placeholder rows and
    # top_label=0 labels nothing.
    pos_labels_log2FC <- rownames(head(dea_pos[rev(order(dea_pos$avg_log2FC)), ], top_label))
    neg_labels_log2FC <- rownames(head(dea_neg[order(dea_neg$avg_log2FC), ], top_label))

    pos_labels_p_val_adj <- rownames(head(dea_pos[order(dea_pos$p_val_adj), ], top_label))
    neg_labels_p_val_adj <- rownames(head(dea_neg[order(dea_neg$p_val_adj), ], top_label))

    pos_labels <- c(pos_labels_log2FC, pos_labels_p_val_adj)
    neg_labels <- c(neg_labels_log2FC, neg_labels_p_val_adj)

    # Set labels
    dea$label <- ifelse(rownames(dea) %in% c(pos_labels, neg_labels), rownames(dea), NA)

    # Replace adjusted p-values of exactly 0 by the smallest positive value so
    # the column carried in the plot data stays finite under a log transform.
    # Done after significance and label ranking so neither is affected.
    p_positive <- dea$p_val_adj[which(dea$p_val_adj > 0)]
    if(length(p_positive) > 0) {
        dea$p_val_adj[which(dea$p_val_adj == 0)] <- min(p_positive)
    }

    # Symmetric y-limits from the finite fold changes; NA lets ggplot2 choose
    # the limits when there is nothing finite to plot
    fc_abs <- abs(dea$avg_log2FC[is.finite(dea$avg_log2FC)])
    y_max <- if(length(fc_abs) > 0) max(fc_abs) else NA_real_

    # Plot
    volcano_plot <- ggplot(dea, aes(x=pct.1, y=avg_log2FC, fill=color, label=label)) +

        geom_point(size=4, shape=21, color="white") +
        # Constant intercept: a single reference line instead of one per data row
        geom_hline(yintercept=0, linetype="dotted", colour="black") +
        ggrepel::geom_text_repel(segment.color="black", force=20, force_pull=1, max.overlaps=getOption("ggrepel.max.overlaps", default=100), size=5, alpha=1, segment.size=0.1, color='black') +
        ylim(-y_max, y_max) +
        xlim(0, 1) +
        ggtitle(title) + xlab("expression reference [ratio]") + ylab("average log2FC") +
        scale_fill_manual(values=fill_values, name="DEA") +

        theme(

            legend.position="right",
            aspect.ratio=1

        )

    return(volcano_plot)

}

In [None]:
# Remove unused factor levels from the metadata columns so that `levels()`
# lists only groups that are actually present
so@meta.data <- droplevels(so@meta.data)

In [None]:
# Marker test per main cell type; returns one result table per group
res <- wilcox_marker(so, "leiden_cell_type_main", assay="RNA", layer="data")
# Put the tables in the factor-level order of the annotation
res <- res[levels(so$leiden_cell_type_main)]

In [None]:
# Canvas sized for a row of four square panels
options(repr.plot.width=4*8, repr.plot.height=8)

# One plot per cell type: no fold-change cut-off, adjusted p <= 0.01,
# 20 labels per direction and ranking
vp_1 <- lapply(
    names(res),
    function(i) {
        vp(res[[i]], title=i, log2_thr=0, adj_pval_thr=0.01, top_label = 20) +
            theme_global_set(size_select=1)
    }
)

# Arrange four panels per row with one shared legend underneath
print(
    ggpubr::ggarrange(plotlist=vp_1, ncol=4, common.legend=TRUE, legend="bottom")
)

# Session info

In [None]:
# Record the R version and attached package versions for reproducibility
sessionInfo()