# Lamian lineage marker genes and DEA along pseudotime 

**Lamian**  
[Preprint](https://www.biorxiv.org/content/10.1101/2021.07.10.451910v1.full)  
[GitHub](https://github.com/Winnie09/Lamian)  
[Tutorial](https://winnie09.github.io/Wenpin_Hou/pages/Lamian.html)  

**Condiment**  
[Preprint](https://www.biorxiv.org/content/10.1101/2021.03.09.433671v1.full)  
[GitHub](https://github.com/HectorRDB/condiments)  
[Tutorial](https://hectorrdb.github.io/condimentsPaper/)  

In [None]:
# Negative `warn` values make R ignore all warnings for the rest of the session.
# NOTE(review): this also hides deprecation and numerical warnings raised by every cell below.
options(warn=-1)

In [None]:
library_load <- suppressMessages(
    
    list(
        
        # Seurat 
        library(Seurat), 
        
        # Condiment
        library(condiments), 
        library(tradeSeq), 
        library(SingleCellExperiment), 
        # library(clusterExperiment), 
        
        # GO 
        library(msigdbr), 
        library(fgsea), 
        
        # Data 
        library(tidyverse), 
        library(openxlsx), 
        
        # Plotting 
        library(ComplexHeatmap), 
        library(circlize), 
        library(viridis), 
        library(ggplotify), 
        library(ComplexUpset), 
        
        # Parallel 
        library(BiocParallel), 
        
        # Python 
        library(reticulate)
        
    )
    
)

In [None]:
# Fix the RNG seed so that stochastic steps in later cells are repeatable
random_seed <- 42
set.seed(random_seed)

In [None]:
# Allow up to 5000*1024^2 bytes (5000 MiB) of global objects to be exported to future workers
options(future.globals.maxSize=5000*1024^2)

In [None]:
ht_opt$message=FALSE # ComplexHeatmap: switch off its informational messages

In [None]:
# Set working directory to project root
# NOTE(review): absolute, machine-specific path; every relative path in this notebook resolves against it
setwd("/research/peer/fdeckert/FD20200109SPLENO")

In [None]:
# Source files
# Project-local helper scripts. Presumably they provide objects used below without being
# defined in this notebook (e.g. `color`, `cell_type_fine_order`, `theme_global_set()`,
# `import_cellrank()`, `tradeseq_workflow()`, `plot_smooth()`) - TODO confirm
source("plotting_global.R")
source("bin/tradeseq.R")
source("bin/seurat_qc.R")
source("bin/seurat_dea.R")
source("bin/cell_type.R")

In [None]:
# Plotting Theme
ggplot2::theme_set(theme_global_set(size_select=1)) # From project global source()

# Parameter settings and data import 

In [None]:
# Parallel computing 
# Number of cores reported by future; handed to tradeseq_workflow() as `workers` further down
workers <- future::availableCores()
print(workers)

In [None]:
# Seurat object 
so <- readRDS("data/object/pp.rds")

# CellRank absorption probabilities 
# One table per treatment. Row names are taken from the first CSV column and column names are
# kept verbatim (check.names=FALSE). Presumably rows are cell barcodes - verify against the export.
absorption_probabilities_prog_nacl <- read.csv("result/cellrank/absorption_probabilities_prog_nacl.csv", row.names=1, check.names=FALSE)
absorption_probabilities_prog_cpg <- read.csv("result/cellrank/absorption_probabilities_prog_cpg.csv", row.names=1,check.names=FALSE)

# Prepare data  

In [None]:
# Progenitor populations kept for this analysis: MastP, MegP and the numbered MEP, ProEB and EB clusters
cell_type_prog <- c(
    "MastP",
    "MegP",
    paste0("MEP (", 1:4, ")"),
    paste0("ProEB (", 1:4, ")"),
    paste0("EB (", 1:5, ")")
)

In [None]:
# Restrict the object to the progenitor populations, then fix the factor level order of
# treatment and cell type using the project-wide definitions
so_prog <- subset(so, cell_type_fine %in% cell_type_prog)
so_prog$treatment <- factor(
    so_prog$treatment,
    levels=names(color$treatment)
)
so_prog$cell_type_fine <- factor(
    so_prog$cell_type_fine,
    levels=cell_type_fine_order
)

In [None]:
# Attach the CellRank absorption probabilities of both treatments as metadata columns
so_prog <- AddMetaData(
    so_prog,
    rbind(absorption_probabilities_prog_nacl, absorption_probabilities_prog_cpg)
)

In [None]:
# One object per treatment
so_prog_nacl <- subset(so_prog, treatment=="NaCl")
so_prog_cpg <- subset(so_prog, treatment=="CpG")

# Absorption probability box plots

In [None]:
options(repr.plot.width=15, repr.plot.height=5)

# Long-format table: one row per cell and absorption-probability column (MastP, MegP, EB..5.).
# reshape2::melt() names its argument `id.vars`; the former `id_vars` was silently ignored and
# melt() had to guess the id columns. dplyr::select is qualified because several attached
# packages export a `select`.
data <- dplyr::select(so_prog@meta.data, treatment, cell_type_fine, MastP, MegP, EB..5.) %>% 
    reshape2::melt(id.vars=c("treatment", "cell_type_fine")) %>% 
    as.data.frame() %>% 
    dplyr::mutate(cell_type_fine=factor(cell_type_fine, levels=cell_type_prog)) %>% 
    dplyr::mutate(treatment=factor(treatment, levels=c("NaCl", "CpG")))

# Box plots of the absorption probability per cell type, faceted by treatment (rows) and
# terminal state (columns). One manual scale serves both the colour and the fill aesthetic.
plot <- ggplot(data, aes(x=cell_type_fine, y=value, color=cell_type_fine, fill=cell_type_fine)) + 
    geom_boxplot(outlier.size=0.5) + 
    facet_grid(treatment~variable) + 
    scale_color_manual(
        values=color[["cell_type_fine_detail"]][names(color[["cell_type_fine_detail"]]) %in% so_prog$cell_type_fine], 
        aesthetics=c("colour", "fill")
    ) + 
    theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5))

plot

# DEA helper function 

In [None]:
# Fit a voom linear model on pseudobulk counts with `grouping_var_i` as the reference group.
#
# Arguments
#   grouping_var_i   value of `grouping_var` used as reference (first level / first design column)
#   so               Seurat object with an "RNA" assay holding raw counts
#   grouping_var     name of the metadata column that defines the groups to compare
#   pseudobatch_var  name of the metadata column that defines the pseudobulk samples
#   sample_weights   passed on to edgeR::voomLmFit(sample.weights=)
#
# Returns the object produced by edgeR::voomLmFit(). The design has no intercept and its
# columns are named "grouping_var_vec<level>"; ebayes() relies on that prefix and on the
# reference level being the first column.
voom_lm_fit <- function(grouping_var_i, so, grouping_var, pseudobatch_var, sample_weights) {
    
    # Per-cell group and pseudobatch labels
    group_by_cell <- so[[grouping_var, drop=TRUE]]
    batch_by_cell <- so[[pseudobatch_var, drop=TRUE]]
    
    if (!grouping_var_i %in% group_by_cell) {
        stop("'", grouping_var_i, "' is not a value of '", grouping_var, "'", call.=FALSE)
    }
    
    # Raw counts (genes x cells), fetched once
    cnt <- GetAssayData(so, assay="RNA", slot="counts")
    
    # Keep genes with more than 3 counts in more than 3 cells of the reference group.
    # drop=FALSE keeps the matrix shape when a single cell or gene remains.
    keep_gene <- rowSums(cnt[, group_by_cell==grouping_var_i, drop=FALSE]>3)>3
    cnt <- cnt[keep_gene, , drop=FALSE]
    
    # Make pseudobulks by summing single cell counts per "<group>|<pseudobatch>" combination.
    # rowsum() sums the rows (cells) of the transposed matrix per label; the result is
    # transposed back to genes x pseudobulk samples.
    pseudobulk_id <- paste0(group_by_cell, "|", batch_by_cell)
    cnt <- t(rowsum(t(as.matrix(cnt)), group=pseudobulk_id))
    
    # Recover the group of every pseudobulk sample from its column name
    id_parts <- strsplit(colnames(cnt), "|", fixed=TRUE)
    grouping_var_vec <- vapply(id_parts, `[[`, character(1), 1)
    
    # Design matrix without intercept. The variable must stay named `grouping_var_vec`
    # because model.matrix() derives the column names from it.
    grouping_var_vec <- relevel(as.factor(grouping_var_vec), ref=grouping_var_i)
    design <- model.matrix(~0+grouping_var_vec)
    
    suppressMessages(
        
        fit <- edgeR::voomLmFit(

            counts=cnt, 
            design=design, 
            sample.weights=sample_weights, 
            var.design=design

        )
        
    )
    
    return(fit)
    
}

In [None]:
# Contrast the reference group against the mean of all other groups and return the
# moderated test results.
#
# Arguments
#   fit  linear model fit whose design has columns prefixed "grouping_var_vec" with the
#        reference group as first of them (as produced by voom_lm_fit())
#
# Returns the limma::topTable() data.frame for all genes, sorted by p-value, with
# logFC / P.Value / adj.P.Val renamed to avg_log2FC / p_value / p_val_adj.
ebayes <- function(fit) {
    
    # Design columns that encode the groups
    design <- fit$design
    grouping_var_vec <- grep("grouping_var_vec", colnames(design), value=TRUE)
    grouping_var_ref <- grouping_var_vec[1]
    grouping_var_qry <- grouping_var_vec[-1]
    
    # Without a second group the contrast string below would be malformed
    if (length(grouping_var_qry) == 0) {
        stop("ebayes() needs at least two 'grouping_var_vec' columns in the design", call.=FALSE)
    }
    
    # Contrast fit: reference - mean(all other groups)
    contrasts_vec <- paste0(grouping_var_ref, "-(", paste0(grouping_var_qry, collapse="+"), ")/", length(grouping_var_qry))
    contrasts <- limma::makeContrasts(contrasts=contrasts_vec, levels=colnames(design))
    contrasts_fit <- limma::contrasts.fit(fit, contrasts=contrasts)

    # eBayes fit 
    efit <- limma::eBayes(contrasts_fit)

    # Full result table, no p-value or fold change cut-off
    result_df <- limma::topTable(efit, sort.by="P", number=Inf, p.value=1, lfc=0, coef=1)

    # Convert results to Seurat format. Columns are matched by name rather than by
    # position so that leading annotation columns cannot shift the renaming.
    rename_map <- c(logFC="avg_log2FC", P.Value="p_value", adj.P.Val="p_val_adj")
    hit <- match(names(rename_map), colnames(result_df))
    colnames(result_df)[hit[!is.na(hit)]] <- unname(rename_map[!is.na(hit)])

    return(result_df)
    
}

In [None]:
# Volcano plot of a differential expression table.
#
# Arguments
#   dea         data.frame with columns `avg_log2FC` and `p_val_adj`; gene names are the
#               row names, or the `gene` column when present
#   log2fc_thr  absolute log2 fold change a gene must reach to count as significant
#   p_adj_thr   adjusted p-value a gene must not exceed to count as significant
#   top_label   number of genes labelled per direction and per ranking (fold change, p-value)
#   title       plot title
#   color_neg   fill of significant genes with negative fold change
#   color_pos   fill of significant genes with positive fold change
#
# Returns a ggplot object.
#
# NOTE(review): the palette names used to be swapped (color_neg filled the positive
# genes). They now match the argument names, so with the default colours the two
# directions trade colours compared to earlier output.
dea_vp <- function(dea, log2fc_thr=1, p_adj_thr=0.05, top_label=10, title=NULL, color_neg=RColorBrewer::brewer.pal(8, "Set1")[1], color_pos=RColorBrewer::brewer.pal(8, "Set1")[2]) {

    # Set rownames to genes
    if("gene" %in% colnames(dea)) {rownames(dea) <- dea$gene}
    
    # Replace exact zeros so that -log10() stays finite
    dea$p_val_adj <- ifelse(dea$p_val_adj == 0, .Machine$double.xmin, dea$p_val_adj)
    
    # Significance by log2fc_thr and p_adj_thr; missing values count as not significant
    is_sig <- abs(dea$avg_log2FC) >= log2fc_thr & dea$p_val_adj <= p_adj_thr
    is_sig[is.na(is_sig)] <- FALSE
    dea$sig <- ifelse(is_sig, "s", "ns")
    
    # Fill class based on significance and direction
    dea$color <- "ns"
    dea$color[is_sig & dea$avg_log2FC > 0] <- "s_pos"
    dea$color[is_sig & dea$avg_log2FC < 0] <- "s_neg"
    
    fill_values <- c(color_neg, "gray", color_pos)
    names(fill_values) <- c("s_neg", "ns", "s_pos")
    
    # Genes to label: the top hits by fold change and by adjusted p-value, per direction.
    # head() returns fewer names when fewer hits exist instead of padding with NA rows.
    dea_pos <- dea[dea$color == "s_pos", ]
    dea_neg <- dea[dea$color == "s_neg", ]
    
    top_rows <- function(df, ord) {head(rownames(df)[ord], top_label)}
    
    pos_labels <- c(top_rows(dea_pos, rev(order(dea_pos$avg_log2FC))), top_rows(dea_pos, order(dea_pos$p_val_adj)))
    neg_labels <- c(top_rows(dea_neg, order(dea_neg$avg_log2FC)), top_rows(dea_neg, order(dea_neg$p_val_adj)))
    
    dea$label <- ifelse(rownames(dea) %in% c(pos_labels, neg_labels), rownames(dea), NA)

    # Symmetric x axis; y axis with one unit of head room
    x_max <- max(abs(dea$avg_log2FC), na.rm=TRUE)
    y_max <- max(-log10(dea$p_val_adj), na.rm=TRUE)+1

    # Plot. Threshold lines are passed as constants so that each is drawn once rather
    # than once per row of `dea`.
    vp <- ggplot(dea, aes(x=avg_log2FC, y=-log10(p_val_adj), fill=color, label=label)) + 
    
        geom_point(size=4, shape=21, color="white") + 
        geom_vline(xintercept=c(-log2fc_thr, log2fc_thr), linetype="dotted", colour="black") +
        geom_hline(yintercept=-log10(p_adj_thr), linetype="dotted", colour="black") +
        ggrepel::geom_text_repel(segment.color="black", force=20, force_pull=1, max.overlaps=getOption("ggrepel.max.overlaps", default=100), size=5, alpha=1, segment.size=0.1, color="black", na.rm=TRUE) + 
        xlim(-x_max, x_max) +  
        ylim(0, y_max) + 
        ggtitle(title) + xlab("average log2FC") + ylab("-log10(adj. p-value)") + 
        scale_fill_manual(values=fill_values) + 
    
        theme(

            legend.position="none", 
            aspect.ratio=1

        )
    
    return(vp)
    
}

# Endpoint DEA

In [None]:
# Settings of the endpoint comparison: groups are the fine cell types, pseudobulk samples
# are built per `sample_name`
grouping_var="cell_type_fine"
pseudobatch_var="sample_name"

p_adj_thr=0.05
sample_weights=FALSE

In [None]:
# Keep only the three endpoint populations
so_prog_ep <- subset(so_prog, subset=cell_type_fine %in% c("MastP", "MegP", "EB (5)"))

In [None]:
# Store original names for plotting 
so_prog_ep$grouping_var_set <- so_prog_ep[[grouping_var, drop=TRUE]]

In [None]:
# Set names 
# make.names() turns the labels into syntactically valid names, which the contrast
# formula built in ebayes() requires
so_prog_ep[[grouping_var]] <- make.names(so_prog_ep[[grouping_var, drop=TRUE]])

In [None]:
# DEA voom fit 
# One fit per group, each time with that group as the reference level
fit <- lapply(unique(so_prog_ep[[grouping_var, drop=TRUE]]), voom_lm_fit, so=so_prog_ep, grouping_var=grouping_var, pseudobatch_var=pseudobatch_var, sample_weights=sample_weights)

In [None]:
# Get results 
results <- lapply(fit, ebayes)

In [None]:
# Set original grouping names 
# Lookup table: column 1 = "grouping_var_vec" + sanitized label, column 2 = original label;
# rows are sorted by the original label
rename_df <- distinct(so_prog_ep@meta.data[, c(grouping_var, "grouping_var_set")])
rename_df[1] <- paste0("grouping_var_vec", rename_df[[1]])
rename_df <- rename_df[order(rename_df$grouping_var_set), ]

In [None]:
# `fit` and `results` follow the order of unique(); name them accordingly, reorder them
# to match rename_df and then switch to the original labels
names(results) <- paste0("grouping_var_vec", unique(so_prog_ep[[grouping_var, drop=TRUE]]))
results <- results[rename_df[[1]]]
names(results) <- rename_df[[2]]

In [None]:
# Raw counts and the original group label of every cell, used for the detection rates below
mat <- GetAssayData(so_prog_ep, assay="RNA", slot="counts")
mat_order <- so_prog_ep$grouping_var_set

In [None]:
# Add detection rates to every result table:
#   pct_1 = fraction of cells of group i with a count > 0
#   pct_2 = fraction of all other cells with a count > 0
# lapply() over names() returns an unnamed list; names are restored in the next cell.
results <- lapply(names(results), function(i, mat, mat_order) {
    
    x <- results[[i]]
    mat <- mat[rownames(x), ]
    mat <- mat>0
    x$pct_1<-rowSums(mat[, mat_order==i])/sum(mat_order==i)
    x$pct_2<-rowSums(mat[, mat_order!=i])/sum(mat_order!=i)

    return(x)

}, mat=mat, mat_order=mat_order)

In [None]:
# Restore the names and put the tables into display order
names(results) <- rename_df[[2]]
results <- results[c("MastP", "MegP", "EB (5)")]

In [None]:
options(repr.plot.width=8*3, repr.plot.height=8*1)

# One volcano plot per endpoint; log2fc_thr=0 means significance is decided by p_adj_thr alone
vp <- lapply(names(results), function(i) {dea_vp(results[[i]], title=i, log2fc_thr=0, p_adj_thr=p_adj_thr)})
wrap_plots(vp, ncol=3, nrow=1)

# Start point DEA

In [None]:
# Settings of the start point comparison: groups are the fine cell types, pseudobulk samples
# are built per `sample_name`
grouping_var="cell_type_fine"
pseudobatch_var="sample_name"

p_adj_thr=0.05
sample_weights=FALSE

In [None]:
# Keep only the four MEP populations.
# NOTE(review): this reuses the name `so_prog_ep` (and `fit`, `results`, `rename_df`, `mat`,
# `vp` below) and thereby overwrites the objects of the endpoint analysis.
so_prog_ep <- subset(so_prog, subset=cell_type_fine %in% c("MEP (1)", "MEP (2)", "MEP (3)", "MEP (4)"))

In [None]:
# Store original names for plotting 
so_prog_ep$grouping_var_set <- so_prog_ep[[grouping_var, drop=TRUE]]

In [None]:
# Set names 
# make.names() turns the labels into syntactically valid names, which the contrast
# formula built in ebayes() requires
so_prog_ep[[grouping_var]] <- make.names(so_prog_ep[[grouping_var, drop=TRUE]])

In [None]:
# DEA voom fit 
# One fit per group, each time with that group as the reference level
fit <- lapply(unique(so_prog_ep[[grouping_var, drop=TRUE]]), voom_lm_fit, so=so_prog_ep, grouping_var=grouping_var, pseudobatch_var=pseudobatch_var, sample_weights=sample_weights)

In [None]:
# Get results 
results <- lapply(fit, ebayes)

In [None]:
# Set original grouping names 
# Lookup table: column 1 = "grouping_var_vec" + sanitized label, column 2 = original label;
# rows are sorted by the original label
rename_df <- distinct(so_prog_ep@meta.data[, c(grouping_var, "grouping_var_set")])
rename_df[1] <- paste0("grouping_var_vec", rename_df[[1]])
rename_df <- rename_df[order(rename_df$grouping_var_set), ]

In [None]:
# `fit` and `results` follow the order of unique(); name them accordingly, reorder them
# to match rename_df and then switch to the original labels
names(results) <- paste0("grouping_var_vec", unique(so_prog_ep[[grouping_var, drop=TRUE]]))
results <- results[rename_df[[1]]]
names(results) <- rename_df[[2]]

In [None]:
# Raw counts and the original group label of every cell, used for the detection rates below
mat <- GetAssayData(so_prog_ep, assay="RNA", slot="counts")
mat_order <- so_prog_ep$grouping_var_set

In [None]:
# Add detection rates to every result table:
#   pct_1 = fraction of cells of group i with a count > 0
#   pct_2 = fraction of all other cells with a count > 0
# lapply() over names() returns an unnamed list; names are restored in the next cell.
results <- lapply(names(results), function(i, mat, mat_order) {
    
    x <- results[[i]]
    mat <- mat[rownames(x), ]
    mat <- mat>0
    x$pct_1<-rowSums(mat[, mat_order==i])/sum(mat_order==i)
    x$pct_2<-rowSums(mat[, mat_order!=i])/sum(mat_order!=i)

    return(x)

}, mat=mat, mat_order=mat_order)

In [None]:
# Restore the names and put the tables into display order
names(results) <- rename_df[[2]]
results <- results[c("MEP (1)", "MEP (2)", "MEP (3)", "MEP (4)")]

In [None]:
options(repr.plot.width=8*4, repr.plot.height=8*1)

# One volcano plot per MEP cluster; log2fc_thr=0 means significance is decided by p_adj_thr alone
vp <- lapply(names(results), function(i) {dea_vp(results[[i]], title=i, log2fc_thr=0, p_adj_thr=p_adj_thr)})
wrap_plots(vp, ncol=4, nrow=1)

# Condiment + Diffusion pseudotime 

## Prepare data

In [None]:
options(repr.plot.width=25, repr.plot.height=5)

# import_cellrank() is a project helper that is not defined in this notebook.
# NOTE(review): presumably it attaches the CellRank results stored under the "_eb" suffix
# (including `dpt_pseudotime` and `dpt_pseudotime_treatment`, which are read below) and uses
# the EB..5. absorption probability column - confirm against bin/.
so_eb <- suppressMessages(import_cellrank(so_prog, suffix="_eb", absorption_probability_col="EB..5."))

In [None]:
options(repr.plot.width=10, repr.plot.height=2.5)

# Density of both pseudotime columns before rescaling, shown side by side
pdp_1 <- pseudotime_density_plot(so_eb, pseudotime="dpt_pseudotime") 
pdp_2 <- pseudotime_density_plot(so_eb, pseudotime="dpt_pseudotime_treatment")

pdp_1 + pdp_2

## Scale pseudotime for each treatment group

In [None]:
# Rescale the diffusion pseudotime separately per treatment with scales::rescale()
# (default target range 0 to 1). Each object is a one-column data.frame that keeps the
# cell names as row names.
dpt_pseudotime_nacl <- subset(so_eb, subset=treatment=="NaCl")@meta.data["dpt_pseudotime"]
dpt_pseudotime_nacl$dpt_pseudotime <- scales::rescale(dpt_pseudotime_nacl$dpt_pseudotime)

dpt_pseudotime_cpg <- subset(so_eb, subset=treatment=="CpG")@meta.data["dpt_pseudotime"]
dpt_pseudotime_cpg$dpt_pseudotime <- scales::rescale(dpt_pseudotime_cpg$dpt_pseudotime)

# Stack both treatments into one table
dpt_pseudotime <- rbind(dpt_pseudotime_nacl, dpt_pseudotime_cpg)

In [None]:
# Pseudotime range per treatment before the replacement
summary(subset(so_eb, subset=treatment=="NaCl")$dpt_pseudotime)
summary(subset(so_eb, subset=treatment=="CpG")$dpt_pseudotime)

In [None]:
# Replace the original column with the rescaled values
so_eb$dpt_pseudotime <- NULL
so_eb <- AddMetaData(so_eb, dpt_pseudotime)

In [None]:
# Pseudotime range per treatment after the replacement
summary(subset(so_eb, subset=treatment=="NaCl")$dpt_pseudotime)
summary(subset(so_eb, subset=treatment=="CpG")$dpt_pseudotime)

In [None]:
options(repr.plot.width=10, repr.plot.height=2.5)

# Same density plots as before, now with the rescaled `dpt_pseudotime`
pdp_1 <- pseudotime_density_plot(so_eb, pseudotime="dpt_pseudotime") 
pdp_2 <- pseudotime_density_plot(so_eb, pseudotime="dpt_pseudotime_treatment")

pdp_1 + pdp_2

# TradeSeq workflow

In [None]:
# Genes with at least 3 counts in more than 5 NaCl cells. The row names are indexed
# directly: this avoids subsetting the whole matrix and does not lose the name when a
# single gene passes the filter.
cnt_nacl <- GetAssayData(subset(so_eb, subset=treatment=="NaCl"), assay="RNA", slot="counts")
genes_nacl <- rownames(cnt_nacl)[rowSums(cnt_nacl>=3)>5]

In [None]:
# Same filter for the CpG cells
cnt_cpg <- GetAssayData(subset(so_eb, subset=treatment=="CpG"), assay="RNA", slot="counts")
genes_cpg <- rownames(cnt_cpg)[rowSums(cnt_cpg>=3)>5]

In [None]:
# Genes passing the filter in either treatment; Igkc is excluded explicitly
genes <- union(genes_nacl, genes_cpg)
genes <- setdiff(genes, "Igkc")

In [None]:
# tradeseq_workflow() is a project helper that is not defined in this notebook.
# NOTE(review): compute=FALSE presumably loads previously stored results for the "_eb"
# suffix instead of refitting - confirm against bin/tradeseq.R.
tradeseq_eb <- tradeseq_workflow(
    so_eb, 
    so_eb$dpt_pseudotime, 
    conditions=factor(so_eb$treatment, levels=c("NaCl", "CpG")), 
    genes=genes, 
    suffix="_eb", 
    log2_thr=0, 
    nknots=7, 
    workers=workers, 
    compute=FALSE
)

## Evaluate K

In [None]:
options(repr.plot.width=15, repr.plot.height=5)

# Plot the stored knot evaluation; plot_evalutateK_results() is not defined in this notebook
# NOTE(review): presumably tradeSeq's plotting helper - confirm where it comes from
plot_evalutateK_results(tradeseq_eb[["evaluate_k"]], k=NULL, aicDiff=2)

## Pseudotime associated genes (PTAG)

In [None]:
association_eb <- tradeseq_eb[["association"]]

# Columns are renamed by position, then only the per-condition columns are kept.
# NOTE(review): assumes the stored table has exactly these 10 columns in this order
# (global test, NaCl, CpG, mean log fold change) - confirm against tradeseq_workflow().
colnames(association_eb) <- c("wald_stat", "df", "pval", "wald_stat_nacl", "df_nacl", "pval_nacl", "wald_stat_cpg", "df_cpg", "pval_cpg", "mean_logfc")
association_eb <- association_eb[, c("wald_stat_nacl", "df_nacl", "pval_nacl", "wald_stat_cpg", "df_cpg", "pval_cpg")]

# Replace p-values of exactly 0 by the smallest positive p-value of the same column, so
# that -log10() stays finite in the plots below
association_eb$pval_nacl <- ifelse(association_eb$pval_nacl==0, min(na.omit(association_eb$pval_nacl[association_eb$pval_nacl>0])), association_eb$pval_nacl)
association_eb$pval_cpg <- ifelse(association_eb$pval_cpg==0, min(na.omit(association_eb$pval_cpg[association_eb$pval_cpg>0])), association_eb$pval_cpg)

# FDR adjustment per condition
association_eb$padj_nacl <- p.adjust(association_eb$pval_nacl, "fdr")
association_eb$padj_cpg <- p.adjust(association_eb$pval_cpg, "fdr")

# Same zero replacement for the adjusted p-values
association_eb$padj_nacl <- ifelse(association_eb$padj_nacl==0, min(na.omit(association_eb$padj_nacl[association_eb$padj_nacl>0])), association_eb$padj_nacl)
association_eb$padj_cpg <- ifelse(association_eb$padj_cpg==0, min(na.omit(association_eb$padj_cpg[association_eb$padj_cpg>0])), association_eb$padj_cpg)

In [None]:
# Drop genes without a Wald statistic in both conditions, then keep genes with a positive
# statistic in at least one condition. which() discards comparisons that evaluate to NA
# (one statistic missing, the other not positive); a plain logical index would insert an
# all-NA row for each of them.
association_eb <- association_eb[!(is.na(association_eb$wald_stat_nacl) & is.na(association_eb$wald_stat_cpg)), ]
association_eb <- association_eb[which(association_eb$wald_stat_nacl>0 | association_eb$wald_stat_cpg>0), ]

In [None]:
# Per-condition tables: genes with a positive, non-missing Wald statistic
association_eb_nacl <- association_eb[which(association_eb$wald_stat_nacl > 0), ]
association_eb_cpg <- association_eb[which(association_eb$wald_stat_cpg > 0), ]

In [None]:
# Chi-squared p-value of the Wald statistic, FDR-adjusted within each condition.
# The upper tail is requested directly: 1 - pchisq(...) rounds to exactly 0 for large statistics.
association_eb_nacl$p_value <- stats::pchisq(association_eb_nacl$wald_stat_nacl, df=association_eb_nacl$df_nacl, lower.tail=FALSE, log.p=FALSE)
association_eb_nacl$fdr <- p.adjust(association_eb_nacl$p_value, method="fdr")

association_eb_cpg$p_value <- stats::pchisq(association_eb_cpg$wald_stat_cpg, df=association_eb_cpg$df_cpg, lower.tail=FALSE, log.p=FALSE)
association_eb_cpg$fdr <- p.adjust(association_eb_cpg$p_value, method="fdr")

In [None]:
# Pseudotime associated genes: FDR of at most 0.001 (missing FDR values are skipped)
genes_ptag_nacl <- rownames(association_eb_nacl)[which(association_eb_nacl$fdr <= 0.001)]
genes_ptag_cpg <- rownames(association_eb_cpg)[which(association_eb_cpg$fdr <= 0.001)]

In [None]:
# Flag the genes in the combined table
association_eb$ptag_nacl <- rownames(association_eb) %in% genes_ptag_nacl
association_eb$ptag_cpg <- rownames(association_eb) %in% genes_ptag_cpg

In [None]:
options(repr.plot.width=10, repr.plot.height=5)

# Mean and standard deviation of the log Wald statistic (NaCl), drawn as reference lines
mean_nacl <- mean(log(association_eb_nacl$wald_stat_nacl))
sd_nacl <- sd(log(association_eb_nacl$wald_stat_nacl))

# which() keeps only rows where the comparison is TRUE; a plain logical index would add an
# all-NA row for every gene whose NaCl statistic is missing
p_1 <- ggplot(association_eb[which(association_eb$wald_stat_nacl > 0), ], aes(x=log(wald_stat_nacl))) + 
    geom_histogram(bins=25) + 
    geom_vline(xintercept=mean_nacl, color="red", size=1) + 
    geom_vline(xintercept=mean_nacl-sd_nacl, color="red", size=1, linetype="longdash") + 
    geom_vline(xintercept=mean_nacl+sd_nacl, color="red", size=1, linetype="longdash") + 
    ggtitle("Wald stat (NaCl)") + 
    theme(aspect.ratio=1)

# Same for CpG
mean_cpg <- mean(log(association_eb_cpg$wald_stat_cpg))
sd_cpg <- sd(log(association_eb_cpg$wald_stat_cpg))

p_2 <- ggplot(association_eb[which(association_eb$wald_stat_cpg > 0), ], aes(x=log(wald_stat_cpg))) + 
    geom_histogram(bins=25) + 
    geom_vline(xintercept=mean_cpg, color="red", size=1) + 
    geom_vline(xintercept=mean_cpg-sd_cpg, color="red", size=1, linetype="longdash") + 
    geom_vline(xintercept=mean_cpg+sd_cpg, color="red", size=1, linetype="longdash") + 
    ggtitle("Wald stat (CpG)") + 
    theme(aspect.ratio=1)

p_1 + p_2

In [None]:
options(repr.plot.width=10, repr.plot.height=5)

# Log Wald statistic against -log10 adjusted p-value, PTAG genes in black.
# The manual colours are named so that gray always belongs to FALSE and black to TRUE;
# unnamed values are assigned in order and would paint TRUE gray when no FALSE is present.
p_1 <- ggplot(association_eb, aes(x=-log10(padj_nacl), y=log(wald_stat_nacl), color=ptag_nacl)) + 
    geom_point(size=2) + 
    ggtitle("Wald stat vs padj (NaCl)") + 
    scale_color_manual(values=c("FALSE"="gray", "TRUE"="black")) + 
    theme(aspect.ratio=1, legend.position="none")

p_2 <- ggplot(association_eb, aes(x=-log10(padj_cpg), y=log(wald_stat_cpg), color=ptag_cpg)) + 
    geom_point(size=2) + 
    ggtitle("Wald stat vs padj (CpG)") + 
    scale_color_manual(values=c("FALSE"="gray", "TRUE"="black")) + 
    theme(aspect.ratio=1, legend.position="none")

p_1 + p_2

In [None]:
# Keep genes that are pseudotime associated in at least one condition
association_eb <- association_eb[(association_eb$ptag_nacl | association_eb$ptag_cpg), ]

## Pseudotime condition genes (PTCG)

In [None]:
# Test for differences between the conditions along pseudotime and keep only the genes
# that are still present in association_eb
condition_eb <- conditionTest(tradeseq_eb[["fitgam"]], l2fc=log2(1))
condition_eb <- condition_eb[is.element(rownames(condition_eb), rownames(association_eb)), ]

colnames(condition_eb) <- c("wald_stat_ptcg", "df_ptcg", "pval_ptcg")

# Exact zeros are replaced by the smallest positive value of the column, before and after
# the FDR adjustment
condition_eb[["pval_ptcg"]] <- ifelse(
    condition_eb[["pval_ptcg"]]==0, 
    min(condition_eb[["pval_ptcg"]][condition_eb[["pval_ptcg"]]>0], na.rm=TRUE), 
    condition_eb[["pval_ptcg"]]
)
condition_eb[["padj_ptcg"]] <- p.adjust(condition_eb[["pval_ptcg"]], "fdr")
condition_eb[["padj_ptcg"]] <- ifelse(
    condition_eb[["padj_ptcg"]]==0, 
    min(condition_eb[["padj_ptcg"]][condition_eb[["padj_ptcg"]]>0], na.rm=TRUE), 
    condition_eb[["padj_ptcg"]]
)

# Condition gene flag: adjusted p-value of at most 0.001, missing values count as FALSE
condition_eb[["ptcg"]] <- !is.na(condition_eb[["padj_ptcg"]]) & condition_eb[["padj_ptcg"]]<=0.001

In [None]:
options(repr.plot.width=5, repr.plot.height=5)

# Log10 Wald statistic against -log10 adjusted p-value, condition genes in black.
# The manual colours are named so that gray always belongs to FALSE and black to TRUE,
# also when only one of the two values occurs.
p_1 <- ggplot(condition_eb, aes(x=-log10(padj_ptcg), y=log10(wald_stat_ptcg), color=ptcg)) + 
    geom_point(size=2) + 
    ggtitle("Condition genes") + 
    scale_color_manual(values=c("FALSE"="gray", "TRUE"="black")) + 
    theme(
        aspect.ratio=1, 
        legend.position="none"
    )

p_1

In [None]:
# Flag the condition genes in the association table; which() skips a missing flag
association_eb$ptcg <- rownames(association_eb) %in% rownames(condition_eb)[which(condition_eb$ptcg)]

## Combine and save PTAG/PTCG results 

In [None]:
# Prepend the condition test columns to the association table.
# cbind() joins by position, so the condition rows are first brought into the gene order
# of association_eb. match() is exact (character row indexing of a data.frame allows
# partial matches); a gene missing from condition_eb gets NA in the condition columns.
gene_order <- rownames(association_eb)
association_eb <- cbind(
    condition_eb[match(gene_order, rownames(condition_eb)), c("wald_stat_ptcg", "df_ptcg", "pval_ptcg", "padj_ptcg")], 
    association_eb
)
rownames(association_eb) <- gene_order
saveRDS(association_eb, "result/tradeseq/ptg_eb.rds")

In [None]:
options(repr.plot.width=5, repr.plot.height=5)

# Overlap of the three gene flags
upset(data=association_eb, intersect=c("ptag_nacl", "ptag_cpg", "ptcg"), width_ratio=0.1, sort_sets=FALSE)

## Smoothed gene expression 

### Marker genes

In [None]:
# Hand-picked marker genes
# NOTE(review): unlike the transcription factor / ligand / receptor cells below, this list
# is not checked against rownames(tradeseq_eb[["fitgam"]]) - confirm all genes were fitted.
genes <- c("Kit", "Cd47", "Gata1", "Gata2", "Epor", "Tfrc", "Klf1", "Tal1", "Stat1", "Stat5a", "Rb1")

In [None]:
# 10 panels per row; the figure height grows with the number of rows
options(repr.plot.width=10*3, repr.plot.height=ceiling(length(genes)/10)*3)

# One plot_smooth() panel per gene (project helper, called with point=FALSE), titled with
# the gene name and without legend
p <- lapply(genes, function(i) plot_smooth(tradeseq_eb[["fitgam"]], i, point=FALSE) + ggtitle(i) + theme(legend.position="none"))
wrap_plots(p, ncol=10, nrow=ceiling(length(genes)/10))

### Transcription factor

In [None]:
# Transcription factor symbols that are pseudotime associated in at least one condition
# and for which a model was fitted
genes <- unique(read.delim("data/annotation/animaltfdb3/Mus_musculus_TF.txt")$Symbol)
genes <- genes[
    is.element(genes, rownames(association_eb)[association_eb$ptag_nacl | association_eb$ptag_cpg]) & 
    is.element(genes, rownames(tradeseq_eb[["fitgam"]]))
]
length(genes)

In [None]:
# 10 panels per row; the figure height grows with the number of rows
options(
    repr.plot.width=10*2.5, 
    repr.plot.height=ceiling(length(genes)/10)*2.5
)

# One smoothed expression panel per gene, titled with the gene name, legend hidden
p <- lapply(genes, function(i) {
    plot_smooth(tradeseq_eb[["fitgam"]], i) + 
        ggtitle(i) + 
        theme(legend.position="none")
})
wrap_plots(p, ncol=10, nrow=ceiling(length(genes)/10))

### Ligand

In [None]:
# CellChat ligands that are pseudotime associated in at least one condition and for which
# a model was fitted
genes <- unique(CellChat::CellChatDB.mouse$interaction[["ligand"]])
genes <- genes[
    is.element(genes, rownames(association_eb)[association_eb$ptag_nacl | association_eb$ptag_cpg]) & 
    is.element(genes, rownames(tradeseq_eb[["fitgam"]]))
]
length(genes)

In [None]:
# 10 panels per row; the figure height grows with the number of rows
options(
    repr.plot.width=10*2.5, 
    repr.plot.height=ceiling(length(genes)/10)*2.5
)

# One smoothed expression panel per gene, titled with the gene name, legend hidden
p <- lapply(genes, function(i) {
    plot_smooth(tradeseq_eb[["fitgam"]], i) + 
        ggtitle(i) + 
        theme(legend.position="none")
})
wrap_plots(p, ncol=10, nrow=ceiling(length(genes)/10))

### Receptor 

In [None]:
# CellChat receptors that are pseudotime associated in at least one condition and for
# which a model was fitted
genes <- unique(CellChat::CellChatDB.mouse$interaction[["receptor"]])
genes <- genes[
    is.element(genes, rownames(association_eb)[association_eb$ptag_nacl | association_eb$ptag_cpg]) & 
    is.element(genes, rownames(tradeseq_eb[["fitgam"]]))
]
length(genes)

In [None]:
# 10 panels per row; the figure height grows with the number of rows
options(
    repr.plot.width=10*2.5, 
    repr.plot.height=ceiling(length(genes)/10)*2.5
)

# One smoothed expression panel per gene, titled with the gene name, legend hidden
p <- lapply(genes, function(i) {
    plot_smooth(tradeseq_eb[["fitgam"]], i) + 
        ggtitle(i) + 
        theme(legend.position="none")
})
wrap_plots(p, ncol=10, nrow=ceiling(length(genes)/10))