# Lamian lineage marker genes and regulons along pseudotime 

**Lamian**  
[Preprint](https://www.biorxiv.org/content/10.1101/2021.07.10.451910v1.full)  
[GitHub](https://github.com/Winnie09/Lamian)  
[Tutorial](https://winnie09.github.io/Wenpin_Hou/pages/Lamian.html)  

**Condiments**  
[Preprint](https://www.biorxiv.org/content/10.1101/2021.03.09.433671v1.full)  
[GitHub](https://github.com/HectorRDB/condiments)  
[Tutorial](https://hectorrdb.github.io/condimentsPaper/)  

https://github.com/statOmics/tradeSeq/issues/104

In [None]:
# Silence all warnings for this session (a negative `warn` level makes R ignore them)
options(warn=-1)

In [None]:
# Attach every package the notebook relies on, hiding their start-up messages.
# The values returned by the individual library() calls are collected in `library_load`.
library_load <- suppressMessages(list(
    # Seurat
    library(Seurat),
    # Condiments / tradeSeq
    library(condiments),
    library(tradeSeq),
    library(SingleCellExperiment),
    # GO / gene sets
    library(msigdbr),
    library(fgsea),
    # Data handling
    library(tidyverse),
    library(openxlsx),
    # Plotting
    library(ComplexHeatmap),
    library(circlize),
    library(viridis),
    library(ggplotify),
    library(ComplexUpset),
    # Parallel
    library(BiocParallel),
    # Python
    library(reticulate)
))

In [None]:
# Fix the RNG seed; the value is also kept in `random_seed`
random_seed <- 42
set.seed(random_seed)

In [None]:
# ComplexHeatmap: switch off the `message` entry of its global options
ht_opt$message <- FALSE

In [None]:
# Set working directory to project root
# The relative paths used by source(), readRDS(), read.csv() and saveRDS() in this notebook
# are resolved against this directory
setwd("/research/peer/fdeckert/FD20200109SPLENO")

In [None]:
# Source files
# NOTE(review): presumably these provide theme_global_set() and tradeseq_workflow(), which are
# called further down but not defined in this notebook — confirm
source("plotting_global.R")
source("bin/tradeseq.R")

In [None]:
# Plotting Theme
# Make the project theme the ggplot2 default for the plots that follow
ggplot2::theme_set(theme_global_set(size_select=1)) # From project global source()

# Set up parallel computing

In [None]:
# Session options: `mc.cores` is set to 1 and `future.globals.maxSize` to 200000 * 1024^2
# (200000 MiB if the unit is bytes, as described in the future package documentation)
options(mc.cores=1, future.globals.maxSize=200000*1024^2)

In [None]:
# Parallel computing
# BiocParallel back-end that is handed to tradeseq_workflow() through its BPPARAM argument.
# A single assignment is enough: the registered default from bpparam() used to be assigned
# first, but it was overwritten immediately and never used.
# NOTE(review): availableCores() is documented to take getOption("mc.cores") into account, and
# this notebook sets mc.cores=1 beforehand — confirm the resulting worker count is intended.
BPPARAM <- BiocParallel::MulticoreParam(workers = future::availableCores())

# Import Data 

In [None]:
# Seurat object 
so <- readRDS("data/object/pp.rds")

# DPT for erythroid lineage
# The first CSV column becomes the row names; they are matched against `cell_id` when the
# Seurat object is subset further down
dpt <- read.csv("result/tradeseq/dpt_pseudotime.csv", row.names=1)

# Erythroid lineage genes 
# Only the `genes` column of the table is kept
genes <- read.csv("result/tradeseq/genes.csv", row.names=1)[, "genes"]

# Subset Seurat Object to erythroid lineage

In [None]:
# Attach the columns of the DPT table to the cell metadata
so <- AddMetaData(object = so, metadata = dpt)

# Keep only the cells whose cell_id appears among the row names of the DPT table
so <- subset(x = so, subset = cell_id %in% rownames(dpt))

# TradeSeq workflow on transcriptome data 

In [None]:
# Run the project tradeSeq wrapper on the expression data, using DPT pseudotime and the
# treatment factor (NaCl as first level, CpG as second).
# NOTE(review): compute=FALSE presumably re-uses previously stored results — confirm in bin/tradeseq.R
tradeseq_eb <- tradeseq_workflow(
    so,
    so$dpt_pseudotime,
    conditions = factor(so$treatment, levels = c("NaCl", "CpG")),
    genes = genes,
    suffix = "_eb",
    log2_thr = 0,
    nknots = 7,
    family = "nb",
    offset = NULL,
    compute = FALSE,
    BPPARAM = BPPARAM
)

In [None]:
# Figure size for the notebook output
options(repr.plot.width = 15, repr.plot.height = 5)

# Plot the `evaluate_k` element returned by the workflow, using the project theme
plot_evalutateK_results(
    tradeseq_eb[["evaluate_k"]],
    k = NULL,
    aicDiff = 2
) + theme_global_set(1)

## Pseudotime associated genes (PTAG)

In [None]:
# Get association test results from the tradeSeq workflow
association_eb <- tradeseq_eb[["association"]]

# Rename columns
# assumes the table has exactly these 10 columns in this order (global, NaCl, CpG, mean logFC)
# — TODO confirm against the associationTest output
colnames(association_eb) <- c("wald_stat", "df", "pval", "wald_stat_nacl", "df_nacl", "pval_nacl", "wald_stat_cpg", "df_cpg", "pval_cpg", "mean_logfc")

# Keep only the per-condition statistics
association_eb <- association_eb[, c("wald_stat_nacl", "df_nacl", "pval_nacl", "wald_stat_cpg", "df_cpg", "pval_cpg")]

# Set p-value to min (positive) p-value if zero
association_eb$pval_nacl <- ifelse(
    association_eb$pval_nacl == 0,
    min(na.omit(association_eb$pval_nacl[association_eb$pval_nacl > 0])),
    association_eb$pval_nacl
)
association_eb$pval_cpg <- ifelse(
    association_eb$pval_cpg == 0,
    min(na.omit(association_eb$pval_cpg[association_eb$pval_cpg > 0])),
    association_eb$pval_cpg
)

# Compute adjusted p-values
association_eb$padj_nacl <- p.adjust(association_eb$pval_nacl, "BH")
association_eb$padj_cpg <- p.adjust(association_eb$pval_cpg, "BH")

# Filter out tests that failed (NA statistic) in both conditions
association_eb <- association_eb[!(is.na(association_eb$wald_stat_nacl) & is.na(association_eb$wald_stat_cpg)), ]

# Keep rows with a positive Wald statistic in at least one condition.
# which() drops rows where the comparison evaluates to NA (statistic missing in one condition
# and not positive in the other); indexing a data frame with a logical NA would instead
# insert rows consisting entirely of NA.
association_eb <- association_eb[which(association_eb$wald_stat_nacl > 0 | association_eb$wald_stat_cpg > 0), ]

# Select PTAG per condition (stays NA where the comparison cannot be decided)
association_eb$ptag_nacl <- association_eb$padj_nacl <= 0.01 & association_eb$wald_stat_nacl > 0
association_eb$ptag_cpg <- association_eb$padj_cpg <= 0.01 & association_eb$wald_stat_cpg > 0

## Pseudotime condition genes (PTCG)

In [None]:
# Condition test on the `fitgam` element of the workflow result; log2(1) gives a threshold of 0
condition_eb <- conditionTest(tradeseq_eb[["fitgam"]], l2fc = log2(1))

# Uniform column names for the three result columns
colnames(condition_eb) <- c("wald_stat_ptcg", "df_ptcg", "pval_ptcg")

# p-values that are exactly zero are replaced by the smallest positive p-value
condition_eb$pval_ptcg <- ifelse(
    condition_eb$pval_ptcg == 0,
    min(na.omit(condition_eb$pval_ptcg[condition_eb$pval_ptcg > 0])),
    condition_eb$pval_ptcg
)

# Multiple-testing correction ("fdr" is p.adjust's alias for Benjamini-Hochberg)
condition_eb$padj_ptcg <- p.adjust(condition_eb$pval_ptcg, method = "fdr")

# Flag PTCG at an adjusted p-value of at most 0.01
condition_eb$ptcg <- condition_eb$padj_ptcg <= 0.01

## Combine results for final pseudotime genes list (PTG)

In [None]:
# Combine association and condition results by gene (row names); only genes present in both
# tables are kept, and the gene names are restored as row names afterwards
ptg <- dplyr::inner_join(
    tibble::rownames_to_column(association_eb, "gene"),
    tibble::rownames_to_column(condition_eb, "gene"),
    by = join_by(gene)
) %>%
    tibble::column_to_rownames("gene")

In [None]:
# Store the combined pseudotime gene table (association + condition test columns).
# `ptg` is the object to write: saving `association_eb` would omit the condition-test columns.
saveRDS(ptg, "result/tradeseq/ptg_eb.rds")

# TradeSeq workflow on pySCENIC regulons

In [None]:
# Compute normalisation factors and the offset from the original count data
cnt <- GetAssayData(so, assay = "RNA", slot = "counts")

# edgeR normalisation factors, multiplied into the per-column count totals
norm_factor <- edgeR::calcNormFactors(cnt)
lib_size <- colSums(as.matrix(cnt)) * norm_factor

# Log of the scaled library size; passed as `offset` to the regulon workflow
offset <- log(lib_size)

In [None]:
# Read the regulon AUC table; the first CSV column supplies the row names
auc_mtx <- read.csv("/research/peer/fdeckert/FD20200109SPLENO/result/scenic/eb_0/auc_mtx.csv", row.names=1)

# Transpose, then select and order the columns by the cell names of the Seurat object
auc_mtx <- t(auc_mtx)
auc_mtx <- auc_mtx[, colnames(so)]

In [None]:
# Create Seurat GRN
# The AUC matrix is passed as `counts`; the cell metadata is copied from `so`
so_grn <- CreateSeuratObject(counts=auc_mtx, meta.data=so@meta.data)

In [None]:
# Run the project tradeSeq wrapper on the regulon AUC object, with a gaussian family and the
# offset computed from the original counts.
# NOTE(review): compute=FALSE presumably re-uses previously stored results — confirm in bin/tradeseq.R
tradeseq_grn <- tradeseq_workflow(
    so_grn,
    so_grn$dpt_pseudotime,
    conditions = factor(so_grn$treatment, levels = c("NaCl", "CpG")),
    genes = NULL,
    suffix = "_grn",
    log2_thr = 0,
    nknots = 7,
    family = "gaussian",
    offset = offset,
    compute = FALSE,
    BPPARAM = BPPARAM
)

In [None]:
# Figure size for the notebook output
options(repr.plot.width = 15, repr.plot.height = 5)

# Plot the `evaluate_k` element of the regulon workflow result, using the project theme
plot_evalutateK_results(
    tradeseq_grn[["evaluate_k"]],
    k = NULL,
    aicDiff = 2
) + theme_global_set(1)

In [None]:
# Long-format table with one row per (row, column) pair of the AUC matrix, joined to the cell
# metadata by `cell_id`.
# `varnames` is spelled out in full rather than relying on partial argument matching (`varname`).
test <- reshape2::melt(auc_mtx, varnames=c("gene", "cell_id"), value.name="auc_score") %>% dplyr::left_join(., so@meta.data, by=join_by(cell_id)) 

In [None]:
# Large canvas: the plot below has one facet per `gene` value, 20 facets per row
options(repr.plot.width=50, repr.plot.height=50)

# Violin plot of the AUC score per treatment, one panel per `gene` value (row name of auc_mtx)
ggplot(test, aes(x=treatment, y=auc_score, fill=treatment)) + geom_violin() + facet_wrap(~gene, ncol=20)

## Pseudotime associated genes (PTAG)

In [None]:
# Get association test results from the regulon tradeSeq workflow
# NOTE(review): this re-uses the name `association_eb` from the transcriptome section and
# therefore replaces that object — confirm this is intended
association_eb <- tradeseq_grn[["association"]]

# Rename columns
# assumes the table has exactly these 10 columns in this order (global, NaCl, CpG, mean logFC)
# — TODO confirm against the associationTest output
colnames(association_eb) <- c("wald_stat", "df", "pval", "wald_stat_nacl", "df_nacl", "pval_nacl", "wald_stat_cpg", "df_cpg", "pval_cpg", "mean_logfc")

# Keep only the per-condition statistics
association_eb <- association_eb[, c("wald_stat_nacl", "df_nacl", "pval_nacl", "wald_stat_cpg", "df_cpg", "pval_cpg")]

# Set p-value to min (positive) p-value if zero
association_eb$pval_nacl <- ifelse(
    association_eb$pval_nacl == 0,
    min(na.omit(association_eb$pval_nacl[association_eb$pval_nacl > 0])),
    association_eb$pval_nacl
)
association_eb$pval_cpg <- ifelse(
    association_eb$pval_cpg == 0,
    min(na.omit(association_eb$pval_cpg[association_eb$pval_cpg > 0])),
    association_eb$pval_cpg
)

# Compute adjusted p-values
association_eb$padj_nacl <- p.adjust(association_eb$pval_nacl, "BH")
association_eb$padj_cpg <- p.adjust(association_eb$pval_cpg, "BH")

# Filter out tests that failed (NA statistic) in both conditions
association_eb <- association_eb[!(is.na(association_eb$wald_stat_nacl) & is.na(association_eb$wald_stat_cpg)), ]

# Keep rows with a positive Wald statistic in at least one condition.
# which() drops rows where the comparison evaluates to NA (statistic missing in one condition
# and not positive in the other); indexing a data frame with a logical NA would instead
# insert rows consisting entirely of NA.
association_eb <- association_eb[which(association_eb$wald_stat_nacl > 0 | association_eb$wald_stat_cpg > 0), ]

# Select PTAG per condition (stays NA where the comparison cannot be decided)
association_eb$ptag_nacl <- association_eb$padj_nacl <= 0.01 & association_eb$wald_stat_nacl > 0
association_eb$ptag_cpg <- association_eb$padj_cpg <= 0.01 & association_eb$wald_stat_cpg > 0

## Pseudotime condition genes (PTCG)

In [None]:
# Condition test on the `fitgam` element of the regulon workflow result; log2(1) gives a threshold of 0
condition_eb <- conditionTest(tradeseq_grn[["fitgam"]], l2fc = log2(1))

# Uniform column names for the three result columns
colnames(condition_eb) <- c("wald_stat_ptcg", "df_ptcg", "pval_ptcg")

# p-values that are exactly zero are replaced by the smallest positive p-value
condition_eb$pval_ptcg <- ifelse(
    condition_eb$pval_ptcg == 0,
    min(na.omit(condition_eb$pval_ptcg[condition_eb$pval_ptcg > 0])),
    condition_eb$pval_ptcg
)

# Multiple-testing correction ("fdr" is p.adjust's alias for Benjamini-Hochberg)
condition_eb$padj_ptcg <- p.adjust(condition_eb$pval_ptcg, method = "fdr")

# Flag PTCG at an adjusted p-value of at most 0.01
condition_eb$ptcg <- condition_eb$padj_ptcg <= 0.01

## Combine results for final pseudotime genes list (PTG)

In [None]:
# Combine association and condition results by row name; only entries present in both
# tables are kept, and the names are restored as row names afterwards
ptg <- dplyr::inner_join(
    tibble::rownames_to_column(association_eb, "gene"),
    tibble::rownames_to_column(condition_eb, "gene"),
    by = join_by(gene)
) %>%
    tibble::column_to_rownames("gene")

In [None]:
# Store the combined regulon table (association + condition test columns).
# It gets its own file, named after the "_grn" suffix used in the regulon workflow call, so that
# the transcriptome result in result/tradeseq/ptg_eb.rds is not overwritten.
saveRDS(ptg, "result/tradeseq/ptg_grn.rds")

# Session info 

In [None]:
# Print R version, platform and attached/loaded package versions for reproducibility
sessionInfo()