# Lamian lineage marker genes and regulons along pseudotime 

**Lamian**  
[Preprint](https://www.biorxiv.org/content/10.1101/2021.07.10.451910v1.full)  
[GitHub](https://github.com/Winnie09/Lamian)  
[Tutorial](https://winnie09.github.io/Wenpin_Hou/pages/Lamian.html)  

**Condiment**  
[Preprint](https://www.biorxiv.org/content/10.1101/2021.03.09.433671v1.full)  
[GitHub](https://github.com/HectorRDB/condiments)  
[Tutorial](https://hectorrdb.github.io/condimentsPaper/)  

https://github.com/statOmics/tradeSeq/issues/104

In [None]:
# Silence warnings for the rest of the session (a negative `warn` makes R ignore them).
options(warn = -1)

In [None]:
library_load <- suppressMessages(
    
    list(
        
        # Seurat 
        library(Seurat), 
        
        # Condiment
        library(condiments), 
        library(tradeSeq), 
        library(SingleCellExperiment), 
        
        # Data 
        library(tidyverse), 
        
        # Parallel 
        library(BiocParallel), 
        
        # Python 
        library(reticulate)
        
    )
    
)

In [None]:
# Fix the RNG seed so any stochastic step below is reproducible.
random_seed <- 42
set.seed(seed = random_seed)

In [None]:
# Set working directory to project root.
# Every relative path used below (source() files, data/, result/) is resolved against this directory.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
setwd("/research/peer/fdeckert/FD20200109SPLENO")

In [None]:
# Source project helper scripts (paths are relative to the working directory).
# NOTE(review): presumably these provide theme_global_set(), fplot() and the `color` palette
# used further down - none of them is defined in this file; confirm.
source("plotting_global.R")
source("bin/so_pl.R")

In [None]:
# Make the project theme the default ggplot2 theme for all plots created below.
# theme_global_set() is not defined in this file (presumably from the project scripts sourced earlier).
ggplot2::theme_set(
    theme_global_set(size_select = 1)
)

# Set parallel 

In [None]:
# Raise the `future.globals.maxSize` option to 200000 MiB (the value is given in bytes).
options(future.globals.maxSize = 200000 * 1024^2)

In [None]:
# BiocParallel back-end handed to tradeSeq: 32 forked workers.
# NOTE(review): tasks = 0 is passed through unchanged - see the BiocParallel docs for how it splits the work.
BPPARAM <- MulticoreParam(
    workers = 32,
    tasks = 0
)

# Show the resulting configuration
BPPARAM

# Import Data 

In [None]:
# Pre-processed Seurat object
so <- readRDS(file = "data/BSA_0355_SM01_10x_SPLENO/ANALYSIS/pp.rds")

# Diffusion pseudotime (DPT) table for the erythroid lineage.
# The first CSV column becomes the row names; they are matched against `cell_id` further down.
dpt <- read.csv(file = "result/lineage/ery_dpt.csv", row.names = 1)

# Subset Seurat Object to erythroid lineage

In [None]:
# Attach the DPT columns to the cell metadata
so <- AddMetaData(object = so, metadata = dpt)

# Keep only the cells that have an entry in the DPT table.
# Relies on a `cell_id` metadata column holding the same identifiers as rownames(dpt).
so <- subset(x = so, subset = cell_id %in% rownames(dpt))

# Set DPT 

In [None]:
# Linearly rescale a numeric vector so its smallest non-NA value maps to 0 and its largest to 1.
# NA entries stay NA. A vector whose non-NA values are all equal has zero range and yields NaN.
minmax_scale <- function(x) {
    bounds <- range(x, na.rm = TRUE)
    (x - bounds[1]) / (bounds[2] - bounds[1])
}
# Rescale pseudotime to [0, 1] separately within each `infection` group.
# Grouping turns the metadata into a tibble (row names are lost), so the row names
# are restored from the `cell_id` column at the end; that column is consumed in the process.
so@meta.data <- so@meta.data %>%
    dplyr::group_by(infection) %>%
    dplyr::mutate(dpt_pseudotime = minmax_scale(dpt_pseudotime)) %>%
    dplyr::ungroup() %>%
    tibble::column_to_rownames("cell_id")

In [None]:
# Canvas for three panels side by side
options(repr.plot.width = 3 * 6, repr.plot.height = 6)

# Pseudotime per condition. fplot() is not defined in this file
# (presumably a feature-plot helper from the sourced project scripts).
fplot_1 <- fplot(subset(so, subset = infection == "Baseline"), features = "dpt_pseudotime")
fplot_2 <- fplot(subset(so, subset = infection == "CpG"), features = "dpt_pseudotime")

# Pseudotime density per condition.
# Transparency is a fixed setting, so it belongs on the geom and not inside aes():
# inside aes() the constant is treated as a mapped variable and adds a meaningless alpha legend.
# `color$infection` is a project-level palette (not defined in this file).
dplot_1 <- ggplot(so@meta.data, aes(x = dpt_pseudotime, fill = infection)) +
    geom_density(alpha = 0.5) +
    scale_fill_manual(values = color$infection)

fplot_1 + fplot_2 + dplot_1 + patchwork::plot_layout(ncol = 3)

# Setup tradeSeq parameters

In [None]:
# Raw RNA count matrix of the erythroid-lineage cells; the gene filter below works on it
counts <- GetAssayData(object = so, assay = "RNA", layer = "counts")

In [None]:
# Select lineage genes.
# The masks index the row names directly instead of subsetting the count matrix first:
# this avoids building two throw-away sub-matrices and still returns a name when
# exactly one gene passes (a single-row subset would drop to a vector without row names).

# Genes with a count of at least 3 in at least one cell
genes_1 <- rownames(counts)[rowSums(counts >= 3) >= 1]

# Genes detected (count >= 1) in at least 20 cells
genes_2 <- rownames(counts)[rowSums(counts >= 1) >= 20]

# Keep genes passing both filters
genes <- intersect(genes_1, genes_2)

In [None]:
# so <- so[, sample(colnames(so), size=1000, replace=FALSE)]
# counts <- GetAssayData(so, assay="RNA", layer="counts")

In [None]:
# Restrict the count matrix to the selected genes.
# drop = FALSE keeps the result a matrix even if only a single gene was selected.
counts <- counts[genes, , drop = FALSE]

In [None]:
# Cache switches for the two expensive steps below.
# TRUE  -> load the previously saved result from result/lineage/ (the .rds file must already exist).
# FALSE -> recompute the step and overwrite the saved .rds file.
cache_evealuate_k <- TRUE
cache_tradeseq <- TRUE

## Evaluate K

In [None]:
# Knot-number diagnostics for the GAM fit: load the cached result, or run evaluateK() and cache it.
if (cache_evealuate_k) {

    evaluate_k <- readRDS("result/lineage/ery_evaluate_k.rds")

} else {

    # A single lineage is assumed: every cell gets weight 1, and the two infection
    # conditions are modelled with "Baseline" as the first factor level.
    evaluate_k <- evaluateK(
        counts = counts,
        pseudotime = so$dpt_pseudotime,
        cellWeights = rep(1, ncol(so)),
        conditions = factor(so$infection, levels = c("Baseline", "CpG")),
        k = 3:10,
        family = "nb",
        offset = NULL,
        parallel = TRUE,
        BPPARAM = BPPARAM,
        verbose = TRUE
    )

    saveRDS(evaluate_k, "result/lineage/ery_evaluate_k.rds")

}

In [None]:
# Wide canvas for the evaluateK diagnostic panels
options(repr.plot.width = 15, repr.plot.height = 5)

# Note: `plot_evalutateK_results` is spelled exactly as it is called here; do not "fix" the name.
plot_evalutateK_results(evaluate_k, k = NULL, aicDiff = 2)

## Fit GAM 

In [None]:
# Fit the tradeSeq GAMs and run the association test, or load the cached results.
# `tradeseq_res` is a list with elements `fitgam` and `association`.
if (!cache_tradeseq) {

    # One lineage (all cell weights 1), two conditions with "Baseline" as first level.
    # NOTE(review): nknots = 6 is presumably chosen from the evaluateK diagnostics above - confirm.
    fitgam <- fitGAM(
        counts = counts,
        pseudotime = so$dpt_pseudotime,
        cellWeights = rep(1, ncol(so)),
        conditions = factor(so$infection, levels = c("Baseline", "CpG")),
        nknots = 6,
        genes = genes,
        family = "nb",
        offset = NULL,
        parallel = TRUE,
        # Use the same back-end as evaluateK(); without it parallel = TRUE falls back
        # to whatever BiocParallel back-end happens to be registered.
        BPPARAM = BPPARAM,
        verbose = TRUE
    )

    # Association test per lineage/condition.
    # The argument is spelled out as `lineages` instead of relying on partial matching of `lineage`.
    association <- associationTest(
        fitgam,
        l2fc = 0,
        lineages = TRUE,
        contrastType = "end",
        inverse = "Chol"
    )

    # Save results
    tradeseq_res <- list(fitgam = fitgam, association = association)

    saveRDS(tradeseq_res, "result/lineage/ery_gam.rds")

} else {

    tradeseq_res <- readRDS("result/lineage/ery_gam.rds")

}

# Pseudotime associated genes (PTAG)

In [None]:
# Get association test
association_res <- tradeseq_res[["association"]]

# Rename columns positionally: global test (3 columns), first condition (3), second condition (3), mean logFC.
# NOTE(review): "nacl" labels the first condition block and "cpg" the second; this presumably
# follows the factor levels c("Baseline", "CpG") used in fitGAM - confirm against the raw column names.
association_cols <- c("wald_stat", "df", "pval", "wald_stat_nacl", "df_nacl", "pval_nacl", "wald_stat_cpg", "df_cpg", "pval_cpg", "mean_logfc")

# Guard the positional rename: a different column count means the test was run with other settings.
stopifnot(ncol(association_res) == length(association_cols))
colnames(association_res) <- association_cols

# Keep only the per-condition statistics
association_res <- association_res[, c("wald_stat_nacl", "df_nacl", "pval_nacl", "wald_stat_cpg", "df_cpg", "pval_cpg")]

# Set p-value to the smallest positive p-value if zero
association_res$pval_nacl <- ifelse(association_res$pval_nacl == 0, min(na.omit(association_res$pval_nacl[association_res$pval_nacl > 0])), association_res$pval_nacl)
association_res$pval_cpg <- ifelse(association_res$pval_cpg == 0, min(na.omit(association_res$pval_cpg[association_res$pval_cpg > 0])), association_res$pval_cpg)

# Compute adjusted p-values (NA p-values are ignored by p.adjust and stay NA)
association_res$padj_nacl <- p.adjust(association_res$pval_nacl, "BH")
association_res$padj_cpg <- p.adjust(association_res$pval_cpg, "BH")

# Filter out genes whose test failed in both conditions
failed_in_both <- is.na(association_res$wald_stat_nacl) & is.na(association_res$wald_stat_cpg)
association_res <- association_res[!failed_in_both, ]

# Keep genes with a positive Wald statistic in at least one condition.
# The comparison is NA when one statistic is missing and the other is not positive;
# which() drops those rows, whereas an NA in a logical row index would insert all-NA rows.
positive_in_any <- association_res$wald_stat_nacl > 0 | association_res$wald_stat_cpg > 0
association_res <- association_res[which(positive_in_any), ]

# Select PTAG per condition (NA where the test failed for that condition and the other term is not FALSE)
association_res$ptag_nacl <- association_res$padj_nacl <= 0.01 & association_res$wald_stat_nacl > 0
association_res$ptag_cpg <- association_res$padj_cpg <= 0.01 & association_res$wald_stat_cpg > 0

# Pseudotime condition genes (PTCG)

In [None]:
# Test for differences between the conditions along pseudotime (log2 fold-change threshold of 0)
condition_res <- conditionTest(tradeseq_res[["fitgam"]], l2fc = 0)

# Positional rename of the three result columns
colnames(condition_res) <- c("wald_stat_ptcg", "df_ptcg", "pval_ptcg")

# Replace exact-zero p-values by the smallest positive p-value observed
condition_res$pval_ptcg <- ifelse(
    condition_res$pval_ptcg == 0,
    min(na.omit(condition_res$pval_ptcg[condition_res$pval_ptcg > 0])),
    condition_res$pval_ptcg
)

# Multiple-testing correction
condition_res$padj_ptcg <- p.adjust(condition_res$pval_ptcg, "fdr")

# Flag PTCG at an adjusted p-value of at most 0.01 (NA where the test gave no p-value)
condition_res$ptcg <- condition_res$padj_ptcg <= 0.01

# Combine results for final pseudotime genes list (PTG)

In [None]:
# Combine association and condition results per gene.
# Row names are moved into a `gene` column for the join and restored afterwards;
# only genes present in both tables are kept (inner join).
association_tbl <- tibble::rownames_to_column(association_res, "gene")
condition_tbl <- tibble::rownames_to_column(condition_res, "gene")

ptg <- association_tbl %>%
    dplyr::inner_join(condition_tbl, by = join_by(gene)) %>%
    tibble::column_to_rownames("gene")

In [None]:
# Save the combined pseudotime gene table (association + condition results).
# Previously `association_res` was written to this file, so the joined `ptg` table was never stored.
saveRDS(ptg, "result/lineage/ery_ptg.rds")

# Session info 

In [None]:
# Record the R version and the attached/loaded package versions for reproducibility.
sessionInfo()