# Lamian lineage marker genes and DEA along pseudotime 

**Lamian**  
[Preprint](https://www.biorxiv.org/content/10.1101/2021.07.10.451910v1.full)  
[GitHub](https://github.com/Winnie09/Lamian)  
[Tutorial](https://winnie09.github.io/Wenpin_Hou/pages/Lamian.html)  

**Condiments**  
[Preprint](https://www.biorxiv.org/content/10.1101/2021.03.09.433671v1.full)  
[GitHub](https://github.com/HectorRDB/condiments)  
[Tutorial](https://hectorrdb.github.io/condimentsPaper/)  

In [None]:
# Attach every package used by this notebook. The library() calls run while
# the list is built; suppressMessages() hides their startup messages.
# The attach order is left untouched because it decides which package masks
# which when two packages export the same name.
library_load <- suppressMessages(
    
    list(
        
        # Seurat 
        library(Seurat), 
        
        # Condiments
        library(condiments), 
        
        # Lamian 
        library(Lamian),
        
        # Data 
        library(tidyverse), 
        library(openxlsx), 
        
        # Plotting 
        library(ComplexHeatmap), 
        library(circlize), 
        library(viridis), 
        library(ggplotify), 
        library(ComplexUpset), 
        
        # Parallel 
        library(BiocParallel)
        
    )
)

In [None]:
# Fixed seed so that steps drawing random numbers give the same result on every run
random_seed <- 42
set.seed(random_seed)

In [None]:
# warn=-1 silences every warning for the rest of the session
options(warn=-1)

In [None]:
# Turn off the messages printed by ComplexHeatmap
ht_opt$message=FALSE # ComplexHeatmap 

In [None]:
# Set working directory to project root
# (every relative path used in this notebook resolves against it)
setwd("/research/peer/fdeckert/FD20200109SPLENO")

In [None]:
# Source files
# Project-local scripts, given relative to the working directory set above
source("plotting_global.R")
source("bin/tradeseq.R")
source("bin/seurat_qc.R")
source("bin/seurat_dea.R")
source("bin/cell_type.R")

In [None]:
# Plotting Theme
# Make the project theme the default for all ggplot2 plots created afterwards
ggplot2::theme_set(theme_global_set()) # From project global source()

# Parameter settings and data import 

In [None]:
# Load the saved object that all later subsets are taken from
so <- readRDS(file = "data/object/pp.rds")

# CellRank absorption probabilities, one table per treatment.
# The first CSV column becomes the row names; check.names = FALSE keeps the
# column headers exactly as they are written in the file.
absorption_probabilities_prog_nacl <- read.csv(
  file = "result/cellrank/absorption_probabilities_prog_nacl.csv",
  row.names = 1,
  check.names = FALSE
)
absorption_probabilities_prog_cpg <- read.csv(
  file = "result/cellrank/absorption_probabilities_prog_cpg.csv",
  row.names = 1,
  check.names = FALSE
)

# Prepare data  

In [None]:
# Progenitor populations analysed in this notebook, one line per lineage stage.
# The order of this vector is the order used when it serves as factor levels.
cell_type_prog <- c(
  "MastP", "MegP",
  "MEP (1)", "MEP (2)", "MEP (3)", "MEP (4)",
  "ProEB (1)", "ProEB (2)", "ProEB (3)", "ProEB (4)",
  "EB (1)", "EB (2)", "EB (3)", "EB (4)", "EB (5)"
)

In [None]:
# Keep only the cells whose fine cell type is one of the progenitor populations
so_prog <- subset(so, subset = cell_type_fine %in% cell_type_prog)

# Store treatment and fine cell type as factors; the level order comes from
# names(color$treatment) and cell_type_fine_order
so_prog$treatment <- factor(
  so_prog$treatment,
  levels = names(color[["treatment"]])
)
so_prog$cell_type_fine <- factor(
  so_prog$cell_type_fine,
  levels = cell_type_fine_order
)

In [None]:
# Stack the absorption probabilities of both treatments and attach them as
# cell-level metadata
so_prog <- AddMetaData(
  object = so_prog,
  metadata = rbind(
    absorption_probabilities_prog_nacl,
    absorption_probabilities_prog_cpg
  )
)

In [None]:
# One object per treatment
so_prog_nacl <- subset(so_prog, subset = treatment == "NaCl")
so_prog_cpg <- subset(so_prog, subset = treatment == "CpG")

# Absorption probability UMAP 

In [None]:
options(repr.plot.width = 20, repr.plot.height = 5)

# Panel 1: NaCl cells grouped by cell_type_fine_detail. The manual colour scale
# is limited to the palette entries named after a cell type found in so_prog.
dplot_1 <- dplot(
  so_prog_nacl,
  reduction = "umap",
  group_by = "cell_type_fine_detail",
  alpha = 1,
  pt_size = 0.5
) +
  scale_color_manual(
    values = color[["cell_type_fine_detail"]][names(color[["cell_type_fine_detail"]]) %in% so_prog$cell_type_fine]
  ) +
  theme(
    plot.title = element_blank(),
    legend.title = element_blank()
  )

# Panels 2-4: the MastP, MegP and EB..5. features on the same embedding
fplot_1 <- fplot(so_prog_nacl, reduction = "umap", features = "MastP") +
  ggtitle("MastP") +
  scale_color_viridis(option = "G")
fplot_2 <- fplot(so_prog_nacl, reduction = "umap", features = "MegP") +
  ggtitle("MegP") +
  scale_color_viridis(option = "G")
fplot_3 <- fplot(so_prog_nacl, reduction = "umap", features = "EB..5.") +
  ggtitle("EB (5)") +
  scale_color_viridis(option = "G")

# All four panels in a single row
dplot_1 + fplot_1 + fplot_2 + fplot_3 + plot_layout(ncol = 4)

In [None]:
options(repr.plot.width = 20, repr.plot.height = 5)

# Panel 1: CpG cells grouped by cell_type_fine_detail. The manual colour scale
# is limited to the palette entries named after a cell type found in so_prog.
dplot_1 <- dplot(
  so_prog_cpg,
  reduction = "umap",
  group_by = "cell_type_fine_detail",
  alpha = 1,
  pt_size = 0.5
) +
  scale_color_manual(
    values = color[["cell_type_fine_detail"]][names(color[["cell_type_fine_detail"]]) %in% so_prog$cell_type_fine]
  ) +
  theme(
    plot.title = element_blank(),
    legend.title = element_blank()
  )

# Panels 2-4: the MastP, MegP and EB..5. features on the same embedding
fplot_1 <- fplot(so_prog_cpg, reduction = "umap", features = "MastP") +
  ggtitle("MastP") +
  scale_color_viridis(option = "G")
fplot_2 <- fplot(so_prog_cpg, reduction = "umap", features = "MegP") +
  ggtitle("MegP") +
  scale_color_viridis(option = "G")
fplot_3 <- fplot(so_prog_cpg, reduction = "umap", features = "EB..5.") +
  ggtitle("EB (5)") +
  scale_color_viridis(option = "G")

# All four panels in a single row
dplot_1 + fplot_1 + fplot_2 + fplot_3 + plot_layout(ncol = 4)

# Absorption probability box plots

In [None]:
options(repr.plot.width=10, repr.plot.height=5)

# Long-format table with one row per cell and absorption column (MastP, MegP,
# EB..5.). melt() names the column holding the former column name `variable`
# and the column holding the number `value`.
# The id columns are passed through `id.vars`, the argument name melt() knows;
# a misspelled name is swallowed by `...` and melt() falls back to guessing.
# dplyr::select is qualified so that a select() from another attached package
# cannot mask it.
data <- dplyr::select(so_prog@meta.data, treatment, cell_type_fine, MastP, MegP, EB..5.) %>% 
    reshape2::melt(id.vars=c("treatment", "cell_type_fine")) %>% as.data.frame() %>% 
    dplyr::mutate(cell_type_fine=factor(cell_type_fine, levels=cell_type_prog)) %>% 
    dplyr::mutate(treatment=factor(treatment, levels=c("NaCl", "CpG")))

# One box per cell type; rows of the facet grid are treatments, columns are
# the absorption columns
plot <- ggplot(data, aes(x=cell_type_fine, y=value, color=cell_type_fine, fill=cell_type_fine)) + 
    geom_boxplot(outlier.size=0.5) + 
    facet_grid(treatment~variable) + 
    scale_color_manual(values=color[["cell_type_fine_detail"]][names(color[["cell_type_fine_detail"]]) %in% so_prog$cell_type_fine]) + 
    scale_fill_manual(values=color[["cell_type_fine_detail"]][names(color[["cell_type_fine_detail"]]) %in% so_prog$cell_type_fine]) + 
    theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5))

plot

# Cell cycle proportions 

In [None]:
options(repr.plot.width = 10, repr.plot.height = 2.5)

# Number of cells per fine cell type, stacked by cell cycle phase class,
# with one facet per treatment
ggplot(
  data = so_prog@meta.data,
  mapping = aes(x = cell_type_fine, fill = cc_phase_class)
) +
  geom_bar() +
  facet_grid(~treatment) +
  scale_fill_manual(values = color$cc_phase_class) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  )

# Ribosomal percentage

In [None]:
options(repr.plot.width = 10, repr.plot.height = 2.5)

# Distribution of the pRb_RNA metadata column per fine cell type, one facet
# per treatment. Outline and fill use the same palette subset.
ggplot(
  data = so_prog@meta.data,
  mapping = aes(
    x = cell_type_fine,
    y = pRb_RNA,
    fill = cell_type_fine,
    color = cell_type_fine
  )
) +
  geom_boxplot() +
  facet_grid(~treatment) +
  scale_color_manual(
    values = color[["cell_type_fine_detail"]][names(color[["cell_type_fine_detail"]]) %in% so_prog$cell_type_fine]
  ) +
  scale_fill_manual(
    values = color[["cell_type_fine_detail"]][names(color[["cell_type_fine_detail"]]) %in% so_prog$cell_type_fine]
  ) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  )

# Gene expression counts

In [None]:
# NaCl: split the object by fine cell type and record, for every split, the
# number of rows returned by feature_select()
feature_count_nacl <- lapply(
  SplitObject(so_prog_nacl, split.by = "cell_type_fine"),
  function(so_split) nrow(feature_select(so_split))
)
feature_count_nacl <- data.frame(
  cell_type_fine = names(feature_count_nacl),
  treatment = "NaCl",
  feature_count = unlist(feature_count_nacl)
)

In [None]:
# CpG: same procedure
feature_count_cpg <- lapply(
  SplitObject(so_prog_cpg, split.by = "cell_type_fine"),
  function(so_split) nrow(feature_select(so_split))
)
feature_count_cpg <- data.frame(
  cell_type_fine = names(feature_count_cpg),
  treatment = "CpG",
  feature_count = unlist(feature_count_cpg)
)

In [None]:
# Combine both treatments and add the number of cells per treatment and
# fine cell type, counted from the metadata
feature_count <- rbind(feature_count_nacl, feature_count_cpg) %>%
  dplyr::left_join(
    so_prog@meta.data %>%
      dplyr::group_by(treatment, cell_type_fine) %>%
      dplyr::summarise(cell_count = n()),
    by = c("treatment", "cell_type_fine")
  ) %>%
  dplyr::mutate(
    treatment = factor(treatment, levels = c("NaCl", "CpG"))
  )

In [None]:
options(repr.plot.width = 10, repr.plot.height = 2.5)

# Feature count against cell count, one point per fine cell type and one
# facet per treatment
ggplot(
  data = feature_count,
  mapping = aes(x = cell_count, y = feature_count, color = cell_type_fine)
) +
  geom_point(stat = "identity") +
  facet_grid(~treatment) +
  scale_color_manual(
    values = color[["cell_type_fine_detail"]][names(color[["cell_type_fine_detail"]]) %in% so_prog$cell_type_fine]
  )

# MEP DEA

In [None]:
# All cells whose main cell type is MEP ...
so_prog_mep <- subset(so_prog, subset = cell_type_main == "MEP")

# ... and the same cells separated by treatment
so_prog_mep_nacl <- subset(so_prog_mep, subset = treatment == "NaCl")
so_prog_mep_cpg <- subset(so_prog_mep, subset = treatment == "CpG")

In [None]:
# Treatment DEA with the whole MEP population as one identity
# (ident = "cell_type_main"); no logFC or min.pct pre-filter, Wilcoxon test
dea_treatment_mep <- dea_seurat(
  so_prog_mep,
  ident = "cell_type_main",
  map = cell_type[cell_type$ident %in% so_prog_mep$ident, ],
  only_pos = FALSE,
  logfc_threshold = 0,
  min_pct = 0,
  conserved = FALSE,
  treatment = TRUE,
  grouping_var = "treatment",
  cnt_min = 5,
  cell_min = 3,
  test_use = "wilcox",
  compute = TRUE
)

In [None]:
options(repr.plot.width = 5 * 1, repr.plot.height = 5 * 1)

# One vp_dea() plot per DEA result, titled with its cell_type_fine_detail column
vp <- lapply(seq_along(dea_treatment_mep), function(idx) {
  dea_result <- dea_treatment_mep[[idx]]
  vp_dea(
    dea_result,
    title = dea_result$cell_type_fine_detail,
    log2_thold = 0,
    adjpvalue_thold = 0.1
  )
})
wrap_plots(vp, ncol = 1, nrow = 1)

In [None]:
# Same treatment DEA, now per identity (ident = "ident"); this overwrites the
# result of the population-level run
dea_treatment_mep <- dea_seurat(
  so_prog_mep,
  ident = "ident",
  map = cell_type[cell_type$ident %in% so_prog_mep$ident, ],
  only_pos = FALSE,
  logfc_threshold = 0,
  min_pct = 0,
  conserved = FALSE,
  treatment = TRUE,
  grouping_var = "treatment",
  cnt_min = 5,
  cell_min = 3,
  test_use = "wilcox",
  compute = TRUE
)

In [None]:
options(repr.plot.width = 5 * 4, repr.plot.height = 5 * 1)

# One vp_dea() plot per DEA result, laid out in a single row of four
vp <- lapply(seq_along(dea_treatment_mep), function(idx) {
  dea_result <- dea_treatment_mep[[idx]]
  vp_dea(
    dea_result,
    title = dea_result$cell_type_fine_detail,
    log2_thold = 0,
    adjpvalue_thold = 0.1
  )
})
wrap_plots(vp, ncol = 4, nrow = 1)

In [None]:
# Marker DEA between the MEP identities: conserved across treatments, and
# separately within NaCl and within CpG.
# NOTE(review): compute = FALSE together with `file` presumably loads stored
# results instead of recomputing — confirm in dea_seurat().
dea_marker_mep <- dea_seurat(
  so_prog_mep,
  ident = "ident",
  map = cell_type[cell_type$ident %in% so_prog_mep$ident, ],
  conserved = TRUE,
  only_pos = FALSE,
  logfc_threshold = 0,
  min_pct = 0,
  grouping_var = "treatment",
  cnt_min = 3,
  cell_min = 3,
  file = "result/dea/prog_mep_conserved",
  compute = FALSE
)
dea_marker_mep_nacl <- dea_seurat(
  so_prog_mep_nacl,
  ident = "ident",
  map = cell_type[cell_type$ident %in% so_prog_mep_nacl$ident, ],
  conserved = FALSE,
  only_pos = FALSE,
  logfc_threshold = 0,
  min_pct = 0,
  grouping_var = "ident",
  cnt_min = 3,
  cell_min = 3,
  file = "result/dea/prog_mep_conserved_nacl",
  compute = FALSE
)
dea_marker_mep_cpg <- dea_seurat(
  so_prog_mep_cpg,
  ident = "ident",
  map = cell_type[cell_type$ident %in% so_prog_mep_cpg$ident, ],
  conserved = FALSE,
  only_pos = FALSE,
  logfc_threshold = 0,
  min_pct = 0,
  grouping_var = "ident",
  cnt_min = 3,
  cell_min = 3,
  file = "result/dea/prog_mep_conserved_cpg",
  compute = FALSE
)

In [None]:
options(repr.plot.width = 15, repr.plot.height = 12)

# Heatmaps of the top 20 genes for each of the three marker sets, all drawn
# on so_prog_mep with the MEP clusters in numeric order.
# NOTE(review): the CpG heatmap uses p_val = 1 while the other two use 0.05 —
# confirm this is intended.
hm_1 <- hm_dea(
  dea = dea_marker_mep,
  so = so_prog_mep,
  column_name = "cell_type_fine",
  column_order = c("MEP (1)", "MEP (2)", "MEP (3)", "MEP (4)"),
  p_val = 0.05,
  top = 20,
  column_title_rot = 90,
  conserved = TRUE
)
hm_2 <- hm_dea(
  dea = dea_marker_mep_nacl,
  so = so_prog_mep,
  column_name = "cell_type_fine",
  column_order = c("MEP (1)", "MEP (2)", "MEP (3)", "MEP (4)"),
  p_val = 0.05,
  top = 20,
  column_title_rot = 90,
  conserved = FALSE
)
hm_3 <- hm_dea(
  dea = dea_marker_mep_cpg,
  so = so_prog_mep,
  column_name = "cell_type_fine",
  column_order = c("MEP (1)", "MEP (2)", "MEP (3)", "MEP (4)"),
  p_val = 1,
  top = 20,
  column_title_rot = 90,
  conserved = FALSE
)

hm_1 + hm_2 + hm_3

# End point DEA

In [None]:
# Cells of the three end point populations
so_prog_ep <- subset(
  so_prog,
  subset = cell_type_fine %in% c("MastP", "MegP", "EB (5)")
)

In [None]:
# Marker DEA between the end points, conserved across treatments
dea_marker_ep <- dea_seurat(
  so_prog_ep,
  ident = "ident",
  map = cell_type[cell_type$ident %in% so_prog_ep$ident, ],
  conserved = TRUE,
  only_pos = FALSE,
  logfc_threshold = 0,
  min_pct = 0,
  grouping_var = "treatment",
  cnt_min = 10,
  cell_min = 3,
  file = "result/dea/prog_end_point_conserved",
  compute = FALSE
)

In [None]:
options(repr.plot.width = 5 * 3, repr.plot.height = 5 * 1)

# One vp_dea() plot per end point, titled with its cell_type_fine column
vp <- lapply(seq_along(dea_marker_ep), function(idx) {
  dea_result <- dea_marker_ep[[idx]]
  vp_dea(
    dea_result,
    title = dea_result$cell_type_fine,
    log2_thold = 0.25,
    adjpvalue_thold = 0.05,
    conserved = TRUE
  )
})
wrap_plots(vp, ncol = 3, nrow = 1)

In [None]:
options(repr.plot.width = 10, repr.plot.height = 9)

# Heatmap of the top 20 genes per end point
hm <- hm_dea(
  dea = dea_marker_ep,
  so = so_prog_ep,
  column_name = "cell_type_fine",
  column_order = c("MastP", "MegP", "EB (5)"),
  p_val = 0.01,
  top = 20,
  column_title_rot = 90,
  conserved = TRUE
)
hm

# End point module score 

In [None]:
# Select positive end point DEA features: for every end point keep the genes
# with minimump_p_val <= 0.01 and an average log2FC >= 1.25 in both NaCl and CpG
features <- lapply(seq_along(dea_marker_ep), function(i) {
    
    dea <- dea_marker_ep[[i]]
    keep <- dea$minimump_p_val <= 0.01 & dea$NaCl_avg_log2FC >= 1.25 & dea$CpG_avg_log2FC >= 1.25
    
    # which() skips rows whose statistics are NA and gives an empty result
    # (instead of NA entries) when no gene passes the thresholds
    return(dea[["gene"]][which(keep)])
    
})

# Select unique end point features: keep only the genes that occur in exactly
# one end point set. The counts are matched to the genes by name, because
# table() sorts its entries and therefore does not line up position by
# position with unlist(features).
feature_set_count <- table(unlist(lapply(features, unique)))
features_unique <- names(feature_set_count)[feature_set_count == 1]
features <- lapply(features, function(i) {i[i %in% features_unique]})

In [None]:
# Number of genes left per end point
lapply(features, length)

In [None]:
# NaCl: one module score per end point feature set. The score columns are
# expected under the names msMastP1, msMegP2 and msEB..5.3; they are renamed
# so that the trailing index is removed.
so_prog_nacl <- AddModuleScore(
  so_prog_nacl,
  features = features,
  name = list("msMastP", "msMegP", "msEB (5)")
)
colnames(so_prog_nacl@meta.data)[colnames(so_prog_nacl@meta.data) == "msMastP1"] <- "msMastP"
colnames(so_prog_nacl@meta.data)[colnames(so_prog_nacl@meta.data) == "msMegP2"] <- "msMegP"
colnames(so_prog_nacl@meta.data)[colnames(so_prog_nacl@meta.data) == "msEB..5.3"] <- "msEB..5."

In [None]:
# CpG: same scores and the same renaming
so_prog_cpg <- AddModuleScore(
  so_prog_cpg,
  features = features,
  name = list("msMastP", "msMegP", "msEB (5)")
)
colnames(so_prog_cpg@meta.data)[colnames(so_prog_cpg@meta.data) == "msMastP1"] <- "msMastP"
colnames(so_prog_cpg@meta.data)[colnames(so_prog_cpg@meta.data) == "msMegP2"] <- "msMegP"
colnames(so_prog_cpg@meta.data)[colnames(so_prog_cpg@meta.data) == "msEB..5.3"] <- "msEB..5."

In [None]:
options(repr.plot.width = 20, repr.plot.height = 5)

# Panel 1: NaCl cells grouped by cell_type_fine_detail
dplot_1 <- dplot(
  subset(so_prog_nacl, subset = treatment == "NaCl"),
  reduction = "umap",
  group_by = "cell_type_fine_detail",
  alpha = 1,
  pt_size = 0.5
) +
  scale_color_manual(
    values = color[["cell_type_fine_detail"]][names(color[["cell_type_fine_detail"]]) %in% so_prog$cell_type_fine]
  ) +
  theme(
    plot.title = element_blank(),
    legend.title = element_blank()
  )

# Panels 2-4: the three end point module scores
fplot_1 <- fplot(so_prog_nacl, reduction = "umap", features = "msMastP") +
  ggtitle("MastP") +
  scale_color_viridis(option = "G")
fplot_2 <- fplot(so_prog_nacl, reduction = "umap", features = "msMegP") +
  ggtitle("MegP") +
  scale_color_viridis(option = "G")
fplot_3 <- fplot(so_prog_nacl, reduction = "umap", features = "msEB..5.") +
  ggtitle("EB (5)") +
  scale_color_viridis(option = "G")

# All four panels in a single row
dplot_1 + fplot_1 + fplot_2 + fplot_3 + plot_layout(ncol = 4)

In [None]:
options(repr.plot.width = 20, repr.plot.height = 5)

# Panel 1: CpG cells grouped by cell_type_fine_detail
dplot_1 <- dplot(
  subset(so_prog_cpg, subset = treatment == "CpG"),
  reduction = "umap",
  group_by = "cell_type_fine_detail",
  alpha = 1,
  pt_size = 0.5
) +
  scale_color_manual(
    values = color[["cell_type_fine_detail"]][names(color[["cell_type_fine_detail"]]) %in% so_prog$cell_type_fine]
  ) +
  theme(
    plot.title = element_blank(),
    legend.title = element_blank()
  )

# Panels 2-4: the three end point module scores
fplot_1 <- fplot(so_prog_cpg, reduction = "umap", features = "msMastP") +
  ggtitle("MastP") +
  scale_color_viridis(option = "G")
fplot_2 <- fplot(so_prog_cpg, reduction = "umap", features = "msMegP") +
  ggtitle("MegP") +
  scale_color_viridis(option = "G")
fplot_3 <- fplot(so_prog_cpg, reduction = "umap", features = "msEB..5.") +
  ggtitle("EB (5)") +
  scale_color_viridis(option = "G")

# All four panels in a single row
dplot_1 + fplot_1 + fplot_2 + fplot_3 + plot_layout(ncol = 4)

# Condiments

In [None]:
# DPT pseudotime table; its row names decide which cells are kept in so_eb,
# and its columns are attached as cell metadata
dpt_pseudotime_eb <- read.csv(
  file = "result/cellrank/dpt_pseudotime_eb.csv",
  row.names = 1
)
so_eb <- subset(so_prog, subset = cell_id %in% rownames(dpt_pseudotime_eb))
so_eb <- AddMetaData(object = so_eb, metadata = dpt_pseudotime_eb)

# Store embeddings read from a CSV file as the "umap" reduction
# NOTE(review): "foo.csv" looks like a placeholder file name — confirm the path
so_eb[["umap"]] <- CreateDimReducObject(
  embeddings = as.matrix(read.csv(file = "foo.csv", row.names = 1)),
  key = "UMAP_"
)

In [None]:
# Imbalance score of the two treatments on the UMAP embedding; the scaled
# scores are kept as a metadata column
imbalance_score_eb <- condiments::imbalance_score(
  Object = so_eb@reductions$umap@cell.embeddings,
  conditions = so_eb$treatment,
  k = 20,
  smooth = 40
)
so_eb$imbalance_score <- imbalance_score_eb$scaled_scores

In [None]:
options(repr.plot.width = 20, repr.plot.height = 5)

# Grouped panels: cell_type_fine_detail and treatment
dplot_1 <- dplot(
  so_eb,
  reduction = "umap",
  group_by = "cell_type_fine_detail",
  alpha = 1,
  pt_size = 0.5
) +
  scale_color_manual(
    values = color[["cell_type_fine_detail"]][names(color[["cell_type_fine_detail"]]) %in% so_prog$cell_type_fine]
  )
dplot_2 <- dplot(
  so_eb,
  reduction = "umap",
  group_by = "treatment",
  alpha = 1,
  pt_size = 0.5
) +
  scale_color_manual(values = color[["treatment"]])

# Feature panels: imbalance score and DPT pseudotime
fplot_1 <- fplot(so_eb, reduction = "umap", features = "imbalance_score") +
  ggtitle("Imbalance score") +
  scale_color_viridis(option = "G")
fplot_2 <- fplot(so_eb, reduction = "umap", features = "dpt_pseudotime") +
  ggtitle("DPT pseudotime") +
  scale_color_viridis(option = "G")

# All four panels in a single row
dplot_1 + dplot_2 + fplot_1 + fplot_2 + plot_layout(ncol = 4)

In [None]:
options(repr.plot.width=10, repr.plot.height=2.5)

# Pseudotime density per treatment, drawn as two layers: a filled layer
# without outline and an outline-only layer on top.
# The legend of the outline layer is hidden by guides(col="none");
# geom_density() has no `guide` parameter, so none is passed to it.
plot_1 <- ggplot(so_eb@meta.data, aes(x=dpt_pseudotime)) +
  geom_density(alpha=.8, aes(fill=treatment), col="transparent") +
  geom_density(aes(col=treatment), fill="transparent", size=1.5) +
  labs(x="Pseudotime", fill="Treatment") +
  guides(col="none") +
  scale_fill_manual(values=color[["treatment"]]) +
  scale_color_manual(values=color[["treatment"]])

plot_1

In [None]:
# Progression test between treatments along the DPT pseudotime.
# Pseudotime and cell weights are passed as one-column matrices; every cell
# gets weight 1. The call closes after `conditions` (a parenthesis closing it
# before that argument made the cell unparseable), and as.matrix() is called
# without `drop`, which is not one of its arguments.
progressionTest(
    pseudotime=as.matrix(so_eb$dpt_pseudotime),
    cellWeights=as.matrix(rep(1, ncol(so_eb))),
    conditions=so_eb$treatment
)

0
1
1
1
1
1
1
1
1
1
1


In [None]:
# Show the class of the dpt_pseudotime metadata column
class(so_eb$dpt_pseudotime)

# Lamian

In [None]:
# DPT pseudotime table; its row names decide which cells are kept in so_eb
dpt_pseudotime_eb <- read.csv("result/cellrank/dpt_pseudotime_eb.csv", row.names=1)
so_eb <- subset(so_prog, subset=cell_id %in% rownames(dpt_pseudotime_eb))

In [None]:
# Expression: normalised data of the genes with more than 3 counts in more
# than 5 cells. drop=FALSE keeps a matrix even if a single gene passes.
gene_keep <- rowSums(GetAssayData(so_eb, assay="RNA", slot="counts")>3)>5
expr <- as.matrix(GetAssayData(so_eb, assay="RNA", slot="data")[gene_keep, , drop=FALSE])

# Cell annotation: cell id and the sample it belongs to (here the treatment)
cellanno <- data.frame(Cell=so_eb$cell_id, Sample=so_eb$treatment, row.names=NULL)

# Pseudotime: rank of every cell when ordered by dpt_pseudotime, named by cell.
# Only cells that are present in so_eb are ranked, so the names cannot refer
# to cells missing from expr/cellanno; seq_along() also handles zero cells.
cell_order <- rownames(dpt_pseudotime_eb)[order(dpt_pseudotime_eb$dpt_pseudotime)]
cell_order <- cell_order[cell_order %in% so_eb$cell_id]
pseudotime <- setNames(seq_along(cell_order), cell_order)

# Design: one row per sample, an intercept column and a group column
# (NaCl = 1, CpG = 0)
design <- as.matrix(data.frame(intercept=c(1, 1), group=c(1, 0), row.names=c("NaCl", "CpG")))

# Collect all inputs in one list
lamian_eb <- list(
    expr=expr,
    cellanno=cellanno,
    pseudotime=pseudotime,
    design=design
)

In [None]:
# Run the Lamian test on the inputs collected in lamian_eb
lamian_eb_result <- lamian_test(
  expr = lamian_eb[["expr"]],
  cellanno = lamian_eb[["cellanno"]],
  pseudotime = lamian_eb[["pseudotime"]],
  design = lamian_eb[["design"]],
  test.type = "variable", # To perform the sample covariant test
  testvar = 2, # NOTE(review): presumably the `group` column of design — confirm
  permuiter = 5, # NOTE(review): small number of permutations — confirm intended
  ncores = 8
)

# Write the result to disk, then read it back from the same file
saveRDS(object = lamian_eb_result, file = "lamian_eb_result.rds")
lamian_eb_result <- readRDS(file = "lamian_eb_result.rds")

In [None]:
## get differential dynamic genes statistics
## sorted by column 1 (ascending), ties broken by column 3 (descending);
## drop=FALSE keeps the table two-dimensional even with a single gene
stat <- lamian_eb_result$statistics
stat <- stat[order(stat[, 1], -stat[, 3]), , drop=FALSE]

## identify XDE genes with FDR.overall <= 0.05 cutoff
## The FDR column is looked up by name and must be unique. Row names are
## indexed directly, so a single hit is still returned as a gene name, and
## which() skips genes whose FDR is NA.
fdr_overall <- grep("^fdr.*overall$", colnames(stat))
stopifnot(length(fdr_overall) == 1L)
diffgene <- rownames(stat)[which(stat[, fdr_overall] <= 0.05)]
diffgene