# Cluster genes along pseudotime 

In [None]:
# Attach every package used in this notebook. suppressMessages() silences the
# package start-up messages; the values returned by the library() calls are
# collected in `library_load`.
library_load <- suppressMessages(
    
    list(
        
        # Condiment
        library(condiments), 
        library(tradeSeq), 
        library(SingleCellExperiment), 
        library(clusterExperiment), 
        
        # Data
        library(tidyverse), 
        
        # Plotting
        library(ComplexHeatmap), 
        library(circlize), 
        library(viridis), 
        library(ggplotify), 
        library(ComplexUpset), 
        library(patchwork), 
        
        # Parallel
        library(BiocParallel)
        
    )
)

In [None]:
# Fix the random number generator seed so stochastic steps are repeatable
random_seed <- 42
set.seed(random_seed)

In [None]:
# Set the `future.globals.maxSize` option to 5000 * 1024^2 (5000 MiB if the unit is bytes — see the future package docs)
options(future.globals.maxSize=5000*1024^2)

In [None]:
# A negative `warn` makes R ignore all warnings for the rest of the session
options(warn=-1)

In [None]:
# Switch off the `message` option of ComplexHeatmap's global `ht_opt` settings
ht_opt$message <- FALSE

In [None]:
# Set working directory to project root; every relative path used below
# (source() files, result/ RDS files) is resolved against this directory.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
setwd("/research/peer/fdeckert/FD20200109SPLENO")

In [None]:
# Source project helper files (paths are relative to the working directory).
# NOTE(review): presumably these provide theme_global_set() and the `color`
# palette used further down — confirm against the sourced files.
source("plotting_global.R")
source("bin/tradeseq.R")

In [None]:
# Plotting theme: make the project theme the default for all ggplot2 plots
ggplot2::theme_set(theme_global_set()) # From project global source()

# Parameter settings and data import 

In [None]:
# Parallel computing: query the number of cores available to this session
# and print it for the record
workers <- future::availableCores()
print(workers)

In [None]:
# Load the tradeSeq results from disk.
# `tradeseq_eb` is indexed with [["fitgam"]] below; `ptg_eb` is a per-gene table
# with the logical-like columns `ptag_nacl` and `ptag_cpg` used for gene selection.
tradeseq_eb <- readRDS("result/tradeseq/tradeseq_eb.rds")
ptg_eb <- readRDS("result/tradeseq/ptg_eb.rds")

## Cluster tradeSeq

In [None]:
# Genes flagged in the `ptag_cpg` column of ptg_eb.
# NOTE(review): `genes` is not used by any other cell visible here — confirm it is still needed.
genes <- rownames(ptg_eb[ptg_eb$ptag_cpg==TRUE, ])

# Smoothed expression for every gene of the fitted model, evaluated at 50
# points, returned in long (tidy) format
cnt_smooth <- predictSmooth(
    tradeseq_eb[["fitgam"]],
    rownames(tradeseq_eb[["fitgam"]]),
    nPoints=50,
    tidy=TRUE
)

# Replace `time` by a composite key "lineage_<lineage>:<condition>:<time>".
# It becomes the column name after reshaping and is split on ":" again when plotting.
cnt_smooth$time <- paste0("lineage_", cnt_smooth$lineage, ":", cnt_smooth$condition, ":", cnt_smooth$time)

# Prepare for each condition
cnt_smooth_nacl <- cnt_smooth[cnt_smooth$condition=="NaCl", ]
cnt_smooth_cpg <- cnt_smooth[cnt_smooth$condition=="CpG", ]

# Drop lineage/condition BEFORE spreading: both are already encoded in the key,
# and spread() identifies rows by all remaining columns. Leaving `lineage` in
# would yield one row per gene *and* lineage, so column_to_rownames("gene")
# would fail on duplicated genes as soon as there is more than one lineage.
cnt_smooth_nacl <- dplyr::select(cnt_smooth_nacl, -lineage, -condition)
cnt_smooth_cpg <- dplyr::select(cnt_smooth_cpg, -lineage, -condition)

# Long -> wide: one row per gene, one column per composite key.
# spread() is kept (rather than pivot_wider()) so row/column ordering stays as before.
cnt_smooth_nacl <- tidyr::spread(cnt_smooth_nacl, key=time, value=yhat)
cnt_smooth_cpg <- tidyr::spread(cnt_smooth_cpg, key=time, value=yhat)

cnt_smooth_nacl <- column_to_rownames(cnt_smooth_nacl, "gene")
cnt_smooth_cpg <- column_to_rownames(cnt_smooth_cpg, "gene")

# Combine condition. cbind() binds by position, so this relies on both frames
# holding the same genes in the same row order.
cnt_smooth <- cbind(cnt_smooth_nacl, cnt_smooth_cpg)

In [None]:
# Gene sets flagged per condition, and their union.
# `%in% TRUE` is used instead of `== TRUE` because `==` propagates NA, and an NA
# in a logical row index produces spurious all-NA rows (row names "NA", "NA.1", ...).
genes_ptag_nacl <- rownames(ptg_eb[ptg_eb$ptag_nacl %in% TRUE, ])
genes_ptag_cpg <- rownames(ptg_eb[ptg_eb$ptag_cpg %in% TRUE, ])
genes_ptag <- rownames(ptg_eb[(ptg_eb$ptag_nacl %in% TRUE) | (ptg_eb$ptag_cpg %in% TRUE), ]) %>% unique()

In [None]:
#' Cluster genes by their smoothed expression profile
#'
#' Z-scores every row of `x` and runs a clusterExperiment::clusterMany()
#' parameter sweep on the result.
#'
#' @param x Numeric data frame or matrix with one row per gene and one column
#'   per smoothed pseudotime point (e.g. a row subset of `cnt_smooth_nacl`).
#' @return A list with two elements: `"cnt_smooth"`, the unscaled input `x`,
#'   and `"rsec"`, the object returned by clusterMany().
cluster_many <- function(x) {

    # Result list; keep the original smoothed counts before scaling
    cluster_rsec_result <- list()
    cluster_rsec_result[["cnt_smooth"]] <- x

    # scale() standardises columns, so transpose first: each gene (row of x)
    # becomes a column and is centred/scaled across its pseudotime points.
    # The transposed layout is what is handed to clusterMany(), which is
    # expected to cluster the columns, i.e. the genes.
    x <- scale(t(x))

    # Run the clusterExperiment parameter sweep; parallelism is set via `ncores`.
    # NOTE(review): "eucledian" looks like a misspelling of "euclidean".
    # clusterMany() documents `distFunction` as the name of a distance function;
    # confirm that a function of this name exists (e.g. in bin/tradeseq.R) or
    # whether the default was intended. Left unchanged because the value is
    # passed through at runtime.
    cluster_rsec_result[["rsec"]] <- clusterExperiment::clusterMany(
        x=x,
        ks=3:15,
        alphas=c(0.1, 0.2, 0.3),
        betas=c(0.7, 0.8, 0.9),
        minSizes=c(3, 5, 10),
        clusterFunction="hierarchical01",
        distFunction="eucledian",
        sequential=TRUE,
        subsample=TRUE,
        reduceMethod="PCA",
        nReducedDims=c(10, 50, 100),
        isCount=FALSE,
        ncores=future::availableCores()
    )

    return(cluster_rsec_result)
}

In [None]:
# Cluster the first (at most) 50 NaCl genes. head() is used instead of `[1:50]`
# because indexing past the end of the vector yields NA, which would select
# all-NA rows when fewer than 50 genes are available.
cluster_many_ptag_nacl <- cluster_many(cnt_smooth_nacl[head(genes_ptag_nacl, 50), ])

In [None]:
# One line plot per cluster for the NaCl clustering.
# NOTE: cluster_plot() is defined in a later cell of this notebook and must
# have been evaluated before this cell is run.
cluster_plot_ptag_nacl <- cluster_plot(
    cluster_many_ptag_nacl,
    proportion = 0.5
)

# Size the output for a grid of 10 panels per row, 2.5 units per panel
options(
    repr.plot.width = 10 * 2.5,
    repr.plot.height = ceiling(length(cluster_plot_ptag_nacl) / 10) * 2.5
)
wrap_plots(
    cluster_plot_ptag_nacl,
    ncol = 10,
    nrow = ceiling(length(cluster_plot_ptag_nacl) / 10)
)

In [None]:
# Load clustering results from disk. This replaces the `cluster_many_ptag_nacl`
# object computed by the cluster_many() call in an earlier cell.
# NOTE(review): the "_log_" variant is presumably the same clustering run on
# log-transformed input — confirm where these files are written.
cluster_many_ptag_nacl <- readRDS("result/tradeseq/cluster_many_ptag_nacl.rds")
cluster_many_ptag_log_nacl <- readRDS("result/tradeseq/cluster_many_ptag_log_nacl.rds")

In [None]:
#' Plot scaled smoothed expression for every gene cluster
#'
#' @param cluster_result List with the elements `"rsec"` (clustering object
#'   whose primary clustering is plotted) and `"cnt_smooth"` (genes x
#'   pseudotime points, column names of the form
#'   "lineage_<lineage>:<condition>:<time>"), as returned by cluster_many().
#' @param proportion Currently unused: the makeConsensus() step that consumed
#'   it is commented out. Kept so existing calls keep working.
#' @return Named list of ggplot objects, one per cluster label, named
#'   "Cluster <label>".
cluster_plot <- function(cluster_result, proportion=1) {

#     cluster_result[["rsec"]] <- makeConsensus(cluster_result[["rsec"]], proportion=proportion)

    # One label per gene; labels are assumed to be in the same order as the
    # rows of cluster_result[["cnt_smooth"]]
    cluster_label <- primaryCluster(cluster_result[["rsec"]])
    cluster_label_unique <- sort(unique(cluster_label), na.last=TRUE)

    # Z-score each gene across its pseudotime points (scale() works on columns,
    # hence the double transpose)
    cnt_smooth <- t(scale(t(cluster_result[["cnt_smooth"]])))
#     cnt_smooth <- t(apply(cluster_result[["cnt_smooth"]], 1, scales::rescale))

    cnt_smooth <- as.data.frame(cnt_smooth)

    p <- list()
    for(cluster_label_i in cluster_label_unique) {

        # Genes of this cluster in long format: gene / variable / value
        plot_data <- cnt_smooth[which(cluster_label==cluster_label_i), ]
        plot_data <- reshape2::melt(rownames_to_column(plot_data, var="gene"), id.vars="gene")

        # Split the composite column key once; field 2 is the condition,
        # field 3 the pseudotime
        key_parts <- strsplit(as.character(plot_data$variable), ":", fixed=TRUE)
        plot_data$condition <- vapply(key_parts, `[`, character(1), 2)
        plot_data$pseudotime <- as.double(vapply(key_parts, `[`, character(1), 3))

        # Symmetric y-axis around zero, rounded up to the next integer
        y_limit <- ceiling(max(abs(plot_data$value)))

        # `color` is read from the calling environment (presumably defined by
        # plotting_global.R — confirm)
        p[[paste("Cluster", cluster_label_i)]] <- ggplot(plot_data, aes(x=pseudotime, y=value, color=condition, group=paste(condition, gene))) +
            geom_line() +
            ylim(-y_limit, y_limit) +
            geom_hline(yintercept=0, linetype="dashed") +
            ggtitle(paste("Cluster", cluster_label_i)) +
            scale_color_manual(values=unlist(color$treatment)) +
            theme(legend.position="none")

    }

    return(p)

}

In [None]:
# One line plot per cluster for the NaCl clustering loaded from disk
cluster_plot_ptag_nacl <- cluster_plot(
    cluster_many_ptag_nacl,
    proportion = 0.5
)

# Size the output for a grid of 10 panels per row, 2.5 units per panel
options(
    repr.plot.width = 10 * 2.5,
    repr.plot.height = ceiling(length(cluster_plot_ptag_nacl) / 10) * 2.5
)
wrap_plots(
    cluster_plot_ptag_nacl,
    ncol = 10,
    nrow = ceiling(length(cluster_plot_ptag_nacl) / 10)
)

In [None]:
# One line plot per cluster for the "log" NaCl clustering loaded from disk
cluster_plot_ptag_log_nacl <- cluster_plot(
    cluster_many_ptag_log_nacl,
    proportion = 0.5
)

# Size the output for a grid of 10 panels per row, 2.5 units per panel
options(
    repr.plot.width = 10 * 2.5,
    repr.plot.height = ceiling(length(cluster_plot_ptag_log_nacl) / 10) * 2.5
)
wrap_plots(
    cluster_plot_ptag_log_nacl,
    ncol = 10,
    nrow = ceiling(length(cluster_plot_ptag_log_nacl) / 10)
)