# Cluster genes along pseudotime 

Summary of time series clustering in R  
https://journal.r-project.org/archive/2016/RJ-2016-058/index.html  
https://cran.r-project.org/web/packages/dtwclust/vignettes/dtwclust.pdf  

### Distance measures 

Shape-based measures: lock-step (n = m) and elastic (n ≠ m)  
Feature-based: Fourier or wavelet coefficients, autocorrelation values, etc.  
Structure-based: (i) model-based approaches, where a model is fit to each series and the comparison is made between models, and (ii) complexity-based models, where the similarity between two series is measured based on the quantity of shared information.  
Prediction-based distances analyze the similarity of the forecasts obtained for different time series.  

In [None]:
# Notebook-wide options:
#   warn = -1                      -> warnings are not reported at all
#   dplyr.summarise.inform = FALSE -> summarise() does not announce the
#                                     grouping of its result
# NOTE(review): warn = -1 also hides warnings raised by every step below;
# consider switching it back on while debugging.
options(
    warn = -1,
    dplyr.summarise.inform = FALSE
)

In [None]:
library_load <- suppressMessages(
    
    list(
        
        # parallelDist
        library(parallelDist), 
        
        # TradeSeq
        library(tradeSeq), 
        
        # Seurat 
        library(Seurat), 
        
        # GSEA
        library(fgsea), 
        library(msigdbr), 
        
        # Data 
        library(tidyverse), 
        
        # Plotting 
        library(ggplot2), 
        library(patchwork), 
        library(ggplotify),
        library(ComplexHeatmap),
        library(circlize)
        
    )
)

In [None]:
# Fix the RNG state so that any stochastic step in this notebook gives the
# same result on every run
random_seed <- 42
set.seed(seed = random_seed)

In [None]:
# Set working directory to project root
# Set working directory to project root
# Every relative path used further down (sourced scripts, result/ files) is
# resolved against this directory
setwd(dir = "/research/peer/fdeckert/FD20200109SPLENO")

In [None]:
# Source files
# Source files
# Each script is evaluated in the global environment (source() default), in
# the order listed; pt_diff_smooth(), pt_diff_wilcox() and theme_global_set()
# used below are presumably defined in these files (verify)
invisible(lapply(
    c("plotting_global.R", "bin/tradeseq_pp.R", "bin/tradeseq_pl.R"),
    source
))

In [None]:
# Plotting Theme
# Make the project theme the default for all ggplot2 plots
# theme_global_set() is not defined in this notebook; it comes from the
# project global source()
ggplot2::theme_set(
    theme_global_set(size_select = 1)
)

# Parameter settings 

In [None]:
# Location of the tradeSeq result list; it is read at the start of the
# notebook and overwritten with the extended list at the end
tradeseq_res_file <- file.path("result", "lineage", "tradeseq_res_2.rds")

In [None]:
# Pairwise contrasts to classify. Each entry is c(reference, query): the
# first element is passed as condition_ref and the second as condition_qry
# to the pt_diff_*() helpers in the classification step.
contrast_vec <- list(

    # Within IFNAR_fl: D0 vs D1, D0 vs D3, D1 vs D3
    c("IFNAR_fl_Baseline_D0", "IFNAR_fl_CpG_D1"),
    c("IFNAR_fl_Baseline_D0", "IFNAR_fl_CpG_D3"),
    c("IFNAR_fl_CpG_D1", "IFNAR_fl_CpG_D3"),

    # IFNAR_fl baseline vs IFNAR_fl_LysM_cre CpG, and D1 vs D3 within
    # IFNAR_fl_LysM_cre
    c("IFNAR_fl_Baseline_D0", "IFNAR_fl_LysM_cre_CpG_D1"),
    c("IFNAR_fl_Baseline_D0", "IFNAR_fl_LysM_cre_CpG_D3"),
    c("IFNAR_fl_LysM_cre_CpG_D1", "IFNAR_fl_LysM_cre_CpG_D3"),

    # IFNAR_fl vs IFNAR_fl_LysM_cre at the same time point
    c("IFNAR_fl_Baseline_D0", "IFNAR_fl_LysM_cre_Baseline_D0"),
    c("IFNAR_fl_CpG_D1", "IFNAR_fl_LysM_cre_CpG_D1"),
    c("IFNAR_fl_CpG_D3", "IFNAR_fl_LysM_cre_CpG_D3")

)

# Import data 

In [None]:
# Load the stored result list; element 1 and the "ptag" element are read
# below, and a "ptpg" element is added before the list is written back
tradeseq_res <- readRDS(file = tradeseq_res_file)

In [None]:
# The first list element is the fitted object handed to conditionTest() and
# the pt_diff_*() helpers (presumably the tradeSeq fitGAM output; confirm)
fitgam <- tradeseq_res[[1L]]

In [None]:
# Factor levels of the condition assignment stored with the fit; the position
# of a condition in this vector is used below to build the "<i>vs<j>" index of
# the pairwise test columns
condition_levels <- fitgam@colData$tradeSeq$conditions %>% levels()

# Wald condition test 

In [18]:
# Test for differences between conditions; pairwise = TRUE requests the
# per-pair result columns that the classification step selects by their
# "_conds<i>vs<j>" suffix
condition_res <- conditionTest(
    fitgam,
    l2fc = 0,
    pairwise = TRUE
)

In [19]:
# Keep only genes whose row in the "ptag" table has a positive row sum.
# The row names are indexed directly rather than taken from `tbl[keep, ]`:
# that subset collapses to a bare vector (so rownames() returns NULL) when a
# single row of a matrix, or a one-column table, is left. which() additionally
# skips rows whose sum is NA instead of turning them into NA gene names.
ptag <- rownames(tradeseq_res[["ptag"]])[which(rowSums(tradeseq_res[["ptag"]]) > 0)]
# drop = FALSE keeps condition_res tabular whatever its number of columns
condition_res <- condition_res[ptag, , drop = FALSE]

In [20]:
# Remove background genes 
# Remove background genes: drop every row whose gene name matches the pattern
# (presumably T-cell receptor and immunoglobulin genes; confirm).
# NOTE(review): in the first alternative `\\b` demands a word boundary right
# after the fourth character, so a name that continues with a letter or digit
# there is NOT matched by that alternative - confirm this is intended.
condition_res <- condition_res[
    !grepl("^Tr[abgdv][vjcds]\\b|^Tcr|^Ig[hgkl]|Jchain", rownames(condition_res)),
]

# Classify expression patterns between conditions 

In [21]:
# Settings for the Wilcoxon step of the classification below.
# B and mc_cores are forwarded unchanged to pt_diff_wilcox()
B <- 500
mc_cores <- 32
# TRUE: read previously saved Wilcoxon results from result/lineage/wilcox/
# FALSE: recompute them with pt_diff_wilcox() and save them to that folder
cache_wilcox <- TRUE

In [23]:
# Classify every tested gene for each contrast in `contrast_vec`.
#
# For a contrast x = c(reference, query) the anonymous function
#   1. extracts the pairwise conditionTest columns of that contrast and
#      BH-adjusts the Wald p-values (ptpg_wald: adjusted p <= 0.05),
#   2. keeps Wald-significant genes whose smoothed difference (pt_diff_smooth)
#      is large in one bin or summed over all bins,
#   3. combines the smoothed difference with the Wilcoxon results
#      (pt_diff_wilcox, computed or read from cache) per gene and bin,
#   4. assigns ptpg_class:
#        "Canonical" - gene not retained in step 2,
#        x[2]        - all supporting bins are higher in the query,
#        x[1]        - all supporting bins are lower in the query,
#        "Perturbed" - mixed directions, or retained without a supporting bin.
#
# Returns (per contrast) a data frame with the columns gene, the renamed test
# columns, p_val_adj_wald, ptpg_wald, ptpg_class and contrast; a one-line
# class count is printed as a side effect.
ptpg <- lapply(contrast_vec, function(x) {

    # Both members must be distinct, known conditions; otherwise the column
    # index built below would be incomplete and select the wrong columns
    stopifnot(
        is.character(x),
        length(x) == 2,
        !anyDuplicated(x),
        all(x %in% condition_levels)
    )
    contrast_label <- paste0(x, collapse = ":")

    # Get condition fit
    # Columns are selected by their exact "_conds<i>vs<j>" suffix; a substring
    # match on "<i>vs<j>" would also pick up e.g. "11vs2" when looking for "1vs2"
    idx <- paste0(which(condition_levels %in% x), collapse = "vs")
    col_suffix <- paste0("_conds", idx)
    is_contrast_col <- endsWith(colnames(condition_res), col_suffix)
    if (!any(is_contrast_col)) {
        stop(
            "No condition test columns with suffix '", col_suffix,
            "' found for contrast ", contrast_label,
            call. = FALSE
        )
    }
    ptpg_i <- condition_res[, is_contrast_col, drop = FALSE] %>%
        tibble::rownames_to_column("gene")
    colnames(ptpg_i) <- sub(col_suffix, "", colnames(ptpg_i), fixed = TRUE)

    # Clean up data (missing tests count as non-significant) and flag genes
    # that pass the BH-adjusted Wald test
    ptpg_i <- ptpg_i %>%
        dplyr::rename(p_val_wald = pvalue, wald_stat = waldStat) %>%
        dplyr::mutate(
            p_val_wald = ifelse(is.na(p_val_wald), 1, p_val_wald),
            wald_stat = ifelse(is.na(wald_stat), 0, wald_stat),
            p_val_adj_wald = p.adjust(p_val_wald, method = "BH"),
            ptpg_wald = p_val_adj_wald <= 0.05
        )

    # Difference in scaled smoothed expression bins
    mat_1 <- pt_diff_smooth(
        fitgam,
        genes = ptpg_i$gene[ptpg_i$ptpg_wald],
        condition_qry = x[2],
        condition_ref = x[1]
    )

    # Retain a gene if any single bin differs by >= 0.50 ...
    diff_gene_smooth_1 <- mat_1 %>%
        dplyr::filter(abs(diff_smooth) >= 0.50) %>%
        dplyr::pull(gene) %>%
        unique()
    # ... or if the difference summed over its bins reaches >= 2.50
    diff_gene_smooth_2 <- mat_1 %>%
        dplyr::group_by(gene) %>%
        dplyr::summarise(diff_sum_smooth = sum(diff_smooth), .groups = "drop") %>%
        dplyr::filter(abs(diff_sum_smooth) >= 2.50) %>%
        dplyr::pull(gene) %>%
        unique()
    diff_gene_smooth <- union(diff_gene_smooth_1, diff_gene_smooth_2)

    mat_1 <- mat_1[mat_1$gene %in% diff_gene_smooth, ]

    # Difference by wilcox test in expression bins
    wilcox_file <- file.path(
        "result/lineage/wilcox",
        paste0(x[2], "_vs_", x[1], ".rds")
    )
    if (cache_wilcox) {

        if (!file.exists(wilcox_file)) {
            stop(
                "Cached Wilcoxon result not found: ", wilcox_file,
                " (set cache_wilcox <- FALSE to compute it)",
                call. = FALSE
            )
        }
        mat_2 <- readRDS(wilcox_file)

    } else {

        mat_2 <- pt_diff_wilcox(
            fitgam,
            genes = diff_gene_smooth,
            condition_qry = x[2],
            condition_ref = x[1],
            B = B,
            mc_cores = mc_cores
        )

        # Make sure the cache folder exists before writing into it
        dir.create(dirname(wilcox_file), recursive = TRUE, showWarnings = FALSE)
        saveRDS(mat_2, wilcox_file)

    }

    # Set significant log2FC based on wilcox
    # mat_2 holds several rows per gene and bin (presumably one per resampling
    # round; confirm). A bin supports a difference when >= 80% of its rows are
    # significant in the direction of the median log2FC.
    mat_2 <- mat_2 %>%
        dplyr::group_by(gene, pt_bin) %>%
        dplyr::summarise(
            log2FC_wilcox = median(avg_log2FC, na.rm = TRUE),
            prop_sig_pos = mean(p_val_adj <= 0.05 & avg_log2FC > 0, na.rm = TRUE),
            prop_sig_neg = mean(p_val_adj <= 0.05 & avg_log2FC < 0, na.rm = TRUE),
            diff_gene_wilcox = (prop_sig_pos >= 0.80 & log2FC_wilcox > 0) |
                (prop_sig_neg >= 0.80 & log2FC_wilcox < 0),
            .groups = "drop"
        )

    # Combine mat (bins lacking a Wilcoxon result are dropped by na.omit)
    mat <- dplyr::left_join(
        mat_1 %>% dplyr::select(gene, pt_bin, diff_smooth),
        mat_2 %>% dplyr::select(gene, pt_bin, log2FC_wilcox, diff_gene_wilcox),
        by = dplyr::join_by(gene, pt_bin)
    ) %>%
        na.omit()

    # Keep bins where both measures agree in sign, at least one has an effect
    # of >= 0.25 and the Wilcoxon criterion holds; the direction shared by all
    # remaining bins of a gene decides its class
    ptpg_class <- mat %>%
        dplyr::arrange(gene, pt_bin) %>%
        dplyr::filter(
            (abs(log2FC_wilcox) >= 0.25 | abs(diff_smooth) >= 0.25) &
                diff_gene_wilcox &
                sign(diff_smooth) == sign(log2FC_wilcox)
        ) %>%
        dplyr::group_by(gene) %>%
        dplyr::mutate(
            ptpg_class = if (all(sign(log2FC_wilcox) == +1)) {
                x[2]
            } else if (all(sign(log2FC_wilcox) == -1)) {
                x[1]
            } else {
                "Perturbed"
            }
        ) %>%
        dplyr::ungroup() %>%
        dplyr::select(gene, ptpg_class) %>%
        dplyr::distinct()

    # Set PTPG class
    # Order matters: unclassified Wald-significant genes become "Perturbed"
    # first, then every gene outside diff_gene_smooth is set to "Canonical"
    ptpg_i <- dplyr::left_join(ptpg_i, ptpg_class, by = dplyr::join_by(gene)) %>%
        dplyr::mutate(
            ptpg_class = ifelse(is.na(ptpg_class) & ptpg_wald, "Perturbed", ptpg_class),
            ptpg_class = ifelse(!(gene %in% diff_gene_smooth), "Canonical", ptpg_class),
            ptpg_class = factor(ptpg_class, levels = c("Canonical", "Perturbed", x[1], x[2])),
            contrast = contrast_label
        )

    # Print summary
    out <- table(ptpg_i$ptpg_class)  %>% as.data.frame() %>% t() %>% as.data.frame()
    rownames(out) <- NULL
    colnames(out) <- NULL
    print(out)

    ptpg_i

})

                                                          
1 Canonical Perturbed IFNAR_fl_Baseline_D0 IFNAR_fl_CpG_D1
2      7661       656                    9             107
                                                          
1 Canonical Perturbed IFNAR_fl_Baseline_D0 IFNAR_fl_CpG_D3
2      7606       710                    7             110
                                                     
1 Canonical Perturbed IFNAR_fl_CpG_D1 IFNAR_fl_CpG_D3
2      8159       257               7              10
                                                                   
1 Canonical Perturbed IFNAR_fl_Baseline_D0 IFNAR_fl_LysM_cre_CpG_D1
2      7640       671                    9                      113
                                                                   
1 Canonical Perturbed IFNAR_fl_Baseline_D0 IFNAR_fl_LysM_cre_CpG_D3
2      7259      1084                   11                       79
                                                                       
1 Ca

# Save results 

In [None]:
# Name each per-contrast table "<reference>:<query>", then stack all tables
# row-wise; the contrast column of every table is set to its list name
names(ptpg) <- vapply(contrast_vec, paste0, character(1), collapse = ":")
tradeseq_res[["ptpg"]] <- purrr::imap_dfr(
    ptpg,
    function(ptpg_tbl, contrast_name) {
        dplyr::mutate(ptpg_tbl, contrast = contrast_name)
    }
)

In [None]:
# Write the extended result list back to the file it was read from; the
# previous file content is overwritten
saveRDS(object = tradeseq_res, file = tradeseq_res_file)

# SessionInfo 

In [None]:
# Record the R version and the versions of attached/loaded packages
sessionInfo()