# Cluster genes along pseudotime 

Summary of time series clustering in R  
https://journal.r-project.org/archive/2016/RJ-2016-058/index.html  
https://cran.r-project.org/web/packages/dtwclust/vignettes/dtwclust.pdf  

### Distance measures 

Shape-based: lock-step (n = m) and elastic (n ≠ m) measures.  
Feature-based: Fourier or wavelet coefficients, autocorrelation values, etc.  
Structure-based: (i) model-based approaches, where a model is fit to each series and the comparison is made between models, and (ii) complexity-based models, where the similarity between two series is measured based on the quantity of shared information.  
Prediction-based: distances that compare the forecasts obtained for different time series.  

In [3]:
# Notebook-wide options: warn=-1 silences all warnings, and
# dplyr.summarise.inform=FALSE turns off the grouping message of dplyr::summarise()
options(warn=-1, dplyr.summarise.inform=FALSE)

In [4]:
library_load <- suppressMessages(
    
    list(
        
        # parallelDist
        library(parallelDist), 
        
        # TradeSeq
        library(tradeSeq), 
        
        # Seurat 
        library(Seurat), 
        
        # GSEA
        library(fgsea), 
        library(msigdbr), 
        
        # Data 
        library(tidyverse), 
        
        # Plotting 
        library(ggplot2), 
        library(patchwork), 
        library(ggplotify),
        library(ComplexHeatmap),
        library(circlize)
        
    )
)

In [5]:
# Fix the RNG state so that every random step below is reproducible between runs
random_seed <- 42
set.seed(seed=random_seed)

In [6]:
# Set working directory to project root
# NOTE(review): hard-coded absolute path; every relative path used further down
# ("plotting_global.R", "bin/...", "result/...") is resolved against this directory.
setwd("/research/peer/fdeckert/FD20200109SPLENO")

In [7]:
# Load the project helper scripts (paths are relative to the working directory);
# source() evaluates each file in the global environment, in the order listed
invisible(lapply(
    c("plotting_global.R", "bin/tradeseq_pp.R", "bin/tradeseq_pl.R"),
    function(helper_file) source(helper_file)
))

In [8]:
# Plotting Theme
# Make the project theme the ggplot2 default for every plot drawn after this point.
# theme_global_set() is not defined in this notebook; presumably it comes from one of
# the sourced project files (confirm).
ggplot2::theme_set(theme_global_set(size_select=1)) # From project global source()

# Parameter settings 

In [9]:
# RDS file holding the tradeSeq results. It is read at the start of the analysis and
# written back at the end, so the same file serves as both input and output.
tradeseq_res_file <- "result/lineage/tradeseq_res_2.rds"

In [10]:
# Condition contrasts to classify. Despite the name this is a list; each element is a
# character vector c(<reference>, <query>): the classification step passes element 1
# as condition_ref and element 2 as condition_qry.
contrast_vec <- list(

    # Within IFNAR_fl: D0 vs D1, D0 vs D3, D1 vs D3
    c("IFNAR_fl_Baseline_D0", "IFNAR_fl_CpG_D1"), 
    c("IFNAR_fl_Baseline_D0", "IFNAR_fl_CpG_D3"), 
    c("IFNAR_fl_CpG_D1", "IFNAR_fl_CpG_D3"), 
    
    # IFNAR_fl baseline vs IFNAR_fl_LysM_cre CpG time points, and D1 vs D3 within IFNAR_fl_LysM_cre
    c("IFNAR_fl_Baseline_D0", "IFNAR_fl_LysM_cre_CpG_D1"), 
    c("IFNAR_fl_Baseline_D0", "IFNAR_fl_LysM_cre_CpG_D3"), 
    c("IFNAR_fl_LysM_cre_CpG_D1", "IFNAR_fl_LysM_cre_CpG_D3"), 
    
    # IFNAR_fl vs IFNAR_fl_LysM_cre at matching time points
    c("IFNAR_fl_Baseline_D0", "IFNAR_fl_LysM_cre_Baseline_D0"), 
    c("IFNAR_fl_CpG_D1", "IFNAR_fl_LysM_cre_CpG_D1"), 
    c("IFNAR_fl_CpG_D3", "IFNAR_fl_LysM_cre_CpG_D3")
    
)

# Import data 

In [11]:
# Load the stored tradeSeq results; the code below reads its first element (the
# fitted model object) and its "ptag" entry, and later adds a "ptpg" entry.
tradeseq_res <- readRDS(tradeseq_res_file)

In [12]:
# The fitted model object is taken by position (first element of the stored list);
# it is the object passed to conditionTest() and to the pt_diff_* helpers below.
fitgam <- tradeseq_res[[1]]

In [13]:
# Factor levels of the condition assignment stored with the fit; their positions are
# what the pairwise test columns ("..._conds<i>vs<j>") are looked up by further down
condition_levels <- levels(fitgam@colData[["tradeSeq"]][["conditions"]])

# LRT condition test 

In [14]:
# Test for differences between conditions along pseudotime, requesting the pairwise
# comparisons in addition to the global test and using no fold-change threshold
condition_res <- tradeSeq::conditionTest(fitgam, pairwise=TRUE, l2fc=0)

In [15]:
# Restrict the condition test results to genes whose row in the stored "ptag" table
# has a positive row sum.
# drop=FALSE keeps the table two-dimensional when it has a single column; without it
# the subset collapses to a vector, rownames() returns NULL and every gene is dropped.
ptag <- rownames(tradeseq_res[["ptag"]][rowSums(tradeseq_res[["ptag"]])>0, , drop=FALSE])
condition_res <- condition_res[ptag, ]

In [16]:
# Remove background genes: drop every row whose gene name matches the pattern
# (names starting with Tr[abgdv][vjcds], Tcr or Ig[hgkl], or containing Jchain)
is_background <- grepl("^Tr[abgdv][vjcds]\\b|^Tcr|^Ig[hgkl]|Jchain", rownames(condition_res))
condition_res <- condition_res[!is_background, ]

# Classify expression patterns between conditions 

In [17]:
# Settings for the per-bin Wilcoxon step (pt_diff_wilcox, defined in a sourced file).
# B and mc_cores are forwarded unchanged as its B= and mc_cores= arguments;
# presumably the number of resampling iterations and of parallel workers (confirm in bin/tradeseq_pp.R).
B <- 500
mc_cores <- 32
# TRUE: read previously saved results from result/lineage/wilcox/ instead of recomputing them
cache_wilcox <- TRUE

In [24]:
# Classify, for every contrast, how each gene's expression along pseudotime differs
# between the two conditions.
#
# Each element x of contrast_vec is c(<reference condition>, <query condition>).
# Steps per contrast:
#   1. Select the pairwise conditionTest columns of the two conditions, BH-adjust the
#      p-values and flag genes with adjusted p <= 0.05 (ptpg_wald).
#   2. pt_diff_smooth(): per-bin differences for the flagged genes; keep genes with any
#      |diff_smooth| >= 0.50 or |sum(diff_smooth)| >= 2.50 (diff_gene_smooth).
#   3. pt_diff_wilcox(): per-bin avg_log2FC values, computed and saved, or read from the
#      cache under result/lineage/wilcox/. A (gene, bin) counts as consistent when
#      >= 80 % of its values share the sign of their median.
#   4. Label each gene "Canonical", "Perturbed", x[1] or x[2].
#
# Uses the globals fitgam, condition_res, condition_levels, B, mc_cores, cache_wilcox.
# Returns a list with one data frame per contrast (one row per gene in condition_res)
# and prints the class counts of each contrast as a side effect.
#
# All contrasts are processed (not only the first one), because the save step labels
# the result with every entry of contrast_vec.
ptpg <- lapply(contrast_vec, function(x) {

    contrast_label <- paste0(x, collapse=":")

    # Get condition fit
    # Pairwise columns are looked up by the suffix "_conds<i>vs<j>", where i and j are
    # the (ascending) positions of the two conditions in condition_levels. The suffix is
    # matched at the end of the name so that e.g. "1vs2" cannot also select "11vs2".
    cond_idx <- which(condition_levels %in% x)
    if (length(cond_idx) != 2L) {
        stop("Contrast '", contrast_label, "' does not match exactly two levels of the fitted conditions", call.=FALSE)
    }
    cond_suffix <- paste0("_conds", paste0(cond_idx, collapse="vs"))
    ptpg_i <- condition_res[, endsWith(colnames(condition_res), cond_suffix), drop=FALSE] %>% tibble::rownames_to_column("gene")
    colnames(ptpg_i) <- sub(cond_suffix, "", colnames(ptpg_i), fixed=TRUE)

    # Clean up data: a missing test result counts as "no evidence" (p = 1, statistic = 0)
    ptpg_i <- ptpg_i %>%
        dplyr::rename(p_val_wald=pvalue, wald_stat=waldStat) %>%
        dplyr::mutate(
            p_val_wald=ifelse(is.na(p_val_wald), 1, p_val_wald),
            wald_stat=ifelse(is.na(wald_stat), 0, wald_stat)
        )

    # Condition fit: BH correction across all genes of this contrast
    ptpg_i <- ptpg_i %>%
        dplyr::mutate(p_val_adj_wald=p.adjust(p_val_wald, method="BH")) %>%
        dplyr::mutate(ptpg_wald=p_val_adj_wald<=0.05)

    # Difference in scaled smoothed expression bins (significant genes only)
    mat_1 <- pt_diff_smooth(fitgam, genes=ptpg_i[ptpg_i$ptpg_wald, ]$gene, condition_qry=x[2], condition_ref=x[1])

    # Genes with a large difference in at least one bin ...
    diff_gene_smooth_1 <- mat_1 %>%
        dplyr::filter(abs(diff_smooth)>=0.50) %>%
        dplyr::pull(gene) %>%
        unique()
    # ... or a large difference summed over all bins
    diff_gene_smooth_2 <- mat_1 %>%
        dplyr::group_by(gene) %>%
        dplyr::summarise(diff_sum_smooth=sum(diff_smooth), .groups="drop") %>%
        dplyr::filter(abs(diff_sum_smooth)>=2.50) %>%
        dplyr::pull(gene) %>%
        unique()
    diff_gene_smooth <- union(diff_gene_smooth_1, diff_gene_smooth_2)

    mat_1 <- mat_1[mat_1$gene %in% diff_gene_smooth, ]

    # Difference by wilcox test in expression bins
    wilcox_file <- paste0("result/lineage/wilcox/", x[2], "_vs_", x[1], ".rds")

    if(!cache_wilcox) {

        mat_2 <- pt_diff_wilcox(fitgam, genes=diff_gene_smooth, condition_qry=x[2], condition_ref=x[1], B=B, mc_cores=mc_cores)

        # Make sure the cache directory exists before writing
        dir.create(dirname(wilcox_file), recursive=TRUE, showWarnings=FALSE)
        saveRDS(mat_2, wilcox_file)

    } else {

        if (!file.exists(wilcox_file)) {
            stop("Cached wilcox result not found: ", wilcox_file, " (set cache_wilcox <- FALSE to compute it)", call.=FALSE)
        }
        mat_2 <- readRDS(wilcox_file)

    }

    # Set significant log2FC based on wilcox: per gene and bin, the median log2FC and
    # whether >= 80 % of the individual values point in the same direction as the median
    mat_2 <- mat_2 %>%
        dplyr::group_by(gene, pt_bin) %>%
        dplyr::summarise(
            log2FC_wilcox=median(avg_log2FC, na.rm=TRUE),
            prop_sig_pos=mean(avg_log2FC > 0, na.rm=TRUE),
            prop_sig_neg=mean(avg_log2FC < 0, na.rm=TRUE),
            diff_gene_wilcox=(prop_sig_pos >= 0.80 & log2FC_wilcox > 0) | (prop_sig_neg >= 0.80 & log2FC_wilcox < 0),
            .groups="drop"
        )

    # Combine mat; bins without a complete wilcox result are dropped
    mat <- dplyr::left_join(
        mat_1 %>% dplyr::select(gene, pt_bin, diff_smooth),
        mat_2 %>% dplyr::select(gene, pt_bin, log2FC_wilcox, diff_gene_wilcox),
        by=dplyr::join_by(gene, pt_bin)
    ) %>% na.omit()

    # Per gene, look only at bins where both measures agree in sign and pass their
    # thresholds: all up -> query condition, all down -> reference condition, mixed -> "Perturbed"
    ptpg_class <- mat %>%
        dplyr::filter(abs(diff_smooth)>=0.25 & diff_gene_wilcox & sign(diff_smooth)==sign(log2FC_wilcox)) %>%
        dplyr::group_by(gene) %>%
        dplyr::summarise(
            ptpg_class=if (all(log2FC_wilcox > 0)) x[2] else if (all(log2FC_wilcox < 0)) x[1] else "Perturbed",
            .groups="drop"
        )

    # Set PTPG class
    # - significant genes without any concordant bin -> "Perturbed"
    # - genes that did not pass the smoothed-difference filter -> "Canonical"
    ptpg_i <- dplyr::left_join(ptpg_i, ptpg_class, by=dplyr::join_by(gene)) %>%
        dplyr::mutate(ptpg_class=ifelse(is.na(ptpg_class) & ptpg_wald, "Perturbed", ptpg_class)) %>%
        dplyr::mutate(ptpg_class=ifelse(!(gene %in% diff_gene_smooth), "Canonical", ptpg_class)) %>%
        dplyr::mutate(ptpg_class=factor(ptpg_class, levels=c("Canonical", "Perturbed", x[1], x[2])))

    ptpg_i <- ptpg_i %>% dplyr::mutate(contrast=contrast_label)

    # Print summary
    out <- table(ptpg_i$ptpg_class) %>% as.data.frame() %>% t() %>% as.data.frame()
    rownames(out) <- NULL
    colnames(out) <- NULL
    print(out)

    ptpg_i

})

     gene  wald_stat df   p_val_wald p_val_adj_wald ptpg_wald
1  Mrpl15 20.9458986  5 8.293472e-04   0.0054640893      TRUE
2  Lypla1  6.0888277  5 2.976713e-01   0.5774114823     FALSE
3   Tcea1 28.0016031  5 3.637251e-05   0.0003418358      TRUE
4 Atp6v1h 13.1316348  5 2.217594e-02   0.0885758397     FALSE
5  Rb1cc1  0.6503152  5 9.855838e-01   1.0000000000     FALSE
6    Rrs1 10.1691064  4 3.767344e-02   0.1334147267     FALSE


# Save results 

In [31]:
# Name each per-contrast table after the contrast recorded in its own `contrast`
# column ("<ref>:<qry>"). Taking the label from the table itself keeps names and
# results aligned and cannot fail with a length mismatch when only a subset of
# contrast_vec was processed.
names(ptpg) <- vapply(ptpg, function(x) as.character(x[["contrast"]][1]), character(1))
# Stack all contrasts into one table and store it alongside the other tradeSeq results
tradeseq_res[["ptpg"]] <- purrr::imap_dfr(ptpg, ~ mutate(.x, contrast=.y))

In [32]:
# Write the extended result list back to disk; this overwrites the file it was read from
saveRDS(object=tradeseq_res, file=tradeseq_res_file)

# SessionInfo 

In [33]:
# Record the R version, platform and attached package versions used for this run
sessionInfo()

R version 4.4.1 (2024-06-14)
Platform: x86_64-conda-linux-gnu
Running under: Red Hat Enterprise Linux 8.9 (Ootpa)

Matrix products: default
BLAS/LAPACK: /nobackup/peer/fdeckert/miniconda3/envs/r.4.4.1-FD20200109SPLENO/lib/libopenblasp-r0.3.29.so;  LAPACK version 3.12.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: Europe/Vienna
tzcode source: system (glibc)

attached base packages:
[1] stats4    grid      stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
 [1] ggforce_0.4.2               data.table_1.17.0          
 [3] org.Mm.eg.db_3.20.0         AnnotationDbi_1.68.0       
 [5] clusterProfiler_4.14.6      Reactom