In [1]:
library(tidyverse)

# Custom package
library(rutils)

-- Attaching packages ------------------------------------------------------------------ tidyverse 1.3.0 --

v ggplot2 3.3.3     v purrr   0.3.4
v tibble  3.0.6     v dplyr   1.0.4
v tidyr   1.1.2     v stringr 1.4.0
v readr   1.4.0     v forcats 0.5.1

-- Conflicts --------------------------------------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()



In [2]:
# Project directory lookup via the custom `rutils` helper (implementation not
# shown here). Later cells read `dirs$data_dir` and `dirs$analysis_dir`.
dirs <- rutils::get_dev_directories(dev_paths_file = "../dev_paths.txt")
# Dataset name stems and project codes. Both vectors are indexed with the same
# `dset_idx` below, so their element order is expected to correspond.
unified_dsets <- c("unified_cervical_data", "unified_uterine_data", "unified_uterine_endometrial_data")
projects <- c("cesc", "ucs", "ucec")

In [3]:
# Position in `unified_dsets` / `projects` of the dataset processed by this run
# (3 selects "unified_uterine_endometrial_data" / "ucec").
dset_idx <- 3
# When TRUE, the final summary table is written to disk in the last cell.
save_res <- TRUE

In [4]:
# Cutoffs for the pairwise FIGO contrasts: a row is kept when
# `padj < padj_pairwise_figo_thresh` and `abs(l2fc) > lfc_thresh`.
padj_pairwise_figo_thresh <- 0.01
# log2(2) evaluates to 1; assuming `l2fc` is a log2 fold change, this is a
# two-fold cutoff in either direction.
lfc_thresh <- log2(2)

In [5]:
# Load every result table that feeds the summary built further down.
# `dset_idx` selects the dataset: it indexes `projects` for the gene-list file
# and `unified_dsets` for all other inputs.

# Matrisome master list, loaded through the custom `rutils` reader; only the
# annotation columns named in select() are kept.
mat_df <- rutils::load_matrisome_df(paste0(dirs$data_dir, "/matrisome/matrisome_hs_masterlist.tsv")) %>%
    select(gene_symbol, gene_name, division, category, hgnc_ids)
# Plain-text gene list, one entry per line, turned into a single-column tibble
# whose column is named `geneID` so it can act as the join key.
figo_demg_df <- read_lines(paste0(dirs$analysis_dir, "/gene_lists_extra/", projects[dset_idx], "_figo_demg.txt")) %>%
    as_tibble() %>%
    rename(geneID = value)
# DESeq results; retained columns get a `de_` prefix so they stay
# distinguishable after the later joins.
deg_df <- read_tsv(paste0(dirs$analysis_dir, "/deg/", unified_dsets[dset_idx], "_DESeq_results.tsv")) %>%
    select(geneID, baseMean, log2FoldChange, qval) %>%
    rename(de_base_mean = baseMean, de_l2fc = log2FoldChange, de_qval = qval)
# waov_df <- read_tsv(paste0(dirs$analysis_dir, "/feature_selection/", unified_dsets[dset_idx], "_welch_anova_results.tsv")) %>%
#     select(geneID, contains("qval")) %>%
#     rename_with(~ str_replace(., "qval", "anova_qval"))
# Pairwise FIGO contrast results (one row per gene and contrast).
figo_pairwise_demg_df <- read_tsv(paste0(dirs$analysis_dir, "/feature_selection/", unified_dsets[dset_idx], "_figo_pairwise_demg_results.tsv"))
# Per-stage correlation results. The q-value columns are renamed from
# `*_qval` to `*_cor_qval`, then the smallest q-value across the four FIGO
# stages is stored per gene.
pbc_df <- read_tsv(paste0(dirs$analysis_dir, "/feature_selection/", unified_dsets[dset_idx], "_pbc_results.tsv")) %>%
    select(geneID, contains("qval"), contains("cor")) %>%
    rename_with(~ str_replace(., "qval", "cor_qval")) %>%
    # pmin() is the vectorized element-wise minimum: same values as a
    # rowwise() min() (NA in any input still yields NA), without evaluating
    # the expression once per row.
    mutate(
        min_cor_qval = pmin(
            figo_stage_1_cor_qval,
            figo_stage_2_cor_qval,
            figo_stage_3_cor_qval,
            figo_stage_4_cor_qval
        )
    )
lasso_df <- read_tsv(paste0(dirs$analysis_dir, "/feature_selection/", unified_dsets[dset_idx], "_en_multinom_results.tsv")) %>%
    # All coefs should be the same
    # NOTE(review): that expectation is not checked here; only the stage-1
    # column is carried forward.
    mutate(lasso_coef = figo_stage_1_coefs) %>%
    select(geneID, lasso_coef)
# WGCNA gene-level table: module assignment and `mm_pval` per gene.
wgcna_mm_df <- read_tsv(paste0(dirs$analysis_dir, "/network/", unified_dsets[dset_idx], "_gene_mm_gs.tsv")) %>%
    select(geneID, module, mm_pval)
# WGCNA eigengene/trait table. The minimum FIGO q-value per row is computed
# first; then the `vital*`, `*pval*` and `*cor*` columns are dropped and every
# remaining "qval" in a column name becomes "wgcna_qval" (so the new minimum
# column ends up as `eg_figo_wgcna_qval_min`).
wgcna_eg_df <- read_tsv(paste0(dirs$analysis_dir, "/network/", unified_dsets[dset_idx], "_eigengene_traits.tsv")) %>%
    mutate(
        eg_figo_qval_min = pmin(
            figo_stage_1_qval,
            figo_stage_2_qval,
            figo_stage_3_qval,
            figo_stage_4_qval
        )
    ) %>%
    select(-starts_with("vital"), -contains("pval"), -contains("cor")) %>%
    rename_with(~ str_replace(., "qval", "wgcna_qval"))


[36m--[39m [1m[1mColumn specification[1m[22m [36m-----------------------------------------------------------------------------------[39m
cols(
  Division = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  `Gene Symbol` = [31mcol_character()[39m,
  `Gene Name` = [31mcol_character()[39m,
  Synonyms = [31mcol_character()[39m,
  HGNC_IDs = [32mcol_double()[39m,
  `HGNC_IDs Links` = [32mcol_double()[39m,
  UniProt_IDs = [31mcol_character()[39m,
  Refseq_IDs = [31mcol_character()[39m,
  Orthology = [31mcol_character()[39m,
  Notes = [31mcol_character()[39m
)



[36m--[39m [1m[1mColumn specification[1m[22m [36m-----------------------------------------------------------------------------------[39m
cols(
  geneID = [31mcol_character()[39m,
  baseMean = [32mcol_double()[39m,
  log2FoldChange = [32mcol_double()[39m,
  lfcSE = [32mcol_double()[39m,
  stat = [32mcol_double()[39m,
  pvalue = [32mcol_double()[39m,
  padj = [32mcol_do

# Organize FIGO pairwise DEMG results

In [6]:
# Summarise the pairwise FIGO contrast results per gene:
#   * `sig_contrasts`        - ";"-separated names of the significant contrasts
#   * `sig_figo_lfc_<a>v<b>` - l2fc of contrast "<a>_vs_<b>" when that contrast
#                              is significant for the gene, NA otherwise
# NOTE(review): the same file is also read in the data-loading cell above; the
# re-read is kept so this cell does not depend on that cell's state.
figo_pairwise_demg_df <- read_tsv(paste0(dirs$analysis_dir, "/feature_selection/", unified_dsets[dset_idx], "_figo_pairwise_demg_results.tsv"))

# Significant rows of the long table (one row per gene and contrast). Both
# summaries below start from this single table so they always describe the
# same set of rows. Filtering before reshaping also means each contrast's own
# l2fc is tested against the threshold.
sig_figo_pairwise_long_df <- figo_pairwise_demg_df %>%
    filter(padj < padj_pairwise_figo_thresh, abs(l2fc) > lfc_thresh)

# One column per contrast. `first()` on an empty selection yields NA, so a
# contrast that is not significant for a gene (or is absent from the input
# altogether) produces NA instead of an error. `%in%` is used rather than `==`
# so an NA contrast label can never select a row.
sig_figo_pairwise_demg_lfcs_df <- sig_figo_pairwise_long_df %>%
    group_by(geneID) %>%
    summarize(
        sig_figo_lfc_2v1 = first(l2fc[contrast %in% "2_vs_1"]),
        sig_figo_lfc_3v1 = first(l2fc[contrast %in% "3_vs_1"]),
        sig_figo_lfc_4v1 = first(l2fc[contrast %in% "4_vs_1"]),
        sig_figo_lfc_3v2 = first(l2fc[contrast %in% "3_vs_2"]),
        sig_figo_lfc_4v2 = first(l2fc[contrast %in% "4_vs_2"]),
        sig_figo_lfc_4v3 = first(l2fc[contrast %in% "4_vs_3"]),
        .groups = "drop"
    )

# Names of the significant contrasts per gene, in input row order.
sig_figo_pairwise_demg_contrasts_df <- sig_figo_pairwise_long_df %>%
    group_by(geneID) %>%
    summarize(sig_contrasts = paste0(contrast, collapse = ";"), .groups = "drop")

sig_figo_pairwise_demg_df <- sig_figo_pairwise_demg_contrasts_df %>%
    inner_join(sig_figo_pairwise_demg_lfcs_df, by = "geneID")
# Number of genes with at least one significant contrast.
nrow(sig_figo_pairwise_demg_df)


[36m--[39m [1m[1mColumn specification[1m[22m [36m-----------------------------------------------------------------------------------[39m
cols(
  geneID = [31mcol_character()[39m,
  base_mean = [32mcol_double()[39m,
  l2fc = [32mcol_double()[39m,
  pval = [32mcol_double()[39m,
  padj = [32mcol_double()[39m,
  contrast = [31mcol_character()[39m
)




In [7]:
# Assemble the per-gene summary table one join at a time, starting from the
# gene list in `figo_demg_df`. Inner joins drop genes without a match in the
# joined table; left joins keep every gene and leave NA in the added columns.
meta_df <- inner_join(figo_demg_df, mat_df, by = c("geneID" = "gene_symbol"))
meta_df <- inner_join(meta_df, deg_df, by = "geneID")
# meta_df <- left_join(meta_df, waov_df, by = "geneID")
meta_df <- left_join(meta_df, sig_figo_pairwise_demg_df, by = "geneID")
meta_df <- left_join(meta_df, pbc_df, by = "geneID")
meta_df <- left_join(meta_df, lasso_df, by = "geneID")
meta_df <- inner_join(meta_df, wgcna_mm_df, by = "geneID")
# The eigengene table is keyed by module, not by gene.
meta_df <- inner_join(meta_df, wgcna_eg_df, by = "module")
# Missing lasso coefficients (e.g. genes with no row in `lasso_df`) become 0.
meta_df <- replace_na(meta_df, list(lasso_coef = 0))

In [8]:
# Write the summary table as TSV, but only when saving is switched on.
if (save_res) {
    write_tsv(
        meta_df,
        paste0(dirs$analysis_dir, "/meta/", unified_dsets[dset_idx], "_full_figo_umsmg_summary.tsv")
    )
}