In [58]:
library(tidyverse)

# Custom package
library(rutils)

In [59]:
# Names of the unified datasets; `dset_idx` selects one entry by position.
unified_dsets <- c(
    "unified_cervical_data",
    "unified_uterine_data",
    "unified_uterine_endometrial_data"
)

# Cancer label matched by position to `unified_dsets`; it is used to filter
# the `cancer` column of the pathology table. The second dataset has no
# matching label ("none").
cancer_name <- c(
    "cervical cancer",
    "none",
    "endometrial cancer"
)

# Project directory lookup (provides `data_dir` and `analysis_dir`),
# read from the shared dev-paths file.
dirs <- rutils::get_dev_directories(dev_paths_file = "../dev_paths.txt")

# Location of the matrisome master list under the data directory.
matrisome_path <- paste0(
    dirs$data_dir,
    "/matrisome/matrisome_hs_masterlist.tsv"
)

In [60]:
# Run configuration.
# Threshold value embedded in the hub gene-list file name read below.
# hub_con_thresh <- 0.25  # alternative setting
hub_con_thresh <- 0.0
# 1-based position into `unified_dsets` and `cancer_name`
# (3 = "unified_uterine_endometrial_data" / "endometrial cancer").
dset_idx <- 3

In [61]:
# Matrisome annotation table reduced to three columns, with the gene symbol
# exposed as `geneID` so it shares a key name with the other tables.
# NOTE(review): `load_matrisome_df` is presumably the rutils loader that
# also normalises the header names (e.g. `Gene Symbol` -> `gene_symbol`);
# confirm against the package.
matrisome_df <- matrisome_path %>%
    load_matrisome_df() %>%
    select(geneID = gene_symbol, division, category)


[36m--[39m [1m[1mColumn specification[1m[22m [36m------------------------------------------------------------------------------------------------[39m
cols(
  Division = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  `Gene Symbol` = [31mcol_character()[39m,
  `Gene Name` = [31mcol_character()[39m,
  Synonyms = [31mcol_character()[39m,
  HGNC_IDs = [32mcol_double()[39m,
  `HGNC_IDs Links` = [32mcol_double()[39m,
  UniProt_IDs = [31mcol_character()[39m,
  Refseq_IDs = [31mcol_character()[39m,
  Orthology = [31mcol_character()[39m,
  Notes = [31mcol_character()[39m
)




In [62]:
# Read the DESeq results table for the selected dataset from
# <analysis_dir>/deg/<dataset>_DESeq_results.tsv.
deseq_df <- paste0(
    dirs$analysis_dir,
    "/deg/",
    unified_dsets[dset_idx],
    "_DESeq_results.tsv"
) %>%
    read_tsv()


[36m--[39m [1m[1mColumn specification[1m[22m [36m------------------------------------------------------------------------------------------------[39m
cols(
  geneID = [31mcol_character()[39m,
  baseMean = [32mcol_double()[39m,
  log2FoldChange = [32mcol_double()[39m,
  lfcSE = [32mcol_double()[39m,
  stat = [32mcol_double()[39m,
  pvalue = [32mcol_double()[39m,
  padj = [32mcol_double()[39m,
  qval = [32mcol_double()[39m
)




In [63]:
# Load the THPA v20.1 pathology table and reduce it to the staining counts
# (high / medium / low / not_detected) per gene for the selected cancer type.
pathology_df <- read_tsv(paste0(dirs$data_dir, "/THPA_v20_1_staining/pathology.tsv")) %>%
    # Normalise the headers to snake_case: lower-case first, then turn the
    # " - " separators and any remaining spaces into underscores.
    # `rename_with()` replaces the superseded `rename_all()`.
    rename_with(tolower) %>%
    rename_with(~ gsub(" - ", "_", .x, fixed = TRUE)) %>%
    rename_with(~ gsub(" ", "_", .x, fixed = TRUE)) %>%
    rename(geneID = gene_name) %>%
    select(geneID, cancer, high, medium, low, not_detected) %>%
    # Keep only the cancer type that corresponds to the selected dataset.
    filter(cancer %in% cancer_name[dset_idx]) %>%
    # Drop genes missing any staining count. The columns are named
    # explicitly rather than addressed by position (formerly `.[, 3:6]`),
    # so reordering the `select()` above cannot change which columns are
    # checked.
    drop_na(high, medium, low, not_detected)


[36m--[39m [1m[1mColumn specification[1m[22m [36m------------------------------------------------------------------------------------------------[39m
cols(
  Gene = [31mcol_character()[39m,
  `Gene name` = [31mcol_character()[39m,
  Cancer = [31mcol_character()[39m,
  High = [32mcol_double()[39m,
  Medium = [32mcol_double()[39m,
  Low = [32mcol_double()[39m,
  `Not detected` = [32mcol_double()[39m,
  `prognostic - favorable` = [32mcol_double()[39m,
  `unprognostic - favorable` = [32mcol_double()[39m,
  `prognostic - unfavorable` = [32mcol_double()[39m,
  `unprognostic - unfavorable` = [32mcol_double()[39m
)




In [64]:
# Gene lists for the selected dataset, one gene ID per line, read from
# <analysis_dir>/gene_lists/.
umsmg_demg_list <- paste0(
    dirs$analysis_dir,
    "/gene_lists/",
    unified_dsets[dset_idx],
    "_umsmg_demg_list.txt"
) %>%
    read_lines()

# Same, for the hub-thresholded list; the threshold value is part of the
# file name.
umsmg_nsmg_demg_list <- paste0(
    dirs$analysis_dir,
    "/gene_lists/",
    unified_dsets[dset_idx],
    "_umsmg_nsmg_hub_thresh_",
    hub_con_thresh,
    "_demg_list.txt"
) %>%
    read_lines()

In [65]:
# Differential-expression statistics for the genes in
# `umsmg_nsmg_demg_list`, with the direction of change made explicit.
critical_mg_stats_df <- deseq_df %>%
    filter(geneID %in% umsmg_nsmg_demg_list) %>%
    # Keep and rename the statistics of interest in a single step.
    select(geneID, base_mean = baseMean, lfc = log2FoldChange, qval) %>%
    # "up" for a strictly positive log2 fold change, "down" otherwise;
    # placed right after the gene ID.
    mutate(change_dir = ifelse(lfc > 0, "up", "down"), .after = geneID) %>%
    arrange(geneID)

In [66]:
# Show the genes that have both expression statistics and pathology
# staining counts, grouped by direction of change ("down" before "up").
inner_join(critical_mg_stats_df, pathology_df, by = "geneID") %>%
    arrange(change_dir)

geneID,change_dir,base_mean,lfc,qval,cancer,high,medium,low,not_detected
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
ANGPT4,down,66.71048,-4.039425,4.8301180000000004e-46,endometrial cancer,1,7,2,1
LGI2,down,682.82144,-2.418608,5.702105e-41,endometrial cancer,0,0,1,10
LOXL3,down,466.65753,-1.128548,2.120413e-31,endometrial cancer,0,0,1,10
PAPPA2,down,57.24273,-1.267549,2.683043e-07,endometrial cancer,0,0,0,10
RSPO3,down,1829.66009,-2.875078,3.438289e-33,endometrial cancer,0,7,2,3
WNT10A,up,282.8423,4.337583,1.423487e-49,endometrial cancer,0,3,3,5
