In [1]:
library(tidyverse)

# Custom package
library(rutils)

-- [1mAttaching packages[22m -------------------------------------------------------------------- tidyverse 1.3.0 --

[32mv[39m [34mggplot2[39m 3.3.3     [32mv[39m [34mpurrr  [39m 0.3.4
[32mv[39m [34mtibble [39m 3.0.6     [32mv[39m [34mdplyr  [39m 1.0.4
[32mv[39m [34mtidyr  [39m 1.1.2     [32mv[39m [34mstringr[39m 1.4.0
[32mv[39m [34mreadr  [39m 1.4.0     [32mv[39m [34mforcats[39m 0.5.1

-- [1mConflicts[22m ----------------------------------------------------------------------- tidyverse_conflicts() --
[31mx[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31mx[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()



In [2]:
# Position of the dataset to analyse; it indexes both `unified_dsets` and
# `cancer_name` in the cells below.
dset_idx <- 1

# Threshold setting. The previous value is kept for reference.
# NOTE(review): not referenced in the visible part of this notebook.
# hub_con_thresh <- 0.25
hub_con_thresh <- 0

In [3]:
# Directory settings come from the rutils helper; `dirs$data_dir` and
# `dirs$analysis_dir` are read from the result in later cells.
dirs <- rutils::get_dev_directories(dev_paths_file = "../dev_paths.txt")

# Dataset file prefixes and the cancer label matched against the pathology
# table. Both vectors are indexed with the same `dset_idx`, so their
# positions must stay aligned.
unified_dsets <- c(
    "unified_cervical_data",
    "unified_uterine_data",
    "unified_uterine_endometrial_data"
)
cancer_name <- c(
    "cervical cancer",
    "none",
    "endometrial cancer"
)

# Location of the matrisome master list under the data directory.
matrisome_path <- paste0(dirs$data_dir, "/matrisome/matrisome_hs_masterlist.tsv")

In [61]:
# Matrisome annotation: keep symbol, division and category, exposing the
# symbol column as `geneID` so it shares a key name with the other tables.
matrisome_df <- dplyr::select(
    rutils::load_matrisome_df(matrisome_path),
    geneID = gene_symbol, division, category
)

# DESeq results for the selected dataset. `change_dir` is "up" for a positive
# log2 fold change and "down" otherwise (NA fold changes stay NA).
deseq_df <- read_tsv(paste0(dirs$analysis_dir, "/deg/", unified_dsets[dset_idx], "_DESeq_results.tsv")) %>%
    mutate(change_dir = ifelse(log2FoldChange > 0, "up", "down")) %>%
    select(geneID, change_dir, base_mean = baseMean, lfc = log2FoldChange, qval)

# Read one gene-list file for the selected dataset; `file_suffix` is the part
# of the file name that follows the dataset prefix.
read_dset_gene_list <- function(file_suffix) {
    read_lines(paste0(dirs$analysis_dir, "/gene_lists/", unified_dsets[dset_idx], file_suffix))
}

demg_list <- read_dset_gene_list("_demg_list.txt")

# FIGO lists: union of the two source lists, restricted to genes in `demg_list`.
figo_umsmg_list <- read_dset_gene_list("_figo_umsmg_list.txt")
figo_nsmg_list <- read_dset_gene_list("_figo_nsmg_list.txt")
figo_list <- intersect(union(figo_umsmg_list, figo_nsmg_list), demg_list)

# Survival list, likewise restricted to genes in `demg_list`.
survival_umsmg_list <- read_dset_gene_list("_survival_umsmg_list.txt")
survival_list <- intersect(survival_umsmg_list, demg_list)


[36m--[39m [1m[1mColumn specification[1m[22m [36m-------------------------------------------------------------------------------------[39m
cols(
  Division = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  `Gene Symbol` = [31mcol_character()[39m,
  `Gene Name` = [31mcol_character()[39m,
  Synonyms = [31mcol_character()[39m,
  HGNC_IDs = [32mcol_double()[39m,
  `HGNC_IDs Links` = [32mcol_double()[39m,
  UniProt_IDs = [31mcol_character()[39m,
  Refseq_IDs = [31mcol_character()[39m,
  Orthology = [31mcol_character()[39m,
  Notes = [31mcol_character()[39m
)



[36m--[39m [1m[1mColumn specification[1m[22m [36m-------------------------------------------------------------------------------------[39m
cols(
  geneID = [31mcol_character()[39m,
  baseMean = [32mcol_double()[39m,
  log2FoldChange = [32mcol_double()[39m,
  lfcSE = [32mcol_double()[39m,
  stat = [32mcol_double()[39m,
  pvalue = [32mcol_double()[39m,
  padj = [32mco

In [62]:
# Staining table for the selected cancer type.
#   1. Normalise headers: lower-case, then " - " and " " become "_".
#   2. Keep the gene name (as `geneID`), the cancer label and the four
#      staining-level count columns.
#   3. Keep rows for the selected cancer with no missing staining counts.
#   4. Derive the detected count (high + medium + low), its proportion of all
#      four levels, and a logical flag for any detection.
pathology_df <- read_tsv(paste0(dirs$data_dir, "/THPA_v20_1_staining/pathology.tsv")) %>%
    rename_with(tolower) %>%
    rename_with(~ gsub(" ", "_", gsub(" - ", "_", .x, fixed = TRUE), fixed = TRUE)) %>%
    select(geneID = gene_name, cancer, high, medium, low, not_detected) %>%
    filter(cancer %in% cancer_name[dset_idx]) %>%
    tidyr::drop_na(high, medium, low, not_detected) %>%
    dplyr::mutate(
        detected_count = rowSums(dplyr::across(c(high, medium, low))),
        detected_prop = detected_count / (not_detected + detected_count),
        detected = detected_count > 0
    )


[36m--[39m [1m[1mColumn specification[1m[22m [36m-------------------------------------------------------------------------------------[39m
cols(
  Gene = [31mcol_character()[39m,
  `Gene name` = [31mcol_character()[39m,
  Cancer = [31mcol_character()[39m,
  High = [32mcol_double()[39m,
  Medium = [32mcol_double()[39m,
  Low = [32mcol_double()[39m,
  `Not detected` = [32mcol_double()[39m,
  `prognostic - favorable` = [32mcol_double()[39m,
  `unprognostic - favorable` = [32mcol_double()[39m,
  `prognostic - unfavorable` = [32mcol_double()[39m,
  `unprognostic - unfavorable` = [32mcol_double()[39m
)




In [6]:
#' Join differential-expression results to pathology data for a gene subset
#'
#' Optionally restricts `de_df` to `gene_list`, inner-joins it to `path_df` on
#' `geneID` (so only genes present in both tables remain), sorts the result,
#' optionally tags it with a group label, and optionally keeps a single
#' direction of change.
#'
#' @param de_df Data frame with a `geneID` column; a `change_dir` column is
#'   needed for `order_change_dir` and `direction_filt` to take effect.
#' @param path_df Data frame with a `geneID` column to join against.
#' @param gene_list Optional vector of gene IDs to keep. `NULL` or an empty
#'   vector means no gene filtering.
#' @param order_genes If `TRUE`, sort rows by `geneID`.
#' @param order_change_dir If `TRUE`, sort rows by `change_dir`. When
#'   `order_genes` is also `TRUE`, `geneID` orders rows within each direction.
#'   Ignored when the joined table has no `change_dir` column.
#' @param group_col Optional value stored in a new `group` column.
#' @param direction_filt Optional `change_dir` value; only matching rows are
#'   kept.
#' @return The joined (and possibly filtered, sorted, labelled) data frame.
make_joined_subset_df <- function(de_df, path_df, gene_list = NULL, order_genes = TRUE, order_change_dir = TRUE, group_col = NULL, direction_filt = NULL) {
    joined <- de_df
    if (length(gene_list) > 0) {
        joined <- dplyr::filter(joined, geneID %in% gene_list)
    }
    joined <- dplyr::inner_join(joined, path_df, by = "geneID")

    # The original sorted by geneID for both flags, so `order_change_dir` had
    # no effect. Sort by direction here; skip it when the column is missing so
    # inputs that previously worked keep working.
    sort_by_dir <- isTRUE(order_change_dir) && "change_dir" %in% names(joined)
    sort_by_gene <- isTRUE(order_genes)
    if (sort_by_dir && sort_by_gene) {
        joined <- dplyr::arrange(joined, change_dir, geneID)
    } else if (sort_by_dir) {
        joined <- dplyr::arrange(joined, change_dir)
    } else if (sort_by_gene) {
        joined <- dplyr::arrange(joined, geneID)
    }

    if (!is.null(group_col)) {
        joined <- dplyr::mutate(joined, group = group_col)
    }
    if (!is.null(direction_filt)) {
        joined <- dplyr::filter(joined, change_dir == direction_filt)
    }
    joined
}