In [1]:
library(tidyverse)

# Custom package
library(rutils)

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
✔ ggplot2 3.3.2     ✔ purrr   0.3.4
✔ tibble  3.0.3     ✔ dplyr   1.0.0
✔ tidyr   1.1.0     ✔ stringr 1.4.0
✔ readr   1.3.1     ✔ forcats 0.5.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()


In [2]:
# Directory paths loaded through the rutils helper; `data_dir` and
# `analysis_dir` are the entries read from `dirs` in this file.
dirs <- rutils::get_dev_directories(dev_paths_file = "../dev_paths.txt")

# TCGA project identifiers.
projects <- c(
    "TCGA-CESC",
    "TCGA-UCS",
    "TCGA-UCEC",
    "TCGA-OV"
)

# Unified dataset names; these form the prefix of every results file name.
unified_dsets <- c(
    "unified_cervical_data",
    "unified_uterine_data",
    "unified_uterine_endometrial_data"
)

# Location of the matrisome master list (TSV) under the data directory.
matrisome_list <- paste0(
    dirs$data_dir,
    "/matrisome/matrisome_hs_masterlist.tsv"
)

In [3]:
# Significance thresholds used throughout the analysis.
# p_thresh:   upper bound applied to p-values (Cox PH `gene_pval`, DESeq `padj`).
# mi_thresh:  lower bound applied to the median mutual-information estimate.
# lfc_thresh: lower bound on log2 fold change; log2(2) == 1, i.e. a 2-fold change.
p_thresh <- 0.05
mi_thresh <- 0
lfc_thresh <- log2(2)

In [4]:
# Load the matrisome master list and keep only the gene symbol together with
# its division/category annotations.
matrisome_df <- dplyr::select(
    rutils::load_matrisome_df(matrisome_list),
    gene_symbol, division, category
)

Parsed with column specification:
cols(
  Division = col_character(),
  Category = col_character(),
  `Gene Symbol` = col_character(),
  `Gene Name` = col_character(),
  Synonyms = col_character(),
  HGNC_IDs = col_double(),
  `HGNC_IDs Links` = col_double(),
  UniProt_IDs = col_character(),
  Refseq_IDs = col_character(),
  Orthology = col_character(),
  Notes = col_character()
)


In [5]:
# Index into `unified_dsets` selecting which dataset's result files are read
# and which dataset name is used for the output file.
i <- 1

In [6]:
# Load the per-gene result tables for the selected dataset and attach a
# logical significance flag to each one.
# Every results file shares the "<analysis_dir>/<dataset name>" prefix.
results_prefix <- paste0(dirs$analysis_dir, "/", unified_dsets[i])

# Cox PH results: significant when the gene p-value is below p_thresh.
cox_ph_df <- read_tsv(paste0(results_prefix, "_coxph_results.tsv")) %>%
    dplyr::mutate(cox_ph_sig = gene_pval < p_thresh)

# Mutual information (FIGO): significant when the median estimate exceeds
# mi_thresh.
mi_figo_df <- read_tsv(paste0(results_prefix, "_MI_figo_results.tsv")) %>%
    dplyr::mutate(mi_figo_sig = MI_est_median > mi_thresh)

# Mutual information (survival): same rule as above.
mi_survival_df <- read_tsv(paste0(results_prefix, "_MI_survival_results.tsv")) %>%
    dplyr::mutate(mi_survival_sig = MI_est_median > mi_thresh)

# DESeq results restricted to matrisome genes; only the columns needed for the
# significance call are kept.
deg_df <- read_tsv(paste0(results_prefix, "_DESeq_results.tsv")) %>%
    dplyr::filter(geneID %in% matrisome_df$gene_symbol) %>%
    dplyr::select(geneID, log2FoldChange, padj)

# DEG list is missing some genes (which were filtered due to low expression).
# Add a placeholder row for each gene present in the MI (FIGO) table but absent
# from the DEG table, using typed NA so the columns stay numeric when bound.
deg_missing_genes_df <- mi_figo_df %>%
    dplyr::filter(!geneID %in% deg_df$geneID) %>%
    dplyr::transmute(geneID, log2FoldChange = NA_real_, padj = NA_real_)

# NOTE(review): only positive fold changes above lfc_thresh are flagged; if
# down-regulated genes should also count, abs(log2FoldChange) is needed - confirm.
deg_df <- dplyr::bind_rows(deg_df, deg_missing_genes_df) %>%
    dplyr::mutate(deg_sig = (log2FoldChange > lfc_thresh) & (padj < p_thresh))

Parsed with column specification:
cols(
  geneID = col_character(),
  gene_pval = col_double(),
  gene_coeff = col_double()
)
Parsed with column specification:
cols(
  geneID = col_character(),
  MI_est_median = col_double()
)
Parsed with column specification:
cols(
  geneID = col_character(),
  MI_est_median = col_double()
)
Parsed with column specification:
cols(
  geneID = col_character(),
  baseMean = col_double(),
  log2FoldChange = col_double(),
  lfcSE = col_double(),
  stat = col_double(),
  pvalue = col_double(),
  padj = col_double()
)


In [14]:
# Combine the four per-gene significance flags into one table and take a
# majority vote across them.
# The vote columns are named explicitly so that the count and the majority
# threshold do not depend on column positions.
vote_cols <- c("deg_sig", "cox_ph_sig", "mi_figo_sig", "mi_survival_sig")

meta_df <- deg_df %>%
    inner_join(cox_ph_df, by = "geneID") %>%
    inner_join(mi_figo_df, by = "geneID") %>%
    inner_join(mi_survival_df, by = "geneID") %>%
    dplyr::select(geneID, dplyr::all_of(vote_cols)) %>%
    # A missing flag (e.g. placeholder DEG rows) counts as "not significant".
    dplyr::mutate(
        dplyr::across(dplyr::all_of(vote_cols), ~ dplyr::coalesce(.x, FALSE))
    ) %>%
    # Number of analyses in which the gene was flagged.
    dplyr::mutate(
        membership_count = rowSums(dplyr::across(dplyr::all_of(vote_cols)))
    ) %>%
    # Strict majority: flagged in more than half of the analyses.
    dplyr::mutate(majority_vote = membership_count > length(vote_cols) / 2) %>%
    # Attach the matrisome division/category annotations.
    inner_join(matrisome_df, by = c("geneID" = "gene_symbol"))

In [17]:
# Write the meta-analysis table for the selected dataset to the analysis
# directory as a TSV file.
write_tsv(
    meta_df,
    paste0(
        dirs$analysis_dir,
        "/",
        unified_dsets[i],
        "_matrisome_importance_meta_analysis.tsv"
    )
)