In [1]:
library(tidyverse)

# Custom package
library(rutils)

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
✔ ggplot2 3.3.2     ✔ purrr   0.3.4
✔ tibble  3.0.3     ✔ dplyr   1.0.0
✔ tidyr   1.1.0     ✔ stringr 1.4.0
✔ readr   1.3.1     ✔ forcats 0.5.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()


In [2]:
# Resolve the project directories from the shared dev-paths file. Cells below
# read `dirs$data_dir` and `dirs$analysis_dir` from the returned object.
dirs <- rutils::get_dev_directories(dev_paths_file = "../dev_paths.txt")

# TCGA project identifiers.
# NOTE(review): four projects are listed here but only three unified datasets
# below -- confirm whether "TCGA-OV" is intentionally without a dataset.
projects <- c(
    "TCGA-CESC",
    "TCGA-UCS",
    "TCGA-UCEC",
    "TCGA-OV"
)

# Dataset name prefixes; used further down to build result-file names.
unified_dsets <- c(
    "unified_cervical_data",
    "unified_uterine_data",
    "unified_uterine_endometrial_data"
)

# Full path to the matrisome master list (TSV) under the data directory.
matrisome_list <- paste0(
    dirs$data_dir,
    "/matrisome/matrisome_hs_masterlist.tsv"
)

In [3]:
# Analysis thresholds. Assigned with `<-` to match the rest of the notebook.
# NOTE(review): none of these are referenced in the cells visible here; the
# meanings below are inferred from the names -- confirm against later cells.
p_thresh <- 0.05        # presumably the p-value significance cutoff
mi_thresh <- 0          # presumably a mutual-information cutoff -- TODO confirm
lfc_thresh <- log2(2)   # evaluates to 1; presumably a |log2 fold change| cutoff

In [4]:
# Load the matrisome master list and keep only the gene symbol together with
# its division and category annotations.
# NOTE(review): the raw file's headers are `Gene Symbol`, `Division`,
# `Category` (see the column spec printed below), so the rutils loader
# presumably renames them to snake_case -- confirm in rutils.
matrisome_df <- dplyr::select(
    rutils::load_matrisome_df(matrisome_list),
    gene_symbol, division, category
)

Parsed with column specification:
cols(
  Division = col_character(),
  Category = col_character(),
  `Gene Symbol` = col_character(),
  `Gene Name` = col_character(),
  Synonyms = col_character(),
  HGNC_IDs = col_double(),
  `HGNC_IDs Links` = col_double(),
  UniProt_IDs = col_character(),
  Refseq_IDs = col_character(),
  Orthology = col_character(),
  Notes = col_character()
)


In [5]:
# Index into `unified_dsets` selecting which dataset the following cells load
# (1 corresponds to "unified_cervical_data").
i <- 1

In [14]:
# Cox PH results for the selected dataset. The p-value and coefficient columns
# get a `coxph_` prefix so they remain distinguishable after the join below.
coxph_df <- paste0(
    dirs$analysis_dir, "/", unified_dsets[i], "_coxph_results.tsv"
) %>%
    read_tsv() %>%
    dplyr::rename(coxph_pval = gene_pval, coxph_coeff = gene_coeff)

# DESeq results for the same dataset, restricted to matrisome genes. Only the
# gene ID, log2 fold change and adjusted p-value are kept; the latter two are
# renamed with a `deg_` prefix directly inside `select()`.
deg_df <- paste0(
    dirs$analysis_dir, "/", unified_dsets[i], "_DESeq_results.tsv"
) %>%
    read_tsv() %>%
    dplyr::filter(geneID %in% matrisome_df$gene_symbol) %>%
    dplyr::select(geneID, deg_l2fc = log2FoldChange, deg_padj = padj)

Parsed with column specification:
cols(
  geneID = col_character(),
  gene_pval = col_double(),
  gene_coeff = col_double()
)
Parsed with column specification:
cols(
  geneID = col_character(),
  baseMean = col_double(),
  log2FoldChange = col_double(),
  lfcSE = col_double(),
  stat = col_double(),
  pvalue = col_double(),
  padj = col_double()
)


In [19]:
# Keep only genes that appear in both the Cox PH and the DE result tables,
# matched on `geneID`.
combined_df <- inner_join(coxph_df, deg_df, by = "geneID")

# Notebook display: number of joined genes, then a preview of the first rows.
nrow(combined_df)
head(combined_df)

geneID,coxph_pval,coxph_coeff,deg_l2fc,deg_padj
<chr>,<dbl>,<dbl>,<dbl>,<dbl>
PGF,0.8071582,-0.03116842,-0.00558361,0.9919896
TIMP4,0.2748115,0.16072631,-4.45396269,5.769052e-12
C1QTNF6,0.0280016,0.29687324,0.46855664,0.2090489
TNC,0.106904,0.14709662,0.43931731,0.4140684
PRL,0.7570963,0.12384552,-3.27230838,0.001246596
OGN,0.844616,0.02494448,-6.39269858,3.630674e-16


In [20]:
# Test for a linear association between the Cox PH p-values and the DE
# adjusted p-values across the joined genes. No `method` is given, so
# `cor.test` uses its default (Pearson), as the printed result confirms.
# NOTE(review): p-values are bounded and typically skewed; a rank-based test
# (method = "spearman") may be more appropriate -- confirm intent.
cor.test(combined_df$coxph_pval, combined_df$deg_padj)


	Pearson's product-moment correlation

data:  combined_df$coxph_pval and combined_df$deg_padj
t = -1.7212, df = 915, p-value = 0.08556
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.121102417  0.007959822
sample estimates:
        cor 
-0.05680863 
