In [1]:
library(tidyverse)

# Custom package
library(rutils)

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
✔ ggplot2 3.3.2     ✔ purrr   0.3.4
✔ tibble  3.0.3     ✔ dplyr   1.0.0
✔ tidyr   1.1.0     ✔ stringr 1.4.0
✔ readr   1.3.1     ✔ forcats 0.5.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()


In [2]:
# `dirs` supplies the `data_dir` and `analysis_dir` paths used in this notebook.
dirs <- rutils::get_dev_directories(dev_paths_file = "../dev_paths.txt")

# TCGA project IDs and the dataset prefixes that result-file names start with.
# NOTE(review): `projects` has four entries but `unified_dsets` only three --
# confirm whether TCGA-OV is intentionally left without a unified dataset.
projects <- c("TCGA-CESC", "TCGA-UCS", "TCGA-UCEC", "TCGA-OV")
unified_dsets <- c(
    "unified_cervical_data",
    "unified_uterine_data",
    "unified_uterine_endometrial_data"
)

# Location of the matrisome master list; file.path() inserts the separators
# instead of embedding "/" in a pasted string.
matrisome_list <- file.path(
    dirs$data_dir, "matrisome", "matrisome_hs_masterlist.tsv"
)

In [3]:
# Significance cut-off; applied below to both the Cox p-values and the
# DESeq2 adjusted p-values.
p_thresh <- 0.05
# Mutual-information threshold (not used in the cells shown here;
# presumably compared against MI_est_median -- verify).
mi_thresh <- 0
# Minimum log2 fold change; log2(2) = 1, i.e. a two-fold change.
lfc_thresh <- log2(2)

In [4]:
# Load the matrisome master list and keep only the gene symbol together with
# its division and category annotations.
matrisome_df <- dplyr::select(
    rutils::load_matrisome_df(matrisome_list),
    gene_symbol, division, category
)

Parsed with column specification:
cols(
  Division = col_character(),
  Category = col_character(),
  `Gene Symbol` = col_character(),
  `Gene Name` = col_character(),
  Synonyms = col_character(),
  HGNC_IDs = col_double(),
  `HGNC_IDs Links` = col_double(),
  UniProt_IDs = col_character(),
  Refseq_IDs = col_character(),
  Orthology = col_character(),
  Notes = col_character()
)


In [5]:
# Index into `unified_dsets`: selects which dataset's result files are read
# by the loading cell below.
i <- 1

In [6]:
# Every result file for the selected dataset is named
# <analysis_dir>/<dataset><suffix>; build the shared prefix once with
# file.path() instead of re-pasting the directory and "/" for each file.
dset_prefix <- file.path(dirs$analysis_dir, unified_dsets[i])

# Cox PH results; columns are renamed with a `coxph_` prefix so their origin
# stays clear after joining with the other tables.
coxph_df <- read_tsv(paste0(dset_prefix, "_coxph_results.tsv")) %>%
    dplyr::rename(coxph_pval = gene_pval, coxph_coeff = gene_coeff)

# DESeq2 results, restricted to matrisome genes, keeping only the log2 fold
# change and adjusted p-value (renamed with a `deg_` prefix while selecting).
deg_df <- read_tsv(paste0(dset_prefix, "_DESeq_results.tsv")) %>%
    dplyr::filter(geneID %in% matrisome_df$gene_symbol) %>%
    dplyr::select(geneID, deg_l2fc = log2FoldChange, deg_padj = padj)

# Mutual-information results (columns: geneID, MI_est_median).
mi_df <- read_tsv(paste0(dset_prefix, "_MI_survival_results.tsv"))

# Correlation-test results; the generic column names get a `cor_test_` prefix.
cor_df <- read_tsv(paste0(dset_prefix, "_cor_results.tsv")) %>%
    dplyr::rename(cor_test_cor = cor, cor_test_pval = pval, cor_test_n = n)

Parsed with column specification:
cols(
  geneID = col_character(),
  gene_pval = col_double(),
  gene_coeff = col_double()
)
Parsed with column specification:
cols(
  geneID = col_character(),
  baseMean = col_double(),
  log2FoldChange = col_double(),
  lfcSE = col_double(),
  stat = col_double(),
  pvalue = col_double(),
  padj = col_double()
)
Parsed with column specification:
cols(
  geneID = col_character(),
  MI_est_median = col_double()
)
Parsed with column specification:
cols(
  geneID = col_character(),
  cor = col_double(),
  pval = col_double(),
  n = col_double()
)


# DE and Cox

In [7]:
# Pair each gene's Cox results with its DESeq2 results; the inner join keeps
# only genes that appear in both tables.
deg_and_coxph <- inner_join(coxph_df, deg_df, by = "geneID")
nrow(deg_and_coxph)
head(deg_and_coxph)

geneID,coxph_pval,coxph_coeff,deg_l2fc,deg_padj
<chr>,<dbl>,<dbl>,<dbl>,<dbl>
PGF,0.5442516,0.062918777,-0.00558361,0.9919896
TIMP4,0.21721965,0.161809921,-4.45396269,5.769052e-12
C1QTNF6,0.01685799,0.273679674,0.46855664,0.2090489
TNC,0.01755491,0.181345111,0.43931731,0.4140684
PRL,0.20689399,0.367487825,-3.27230838,0.001246596
OGN,0.93805902,-0.008228786,-6.39269858,3.630674e-16


## All genes present in both result sets (DESeq2 filters out lowly expressed genes)

In [26]:
# Pearson correlation (the cor.test default) between the Cox p-value and the
# DESeq2 adjusted p-value across the joined genes.
# NOTE(review): both inputs are p-values; a rank-based method
# (method = "spearman") may suit them better -- confirm intent.
cor.test(deg_and_coxph$coxph_pval, deg_and_coxph$deg_padj)


	Pearson's product-moment correlation

data:  deg_and_coxph$coxph_pval and deg_and_coxph$deg_padj
t = -1.1776, df = 915, p-value = 0.2393
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.10338037  0.02590285
sample estimates:
        cor 
-0.03890156 


## What is the size of the overlap?

In [40]:
# Count genes that pass the Cox p-value cut-off and are also differentially
# expressed (adjusted p-value below `p_thresh`, log2 fold change above
# `lfc_thresh`).
# NOTE(review): `deg_l2fc > lfc_thresh` only admits positive fold changes; if
# down-regulated genes should count too, compare abs(deg_l2fc) -- confirm.
deg_and_coxph %>%
    dplyr::filter(
        coxph_pval < p_thresh & deg_padj < p_thresh & deg_l2fc > lfc_thresh
    ) %>%
    nrow()

# MI and Cox

In [31]:
# Pair each gene's Cox results with its mutual-information estimate; the inner
# join keeps only genes that appear in both tables.
mi_and_coxph_df <- inner_join(coxph_df, mi_df, by = "geneID")
nrow(mi_and_coxph_df)
head(mi_and_coxph_df)

geneID,coxph_pval,coxph_coeff,MI_est_median
<chr>,<dbl>,<dbl>,<dbl>
PGF,0.5442516,0.062918777,0.0
TIMP4,0.21721965,0.161809921,0.0
C1QTNF6,0.01685799,0.273679674,0.1079641
TNC,0.01755491,0.181345111,0.0
PRL,0.20689399,0.367487825,0.0
OGN,0.93805902,-0.008228786,0.0


## All genes

In [32]:
# Pearson correlation (the cor.test default) between the Cox p-value and the
# median mutual-information estimate across the joined genes.
# NOTE(review): the preview rows show many MI_est_median values of exactly 0;
# a rank-based method may be more robust here -- confirm intent.
cor.test(mi_and_coxph_df$coxph_pval, mi_and_coxph_df$MI_est_median)


	Pearson's product-moment correlation

data:  mi_and_coxph_df$coxph_pval and mi_and_coxph_df$MI_est_median
t = -1.9059, df = 1006, p-value = 0.05695
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.12127807  0.00177229
sample estimates:
        cor 
-0.05998076 


# MI and correlation

In [35]:
# Pair each gene's mutual-information estimate with its correlation-test
# results; the inner join keeps only genes that appear in both tables.
mi_and_cor_df <- inner_join(mi_df, cor_df, by = "geneID")
nrow(mi_and_cor_df)
head(mi_and_cor_df)

geneID,MI_est_median,cor_test_cor,cor_test_pval,cor_test_n
<chr>,<dbl>,<dbl>,<dbl>,<dbl>
PGF,0.0,-0.03267992,0.794485949,66
TIMP4,0.0,-0.11527481,0.356694904,66
C1QTNF6,0.1079641,-0.32180744,0.008415805,66
TNC,0.0,0.03475637,0.78174112,66
PRL,0.0,-0.10272623,0.411771043,66
OGN,0.0,0.16283526,0.191436111,66


## All genes

In [16]:
# Pearson correlation (the cor.test default) between the median
# mutual-information estimate and the correlation-test p-value across the
# joined genes.
# NOTE(review): this compares against `cor_test_pval`, not the correlation
# coefficient `cor_test_cor` -- confirm that the p-value is the intended input.
cor.test(mi_and_cor_df$MI_est_median, mi_and_cor_df$cor_test_pval)


	Pearson's product-moment correlation

data:  mi_and_cor_df$MI_est_median and mi_and_cor_df$cor_test_pval
t = -4.7528, df = 997, p-value = 2.301e-06
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.20894064 -0.08763035
sample estimates:
       cor 
-0.1488455 
