In [1]:
library(tidyverse)

# Custom package
library(rutils)

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
✔ ggplot2 3.3.2     ✔ purrr   0.3.4
✔ tibble  3.0.3     ✔ dplyr   1.0.0
✔ tidyr   1.1.0     ✔ stringr 1.4.0
✔ readr   1.3.1     ✔ forcats 0.5.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()


In [2]:
# Resolve the project directories from the shared dev-paths file
# (helper comes from the custom `rutils` package).
dirs <- rutils::get_dev_directories(dev_paths_file = "../dev_paths.txt")

# TCGA project identifiers.
projects <- c(
    "TCGA-CESC",
    "TCGA-UCS",
    "TCGA-UCEC",
    "TCGA-OV"
)

# Unified dataset names; these are used as file-name prefixes when the
# analysis result tables are read.
# NOTE(review): three datasets are listed against four projects — confirm
# whether an entry is missing or the omission is deliberate.
unified_dsets <- c(
    "unified_cervical_data",
    "unified_uterine_data",
    "unified_uterine_endometrial_data"
)

# Location of the matrisome master list inside the data directory.
matrisome_list <- paste0(dirs$data_dir, "/matrisome/matrisome_hs_masterlist.tsv")

In [3]:
# Thresholds used by the significance filters in this notebook.
# Upper bound applied to Cox PH p-values and DE adjusted p-values.
p_thresh <- 0.05
# MI estimates must be strictly greater than this value to be kept.
mi_thresh <- 0
# log2 fold-change cutoff; log2(2) == 1, i.e. a two-fold change.
lfc_thresh <- log2(2)

In [4]:
# Load the matrisome master list and keep only the gene symbol together
# with its division and category columns.
matrisome_df <- dplyr::select(
    rutils::load_matrisome_df(matrisome_list),
    gene_symbol, division, category
)

Parsed with column specification:
cols(
  Division = col_character(),
  Category = col_character(),
  `Gene Symbol` = col_character(),
  `Gene Name` = col_character(),
  Synonyms = col_character(),
  HGNC_IDs = col_double(),
  `HGNC_IDs Links` = col_double(),
  UniProt_IDs = col_character(),
  Refseq_IDs = col_character(),
  Orthology = col_character(),
  Notes = col_character()
)


In [5]:
# Index into `unified_dsets` selecting which dataset's result files are
# loaded below; written as an integer literal because it is a subscript.
i <- 1L

In [6]:
# Cox PH results: prefix the generic p-value / coefficient columns so they
# stay unambiguous after joining with the other result tables.
coxph_df <- paste0(dirs$analysis_dir, "/", unified_dsets[i], "_coxph_results.tsv") %>%
    readr::read_tsv() %>%
    dplyr::rename(coxph_pval = gene_pval, coxph_coeff = gene_coeff)

# DESeq results: restrict to genes in the matrisome list and keep only the
# gene ID, log2 fold change and adjusted p-value (renamed with a `deg_` prefix).
deg_df <- paste0(dirs$analysis_dir, "/", unified_dsets[i], "_DESeq_results.tsv") %>%
    readr::read_tsv() %>%
    dplyr::filter(geneID %in% matrisome_df$gene_symbol) %>%
    dplyr::select(geneID, deg_l2fc = log2FoldChange, deg_padj = padj)

# MI survival results, read as-is.
mi_df <- readr::read_tsv(
    paste0(dirs$analysis_dir, "/", unified_dsets[i], "_MI_survival_results.tsv")
)

Parsed with column specification:
cols(
  geneID = col_character(),
  gene_pval = col_double(),
  gene_coeff = col_double()
)
Parsed with column specification:
cols(
  geneID = col_character(),
  baseMean = col_double(),
  log2FoldChange = col_double(),
  lfcSE = col_double(),
  stat = col_double(),
  pvalue = col_double(),
  padj = col_double()
)
Parsed with column specification:
cols(
  geneID = col_character(),
  MI_est_median = col_double()
)


# DE and Cox

In [7]:
# Genes that have both a Cox PH result and a DE result (inner join on `geneID`).
deg_and_coxph <- dplyr::inner_join(coxph_df, deg_df, by = "geneID")

# Show the number of matched genes and preview the joined table.
nrow(deg_and_coxph)
head(deg_and_coxph)

geneID,coxph_pval,coxph_coeff,deg_l2fc,deg_padj
<chr>,<dbl>,<dbl>,<dbl>,<dbl>
PGF,0.8071582,-0.03116842,-0.00558361,0.9919896
TIMP4,0.2748115,0.16072631,-4.45396269,5.769052e-12
C1QTNF6,0.0280016,0.29687324,0.46855664,0.2090489
TNC,0.106904,0.14709662,0.43931731,0.4140684
PRL,0.7570963,0.12384552,-3.27230838,0.001246596
OGN,0.844616,0.02494448,-6.39269858,3.630674e-16


## All genes present in both result sets (DESeq2 filters out lowly expressed genes)

In [8]:
# Pearson correlation between Cox PH p-values and DE adjusted p-values
# across all joined genes.
cor.test(
    x = deg_and_coxph$coxph_pval,
    y = deg_and_coxph$deg_padj,
    method = "pearson"
)


	Pearson's product-moment correlation

data:  deg_and_coxph$coxph_pval and deg_and_coxph$deg_padj
t = -1.7212, df = 915, p-value = 0.08556
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.121102417  0.007959822
sample estimates:
        cor 
-0.05680863 


## Genes that are significant in DE & Cox PH

In [9]:
# Keep genes that pass the p-value threshold in both Cox PH and DE and whose
# log2 fold change exceeds the cutoff.
# NOTE(review): `deg_l2fc > lfc_thresh` keeps only positive fold changes, so
# genes with large negative fold changes are excluded — confirm this is
# intended rather than a cutoff on the absolute fold change.
sig_deg_and_coxph_df <- dplyr::filter(
    deg_and_coxph,
    coxph_pval < p_thresh & deg_padj < p_thresh & deg_l2fc > lfc_thresh
)

In [10]:
# Pearson correlation between Cox PH p-values and DE adjusted p-values,
# restricted to the filtered (significant) genes.
cor.test(
    x = sig_deg_and_coxph_df$coxph_pval,
    y = sig_deg_and_coxph_df$deg_padj,
    method = "pearson"
)


	Pearson's product-moment correlation

data:  sig_deg_and_coxph_df$coxph_pval and sig_deg_and_coxph_df$deg_padj
t = 0.6029, df = 38, p-value = 0.5502
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.2208678  0.3968158
sample estimates:
      cor 
0.0973387 


# MI and Cox

In [11]:
# Genes that have both a Cox PH result and an MI estimate (inner join on `geneID`).
mi_and_coxph_df <- dplyr::inner_join(coxph_df, mi_df, by = "geneID")

# Show the number of matched genes and preview the joined table.
nrow(mi_and_coxph_df)
head(mi_and_coxph_df)

geneID,coxph_pval,coxph_coeff,MI_est_median
<chr>,<dbl>,<dbl>,<dbl>
PGF,0.8071582,-0.03116842,0.0
TIMP4,0.2748115,0.16072631,0.0
C1QTNF6,0.0280016,0.29687324,0.1079641
TNC,0.106904,0.14709662,0.0
PRL,0.7570963,0.12384552,0.0
OGN,0.844616,0.02494448,0.0


## All genes

In [12]:
# Pearson correlation between Cox PH p-values and the median MI estimate
# across all joined genes.
cor.test(
    x = mi_and_coxph_df$coxph_pval,
    y = mi_and_coxph_df$MI_est_median,
    method = "pearson"
)


	Pearson's product-moment correlation

data:  mi_and_coxph_df$coxph_pval and mi_and_coxph_df$MI_est_median
t = -0.55394, df = 1006, p-value = 0.5797
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.07912319  0.04433226
sample estimates:
        cor 
-0.01746203 


## Genes with MI above the threshold that are significant in Cox PH

In [13]:
# Keep genes that pass the Cox PH p-value threshold and whose median MI
# estimate is strictly above the MI threshold.
sig_mi_and_coxph <- dplyr::filter(
    mi_and_coxph_df,
    coxph_pval < p_thresh & MI_est_median > mi_thresh
)

In [14]:
# Pearson correlation between Cox PH p-values and the median MI estimate,
# restricted to the filtered genes.
cor.test(
    x = sig_mi_and_coxph$coxph_pval,
    y = sig_mi_and_coxph$MI_est_median,
    method = "pearson"
)


	Pearson's product-moment correlation

data:  sig_mi_and_coxph$coxph_pval and sig_mi_and_coxph$MI_est_median
t = -1.6311, df = 67, p-value = 0.1076
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.4130018  0.0432578
sample estimates:
       cor 
-0.1954237 
