In [1]:
library(tidyverse)

# Custom package
library(rutils)

-- Attaching packages ------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --

v ggplot2 3.3.3     v purrr   0.3.4
v tibble  3.0.6     v dplyr   1.0.4
v tidyr   1.1.2     v stringr 1.4.0
v readr   1.4.0     v forcats 0.5.1

-- Conflicts ---------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()



In [2]:
# Project directory lookup, read from the developer-specific paths file.
# Only `dirs$analysis_dir` is used in this notebook.
dirs <- rutils::get_dev_directories(dev_paths_file = "../dev_paths.txt")

# Dataset names; each one is used as the file-name prefix of its result files.
unified_dsets <- c(
    "unified_cervical_data",
    "unified_uterine_data",
    "unified_uterine_endometrial_data"
)

In [3]:
# Position in `unified_dsets` of the dataset analysed in this notebook
# (3 selects "unified_uterine_endometrial_data").
dset_idx <- 3

In [4]:
# Cutoffs used when selecting significant genes from the univariate survival
# results: `p_thresh` is compared against the *_pval columns and `q_thresh`
# against the *_qval columns.
p_thresh <- 0.05
q_thresh <- 0.15

In [5]:
# Every result file of the selected dataset is named "<dataset>_<suffix>.tsv",
# so build the two directory + dataset prefixes once.
dset_name <- unified_dsets[dset_idx]
survival_prefix <- paste0(dirs$analysis_dir, "/survival/", dset_name)
network_prefix <- paste0(dirs$analysis_dir, "/network/", dset_name)

# Survival results: per-gene univariate statistics and the EN Cox PH
# coefficients.
univ_surv_df <- read_tsv(paste0(survival_prefix, "_univ_survival_results.tsv"))
en_cph_df <- read_tsv(paste0(survival_prefix, "_en_cph_results.tsv"))

# Network results: per-gene module assignment/statistics and the per-module
# eigengene-trait table.
network_mm_gs_df <- read_tsv(paste0(network_prefix, "_gene_mm_gs.tsv"))
network_me_sig_df <- read_tsv(paste0(network_prefix, "_eigengene_traits.tsv"))


-- Column specification ------------------------------------------------------------------------------------------------------------------------
cols(
  geneID = col_character(),
  km_pval = col_double(),
  km_qval = col_double(),
  cph_pval = col_double(),
  cph_qval = col_double(),
  coeff = col_double()
)



-- Column specification ------------------------------------------------------------------------------------------------------------------------
cols(
  geneID = col_character(),
  coeff = col_double()
)



-- Column specification ------------------------------------------------------------------------------------------------------------------------
cols(
  .default = col_double(),
  geneID = col_character(),
  module = col_character()
)
i Use `spec()` for the full column specifications.

# Univariate Survival

In [6]:
# Keep a gene only when both conditions hold:
#   1. km_pval or cph_pval is below p_thresh, and
#   2. km_qval or cph_qval is below q_thresh.
# NOTE(review): the p-value hit and the q-value hit may come from different
# tests (e.g. km_pval with cph_qval) -- confirm this mixing is intended.
sig_univ_df <- dplyr::filter(
    univ_surv_df,
    km_pval < p_thresh | cph_pval < p_thresh,
    km_qval < q_thresh | cph_qval < q_thresh
)
sig_univ_df

geneID,km_pval,km_qval,cph_pval,cph_qval,coeff
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
REG3A,0.04881854,0.3317609,0.007443063,0.1431721,-49.7966701
ANGPTL2,0.01642883,0.2977253,0.003721132,0.1431721,0.7207592
SERPIND1,0.13594952,0.4568378,0.01298963,0.1431721,-0.8798543
COL5A3,0.04284721,0.3317609,0.012346876,0.1431721,0.4652507


# Cox PH EN

In [7]:
# Show the table read from "<dataset>_en_cph_results.tsv"
# (columns: geneID, coeff).
en_cph_df

geneID,coeff
<chr>,<dbl>
TGM5,0.027480124
PCOLCE2,0.009700858
COL6A5,0.117991338
TNFSF11,-0.041883856
ANGPTL2,0.136867375
SERPIND1,-0.006672967
LGI2,0.020502491
FGF19,-0.022298708
CBLN3,0.007024268
COL5A3,0.136411073


# Joined data

In [8]:
# Build the gene-level + module-level table for a set of genes:
#   - keep the rows of `network_mm_gs_df` whose geneID is in `wanted_gene_ids`,
#   - keep only the gene's module and its mm_cor / mm_pval columns,
#   - attach the matching row of `network_me_sig_df` by `module`
#     (inner join: genes whose module has no row there are dropped),
#   - pass the result through condense_figo(include_pvals = TRUE).
join_network_stats <- function(wanted_gene_ids) {
    gene_rows <- dplyr::filter(network_mm_gs_df, geneID %in% wanted_gene_ids)
    gene_rows %>%
        dplyr::select(geneID, module, mm_cor, mm_pval) %>%
        inner_join(network_me_sig_df, by = "module") %>%
        condense_figo(include_pvals = TRUE)
}

# Genes significant in the univariate survival analysis
univ_joined_df <- join_network_stats(sig_univ_df$geneID)

# Genes listed in the EN Cox PH results
en_joined_df <- join_network_stats(en_cph_df$geneID)

In [9]:
# Show both joined tables: univariate-significant genes first, then the
# EN Cox PH genes.
univ_joined_df
en_joined_df

geneID,module,mm_cor,mm_pval,vital_hr,vital_dev_cor,vital_pval,vital_qval,figo_min_pval,figo_min_qval,figo_max_cor
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
ANGPTL2,turquoise,0.60598832,2.696626e-15,15.210746,0.10260576,0.2527572,0.7891011,0.03665683,0.06840227,0.12743211
SERPIND1,brown,0.12339366,0.1478439,3.874555,0.03686686,0.5880984,0.7891011,0.26831026,0.1199615,0.07632176
COL5A3,turquoise,0.49531043,5.689026e-10,15.210746,0.10260576,0.2527572,0.7891011,0.03665683,0.06840227,0.12743211
REG3A,brown,0.01252468,0.8836561,3.874555,0.03686686,0.5880984,0.7891011,0.26831026,0.1199615,0.07632176


geneID,module,mm_cor,mm_pval,vital_hr,vital_dev_cor,vital_pval,vital_qval,figo_min_pval,figo_min_qval,figo_max_cor
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
ANGPTL2,turquoise,0.60598832,2.696626e-15,15.210745749,0.10260576,0.2527572,0.7891011,0.03665683,0.06840227,0.12743211
SERPIND1,brown,0.12339366,0.1478439,3.874555059,0.03686686,0.5880984,0.7891011,0.268310259,0.1199615,0.07632176
CBLN3,black,-0.04606288,0.5902629,14.764553299,0.12032337,0.2357867,0.7891011,0.006693521,0.0257467,0.22680162
LGI2,black,0.4047934,7.709714e-07,14.764553299,0.12032337,0.2357867,0.7891011,0.006693521,0.0257467,0.22680162
COL5A3,turquoise,0.49531043,5.689026e-10,15.210745749,0.10260576,0.2527572,0.7891011,0.03665683,0.06840227,0.12743211
TGM5,red,0.01403149,0.869776,0.870808814,-0.01165428,0.9518628,0.992128,0.060059827,0.09883772,0.15990569
PCOLCE2,black,0.16359886,0.05430815,14.764553299,0.12032337,0.2357867,0.7891011,0.006693521,0.0257467,0.22680162
COL6A5,turquoise,0.24895786,0.003123223,15.210745749,0.10260576,0.2527572,0.7891011,0.03665683,0.06840227,0.12743211
TNFSF11,turquoise,0.23961095,0.004497676,15.210745749,0.10260576,0.2527572,0.7891011,0.03665683,0.06840227,0.12743211
CST6,red,0.10161756,0.2339212,0.870808814,-0.01165428,0.9518628,0.992128,0.060059827,0.09883772,0.15990569


In [10]:
# Proportion of rows in a joined table whose module is shared with another
# row already counted: 1 - (distinct modules / rows).
# `n_modules` is the number of distinct modules, `n_rows` the number of rows.
# Returns NA for an empty table instead of the NaN that 0 / 0 would produce.
prop_module_repeats <- function(n_modules, n_rows) {
    if (n_rows == 0) {
        return(NA_real_)
    }
    1 - n_modules / n_rows
}

# Number of rows in the eigengene-trait table
# (presumably one row per module -- TODO confirm)
nrow(network_me_sig_df)

# Univariate models: number of joined genes (rows)
nrow(univ_joined_df)
# Univariate models: number of distinct modules among those genes
n_univ_mod <- length(unique(univ_joined_df$module))
n_univ_mod
# Univariate models: proportion of module repeats
prop_module_repeats(n_univ_mod, nrow(univ_joined_df))

# EN model: number of joined genes (rows)
nrow(en_joined_df)
# EN model: number of distinct modules among those genes
n_en_mod <- length(unique(en_joined_df$module))
n_en_mod
# EN model: proportion of module repeats
prop_module_repeats(n_en_mod, nrow(en_joined_df))