In [17]:
library(tidyverse)
library(WGCNA)
library(cowplot)
library(matrixStats)

# Custom package
library(rutils)

In [18]:
# Keep character columns as plain strings when data frames are built
# (only changes behaviour on R < 4.0, where the default was TRUE).
options(stringsAsFactors = FALSE)
# Turn on WGCNA multi-threading; the number of worker processes is reported in the cell output.
enableWGCNAThreads()

Allowing parallel execution with up to 15 working processes.


In [19]:
# Project directory lookup, read from the shared dev-paths file.
dirs <- rutils::get_dev_directories(dev_paths_file = "../dev_paths.txt")

# Base names of the unified datasets; `dset_idx` selects the one analysed in this notebook.
unified_dsets <- c(
    "unified_cervical_data",
    "unified_uterine_data",
    "unified_uterine_endometrial_data"
)

# Numeric coding of vital status.
event_code <- list(Alive = 0, Dead = 1)

In [20]:
# Position in `unified_dsets` of the dataset to analyse (1 = "unified_cervical_data").
dset_idx <- 1

# Load data

In [21]:
# Restore the saved tumor data and network objects for the selected dataset into the workspace.
# NOTE(review): later cells expect `data_expr`, `data_traits` and `module_colors` to come from
# these files -- confirm against the notebook that saved them.
# `lnames` is overwritten, so it ends up holding only the names restored by the second load().
lnames <- load(
    file = file.path(dirs$data_dir, "saved_network_objects", paste0(unified_dsets[dset_idx], "_tumor_data.RData"))
)
lnames <- load(
    file = file.path(dirs$data_dir, "saved_network_objects", paste0(unified_dsets[dset_idx], "_tumor_network.RData"))
)

In [22]:
# Dimensions of the expression table: rows are counted as samples, columns as genes.
n_samples <- nrow(data_expr)
n_genes <- ncol(data_expr)

In [23]:
# Module eigengenes computed from the expression data and the per-gene module colours,
# then passed through orderMEs() to obtain the column order used in the rest of the notebook.
mes_0 <- moduleEigengenes(expr = data_expr, colors = module_colors)$eigengenes
mes <- orderMEs(MEs = mes_0)

# Module significance for survival analysis

In [24]:
# Screen each module eigengene against censored survival time.
module_vital <- standardScreeningCensoredTime(
    datExpr = mes,
    time = data_traits$survival_time,
    event = data_traits$vital_status,
    qValues = TRUE,
    fastCalculation = FALSE
)

# Keep the hazard ratio, deviance correlation and log-rank p/q-values.
# select() renames while selecting, so every statistic gets its "vital_" prefix in one step.
module_vital_df <- as_tibble(module_vital) %>%
    dplyr::rename_all(tolower) %>%
    dplyr::select(
        module = id,
        vital_hr = hazardratio,
        vital_dev_cor = cordeviance,
        vital_pval = pvaluelogrank,
        vital_qval = qvaluelogrank
    )

In [25]:
# Screen each gene against censored survival time (same call as for the module eigengenes).
gene_vital <- standardScreeningCensoredTime(
    datExpr = data_expr,
    time = data_traits$survival_time,
    event = data_traits$vital_status,
    qValues = TRUE,
    fastCalculation = FALSE
)

# Keep the hazard ratio, deviance correlation and log-rank p/q-values, keyed by geneID.
# select() renames while selecting, so every statistic gets its "vital_" prefix in one step.
gene_vital_df <- as_tibble(gene_vital) %>%
    dplyr::rename_all(tolower) %>%
    dplyr::select(
        geneID = id,
        vital_hr = hazardratio,
        vital_dev_cor = cordeviance,
        vital_pval = pvaluelogrank,
        vital_qval = qvaluelogrank
    )
# To inspect the genes passing the q-value cut-off:
# nrow(gene_vital_df[gene_vital_df$vital_qval < 0.05, ])
# head(gene_vital_df[gene_vital_df$vital_qval < 0.05, ])

“Loglik converged before variable  1 ; coefficient may be infinite. ”

# Module significance for FIGO stage

In [26]:
# Correlate every module eigengene with every trait column left after dropping the first two
# columns of data_traits.
# NOTE(review): the dropped columns are presumably survival_time and vital_status -- confirm
# the column order of data_traits.
# `use = "p"` matches the gene-level correlations computed later in the notebook, so module-
# and gene-level statistics treat missing trait values the same way. With complete data the
# result is unchanged.
# NOTE(review): the p-values derived from this matrix use n_samples for every pair; if traits
# do contain NAs, the effective sample size per pair is smaller.
module_figo_cor <- WGCNA::cor(mes, data_traits[-c(1:2)], use = "p")

# Long-lived tidy copy: one row per module, one "<trait>_cor" column per trait.
module_figo_cor_df <- module_figo_cor %>%
    as_tibble(rownames = "module") %>%
    dplyr::rename_if((colnames(.) != "module"), ~ paste0(., "_cor"))

In [27]:
# Student p-values for the module/trait correlations, using the full sample count.
module_figo_pval <- corPvalueStudent(cor = module_figo_cor, nSamples = n_samples)

# Tidy copy: one row per module, one "<trait>_pval" column per trait.
module_figo_pval_df <- as_tibble(module_figo_pval, rownames = "module") %>%
    dplyr::rename_at(dplyr::vars(-module), function(nm) paste0(nm, "_pval"))

In [28]:
# q-values computed separately for each trait (column) of the module/trait p-value matrix,
# then tidied to one row per module with "<trait>_qval" columns.
module_figo_qval_df <- module_figo_pval %>%
    apply(MARGIN = 2, FUN = function(pvals) WGCNA::qvalue(pvals)$qvalues) %>%
    as_tibble(rownames = "module") %>%
    dplyr::rename_at(dplyr::vars(-module), function(nm) paste0(nm, "_qval"))

# Find genes with high module membership (MM) and gene significance (GS)

In [29]:
# Gene-to-module assignment: pairs each column name of data_expr with the entry of
# module_colors at the same position.
gene_module_map_df <- tibble(geneID = colnames(data_expr), module = module_colors)

In [30]:
# Module membership (MM): correlation of each gene with each module eigengene,
# followed by Student p-values and per-module (column-wise) q-values.
gene_mm <- WGCNA::cor(data_expr, mes, use = "p")
gene_mm_pval <- corPvalueStudent(cor = gene_mm, nSamples = n_samples)
gene_mm_qval <- apply(
    gene_mm_pval,
    MARGIN = 2,
    FUN = function(pvals) WGCNA::qvalue(pvals)$qvalues
)

In [31]:
# Gene significance (GS): correlation of each gene with each trait column left after dropping
# the first two columns of data_traits, followed by Student p-values and per-trait q-values.
gene_figo_cor <- WGCNA::cor(data_expr, data_traits[, -c(1:2)], use = "p")
gene_figo_pval <- corPvalueStudent(cor = gene_figo_cor, nSamples = n_samples)
gene_figo_qval <- apply(
    gene_figo_pval,
    MARGIN = 2,
    FUN = function(pvals) WGCNA::qvalue(pvals)$qvalues
)

In [32]:
# Turn a gene x eigengene matrix into a tibble keyed by geneID.
# Columns starting with "ME" lose that marker, then every column except geneID gets `suffix`
# appended, e.g. "MEblue" -> "blue_cor".
mm_matrix_to_tibble <- function(mm_mat, suffix) {
    mm_tbl <- as_tibble(mm_mat, rownames = "geneID")
    col_nms <- colnames(mm_tbl)
    is_me <- startsWith(col_nms, "ME")
    col_nms[is_me] <- gsub("ME", "", col_nms[is_me])
    is_stat <- !startsWith(col_nms, "geneID")
    col_nms[is_stat] <- paste0(col_nms[is_stat], suffix)
    colnames(mm_tbl) <- col_nms
    mm_tbl
}

gene_mm_df <- mm_matrix_to_tibble(gene_mm, "_cor")
gene_mm_pval_df <- mm_matrix_to_tibble(gene_mm_pval, "_pval")
gene_mm_qval_df <- mm_matrix_to_tibble(gene_mm_qval, "_qval")

In [33]:
# Tidy the gene/trait matrices: one row per gene (geneID), one column per trait, with a suffix
# naming the statistic ("_cor", "_pval", "_qval").
gene_figo_df <- as_tibble(gene_figo_cor, rownames = "geneID") %>%
    dplyr::rename_if(!startsWith(colnames(.), "geneID"), function(nm) paste0(nm, "_cor"))

gene_figo_pval_df <- as_tibble(gene_figo_pval, rownames = "geneID") %>%
    dplyr::rename_if(!startsWith(colnames(.), "geneID"), function(nm) paste0(nm, "_pval"))

gene_figo_qval_df <- as_tibble(gene_figo_qval, rownames = "geneID") %>%
    dplyr::rename_if(!startsWith(colnames(.), "geneID"), function(nm) paste0(nm, "_qval"))

# Save ME/trait relationships

In [34]:
# Combine all per-module tables (trait correlations, p-values, q-values and survival screening)
# on the module key, then strip the "ME" marker from the module names.
mes_trait_df <- Reduce(
    function(joined, next_tbl) inner_join(joined, next_tbl, by = "module"),
    list(module_figo_cor_df, module_figo_pval_df, module_figo_qval_df, module_vital_df)
) %>%
    dplyr::mutate(module = gsub("ME", "", module))

In [35]:
# Write the per-module trait statistics to "<analysis_dir>/network/<dataset>_eigengene_traits.tsv".
write_tsv(mes_trait_df, paste0(dirs$analysis_dir, "/network/", unified_dsets[dset_idx], "_eigengene_traits.tsv"))

# Save gene MM & GS correlations/p-values

In [36]:
# Look up, row by row, the statistic stored in the column that belongs to each gene's own
# module (e.g. "blue_cor" for a gene assigned to module "blue").
#   df:     data frame with a `module` column and one "<module><suffix>" column per module
#   suffix: column-name suffix of the statistic ("_cor", "_pval" or "_qval")
# Returns a vector with one value per row of `df`; stops if a needed column is absent.
assigned_module_stat <- function(df, suffix) {
    wanted_cols <- paste0(df$module, suffix)
    stat_cols <- unique(wanted_cols)
    missing_cols <- setdiff(stat_cols, colnames(df))
    if (length(missing_cols) > 0) {
        stop("No module column found for: ", paste(missing_cols, collapse = ", "), call. = FALSE)
    }
    stat_mat <- as.matrix(df[, stat_cols, drop = FALSE])
    # Indexing with (row, column) pairs picks a different column for every row.
    stat_mat[cbind(seq_len(nrow(df)), match(wanted_cols, stat_cols))]
}

joined_mm_gs_df <- gene_module_map_df %>%
    inner_join(gene_mm_df, by = "geneID") %>%
    inner_join(gene_mm_pval_df, by = "geneID") %>%
    inner_join(gene_mm_qval_df, by = "geneID") %>%
    # Specify cor, pval, and qval for the module to which this gene is assigned.
    # The lookup has to be per row: as.name(paste0(.$module, "_cor")) uses only the first
    # element of the vector, which would copy the first gene's module column to every gene.
    dplyr::mutate(
        mm_cor = assigned_module_stat(., "_cor"),
        mm_pval = assigned_module_stat(., "_pval"),
        mm_qval = assigned_module_stat(., "_qval")
    ) %>%
    dplyr::select(geneID, module, mm_cor, mm_pval, mm_qval, everything()) %>%
    inner_join(gene_figo_df, by = "geneID") %>%
    inner_join(gene_figo_pval_df, by = "geneID") %>%
    inner_join(gene_figo_qval_df, by = "geneID") %>%
    inner_join(gene_vital_df, by = "geneID")

In [37]:
# Write the per-gene module-membership and trait statistics to
# "<analysis_dir>/network/<dataset>_gene_mm_gs.tsv".
write_tsv(joined_mm_gs_df, paste0(dirs$analysis_dir, "/network/", unified_dsets[dset_idx], "_gene_mm_gs.tsv"))

# Save module Eigengenes

In [38]:
# Eigengene table with the row names exposed as a `sample_name` column and the "ME" marker
# removed from the module column names.
mes_df <- as_tibble(mes, rownames = "sample_name") %>%
    dplyr::rename_if(startsWith(colnames(.), "ME"), function(nm) gsub("ME", "", nm))

In [39]:
# Write the per-sample module eigengenes to "<analysis_dir>/network/<dataset>_eigengenes.tsv".
write_tsv(mes_df, paste0(dirs$analysis_dir, "/network/", unified_dsets[dset_idx], "_eigengenes.tsv"))

# Visualize module results for each trait

In [40]:
# Long-format table for plotting: one row per gene x trait, carrying the gene's correlation
# with (and p-value for) the eigengene of its own module.
#
# The module columns are matched by exact name ("<module>_cor" / "<module>_pval"). A prefix
# test is not enough because module names can be prefixes of one another (e.g. "grey" and
# "grey60"), which would keep rows belonging to the wrong module.
# Each filter runs right after the pivot it applies to, so the intermediate table never grows
# to genes x modules x traits x modules rows; the resulting rows and column order are the
# same as when filtering once at the end.
pivot_mm_gs_df <- joined_mm_gs_df %>%
    dplyr::select(-one_of(colnames(gene_figo_pval_df)[-1], "vital_pval", "vital_qval", "vital_hr")) %>%
    pivot_longer(cols = colnames(gene_mm_df)[-1], names_to = "module_cor", values_to = "module_cor_val") %>%
    dplyr::filter(module_cor == paste0(module, "_cor")) %>%
    pivot_longer(cols = c(colnames(gene_figo_df)[-1], "vital_dev_cor"), names_to = "trait_cor", values_to = "trait_cor_val") %>%
    pivot_longer(cols = colnames(gene_mm_pval_df)[-1], names_to = "module_pval", values_to = "module_pval_val") %>%
    dplyr::filter(module_pval == paste0(module, "_pval"))

In [41]:
# Format numbers as fixed-point strings.
#   x: numeric vector to format
#   d: number of digits after the decimal point
# Returns a character vector, e.g. axis_scale(0.5, 2) gives "0.50".
axis_scale <- function(x, d) {
    fixed_fmt <- paste0("%.", d, "f")
    sprintf(fixed_fmt, x)
}

# Scatter |gene-eigengene correlation| against |gene-trait correlation|, one panel per module,
# with a linear fit drawn in black.
#   pivot_df:               long-format table; the columns read here are trait_cor,
#                           trait_cor_val, module_cor_val, module_pval_val and module
#   trait_str:              trait prefix; only rows with trait_cor == "<trait_str>_cor" are drawn
#   module_pval_thresh:     a point is coloured as significant when module_pval_val is below
#                           this value ...
#   module_cor_pval_thresh: ... and abs(module_cor_val) is above this value (despite its name
#                           it is compared with the correlation, not with a p-value)
# Returns the ggplot object.
facet_plot <- function(pivot_df, trait_str, module_pval_thresh = 0.05, module_cor_pval_thresh = 0.5) {
    trait_rows <- dplyr::filter(pivot_df, trait_cor == paste0(trait_str, "_cor"))
    two_decimals <- function(breaks) {
        axis_scale(breaks, 2)
    }
    y_title <- paste0("gene abs(", gsub("_", " ", trait_str), " cor.)")

    # x/y are shared by both layers; colour stays on the points only so that the smoother
    # still fits a single line per panel.
    ggplot(trait_rows, aes(x = abs(module_cor_val), y = abs(trait_cor_val))) +
        geom_point(aes(color = (module_pval_val < module_pval_thresh) & (abs(module_cor_val) > module_cor_pval_thresh))) +
        geom_smooth(method = "lm", color = "black") +
        facet_wrap(~ module, ncol = 3) +
        theme_classic() +
        labs(color = "Sig. in module", x = "gene abs(ME cor.)", y = y_title) +
        scale_x_continuous(labels = two_decimals) +
        scale_y_continuous(labels = two_decimals)
}

In [42]:
# Draw and save one faceted MM-vs-GS figure per trait.
trait_strs <- c("vital_dev", "figo_stage_1", "figo_stage_2", "figo_stage_3", "figo_stage_4")
for (ts in trait_strs) {
    # Hand the plot to ggsave() explicitly instead of relying on the implicit last_plot()
    # global, so the saved figure cannot be affected by any other plot touched in between.
    ggsave(
        filename = paste0(dirs$figures_dir, "/network/", unified_dsets[dset_idx], "_module_", ts, "_facet.png"),
        plot = facet_plot(pivot_mm_gs_df, ts),
        height = 10,
        width = 10
    )
}


`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
