In [234]:
library(tidyverse)
library(clusterProfiler)
library(WGCNA)
library(parallel)

# Custom package
library(rutils)

In [235]:
# Resolve the project directories from the shared dev-paths file.
dirs <- rutils::get_dev_directories(dev_paths_file = "../dev_paths.txt")
# Names of the unified datasets; `dset_idx` picks one of them for this run.
unified_dsets <- c(
    "unified_cervical_data",
    "unified_uterine_data",
    "unified_uterine_endometrial_data"
)
# Location of the matrisome master list under the data directory.
matrisome_path <- paste0(
    dirs$data_dir,
    "/matrisome/matrisome_hs_masterlist.tsv"
)

# Numeric coding of the two vital-status labels.
event_code <- list(Alive = 0, Dead = 1)

In [236]:
# Position in `unified_dsets` of the dataset analysed in this run
# (3 selects "unified_uterine_endometrial_data").
dset_idx <- 3

In [237]:
# Restore the saved tumor data and network objects for the selected dataset.
# `load()` returns the names of the restored objects; the second assignment
# overwrites the first, so `lnames` ends up holding only the network file's names.
# NOTE(review): `data_expr`, `module_colors` and `soft_power` used further down
# presumably come from these two files -- confirm.
lnames <- load(
    file = paste0(
        dirs$data_dir, "/saved_network_objects/",
        unified_dsets[dset_idx], "_tumor_data.RData"
    )
)
lnames <- load(
    file = paste0(
        dirs$data_dir, "/saved_network_objects/",
        unified_dsets[dset_idx], "_tumor_network.RData"
    )
)

In [238]:
# Minimum absolute log2 fold change required by the DGE filter (log2(2) = 1).
lfc_thresh <- log2(2)
# Significance cutoff compared against q-values / p-values in the filters below.
pval_thresh <- 0.05
# A `mean*` column of `lr_df` must exceed this value to contribute a vote.
perm_thresh <- 0
# Minimum number of votes a gene needs to be kept by the `lr_df` filter.
vote_thresh <- 5

In [239]:
# Lookup table with `hugo_symbol` and `entrez_gene_id` columns for the selected dataset.
gene2id_df <- read_tsv(paste0(dirs$data_dir, "/", unified_dsets[dset_idx], "/hugo2entrez.tsv"))
# Matrisome master list. Reuse `matrisome_path`, which already holds this exact
# path, instead of rebuilding the same string by hand.
matrisome_df <- load_matrisome_df(matrisome_path)

Parsed with column specification:
cols(
  hugo_symbol = col_character(),
  entrez_gene_id = col_double()
)
Parsed with column specification:
cols(
  Division = col_character(),
  Category = col_character(),
  `Gene Symbol` = col_character(),
  `Gene Name` = col_character(),
  Synonyms = col_character(),
  HGNC_IDs = col_double(),
  `HGNC_IDs Links` = col_double(),
  UniProt_IDs = col_character(),
  Refseq_IDs = col_character(),
  Orthology = col_character(),
  Notes = col_character()
)


# Load results data

In [240]:
# Result tables for the selected dataset, all read from the analysis directory.
dge_df <- read_tsv(
    paste0(dirs$analysis_dir, "/", unified_dsets[dset_idx], "_DESeq_results.tsv")
)
lr_df <- read_tsv(
    paste0(dirs$analysis_dir, "/", unified_dsets[dset_idx], "_l1_lr_results.tsv")
)
anova_df <- read_tsv(
    paste0(dirs$analysis_dir, "/", unified_dsets[dset_idx], "_welch_anova_results.tsv")
)
# Network outputs live in the `network/` sub-directory.
network_mm_gs_df <- read_tsv(
    paste0(dirs$analysis_dir, "/network/", unified_dsets[dset_idx], "_gene_mm_gs.tsv")
)
network_me_sig_df <- read_tsv(
    paste0(dirs$analysis_dir, "/network/", unified_dsets[dset_idx], "_eigengene_traits.tsv")
)
# Condense the eigengene/trait table, then prefix every column whose name does
# not start with "module" with "me_" so the names stay distinct after joins.
condensed_me_df <- network_me_sig_df %>%
    condense_figo(include_pvals = TRUE) %>%
    dplyr::rename_if(
        !startsWith(colnames(.), "module"),
        ~ gsub("^", "me_", .)
    )
coxph_df <- read_tsv(
    paste0(dirs$analysis_dir, "/", unified_dsets[dset_idx], "_coxph_results.tsv")
)
cor_df <- read_tsv(
    paste0(dirs$analysis_dir, "/", unified_dsets[dset_idx], "_cor_results.tsv")
)

Parsed with column specification:
cols(
  geneID = col_character(),
  baseMean = col_double(),
  log2FoldChange = col_double(),
  lfcSE = col_double(),
  stat = col_double(),
  pvalue = col_double(),
  padj = col_double(),
  qval = col_double()
)
Parsed with column specification:
cols(
  geneID = col_character(),
  mean_imp_0 = col_double(),
  score_pct_improvement_0 = col_double(),
  mean_imp_1 = col_double(),
  score_pct_improvement_1 = col_double(),
  mean_imp_2 = col_double(),
  score_pct_improvement_2 = col_double(),
  mean_imp_3 = col_double(),
  score_pct_improvement_3 = col_double(),
  mean_imp_4 = col_double(),
  score_pct_improvement_4 = col_double()
)
Parsed with column specification:
cols(
  geneID = col_character(),
  pval = col_double(),
  padj = col_double(),
  qval = col_double()
)
Parsed with column specification:
cols(
  .default = col_double(),
  geneID = col_character(),
  module = col_character()
)
See spec(...) for full column specifications.
Parsed with column sp

# WGCNA: Identify "hub" genes

In [241]:
# Stack the results of `get_most_conn_genes()` into one table, recording the
# name of each list element in a `module` column, and put `geneID` first.
hub_df <- bind_rows(
    get_most_conn_genes(data_expr, module_colors, soft_power, conn_vs_hub_thresh = 0.5),
    .id = "module"
) %>%
    dplyr::select(geneID, everything())

# DGE

In [242]:
# Differentially expressed genes: q-value below `pval_thresh` and absolute
# log2 fold change above `lfc_thresh`, annotated with Entrez IDs.
filtered_dge_df <- dge_df %>%
    # Use the shared significance cutoff rather than a hard-coded 0.05 so this
    # filter stays in step with the other filters if `pval_thresh` is changed.
    dplyr::filter(qval < pval_thresh & abs(log2FoldChange) > lfc_thresh) %>%
    dplyr::rename(lfc = log2FoldChange) %>%
    # Inner join: genes without a row in `gene2id_df` are dropped.
    inner_join(gene2id_df, by = c("geneID" = "hugo_symbol")) %>%
    dplyr::select(geneID, entrez_gene_id, lfc, padj, qval)

In [243]:
# Restrict the DGE hits to genes listed in the matrisome table.
# NOTE(review): relies on `load_matrisome_df()` exposing a `gene_symbol` column
# (the raw file's column is `Gene Symbol`) -- confirm.
filtered_dge_m_df <- dplyr::filter(
    filtered_dge_df,
    geneID %in% matrisome_df$gene_symbol
)

In [244]:
# Gene symbols of all DGE hits and of the matrisome-only subset.
dge_geneIDs <- filtered_dge_df[["geneID"]]
dge_m_geneIDs <- filtered_dge_m_df[["geneID"]]

In [245]:
# Save the DGE gene symbols, one per line, under `gene_lists/`.
write_lines(
    dge_geneIDs,
    paste0(dirs$analysis_dir, "/gene_lists/", unified_dsets[dset_idx], "_dge_gene_list.txt")
)

# FIGO

In [246]:
# Keep genes whose `mean*` scores win enough votes, annotated with Entrez IDs.
filtered_lr_df <- lr_df %>%
    # Retain `geneID` plus every column whose name starts with "mean".
    dplyr::select(
        one_of("geneID", colnames(.)[startsWith(colnames(.), "mean")])
    ) %>%
    # One vote per score column (everything except the first column, `geneID`)
    # that exceeds `perm_thresh`.
    dplyr::mutate(votes = rowSums(.[, -1] > perm_thresh)) %>%
    dplyr::filter(votes >= vote_thresh) %>%
    inner_join(gene2id_df, by = c("geneID" = "hugo_symbol")) %>%
    dplyr::select(geneID, entrez_gene_id, everything())

In [247]:
# ANOVA hits: q-value below `pval_thresh`, joined to their Entrez IDs, with the
# two identifier columns moved to the front.
filtered_anova_df <- inner_join(
    dplyr::filter(anova_df, qval < pval_thresh),
    gene2id_df,
    by = c("geneID" = "hugo_symbol")
) %>%
    dplyr::select(geneID, entrez_gene_id, everything())

In [248]:
# Network-derived FIGO candidates: genes joined to their module's condensed
# eigengene statistics and to their Entrez IDs, then filtered.
filtered_figo_network_df <- network_mm_gs_df %>%
    dplyr::select(geneID, module, mm_pval, mm_cor) %>%
    inner_join(condensed_me_df, by = "module") %>%
    inner_join(gene2id_df, by = c("geneID" = "hugo_symbol")) %>%
    dplyr::select(geneID, entrez_gene_id, everything()) %>%
    dplyr::filter(
        # The module's `me_figo_min_qval` must pass the significance cutoff
        me_figo_min_qval < pval_thresh,
        # The gene must be a significant member of its module
        mm_pval < pval_thresh,
        # The gene must be highly connected within the module (listed in `hub_df`)
        geneID %in% hub_df$geneID
    )

In [249]:
# Union of the three FIGO-related result sets, once as Entrez IDs and once as
# gene symbols. The fold applies `union` left to right, which also removes duplicates.
figo_entrezIDs <- Reduce(
    union,
    list(
        filtered_lr_df$entrez_gene_id,
        filtered_anova_df$entrez_gene_id,
        filtered_figo_network_df$entrez_gene_id
    )
)
figo_geneIDs <- Reduce(
    union,
    list(
        filtered_lr_df$geneID,
        filtered_anova_df$geneID,
        filtered_figo_network_df$geneID
    )
)

In [250]:
# Sanity check: the Entrez-ID union and the gene-symbol union should contain
# the same number of entries.
length(figo_entrezIDs) == length(figo_geneIDs)

In [251]:
# Save the FIGO gene symbols, one per line, under `gene_lists/`.
write_lines(
    figo_geneIDs,
    paste0(dirs$analysis_dir, "/gene_lists/", unified_dsets[dset_idx], "_figo_gene_list.txt")
)

# Survival

In [252]:
# Cox PH results joined to Entrez IDs, keeping rows whose `gene_qval` is below
# the significance cutoff.
filtered_coxph_df <- dplyr::filter(
    inner_join(coxph_df, gene2id_df, by = c("geneID" = "hugo_symbol")),
    gene_qval < pval_thresh
)

In [253]:
# Correlation results joined to Entrez IDs, keeping rows whose `qval` is below
# the significance cutoff.
filtered_cor_df <- dplyr::filter(
    inner_join(cor_df, gene2id_df, by = c("geneID" = "hugo_symbol")),
    qval < pval_thresh
)

In [254]:
# Network-derived survival candidates: genes joined to their module's condensed
# eigengene statistics and to their Entrez IDs, then filtered.
filtered_coxph_network_df <- network_mm_gs_df %>%
    dplyr::select(geneID, module, mm_pval, mm_cor) %>%
    inner_join(condensed_me_df, by = "module") %>%
    inner_join(gene2id_df, by = c("geneID" = "hugo_symbol")) %>%
    dplyr::select(geneID, entrez_gene_id, everything()) %>%
    dplyr::filter(
        # The module's `me_vital_qval` must pass the significance cutoff
        me_vital_qval < pval_thresh,
        # The gene must be a significant member of its module
        mm_pval < pval_thresh,
        # The gene must be highly connected within the module (listed in `hub_df`)
        geneID %in% hub_df$geneID
    )

In [255]:
# Union of the three survival-related result sets, once as Entrez IDs and once
# as gene symbols. The fold applies `union` left to right, which also removes duplicates.
survival_entrezIDs <- Reduce(
    union,
    list(
        filtered_coxph_df$entrez_gene_id,
        filtered_cor_df$entrez_gene_id,
        filtered_coxph_network_df$entrez_gene_id
    )
)

survival_geneIDs <- Reduce(
    union,
    list(
        filtered_coxph_df$geneID,
        filtered_cor_df$geneID,
        filtered_coxph_network_df$geneID
    )
)

In [256]:
# Sanity check: the Entrez-ID union and the gene-symbol union should contain
# the same number of entries. The check used to compare `survival_geneIDs`
# with itself, which is always TRUE and therefore verified nothing.
length(survival_entrezIDs) == length(survival_geneIDs)

In [257]:
# Save the survival gene symbols, one per line, under `gene_lists/`.
write_lines(
    survival_geneIDs,
    paste0(dirs$analysis_dir, "/gene_lists/", unified_dsets[dset_idx], "_survival_gene_list.txt")
)

In [258]:
# Summary: number of genes in each list (all DGE hits, matrisome-only DGE hits,
# survival-associated, FIGO-associated).
length(dge_geneIDs)
length(dge_m_geneIDs)
length(survival_geneIDs)
length(figo_geneIDs)