In [1]:
library(tidyverse)
library(clusterProfiler)
library(BiocParallel)


# Custom package
library(rutils)

-- [1mAttaching packages[22m ------------------------------------------------------------------------------- tidyverse 1.3.0 --

[32mv[39m [34mggplot2[39m 3.3.3     [32mv[39m [34mpurrr  [39m 0.3.4
[32mv[39m [34mtibble [39m 3.0.6     [32mv[39m [34mdplyr  [39m 1.0.4
[32mv[39m [34mtidyr  [39m 1.1.2     [32mv[39m [34mstringr[39m 1.4.0
[32mv[39m [34mreadr  [39m 1.4.0     [32mv[39m [34mforcats[39m 0.5.1

-- [1mConflicts[22m ---------------------------------------------------------------------------------- tidyverse_conflicts() --
[31mx[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31mx[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()



clusterProfiler v3.18.0  For help: https://guangchuangyu.github.io/software/clusterProfiler

If you use clusterProfiler in published research, please cite:
Guangchuang Yu, Li-Gen Wang, Yanyan Han, Qing-Yu He. clusterProfiler: an R package for comparing biological themes amo

In [2]:
# Project directory locations, read from the shared dev-paths file.
dirs <- rutils::get_dev_directories(
    dev_paths_file = "../dev_paths.txt"
)

# Names of the unified datasets; one of them is picked by index further down.
unified_dsets <- c(
    "unified_cervical_data",
    "unified_uterine_data",
    "unified_uterine_endometrial_data"
)

# Project codes -- presumably listed in the same order as unified_dsets (TODO confirm).
proj <- c(
    "CESC",
    "UCS",
    "UCEC"
)

# Location of the matrisome master list under the data directory.
matrisome_path <- paste0(
    dirs$data_dir,
    "/matrisome/matrisome_hs_masterlist.tsv"
)

In [3]:
# Cutoff applied to the module-membership p-values and eigengene q-values below.
pval_thresh <- 0.05

# Threshold handed to get_most_conn_genes() as conn_vs_hub_thresh.
# Other values that have been tried: 0.25 and 0.0.
hub_con_thresh <- 0.5

# Position in unified_dsets of the dataset this run processes.
dset_idx <- 3

In [4]:
# Restore the saved tumor data and network objects into the current environment.
# load() returns the names of the restored objects; `lnames` is overwritten by
# the second call, so it ends up holding the names from the network file only.
# NOTE(review): data_expr, module_colors, soft_power and tom used in later cells
# presumably come from these two files -- confirm.
lnames <- load(file = paste0(dirs$data_dir, "/saved_network_objects/", unified_dsets[dset_idx], "_tumor_data.RData"))
lnames <- load(file = paste0(dirs$data_dir, "/saved_network_objects/", unified_dsets[dset_idx], "_tumor_network.RData"))

# Network analysis tables and the gene list for the selected dataset.
network_mm_gs_df <- read_tsv(paste0(dirs$analysis_dir, "/network/", unified_dsets[dset_idx], "_gene_mm_gs.tsv"))
network_me_sig_df <- read_tsv(paste0(dirs$analysis_dir, "/network/", unified_dsets[dset_idx], "_eigengene_traits.tsv"))
demg_list <- read_lines(paste0(dirs$analysis_dir, "/gene_lists/", unified_dsets[dset_idx], "_dge_m_gene_list.txt"))

# Condense the eigengene/trait table, then prefix every column whose name does
# not start with "module" with "me_" (giving e.g. me_figo_min_qval, which the
# filtering cell relies on). The match is case-sensitive, like startsWith().
condensed_me_df <- network_me_sig_df %>%
    condense_figo(include_pvals = TRUE) %>%
    dplyr::rename_with(
        ~ paste0("me_", .x),
        .cols = !starts_with("module", ignore.case = FALSE)
    )

# Symbol <-> Entrez ID mapping for the selected dataset.
gene2id_df <- read_tsv(paste0(dirs$data_dir, "/", unified_dsets[dset_idx], "/hugo2entrez.tsv"))

# Matrisome master list. This used to be read twice from the same path; a
# single read via matrisome_path is enough.
matrisome_df <- load_matrisome_df(matrisome_path)


[36m--[39m [1m[1mColumn specification[1m[22m [36m------------------------------------------------------------------------------------------------[39m
cols(
  .default = col_double(),
  geneID = [31mcol_character()[39m,
  module = [31mcol_character()[39m
)
[36mi[39m Use [30m[47m[30m[47m`spec()`[47m[30m[49m[39m for the full column specifications.



[36m--[39m [1m[1mColumn specification[1m[22m [36m------------------------------------------------------------------------------------------------[39m
cols(
  module = [31mcol_character()[39m,
  figo_stage_1_cor = [32mcol_double()[39m,
  figo_stage_2_cor = [32mcol_double()[39m,
  figo_stage_3_cor = [32mcol_double()[39m,
  figo_stage_4_cor = [32mcol_double()[39m,
  figo_stage_1_pval = [32mcol_double()[39m,
  figo_stage_2_pval = [32mcol_double()[39m,
  figo_stage_3_pval = [32mcol_double()[39m,
  figo_stage_4_pval = [32mcol_double()[39m,
  figo_stage_1_qval = [32mcol_double()[39m,
  figo_stage_2_

In [5]:
# Label both axes of the TOM with the column names of the expression data.
rownames(tom) <- colnames(tom) <- colnames(data_expr)

In [6]:
# Most-connected genes at the chosen threshold: the per-element results are
# stacked into one table, with the element name recorded in `module` and
# geneID moved to the first column.
hub_df <- get_most_conn_genes(
    data_expr,
    module_colors,
    soft_power,
    conn_vs_hub_thresh = hub_con_thresh
) %>%
    bind_rows(.id = "module") %>%
    dplyr::relocate(geneID)

In [7]:
# Genes that pass every filter: module eigengene significant for FIGO stage,
# significant module membership, present in the hub table, and present in the
# DEMG list. Eigengene columns are joined by module and Entrez IDs by symbol.
#
# Every dplyr verb is namespace-qualified: the enrichment cell attaches
# org.Hs.eg.db / AnnotationDbi, whose select() masks dplyr::select(), so a bare
# select() here fails when this cell is re-run after that one.
filtered_figo_network_df <- network_mm_gs_df %>%
    dplyr::select(geneID, module, mm_pval, mm_cor) %>%
    dplyr::inner_join(condensed_me_df, by = "module") %>%
    dplyr::inner_join(gene2id_df, by = c("geneID" = "hugo_symbol")) %>%
    dplyr::select(geneID, entrez_gene_id, everything()) %>%
    dplyr::filter(
        # Module eigengene must be significant for FIGO stage
        me_figo_min_qval < pval_thresh,
        # Make sure genes are significant members of the module
        mm_pval < pval_thresh,
        # Make sure genes are highly connected within the module
        geneID %in% hub_df$geneID,
        # Keep only genes that are in the DEMG list
        geneID %in% demg_list
    ) %>%
    dplyr::select(-contains("vital"))

In [8]:
# Slim module-membership table: gene, module, and the membership correlation
# and p-value. select() is namespace-qualified because AnnotationDbi (attached
# by the enrichment cell) masks dplyr::select(), which breaks a bare select()
# when this cell is re-run afterwards.
simple_mm_df <- network_mm_gs_df %>%
    dplyr::select(geneID, module, mm_cor, mm_pval)

In [9]:
# GO enrichment over all ontologies for the filtered genes' Entrez IDs,
# with gene IDs mapped back to readable symbols in the result.
go_res <- enrichGO(
    gene = filtered_figo_network_df$entrez_gene_id,
    OrgDb = "org.Hs.eg.db",
    ont = "ALL",
    readable = TRUE
)

Loading required package: org.Hs.eg.db

Loading required package: AnnotationDbi

Loading required package: stats4

Loading required package: BiocGenerics

Loading required package: parallel


Attaching package: 'BiocGenerics'


The following objects are masked from 'package:parallel':

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB


The following objects are masked from 'package:dplyr':

    combine, intersect, setdiff, union


The following objects are masked from 'package:stats':

    IQR, mad, sd, var, xtabs


The following objects are masked from 'package:base':

    Filter, Find, Map, Position, Reduce, anyDuplicated, append,
    as.data.frame, basename, cbind, colnames, dirname, do.call,
    duplicated, eval, evalq, get, grep, grepl, intersect, is.unsorted,
    lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
    pmin.int, rank, rbind, ro

In [10]:
# KEGG pathway enrichment (human, "hsa") for the same Entrez IDs.
kegg_res <- enrichKEGG(
    gene = filtered_figo_network_df$entrez_gene_id,
    organism = "hsa"
)

Reading KEGG annotation online:


Reading KEGG annotation online:




In [11]:
# Convert each enrichment result into a data frame; the two conversions are
# independent of one another.
kegg_df <- make_ea_df(
    kegg_res,
    ea_type = "KEGG"
)
go_df <- make_ea_df(
    go_res,
    ea_type = "GO"
)

In [12]:
# Write the TOM, module-membership table, filtered gene table and enrichment
# tables for the selected dataset under <analysis_dir>/one_off/.
# Wrapped in local() so the path helpers do not leak into the global environment.
local({
    out_dir <- paste0(dirs$analysis_dir, "/one_off")
    # Create the output directory on first use instead of failing on the first write.
    dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
    out_prefix <- paste0(out_dir, "/", unified_dsets[dset_idx])

    write.table(tom, paste0(out_prefix, "_tom.txt"))
    write_tsv(simple_mm_df, paste0(out_prefix, "_mm.tsv"))
    write_tsv(filtered_figo_network_df, paste0(out_prefix, "_pseudo_hub_FIGO_sig_and_DEMG.tsv"))
    write_tsv(go_df, paste0(out_prefix, "_GO.tsv"))
    # The KEGG table is only written when it has at least one row.
    if (nrow(kegg_df) > 0) {
        write_tsv(kegg_df, paste0(out_prefix, "_KEGG.tsv"))
    }
    invisible(NULL)
})