In [1]:
library(tidyverse)
library(clusterProfiler)
library(BiocParallel)
library(parallel)
library(org.Hs.eg.db)
library(AnnotationDbi)
library(stats4)
library(BiocGenerics)

# Custom package
library(rutils)

-- Attaching packages ------------------------------------------------------------------------------- tidyverse 1.3.0 --

v ggplot2 3.3.3     v purrr   0.3.4
v tibble  3.0.6     v dplyr   1.0.4
v tidyr   1.1.2     v stringr 1.4.0
v readr   1.4.0     v forcats 0.5.1

-- Conflicts ---------------------------------------------------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()



clusterProfiler v3.18.0  For help: https://guangchuangyu.github.io/software/clusterProfiler

If you use clusterProfiler in published research, please cite:
Guangchuang Yu, Li-Gen Wang, Yanyan Han, Qing-Yu He. clusterProfiler: an R package for comparing biological themes among gene clusters. OMICS: A Journal of Integrative Biology. 2012, 16(5):284-287.

In [2]:
# Resolve the project's directory layout from the shared dev-paths file.
dirs <- rutils::get_dev_directories(dev_paths_file = "../dev_paths.txt")

# Dataset directory names and their short project codes. The two vectors are
# parallel: both are indexed with the same `dset_idx` further down.
unified_dsets <- c(
    "unified_cervical_data",
    "unified_uterine_data",
    "unified_uterine_endometrial_data"
)
projects <- c(
    "cesc",
    "ucs",
    "ucec"
)

In [3]:
# Position in `unified_dsets` / `projects` of the dataset to analyse.
dset_idx <- 3
# When TRUE, the enrichment results are written to disk in the last cell.
save_res <- TRUE

# Fail fast on a bad selection: an out-of-range index would otherwise evaluate
# to NA and silently produce file paths containing "NA".
stopifnot(
    length(dset_idx) == 1,
    dset_idx %in% seq_along(unified_dsets),
    length(unified_dsets) == length(projects),
    isTRUE(save_res) || isFALSE(save_res)
)

In [4]:
# HUGO symbol -> Entrez gene ID lookup table for the selected dataset.
gene2id_df <- paste0(dirs$data_dir, "/", unified_dsets[dset_idx], "/hugo2entrez.tsv") %>%
    read_tsv()

# Gene list (one symbol per line) for the selected project, annotated with
# Entrez IDs. The inner join keeps only symbols present in the lookup table.
figo_umsmg_demg_df <- inner_join(
    tibble(
        geneID = read_lines(
            paste0(dirs$analysis_dir, "/gene_lists_extra/", projects[dset_idx], "_figo_umsmg_demg.txt")
        )
    ),
    gene2id_df,
    by = c("geneID" = "hugo_symbol")
)


-- Column specification ------------------------------------------------------------------------------------------------
cols(
  hugo_symbol = col_character(),
  entrez_gene_id = col_double()
)




In [5]:
# Locate the per-module gene list files that belong to the selected dataset.
figo_nsmg_demg_files <- list.files(paste0(dirs$analysis_dir, "/gene_lists_extra_network"), full.names = TRUE)
# Match on the file name only, and treat the dataset name as a literal string,
# so that neither a parent directory name nor regex metacharacters can cause
# files from another dataset to be selected.
dset_mask <- str_detect(basename(figo_nsmg_demg_files), fixed(unified_dsets[dset_idx]))
figo_nsmg_demg_files <- figo_nsmg_demg_files[dset_mask]
# The module label is the second-to-last "_"-separated token of each path.
sig_modules <- figo_nsmg_demg_files %>%
    purrr::map_chr(function(path) {
        tokens <- str_split(path, "_")[[1]]
        tokens[length(tokens) - 1]
    })
# Filled in by the next cell: one gene data frame per module, keyed by label.
figo_nsmg_demg_dfs <- list()

In [6]:
# Read each module's gene list (one symbol per line) and attach Entrez IDs,
# storing the result under the module's label.
for (file_idx in seq_along(figo_nsmg_demg_files)) {
    module_name <- sig_modules[file_idx]
    module_genes <- tibble(geneID = read_lines(figo_nsmg_demg_files[file_idx]))
    figo_nsmg_demg_dfs[[module_name]] <- inner_join(
        module_genes,
        gene2id_df,
        by = c("geneID" = "hugo_symbol")
    )
}

In [7]:
# GO over-representation analysis for the UMSMG/DEMG gene list.
# `entrez_gene_id` is parsed as a double column, and coercing a double straight
# to character can yield scientific notation (e.g. 100000 -> "1e+05"), which
# would never match an Entrez key. Going through integer keeps plain digits.
figo_umsmg_demg_entrez <- as.character(as.integer(figo_umsmg_demg_df$entrez_gene_id))
figo_umsmg_demg_go <- enrichGO(
    gene = figo_umsmg_demg_entrez,
    OrgDb = "org.Hs.eg.db",
    ont = "ALL",
    readable = TRUE
)
# Flatten the enrichment result into a data frame via the project helper.
figo_umsmg_demg_go_df <- figo_umsmg_demg_go %>%
    rutils::make_ea_df(ea_type = "GO")

In [8]:
# GO over-representation analysis for every significant network module.
figo_nsmg_demg_go <- list()
figo_nsmg_demg_go_dfs <- list()
for (n in names(figo_nsmg_demg_dfs)) {
    # `entrez_gene_id` is parsed as a double column; convert through integer so
    # that round IDs (e.g. 100000) are not rendered as "1e+05" when they are
    # turned into character keys.
    module_entrez <- as.character(as.integer(figo_nsmg_demg_dfs[[n]]$entrez_gene_id))
    # Get GO results for each sig. module
    figo_nsmg_demg_go[[n]] <- enrichGO(
        gene = module_entrez,
        OrgDb = "org.Hs.eg.db",
        ont = "ALL",
        readable = TRUE
    )
    # Flatten the enrichment result into a data frame via the project helper.
    figo_nsmg_demg_go_dfs[[n]] <- figo_nsmg_demg_go[[n]] %>%
        rutils::make_ea_df(ea_type = "GO")
}

In [9]:
# Persist the enrichment results for the selected dataset (only if requested).
if (save_res) {
    enrichment_dir <- paste0(dirs$analysis_dir, "/enrichment")
    # Create the output directory up front so that a missing folder does not
    # make the writes fail after the enrichment has already been computed.
    dir.create(enrichment_dir, showWarnings = FALSE, recursive = TRUE)
    # Common "<enrichment dir>/<dataset>" prefix shared by every output file.
    out_prefix <- paste0(enrichment_dir, "/", unified_dsets[dset_idx])

    # Raw enrichment result objects, kept together in one RData file.
    save(
        figo_umsmg_demg_go, figo_nsmg_demg_go,
        file = paste0(out_prefix, "_granular_enrichment_results.RData")
    )
    # Tabular GO results: one file for the UMSMG/DEMG list, one per module.
    write_tsv(figo_umsmg_demg_go_df, paste0(out_prefix, "_figo_umsmg_demg_go.tsv"))
    for (n in names(figo_nsmg_demg_go_dfs)) {
        write_tsv(figo_nsmg_demg_go_dfs[[n]], paste0(out_prefix, "_figo_nsmg_demg_", n, "_go.tsv"))
    }
}