In [1]:
library(tidyverse)
library(clusterProfiler)
library(BiocParallel)
library(parallel)
library(org.Hs.eg.db)
library(AnnotationDbi)
library(stats4)
library(BiocGenerics)

# Custom package
library(rutils)

-- Attaching packages ------------------------------------------------------------------------------- tidyverse 1.3.0 --

v ggplot2 3.3.3     v purrr   0.3.4
v tibble  3.0.6     v dplyr   1.0.4
v tidyr   1.1.2     v stringr 1.4.0
v readr   1.4.0     v forcats 0.5.1

-- Conflicts ---------------------------------------------------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()



clusterProfiler v3.18.0  For help: https://guangchuangyu.github.io/software/clusterProfiler

If you use clusterProfiler in published research, please cite:
Guangchuang Yu, Li-Gen Wang, Yanyan Han, Qing-Yu He. clusterProfiler: an R package for comparing biological themes among gene clusters. OMICS: A Journal of Integrative Biology. 2012, 16(5):284-287.

In [2]:
# Project directory locations, resolved from the shared dev-paths file.
dirs <- rutils::get_dev_directories(
    dev_paths_file = "../dev_paths.txt"
)
# Available unified datasets; `dset_idx` selects one of these by position.
unified_dsets <- c(
    "unified_cervical_data",
    "unified_uterine_data",
    "unified_uterine_endometrial_data"
)

In [3]:
# Run configuration.
# save_res: write the enrichment tables to disk at the end of the notebook.
save_res <- TRUE
# dset_idx: position in `unified_dsets` of the dataset to analyse.
dset_idx <- 3

In [4]:
# Read one newline-delimited gene list for the selected dataset from
# <analysis_dir>/gene_lists/<dataset>_<list_name>_list.txt.
read_gene_list <- function(list_name) {
    read_lines(paste0(
        dirs$analysis_dir, "/gene_lists/",
        unified_dsets[dset_idx], "_", list_name, "_list.txt"
    ))
}

matrisome_df <- load_matrisome_df(paste0(dirs$data_dir, "/matrisome/matrisome_hs_masterlist.tsv"))

# HUGO symbol -> Entrez ID lookup for the selected dataset.
# readr guesses `entrez_gene_id` as double, but the IDs are identifiers that
# are later matched as strings by enrichGO()/enrichKEGG(). A double such as
# 100000 becomes "1e+05" under as.character() and would silently fail to
# match, so store the IDs as integer-formatted character values up front.
# Missing IDs stay NA.
gene2id_df <- read_tsv(paste0(dirs$data_dir, "/", unified_dsets[dset_idx], "/hugo2entrez.tsv")) %>%
    dplyr::mutate(entrez_gene_id = as.character(as.integer(entrez_gene_id)))

deg_list <- read_gene_list("deg")
demg_list <- read_gene_list("demg")

# FIGO (UMSMG or NSMG) AND DEMG
figo_umsmg_list <- read_gene_list("figo_umsmg")
figo_nsmg_list <- read_gene_list("figo_nsmg")
figo_list <- figo_umsmg_list %>%
    union(figo_nsmg_list) %>%
    intersect(demg_list)

# Survival UMSMG AND DEMG
survival_umsmg_list <- read_gene_list("survival_umsmg")
survival_list <- survival_umsmg_list %>%
    intersect(demg_list)
# nsmg_demg_list <- read_lines(paste0(dirs$analysis_dir, "/gene_lists/", unified_dsets[dset_idx], "_nsmg_hub_thresh_", hub_con_thresh, "_demg_list.txt"))
# umsmg_demg_list <- read_lines(paste0(dirs$analysis_dir, "/gene_lists/", unified_dsets[dset_idx], "_umsmg_demg_list.txt"))


-- Column specification ------------------------------------------------------------------------------------------------
cols(
  Division = col_character(),
  Category = col_character(),
  `Gene Symbol` = col_character(),
  `Gene Name` = col_character(),
  Synonyms = col_character(),
  HGNC_IDs = col_double(),
  `HGNC_IDs Links` = col_double(),
  UniProt_IDs = col_character(),
  Refseq_IDs = col_character(),
  Orthology = col_character(),
  Notes = col_character()
)



-- Column specification ------------------------------------------------------------------------------------------------
cols(
  hugo_symbol = col_character(),
  entrez_gene_id = col_double()
)




In [5]:
# Named collections of enrichment result tables, filled in by the cells below
# and written to disk at the end of the notebook.
kegg_dfs <- list()
go_dfs <- list()

# DGE

In [6]:
# Attach Entrez IDs to the DEG and DEMG symbol lists. The inner join keeps
# only symbols that have an entry in `gene2id_df`.
deg_df <- dplyr::inner_join(
    tibble(geneID = deg_list),
    gene2id_df,
    by = c("geneID" = "hugo_symbol")
)
demg_df <- dplyr::inner_join(
    tibble(geneID = demg_list),
    gene2id_df,
    by = c("geneID" = "hugo_symbol")
)

## Gene-set

In [7]:
# GO enrichment (ont = "ALL") for the DEG and DEMG Entrez IDs against the
# human annotation database; readable = TRUE is passed so results are
# reported with gene symbols.
deg_go <- enrichGO(
    gene = deg_df[["entrez_gene_id"]],
    OrgDb = "org.Hs.eg.db",
    ont = "ALL",
    readable = TRUE
)
demg_go <- enrichGO(
    gene = demg_df[["entrez_gene_id"]],
    OrgDb = "org.Hs.eg.db",
    ont = "ALL",
    readable = TRUE
)

In [8]:
# Convert the GO enrichment results to tables and register them for export.
go_dfs$deg_go <- make_ea_df(deg_go, ea_type = "GO")
go_dfs$demg_go <- make_ea_df(demg_go, ea_type = "GO")

## Pathway

In [9]:
# KEGG pathway enrichment for the DEG and DEMG Entrez IDs (organism "hsa").
# The notebook output shows the KEGG annotation being read online.
deg_kegg <- enrichKEGG(
    gene = deg_df[["entrez_gene_id"]],
    organism = "hsa"
)
demg_kegg <- enrichKEGG(
    gene = demg_df[["entrez_gene_id"]],
    organism = "hsa"
)

Reading KEGG annotation online:


Reading KEGG annotation online:




In [10]:
# Convert the KEGG enrichment results to tables and register them for export.
kegg_dfs$deg_kegg <- make_ea_df(deg_kegg, ea_type = "KEGG")
kegg_dfs$demg_kegg <- make_ea_df(demg_kegg, ea_type = "KEGG")

# FIGO

In [11]:
# Attach Entrez IDs to the FIGO gene list; symbols without an entry in
# `gene2id_df` are dropped by the inner join.
figo_df <- dplyr::inner_join(
    tibble(geneID = figo_list),
    gene2id_df,
    by = c("geneID" = "hugo_symbol")
)

## Gene-set

In [12]:
# GO enrichment (ont = "ALL") for the FIGO gene set.
figo_go <- enrichGO(
    gene = figo_df[["entrez_gene_id"]],
    OrgDb = "org.Hs.eg.db",
    ont = "ALL",
    readable = TRUE
)

In [13]:
# Register the FIGO GO result table for export.
go_dfs$figo_go <- make_ea_df(figo_go, ea_type = "GO")

## Pathway

In [14]:
# KEGG pathway enrichment for the FIGO gene set (organism "hsa").
figo_kegg <- enrichKEGG(
    gene = figo_df[["entrez_gene_id"]],
    organism = "hsa"
)

In [15]:
# Register the FIGO KEGG result table for export.
kegg_dfs$figo_kegg <- make_ea_df(figo_kegg, ea_type = "KEGG")

# Survival

In [16]:
# Attach Entrez IDs to the survival gene list; symbols without an entry in
# `gene2id_df` are dropped by the inner join.
survival_df <- dplyr::inner_join(
    tibble(geneID = survival_list),
    gene2id_df,
    by = c("geneID" = "hugo_symbol")
)

## Gene-set

In [17]:
# GO enrichment (ont = "ALL") for the survival gene set.
survival_go <- enrichGO(
    gene = survival_df[["entrez_gene_id"]],
    OrgDb = "org.Hs.eg.db",
    ont = "ALL",
    readable = TRUE
)

In [18]:
# Register the survival GO result table for export.
go_dfs$survival_go <- make_ea_df(survival_go, ea_type = "GO")

## Pathway

In [19]:
# KEGG pathway enrichment for the survival gene set (organism "hsa").
survival_kegg <- enrichKEGG(
    gene = survival_df[["entrez_gene_id"]],
    organism = "hsa"
)

In [20]:
# Register the survival KEGG result table for export.
kegg_dfs$survival_kegg <- make_ea_df(survival_kegg, ea_type = "KEGG")

# Write out

In [21]:
# Sanity check: list the GO result tables that will be considered for export.
names(go_dfs)

In [22]:
# Sanity check: list the KEGG result tables that will be considered for export.
names(kegg_dfs)

In [23]:
# kegg_dfs$survival_kegg

In [24]:
# Write every non-empty enrichment table to
# <analysis_dir>/enrichment/<dataset>_<result name>.tsv when `save_res` is set.
# Empty tables are skipped, so no file is written for them.
if (save_res) {
    enrichment_dir <- paste0(dirs$analysis_dir, "/enrichment")
    # write_tsv() does not create missing directories; make sure the target
    # exists so a first run on a fresh checkout does not fail here.
    dir.create(enrichment_dir, showWarnings = FALSE, recursive = TRUE)

    # GO and KEGG tables share the same naming scheme and guard, so they are
    # exported in a single pass (GO first, then KEGG, as before).
    ea_dfs <- c(go_dfs, kegg_dfs)
    for (ea_n in names(ea_dfs)) {
        if (nrow(ea_dfs[[ea_n]]) > 0) {
            write_tsv(
                ea_dfs[[ea_n]],
                paste0(enrichment_dir, "/", unified_dsets[dset_idx], "_", ea_n, ".tsv")
            )
        }
    }
}