In [1]:
library(tidyverse)
library(clusterProfiler)
library(BiocParallel)
library(parallel)
library(org.Hs.eg.db)
library(AnnotationDbi)
library(stats4)
library(BiocGenerics)

# Custom package
library(rutils)

-- Attaching packages ------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --

v ggplot2 3.3.3     v purrr   0.3.4
v tibble  3.0.6     v dplyr   1.0.4
v tidyr   1.1.2     v stringr 1.4.0
v readr   1.4.0     v forcats 0.5.1

-- Conflicts ---------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()



clusterProfiler v3.18.0  For help: https://guangchuangyu.github.io/software/clusterProfiler

If you use clusterProfiler in published research, please cite:
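Guangchuang Yu, Li-Gen Wang, Yanyan Han, Qing-Yu He. clusterProfiler: an R package for comparing biological themes among gene clusters. OMICS: A Journal of Integrative Biology. 2012, 16(5):284-287.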

In [2]:
# Project directory paths (used below as `dirs$data_dir` and
# `dirs$analysis_dir`), resolved from the dev-paths file one level up.
dirs <- rutils::get_dev_directories(
    dev_paths_file = "../dev_paths.txt"
)

# Names of the unified datasets; `dset_idx` picks the one analysed in this run.
unified_dsets <- c(
    "unified_cervical_data",
    "unified_uterine_data",
    "unified_uterine_endometrial_data"
)

In [3]:
# Position in `unified_dsets` of the dataset to analyse
# (3 selects "unified_uterine_endometrial_data").
dset_idx <- 3

# Threshold value that is pasted into the NSMG gene-list file name when the
# inputs are read. Alternative settings are kept below for reference.
# hub_con_thresh <- 0.5
# hub_con_thresh <- 0.25
hub_con_thresh <- 0

In [4]:
# Load the inputs for the selected dataset (`unified_dsets[dset_idx]`): the
# matrisome master list, the HUGO-symbol -> Entrez-ID map, and the gene lists
# (one gene symbol per line) that are tested for enrichment below.
matrisome_df <- load_matrisome_df(paste0(dirs$data_dir, "/matrisome/matrisome_hs_masterlist.tsv"))

# Column types are pinned instead of guessed so that a malformed mapping file
# shows up as parsing problems rather than as a silently different column type.
# Entrez IDs are parsed as double (this tolerates values written as "1234.0")
# and then stored as integer: as.character() renders a double such as 100000
# as "1e+05", which cannot match an annotation key once the IDs are compared
# as strings, whereas an integer always renders as plain digits.
# NOTE(review): the enrichment functions are expected to coerce gene IDs with
# as.character(); confirm against the installed clusterProfiler/DOSE version.
gene2id_df <- read_tsv(
    paste0(dirs$data_dir, "/", unified_dsets[dset_idx], "/hugo2entrez.tsv"),
    col_types = cols(
        hugo_symbol = col_character(),
        entrez_gene_id = col_double()
    )
) %>%
    dplyr::mutate(entrez_gene_id = as.integer(entrez_gene_id))

# figo_genes <- read_lines(paste0(dirs$analysis_dir, "/gene_lists/", unified_dsets[dset_idx], "_figo_hub_thresh_", hub_con_thresh, "_gene_list.txt"))
deg_list <- read_lines(paste0(dirs$analysis_dir, "/gene_lists/", unified_dsets[dset_idx], "_deg_list.txt"))
demg_list <- read_lines(paste0(dirs$analysis_dir, "/gene_lists/", unified_dsets[dset_idx], "_demg_list.txt"))
# The NSMG list is the only one whose file name embeds `hub_con_thresh`.
nsmg_demg_list <- read_lines(paste0(dirs$analysis_dir, "/gene_lists/", unified_dsets[dset_idx], "_nsmg_hub_thresh_", hub_con_thresh, "_demg_list.txt"))
umsmg_demg_list <- read_lines(paste0(dirs$analysis_dir, "/gene_lists/", unified_dsets[dset_idx], "_umsmg_demg_list.txt"))
# survival_demg_list <- read_lines(paste0(dirs$analysis_dir, "/gene_lists/", unified_dsets[dset_idx], "_survival_demg_list.txt"))


-- Column specification ------------------------------------------------------------------------------------------------------------------------
cols(
  Division = col_character(),
  Category = col_character(),
  `Gene Symbol` = col_character(),
  `Gene Name` = col_character(),
  Synonyms = col_character(),
  HGNC_IDs = col_double(),
  `HGNC_IDs Links` = col_double(),
  UniProt_IDs = col_character(),
  Refseq_IDs = col_character(),
  Orthology = col_character(),
  Notes = col_character()
)



-- Column specification ------------------------------------------------------------------------------------------------------------------------
cols(
  hugo_symbol = col_character(),
  entrez_gene_id = col_double()
)




# DGE

In [5]:
# Turn each gene-symbol vector into a one-column tibble (`geneID`) and attach
# the Entrez ID by matching `geneID` to `hugo_symbol`. The inner join keeps
# only symbols that have an entry in `gene2id_df`.
deg_df <- dplyr::inner_join(
    as_tibble_col(deg_list, column_name = "geneID"),
    gene2id_df,
    by = c("geneID" = "hugo_symbol")
)
demg_df <- dplyr::inner_join(
    as_tibble_col(demg_list, column_name = "geneID"),
    gene2id_df,
    by = c("geneID" = "hugo_symbol")
)

## Gene-set

In [6]:
# GO enrichment over all ontologies (`ont = "ALL"`) for the DEG and DEMG
# Entrez IDs; `readable = TRUE` asks for gene symbols in the result.
deg_go <- enrichGO(
    gene = deg_df$entrez_gene_id,
    OrgDb = "org.Hs.eg.db",
    ont = "ALL",
    readable = TRUE
)
demg_go <- enrichGO(
    gene = demg_df$entrez_gene_id,
    OrgDb = "org.Hs.eg.db",
    ont = "ALL",
    readable = TRUE
)

In [7]:
# Convert the GO enrichment results to data frames with the project helper.
deg_go_df <- deg_go %>%
    make_ea_df(ea_type = "GO")
demg_go_df <- demg_go %>%
    make_ea_df(ea_type = "GO")

## Pathway

In [8]:
# KEGG pathway enrichment (organism code "hsa") for the DEG and DEMG Entrez IDs.
deg_kegg <- enrichKEGG(
    gene = deg_df$entrez_gene_id,
    organism = "hsa"
)
demg_kegg <- enrichKEGG(
    gene = demg_df$entrez_gene_id,
    organism = "hsa"
)

Reading KEGG annotation online:


Reading KEGG annotation online:




In [9]:
# Convert the KEGG enrichment results to data frames with the project helper.
deg_kegg_df <- deg_kegg %>%
    make_ea_df(ea_type = "KEGG")
demg_kegg_df <- demg_kegg %>%
    make_ea_df(ea_type = "KEGG")

# Write out

In [10]:
# Write the DEG / DEMG enrichment tables as TSV files under
# <analysis_dir>/enrichment/, prefixed with the selected dataset name.
# Each call passes exactly two arguments (table, path); the stray trailing
# commas were dropped so no empty third argument is handed to write_tsv(),
# matching the form used by the other write-out cell in this notebook.
write_tsv(
    deg_go_df,
    paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_deg_go.tsv")
)

write_tsv(
    deg_kegg_df,
    paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_deg_kegg.tsv")
)

write_tsv(
    demg_go_df,
    paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_demg_go.tsv")
)

write_tsv(
    demg_kegg_df,
    paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_demg_kegg.tsv")
)

# FIGO

In [11]:
# Same symbol -> Entrez mapping as for the DGE lists: wrap each vector in a
# one-column tibble (`geneID`) and inner-join it to `gene2id_df` on
# `geneID` == `hugo_symbol`.
umsmg_demg_df <- dplyr::inner_join(
    as_tibble_col(umsmg_demg_list, column_name = "geneID"),
    gene2id_df,
    by = c("geneID" = "hugo_symbol")
)

nsmg_demg_df <- dplyr::inner_join(
    as_tibble_col(nsmg_demg_list, column_name = "geneID"),
    gene2id_df,
    by = c("geneID" = "hugo_symbol")
)

## Gene-set

In [12]:
# GO enrichment over all ontologies for the UMSMG and NSMG gene sets;
# `readable = TRUE` asks for gene symbols in the result.
umsmg_demg_go <- enrichGO(
    gene = umsmg_demg_df$entrez_gene_id,
    OrgDb = "org.Hs.eg.db",
    ont = "ALL",
    readable = TRUE
)
nsmg_demg_go <- enrichGO(
    gene = nsmg_demg_df$entrez_gene_id,
    OrgDb = "org.Hs.eg.db",
    ont = "ALL",
    readable = TRUE
)

In [13]:
# Convert the GO enrichment results to data frames with the project helper.
umsmg_demg_go_df <- umsmg_demg_go %>%
    make_ea_df(ea_type = "GO")
nsmg_demg_go_df <- nsmg_demg_go %>%
    make_ea_df(ea_type = "GO")

## Pathway

In [14]:
# KEGG pathway enrichment (organism code "hsa") for the UMSMG and NSMG gene sets.
umsmg_demg_kegg <- enrichKEGG(
    gene = umsmg_demg_df$entrez_gene_id,
    organism = "hsa"
)
nsmg_demg_kegg <- enrichKEGG(
    gene = nsmg_demg_df$entrez_gene_id,
    organism = "hsa"
)

In [15]:
# Convert the KEGG enrichment results to data frames with the project helper.
umsmg_demg_kegg_df <- umsmg_demg_kegg %>%
    make_ea_df(ea_type = "KEGG")
nsmg_demg_kegg_df <- nsmg_demg_kegg %>%
    make_ea_df(ea_type = "KEGG")

# Write out

In [16]:
# Write the UMSMG / NSMG enrichment tables as TSV files under
# <analysis_dir>/enrichment/, prefixed with the selected dataset name.

# GO tables
write_tsv(umsmg_demg_go_df, paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_umsmg_demg_go.tsv"))
write_tsv(nsmg_demg_go_df, paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_nsmg_demg_go.tsv"))

# KEGG tables
write_tsv(umsmg_demg_kegg_df, paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_umsmg_demg_kegg.tsv"))
write_tsv(nsmg_demg_kegg_df, paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_nsmg_demg_kegg.tsv"))

# Survival

In [17]:
# Disabled: `survival_demg_list` is never created because its read_lines()
# call in the input-loading cell is commented out, so this join cannot run
# until that line is restored.
# survival_demg_df <- survival_demg_list %>%
#     as_tibble_col(column_name = "geneID") %>%
#     dplyr::inner_join(gene2id_df, c("geneID" = "hugo_symbol"))

## Gene-set

In [18]:
# survival_demg_go <- enrichGO(survival_demg_df$entrez_gene_id, "org.Hs.eg.db", ont = "ALL", readable = TRUE)

In [19]:
# survival_demg_go_df <- make_ea_df(survival_demg_go, ea_type = "GO")

## Pathway

In [20]:
# survival_demg_kegg <- enrichKEGG(survival_demg_df$entrez_gene_id, organism = "hsa")

In [21]:
# survival_demg_kegg_df <- make_ea_df(survival_demg_kegg, ea_type = "KEGG")

# Write out

In [22]:
# Disabled together with the other survival cells. When re-enabled, the GO
# table is always written, while the KEGG table is written only if it has at
# least one row.
# write_tsv(
#     survival_demg_go_df,
#     paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_survival_demg_go.tsv")
# )

# if (nrow(survival_demg_kegg_df) > 0) {
#     write_tsv(
#         survival_demg_kegg_df,
#         paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_survival_demg_kegg.tsv")
#     )
# }
