In [23]:
library(tidyverse)
library(clusterProfiler)
library(BiocParallel)
library(parallel)
library(org.Hs.eg.db)
library(AnnotationDbi)
library(stats4)
library(BiocGenerics)

# Custom package
library(rutils)

In [24]:
dirs <- rutils::get_dev_directories(dev_paths_file = "../dev_paths.txt")
unified_dsets <- c("unified_cervical_data", "unified_uterine_data", "unified_uterine_endometrial_data")

In [25]:
dset_idx <- 3

In [26]:
matrisome_df <- load_matrisome_df(paste0(dirs$data_dir, "/matrisome/matrisome_hs_masterlist.tsv"))
gene2id_df <- read_tsv(paste0(dirs$data_dir, "/", unified_dsets[dset_idx], "/hugo2entrez.tsv"))


[36m--[39m [1m[1mColumn specification[1m[22m [36m-------------------------------------------------------------------------------------------------------------------------------------------------------------------[39m
cols(
  Division = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  `Gene Symbol` = [31mcol_character()[39m,
  `Gene Name` = [31mcol_character()[39m,
  Synonyms = [31mcol_character()[39m,
  HGNC_IDs = [32mcol_double()[39m,
  `HGNC_IDs Links` = [32mcol_double()[39m,
  UniProt_IDs = [31mcol_character()[39m,
  Refseq_IDs = [31mcol_character()[39m,
  Orthology = [31mcol_character()[39m,
  Notes = [31mcol_character()[39m
)



[36m--[39m [1m[1mColumn specification[1m[22m [36m-------------------------------------------------------------------------------------------------------------------------------------------------------------------[39m
cols(
  hugo_symbol = [31mcol_character()[39m,
  entrez_gene_id = [32mcol_double()

# DGE

In [27]:
dge_genes <- read_lines(paste0(dirs$analysis_dir, "/gene_lists/", unified_dsets[dset_idx], "_dge_gene_list.txt"))
dge_genes_df <- dge_genes %>%
    as_tibble_col(column_name = "geneID") %>%
    dplyr::inner_join(gene2id_df, c("geneID" = "hugo_symbol"))
dge_m_genes_df <- dge_genes_df %>%
    dplyr::filter(geneID %in% matrisome_df$gene_symbol)

## Gene-set

In [28]:
dge_go_res <- enrichGO(dge_genes_df$entrez_gene_id, "org.Hs.eg.db", ont = "ALL", readable = TRUE)
dge_m_go_res <- enrichGO(
    dge_m_genes_df$entrez_gene_id,
    "org.Hs.eg.db", ont = "ALL", readable = TRUE
)

In [29]:
dge_go_df <- make_ea_df(dge_go_res, ea_type = "GO")
dge_m_go_df <- make_ea_df(dge_m_go_res, ea_type = "GO")

## Pathway

In [30]:
dge_kegg_res <- enrichKEGG(dge_genes_df$entrez_gene_id, organism = "hsa")
dge_m_kegg_res <- enrichKEGG(
    dge_m_genes_df$entrez_gene_id,
    organism = "hsa"
)

In [31]:
dge_kegg_df <- make_ea_df(dge_kegg_res, ea_type = "KEGG")
dge_m_kegg_df <- make_ea_df(dge_m_kegg_res, ea_type = "KEGG")

# Write out

In [32]:
write_tsv(
    dge_go_df,
    paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_DEG_all_gene_set.tsv"),
)

write_tsv(
    dge_m_go_df,
    paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_DEG_matrisome_gene_set.tsv")
)

write_tsv(
    dge_kegg_df,
    paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_DEG_all_pathway.tsv")
)

write_tsv(
    dge_m_kegg_df,
    paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_DEG_matrisome_pathway.tsv")
)

# FIGO

In [33]:
figo_genes <- read_lines(paste0(dirs$analysis_dir, "/gene_lists/", unified_dsets[dset_idx], "_figo_gene_list.txt"))
figo_genes_df <- figo_genes %>%
    as_tibble_col(column_name = "geneID") %>%
    dplyr::inner_join(gene2id_df, c("geneID" = "hugo_symbol"))

## Gene-set

In [34]:
figo_go_res <- enrichGO(figo_genes_df$entrez_gene_id, "org.Hs.eg.db", ont = "ALL", readable = TRUE)

In [35]:
figo_go_df <- make_ea_df(figo_go_res, ea_type = "GO")

## Pathway

In [36]:
figo_kegg_res <- enrichKEGG(figo_genes_df$entrez_gene_id, organism = "hsa")

In [37]:
figo_kegg_df <- make_ea_df(figo_kegg_res, ea_type = "KEGG")

In [38]:
write_tsv(
    figo_go_df,
    paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_figo_gene_set.tsv")
)

write_tsv(
    figo_kegg_df,
    paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_figo_pathway.tsv")
)

# Survival

In [39]:
survival_genes <- read_lines(paste0(dirs$analysis_dir, "/gene_lists/", unified_dsets[dset_idx], "_survival_gene_list.txt"))
survival_genes_df <- survival_genes %>%
    as_tibble_col(column_name = "geneID") %>%
    dplyr::inner_join(gene2id_df, c("geneID" = "hugo_symbol"))

## Gene-set

In [40]:
survival_go_res <- enrichGO(survival_genes_df$entrez_gene_id, "org.Hs.eg.db", ont = "ALL", readable = TRUE)

In [41]:
survival_go_df <- make_ea_df(survival_go_res, ea_type = "GO")

## Pathway

In [42]:
survival_kegg_res <- enrichKEGG(survival_genes_df$entrez_gene_id, organism = "hsa")

In [43]:
survival_kegg_df <- make_ea_df(survival_kegg_res, ea_type = "KEGG")

In [44]:
write_tsv(
    survival_go_df,
    paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_survival_gene_set.tsv")
)

write_tsv(
    survival_kegg_df,
    paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_survival_pathway.tsv")
)