In [106]:
library(tidyverse)
library(clusterProfiler)
library(BiocParallel)
library(parallel)
library(org.Hs.eg.db)
library(AnnotationDbi)
library(stats4)
library(BiocGenerics)

# Custom package
library(rutils)

In [107]:
# Project directory lookup, built by the rutils helper from the developer
# paths file. Later cells read `dirs$data_dir` and `dirs$analysis_dir`.
dirs <- rutils::get_dev_directories(
    dev_paths_file = "../dev_paths.txt"
)

# Folder names of the unified datasets; one of them is picked by index below.
unified_dsets <- c(
    "unified_cervical_data",
    "unified_uterine_data",
    "unified_uterine_endometrial_data"
)

In [108]:
# Position within `unified_dsets` of the dataset analysed by this notebook
# (3 selects "unified_uterine_endometrial_data").
dset_idx <- 3

# Fail fast on an out-of-range index: otherwise `unified_dsets[dset_idx]` is NA
# and every path pasted together from it silently contains the text "NA".
stopifnot(dset_idx %in% seq_along(unified_dsets))

In [109]:
# Matrisome master list, loaded through the project helper from rutils.
matrisome_df <- load_matrisome_df(
    paste0(dirs$data_dir, "/matrisome/matrisome_hs_masterlist.tsv")
)

# HUGO symbol -> Entrez gene ID lookup table for the selected dataset.
# The column types are declared instead of guessed, which pins the expected
# schema and silences the "Parsed with column specification" message.
# The IDs are then coerced to integer: the enrichment functions turn gene IDs
# into strings, and as.character() on a double can yield scientific notation
# (as.character(100000) is "1e+05"), which would never match a real Entrez ID.
gene2id_df <- read_tsv(
    paste0(dirs$data_dir, "/", unified_dsets[dset_idx], "/hugo2entrez.tsv"),
    col_types = cols(
        hugo_symbol = col_character(),
        entrez_gene_id = col_double()
    )
) %>%
    dplyr::mutate(entrez_gene_id = as.integer(entrez_gene_id))

Parsed with column specification:
cols(
  Division = col_character(),
  Category = col_character(),
  `Gene Symbol` = col_character(),
  `Gene Name` = col_character(),
  Synonyms = col_character(),
  HGNC_IDs = col_double(),
  `HGNC_IDs Links` = col_double(),
  UniProt_IDs = col_character(),
  Refseq_IDs = col_character(),
  Orthology = col_character(),
  Notes = col_character()
)
Parsed with column specification:
cols(
  hugo_symbol = col_character(),
  entrez_gene_id = col_double()
)


# DGE

In [110]:
# Gene symbols in the DGE gene list of the selected dataset, one per line.
dge_genes <- read_lines(
    paste0(
        dirs$analysis_dir, "/gene_lists/",
        unified_dsets[dset_idx], "_dge_gene_list.txt"
    )
)

# Attach Entrez IDs to the symbols; the inner join drops any symbol that has
# no row in the lookup table.
dge_genes_df <- dplyr::inner_join(
    as_tibble_col(dge_genes, column_name = "geneID"),
    gene2id_df,
    by = c("geneID" = "hugo_symbol")
)

# Subset of the DGE genes whose symbol appears in the matrisome list.
dge_m_genes_df <- dplyr::filter(
    dge_genes_df,
    geneID %in% matrisome_df$gene_symbol
)

## Gene-set

In [111]:
# GO over-representation analysis for all DGE genes. `ont = "ALL"` requests
# every GO sub-ontology; `readable = TRUE` reports gene symbols in the result.
dge_go_res <- enrichGO(
    gene = dge_genes_df$entrez_gene_id,
    OrgDb = "org.Hs.eg.db",
    ont = "ALL",
    readable = TRUE
)

# Same analysis restricted to the matrisome subset of the DGE genes.
dge_m_go_res <- enrichGO(
    gene = dge_m_genes_df$entrez_gene_id,
    OrgDb = "org.Hs.eg.db",
    ont = "ALL",
    readable = TRUE
)

In [112]:
# Pass each GO result through the project helper `make_ea_df`
# (presumably tabulates the enrichment result; confirm in rutils).
dge_go_df <- dge_go_res %>%
    make_ea_df(ea_type = "GO")
dge_m_go_df <- dge_m_go_res %>%
    make_ea_df(ea_type = "GO")

## Pathway

In [113]:
# KEGG pathway over-representation analysis (organism code "hsa") for all
# DGE genes.
dge_kegg_res <- enrichKEGG(
    gene = dge_genes_df$entrez_gene_id,
    organism = "hsa"
)

# Same analysis restricted to the matrisome subset of the DGE genes.
dge_m_kegg_res <- enrichKEGG(
    gene = dge_m_genes_df$entrez_gene_id,
    organism = "hsa"
)

In [114]:
# Pass each KEGG result through the project helper `make_ea_df`
# (presumably tabulates the enrichment result; confirm in rutils).
dge_kegg_df <- dge_kegg_res %>%
    make_ea_df(ea_type = "KEGG")
dge_m_kegg_df <- dge_m_kegg_res %>%
    make_ea_df(ea_type = "KEGG")

# Write out

In [115]:
# Make sure the output folder exists so the writes below do not fail on a
# fresh checkout. Deliberately non-recursive: a wrong `analysis_dir` still
# surfaces as an error from write_tsv instead of creating a stray tree.
dir.create(paste0(dirs$analysis_dir, "/enrichment"), showWarnings = FALSE)

# GO enrichment table for all DGE genes.
write_tsv(
    dge_go_df,
    paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_DEG_all_gene_set.tsv")
)

# GO enrichment table for the matrisome subset of the DGE genes.
write_tsv(
    dge_m_go_df,
    paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_DEG_matrisome_gene_set.tsv")
)

# KEGG enrichment table for all DGE genes.
write_tsv(
    dge_kegg_df,
    paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_DEG_all_pathway.tsv")
)

# KEGG enrichment table for the matrisome subset of the DGE genes.
write_tsv(
    dge_m_kegg_df,
    paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_DEG_matrisome_pathway.tsv")
)

# FIGO

In [116]:
# Gene symbols in the FIGO gene list of the selected dataset, one per line.
figo_genes <- read_lines(
    paste0(
        dirs$analysis_dir, "/gene_lists/",
        unified_dsets[dset_idx], "_figo_gene_list.txt"
    )
)

# Attach Entrez IDs to the symbols; the inner join drops any symbol that has
# no row in the lookup table.
figo_genes_df <- dplyr::inner_join(
    as_tibble_col(figo_genes, column_name = "geneID"),
    gene2id_df,
    by = c("geneID" = "hugo_symbol")
)

## Gene-set

In [117]:
# GO over-representation analysis for the FIGO genes. `ont = "ALL"` requests
# every GO sub-ontology; `readable = TRUE` reports gene symbols in the result.
figo_go_res <- enrichGO(
    gene = figo_genes_df$entrez_gene_id,
    OrgDb = "org.Hs.eg.db",
    ont = "ALL",
    readable = TRUE
)

In [118]:
# Pass the GO result through the project helper `make_ea_df`
# (presumably tabulates the enrichment result; confirm in rutils).
figo_go_df <- figo_go_res %>%
    make_ea_df(ea_type = "GO")

## Pathway

In [119]:
# KEGG pathway over-representation analysis (organism code "hsa") for the
# FIGO genes.
figo_kegg_res <- enrichKEGG(
    gene = figo_genes_df$entrez_gene_id,
    organism = "hsa"
)

In [120]:
# Pass the KEGG result through the project helper `make_ea_df`
# (presumably tabulates the enrichment result; confirm in rutils).
figo_kegg_df <- figo_kegg_res %>%
    make_ea_df(ea_type = "KEGG")

In [121]:
# GO enrichment table for the FIGO genes.
figo_go_df %>%
    write_tsv(paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_figo_gene_set.tsv"))

# KEGG enrichment table for the FIGO genes.
figo_kegg_df %>%
    write_tsv(paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_figo_pathway.tsv"))

# Survival

In [122]:
# Gene symbols in the survival gene list of the selected dataset, one per line.
survival_genes <- read_lines(
    paste0(
        dirs$analysis_dir, "/gene_lists/",
        unified_dsets[dset_idx], "_survival_gene_list.txt"
    )
)

# Attach Entrez IDs to the symbols; the inner join drops any symbol that has
# no row in the lookup table.
survival_genes_df <- dplyr::inner_join(
    as_tibble_col(survival_genes, column_name = "geneID"),
    gene2id_df,
    by = c("geneID" = "hugo_symbol")
)

## Gene-set

In [123]:
# GO over-representation analysis for the survival genes. `ont = "ALL"`
# requests every GO sub-ontology; `readable = TRUE` reports gene symbols in
# the result.
survival_go_res <- enrichGO(
    gene = survival_genes_df$entrez_gene_id,
    OrgDb = "org.Hs.eg.db",
    ont = "ALL",
    readable = TRUE
)

In [124]:
# Pass the GO result through the project helper `make_ea_df`
# (presumably tabulates the enrichment result; confirm in rutils).
survival_go_df <- survival_go_res %>%
    make_ea_df(ea_type = "GO")

## Pathway

In [125]:
# KEGG pathway over-representation analysis (organism code "hsa") for the
# survival genes.
survival_kegg_res <- enrichKEGG(
    gene = survival_genes_df$entrez_gene_id,
    organism = "hsa"
)

In [126]:
# Pass the KEGG result through the project helper `make_ea_df`
# (presumably tabulates the enrichment result; confirm in rutils).
survival_kegg_df <- survival_kegg_res %>%
    make_ea_df(ea_type = "KEGG")

In [127]:
# GO enrichment table for the survival genes.
survival_go_df %>%
    write_tsv(paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_survival_gene_set.tsv"))

# KEGG enrichment table for the survival genes.
survival_kegg_df %>%
    write_tsv(paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_survival_pathway.tsv"))