In [1]:
library(tidyverse)

# Custom package
library(rutils)

-- Attaching packages ------------------------------------------------------------------------------- tidyverse 1.3.0 --

v ggplot2 3.3.3     v purrr   0.3.4
v tibble  3.0.6     v dplyr   1.0.4
v tidyr   1.1.2     v stringr 1.4.0
v readr   1.4.0     v forcats 0.5.1

-- Conflicts ---------------------------------------------------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()



In [2]:
# Directory paths are resolved by rutils from the dev paths file; the
# `data_dir` and `analysis_dir` entries of the result are used in this notebook.
dirs <- rutils::get_dev_directories(dev_paths_file = "../dev_paths.txt")
# Project labels and the dataset prefixes used in file names. The two vectors
# are parallel: element i of one corresponds to element i of the other.
projects <- c("CESC", "UCS", "UCEC")
unified_dsets <- c("unified_cervical_data", "unified_uterine_data", "unified_uterine_endometrial_data")
# Path of the matrisome master list under the data directory.
matrisome_path <- paste0(dirs$data_dir, "/matrisome/matrisome_hs_masterlist.tsv")

In [3]:
# Load the matrisome master list, keeping the gene symbol (renamed to geneID),
# division and category columns.
matrisome_df <- rutils::load_matrisome_df(matrisome_path) %>%
    dplyr::select(gene_symbol, division, category) %>%
    dplyr::rename(geneID = gene_symbol)

# Disabled per-dataset loads, left commented out as in the original notebook.
# demg_list <- read_lines(paste0(dirs$analysis_dir, "/gene_lists/", unified_dsets[dset_idx], "_demg_list.txt"))
# figo_umsmg_list <- read_lines(paste0(dirs$analysis_dir, "/gene_lists/", unified_dsets[dset_idx], "_figo_umsmg_list.txt"))
# survival_umsmg_list <- read_lines(paste0(dirs$analysis_dir, "/gene_lists/", unified_dsets[dset_idx], "_survival_umsmg_list.txt"))
# figo_nsmg_list <- read_lines(paste0(dirs$analysis_dir, "/gene_lists/", unified_dsets[dset_idx], "_figo_nsmg_list.txt"))

# figo_go_df <- read_tsv(paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_figo_go.tsv"))
# survival_go_df <- read_tsv(paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_survival_go.tsv"))

# Enrichment tables keyed by project label, one list per analysis
# (files "<dataset>_figo_go.tsv" and "<dataset>_survival_go.tsv").
figo_go <- list()
survival_go <- list()

# projects and unified_dsets are indexed in parallel below, so they must have
# the same length.
stopifnot(length(projects) == length(unified_dsets))

for (dset_idx in seq_along(projects)) {
    # Only the term label (type) and its q-value are needed downstream.
    figo_go[[projects[dset_idx]]] <- read_tsv(paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_figo_go.tsv")) %>%
        dplyr::select(type, qval)
    survival_go[[projects[dset_idx]]] <- read_tsv(paste0(dirs$analysis_dir, "/enrichment/", unified_dsets[dset_idx], "_survival_go.tsv")) %>%
        dplyr::select(type, qval)
}


[36m--[39m [1m[1mColumn specification[1m[22m [36m------------------------------------------------------------------------------------------------[39m
cols(
  Division = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  `Gene Symbol` = [31mcol_character()[39m,
  `Gene Name` = [31mcol_character()[39m,
  Synonyms = [31mcol_character()[39m,
  HGNC_IDs = [32mcol_double()[39m,
  `HGNC_IDs Links` = [32mcol_double()[39m,
  UniProt_IDs = [31mcol_character()[39m,
  Refseq_IDs = [31mcol_character()[39m,
  Orthology = [31mcol_character()[39m,
  Notes = [31mcol_character()[39m
)



[36m--[39m [1m[1mColumn specification[1m[22m [36m------------------------------------------------------------------------------------------------[39m
cols(
  type = [31mcol_character()[39m,
  geneIDs = [31mcol_character()[39m,
  count = [32mcol_double()[39m,
  ratio = [32mcol_double()[39m,
  qval = [32mcol_double()[39m,
  ont = [31mcol_character()[39m
)




In [4]:
# Compare the most significant terms across the CESC, UCEC and UCS tables.
#
# Args:
#   go_dfs: list with elements CESC, UCEC and UCS, each a data frame with a
#           `type` column and a numeric `qval` column.
#   n: number of lowest-qval rows to take from each table (default 10). Rows
#      tied with the n-th value are all kept and rows with a missing qval are
#      dropped. As with dplyr::top_n(), a negative n selects the |n| highest
#      qvals instead.
#
# Returns:
#   A list with `topn_intersect` (types present in every table's selection)
#   and `topn_union` (types present in at least one selection).
topn_lists <- function(go_dfs, n = 10) {
    # Types of the selected rows of one table, in the table's own row order.
    top_types <- function(go_df) {
        stopifnot(is.data.frame(go_df))
        qval <- go_df$qval
        # Minimum ranks keep ties together; NA ranks are dropped by which().
        ranks <- rank(if (n > 0) qval else -qval, ties.method = "min", na.last = "keep")
        go_df$type[which(ranks <= abs(n))]
    }

    # Select once per table. The fixed CESC, UCEC, UCS order determines the
    # element order of the returned vectors.
    selected <- lapply(list(go_dfs$CESC, go_dfs$UCEC, go_dfs$UCS), top_types)

    list(
        topn_intersect = Reduce(intersect, selected),
        topn_union = Reduce(union, selected)
    )
}

# Compare the complete term lists of the CESC, UCEC and UCS tables.
#
# Args:
#   go_dfs: list with elements CESC, UCEC and UCS, each having a `type` column.
#
# Returns:
#   A list with `full_intersect` (types present in all three tables) and
#   `full_union` (types present in at least one table).
full_lists <- function(go_dfs) {
    # Fold over the tables in CESC, UCEC, UCS order.
    type_sets <- list(go_dfs$CESC$type, go_dfs$UCEC$type, go_dfs$UCS$type)
    list(
        full_intersect = Reduce(intersect, type_sets),
        full_union = Reduce(union, type_sets)
    )
}

# FIGO

## Proportion of total shared by all

In [5]:
# Union and intersection of the term types across the three FIGO tables.
figo_full_lists <- full_lists(figo_go)
# Number of distinct types found in at least one project.
length(figo_full_lists$full_union)
# Number of types found in all three projects.
length(figo_full_lists$full_intersect)
# Shared fraction: intersection size over union size.
length(figo_full_lists$full_intersect) / length(figo_full_lists$full_union)

## Proportion of top n shared by all

In [11]:
# Number of lowest-qval terms taken from each project's FIGO table.
n <- 20
figo_topn_lists <- topn_lists(figo_go, n = n)
# Number of distinct types across the three top-n selections.
length(figo_topn_lists$topn_union)
# Number of types present in all three top-n selections, then the types.
length(figo_topn_lists$topn_intersect)
figo_topn_lists$topn_intersect
# Shared fraction: intersection size over union size.
length(figo_topn_lists$topn_intersect) / length(figo_topn_lists$topn_union)

# Survival

## Proportion of total shared by all

In [7]:
# Union and intersection of the term types across the three survival tables.
survival_full_lists <- full_lists(survival_go)
# Number of distinct types found in at least one project.
length(survival_full_lists$full_union)
# Number of types found in all three projects.
length(survival_full_lists$full_intersect)
# Shared fraction: intersection size over union size.
length(survival_full_lists$full_intersect) / length(survival_full_lists$full_union)

## Proportion of top n shared by all

In [9]:
# Number of lowest-qval terms taken from each project's survival table.
n <- 10
survival_topn_lists <- topn_lists(survival_go, n = n)
# Number of distinct types across the three top-n selections.
length(survival_topn_lists$topn_union)
# Number of types present in all three top-n selections, then the types.
length(survival_topn_lists$topn_intersect)
survival_topn_lists$topn_intersect
# Shared fraction: intersection size over union size.
length(survival_topn_lists$topn_intersect) / length(survival_topn_lists$topn_union)