In [1]:
library(tidyverse)

# Custom package
library(rutils)

-- Attaching packages ------------------------------------------------------------------------------- tidyverse 1.3.0 --

v ggplot2 3.3.3     v purrr   0.3.4
v tibble  3.0.6     v dplyr   1.0.4
v tidyr   1.1.2     v stringr 1.4.0
v readr   1.4.0     v forcats 0.5.1

-- Conflicts ---------------------------------------------------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()



In [2]:
# Notebook parameters.
# dset_idx: position in `unified_dsets` of the dataset to analyse. It is only
#   ever used as a vector index, so it is stored as an integer (`3L`).
# save_res: logical flag; not referenced in the cells shown here
#   (presumably controls whether results are written to disk — TODO confirm).
dset_idx <- 3L
save_res <- TRUE

In [3]:
# Directory locations come from the custom `rutils` helper; the cells below
# read `dirs$data_dir` and `dirs$analysis_dir` from the returned object.
dirs <- rutils::get_dev_directories(dev_paths_file = "../dev_paths.txt")

# NOTE(review): `projects` lists four TCGA projects while `unified_dsets` has
# only three entries (nothing corresponds to "TCGA-OV") — confirm intentional.
projects <- c("TCGA-CESC", "TCGA-UCS", "TCGA-UCEC", "TCGA-OV")
unified_dsets <- c(
    "unified_cervical_data",
    "unified_uterine_data",
    "unified_uterine_endometrial_data"
)

# Location of the matrisome master list (TSV) under the data directory.
# (Previously this assignment appeared twice; once is enough.)
matrisome_path <- paste0(dirs$data_dir, "/matrisome/matrisome_hs_masterlist.tsv")

In [4]:
# Matrisome annotation table, reduced to the gene identifier (renamed to
# `geneID`) plus its division and category. A renaming `select()` yields the
# same three columns, in the same order, as `select()` followed by `rename()`.
matrisome_df <- rutils::load_matrisome_df(matrisome_path) %>%
    dplyr::select(geneID = gene_symbol, division, category)

# Every gene-list file for the selected dataset shares this path prefix:
#   <analysis_dir>/gene_lists/<unified dataset name>
gene_list_prefix <- paste0(
    dirs$analysis_dir, "/gene_lists/", unified_dsets[dset_idx]
)

# Each file is read as a character vector, one entry per line; entries are
# matched against `geneID` in later cells.
demg_list <- read_lines(paste0(gene_list_prefix, "_demg_list.txt"))
figo_umsmg_list <- read_lines(paste0(gene_list_prefix, "_figo_umsmg_list.txt"))
survival_umsmg_list <- read_lines(paste0(gene_list_prefix, "_survival_umsmg_list.txt"))
figo_nsmg_list <- read_lines(paste0(gene_list_prefix, "_figo_nsmg_list.txt"))

# Genes present in either FIGO list, and the subset of those also in the
# DEMG list.
figo_sig_list <- union(figo_umsmg_list, figo_nsmg_list)
figo_sig_de_list <- intersect(figo_sig_list, demg_list)



-- Column specification ------------------------------------------------------------------------------------------------
cols(
  Division = col_character(),
  Category = col_character(),
  `Gene Symbol` = col_character(),
  `Gene Name` = col_character(),
  Synonyms = col_character(),
  HGNC_IDs = col_double(),
  `HGNC_IDs Links` = col_double(),
  UniProt_IDs = col_character(),
  Refseq_IDs = col_character(),
  Orthology = col_character(),
  Notes = col_character()
)




# Matrisome overview

In [5]:
# Number of matrisome genes in each category. `count(category)` groups,
# tallies and returns an ungrouped tibble in one step, replacing the
# redundant `group_by()` + `count()` pair (same rows and values).
matrisome_df %>%
    dplyr::count(category)

category,n
<chr>,<int>
Collagens,44
ECM Glycoproteins,195
ECM Regulators,238
ECM-affiliated Proteins,171
Proteoglycans,35
Secreted Factors,344


# FIGO UMSMG $\cap$ DEMG

In [6]:
# Per-category counts for the matrisome genes found in both `figo_umsmg_list`
# and `demg_list`. `count(category)` replaces `group_by()` + `count()` and
# returns the same rows as an ungrouped tibble.
matrisome_df %>%
    dplyr::filter(geneID %in% intersect(figo_umsmg_list, demg_list)) %>%
    dplyr::count(category)

category,n
<chr>,<int>
Collagens,2
ECM Glycoproteins,13
ECM Regulators,15
ECM-affiliated Proteins,10
Proteoglycans,2
Secreted Factors,20


In [7]:
# Sizes of the gene lists and of the FIGO-significant / DEMG intersection.
length(figo_umsmg_list)
length(figo_sig_list)
length(demg_list)
length(figo_sig_de_list)

# Jaccard index of the FIGO-significant and DEMG lists: |A n B| / |A u B|
# (`figo_sig_de_list` is their intersection).
length(figo_sig_de_list) / length(union(figo_sig_list, demg_list))

# Overlap coefficient of the survival and FIGO-significant lists:
# |A n B| / min(|A|, |B|). The survival list has been the smaller one so far,
# but taking the minimum explicitly keeps the ratio correct if that ever
# stops being true for some dataset.
length(intersect(survival_umsmg_list, figo_sig_list)) /
    min(length(survival_umsmg_list), length(figo_sig_list))

# Survival UMSMG $\cap$ DEMG

In [8]:
# Per-category counts for the matrisome genes found in both
# `survival_umsmg_list` and `demg_list`. `count(category)` replaces
# `group_by()` + `count()` and returns the same rows as an ungrouped tibble.
matrisome_df %>%
    dplyr::filter(geneID %in% intersect(survival_umsmg_list, demg_list)) %>%
    dplyr::count(category)

category,n
<chr>,<int>
Collagens,1
ECM Glycoproteins,1
ECM Regulators,5
ECM-affiliated Proteins,4
Proteoglycans,2
Secreted Factors,5


# FIGO NSMG $\cap$ DEMG

In [9]:
# Per-category counts for the matrisome genes found in both `figo_nsmg_list`
# and `demg_list`. `count(category)` replaces `group_by()` + `count()` and
# returns the same rows as an ungrouped tibble.
matrisome_df %>%
    dplyr::filter(geneID %in% intersect(figo_nsmg_list, demg_list)) %>%
    dplyr::count(category)

category,n
<chr>,<int>
Collagens,10
ECM Glycoproteins,20
ECM Regulators,29
ECM-affiliated Proteins,21
Proteoglycans,3
Secreted Factors,46
