In [1]:
library(tidyverse)

# Custom package
library(rutils)

-- [1mAttaching packages[22m ------------------------------------------------------------------------------- tidyverse 1.3.0 --

[32mv[39m [34mggplot2[39m 3.3.3     [32mv[39m [34mpurrr  [39m 0.3.4
[32mv[39m [34mtibble [39m 3.0.6     [32mv[39m [34mdplyr  [39m 1.0.4
[32mv[39m [34mtidyr  [39m 1.1.2     [32mv[39m [34mstringr[39m 1.4.0
[32mv[39m [34mreadr  [39m 1.4.0     [32mv[39m [34mforcats[39m 0.5.1

-- [1mConflicts[22m ---------------------------------------------------------------------------------- tidyverse_conflicts() --
[31mx[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31mx[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()



In [2]:
# Names of the three unified datasets; later cells index this vector by
# position (1, 2, 3), so the order matters.
unified_dsets <- c(
    "unified_cervical_data",
    "unified_uterine_data",
    "unified_uterine_endometrial_data"
)

# Project directory list read from the dev-paths file one level up.
dirs <- rutils::get_dev_directories(dev_paths_file = "../dev_paths.txt")

# Path to the matrisome master list (TSV) under the data directory.
matrisome_path <- paste0(
    dirs$data_dir,
    "/matrisome/matrisome_hs_masterlist.tsv"
)

In [3]:
# Load the matrisome master list from `matrisome_path`. Later cells join
# against its `gene_symbol` column and keep `hgnc_ids`, `division`, `category`
# and `gene_name`.
# NOTE(review): `load_matrisome_df` is presumably provided by rutils and
# presumably normalizes the raw column names (e.g. `Gene Symbol`) — confirm.
matrisome_df <- load_matrisome_df(matrisome_path)


[36m--[39m [1m[1mColumn specification[1m[22m [36m------------------------------------------------------------------------------------------------[39m
cols(
  Division = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  `Gene Symbol` = [31mcol_character()[39m,
  `Gene Name` = [31mcol_character()[39m,
  Synonyms = [31mcol_character()[39m,
  HGNC_IDs = [32mcol_double()[39m,
  `HGNC_IDs Links` = [32mcol_double()[39m,
  UniProt_IDs = [31mcol_character()[39m,
  Refseq_IDs = [31mcol_character()[39m,
  Orthology = [31mcol_character()[39m,
  Notes = [31mcol_character()[39m
)




In [4]:
# Threshold value interpolated into the "hub_thresh_" part of the gene-list
# and summary file names used below.
# Alternative setting (kept for reference): hub_con_thresh <- 0.25
hub_con_thresh <- 0

In [8]:
# Annotate a gene list with matrisome metadata and write it to disk.
#
# Reads a one-gene-per-line text file, keeps only the genes present in
# `matrisome_df` (matched on its `gene_symbol` column), and writes a CSV with
# the columns gene_id, hgnc_ids, division, category, gene_name.
#
# gene_list_path: path of the input gene list (one symbol per line).
# out_path:       path of the CSV to write.
# matrisome_df:   matrisome annotation table with a `gene_symbol` column.
write_gene_summary <- function(gene_list_path, out_path, matrisome_df) {
    summary_df <- tibble(geneID = read_lines(gene_list_path)) %>%
        inner_join(matrisome_df, by = c("geneID" = "gene_symbol")) %>%
        # Selecting with `gene_id = geneID` renames the key column in place.
        select(gene_id = geneID, hgnc_ids, division, category, gene_name)
    write_csv(summary_df, out_path)
}

# For every unified dataset, summarize two gene lists:
#   1. the full overlap list at the current hub threshold, and
#   2. the DEMG / univariate overlap list.
# seq_along() keeps the loop in step with `unified_dsets` instead of relying
# on a hard-coded 1:3.
for (dset_idx in seq_along(unified_dsets)) {
    dset <- unified_dsets[dset_idx]

    write_gene_summary(
        gene_list_path = paste0(dirs$analysis_dir, "/gene_lists/", dset, "_full_overlap_hub_thresh_", hub_con_thresh, "_gene_list.txt"),
        out_path = paste0(dirs$analysis_dir, "/one_off/", dset, "_hub_thresh_", hub_con_thresh, "_full_overlap_gene_summary.csv"),
        matrisome_df = matrisome_df
    )

    # The "uninvar" spelling in the output name is kept on purpose: the cell
    # that reloads these summaries reads exactly this file name.
    write_gene_summary(
        gene_list_path = paste0(dirs$analysis_dir, "/gene_lists/", dset, "_demg_univar_overlap_gene_list.txt"),
        out_path = paste0(dirs$analysis_dir, "/one_off/", dset, "_demg_uninvar_overlap_gene_summary.csv"),
        matrisome_df = matrisome_df
    )
}

In [16]:
# Reload the summary tables written to the one_off directory, two per dataset
# (full overlap at the current hub threshold, and DEMG / univariate overlap).
# Dataset order follows `unified_dsets`: 1 = cervical, 2 = uterine,
# 3 = uterine endometrial.
one_off_prefix <- paste0(dirs$analysis_dir, "/one_off/")
full_overlap_suffix <- paste0("_hub_thresh_", hub_con_thresh, "_full_overlap_gene_summary.csv")
demg_univar_suffix <- "_demg_uninvar_overlap_gene_summary.csv"

cesc_full_ovrlap_df <- read_csv(paste0(one_off_prefix, unified_dsets[1], full_overlap_suffix))
cesc_demg_univar_df <- read_csv(paste0(one_off_prefix, unified_dsets[1], demg_univar_suffix))

ucs_full_ovrlap_df <- read_csv(paste0(one_off_prefix, unified_dsets[2], full_overlap_suffix))
ucs_demg_univar_df <- read_csv(paste0(one_off_prefix, unified_dsets[2], demg_univar_suffix))

ucec_full_ovrlap_df <- read_csv(paste0(one_off_prefix, unified_dsets[3], full_overlap_suffix))
ucec_demg_univar_df <- read_csv(paste0(one_off_prefix, unified_dsets[3], demg_univar_suffix))


[36m--[39m [1m[1mColumn specification[1m[22m [36m------------------------------------------------------------------------------------------------[39m
cols(
  gene_id = [31mcol_character()[39m,
  hgnc_ids = [32mcol_double()[39m,
  division = [31mcol_character()[39m,
  category = [31mcol_character()[39m,
  gene_name = [31mcol_character()[39m
)



[36m--[39m [1m[1mColumn specification[1m[22m [36m------------------------------------------------------------------------------------------------[39m
cols(
  gene_id = [31mcol_character()[39m,
  hgnc_ids = [32mcol_double()[39m,
  division = [31mcol_character()[39m,
  category = [31mcol_character()[39m,
  gene_name = [31mcol_character()[39m
)



[36m--[39m [1m[1mColumn specification[1m[22m [36m------------------------------------------------------------------------------------------------[39m
cols(
  gene_id = [31mcol_character()[39m,
  hgnc_ids = [32mcol_double()[39m,
  division = [31mcol_charact

In [22]:
# Tally genes per matrisome division for each summary table. Every tally is a
# top-level expression so the notebook displays it right after its label.
cat("Cervical Full")
count(cesc_full_ovrlap_df, division)

cat("Cervical DEMG & univar.")
count(cesc_demg_univar_df, division)

cat("Uterine Full")
count(ucs_full_ovrlap_df, division)

cat("Uterine DEMG & univar.")
count(ucs_demg_univar_df, division)

cat("Uterine Endometrial Full")
count(ucec_full_ovrlap_df, division)

cat("Uterine Endometrial DEMG & univar.")
count(ucec_demg_univar_df, division)

Cervical Full

Unnamed: 0_level_0,division,n
Unnamed: 0_level_1,<chr>,<int>
1,Core matrisome,1
2,Matrisome-associated,21


Cervical DEMG & univar.

Unnamed: 0_level_0,division,n
Unnamed: 0_level_1,<chr>,<int>
1,Core matrisome,40
2,Matrisome-associated,82


Uterine Full

Unnamed: 0_level_0,division,n
Unnamed: 0_level_1,<chr>,<int>
1,Core matrisome,1
2,Matrisome-associated,8


Uterine DEMG & univar.

Unnamed: 0_level_0,division,n
Unnamed: 0_level_1,<chr>,<int>
1,Core matrisome,8
2,Matrisome-associated,29


Uterine Endometrial Full

Unnamed: 0_level_0,division,n
Unnamed: 0_level_1,<chr>,<int>
1,Core matrisome,2
2,Matrisome-associated,5


Uterine Endometrial DEMG & univar.

Unnamed: 0_level_0,division,n
Unnamed: 0_level_1,<chr>,<int>
1,Core matrisome,20
2,Matrisome-associated,62
