In [7]:
library(tidyverse)

# Custom package
library(rutils)

In [8]:
# Resolve the project directories from the shared dev-paths file.
dirs <- rutils::get_dev_directories(dev_paths_file = "../dev_paths.txt")

# Names of the unified datasets; the one to analyze is chosen by index.
unified_dsets <- c(
    "unified_cervical_data",
    "unified_uterine_data",
    "unified_uterine_endometrial_data"
)

# Location of the matrisome master list (TSV) inside the data directory.
# file.path() joins the components with "/" instead of relying on a
# separator hand-embedded in a pasted string.
matrisome_path <- file.path(dirs$data_dir, "matrisome", "matrisome_hs_masterlist.tsv")

In [9]:
# Index into `unified_dsets` selecting the dataset to analyze.
dset_idx <- 1

# Hub connectivity threshold; its value is pasted into the name of the
# gene-list file that gets read. Alternative setting previously used: 0.25
hub_con_thresh <- 0

In [10]:
# Load the matrisome master list and keep three columns only: the gene
# symbol (exposed under the name `geneID`), the division and the category.
# The rename happens inside select(), so the column order is
# geneID, division, category.
matrisome_df <- select(
    load_matrisome_df(matrisome_path),
    geneID = gene_symbol,
    division,
    category
)


[36m--[39m [1m[1mColumn specification[1m[22m [36m------------------------------------------------------------------------------------------------[39m
cols(
  Division = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  `Gene Symbol` = [31mcol_character()[39m,
  `Gene Name` = [31mcol_character()[39m,
  Synonyms = [31mcol_character()[39m,
  HGNC_IDs = [32mcol_double()[39m,
  `HGNC_IDs Links` = [32mcol_double()[39m,
  UniProt_IDs = [31mcol_character()[39m,
  Refseq_IDs = [31mcol_character()[39m,
  Orthology = [31mcol_character()[39m,
  Notes = [31mcol_character()[39m
)




In [11]:
# Read the DESeq results table of the selected dataset from the "deg"
# folder of the analysis directory.
deseq_df <- paste0(
    dirs$analysis_dir,
    "/deg/",
    unified_dsets[dset_idx],
    "_DESeq_results.tsv"
) %>%
    read_tsv()


[36m--[39m [1m[1mColumn specification[1m[22m [36m------------------------------------------------------------------------------------------------[39m
cols(
  geneID = [31mcol_character()[39m,
  baseMean = [32mcol_double()[39m,
  log2FoldChange = [32mcol_double()[39m,
  lfcSE = [32mcol_double()[39m,
  stat = [32mcol_double()[39m,
  pvalue = [32mcol_double()[39m,
  padj = [32mcol_double()[39m,
  qval = [32mcol_double()[39m
)




In [12]:
# Gene list stored in the "_umsmg_demg_list.txt" file of the selected
# dataset, read as one entry per line.
umsmg_demg_list <- paste0(
    dirs$analysis_dir,
    "/gene_lists/",
    unified_dsets[dset_idx],
    "_umsmg_demg_list.txt"
) %>%
    read_lines()

# Gene list stored in the hub-threshold-specific file; the threshold value
# is embedded in the file name.
umsmg_nsmg_demg_list <- paste0(
    dirs$analysis_dir,
    "/gene_lists/",
    unified_dsets[dset_idx],
    "_umsmg_nsmg_hub_thresh_",
    hub_con_thresh,
    "_demg_list.txt"
) %>%
    read_lines()

# UMSMG $\cap$ DEMG breakdown

In [13]:
# Matrisome annotations restricted to the genes in `umsmg_demg_list`.
umsmg_demg_matrisome_df <- matrisome_df %>%
    filter(geneID %in% umsmg_demg_list)

# Number of genes per matrisome division. count(col) tallies directly and
# returns an ungrouped tibble, unlike group_by() %>% count(), which leaves
# the result grouped.
umsmg_demg_matrisome_df %>%
    count(division)

# Number of genes per matrisome category.
umsmg_demg_matrisome_df %>%
    count(category)

division,n
<chr>,<int>
Core matrisome,40
Matrisome-associated,82


category,n
<chr>,<int>
Collagens,5
ECM Glycoproteins,30
ECM Regulators,22
ECM-affiliated Proteins,22
Proteoglycans,5
Secreted Factors,38


# Full intersection (UMSMG $\cap$ NSMG $\cap$ DEMG) breakdown

In [14]:
# Matrisome annotations restricted to the genes in `umsmg_nsmg_demg_list`.
umsmg_nsmg_demg_matrisome_df <- matrisome_df %>%
    filter(geneID %in% umsmg_nsmg_demg_list)

# Number of genes per matrisome division. count(col) tallies directly and
# returns an ungrouped tibble, unlike group_by() %>% count(), which leaves
# the result grouped.
umsmg_nsmg_demg_matrisome_df %>%
    count(division)

# Number of genes per matrisome category.
umsmg_nsmg_demg_matrisome_df %>%
    count(category)

division,n
<chr>,<int>
Core matrisome,1
Matrisome-associated,21


category,n
<chr>,<int>
ECM Glycoproteins,1
ECM Regulators,8
ECM-affiliated Proteins,7
Secreted Factors,6


# Pair with DE data

In [19]:
# Differential-expression statistics for the genes in
# `umsmg_nsmg_demg_list`, sorted by gene.
# change_dir is "up" for a positive log2 fold change and "down" otherwise;
# baseMean and log2FoldChange are shown as base_mean and lfc.
deseq_df %>%
    filter(geneID %in% umsmg_nsmg_demg_list) %>%
    mutate(change_dir = ifelse(log2FoldChange > 0, "up", "down")) %>%
    select(
        geneID,
        change_dir,
        base_mean = baseMean,
        lfc = log2FoldChange,
        qval
    ) %>%
    arrange(geneID)

geneID,change_dir,base_mean,lfc,qval
<chr>,<chr>,<dbl>,<dbl>,<dbl>
A2ML1,down,8274.666005,-1.043458,0.04063069
ADAM20,down,5.021051,-2.384743,5.302013e-19
ADAMTS18,down,85.515119,-1.606028,0.01966841
BMP3,up,366.625353,3.928452,2.126623e-06
CLEC2A,up,50.429286,4.845719,0.0002004929
CSTB,up,37130.026057,1.3112,0.0001174906
CXCL2,up,763.507649,2.948239,1.08313e-07
FLG2,down,11.260223,-1.664013,0.0201063
HRNR,down,46.096473,-1.435467,0.002750615
LGALS7,up,2053.21953,4.519554,2.667718e-10
