In [1]:
library(tidyverse)

# Custom package
library(rutils)

-- [1mAttaching packages[22m ------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --

[32mv[39m [34mggplot2[39m 3.3.3     [32mv[39m [34mpurrr  [39m 0.3.4
[32mv[39m [34mtibble [39m 3.0.6     [32mv[39m [34mdplyr  [39m 1.0.4
[32mv[39m [34mtidyr  [39m 1.1.2     [32mv[39m [34mstringr[39m 1.4.0
[32mv[39m [34mreadr  [39m 1.4.0     [32mv[39m [34mforcats[39m 0.5.1

-- [1mConflicts[22m ---------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
[31mx[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31mx[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()



In [2]:
# Directory paths for this project; `data_dir` and `analysis_dir` are read from it below
dirs <- rutils::get_dev_directories(dev_paths_file = "../dev_paths.txt")

# TCGA project identifiers
projects <- c(
    "TCGA-CESC",
    "TCGA-UCS",
    "TCGA-UCEC",
    "TCGA-OV"
)

# Unified dataset names; one of these is picked with `dset_idx`
unified_dsets <- c(
    "unified_cervical_data",
    "unified_uterine_data",
    "unified_uterine_endometrial_data"
)

# Location of the matrisome master list inside the data directory
matrisome_list <- paste0(
    dirs$data_dir,
    "/matrisome/matrisome_hs_masterlist.tsv"
)

In [3]:
# Position within `unified_dsets` of the dataset analysed in this notebook
dset_idx <- 1

In [12]:
# Analysis thresholds ----

# Significance level (NOTE(review): not used in the cells visible here)
p_thresh <- 0.05

# Minimum absolute log2 fold change; log2(2) == 1, i.e. a two-fold change
lfc_thresh <- log2(2)

# q-value cutoff passed to deg_meta() when calling differentially expressed genes
q_deg_thresh <- 0.05

# NOTE(review): the cutoffs below are not used in the visible cells; presumably
# they feed later consensus / network steps -- confirm against the rest of the notebook
coxph_coeff_thresh <- 0
mi_thresh <- 0
consensus_thresh <- 0
consensus_n <- 5
hub_con_thresh <- 0

# Functions

In [21]:
# Matrisome master list reduced to the gene symbol and its two annotation columns
matrisome_df <- dplyr::select(
    rutils::load_matrisome_df(matrisome_list),
    dplyr::all_of(c("gene_symbol", "division", "category"))
)


# Add a logical `consensus` column to `df`.
#
# A row is flagged TRUE when at least `n` of its columns whose names contain
# "mean" hold a value strictly greater than `thresh`.
#
# df:     data frame with a `geneID` column and one or more "mean" columns
# n:      minimum number of "mean" columns that must exceed `thresh`
# thresh: value a "mean" column has to exceed to be counted (default 0)
#
# Returns `df` with the extra `consensus` column.
get_consensus_col <- function(df, n, thresh = 0) {
    # geneID is selected first so that dropping column 1 leaves only the "mean" columns
    mean_cols <- dplyr::select(df, geneID, contains("mean"))[-1]
    n_above_thresh <- rowSums(mean_cols > thresh)
    consensus_flag <- n_above_thresh >= n
    dplyr::mutate(df, consensus = consensus_flag)
}

# Summarise differential-expression calls in a results table.
#
# df:         data frame with `lfc` and `qval` columns
# lfc_thresh: a row counts only if abs(lfc) is strictly above this value
# q_thresh:   a row counts only if qval is strictly below this value
# n:          denominator used for the proportion
#
# Returns a list with:
#   n_deg    - number of rows passing both cutoffs
#   deg_prop - n_deg / n
#   n_up     - passing rows with lfc > 0
#   n_down   - passing rows with lfc < 0
deg_meta <- function(df, lfc_thresh, q_thresh, n) {
    # filter() drops rows where either condition is NA, so `lfc` below has no NAs
    sig_df <- dplyr::filter(df, abs(lfc) > lfc_thresh, qval < q_thresh)
    n_sig <- nrow(sig_df)
    list(
        n_deg = n_sig,
        deg_prop = n_sig / n,
        n_up = sum(sig_df$lfc > 0),
        n_down = sum(sig_df$lfc < 0)
    )
}


[36m--[39m [1m[1mColumn specification[1m[22m [36m------------------------------------------------------------------------------------------------------------------------[39m
cols(
  Division = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  `Gene Symbol` = [31mcol_character()[39m,
  `Gene Name` = [31mcol_character()[39m,
  Synonyms = [31mcol_character()[39m,
  HGNC_IDs = [32mcol_double()[39m,
  `HGNC_IDs Links` = [32mcol_double()[39m,
  UniProt_IDs = [31mcol_character()[39m,
  Refseq_IDs = [31mcol_character()[39m,
  Orthology = [31mcol_character()[39m,
  Notes = [31mcol_character()[39m
)




# Data

In [31]:
# Counts table ("norm_counts.tsv") for the dataset selected by `dset_idx`
norm_counts_df <- readr::read_tsv(
    paste0(dirs$data_dir, "/", unified_dsets[dset_idx], "/", "norm_counts.tsv")
)

# Same table restricted to genes listed in the matrisome master list
m_norm_counts_df <- norm_counts_df %>%
    dplyr::filter(geneID %in% matrisome_df$gene_symbol)

# DESeq results for the same dataset, with two columns renamed to snake_case.
# `read_tsv` and `rename` are namespace-qualified like every other verb in this
# notebook, so a later-attached package exporting `rename` cannot mask them.
deseq_results_df <- readr::read_tsv(
    paste0(dirs$analysis_dir, "/deg/", unified_dsets[dset_idx], "_DESeq_results.tsv")
) %>%
    dplyr::rename(base_mean = baseMean, lfc = log2FoldChange)


[36m--[39m [1m[1mColumn specification[1m[22m [36m------------------------------------------------------------------------------------------------------------------------[39m
cols(
  .default = col_double(),
  geneID = [31mcol_character()[39m
)
[36mi[39m Use [30m[47m[30m[47m`spec()`[47m[30m[49m[39m for the full column specifications.



[36m--[39m [1m[1mColumn specification[1m[22m [36m------------------------------------------------------------------------------------------------------------------------[39m
cols(
  geneID = [31mcol_character()[39m,
  baseMean = [32mcol_double()[39m,
  log2FoldChange = [32mcol_double()[39m,
  lfcSE = [32mcol_double()[39m,
  stat = [32mcol_double()[39m,
  pvalue = [32mcol_double()[39m,
  padj = [32mcol_double()[39m,
  qval = [32mcol_double()[39m
)




# DGE analysis

## Differentially expressed genes (DEG)

In [32]:
# DEG summary over all genes; the proportion is relative to the rows of norm_counts_df
deg_meta(
    deseq_results_df,
    lfc_thresh = lfc_thresh,
    q_thresh = q_deg_thresh,
    n = nrow(norm_counts_df)
)

## Differentially expressed matrisome genes (DEMG)

In [33]:
# DESeq results restricted to genes listed in the matrisome master list
m_deseq_results_df <- dplyr::filter(
    deseq_results_df,
    geneID %in% matrisome_df$gene_symbol
)

# Same summary as for all genes; the proportion is relative to the rows of m_norm_counts_df
deg_meta(
    m_deseq_results_df,
    lfc_thresh = lfc_thresh,
    q_thresh = q_deg_thresh,
    n = nrow(m_norm_counts_df)
)