In [1]:
library(tidyverse)

# Custom package
library(rutils)

-- [1mAttaching packages[22m -------------------------------------------------------------------- tidyverse 1.3.0 --

[32mv[39m [34mggplot2[39m 3.3.3     [32mv[39m [34mpurrr  [39m 0.3.4
[32mv[39m [34mtibble [39m 3.0.6     [32mv[39m [34mdplyr  [39m 1.0.4
[32mv[39m [34mtidyr  [39m 1.1.2     [32mv[39m [34mstringr[39m 1.4.0
[32mv[39m [34mreadr  [39m 1.4.0     [32mv[39m [34mforcats[39m 0.5.1

-- [1mConflicts[22m ----------------------------------------------------------------------- tidyverse_conflicts() --
[31mx[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31mx[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()



In [2]:
# TCGA project IDs and the unified dataset names used to build file paths below.
projects <- c("TCGA-CESC", "TCGA-UCS", "TCGA-UCEC", "TCGA-OV")
unified_dsets <- c(
    "unified_cervical_data",
    "unified_uterine_data",
    "unified_uterine_endometrial_data"
)

# Project directories (data_dir, analysis_dir are used in this notebook),
# obtained from rutils using the dev paths file.
dirs <- rutils::get_dev_directories(dev_paths_file = "../dev_paths.txt")

# Location of the matrisome master list inside the data directory.
matrisome_list <- paste0(dirs$data_dir, "/matrisome/matrisome_hs_masterlist.tsv")

In [3]:
# Run-level switches.
save_lists <- TRUE  # when TRUE, the gene lists are written to disk at the end
dset_idx <- 3       # index into unified_dsets selecting the dataset to analyse

In [4]:
# Significance / effect-size cut-offs, grouped by the analysis that uses them.

# Differential expression
lfc_thresh <- log2(2)
q_deg_thresh <- 0.05

# FIGO-related screens (Welch ANOVA, point-biserial correlation)
q_anova_thresh <- 0.05
q_pbc_thresh <- 0.05

# Survival-related screens (censored time screen, univariate KM/Cox PH)
q_cts_thresh <- 0.05
q_univ_surv_thresh <- 0.05

# WGCNA (hub connectivity, module eigengene, module membership)
hub_con_thresh <- 0
q_me_thresh <- 0.05
p_mm_thresh <- 0.05

# Defined here but not referenced in the cells visible in this notebook section
p_thresh <- 0.05
cph_coeff_thresh <- 0.0

# Functions

# Data

In [5]:
# Path pieces shared by the inputs of the selected dataset.
dset_name <- unified_dsets[dset_idx]
fs_prefix <- paste0(dirs$analysis_dir, "/feature_selection/", dset_name)
network_prefix <- paste0(dirs$analysis_dir, "/network/", dset_name)
saved_network_prefix <- paste0(dirs$data_dir, "/saved_network_objects/", dset_name)

# Matrisome annotation, then normalized counts (all genes / matrisome genes only).
matrisome_df <- dplyr::select(
    rutils::load_matrisome_df(matrisome_list),
    gene_symbol, division, category
)
norm_counts_df <- read_tsv(paste0(dirs$data_dir, "/", dset_name, "/", "norm_counts.tsv"))
m_norm_counts_df <- dplyr::filter(norm_counts_df, geneID %in% matrisome_df$gene_symbol)

# DESeq results with two columns renamed to snake_case.
deseq_results_df <- rename(
    read_tsv(paste0(dirs$analysis_dir, "/deg/", dset_name, "_DESeq_results.tsv")),
    base_mean = baseMean, lfc = log2FoldChange
)

# Survival feature-selection results.
en_cph_df <- read_tsv(paste0(fs_prefix, "_en_cph_results.tsv"))
cts_df <- read_tsv(paste0(fs_prefix, "_survival_cts_results.tsv"))
univ_survival_df <- read_tsv(paste0(fs_prefix, "_univ_survival_results.tsv"))

# FIGO feature-selection results.
en_multinom_df <- read_tsv(paste0(fs_prefix, "_en_multinom_results.tsv"))
pbc_df <- read_tsv(paste0(fs_prefix, "_pbc_results.tsv"))
anova_df <- read_tsv(paste0(fs_prefix, "_welch_anova_results.tsv"))

# Network tables, then the saved network objects.
network_mm_gs_df <- read_tsv(paste0(network_prefix, "_gene_mm_gs.tsv"))
network_me_sig_df <- read_tsv(paste0(network_prefix, "_eigengene_traits.tsv"))
# load() restores objects into the workspace; lnames ends up holding the names
# restored by the second call only. Kept last so restored objects cannot
# interfere with the path variables above.
lnames <- load(file = paste0(saved_network_prefix, "_tumor_data.RData"))
lnames <- load(file = paste0(saved_network_prefix, "_tumor_network.RData"))


[36m--[39m [1m[1mColumn specification[1m[22m [36m-------------------------------------------------------------------------------------[39m
cols(
  Division = [31mcol_character()[39m,
  Category = [31mcol_character()[39m,
  `Gene Symbol` = [31mcol_character()[39m,
  `Gene Name` = [31mcol_character()[39m,
  Synonyms = [31mcol_character()[39m,
  HGNC_IDs = [32mcol_double()[39m,
  `HGNC_IDs Links` = [32mcol_double()[39m,
  UniProt_IDs = [31mcol_character()[39m,
  Refseq_IDs = [31mcol_character()[39m,
  Orthology = [31mcol_character()[39m,
  Notes = [31mcol_character()[39m
)



[36m--[39m [1m[1mColumn specification[1m[22m [36m-------------------------------------------------------------------------------------[39m
cols(
  .default = col_double(),
  geneID = [31mcol_character()[39m
)
[36mi[39m Use [30m[47m[30m[47m`spec()`[47m[30m[49m[39m for the full column specifications.



[36m--[39m [1m[1mColumn specification[1m[22m [36m----------

# DGE analysis

## DEG

In [6]:
# Summarise differential expression over all genes, then show the first four
# entries of the summary (the gene vector itself is used further down).
deg_meta_ls <- deg_meta(
    deseq_results_df,
    lfc_thresh,
    q_deg_thresh,
    nrow(norm_counts_df)
)
deg_meta_ls[seq_len(4)]

## DEMG

In [7]:
# Restrict the DESeq results to matrisome genes and summarise them the same way
# as the full gene set.
m_deseq_results_df <- dplyr::filter(
    deseq_results_df,
    geneID %in% matrisome_df$gene_symbol
)
demg_meta_ls <- deg_meta(
    m_deseq_results_df,
    lfc_thresh,
    q_deg_thresh,
    nrow(m_norm_counts_df)
)
demg_meta_ls[seq_len(4)]

# Uni/multivariate analysis

## FIGO

### LASSO multinomial regression

In [8]:
# Every row of the multinomial results table counts as a selected gene.
en_multinom_meta_ls <- list(
    n_var = nrow(en_multinom_df),
    genes = en_multinom_df$geneID
)
# Show only the count, not the gene vector.
en_multinom_meta_ls["n_var"]

### Welch ANOVA

In [9]:
# Alternative kept for reference:
# anova_meta_ls <- simple_test_meta(anova_df, q_anova_thresh)

# Genes whose Welch ANOVA q-value is below the threshold.
anova_meta_df <- dplyr::filter(anova_df, qval < q_anova_thresh)
anova_meta_ls <- list(
    n_sig = nrow(anova_meta_df),
    genes = anova_meta_df$geneID
)
# Show only the count, not the gene vector.
anova_meta_ls["n_sig"]

### Point-biserial correlation with FIGO

In [10]:
# Condense the point-biserial results with condense_figo(), then keep genes
# whose figo_min_qval is below the threshold.
pbc_meta_df <- dplyr::filter(
    condense_figo(pbc_df, include_pvals = TRUE),
    figo_min_qval < q_pbc_thresh
)
pbc_meta_ls <- list(
    n_sig = nrow(pbc_meta_df),
    genes = pbc_meta_df$geneID
)
# Show only the count, not the gene vector.
pbc_meta_ls["n_sig"]

In [11]:
# Pairwise overlap sizes between the three FIGO-related gene sets, in order:
# LASSO multinomial vs Welch ANOVA, LASSO multinomial vs point-biserial,
# Welch ANOVA vs point-biserial. Each expression is displayed separately.
length(intersect(en_multinom_meta_ls$genes, anova_meta_ls$genes))
length(intersect(en_multinom_meta_ls$genes, pbc_meta_ls$genes))
length(intersect(anova_meta_ls$genes, pbc_meta_ls$genes))

## Survival

### LASSO Cox PH

In [12]:
# Every row of the Cox PH results table counts as a selected gene.
en_cph_meta_ls <- list(
    n_var = nrow(en_cph_df),
    genes = en_cph_df$geneID
)
# Show only the count, not the gene vector.
en_cph_meta_ls["n_var"]

### Univariate KM/Cox PH

In [13]:
# A gene passes the univariate survival screen when the smaller of its KM and
# Cox PH q-values is below the threshold. pmin() takes the element-wise minimum
# of the two columns in one vectorized call, so no rowwise() pass is needed;
# like min(), it yields NA when either value is NA, and filter() drops those rows.
univ_survival_meta_df <- univ_survival_df %>%
    mutate(min_qval = pmin(km_qval, cph_qval)) %>%
    dplyr::filter(min_qval < q_univ_surv_thresh)

univ_survival_meta_ls <- list(
    n_sig = nrow(univ_survival_meta_df),
    genes = univ_survival_meta_df$geneID
)
# Show only the count, not the gene vector.
univ_survival_meta_ls[1]

### Censored time screen

In [14]:
# Genes whose vital_qval from the censored time screen is below the threshold.
cts_meta_df <- dplyr::filter(cts_df, vital_qval < q_cts_thresh)
cts_meta_ls <- list(
    n_sig = nrow(cts_meta_df),
    genes = cts_meta_df$geneID
)
# Show only the count, not the gene vector.
cts_meta_ls["n_sig"]

In [15]:
# Genes passing the univariate Cox PH q-value threshold alone.
univ_cph_list <- dplyr::pull(
    dplyr::filter(univ_survival_df, cph_qval < q_univ_surv_thresh),
    geneID
)

# Jaccard index (|intersection| / |union|) between the censored time screen
# genes and the univariate Cox PH genes.
length(intersect(cts_meta_ls$genes, univ_cph_list)) /
    length(union(cts_meta_ls$genes, univ_cph_list))

In [16]:
# Pairwise overlap sizes between the three survival-related gene sets, in order:
# LASSO Cox PH vs censored time screen, LASSO Cox PH vs univariate KM/Cox PH,
# univariate KM/Cox PH vs censored time screen. Each expression is displayed separately.
length(intersect(en_cph_meta_ls$genes, cts_meta_ls$genes))
length(intersect(en_cph_meta_ls$genes, univ_survival_meta_ls$genes))
length(intersect(univ_survival_meta_ls$genes, cts_meta_ls$genes))

# WGCNA

In [17]:
# get_most_conn_genes() output is row-bound into one table, with the source
# element name stored in a "module" column; geneID is then moved to the front.
# data_expr, module_colors and soft_power are not assigned in this notebook
# section (presumably restored by the load() calls above - confirm).
hub_df <- dplyr::select(
    bind_rows(
        get_most_conn_genes(
            data_expr,
            module_colors,
            soft_power,
            conn_vs_hub_thresh = hub_con_thresh
        ),
        .id = "module"
    ),
    geneID, everything()
)





In [18]:
# Summarise the network results using the eigengene/trait table, the module
# membership table, their thresholds and the hub gene IDs; show the first two
# entries of the summary.
wgcna_meta_ls <- wgcna_meta(
    network_me_sig_df,
    network_mm_gs_df,
    q_me_thresh,
    p_mm_thresh,
    hub_df$geneID
)
wgcna_meta_ls[seq_len(2)]

## TOM meta

In [19]:
# Label both dimensions of tom with the column names of data_expr
# (row names first, then column names).
dimnames(tom) <- list(colnames(data_expr), colnames(data_expr))

# Quantiles of the per-row sums of tom, rounded to one decimal place.
round(quantile(rowSums(tom)), digits = 1)

# Create Lists

In [20]:
# Differentially expressed genes: all genes and matrisome-only.
deg_list <- deg_meta_ls$genes
demg_list <- demg_meta_ls$genes

# Union of the three FIGO-related screens.
figo_umsmg_list <- Reduce(
    union,
    list(en_multinom_meta_ls$genes, pbc_meta_ls$genes, anova_meta_ls$genes)
)

# Union of the three survival-related screens.
survival_umsmg_list <- Reduce(
    union,
    list(en_cph_meta_ls$genes, cts_meta_ls$genes, univ_survival_meta_ls$genes)
)

# Genes reported by the WGCNA summary.
figo_nsmg_list <- wgcna_meta_ls$genes

# Disabled alternatives kept for reference:
# all_umsmg_list <- intersect(figo_umsmg_list, survival_umsmg_list)
# figo_umsmg_demg_list <- intersect(figo_umsmg_list, demg_list)
# survival_umsmg_demg_list <- intersect(survival_umsmg_list, demg_list)
# figo_umsmg_nsmg_demg_list <- intersect(intersect(figo_umsmg_list, figo_nsmg_list), demg_list)

# FIGO list: (screen genes plus network genes) restricted to the matrisome DE genes.
figo_list <- intersect(union(figo_umsmg_list, figo_nsmg_list), demg_list)

# Survival list: screen genes restricted to the matrisome DE genes.
survival_list <- intersect(survival_umsmg_list, demg_list)

# Genes present in both final lists.
full_overlap_list <- intersect(figo_list, survival_list)

In [23]:
# Sizes of the gene lists built above; each expression is displayed separately,
# in this order.
length(deg_list)
length(demg_list)
length(figo_umsmg_list)
length(survival_umsmg_list)
length(figo_list)
length(survival_list)
length(full_overlap_list)

# Disabled: an earlier set of size checks. Several of these refer to lists
# whose definitions are commented out in the list-creation cell.
# length(deg_list)
# length(demg_list)
# length(figo_umsmg_list)
# length(figo_nsmg_list)
# length(survival_umsmg_list)
# length(all_umsmg_list)
# length(figo_umsmg_demg_list)
# length(survival_umsmg_demg_list)
# length(figo_umsmg_nsmg_demg_list)

# Save lists

In [22]:
# Write each gene list, one gene per line, to
# <analysis_dir>/gene_lists/<dataset>_<list name>.txt when saving is enabled.
# Only these five lists are written; figo_list, survival_list and
# full_overlap_list are not.
if (save_lists) {
    list(
        deg_list = deg_list,
        demg_list = demg_list,
        figo_umsmg_list = figo_umsmg_list,
        survival_umsmg_list = survival_umsmg_list,
        figo_nsmg_list = figo_nsmg_list
    ) %>%
        iwalk(function(genes, list_name) {
            write_lines(
                genes,
                paste0(dirs$analysis_dir, "/gene_lists/", unified_dsets[dset_idx], "_", list_name, ".txt")
            )
        })
}