In [8]:
library(tidyverse)
library(WGCNA)
library(cowplot)
library(matrixStats)

# Custom package
library(rutils)

# Load data

In [9]:
# Session-wide setting: do not convert strings to factors when data frames
# are built.
options(stringsAsFactors = FALSE)

# Let WGCNA run its computations in parallel worker processes.
WGCNA::enableWGCNAThreads()

Allowing parallel execution with up to 15 working processes.


In [10]:
# Project directory locations, resolved from the dev paths file that sits one
# level above this notebook.
dirs <- rutils::get_dev_directories(
    dev_paths_file = "../dev_paths.txt"
)

# Names of the unified datasets; each name is used as a sub-directory of
# dirs$data_dir when file paths are built.
unified_dsets <- c(
    "unified_cervical_data",
    "unified_uterine_data",
    "unified_uterine_endometrial_data"
)

# Numeric coding of vital status handed to load_survival_df().
event_code <- list(Alive = 0, Dead = 1)

In [11]:
# Position in unified_dsets of the dataset processed by this notebook
# (3 selects "unified_uterine_endometrial_data").
dset_idx <- 3

## Simple data

In [12]:
# Sample annotation table (sample_name, condition, data_source) for the
# selected dataset.
coldata_df <- paste0(
    dirs$data_dir, "/", unified_dsets[dset_idx], "/coldata.tsv"
) %>%
    read_tsv()

# Count table from matrisome_counts.tsv: a geneID column followed by numeric
# columns.
m_counts_df <- paste0(
    dirs$data_dir, "/", unified_dsets[dset_idx], "/matrisome_counts.tsv"
) %>%
    read_tsv()

# Count table from norm_matrisome_counts.tsv, same layout as above.
nm_counts_df <- paste0(
    dirs$data_dir, "/", unified_dsets[dset_idx], "/norm_matrisome_counts.tsv"
) %>%
    read_tsv()

Parsed with column specification:
cols(
  sample_name = col_character(),
  condition = col_character(),
  data_source = col_character()
)
Parsed with column specification:
cols(
  .default = col_double(),
  geneID = col_character()
)
See spec(...) for full column specifications.
Parsed with column specification:
cols(
  .default = col_double(),
  geneID = col_character()
)
See spec(...) for full column specifications.


## Manipulated data

In [13]:
# Clinical table for tumor samples: one row per sample with vital status,
# survival time and one-hot encoded FIGO stage columns.
tumor_clinical_data_df <- paste0(
    dirs$data_dir, "/", unified_dsets[dset_idx], "/survival_data.tsv"
) %>%
    load_survival_df(event_code) %>%
    dplyr::select(sample_name, vital_status, survival_time, figo_stage) %>%
    # Keep only samples with no missing value in any of the four columns
    dplyr::filter(complete.cases(.)) %>%
    decode_figo_stage(to = "n") %>%
    dplyr::mutate(figo_stage = as.character(figo_stage)) %>%
    # Append the one-hot columns derived from figo_stage, then drop the
    # original column
    dplyr::bind_cols(to_one_hot(., col = "figo_stage")) %>%
    dplyr::select(-figo_stage) %>%
    # Restricting to tumor samples is probably redundant (FIGO stage should be
    # NA for healthy samples, which were already dropped), but it is done
    # explicitly anyway
    inner_join(coldata_df, by = "sample_name") %>%
    dplyr::filter(condition == "tumor") %>%
    dplyr::select(-condition, -data_source)

# Restrict both count tables to the gene ID column plus the tumor samples that
# survived the clinical filtering. Columns come out in the order requested,
# so geneID is first.
tumor_m_counts_df <- dplyr::select(
    m_counts_df,
    one_of("geneID", tumor_clinical_data_df$sample_name)
)

tumor_nm_counts_df <- dplyr::select(
    nm_counts_df,
    one_of("geneID", tumor_clinical_data_df$sample_name)
)

# IDs of genes whose counts sum to zero across every tumor sample. The geneID
# column is the only non-sample column, so it is excluded from the row sums.
unexpressed_genes <- tumor_m_counts_df %>%
    dplyr::filter(rowSums(dplyr::select(., -geneID)) == 0) %>%
    dplyr::pull(geneID)

# Expression table for the network analysis: transpose the counts read from
# norm_matrisome_counts.tsv with transpose_df(), drop the columns named after
# unexpressed genes, and move sample_name into the row names.
data_expr <- transpose_df(tumor_nm_counts_df, "geneID", "sample_name") %>%
    dplyr::select(-one_of(unexpressed_genes)) %>%
    tibble::column_to_rownames(var = "sample_name")

# Trait table: the tumor clinical data with sample_name moved to row names.
# NOTE(review): rows are not explicitly matched against rownames(data_expr);
# this presumably relies on every clinical sample being present in the count
# tables in the same order -- confirm.
data_traits <- tibble::column_to_rownames(
    tumor_clinical_data_df,
    var = "sample_name"
)

Parsed with column specification:
cols(
  sample_name = col_character(),
  vital_status = col_character(),
  survival_time = col_double(),
  figo_stage = col_character(),
  days_to_last_follow_up = col_double(),
  days_to_death = col_double(),
  age_at_diagnosis = col_double(),
  age_at_index = col_double(),
  height = col_double(),
  weight = col_double(),
  bmi = col_double(),
  race = col_character(),
  ethnicity = col_character()
)


In [14]:
# Make sure the output directory exists; the warning raised when it is already
# there is suppressed.
dir.create(
    paste0(dirs$data_dir, "/saved_network_objects/"),
    showWarnings = FALSE
)

# Persist the expression and trait tables together in one .RData file named
# after the selected dataset.
save(
    list = c("data_expr", "data_traits"),
    file = paste0(
        dirs$data_dir, "/saved_network_objects/",
        unified_dsets[dset_idx], "_tumor_data.RData"
    )
)