In [1]:
library(tidyverse)

# Custom package
library(rutils)

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
✔ ggplot2 3.3.2     ✔ purrr   0.3.4
✔ tibble  3.0.3     ✔ dplyr   1.0.0
✔ tidyr   1.1.0     ✔ stringr 1.4.0
✔ readr   1.3.1     ✔ forcats 0.5.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()


In [2]:
# Directory settings returned by rutils::get_dev_directories() for
# "../dev_paths.txt"; the code below reads its `data_dir` element.
dirs <- rutils::get_dev_directories(dev_paths_file = "../dev_paths.txt")
projects <- c("TCGA-CESC", "TCGA-UCS", "TCGA-UCEC", "TCGA-OV")
# NOTE(review): `projects` has four entries but `unified_dsets` has three
# (nothing here looks like a counterpart of "TCGA-OV") -- confirm intended.
unified_dsets <- c(
  "unified_cervical_data",
  "unified_uterine_data",
  "unified_uterine_endometrial_data"
)
# Build the path with file.path() instead of pasting "/" separators by hand.
matrisome_path <- file.path(
  dirs$data_dir, "matrisome", "matrisome_hs_masterlist.tsv"
)

In [3]:
# Position within `unified_dsets` of the dataset to analyse; written as an
# integer literal because it is used as a vector index.
i <- 1L

In [4]:
# Load the matrisome master list located at `matrisome_path`.
# NOTE(review): load_matrisome_df() is not defined in this notebook section;
# presumably it comes from the attached `rutils` package -- confirm.
matrisome_df <- load_matrisome_df(matrisome_path)

Parsed with column specification:
cols(
  Division = col_character(),
  Category = col_character(),
  `Gene Symbol` = col_character(),
  `Gene Name` = col_character(),
  Synonyms = col_character(),
  HGNC_IDs = col_double(),
  `HGNC_IDs Links` = col_double(),
  UniProt_IDs = col_character(),
  Refseq_IDs = col_character(),
  Orthology = col_character(),
  Notes = col_character()
)


# Load and filter survival data

In [6]:
# Numeric codes for the two vital-status labels (handed to load_survival_df()).
event_code <- list(Alive = 0, Dead = 1)

# Covariate column names, without and with the FIGO stage column.
covariate_cols_no_figo <- c(
  "age_at_diagnosis",
  "bmi",
  "race",
  "ethnicity"
)
covariate_cols <- append("figo_stage", covariate_cols_no_figo)

# Dependent (outcome) column names.
dep_cols <- c(
  "vital_status",
  "survival_time"
)

In [7]:
# Path to the survival table of the dataset selected by `i`; built with
# file.path() instead of pasting "/" separators by hand.
survival_path <- file.path(
  dirs$data_dir, unified_dsets[i], "survival_data.tsv"
)
# Load the table, passing `event_code` through to load_survival_df().
survival_df <- load_survival_df(survival_path, event_code)

Parsed with column specification:
cols(
  sample_name = col_character(),
  vital_status = col_character(),
  survival_time = col_double(),
  figo_stage = col_character(),
  days_to_last_follow_up = col_double(),
  days_to_death = col_double(),
  age_at_diagnosis = col_double(),
  age_at_index = col_double(),
  height = col_double(),
  weight = col_double(),
  bmi = col_double(),
  race = col_character(),
  ethnicity = col_character()
)


In [8]:
# Display the loaded survival table for inspection.
survival_df

sample_name,vital_status,survival_time,figo_stage,days_to_last_follow_up,days_to_death,age_at_diagnosis,age_at_index,height,weight,bmi,race,ethnicity
<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
TCGA-C5-A1BF-01B-11R-A13Y-07,1,570,Stage IB,,570,16975,46,,76,,white,not reported
TCGA-MU-A51Y-01A-11R-A26T-07,0,854,Stage IIA1,854,,10199,27,,31,,white,not reported
TCGA-EK-A2RM-01A-21R-A18M-07,0,50,Stage IB,50,,14842,40,167,105,37.64925,white,not hispanic or latino
TCGA-Q1-A73P-01A-11R-A32P-07,0,483,Stage IB1,483,,16450,45,173,82,27.39818,white,not hispanic or latino
TCGA-C5-A8YT-01A-11R-A37O-07,1,633,Stage IB1,186,633,13253,36,,,,white,not hispanic or latino
TCGA-C5-A2LZ-01A-11R-A213-07,1,3046,Stage IIIB,,3046,24059,65,163,85,31.99217,white,not hispanic or latino
TCGA-UC-A7PI-01A-11R-A42S-07,0,2114,Stage IB1,2114,,16427,44,,64,,white,not hispanic or latino
TCGA-VS-A9V1-01A-11R-A42T-07,1,157,Stage IVB,,157,17001,46,155,45,18.73049,white,not reported
TCGA-FU-A5XV-01A-11R-A28H-07,0,321,Stage IIIB,321,,11692,32,157,69,27.99302,white,not hispanic or latino
TCGA-C5-A1BE-01B-11R-A13Y-07,1,2094,Stage IB2,,2094,23727,64,149,76,34.23269,white,not reported
