In [1]:
library(tidyverse)

# Custom package
library(rutils)

-- Attaching packages --------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --

v ggplot2 3.3.3     v purrr   0.3.4
v tibble  3.0.6     v dplyr   1.0.4
v tidyr   1.1.2     v stringr 1.4.0
v readr   1.4.0     v forcats 0.5.1

-- Conflicts ------------------------------------------------------------------------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()



In [3]:
# Notebook-wide constants ----

# Development directories, read from the shared paths file by the rutils helper
dirs <- rutils::get_dev_directories(dev_paths_file = "../dev_paths.txt")

# TCGA project identifiers
projects <- c(
    "TCGA-CESC",
    "TCGA-UCS",
    "TCGA-UCEC",
    "TCGA-OV"
)

# Names of the unified dataset directories found under dirs$data_dir
unified_dsets <- c(
    "unified_cervical_data",
    "unified_uterine_data",
    "unified_uterine_endometrial_data"
)

# Location of the matrisome master list
matrisome_path <- paste(
    dirs$data_dir,
    "matrisome",
    "matrisome_hs_masterlist.tsv",
    sep = "/"
)

# Numeric codes for the vital status labels
event_code <- list(Alive = 0, Dead = 1)

# Only FIGO stage is used as a covariate for now; the wider covariate set is
# kept below for reference.
# covariate_cols_no_figo <- c("age_at_diagnosis", "race", "ethnicity")
# covariate_cols <- c("figo_stage", covariate_cols_no_figo)
covariate_cols <- "figo_stage"

# Outcome columns
dep_cols <- c("vital_status", "survival_time")

In [5]:
# Load and filter survival data for every unified dataset.
# The result is a list named by dataset, each element holding the sample name
# and FIGO stage of the samples with no missing values in those two columns.
survival_dfs <- list()
# Iterate over however many datasets are configured rather than a fixed 1:3,
# so the loop stays in sync with `unified_dsets`.
for (dset_idx in seq_along(unified_dsets)) {
    dset_name <- unified_dsets[dset_idx]
    survival_path <- file.path(dirs$data_dir, dset_name, "survival_data.tsv")
    survival_df <- load_survival_df(survival_path, event_code)

    # NOTE(review): `to = "c"` presumably selects the coarse FIGO stage labels
    # (figo_stage_1 ... figo_stage_4 in the output below) -- confirm in rutils.
    survival_dfs[[dset_name]] <- survival_df %>%
        decode_figo_stage(to = "c") %>%
        dplyr::select(sample_name, figo_stage) %>% # make sure using same samples as classification models
        tidyr::drop_na() # keep only rows with no NA in the selected columns
}



[36m--[39m [1m[1mColumn specification[1m[22m [36m--------------------------------------------------------------------------------------------------------------------------[39m
cols(
  sample_name = col_character(),
  vital_status = col_character(),
  survival_time = col_double(),
  figo_stage = col_character(),
  days_to_last_follow_up = col_double(),
  days_to_death = col_double(),
  age_at_diagnosis = col_double(),
  age_at_index = col_double(),
  height = col_double(),
  weight = col_double(),
  bmi = col_double(),
  race = col_character(),
  ethnicity = col_character()
)



[36m--[39m [1m[1mColumn specification[1m[22m [36m--------------------------------------------------------------------------------------------------------------------------[39m
cols(
  sample_name = [31mcol_character()[39m,
  vital_status = [31mcol_character()[39m,

In [13]:
# Tabulate how many rows fall into each FIGO stage.
#
# df: a data frame with a `figo_stage` column.
#
# Returns one row per distinct `figo_stage` value with the row count `n` and
# `pct`, the share of all rows in that stage rounded to two decimals.
figo_stages <- function(df) {
    by_stage <- dplyr::group_by(df, figo_stage)
    stage_counts <- dplyr::summarize(by_stage, n = dplyr::n())
    dplyr::mutate(stage_counts, pct = round(n / sum(n), 2))
}


In [15]:
# FIGO stage breakdown per dataset, in the order listed in `unified_dsets`.
# Each summary is its own top-level expression so the notebook displays it as
# a separate table.
survival_dfs[[unified_dsets[1]]] %>% figo_stages()
survival_dfs[[unified_dsets[2]]] %>% figo_stages()
survival_dfs[[unified_dsets[3]]] %>% figo_stages()

Unnamed: 0_level_0,figo_stage,n,pct
Unnamed: 0_level_1,<chr>,<int>,<dbl>
1,figo_stage_1,135,0.53
2,figo_stage_2,61,0.24
3,figo_stage_3,40,0.16
4,figo_stage_4,19,0.07


Unnamed: 0_level_0,figo_stage,n,pct
Unnamed: 0_level_1,<chr>,<int>,<dbl>
1,figo_stage_1,17,0.37
2,figo_stage_2,5,0.11
3,figo_stage_3,18,0.39
4,figo_stage_4,6,0.13


Unnamed: 0_level_0,figo_stage,n,pct
Unnamed: 0_level_1,<chr>,<int>,<dbl>
1,figo_stage_1,75,0.54
2,figo_stage_2,19,0.14
3,figo_stage_3,39,0.28
4,figo_stage_4,7,0.05
