In [11]:
library(tidyverse)

# Custom package
library(rutils)

In [12]:
# Directory paths loaded via the custom `rutils` helper; `dirs$analysis_dir`
# is where the score tables read in later cells live.
dirs <- rutils::get_dev_directories(dev_paths_file = "../dev_paths.txt")

# TCGA project identifiers.
# NOTE(review): four projects are listed here but only three unified datasets
# below -- confirm this is intentional.
projects <- c(
    "TCGA-CESC",
    "TCGA-UCS",
    "TCGA-UCEC",
    "TCGA-OV"
)

# Names of the unified datasets; used to build file names and to look up
# rows in the score tables.
unified_dsets <- c(
    "unified_cervical_data",
    "unified_uterine_data",
    "unified_uterine_endometrial_data"
)

In [13]:
# Position in `unified_dsets` of the dataset analysed in the cells below.
# Written as an integer literal because it is only ever used as a subscript.
dset_idx <- 3L

# Helpers

In [14]:
# Transpose a score table so that its non-label columns become rows.
#
# Args:
#   df: Data frame or tibble containing one label column plus the columns
#     to transpose.
#   future_colnames_col: Name of the column in `df` whose values become the
#     column names of the result.
#
# Returns:
#   A tibble whose first column, "dataset", holds the names of the remaining
#   columns of `df`, followed by one column per value of
#   `future_colnames_col`.
#
# Raises:
#   An error if `future_colnames_col` is not a column of `df`.
transpose_scores_df <- function(df, future_colnames_col) {
    label_values <- df[[future_colnames_col]]
    if (is.null(label_values)) {
        # `[[` returns NULL for an unknown column name, which would silently
        # reset the row names below; fail early with a clear message instead.
        stop(
            "Column '", future_colnames_col, "' not found in `df`.",
            call. = FALSE
        )
    }

    # Convert to a base data.frame so row names can be set; t() turns those
    # row names into the column names of the transposed matrix.
    temp_df <- as.data.frame(df)
    rownames(temp_df) <- label_values
    temp_df <- dplyr::select(temp_df, -(!!future_colnames_col))

    # The matrix row names (the former column names) go into "dataset".
    tibble::as_tibble(t(temp_df), rownames = "dataset")
}

# Survival analysis

In [15]:
# Read the scores stored in coxph_null_scores.tsv and reshape them so that
# each dataset is a row and each value of the `score` column is a column.
coxph_scores_df <- read_tsv(
    file.path(dirs$analysis_dir, "coxph_null_scores.tsv")
) %>%
    transpose_scores_df("score")
coxph_scores_df

Parsed with column specification:
cols(
  score = col_character(),
  unified_cervical_data = col_double(),
  unified_uterine_data = col_double(),
  unified_uterine_endometrial_data = col_double()
)


dataset,lr_test_pval,wald_test_pval,score_test_pval
<chr>,<dbl>,<dbl>,<dbl>
unified_cervical_data,0.0004814013,4.9053e-05,1.224173e-06
unified_uterine_data,0.3392807906,0.989893132,0.3053979
unified_uterine_endometrial_data,0.000773901,0.0,1.040876e-06


# Regression

In [16]:
# Read the regression baselines and reshape them so that each dataset is a
# row and each value of the `baseline` column is a column.
baselines_df <- read_tsv(
    file.path(dirs$analysis_dir, "reg_baselines.tsv")
) %>%
    transpose_scores_df("baseline")
baselines_df

# Baseline row for the selected dataset; filtered once and reused.
dset_baseline <- baselines_df %>%
    filter(dataset == unified_dsets[dset_idx])
# Guard: with zero or several matching rows the baseline comparisons in the
# following cells would yield logical(0) or a vector rather than one
# TRUE/FALSE.
stopifnot(nrow(dset_baseline) == 1)

mae_baseline <- dset_baseline$L1
ev_baseline <- dset_baseline$explained_variance
mae_baseline
ev_baseline

Parsed with column specification:
cols(
  baseline = col_character(),
  unified_cervical_data = col_double(),
  unified_uterine_data = col_double(),
  unified_uterine_endometrial_data = col_double()
)


dataset,L2,L1,R2,explained_variance,n
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
unified_cervical_data,641687.7,518.3333,0,0,66
unified_uterine_data,620674.9,516.0741,0,0,27
unified_uterine_endometrial_data,386549.9,415.7083,0,0,24


In [17]:
# Per-model reference scores from the "_mae_gbr_ref_scores.tsv" file of the
# selected dataset.
mae_gbr_res_df <- read_tsv(file.path(
    dirs$analysis_dir,
    paste0(unified_dsets[dset_idx], "_mae_gbr_ref_scores.tsv")
))
mae_gbr_res_df
# Sign-flipped mean of the stored scores, computed once and reused below
# (presumably the scores are stored as negated MAE -- confirm upstream).
mae_gbr_mean <- -mean(mae_gbr_res_df$ref_score)
mae_gbr_mean
# Want MAE to be < baseline
mae_gbr_mean < mae_baseline

Parsed with column specification:
cols(
  model = col_double(),
  ref_score = col_double()
)


model,ref_score
<dbl>,<dbl>
0,-429.8771
1,-429.8557
2,-428.7261
3,-452.9965
4,-419.9518


In [18]:
# Per-model reference scores from the "_ev_gbr_ref_scores.tsv" file of the
# selected dataset.
ev_gbr_res_df <- read_tsv(file.path(
    dirs$analysis_dir,
    paste0(unified_dsets[dset_idx], "_ev_gbr_ref_scores.tsv")
))
ev_gbr_res_df
# Mean score across models, computed once and reused for the comparison.
ev_gbr_mean <- mean(ev_gbr_res_df$ref_score)
ev_gbr_mean
# Want EV to be > baseline
ev_gbr_mean > ev_baseline

Parsed with column specification:
cols(
  model = col_double(),
  ref_score = col_double()
)


model,ref_score
<dbl>,<dbl>
0,0.249179
1,0.17566278
2,0.23557729
3,0.17305281
4,0.08427494


In [19]:
# Per-model reference scores from the "_mae_rfr_ref_scores.tsv" file of the
# selected dataset.
mae_rfr_res_df <- read_tsv(file.path(
    dirs$analysis_dir,
    paste0(unified_dsets[dset_idx], "_mae_rfr_ref_scores.tsv")
))
mae_rfr_res_df
# Sign-flipped mean of the stored scores, computed once and reused below
# (presumably the scores are stored as negated MAE -- confirm upstream).
mae_rfr_mean <- -mean(mae_rfr_res_df$ref_score)
mae_rfr_mean
# Want MAE to be < baseline
mae_rfr_mean < mae_baseline

Parsed with column specification:
cols(
  model = col_double(),
  ref_score = col_double()
)


model,ref_score
<dbl>,<dbl>
0,-429.0907
1,-414.3713
2,-423.3422
3,-425.1046
4,-417.9237


In [20]:
# Per-model reference scores from the "_ev_rfr_ref_scores.tsv" file of the
# selected dataset.
ev_rfr_res_df <- read_tsv(file.path(
    dirs$analysis_dir,
    paste0(unified_dsets[dset_idx], "_ev_rfr_ref_scores.tsv")
))
ev_rfr_res_df
# Mean score across models, computed once and reused for the comparison.
ev_rfr_mean <- mean(ev_rfr_res_df$ref_score)
ev_rfr_mean
# Want EV to be > baseline
ev_rfr_mean > ev_baseline

Parsed with column specification:
cols(
  model = col_double(),
  ref_score = col_double()
)


model,ref_score
<dbl>,<dbl>
0,-0.14132234
1,0.02325248
2,-0.06128286
3,-0.01191531
4,-0.01772288
