In [1]:
wd <- dirname(dirname(getwd()))
source(paste0(wd,"/mission_control/treasure_map.R"))
source(paste0(wd,"/mission_control/helpers/xgboost/eval_help.R"))
source(paste0(wd,"/mission_control/helpers/xgboost/paper_settings.R"))
source(paste0(wd,"/mission_control/helpers/xgboost/validation_help.R"))
library(tidyverse)
library(xgboost)

Registered S3 method overwritten by 'rvest':
  method            from
  read_xml.response xml2
── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 3.3.6     ✔ purrr   0.3.4
✔ tibble  3.1.2     ✔ dplyr   1.0.6
✔ tidyr   1.1.3     ✔ stringr 1.4.0
✔ readr   1.3.1     ✔ forcats 0.5.1
“package ‘forcats’ was built under R version 3.6.3”── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Attaching package: ‘xgboost’

The following object is masked from ‘package:dplyr’:

    slice



### 0 - Get Data Ready

In [2]:
# Load the prepared validation table from the temporary directory.
# paste0() inserts no separator, so TMP_DIR must already end with one.
# NOTE(review): TMP_DIR is presumably defined by the sourced treasure_map.R -- confirm.
validation_ready <- readRDS(file = paste0(TMP_DIR, "validation-hmf-go.Rds"))

### 1 - Prep HMF data

In [3]:
# Prepare the HMF-CPCT cohort used for model fitting. Steps, in order:
#   1. keep only rows whose Study is "HMF-CPCT";
#   2. standardise the listed features with scale(); this happens BEFORE rows
#      with missing outcomes are dropped, so the centre/spread are computed on
#      the full HMF-CPCT subset;
#   3. add tmb_bin (1 when exp(tmb) - 1 exceeds 10, otherwise 0) and
#      model_apply (a copy of tissue, used to select tissue-specific models);
#   4. drop rows where os or bor is missing.
# NOTE(review): exp(tmb) - 1 suggests tmb is stored as log(1 + x) -- confirm.
hmf <- validation_ready %>%
    filter(Study == "HMF-CPCT") %>%
    mutate(
        # across() replaces the superseded mutate_at(); the columns are still
        # overwritten in place with the (one-column matrix) output of scale().
        across(
            all_of(c(
                "tcell", "tgfb", "prolif", "pdl1",
                "pretreat", "pretreat_comp", "purity"
            )),
            scale
        )
    ) %>%
    mutate(
        tmb_bin = ifelse(exp(tmb) - 1 > 10, 1, 0),
        model_apply = tissue
    ) %>%
    drop_na(os, bor)

#### Helpers for fitting

In [4]:
# Assemble the inputs needed to fit one model on one cohort.
#
# Arguments:
#   df             data frame holding patient_id, tcell, bor, os and every
#                  column named in model_features.
#   model_features character vector of column names used as predictors.
#
# Returns a named list with:
#   X           predictor matrix (columns = model_features)
#   id          patient_id of every row in df
#   complete_id patient_id of the rows whose tcell is not missing
#   Y_lr        the bor column
#   Y_os        the os column
builder <- function(df, model_features) {
    list(
        X = as.matrix(select(df, all_of(model_features))),
        id = pull(df, patient_id),
        complete_id = pull(drop_na(df, tcell), patient_id),
        Y_lr = pull(df, bor),
        Y_os = pull(df, os)
    )
}

In [5]:
# Run the hyper-parameter grid search and return only the winning model.
#
# Arguments are forwarded positionally to grid_fit() in the order
# (Y, X, hyper_grid, model, base_model); the `best_model` element of its
# result is returned.
# NOTE(review): grid_fit and the default parameter_grid are not defined in this
# notebook -- presumably they come from the sourced xgboost helpers; confirm.
best_fit <- function(Y, X, hyper_grid = parameter_grid, model, base_model = NULL) {
    search_result <- grid_fit(Y, X, hyper_grid, model, base_model)
    search_result$best_model
}

### 2 - Build the models
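- Fit K repetitions; in each repetition, every feature set is fitted on all HMF patients and then separately per tissue, with the overall model passed as the base model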

In [6]:
# Number of repetitions of the full model-fitting pass.
K <- 1
# Seed the random number generator before any model is fitted.
set.seed(62220)

In [7]:
# Fit every feature-set model K times on the HMF cohort.
#
# Result layout: mods[[repetition]][[feature_set]][[cohort]], where
#   - repetition  is the repetition index as a character string,
#   - feature_set is a name from `models`,
#   - cohort      is "all" or one value of hmf$model_apply,
# and each cohort entry is the builder() list extended with the fitted
# `mod_lr` and `mod_os`.
mods <- list()

# The cohort labels do not change between iterations, so compute them once.
# Both the build loop and the fit loop use this one vector; a model is therefore
# never requested for a cohort whose data structure was not built.
tissues <- unique(hmf %>% pull(model_apply))

# seq_len() yields an empty sequence when K is 0 (seq(0) would iterate 1, 0).
for (i in seq_len(K)) {

    hmf_models <- list()
    for (model in names(models)) {

        # Progress output, flushed so it appears while the cell is running.
        print(model)
        flush.console()

        #### Store base data structure
        s <- list()
        s$all <- builder(hmf, models[[model]])
        for (j in tissues) {
            s[[j]] <- builder(hmf %>% filter(model_apply == j), models[[model]])
        }

        #### Fit overall models
        s$all$mod_lr <- best_fit(Y = s$all$Y_lr, X = s$all$X, model = "lr", base_model = NULL)
        s$all$mod_os <- best_fit(Y = s$all$Y_os, X = s$all$X, model = "os", base_model = NULL)

        #### Tissue specific models guided by overall model
        # The overall fits are passed as base_model; they must exist before
        # this loop runs, so the order of these sections matters.
        for (k in tissues) {
            s[[k]][["mod_lr"]] <- best_fit(
                Y = s[[k]]$Y_lr, X = s[[k]]$X, model = "lr", base_model = s$all$mod_lr
            )
            s[[k]][["mod_os"]] <- best_fit(
                Y = s[[k]]$Y_os, X = s[[k]]$X, model = "os", base_model = s$all$mod_os
            )
        }

        ### Save this feature set's models
        hmf_models[[model]] <- s
    }

    ### Save as repetition i
    mods[[as.character(i)]] <- hmf_models
}

[1] "tmb_bin"
[1] "base_bin"
[1] "tmb"
[1] "base"
[1] "rna"
[1] "no_pretreat"
[1] "no_tmb"
[1] "five_latent"
[1] "five_latent_hmf"
[1] "five_latent_purity"


### 3 - Send it

In [8]:
# Persist all fitted models to the temporary directory.
# paste0() inserts no separator, so TMP_DIR must already end with one.
saveRDS(object = mods, file = paste0(TMP_DIR, "validation-hmf-models.Rds"))