In [1]:
wd <- dirname(dirname(getwd()))
source(paste0(wd,"/mission_control/treasure_map.R"))
source(paste0(wd,"/mission_control/helpers/xgboost/eval_help.R"))
source(paste0(wd,"/mission_control/helpers/xgboost/paper_settings.R"))
source(paste0(wd,"/mission_control/helpers/xgboost/validation_help.R"))
library(tidyverse)
library(xgboost)

Registered S3 method overwritten by 'rvest':
  method            from
  read_xml.response xml2
── Attaching packages ─────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 3.3.6     ✔ purrr   0.3.4
✔ tibble  3.1.2     ✔ dplyr   1.0.6
✔ tidyr   1.1.3     ✔ stringr 1.4.0
✔ readr   1.3.1     ✔ forcats 0.5.1
“package ‘forcats’ was built under R version 3.6.3”
── Conflicts ────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Attaching package: ‘xgboost’

The following object is masked from ‘package:dplyr’:

    slice



### 0 - Get Data Ready

In [2]:
# Load the prepared validation table from the temp directory.
# TMP_DIR is not defined in this notebook; presumably it comes from one of the
# sourced helper scripts and already ends with a path separator -- confirm.
validation_path <- paste0(TMP_DIR, "validation-hmf-go.Rds")
validation_ready <- readRDS(validation_path)

### 1 - Prep HMF data

In [150]:
# Build the HMF modelling table: one row per patient with outcomes, tissue and
# the five standardised model features.
hmf_features <- c("tcell", "tgfb", "prolif", "pretreat", "tmb")

hmf <- validation_ready %>%
  filter(Study == "HMF-CPCT") %>%
  # Standardise each feature. This runs BEFORE drop_na(), so the centring and
  # scaling use every HMF-CPCT row, including rows removed in the next step.
  # NOTE(review): base::scale() returns a one-column matrix, so these columns
  # are matrix columns from here on -- confirm downstream code expects that.
  mutate(across(all_of(hmf_features), scale)) %>%
  # Keep only patients that have both outcomes and a tcell value.
  drop_na(os, bor, tcell) %>%
  # The former `model_apply = tissue` column was never selected, so it is no
  # longer created.
  select(all_of(c("patient_id", "bor", "os", "tissue", hmf_features)))

#### Helpers for fitting

In [151]:
#' Run a hyper-parameter grid search and keep only the winning model.
#'
#' Thin wrapper around `grid_fit()`, which is not defined in this notebook
#' (presumably it comes from the sourced xgboost helper scripts -- confirm).
#' All arguments are forwarded positionally and unchanged.
#'
#' @param Y Response, passed straight through to `grid_fit()`.
#' @param X Predictors, passed straight through to `grid_fit()`.
#' @param hyper_grid Grid of hyper-parameters to search; defaults to the
#'   global `parameter_grid`.
#' @param model Model type identifier forwarded to `grid_fit()` (this notebook
#'   uses "lr" and "os").
#' @param base_model Optional base model forwarded to `grid_fit()`;
#'   `NULL` by default.
#' @return The `best_model` element of the object returned by `grid_fit()`.
best_fit <- function(Y, X, hyper_grid = parameter_grid, model, base_model = NULL) {
  grid_result <- grid_fit(Y, X, hyper_grid, model, base_model)
  grid_result$best_model
}

### 2 - Build the five-factor model
- Build K models for each tissue

In [152]:
# Fix the RNG seed so that any random steps run after this cell are reproducible.
set.seed(62220)
# NOTE(review): K is not referenced anywhere else in the visible notebook.
# Presumably it is the "K models for each tissue" count from the section
# heading -- confirm whether the sourced helpers read it or whether it is unused.
K <- 1

In [153]:
# Split the HMF table into one data frame per tissue, stored in a list keyed by
# tissue name (in order of first appearance in `hmf`).
tissue_levels <- as.character(unique(hmf$tissue))
dfs <- lapply(tissue_levels, function(tissue_name) {
  hmf %>% filter(tissue == tissue_name)
})
names(dfs) <- tissue_levels

### Overall Model

In [155]:
# Pan-tissue models: fit on every HMF patient using the five features.
# `best_fit()` (defined earlier in this notebook) is grid_fit(...)$best_model
# with `parameter_grid` as its default grid.
X <- hmf %>%
  select("tcell", "tgfb", "prolif", "pretreat", "tmb") %>%
  as.matrix()
# model = "lr" is fitted against `bor`, model = "os" against `os`.
pan_lr <- best_fit(as.matrix(hmf$bor), X, model = "lr")
pan_os <- best_fit(as.matrix(hmf$os), X, model = "os")

In [172]:
# Per-patient feature contributions from the pan-tissue models.
# predcontrib = TRUE asks predict() for contribution columns rather than a
# single prediction; each row is tagged with model type, tissue and patient.
pred_lr <- predict(pan_lr, X, predcontrib = TRUE) %>%
  as.data.frame() %>%
  mutate(mod = "lr", tissue = "pan", patient_id = hmf$patient_id)
pred_os <- predict(pan_os, X, predcontrib = TRUE) %>%
  as.data.frame() %>%
  mutate(mod = "os", tissue = "pan", patient_id = hmf$patient_id)
# Stack both model types into one long table.
pan_pred <- rbind(pred_lr, pred_os)

#### Tissue predictions

In [173]:
# Rebuild the per-tissue list of data frames (same content as the earlier
# `dfs` cell): names are the tissue labels, values are the matching rows of `hmf`.
dfs <- hmf$tissue %>%
  unique() %>%
  as.character() %>%
  purrr::set_names() %>%
  purrr::map(function(tissue_name) filter(hmf, tissue == tissue_name))

In [174]:
#' Fit tissue-specific models and return their per-patient contributions.
#'
#' Reads the tissue's rows from the global list `dfs`, fits an "lr" model on
#' `bor` and an "os" model on `os` (each passed the matching pan-tissue model
#' as `base_model`), then predicts on the same rows with `predcontrib = TRUE`.
#'
#' Relies on globals defined elsewhere in this notebook: `dfs`, `pan_lr`,
#' `pan_os`, `best_fit()` and, through its default, `parameter_grid`.
#'
#' @param i Name of the tissue; must be a key of `dfs`.
#' @return A data frame stacking the "lr" rows on top of the "os" rows. It has
#'   the columns produced by `predict(..., predcontrib = TRUE)` plus `mod`,
#'   `tissue` and `patient_id`.
pred_maker <- function(i) {
  df <- dfs[[i]]
  # An unknown key makes `dfs[[i]]` return NULL, which previously surfaced as
  # an obscure error further down; fail early with a clear message instead.
  if (is.null(df)) {
    stop("No data found in `dfs` for tissue '", i, "'", call. = FALSE)
  }

  Y_bor <- as.matrix(df$bor)
  Y_os <- as.matrix(df$os)
  X <- as.matrix(df %>% select("tcell", "tgfb", "prolif", "pretreat", "tmb"))

  # Fit both models first (lr, then os) so the order of any random draws made
  # during fitting is the same as before.
  mod_lr <- best_fit(Y_bor, X, model = "lr", base_model = pan_lr)
  mod_os <- best_fit(Y_os, X, model = "os", base_model = pan_os)

  # Contribution table for one fitted model, tagged with its type and tissue.
  contrib_frame <- function(fit, label) {
    as.data.frame(predict(fit, X, predcontrib = TRUE)) %>%
      mutate(mod = label, tissue = i, patient_id = df$patient_id)
  }

  rbind(contrib_frame(mod_lr, "lr"), contrib_frame(mod_os, "os"))
}

In [175]:
# Empty container; the per-tissue prediction tables are added to it by name.
preds <- vector(mode = "list", length = 0L)

In [176]:
# Fit and predict for each tissue in turn, printing the tissue name as a
# progress marker and storing the result under that name.
tissue_names <- as.character(unique(hmf$tissue))
for (tissue_name in tissue_names) {
  print(tissue_name)
  preds[[tissue_name]] <- pred_maker(tissue_name)
}

[1] "lung"
[1] "skin"
[1] "other"
[1] "bladder"


In [178]:
# Stack every per-tissue prediction table, then append the pan-tissue rows.
tissue_preds <- do.call("rbind", preds)
vamonos <- rbind(tissue_preds, pan_pred)

### 3 - Send it

In [179]:
# Persist the combined prediction table to the temp directory.
preds_path <- paste0(TMP_DIR, "validation-hmf-preds-pdp.Rds")
saveRDS(vamonos, file = preds_path)