In [1]:
# Directory two levels above the current working directory; the mission_control
# scripts below are located relative to it.
wd <- dirname(dirname(getwd()))
# Run the shared setup and helper scripts.
# NOTE(review): I_DIR and TMP_DIR are used later in this notebook but are not
# defined in it - presumably they are set by treasure_map.R; confirm.
source(paste0(wd,"/mission_control/treasure_map.R"))
source(paste0(wd,"/mission_control/helpers/exhaustive_study/exhaustive_help.R"))
library(tidyverse)

Registered S3 method overwritten by 'rvest':
  method            from
  read_xml.response xml2
── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 3.3.6     ✔ purrr   0.3.4
✔ tibble  3.1.2     ✔ dplyr   1.0.6
✔ tidyr   1.1.3     ✔ stringr 1.4.0
✔ readr   1.3.1     ✔ forcats 0.5.1
“package ‘forcats’ was built under R version 3.6.3”── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()


#### 0 - Read prepared CPI data

In [2]:
# Load the prepared CPI data frame from the RDS file "cpi_go.Rds" under I_DIR.
# NOTE(review): I_DIR is not defined in this notebook - presumably set by the
# sourced treasure_map.R; confirm.
cpi <- readRDS(paste0(I_DIR, "cpi_go.Rds"))

In [5]:
# Count rows per value of clinical_cpi_mechanism. The select() step only narrows
# the frame to the "clinical" columns before pull() extracts the one tabulated.
table(cpi %>% select(contains("clinical")) %>% pull(clinical_cpi_mechanism))


multiple      PD1     PDL1 
      65      387       27 

#### 1 - Scale the data
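- Scale (z-score) every non-character feature column; the response columns (`Y_best_response_binary`, `Y_best_response_time_in_days`, `Y_relapse`, and any `Surv` column) are left unscaled
- Scaling is done once over all of the HMF data rather than per cohort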

In [3]:
# Z-score the feature columns of `df`, leaving the response columns untouched.
#
# Args:
#   df: data frame (or tibble) that contains the response columns
#       Y_best_response_binary, Y_best_response_time_in_days and Y_relapse.
#
# Returns:
#   `df` with the same columns in the same order. Every column that is not
#   stored as character and is not a response column is replaced by
#   as.vector(scale(column)): centred on its mean and divided by its standard
#   deviation, computed over all rows of `df`.
#
# Response columns are the three Y_* columns above plus every column whose
# name contains "surv" (matched case-insensitively, as tidyselect::contains()
# does by default). An error is raised when one of the three Y_* columns is
# missing.
scale_the_data <- function( df ){
    required <- c("Y_best_response_binary", "Y_best_response_time_in_days", "Y_relapse")
    absent <- setdiff(required, names(df))
    if (length(absent) > 0) {
        stop("scale_the_data(): missing response column(s): ",
             paste(absent, collapse = ", "), call. = FALSE)
    }

    col_names <- names(df)
    is_response <- col_names %in% required |
        grepl("surv", tolower(col_names), fixed = TRUE)
    is_character <- vapply(df, typeof, character(1)) == "character"

    # Pick the columns with logical masks. Excluding with `x[-which(cond)]`
    # selects nothing at all when `cond` is never TRUE, which would silently
    # skip the scaling of every feature.
    scale_idx <- which(!is_character & !is_response)
    if (length(scale_idx) > 0) {
        df[scale_idx] <- lapply(df[scale_idx], function(column) as.vector(scale(column)))
    }
    df
}
# Scale once over the complete CPI data frame, before any cohort is split off.
cpi_scaled <- scale_the_data(cpi)

#### 2 - Create Data Store for Exhaustive Analysis
- Store dataframe to be used in exhaustive analyses
- Note data here has been scaled over all of HMF db in step above
- Specifying studies here

##### Overall and tissue-specific cohorts

In [4]:
# Cohorts for the exhaustive analysis, keyed by name: "all" is the complete
# scaled data frame, every other entry keeps the rows of one
# clinical_tumor_location_group.
studies <- c(
    list(all = cpi_scaled),
    lapply(
        c(skin = "skin", lung = "lung", bladder = "bladder", other = "other"),
        # `!!` injects the tissue label so it can never be mistaken for a column
        function( tissue ) filter(cpi_scaled, clinical_tumor_location_group == !!tissue)
    )
)

In [5]:
# Split every cohort into the two inputs used by the exhaustive analysis.
#
# Args:
#   studies: named list of cohort data frames.
#
# Returns:
#   A list with two elements, each itself a list keyed by cohort name:
#     survival - the cohort data frame unchanged
#     response - only the rows where Filter_meta_responseMeasured is "Yes"
#                and Y_best_response_binary is not NA
get_ready <- function ( studies ) {

    # One entry per distinct cohort name, looked up by name in `studies`.
    cohort_names <- unique(names(studies))
    names(cohort_names) <- cohort_names

    keep_measured <- function( cohort ) {
        measured <- filter(cohort, Filter_meta_responseMeasured == "Yes")
        drop_na(measured, Y_best_response_binary)
    }

    list(
        survival = lapply(cohort_names, function( nm ) studies[[nm]]),
        response = lapply(cohort_names, function( nm ) keep_measured(studies[[nm]]))
    )
}

In [6]:
# Build the survival/response data store from the cohort list.
go <- get_ready( studies )

#### 3 - Send it!

In [7]:
# Write the data store to "exhaustive-ready.Rds" under TMP_DIR.
# NOTE(review): TMP_DIR is not defined in this notebook - presumably set by the
# sourced treasure_map.R; confirm.
saveRDS(go, paste0(TMP_DIR, "exhaustive-ready.Rds"))