In [2]:
# Project root: four directory levels above the current working directory.
wd <- Reduce(function(path, level) dirname(path), seq_len(4), getwd())
# Shared project settings. E_DIR (used below) is not defined in this notebook,
# so it presumably comes from this sourced file -- TODO confirm.
source(paste0(wd, "/mission_control/treasure_map.R"))
library(tidyverse)

# Folder holding the raw clinical export, and folder receiving the cleaned one.
I_DIR <- paste0(E_DIR, "/val_hebron/clinical/raw/")
O_DIR <- paste0(E_DIR, "/val_hebron/clinical/clean/")

### 0 - Read the raw data

In [3]:
# Load the raw clinical table. The file is semicolon-delimited, and text
# columns are kept as character vectors rather than converted to factors.
a <- read.csv(
    file = paste0(I_DIR, "Machine_learning_models_IT_MSI_def_v1.csv"),
    sep = ";",
    stringsAsFactors = FALSE
)

### 1 - Clean and Prepare

#### Consolidate names

In [5]:
# Keep only the columns used downstream and give them the names the rest of
# this notebook relies on. Columns listed without `=` keep their raw name.
b <- transmute(
    a,
    # identifiers and demographics
    patient_id = sap,
    clinical_age = Age.at.dx,
    clinical_gender = gender,
    # biopsy and treatment descriptors
    biopsy_location,
    treatment = IT_tto,
    bor = best_response_IT,
    trt_line = IT_line,
    # dates
    birth_date = birthdate,
    date_crc_diagnosis,
    biopsy_date,
    trt_start_date = start_date,
    trt_end_date = end_date,
    # survival follow-up
    os_date = date_last_follow_up,
    os_status
)

#### Format Dates

In [6]:
# Parse a single date string, trying month/day/year first and day/month/year
# second (both with a two-digit year, per the "%y" format code).
#
# i: a character date, e.g. "01/31/20".
# Returns: a Date; NA (of class Date) when the value is missing or matches
#          neither format.
#
# The earlier version wrapped as.Date() in tryCatch() without any handler, so
# an unparseable value (for example "") still raised an error and aborted the
# caller. An explicit error handler now returns NA instead. The `origin`
# argument was removed because as.Date() does not use it for character input.
#
# NOTE(review): ambiguous strings such as "05/06/20" resolve to the first
# format (month/day) -- confirm this matches how the source file was written.
nice_date <- function(i) {
    tryCatch(
        as.Date(i, tryFormats = c("%m/%d/%y", "%d/%m/%y")),
        error = function(e) as.Date(NA)
    )
}

In [7]:
# Parse the treatment-start and last-follow-up columns one value at a time with
# nice_date() and store the result next to the original as "<column>_t".
# unlist() drops the Date class, so the new columns hold plain day counts.
for (date_col in c("trt_start_date", "os_date")) {
    print(date_col)
    parsed <- lapply(as.character(b[, date_col]), nice_date)
    b[, paste0(date_col, "_t")] <- unlist(parsed)
}

[1] "trt_start_date"
[1] "os_date"


In [8]:
# Overall-survival time in days: last follow-up minus treatment start, using
# the parsed "_t" columns.
b[["os_days"]] <- b[["os_date_t"]] - b[["trt_start_date_t"]]
# Drop every column whose name contains "date" (the raw date columns and the
# parsed "_t" helpers); os_days is kept.
c <- select(b, -contains("date"))

#### Maps 

In [9]:
# Recode raw clinical fields into the analysis columns.

# Collapse "colon", "colon(endoscopy)" and the empty string into "colon"; any
# other site keeps its original label. The fallback is read from `c` (it was
# previously read from `b`) so the recode cannot go out of step with the table
# being modified if `c` is ever filtered or reordered.
c$clinical_biopsy_location <- ifelse(
    c$biopsy_location %in% c("colon", "", "colon(endoscopy)"),
    "colon",
    c$biopsy_location
)
# 0 when the treatment line is 1, 1 for any other line.
c$clinical_pretreat <- ifelse(c$trt_line == 1, 0, 1)
# 0 when the status is "Alive", 1 for any other status.
c$os_event <- ifelse(c$os_status == "Alive", 0, 1)
# Preserve the original best-response label before `bor` is overwritten below.
c$clinical_recist <- c$bor
# Binary response: 1 for CR/PR, 0 for everything else. Because %in% never
# returns NA, a missing label is also coded 0.
c$bor <- ifelse(c$clinical_recist %in% c("CR", "PR"), 1, 0)

In [10]:
# Lookup from lower-case drug name to a target label ("pd", "pdl" or "ctla"),
# stored as a named list of single strings.
# NOTE(review): nothing in the visible notebook reads trt_map; the mechanism
# column is derived by mechanism_map(), which labels fewer drugs as "pdl" and
# never returns "ctla" -- confirm which of the two is intended.
trt_map <- as.list(c(
    "atezolizumab" = "pdl",
    "avelumab" = "pdl",
    "anti-pdl" = "pdl",
    "bintrafusp" = "pdl",
    "durvalumab" = "pd",
    "pembrolizumab" = "pd",
    "nivolumab" = "pd",
    "ipilimumab" = "ctla",
    "regeneron" = "pd",
    "amg404" = "pd",
    "faz053" = "pdl"
))
# Classify treatment names as "pdl" or "pd".
#
# i: character vector of treatment names (callers lower-case them first).
# Returns: a character vector the same length as `i`; "pdl" where the name
#          contains "atezolizumab" or "faz053", "pd" everywhere else
#          (including NA entries, which grepl() reports as non-matching).
#
# The earlier version used the elementwise `|` inside a scalar `if`, so it
# only worked on length-1 input. This version gives the same answer for a
# single name and also accepts whole vectors, including empty ones.
#
# NOTE(review): trt_map labels more drugs as "pdl" (and one as "ctla") than
# this function does -- confirm which classification is intended.
mechanism_map <- function(i) {
    is_pdl <- grepl("atezolizumab|faz053", i)
    mechanism <- rep("pd", length(i))
    mechanism[is_pdl] <- "pdl"
    mechanism
}
# Lower-case each treatment name and classify it with mechanism_map(), one
# value at a time.
c[["clinical_mechanism"]] <- unlist(lapply(tolower(c[["treatment"]]), mechanism_map))
# Every row gets the constant tumor location "colon".
c[["clinical_tumor_location"]] <- "colon"

### 2 - Select and Send it

In [11]:
# Final table: patient id, every column whose name contains "clinical", then
# the response, survival and raw treatment columns, in that order.
d <- select(
    c,
    patient_id,
    contains("clinical"),
    bor,
    os_event,
    os_days,
    treatment
)

In [12]:
# Write the cleaned table to the output folder as colon.Rds.
saveRDS(object = d, file = paste0(O_DIR, "colon.Rds"))