In [None]:
# Project root: walk four directory levels up from the current working directory.
wd <- Reduce(function(path, level) dirname(path), seq_len(4), getwd())
# Load the shared project configuration script.
# NOTE(review): E_DIR is not defined in this notebook; presumably
# treasure_map.R provides it -- confirm.
source(paste0(wd, "/mission_control/treasure_map.R"))
library(tidyverse)

# Folder the raw clinical export is read from, and folder the cleaned
# table is written to.
I_DIR <- paste0(E_DIR, "/val_hebron/clinical/raw/")
O_DIR <- paste0(E_DIR, "/val_hebron/clinical/clean/")

### 0 - Read raw data

In [None]:
# Read the raw bladder patient listing. The file is semicolon-delimited with
# a header row; text columns are kept as character (no factor conversion).
a <- read.csv(
  file = paste0(I_DIR, "llistat_pacients_mol_profiled_bladder.csv"),
  header = TRUE,
  sep = ";",
  stringsAsFactors = FALSE
)

### 1 - Cleaning

In [None]:
# Keep only the columns needed downstream, renaming the raw (Spanish) headers
# to the snake_case names used in the rest of the notebook.
b <- select(
  a,
  patient_id       = NHC,
  birth_date       = Fecha.Nacimiento,
  biopsy_date      = Fecha.biopsia,
  biopsy_location  = Localización.biopsia,
  tumor_location   = Localización.primario.Tumor,
  tumor_type       = Tipo.de.tumor.primario,
  trt1_start_date  = Fecha.inicio.Tto,
  trt1_end_date    = Fecha.fin.Tto,
  trt1_stop_reason = Suspensión.tratamiento,
  trt1             = Esquema.Tto,
  recist           = RECIST,
  recist_date      = Fecha.RECIST,
  last_date        = Fecha.último.seguimiento...defunción,
  os_event         = Status
)

# Drop every row whose RECIST date field contains the text "general".
b <- filter(b, !grepl("general", recist_date))

#### Format Dates

In [None]:
#' Parse day.month.year strings into Dates.
#'
#' Accepts both four-digit ("15.03.2019") and two-digit ("15.03.19") years.
#' The format is chosen per element from the width of the year field, because
#' "%Y" happily reads a two-digit year as a year in the first century
#' ("01.02.20" -> 0020-02-01), which would otherwise shadow the "%y" format.
#'
#' @param i Character vector (or something coercible to character) of dates
#'   written as day.month.year.
#' @return A Date vector of the same length as `i`. Missing, empty or
#'   unparseable entries are returned as NA instead of raising an error.
#'   Two-digit years follow the strptime "%y" rule (00-68 -> 20xx,
#'   69-99 -> 19xx), so two-digit birth years before 1969 cannot be
#'   represented correctly.
nice_date <- function(i) {
  i <- as.character(i)

  # Default interpretation: four-digit year.
  parsed <- as.Date(i, format = "%d.%m.%Y")

  # Entries whose year field is exactly two digits are re-parsed with "%y".
  short_year <- grepl(
    "^[0-9]{1,2}\\.[0-9]{1,2}\\.[0-9]{2}([^0-9]|$)",
    trimws(i)
  )
  parsed[short_year] <- as.Date(i[short_year], format = "%d.%m.%y")

  parsed
}

In [None]:
# For every column whose name contains "date", add a companion "<name>_t"
# column holding the parsed value. Values are parsed one at a time, and
# unlist() reduces the resulting Dates to plain numbers (days since
# 1970-01-01), which is what the later date arithmetic operates on.
for (date_col in names(select(b, contains("date")))) {
  raw_values <- as.character(b[[date_col]])
  parsed_values <- lapply(raw_values, nice_date)
  b[, paste0(date_col, "_t")] <- unlist(parsed_values)
}

In [None]:
# Derive time-based variables from the parsed "*_t" columns, all measured
# relative to the start of the first treatment line.
c <- b
# Age at treatment start, in whole years (day difference divided by 365).
c$clinical_age <- round((c$trt1_start_date_t - c$birth_date_t) / 365)
# Days from treatment start to the last follow-up / death date.
c$os_days <- c$last_date_t - c$trt1_start_date_t
# Days between biopsy and treatment start (negative = biopsy taken before).
c$biopsy_vs_treat <- c$biopsy_date_t - c$trt1_start_date_t

#### Maps

In [None]:
# Drug name -> mechanism label ("pdl" or "pd"), grouped by label.
# NOTE(review): this list is not referenced elsewhere in the visible code
# (mechanism_map() hard-codes the same names) -- confirm whether it is needed.
# NOTE(review): durvalumab is labelled "pd" here; verify that this is the
# intended class for that drug.
trt_map <- c(
  setNames(
    rep(list("pdl"), 4),
    c("atezolizumab", "avelumab", "anti-pdl", "bintrafusp")
  ),
  setNames(
    rep(list("pd"), 2),
    c("durvalumab", "pembrolizumab")
  )
)
# Anatomical site as written in the raw data (Spanish, lower case) ->
# English label used in the cleaned table. Lookups are exact-match.
location_map <- as.list(c(
  "vejiga"          = "bladder",
  "colón"           = "colon",
  "ureter"          = "ureter",
  "adenopatía"      = "adenopathy",
  "hepatica"        = "liver",
  "hígado"          = "liver",
  "pared abdominal" = "abdominal_wall"
))
# Numeric RECIST code (as a string, "4" down to "0") -> response label.
# NOTE(review): labels presumably stand for unknown / progressive disease /
# stable disease / partial response / complete response -- confirm.
recist_map <- setNames(
  as.list(c("UK", "PD", "SD", "PR", "CR")),
  as.character(4:0)
)
# Numeric RECIST code (as a string, "4" down to "0") -> binary flag:
# 1 for the codes recist_map labels CR/PR, 0 for those it labels SD/PD,
# and NA for code "4" (labelled "UK").
# NOTE(review): "bor" presumably means best overall response -- confirm.
bor_map <- setNames(
  list(NA, 0, 0, 1, 1),
  as.character(4:0)
)
# Classify a single treatment description as 'pdl' or 'pd'.
#
# i: one treatment string; matching is case-sensitive, so callers are
#    expected to lower-case it first.
# Returns 'pdl' when the string contains any of the listed drug-name
# patterns, and 'pd' otherwise.
# NOTE(review): everything that does not match -- including NA, empty
# strings and unrelated regimens -- falls through to 'pd'; confirm that this
# default is intended.
mechanism_map <- function(i) {
  pdl_patterns <- c('atezolizumab', 'avelumab', 'anti-pdl', 'bintrafusp')
  matches_pdl <- Reduce(`|`, lapply(pdl_patterns, function(p) grepl(p, i)))
  if (matches_pdl) {
    return('pdl')
  }
  'pd'
}

In [None]:
# Translate raw values through a named lookup list, always returning exactly
# one output per input.
#
# keys: vector of raw values (coerced to character before matching).
# map:  named list whose names are the raw values and whose elements are the
#       length-one translated values.
# Returns an unnamed vector the same length as `keys`. Missing or unmapped
# keys give NA (indexing a list with `[[` would give NULL, which unlist()
# silently drops, leaving a result too short to assign as a column).
# A warning lists any non-missing, non-empty values that have no mapping.
lookup_or_na <- function(keys, map) {
  keys <- as.character(keys)
  lookup <- unlist(map)
  unmapped <- unique(
    keys[!is.na(keys) & nzchar(keys) & !(keys %in% names(lookup))]
  )
  if (length(unmapped) > 0) {
    warning(
      "No mapping for value(s): ", paste(unmapped, collapse = ", "),
      call. = FALSE
    )
  }
  unname(lookup[keys])
}

# Harmonised clinical columns derived from the raw fields.
c$clinical_tumor_location <- lookup_or_na(c$tumor_location, location_map)
c$clinical_biopsy_location <- lookup_or_na(c$biopsy_location, location_map)
# Treatment names are lower-cased because mechanism_map() is case-sensitive.
c$clinical_mechanism <- vapply(
  tolower(c$trt1), mechanism_map, character(1),
  USE.NAMES = FALSE
)
c$clinical_recist <- lookup_or_na(c$recist, recist_map)
c$bor <- lookup_or_na(c$recist, bor_map)
# Added as all-NA placeholder columns.
c$clinical_gender <- NA
c$clinical_pretreat <- NA
c$treatment <- c$trt1

### 2 - Clean and Send it

In [None]:
# Final analysis table: identifiers, treatment, every column whose name
# contains "clinical", and the response / survival outcomes.
d <- select(
  c,
  patient_id,
  treatment,
  contains("clinical"),
  bor,
  os_event,
  os_days
)

In [None]:
# Write the cleaned table to the output folder as an RDS file.
saveRDS(
  object = d,
  file = paste0(O_DIR, "bladder.Rds")
)