In [None]:
library(dplyr)
library(tibble)
library(tidyr)

# Start from a clean workspace: remove every non-hidden object from the
# current environment. Attached packages are not affected by rm().
rm(list = ls())

In [None]:
# Load the project's NCR helper scripts into the session.
source("/data/repos/actin-personalization/scripts/ncr/ncr_data_exploration_functions.R")
source("/data/repos/actin-personalization/scripts/ncr/ncr_patients_like_me_functions.R")

In [None]:
# Load the NCR data set that every later cell reads from `ncr`.
# NOTE(review): load_ncr_data_notebook() is not defined in this notebook;
# presumably it comes from one of the helper scripts sourced above - confirm.
ncr <- load_ncr_data_notebook()

In [None]:
## Write out initial treatment plan
# Keep the rows with tumgericht_ther == 1 and reshape to long form: first the
# syst_schemanum* columns, then the syst_code* columns. Pivoting twice yields
# every (schemanum column, code column) combination for each input row.
ncr_lines_substance_prep <- ncr %>%
  dplyr::filter(tumgericht_ther == 1) %>%
  select(
    c("key_nkr", "key_zid", "key_eid"),
    starts_with(c("syst_schemanum", "syst_code"))
  ) %>%
  pivot_longer(
    cols = starts_with("syst_schemanum"),
    names_to = "syst_schemanum_key",
    values_to = "syst_schemanum_value"
  ) %>%
  pivot_longer(
    cols = starts_with("syst_code"),
    names_to = "syst_code_key",
    values_to = "syst_code_value"
  )

# Add the digits embedded in each pivoted column name as numeric columns
# (inserted after positions 2 and 6, one step at a time so the second position
# is counted with the first new column already in place), drop duplicate rows
# and let concat_syst_code_values() collapse each key_nkr/key_zid/key_eid group.
# NOTE(review): concat_syst_code_values() is defined outside this notebook; the
# column names and positions are kept exactly as before in case it relies on them.
ncr_lines_substance <- ncr_lines_substance_prep %>%
  mutate(
    schemanum_code_value = as.numeric(gsub("\\D", "", syst_schemanum_key)),
    .after = 2
  ) %>%
  mutate(
    code_schemanum_value = as.numeric(gsub("\\D", "", syst_code_key)),
    .after = 6
  ) %>%
  distinct() %>%
  group_by(key_nkr, key_zid, key_eid) %>%
  do(concat_syst_code_values(.)) %>%
  ungroup()

# Apply gsub() to every column: values of the form "c(...)" lose the wrapper and
# keep only the inner text. gsub() returns character, so all columns (the key
# columns included) are character afterwards.
# NOTE(review): presumably the "c(...)" / "character(0)" strings stem from
# vector-valued cells produced by concat_syst_code_values() - confirm.
ncr_lines_substance[] <- lapply(ncr_lines_substance, function(x) gsub("^c\\((.*)\\)$", "\\1", x))
# Blank out cells that hold the literal text "character(0)".
ncr_lines_substance[ncr_lines_substance == "character(0)"] <- ""

# Run translate_atc() over every value of line1..line7, store the results in
# matching line<N>_written columns, and keep only the columns whose names
# match "key" or "written".
ncr_lines_substance_written <- ncr_lines_substance %>%
  mutate(
    across(
      all_of(paste0("line", 1:7)),
      function(line_values) sapply(line_values, translate_atc),
      .names = "{.col}_written"
    )
  ) %>%
  select(matches("key") | matches("written"))

# Per first-line label (blank labels excluded): the number of rows and the
# number of distinct key_nkr values.
ncr_first_lines_summary <- ncr_lines_substance_written %>%
  dplyr::filter(line1_written != "") %>%
  group_by(line1_written) %>%
  summarise(
    count = n(),
    distinct_count_key_nkr = n_distinct(key_nkr)
  )

# Cast the three key columns to integer.
ncr_lines_substance_written <- ncr_lines_substance_written %>%
  mutate(across(c(key_nkr, key_zid, key_eid), as.integer))

In [None]:
## Select start and stop interval for every part of the treatment plan, and merge with initial treatment plan and calculate durations
# Long-form start intervals: rows with tumgericht_ther == 1, with the
# syst_schemanum* columns pivoted first and the syst_start_int* columns second,
# giving every (schemanum column, start column) combination per input row.
ncr_lines_start_prep <- ncr %>%
  dplyr::filter(tumgericht_ther == 1) %>%
  select(
    c("key_nkr", "key_zid"),
    starts_with(c("syst_schemanum", "syst_start_int"))
  ) %>%
  pivot_longer(
    cols = starts_with("syst_schemanum"),
    names_to = "syst_schemanum_key",
    values_to = "syst_schemanum_value"
  ) %>%
  pivot_longer(
    cols = starts_with("syst_start_int"),
    names_to = "syst_start_int_key",
    values_to = "syst_start_int_value"
  )

# Add the digits embedded in each pivoted column name as numeric columns
# (inserted after positions 2 and 6, one step at a time), drop duplicate rows
# and let concat_start_int_values() collapse each key_nkr/key_zid group.
# NOTE(review): concat_start_int_values() is defined outside this notebook; the
# column names and positions are kept exactly as before in case it relies on them.
ncr_lines_start <- ncr_lines_start_prep %>%
  mutate(
    schemanum_line_start_value = as.numeric(gsub("\\D", "", syst_schemanum_key)),
    .after = 2
  ) %>%
  mutate(
    line_start_schemanum_value = as.numeric(gsub("\\D", "", syst_start_int_key)),
    .after = 6
  ) %>%
  distinct() %>%
  group_by(key_nkr, key_zid) %>%
  do(concat_start_int_values(.)) %>%
  ungroup()

# Apply gsub() to every column: values of the form "c(...)" lose the wrapper and
# keep only the inner text. gsub() returns character, so all columns (the key
# columns included) are character afterwards.
# NOTE(review): presumably the "c(...)" / "integer(0)" strings stem from
# vector-valued cells produced by concat_start_int_values() - confirm.
ncr_lines_start[] <- lapply(ncr_lines_start, function(x) gsub("^c\\((.*)\\)$", "\\1", x))
# Blank out cells that hold the literal text "integer(0)".
ncr_lines_start[ncr_lines_start == "integer(0)"] <- ""
# Run extract_min() over every value of line_start_1..line_start_7 (replacing
# the columns in place), then cast the two key columns to integer.
# NOTE(review): extract_min() is defined outside this notebook; judging by its
# name it reduces each cell to its smallest value - confirm.
ncr_lines_start <- ncr_lines_start %>%
  mutate(
    across(
      all_of(paste0("line_start_", 1:7)),
      function(start_values) sapply(start_values, extract_min)
    ),
    across(c(key_nkr, key_zid), as.integer)
  )

# Long-form stop intervals: rows with tumgericht_ther == 1, with the
# syst_schemanum* columns pivoted first and the syst_stop_int* columns second,
# giving every (schemanum column, stop column) combination per input row.
ncr_lines_stop_prep <- ncr %>%
  dplyr::filter(tumgericht_ther == 1) %>%
  select(
    c("key_nkr", "key_zid"),
    starts_with(c("syst_schemanum", "syst_stop_int"))
  ) %>%
  pivot_longer(
    cols = starts_with("syst_schemanum"),
    names_to = "syst_schemanum_key",
    values_to = "syst_schemanum_value"
  ) %>%
  pivot_longer(
    cols = starts_with("syst_stop_int"),
    names_to = "syst_stop_int_key",
    values_to = "syst_stop_int_value"
  )

# Add the digits embedded in each pivoted column name as numeric columns
# (inserted after positions 2 and 6, one step at a time), drop duplicate rows
# and let concat_stop_int_values() collapse each key_nkr/key_zid group.
# NOTE(review): concat_stop_int_values() is defined outside this notebook; the
# column names and positions are kept exactly as before in case it relies on them.
ncr_lines_stop <- ncr_lines_stop_prep %>%
  mutate(
    schemanum_line_stop_value = as.numeric(gsub("\\D", "", syst_schemanum_key)),
    .after = 2
  ) %>%
  mutate(
    line_stop_schemanum_value = as.numeric(gsub("\\D", "", syst_stop_int_key)),
    .after = 6
  ) %>%
  distinct() %>%
  group_by(key_nkr, key_zid) %>%
  do(concat_stop_int_values(.)) %>%
  ungroup()

# Apply gsub() to every column: values of the form "c(...)" lose the wrapper and
# keep only the inner text. gsub() returns character, so all columns (the key
# columns included) are character afterwards.
# NOTE(review): presumably the "c(...)" / "integer(0)" strings stem from
# vector-valued cells produced by concat_stop_int_values() - confirm.
ncr_lines_stop[] <- lapply(ncr_lines_stop, function(x) gsub("^c\\((.*)\\)$", "\\1", x))
# Blank out cells that hold the literal text "integer(0)".
ncr_lines_stop[ncr_lines_stop == "integer(0)"] <- ""
# Run extract_max() over every value of line_stop_1..line_stop_7 (replacing
# the columns in place), then cast the two key columns to integer.
# NOTE(review): extract_max() is defined outside this notebook; judging by its
# name it reduces each cell to its largest value - confirm.
ncr_lines_stop <- ncr_lines_stop %>%
  mutate(
    across(
      all_of(paste0("line_stop_", 1:7)),
      function(stop_values) sapply(stop_values, extract_max)
    ),
    across(c(key_nkr, key_zid), as.integer)
  )

# Combine the written treatment lines with their start and stop intervals per
# (key_nkr, key_zid) and derive every line's duration as stop minus start.
# as.integer("") is NA, so a blank stop or start value gives an NA duration
# directly; no ifelse() branch or character round trip is needed.
ncr_lines_details <- ncr_lines_substance_written %>%
  inner_join(ncr_lines_start, by = c("key_nkr", "key_zid")) %>%
  inner_join(ncr_lines_stop, by = c("key_nkr", "key_zid")) %>%
  mutate(
    line_duration_1 = as.integer(line_stop_1) - as.integer(line_start_1),
    line_duration_2 = as.integer(line_stop_2) - as.integer(line_start_2),
    line_duration_3 = as.integer(line_stop_3) - as.integer(line_start_3),
    line_duration_4 = as.integer(line_stop_4) - as.integer(line_start_4),
    line_duration_5 = as.integer(line_stop_5) - as.integer(line_start_5),
    line_duration_6 = as.integer(line_stop_6) - as.integer(line_start_6),
    line_duration_7 = as.integer(line_stop_7) - as.integer(line_start_7)
  )

In [None]:
## Find similar patients and what type of treatment they received
### Example patient details to try
age <- 78
who <- 0
patient_has_had_surgery <- 1

# Look up reference patients resembling the example patient.
# NOTE(review): find_similar_patients_2() is defined outside this notebook;
# its matching rules are not visible here.
ncr_similar_out <- find_similar_patients_2(
  ncr_ref_data = ncr,
  patient_age = age,
  patient_who = who,
  patient_has_had_surgery = patient_has_had_surgery
)

# Attach the per-line treatment details to the similar patients.
# NOTE(review): ncr_lines_details carries integer key_nkr/key_zid; confirm the
# keys returned by find_similar_patients_2() have a compatible type.
ncr_similar_out_with_treatment <- inner_join(
  ncr_similar_out,
  ncr_lines_details,
  by = c("key_nkr", "key_zid")
)