In [None]:
library(tidyverse)
library(stringr) 
library(plotly)

In [None]:
##############################################################################

##########################     INPUT PARAMETERS    ###########################

##############################################################################



# Which ratio file to load for every sample ("big_<filetype>.csv" and
# "small_<filetype>.csv"). Other values kept for reference:
#   "ratio_all_germline_singletons_excluded_tumor_4_cs"
#   "ratio_singletons_excluded_tumor_4_cs"
#   "ratio"
filetype <- "ratio"

# Sub-folder (inside each sample folder) the ratio files are read from.
# Other values kept for reference:
#   "ci5_cs1e9", "all_germline_ci5_cs1e9",
#   "all_germline_filtered_bams_tumor_ci5_cs1e9",
#   "all_germline_filtered_bams_tumor_subtract_other_tumor_ci5_cs1e9",
#   "all_germline_filtered_bams_tumor_ci5_cs1e9_filtered_cfDNA",
#   "all_germline_filtered_bams_tumor_ci5_cs1e9_filtered_cfDNA_subtracted_kmers_not_seen",
#   "all_germline_filtered_bams_tumor_ci5_cs1e9_well_mapping_subtracted",
#   "de_novo_germline", "de_novo_germline_filtered_cfDNA",
#   "de_novo_germline_reference"
input_folder <- "de_novo_germline_reference"

# Sub-folder of ../plotting_results/ the figures are written to.
# Other values kept for reference:
#   "ci5_cs1e9", "union_germline_ci4_cs1e9",
#   "union_germline_filtered_tumor_bams_ci5_cs1e9",
#   "union_germline_filtered_tumor_bams_subtract_other_tumors_ci5_cs1e9",
#   "union_germline_filtered_tumor_bams_ci5_cs1e9_filtered_cfDNA",
#   "union_germline_filtered_bams_tumor_ci5_cs1e9_filtered_cfDNA_subtracted_kmers_not_seen",
#   "union_germline_filtered_tumor_ci5_cs1e9_well_mapping_subtracted",
#   "de_novo_germline", "de_novo_germline_filtered_cfDNA",
#   "de_novo_germline_reference"
output_folder <- "de_novo_germline_reference"

# First column of the low-quality list, as plain strings.
low_qual_sample <- as.character(read.table("../low_qual_sample.txt")[[1]])

# The phase I patient ids are taken from the first row of a header-less CSV.
phaseIpt <- read.csv("../phaseI_pt.csv", header = FALSE)
phaseIpt <- as.character(unlist(phaseIpt[1, ]))

In [None]:
# Load the patient/sample table and expose the patient column under the
# name `patient_id`, which the later joins key on.
paramspace <- read.csv("../data/metadata/paramspace_cfDNA_phaseI.csv") %>%
  rename(patient_id = pt_id)
head(paramspace)

In [None]:
# One-column data frames holding the patient ids and the cfDNA folder names.
SAMPLES <- paramspace[, "patient_id", drop = FALSE]
FOLDERS <- paramspace[, "cfDNA_folder", drop = FALSE]

In [None]:
# Row count of `paramspace`, i.e. how many patient/folder combinations it lists.
nrow(paramspace)

In [None]:
# Attach the big/small ratio estimates and their confidence bounds to every
# row of `paramspace`. Each row points at one sample folder
# (../data/<patient_id>/<cfDNA_folder>/<input_folder>/) that holds
# "big_<filetype>.csv" and "small_<filetype>.csv" with the columns
# `ratio`, `lower_CI` and `upper_CI`.
# (The jaccard columns that used to be filled here are currently disabled.)
ratio_columns <- c(
  "big_ratio", "big_ratio_CI_lower", "big_ratio_CI_upper",
  "small_ratio", "small_ratio_CI_lower", "small_ratio_CI_upper"
)
# Numeric NA so the columns are numeric from the start.
paramspace[ratio_columns] <- NA_real_

# seq_len() yields an empty sequence for an empty table, whereas 1:nrow()
# would iterate over c(1, 0).
for (row in seq_len(nrow(paramspace))) {
  pt <- paramspace[row, "patient_id"]
  fd <- paramspace[row, "cfDNA_folder"]
  sample_dir <- paste0("../data/", pt, "/", fd, "/", input_folder, "/")

  big_r <- read.csv(paste0(sample_dir, "big_", filetype, ".csv"))
  small_r <- read.csv(paste0(sample_dir, "small_", filetype, ".csv"))

  # Each assignment targets a single cell, so a ratio file with more than one
  # row makes the assignment fail instead of being silently truncated.
  paramspace[row, "big_ratio"] <- big_r$ratio
  paramspace[row, "big_ratio_CI_lower"] <- big_r$lower_CI
  paramspace[row, "big_ratio_CI_upper"] <- big_r$upper_CI

  paramspace[row, "small_ratio"] <- small_r$ratio
  paramspace[row, "small_ratio_CI_lower"] <- small_r$lower_CI
  paramspace[row, "small_ratio_CI_upper"] <- small_r$upper_CI
}

In [None]:
# `res` is the working copy that the following cells extend with metadata.
res <- paramspace
head(res)

In [None]:
# Derive `sampleID` from the first "_"-separated piece of `cfDNA_folder`.
# The folder name is cut into at most three pieces; pieces two and three are
# extracted as well (so a name with fewer pieces still raises an error) and
# then discarded together with the temporary list column.
# Earlier variant:
# res <- res %>% separate(cfDNA_folder, c("sampleID", "sample_type", "sample_number"))
res <- res %>%
  mutate(
    tmp_chunks = stringr::str_split(cfDNA_folder, stringr::fixed("_"), n = 3),
    sampleID = map_chr(tmp_chunks, 1),
    sub_value = map_chr(tmp_chunks, 2),
    sub_val = map_chr(tmp_chunks, 3)
  ) %>%
  select(-tmp_chunks, -sub_value, -sub_val)

head(res)
dim(res)

In [None]:
# Per-sample timepoint metadata; the columns used further down are `phase`,
# `sampleID` and `sample_timepoint_days_since_OP`.
sample_timepoint <- read.csv("../data/metadata/clin_data/sample_timepoint_formatted.csv")

In [None]:
head(sample_timepoint)

# Keep only phase I samples and the two columns needed for the join on `res`.
sample_timepoint_days <- sample_timepoint %>%
  filter(phase == "phaseI") %>%
  select(sampleID, sample_timepoint_days_since_OP)

# Show samples that have no timepoint recorded.
sample_timepoint_days %>%
  filter(is.na(sample_timepoint_days_since_OP))
dim(sample_timepoint_days)
head(sample_timepoint_days)

In [None]:
# Add the sampling day to every result row; `sampleID` is forced to character
# first so the join key is a plain string on the `res` side.
res <- res %>%
  mutate(sampleID = as.character(sampleID)) %>%
  left_join(sample_timepoint_days, by = "sampleID")
head(res)
dim(res)

In [None]:
# Display result rows that ended up without a timepoint after the join.
res %>% filter(is.na(sample_timepoint_days_since_OP))

In [None]:
# Patient-level clinical table, reduced to the relapse time and the adjuvant
# chemo window that the plots draw.
clinical_data <- read.csv("../data/metadata/clin_data/clinical_data_formatted.csv")
clinical_data_relapse <- clinical_data %>%
  select(
    patient_id,
    time_to_relapse_days,
    adjuvant_chemo_start_days,
    adjuvant_chemo_end_days
  )
head(clinical_data_relapse)

In [None]:
# Attach the patient-level clinical columns to every sample row.
res <- res %>%
  left_join(clinical_data_relapse, by = "patient_id")
head(res)
dim(res)

In [None]:
# Split the intervention table into a "chemo" and an "other" table, giving the
# start/end columns table-specific names, and keep phase I patients only.
interventions <- read.csv("../data/metadata/clin_data/intervention_formatted.csv")

# intervention_type codes that are routed to the chemo table.
chemo_types <- c(4, 5, 12)
phase_I_pts <- phaseIpt

intervention_chemo <- interventions %>%
  filter(intervention_type %in% chemo_types) %>%
  select(
    patient_id,
    intervention_chemo_start_days = intervention_start_days,
    intervention_chemo_end_days = intervention_end_days
  ) %>%
  filter(patient_id %in% phase_I_pts)

intervention_other <- interventions %>%
  filter(!(intervention_type %in% chemo_types)) %>%
  select(
    patient_id,
    intervention_other_start_days = intervention_start_days,
    intervention_other_end_days = intervention_end_days
  ) %>%
  filter(patient_id %in% phase_I_pts)

head(intervention_chemo)
head(intervention_other)

dim(intervention_chemo)
dim(intervention_other)

In [None]:
# Combine results with both intervention tables. Full joins keep patients that
# appear on only one side; a patient with several interventions gets one row
# per sample/intervention combination.
res_intervention <- res %>%
  full_join(intervention_chemo, by = "patient_id") %>%
  full_join(intervention_other, by = "patient_id")
head(res_intervention)
dim(res_intervention)

In [None]:
# Persist the joined table. write.csv() is called with its default
# row.names = TRUE, so the file gains a leading row-index column.
write.csv(res_intervention, "../results_phaseI.csv")

In [None]:
# Per-patient minimum/maximum of the big and small ratios. The plots use them
# as the vertical extent of the shaded treatment rectangles.
# (jaccard min/max are currently disabled.)
means_template <- tibble(
  patient_id = character(),
  big_ratio_min = numeric(),
  big_ratio_max = numeric(),
  small_ratio_min = numeric(),
  small_ratio_max = numeric()
)

# One single-row tibble per phase I patient. A patient without any rows in
# `res_intervention` yields Inf/-Inf (with a warning) from min()/max().
patient_ranges <- lapply(phase_I_pts, function(pt) {
  pt_rows <- res_intervention %>% filter(patient_id == pt)
  tibble(
    patient_id = pt,
    big_ratio_min = min(pt_rows$big_ratio),
    big_ratio_max = max(pt_rows$big_ratio),
    small_ratio_min = min(pt_rows$small_ratio),
    small_ratio_max = max(pt_rows$small_ratio)
  )
})

# The typed empty template goes first so the column layout is fixed even when
# there are no patients.
means <- bind_rows(c(list(means_template), patient_ranges))

head(means)
res_intervention <- res_intervention %>%
  left_join(means, by = "patient_id")
head(res_intervention)

In [None]:
# Big-ratio time course, one facet per patient, with surgery, relapse and
# treatment annotations.
pt_unique <- res_intervention %>% distinct(patient_id)
pt_unique_v <- pt_unique$patient_id

# Placeholders kept from the original notebook; nothing in this cell fills them.
plots_big_r <- NULL
plots_small_r <- NULL

p_bigr <- ggplot() +
  # Confidence band. x is coerced exactly like in the line/point layers so all
  # layers agree on a continuous x scale.
  geom_ribbon(
    data = res_intervention,
    aes(
      x = as.numeric(as.character(sample_timepoint_days_since_OP)),
      ymin = big_ratio_CI_lower,
      ymax = big_ratio_CI_upper
    ),
    alpha = 0.1, color = "grey"
  ) +
  geom_line(
    data = res_intervention,
    aes(x = as.numeric(as.character(sample_timepoint_days_since_OP)), y = big_ratio)
  ) +
  geom_point(
    data = res_intervention,
    aes(x = as.numeric(as.character(sample_timepoint_days_since_OP)), y = big_ratio)
  ) +
  xlab("Days") +
  ylab("TF (big_ratio)") +

  # Surgery (day 0)
  geom_vline(data = res_intervention, aes(xintercept = 0), color = "firebrick4") +
  # Relapse
  geom_vline(
    data = res_intervention,
    aes(xintercept = time_to_relapse_days),
    color = "steelblue4", size = 1
  ) +

  # Adjuvant chemo window. One rectangle is drawn per row of the data, so the
  # very low alpha adds up across the repeated rows of a patient.
  geom_rect(
    data = res_intervention,
    aes(
      xmin = adjuvant_chemo_start_days, xmax = adjuvant_chemo_end_days,
      ymin = big_ratio_min, ymax = big_ratio_max
    ),
    fill = "cadetblue4", alpha = 0.01
  ) +

  # Chemo interventions
  geom_rect(
    data = res_intervention,
    aes(
      xmin = intervention_chemo_start_days, xmax = intervention_chemo_end_days,
      ymin = big_ratio_min, ymax = big_ratio_max
    ),
    fill = "coral1", alpha = 0.01
  ) +

  # Other interventions (start day only)
  geom_vline(
    data = res_intervention,
    aes(xintercept = intervention_other_start_days),
    color = "coral1", size = 1, alpha = 0.5
  ) +

  # Debug toggle: label every point with its sample folder.
  # geom_text(data = res_intervention, aes(x = sample_timepoint_days_since_OP, y = big_ratio, label = cfDNA_folder)) +

  theme_minimal() +
  # Facet on the column of each layer's own data. The previous
  # `~res_intervention$patient_id` looked the vector up in the global
  # environment when the plot was drawn, which only works while every layer
  # uses that exact, unmodified data frame.
  facet_wrap(~patient_id, ncol = 1, scales = "free_y")

options(repr.plot.width = 12, repr.plot.height = 90)
p_bigr


    

In [None]:
# Write the big-ratio figure below ../plotting_results/<output_folder>/big_plots/.
# limitsize = FALSE is needed because the figure is taller than ggsave's
# default size limit.
big_plot_path <- paste0(
  "../plotting_results/", output_folder, "/big_plots/Big_", filetype, ".png"
)
ggsave(
  big_plot_path, p_bigr,
  width = 13, height = 50, dpi = "print", limitsize = FALSE
)

In [None]:
#p_big_upper_r <- ggplot() + 
#                geom_ribbon(data = res_intervention, aes(x = sample_timepoint_days_since_OP, ymin = big_upper_ratio_CI_lower, ymax = big_upper_ratio_CI_upper), alpha = 0.1, color="grey") +
#                geom_line(data = res_intervention, aes(x = as.numeric(as.character(sample_timepoint_days_since_OP)), y = big_upper_ratio)) + 
#                geom_point(data = res_intervention, aes(x = as.numeric(as.character(sample_timepoint_days_since_OP)), y = big_upper_ratio)) +
#                #ggtitle(paste("Big ratio cs1e9, ", pt_unique_v[pt], sep = "")) + 
#                xlab("Days") +
#                ylab("TF (big_upper_ratio)") +
                 
                
                
#                geom_vline(data = res_intervention, aes(xintercept=0), color= "firebrick4") +                                 # Surgery
#                geom_vline(data = res_intervention, aes(xintercept=time_to_relapse_days), color = "steelblue4", size = 1) +   # Relapse
                
                # adjuvant chemo
#                geom_rect(data = res_intervention,  
#                         aes(xmin = adjuvant_chemo_start_days, xmax = adjuvant_chemo_end_days, ymin = big_upper_ratio_min, ymax = big_upper_ratio_max), 
#                             fill = "cadetblue4", alpha = 0.01) + 

                # chemo interventions
#                geom_rect(data = res_intervention,
#                         aes(xmin = intervention_chemo_start_days, xmax = intervention_chemo_end_days, ymin = big_upper_ratio_min, ymax = big_upper_ratio_max), 
#                             fill = "coral1", alpha = 0.01) + 
                
                 ## other interventions
#                geom_vline(data = res_intervention,
#                           aes(xintercept = intervention_other_start_days), color = "coral1", size = 1, alpha = 0.5) +
                #scale_color_manual(name='Legend',
                #     breaks=c("Surgery", "Relapse", "Intervention"),
                #     values=c("Surgery"="firebrick4", "Relapse"="steelblue4", "Intervention"="steelblue2"))+
                
#                theme_minimal() +
#                facet_wrap(~res_intervention$patient_id, ncol = 1, scales = "free_y")
    
    

#options(repr.plot.width=12, repr.plot.height=90)
#p_big_upper_r

In [None]:
#ggsave(
#  paste("../plotting_results/", output_folder, "/big_plots/Big_upper_", filetype, ".png", sep = ""),
#  p_big_upper_r, width = 13, height = 50, dpi = "print", limitsize = FALSE)

In [None]:
# Drop the per-intervention columns and collapse the rows they had multiplied,
# leaving the sample-level table.
res_all <- res_intervention %>%
  select(-c(
    "intervention_chemo_start_days",
    "intervention_chemo_end_days",
    "intervention_other_start_days",
    "intervention_other_end_days"
  )) %>%
  distinct()
dim(res_all)
head(res_all)

In [None]:
# Small-ratio min/max per patient, taken row by row from `res_all` without
# de-duplication, so a patient appears once per row it has in `res_all`.
res_all_min_max_val <- res_all %>% select(patient_id, small_ratio_min, small_ratio_max)

In [None]:
# Give both intervention tables the small-ratio range of their patient; the
# small-ratio plot uses it as the vertical extent of the rectangles.
intervention_chemo <- intervention_chemo %>%
  left_join(res_all_min_max_val, by = "patient_id")
intervention_other <- intervention_other %>%
  left_join(res_all_min_max_val, by = "patient_id")

In [None]:
# Re-read the low-quality list: first column of the file, as plain strings.
low_qual_sample <- as.character(read.table("../low_qual_sample.txt")[[1]])

In [None]:
# Remove every patient listed in `low_qual_sample` from the result table and
# from both intervention tables.
# `%in%` is used instead of `!=` because `low_qual_sample` is read from a file
# and may hold more than one id; `!=` would recycle the vector against the
# column and silently keep or drop the wrong rows.
res_all <- res_all %>%
  filter(!(patient_id %in% low_qual_sample))
intervention_chemo <- intervention_chemo %>%
  filter(!(patient_id %in% low_qual_sample))
intervention_other <- intervention_other %>%
  filter(!(patient_id %in% low_qual_sample))

In [None]:
# Normalise each patient's small ratio by two baselines:
#   * small_ratio_normalized_preop  - divided by the value at the earliest
#     timepoint of that patient,
#   * small_ratio_normalized_postop - divided by the value at the second
#     earliest timepoint.
# NOTE(review): that the earliest sample is pre-operative and the second one
# post-operative is implied by the column names only - confirm.
patients <- as.character(unique(res_all$patient_id))

# Empty template kept from the original notebook; nothing below fills it.
normalized_res_intervention <- data.frame(matrix(ncol = 7, nrow = 0))
colnames(normalized_res_intervention) <- c(
  "patient_id", "cfDNA_folder", "small_ratio",
  "sample_timepoint_days_since_OP", "time_to_relapse_days",
  "small_ratio_normalized_preop", "small_ratio_normalized_postop"
)

# small_ratio recorded at timepoint `tp` in `res_pt`. Always returns exactly
# one value: NA when `tp` is missing, and the first match when a timepoint
# occurs more than once, so the division below never sees a length-0 or
# length-2 divisor.
small_ratio_at <- function(res_pt, tp) {
  if (is.na(tp)) {
    return(NA_real_)
  }
  res_pt$small_ratio[which(res_pt$sample_timepoint_days_since_OP == tp)][1]
}

# Rows of one patient with the two normalised columns added.
normalize_patient <- function(pt) {
  res_pt <- res_all %>% filter(patient_id == pt)
  # sort() drops NA timepoints; indexing past the end gives NA, which covers
  # patients with fewer than two distinct timepoints.
  timepoints <- sort(unique(res_pt$sample_timepoint_days_since_OP))
  baseline_pre <- small_ratio_at(res_pt, timepoints[1])
  baseline_post <- small_ratio_at(res_pt, timepoints[2])
  res_pt %>%
    mutate(
      small_ratio_normalized_preop = small_ratio / baseline_pre,
      small_ratio_normalized_postop = small_ratio / baseline_post
    )
}

# Build all per-patient pieces first and bind once instead of growing the
# result with rbind() inside a loop.
res_all_normalized <- bind_rows(lapply(patients, normalize_patient))

# Samples whose log10 post-op-normalised ratio exceeds 0.5 are labelled "D",
# all others "ND" (NA stays NA).
res_all_normalized <- res_all_normalized %>%
  mutate(
    small_ratio_normalized_postop_log = log10(small_ratio_normalized_postop),
    relapse_status_pred = ifelse(small_ratio_normalized_postop_log > 0.5, "D", "ND")
  )
head(res_all_normalized)

In [None]:
## Read in c2i res
# Loads the C2i results, derives the join keys and restricts the table to the
# patients and samples that are part of this analysis.
c2i_res <- read.csv("../data/metadata/clin_data/c2i_res.csv")
c2i_res$Sample.ID <- as.character(c2i_res$Sample.ID)
c2i_res$C2.Test <- as.character(c2i_res$C2.Test)

c2i_res <- c2i_res %>%
  mutate(
    # patient ids are "C0" followed by the subject id
    patient_id = paste0("C0", Subject.ID),
    # sampleID is Sample.ID without its last six characters
    sampleID = substr(Sample.ID, 1, nchar(Sample.ID) - 6)
  ) %>%
  select(patient_id, sampleID, C2.Test, Tumor.Fraction) %>%
  # `%in%` instead of `!=`: `low_qual_sample` may list several ids, and `!=`
  # would recycle the vector and filter the wrong rows.
  filter(!(patient_id %in% low_qual_sample)) %>%
  filter(patient_id %in% unique(paramspace$patient_id)) %>%
  left_join(sample_timepoint_days, by = "sampleID") %>%
  # samples without a known timepoint cannot be placed on the time axis
  filter(!is.na(sample_timepoint_days_since_OP))
head(c2i_res)
dim(c2i_res)

c2i_res <- left_join(c2i_res, clinical_data_relapse, by = "patient_id")
head(c2i_res)
dim(c2i_res)

In [None]:
# Per-patient minimum/maximum of the C2i tumor fraction; the C2i plot uses
# them as the vertical extent of the shaded treatment rectangles.
meansc2i_template <- tibble(
  patient_id = character(),
  TF_min = numeric(),
  TF_max = numeric()
)

# One single-row tibble per patient, bound once instead of growing the table
# with rbind() inside a loop.
tf_ranges <- lapply(unique(c2i_res$patient_id), function(pt) {
  pt_rows <- c2i_res %>% filter(patient_id == pt)
  tibble(
    patient_id = pt,
    TF_min = min(pt_rows$Tumor.Fraction),
    TF_max = max(pt_rows$Tumor.Fraction)
  )
})
meansc2i <- bind_rows(c(list(meansc2i_template), tf_ranges))

# Preview the table built in this cell (the original previewed `means`, the
# k-mer table from an earlier cell).
head(meansc2i)
c2i_res <- left_join(c2i_res, meansc2i, by = c("patient_id"))
head(c2i_res)

In [None]:
# Tumor-fraction range per patient (one row per row of `c2i_res`, not
# de-duplicated), joined onto copies of both intervention tables for the
# C2i plot.
res_c2i_min_max_val <- c2i_res %>%
  select(patient_id, TF_min, TF_max)
intervention_chemo_c2i <- intervention_chemo %>%
  left_join(res_c2i_min_max_val, by = "patient_id")
intervention_other_c2i <- intervention_other %>%
  left_join(res_c2i_min_max_val, by = "patient_id")

# plotting

In [None]:
# Small-ratio time course per patient (two facet columns), points coloured by
# the predicted relapse status.
options(repr.plot.width = 9, repr.plot.height = 10)
# NOTE(review): warn = -1 silences all warnings for the rest of the session,
# not only for this plot, and is never restored.
options(warn = -1)

p_smallr <- ggplot() +
  # Toggle: confidence band
  # geom_ribbon(data = res_all, aes(x = sample_timepoint_days_since_OP, ymin = small_ratio_CI_lower, ymax = small_ratio_CI_upper), alpha = 0.2, color = "grey") +
  geom_line(
    data = res_all_normalized,
    aes(x = as.numeric(as.character(sample_timepoint_days_since_OP)), y = small_ratio),
    size = 0.7
  ) +
  geom_point(
    data = res_all_normalized,
    aes(
      x = as.numeric(as.character(sample_timepoint_days_since_OP)),
      y = small_ratio,
      color = relapse_status_pred
    ),
    size = 1.5
  ) +
  # Two unnamed colours for the two relapse_status_pred labels ("D"/"ND").
  scale_color_manual(values = c("#FF0000", "#00008B")) +
  labs(
    x = "Sample time point (days since surgery)",
    y = "UT kmers in cfDNA / UT kmers"
  ) +

  # Toggle: overlay the C2i tumor fraction
  # geom_line(data = c2i_res, aes(x = as.numeric(as.character(sample_timepoint_days_since_OP)), y = Tumor.Fraction), color = "lightgrey") +
  # geom_point(data = c2i_res, aes(x = as.numeric(as.character(sample_timepoint_days_since_OP)), y = Tumor.Fraction, color = C2.Test)) +

  # Surgery (day 0)
  geom_vline(
    data = res_all_normalized, aes(xintercept = 0),
    color = "firebrick4", size = 1.1
  ) +
  # Relapse
  geom_vline(
    data = res_all_normalized, aes(xintercept = time_to_relapse_days),
    color = "steelblue4", size = 1.1
  ) +

  # Adjuvant chemo window; one rectangle per data row, so the very low alpha
  # accumulates over the repeated rows of a patient.
  geom_rect(
    data = res_all_normalized,
    aes(
      xmin = adjuvant_chemo_start_days, xmax = adjuvant_chemo_end_days,
      ymin = small_ratio_min, ymax = small_ratio_max
    ),
    fill = "cadetblue4", alpha = 0.01
  ) +

  # Chemo interventions
  geom_rect(
    data = intervention_chemo,
    aes(
      xmin = intervention_chemo_start_days, xmax = intervention_chemo_end_days,
      ymin = small_ratio_min, ymax = small_ratio_max
    ),
    fill = "coral1", alpha = 0.01
  ) +

  # Other interventions (start day only)
  geom_vline(
    data = intervention_other,
    aes(xintercept = intervention_other_start_days),
    color = "coral1", size = 1.1, alpha = 0.5
  ) +

  # Toggle: manual legend
  # scale_color_manual(name = 'Legend',
  #      breaks = c("Surgery", "Relapse", "Intervention"),
  #      values = c("Surgery" = "firebrick4", "Relapse" = "steelblue4", "Intervention" = "steelblue2")) +

  theme_minimal() +
  theme(
    legend.position = "none",
    text = element_text(size = 16)
  ) +
  facet_wrap(~patient_id, ncol = 2, scales = "free_y")

p_smallr
                     

#intervention_chemo
#intervention_other
   
    

In [None]:
# C2i tumor-fraction time course per patient, with the same surgery, relapse
# and treatment annotations as the k-mer plot.
# NOTE(review): this assigns to `p_smallr` again, so any later use of
# `p_smallr` (e.g. a ggsave) sees this C2i figure rather than the small-ratio
# one - confirm that this is intended.
# NOTE(review): y is Tumor.Fraction, but the y-axis label is still the k-mer
# ratio label - confirm.
p_smallr <- ggplot() +
  # Toggle: k-mer small-ratio layers
  # geom_ribbon(data = res_all, aes(x = sample_timepoint_days_since_OP, ymin = small_ratio_CI_lower, ymax = small_ratio_CI_upper), alpha = 0.2, color = "grey") +
  # geom_line(data = res_all_normalized, aes(x = as.numeric(as.character(sample_timepoint_days_since_OP)), y = small_ratio)) +
  # geom_point(data = res_all_normalized, aes(x = as.numeric(as.character(sample_timepoint_days_since_OP)), y = small_ratio, color = relapse_status_pred)) +
  labs(
    x = "Sample time point (days since surgery)",
    y = "UT kmers in cfDNA / UT kmers"
  ) +

  ## plot c2i
  geom_line(
    data = c2i_res,
    aes(x = as.numeric(as.character(sample_timepoint_days_since_OP)), y = Tumor.Fraction),
    color = "black"
  ) +
  geom_point(
    data = c2i_res,
    aes(
      x = as.numeric(as.character(sample_timepoint_days_since_OP)),
      y = Tumor.Fraction,
      color = C2.Test
    )
  ) +

  # Surgery (day 0)
  geom_vline(data = c2i_res, aes(xintercept = 0), color = "firebrick4") +
  # Relapse
  geom_vline(
    data = c2i_res, aes(xintercept = time_to_relapse_days),
    color = "steelblue4", size = 1
  ) +

  # Adjuvant chemo window; one rectangle per data row, so the very low alpha
  # accumulates over the repeated rows of a patient.
  geom_rect(
    data = c2i_res,
    aes(
      xmin = adjuvant_chemo_start_days, xmax = adjuvant_chemo_end_days,
      ymin = TF_min, ymax = TF_max
    ),
    fill = "cadetblue4", alpha = 0.01
  ) +

  # Chemo interventions
  geom_rect(
    data = intervention_chemo_c2i,
    aes(
      xmin = intervention_chemo_start_days, xmax = intervention_chemo_end_days,
      ymin = TF_min, ymax = TF_max
    ),
    fill = "coral1", alpha = 0.01
  ) +

  # Other interventions (start day only)
  geom_vline(
    data = intervention_other_c2i,
    aes(xintercept = intervention_other_start_days),
    color = "coral1", size = 1, alpha = 0.5
  ) +

  # Toggle: manual legend
  # scale_color_manual(name = 'Legend',
  #      breaks = c("Surgery", "Relapse", "Intervention"),
  #      values = c("Surgery" = "firebrick4", "Relapse" = "steelblue4", "Intervention" = "steelblue2")) +

  theme_minimal() +
  theme(
    legend.position = "none",
    text = element_text(size = 16)
  ) +
  facet_wrap(~patient_id, ncol = 2, scales = "free_y")

p_smallr
                     


In [None]:
# Write whatever `p_smallr` currently holds to
# ../plotting_results/<output_folder>/big_plots/Small_<filetype>.png.
# NOTE(review): when the cells run top to bottom, `p_smallr` was last assigned
# the C2i tumor-fraction figure, so that is what lands in the "Small_" file -
# confirm that this is the intended figure.
small_plot_path <- paste0(
  "../plotting_results/", output_folder, "/big_plots/Small_", filetype, ".png"
)
ggsave(
  small_plot_path, p_smallr,
  width = 13, height = 50, dpi = "print", limitsize = FALSE
)