In [None]:
library(tidyverse)

In [None]:
# Base name (without ".csv") of the per-patient correlation file to aggregate.
# The same base name is reused further down for the combined CSV and the saved plot.
correlation_file_name <- "ci5_cs1e9_correlation"
# Other base names noted by the author (presumably alternative inputs -- confirm):
# ci5_cs1e9_correlation
# all_germline_ci5_cs1e9_correlation 
# all_germline_filtered_bams_tumor_ci5_cs1e9_correlation
# de_novo_germline_reference_correlation

In [None]:
# Parameter table for the phase I cfDNA samples; its pt_id column supplies the patient list.
paramspace <- read.csv("data/metadata/paramspace_cfDNA_phaseI.csv")

In [None]:
# Distinct patient identifiers, as character strings.
patients <- as.character(unique(paramspace$pt_id))

In [None]:
# Display the patient identifiers.
patients

In [None]:
# Empty three-column accumulator: one row per (patient, cfDNA sample) is added later.
correlation_per_patient <- setNames(
  data.frame(matrix(ncol = 3, nrow = 0)),
  c("pt_id", "cfDNA_sample", "correlation")
)

In [None]:
# Read one patient's correlation CSV and return it as rows of
# (pt_id, cfDNA_sample, correlation).
#   pt_id     patient identifier; also the name of the patient's folder under data/
#   file_stem base name (without ".csv") of the correlation file
read_patient_correlations <- function(pt_id, file_stem) {
  correlation_csv <- read.csv(paste0("data/", pt_id, "/", file_stem, ".csv"))
  # Fail early if the expected columns are absent.
  stopifnot(c("cfDNA_sample", "Correlation") %in% colnames(correlation_csv))
  data.frame(
    pt_id = rep(pt_id, nrow(correlation_csv)),
    cfDNA_sample = as.character(correlation_csv$cfDNA_sample),
    correlation = as.numeric(correlation_csv$Correlation),
    stringsAsFactors = FALSE
  )
}

# Stack all patients in one step instead of appending row by row.
# Appending c(pt_id, sample, correlation) coerced every value to character and
# failed when a cfDNA_sample name occurred twice in one file.
# The empty template keeps the three columns even when `patients` is empty.
correlation_per_patient <- bind_rows(
  data.frame(
    pt_id = character(),
    cfDNA_sample = character(),
    correlation = numeric(),
    stringsAsFactors = FALSE
  ),
  lapply(patients, read_patient_correlations, file_stem = correlation_file_name)
)

In [None]:
# sampleID is the part of cfDNA_sample before the first "_".
# Taking the prefix directly avoids splitting into three chunks: extracting
# chunks 2 and 3 (which were dropped again immediately) errored on names
# with fewer than two underscores.
correlation_per_patient <- correlation_per_patient %>%
  mutate(sampleID = sub("_.*$", "", cfDNA_sample))

head(correlation_per_patient)
dim(correlation_per_patient)

In [None]:
# Sample timepoints (days since surgery), restricted to phase I samples.
sample_timepoint <- read.csv("data/metadata/clin_data/sample_timepoint_formatted.csv")
sample_timepoint_days <- sample_timepoint %>%
  filter(phase == "phaseI") %>%
  select(sampleID, sample_timepoint_days_since_OP)

In [None]:
# Attach the sampling day to every correlation row (matched on sampleID).
correlation_per_patient <- correlation_per_patient %>%
  left_join(sample_timepoint_days, by = "sampleID")
head(correlation_per_patient)

In [None]:
# Patient ids taken from the first row of the header-less CSV, as a plain character vector.
phaseIpt_R <- as.character(
  unlist(read.csv("../phaseI_pt_R.csv", header = FALSE)[1, ])
)

In [None]:
# Ids used below to label patients as "Relapse".
relapse_pt <- phaseIpt_R

In [None]:
# Label every row by whether its patient is in the relapse list.
Correlation <- mutate(
  correlation_per_patient,
  Relapse = ifelse(pt_id %in% relapse_pt, "Relapse", "No_relapse")
)
head(Correlation)

In [None]:
# Make sure the correlation column is numeric before writing and plotting.
Correlation[["correlation"]] <- as.numeric(Correlation[["correlation"]])

In [None]:
# Save the combined table under data/, named after the chosen correlation file.
write.csv(Correlation, file = paste0("data/", correlation_file_name, ".csv"))

In [None]:
# Preview the combined table after the numeric conversion.
head(Correlation)

In [None]:
# Relapse and adjuvant-chemo timing per patient; patient_id is renamed to
# pt_id so it can serve as the join key.
clinical_data <- read.csv("data/metadata/clin_data/clinical_data_formatted.csv")
clinical_data_relapse <- clinical_data %>%
  select(
    pt_id = patient_id,
    time_to_relapse_days,
    adjuvant_chemo_start_days,
    adjuvant_chemo_end_days
  )
head(clinical_data_relapse)

In [None]:
# Add the clinical timing columns to every correlation row.
res <- Correlation %>%
  left_join(clinical_data_relapse, by = "pt_id")

In [None]:
# Preview the joined correlation + clinical table.
head(res)

In [None]:
# Samples from relapsing patients taken on or after the recorded relapse day.
normalized_res_intervention_relapses <- res %>%
  filter(Relapse == "Relapse",
         sample_timepoint_days_since_OP >= time_to_relapse_days)

# Earliest of those samples per patient (ties kept). Including it in the
# "pre or no relapse" set makes that set reach the first post-relapse point.
# ungroup() stops the per-patient grouping from leaking into the combined
# table and silently affecting later verbs.
normalized_res_intervention_pre_or_no_relapse1 <- normalized_res_intervention_relapses %>%
  group_by(pt_id) %>%
  filter(sample_timepoint_days_since_OP == min(sample_timepoint_days_since_OP)) %>%
  ungroup()

# All samples from non-relapsing patients, plus pre-relapse samples.
normalized_res_intervention_pre_or_no_relapse2 <- res %>%
  filter(Relapse == "No_relapse" |
           sample_timepoint_days_since_OP < time_to_relapse_days)

normalized_res_intervention_pre_or_no_relapse <- bind_rows(
  normalized_res_intervention_pre_or_no_relapse1,
  normalized_res_intervention_pre_or_no_relapse2
)

In [None]:
# Set the repr.plot.width / repr.plot.height options for the following plot output.
options(repr.plot.width=20, repr.plot.height=10)

In [None]:
# Correlation over time for all patients in one panel:
#   solid line  - samples before relapse / from non-relapsing patients
#   dashed line - samples on or after relapse
#   points      - every sample
combined_correlation_plot <- ggplot() +
  geom_line(
    data = normalized_res_intervention_pre_or_no_relapse,
    mapping = aes(x = sample_timepoint_days_since_OP, y = correlation,
                  color = Relapse, group = pt_id)
  ) +
  geom_line(
    data = normalized_res_intervention_relapses,
    mapping = aes(x = sample_timepoint_days_since_OP, y = correlation,
                  color = Relapse, group = pt_id),
    linetype = "dashed"
  ) +
  geom_point(
    data = res,
    mapping = aes(x = sample_timepoint_days_since_OP, y = correlation,
                  color = Relapse)
  ) +
  theme(text = element_text(size = 20))
# Optional per-point patient labels (disabled):
# + geom_text(data = res, aes(x = sample_timepoint_days_since_OP, y = correlation, color = Relapse, label=pt_id))

ggsave(
  filename = paste0("plotting_results/Correlation_ctDNA_tumor_counts/",
                    correlation_file_name, ".png"),
  plot = combined_correlation_plot,
  width = 12, height = 8, dpi = "print"
)

In [None]:
# Render the combined plot in the notebook.
combined_correlation_plot

In [None]:
# Preview res again before joining the read counts.
head(res)

In [None]:
# Read counts per cfDNA sample; pt_id is dropped here because res already has it.
readcounts <- read.csv("data/read_counts.csv") %>%
  select(-pt_id)

In [None]:
# Keep every row of res and add its read-count columns (matched on cfDNA_sample).
readcounts <- res %>%
  left_join(readcounts, by = "cfDNA_sample")

In [None]:
# Mean correlation per patient (used for the facet labels).
means_cor <- summarise(group_by(res, pt_id), cor_mean = mean(correlation))
head(means_cor)

# Ids from the first row of the low-quality list, as a plain character vector.
low_qual_sample <- as.character(
  unlist(read.table("../low_qual_sample.txt")[1, ])
)

In [None]:
# Coerce pt_id to character in the three data frames used by the faceted plot.
# NOTE(review): presumably so pt_id matches the character names of the facet
# labeller and the ids in low_qual_sample -- confirm.
normalized_res_intervention_pre_or_no_relapse$pt_id <- as.character(normalized_res_intervention_pre_or_no_relapse$pt_id)
normalized_res_intervention_relapses$pt_id <- as.character(normalized_res_intervention_relapses$pt_id)
res$pt_id <- as.character(res$pt_id)

In [None]:
options(repr.plot.width=9, repr.plot.height=7)

# Facet strip labels: "<pt_id>,  mean cor: <rounded mean>", keyed by pt_id.
pt_id.labs <- c(paste(means_cor$pt_id, ",  mean cor: ", round(means_cor$cor_mean, 3), sep = ""))
names(pt_id.labs) <- c(as.character(means_cor$pt_id))

# One panel per patient, low-quality patients excluded.
# Exclusion uses %in% rather than `!=`: low_qual_sample is a vector, and `!=`
# would recycle it element-wise against pt_id instead of testing membership.
# The theme(text = size 20) that preceded theme_minimal() was removed because
# theme_minimal() replaced it; the sizes set at the end are the effective ones.
ggplot() +
    geom_line(data = normalized_res_intervention_pre_or_no_relapse %>% filter(!(pt_id %in% low_qual_sample)),
              aes(x = sample_timepoint_days_since_OP, y = correlation, color = Relapse)) +
    geom_line(data = normalized_res_intervention_relapses %>% filter(!(pt_id %in% low_qual_sample)),
              aes(x = sample_timepoint_days_since_OP, y = correlation, color = Relapse),
              linetype = "dashed") +
    geom_point(data = res %>% filter(!(pt_id %in% low_qual_sample)),
               aes(x = sample_timepoint_days_since_OP, y = correlation, color = Relapse)) +
    theme_minimal() +
    facet_wrap(~pt_id, labeller = labeller(pt_id = pt_id.labs), ncol = 2) +
    scale_color_manual(values=c('#00BFC4', '#F8766D'), labels = c("No relapse", "Relapse"))+
    xlab("cfDNA sample timepoint (days since surgery)") +
    ylab("Correlation") +
    theme(strip.text.x = element_text(size = 12),
          text = element_text(size = 12),
          legend.text=element_text(size=12))

In [None]:

# Re-read the relapsing patient ids so this cell can run on its own.
phaseIpt_R <- read.csv("../phaseI_pt_R.csv", header=FALSE)
phaseIpt_R <- as.character(unlist(c(phaseIpt_R[1,])))
relapsing_pt <- phaseIpt_R

# Mean correlation over all samples of relapsing patients.
res %>%
    filter(pt_id %in% relapsing_pt) %>%
    summarize(mean_cor = mean(correlation))

# Mean correlation over non-relapsing patients, excluding low-quality patients.
# The previous `!(pt_id != low_qual_sample)` was a double negative that kept
# ONLY the low-quality patient, the opposite of the exclusion used in the plots.
res %>%
    filter(!(pt_id %in% relapsing_pt), !(pt_id %in% low_qual_sample)) %>%
    summarize(mean_cor = mean(correlation))

In [None]:
# Mean correlation per relapsing patient over the "pre or no relapse" samples.
# summarise() returns one row per pt_id in group-key order, so no explicit
# sort is needed.
normalized_res_intervention_pre_or_no_relapse %>%
    filter(pt_id %in% relapsing_pt) %>%
    group_by(pt_id) %>%
    summarise(mean_cor = mean(correlation))

In [None]:
relapsing_pt2 <- phaseIpt_R

# Post-relapse samples of relapsing patients, minus each patient's earliest
# post-relapse timepoint.
# A grouped filter replaces the per-patient loop with rbind(): patients without
# post-relapse samples simply contribute no rows (no min()-of-empty warning),
# and the result is an empty data frame rather than NULL when nothing matches.
# relapsing_pt2 holds the same ids as the relapsing_pt vector used before.
normalized_res_intervention_relapses_wo_min <- normalized_res_intervention_relapses %>%
    filter(pt_id %in% relapsing_pt2) %>%
    group_by(pt_id) %>%
    filter(sample_timepoint_days_since_OP != min(sample_timepoint_days_since_OP)) %>%
    ungroup() %>%
    as.data.frame()

# Mean correlation per patient over the remaining post-relapse samples.
normalized_res_intervention_relapses_wo_min %>%
    group_by(pt_id) %>%
    summarize(mean_cor = mean(correlation))

In [None]:
# Preview res joined with the read counts.
head(readcounts)

In [None]:
options(repr.plot.width = 8, repr.plot.height = 12)

# Read count over time, one row of panels per patient, each with its own y-axis range.
ggplot(
  data = readcounts,
  mapping = aes(x = sample_timepoint_days_since_OP, y = readcount, color = Relapse)
) +
  geom_line() +
  geom_point() +
  theme_minimal() +
  facet_wrap(~pt_id, ncol = 1, scales = "free_y")