In [None]:
library(tidyverse)
library(ROCR)

In [None]:
# Load the phase I cfDNA parameter space and expose the patient column as
# `patient_id`, the name used by every later cell.
paramspace <- rename(
  read.csv("../data/metadata/paramspace_cfDNA_phaseI.csv"),
  patient_id = pt_id
)
head(paramspace)

In [None]:
# Number of rows in the parameter space.
nrow(paramspace)

# First column of the low-quality list as a character vector. It is matched
# against `patient_id` further down.
# NOTE(review): the name says "sample" but the values are compared with
# patient ids -- confirm what the file contains.
low_qual_sample <- as.character(read.table("../low_qual_sample.txt")[[1]])

In [None]:
# Suffix of the ratio files to load (big_<filetype>.csv / small_<filetype>.csv).
# Other values that have been used here:
#   "ratio_cs1e9", "ratio_singletons_excluded_tumor_cs",
#   "ratio_singletons_excluded_tumor_4_cs"
filetype <- "ratio"

# Sub-folder inside every cfDNA sample folder that holds the ratio files.
# Other values that have been used here:
#   all_germline_filtered_bams_tumor_ci5_cs1e9,
#   all_germline_filtered_bams_tumor_ci5_cs1e9_filtered_cfDNA,
#   de_novo_germline, de_novo_germline_filtered_cfDNA
input_folder <- "de_novo_germline_filtered_cfDNA"

# Result columns, filled row by row below. They start as numeric NA so the
# column type does not depend on the first assignment.
paramspace["big_ratio"] <- NA_real_
paramspace["big_ratio_CI_lower"] <- NA_real_
paramspace["big_ratio_CI_upper"] <- NA_real_

paramspace["small_ratio"] <- NA_real_
paramspace["small_ratio_CI_lower"] <- NA_real_
paramspace["small_ratio_CI_upper"] <- NA_real_

# seq_len() makes the loop a no-op for an empty table; 1:nrow() would iterate
# over c(1, 0) and try to read files for non-existent rows.
for (row in seq_len(nrow(paramspace))) {
  # as.character() keeps the path correct even if the columns were read as
  # factors.
  pt <- as.character(paramspace[row, "patient_id"])
  fd <- as.character(paramspace[row, "cfDNA_folder"])
  sample_dir <- file.path("..", "data", pt, fd, input_folder)

  big_r <- read.csv(file.path(sample_dir, paste0("big_", filetype, ".csv")))
  small_r <- read.csv(file.path(sample_dir, paste0("small_", filetype, ".csv")))

  # Each file must describe exactly one sample; anything else cannot be stored
  # in a single row of `paramspace`.
  if (nrow(big_r) != 1 || nrow(small_r) != 1) {
    stop("Expected exactly one row in the ratio files under ", sample_dir,
         call. = FALSE)
  }

  paramspace[row, "big_ratio"] <- big_r$ratio
  paramspace[row, "big_ratio_CI_lower"] <- big_r$lower_CI
  paramspace[row, "big_ratio_CI_upper"] <- big_r$upper_CI

  paramspace[row, "small_ratio"] <- small_r$ratio
  paramspace[row, "small_ratio_CI_lower"] <- small_r$lower_CI
  paramspace[row, "small_ratio_CI_upper"] <- small_r$upper_CI
}

In [None]:
# `res` is the working table that the following cells extend with sample ids,
# timepoints and labels.
res <- paramspace
head(res)

In [None]:
# Derive `sampleID` from the first "_"-separated chunk of `cfDNA_folder`.
# The folder name is split into at most three chunks; chunks two and three are
# extracted as well (so a folder name with fewer than three chunks still
# raises an error) and then dropped together with the temporary list column.
# Earlier alternative:
#res <- res %>% separate(cfDNA_folder, c("sampleID", "sample_type", "sample_number"))
res <- res %>%
  mutate(
    tmp_chunks = stringr::str_split(cfDNA_folder, stringr::fixed("_"), n = 3),
    sampleID = map_chr(tmp_chunks, 1),
    sub_value = map_chr(tmp_chunks, 2),
    sub_val = map_chr(tmp_chunks, 3)
  ) %>%
  select(-tmp_chunks, -sub_value, -sub_val)

head(res)
dim(res)

In [None]:
# Per-sample timepoint metadata; the next cell reads its `phase`, `sampleID`
# and `sample_timepoint_days_since_OP` columns.
sample_timepoint <- read.csv("../data/metadata/clin_data/sample_timepoint_formatted.csv")

In [None]:
head(sample_timepoint)

# Keep only phase I samples and the two columns needed for the join.
sample_timepoint_days <- sample_timepoint %>%
  filter(phase == "phaseI") %>%
  select(sampleID, sample_timepoint_days_since_OP)

# Show samples without a recorded timepoint.
filter(sample_timepoint_days, is.na(sample_timepoint_days_since_OP))
dim(sample_timepoint_days)
head(sample_timepoint_days)

In [None]:
# Attach days-since-operation to every sample; samples without a timepoint
# entry keep NA because this is a left join.
res <- res %>%
  mutate(sampleID = as.character(sampleID)) %>%
  left_join(sample_timepoint_days, by = "sampleID")
head(res)
dim(res)

In [None]:
# V1 positive/negative sample labels joined onto the ratio table.
# `sampleID` is cast to character so the join key has the same type as
# `res$sampleID` regardless of how read.csv() parsed it; the V2 labels are
# loaded the same way.
pos_neg <- read.csv("../data/metadata/pos_neg_samples_phaseI.csv")
pos_neg$sampleID <- as.character(pos_neg$sampleID)
resv1 <- left_join(res, pos_neg, by = "sampleID")
head(resv1)

In [None]:
# Keep only samples that carry a V1 pos/neg label.
res_only_posneg <- filter(resv1, !is.na(pos_neg_samples))
head(res_only_posneg)
dim(res_only_posneg)

In [None]:
# Drop rows whose patient_id appears in the low-quality list.
res_only_posneg_pt_rm <- filter(
  res_only_posneg,
  !(patient_id %in% low_qual_sample)
)

In [None]:
# ROC curve of the raw big ratio against the V1 pos/neg labels.
pred <- prediction(
  predictions = res_only_posneg_pt_rm$big_ratio,
  labels = res_only_posneg_pt_rm$pos_neg_samples
)

# Area under the ROC curve.
auc <- performance(pred, measure = "auc")@y.values[[1]]

# TPR (y) against FPR (x), flattened into a tibble for ggplot.
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
perf <- tibble(FPR = perf@x.values[[1]], TPR = perf@y.values[[1]])

p_big_ratio <- ggplot() +
  geom_line(data = perf, mapping = aes(x = FPR, y = TPR),
            color = "firebrick", size = 1) +
  # Diagonal reference line (TPR == FPR).
  geom_abline(intercept = 0, slope = 1, color = "lightgrey", size = 0.5) +
  theme_minimal() +
  ggtitle(paste0("Big ratio, AUC: ", round(auc, 4)))

print(p_big_ratio)
# To write the plot to disk instead, uncomment:
#png(paste("../plotting_results/ROC_curves/ROC_big_", filetype, ".png", sep = ""))
#print(p_big_ratio)
#dev.off()

In [None]:
# ROC curve of the raw small ratio against the V1 pos/neg labels.
pred <- prediction(
  predictions = res_only_posneg_pt_rm$small_ratio,
  labels = res_only_posneg_pt_rm$pos_neg_samples
)

# Area under the ROC curve.
auc <- performance(pred, measure = "auc")@y.values[[1]]

# TPR (y) against FPR (x), flattened into a tibble for ggplot.
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
perf <- tibble(FPR = perf@x.values[[1]], TPR = perf@y.values[[1]])

p_small_ratio <- ggplot() +
  geom_line(data = perf, mapping = aes(x = FPR, y = TPR),
            color = "firebrick", size = 1) +
  # Diagonal reference line (TPR == FPR).
  geom_abline(intercept = 0, slope = 1, color = "lightgrey", size = 0.5) +
  theme_minimal() +
  ggtitle(paste0("Small ratio, AUC: ", round(auc, 4)))

print(p_small_ratio)
# To write the plot to disk instead, uncomment:
#png(paste("../plotting_results/ROC_curves/ROC_small_", filetype, ".png", sep = ""))
#print(p_small_ratio)
#dev.off()

In [None]:
# V2 positive/negative sample labels joined onto the ratio table; the join key
# is cast to character first.
pos_neg <- read.csv("../data/metadata/pos_neg_samples_phaseI_V2.csv") %>%
  mutate(sampleID = as.character(sampleID))
resv2 <- res %>% left_join(pos_neg, by = "sampleID")
head(resv2)

In [None]:
# Keep only samples that carry a V2 pos/neg label.
res_only_posneg_v2 <- filter(resv2, !is.na(pos_neg_samples))
head(res_only_posneg_v2)
dim(res_only_posneg_v2)

In [None]:
# Drop rows whose patient_id appears in the low-quality list.
res_only_posneg_v2_pt_rm <- filter(
  res_only_posneg_v2,
  !(patient_id %in% low_qual_sample)
)

In [None]:
# ROC curve of the raw big ratio against the V2 pos/neg labels.
pred <- prediction(
  predictions = res_only_posneg_v2_pt_rm$big_ratio,
  labels = res_only_posneg_v2_pt_rm$pos_neg_samples
)

# Area under the ROC curve.
auc <- performance(pred, measure = "auc")@y.values[[1]]

# TPR (y) against FPR (x), flattened into a tibble for ggplot.
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
perf <- tibble(FPR = perf@x.values[[1]], TPR = perf@y.values[[1]])

p_big_ratio <- ggplot() +
  geom_line(data = perf, mapping = aes(x = FPR, y = TPR),
            color = "firebrick", size = 1) +
  # Diagonal reference line (TPR == FPR).
  geom_abline(intercept = 0, slope = 1, color = "lightgrey", size = 0.5) +
  theme_minimal() +
  ggtitle(paste0("V2 Big ratio, AUC: ", round(auc, 4)))

print(p_big_ratio)
# To write the plot to disk instead, uncomment:
#png(paste("../plotting_results/ROC_curves/ROC_big_", filetype, ".png", sep = ""))
#print(p_big_ratio)
#dev.off()

In [None]:
# ROC curve of the raw small ratio against the V2 pos/neg labels.
pred <- prediction(
  predictions = res_only_posneg_v2_pt_rm$small_ratio,
  labels = res_only_posneg_v2_pt_rm$pos_neg_samples
)

# Area under the ROC curve.
auc <- performance(pred, measure = "auc")@y.values[[1]]

# TPR (y) against FPR (x), flattened into a tibble for ggplot.
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
perf <- tibble(FPR = perf@x.values[[1]], TPR = perf@y.values[[1]])

p_small_ratio <- ggplot() +
  geom_line(data = perf, mapping = aes(x = FPR, y = TPR),
            color = "firebrick", size = 1) +
  # Diagonal reference line (TPR == FPR).
  geom_abline(intercept = 0, slope = 1, color = "lightgrey", size = 0.5) +
  theme_minimal() +
  ggtitle(paste0("V2 Small ratio, AUC: ", round(auc, 4)))

print(p_small_ratio)
# To write the plot to disk instead, uncomment:
#png(paste("../plotting_results/ROC_curves/ROC_small_", filetype, ".png", sep = ""))
#print(p_small_ratio)
#dev.off()

### ROC curves for ratios normalized by the post-op sample

In [None]:
# Relapse label per cfDNA sample, with the columns renamed so the table can be
# joined on `cfDNA_folder`.
Correlations <- read.csv("../data/ci5_cs1e9_correlation.csv")
relapse_label <- Correlations %>%
  select(cfDNA_folder = cfDNA_sample, Relapse_label = Relapse)

In [None]:
# Normalise each sample's big/small ratio by the same patient's post-op sample
# (V1 labels table). A patient's earliest timepoint is skipped and the sample
# at the second-earliest distinct timepoint is taken as the post-op baseline.
#
# The baseline is reset for every patient, so a patient without a usable
# post-op sample gets NA normalised ratios (plus a warning) instead of
# silently inheriting the previous patient's baseline. Missing timepoints are
# ignored when picking the baseline.
patients <- as.character(unique(res$patient_id))

# One data frame per patient, combined once after the loop (avoids growing a
# data frame with rbind() on every iteration).
normalized_by_patient <- vector("list", length(patients))

for (idx in seq_along(patients)) {
  i <- patients[[idx]]
  res_pt <- resv1 %>% filter(patient_id == i)

  # sort() drops NA, so samples without a timepoint cannot become the baseline.
  timepoints <- sort(unique(res_pt$sample_timepoint_days_since_OP))

  post_op_sample <- NA
  pre_val_big <- NA_real_
  pre_val_small <- NA_real_

  if (length(timepoints) >= 2) {
    post_op_sample <- timepoints[2]
    baseline_rows <- which(
      res_pt$sample_timepoint_days_since_OP == post_op_sample
    )
    # If several samples share the post-op timepoint, the last one is used.
    baseline_row <- baseline_rows[length(baseline_rows)]
    pre_val_big <- res_pt$big_ratio[baseline_row]
    pre_val_small <- res_pt$small_ratio[baseline_row]
  } else {
    warning("Patient ", i, " has fewer than two distinct sample timepoints; ",
            "normalized ratios are set to NA.", call. = FALSE)
  }

  print(post_op_sample)

  normalized_by_patient[[idx]] <- res_pt %>%
    mutate(
      big_ratio_normalized = big_ratio / pre_val_big,
      small_ratio_normalized = small_ratio / pre_val_small
    )
}

res_post_op_normalized <- bind_rows(normalized_by_patient)

In [None]:
# Attach the relapse label; samples without an entry in `relapse_label` are
# dropped by the inner join.
res_post_op_normalized <- res_post_op_normalized %>%
  inner_join(relapse_label, by = "cfDNA_folder")

In [None]:
# Keep only normalised samples that carry a V1 pos/neg label.
res_post_op_normalized_only_posneg <- filter(
  res_post_op_normalized,
  !is.na(pos_neg_samples)
)
head(res_post_op_normalized_only_posneg)
dim(res_post_op_normalized_only_posneg)

In [None]:
# Drop rows whose patient_id appears in the low-quality list.
res_post_op_normalized_only_posneg_pt_rm <- filter(
  res_post_op_normalized_only_posneg,
  !(patient_id %in% low_qual_sample)
)

In [None]:
# ROC curve of the post-op-normalised big ratio against the V1 pos/neg labels.
pred <- prediction(
  predictions = res_post_op_normalized_only_posneg_pt_rm$big_ratio_normalized,
  labels = res_post_op_normalized_only_posneg_pt_rm$pos_neg_samples
)

# Area under the ROC curve.
auc <- performance(pred, measure = "auc")@y.values[[1]]

# TPR (y) against FPR (x), flattened into a tibble for ggplot.
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
perf <- tibble(FPR = perf@x.values[[1]], TPR = perf@y.values[[1]])

p_big_ratio <- ggplot() +
  geom_line(data = perf, mapping = aes(x = FPR, y = TPR),
            color = "firebrick", size = 1) +
  # Diagonal reference line (TPR == FPR).
  geom_abline(intercept = 0, slope = 1, color = "lightgrey", size = 0.5) +
  theme_minimal() +
  ggtitle(paste0("Big ratio normalized by post op, AUC: ", round(auc, 4)))

print(p_big_ratio)
# To write the plot to disk instead, uncomment:
#png(paste("../plotting_results/ROC_curves/ROC_big_", filetype, ".png", sep = ""))
#print(p_big_ratio)
#dev.off()

In [None]:
# ROC curve of the post-op-normalised small ratio against the V1 pos/neg labels.
pred <- prediction(
  predictions = res_post_op_normalized_only_posneg_pt_rm$small_ratio_normalized,
  labels = res_post_op_normalized_only_posneg_pt_rm$pos_neg_samples
)

# Area under the ROC curve.
auc <- performance(pred, measure = "auc")@y.values[[1]]

# TPR (y) against FPR (x), flattened into a tibble for ggplot.
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
perf <- tibble(FPR = perf@x.values[[1]], TPR = perf@y.values[[1]])

p_small_ratio <- ggplot() +
  geom_line(data = perf, mapping = aes(x = FPR, y = TPR),
            color = "firebrick", size = 1) +
  # Diagonal reference line (TPR == FPR).
  geom_abline(intercept = 0, slope = 1, color = "lightgrey", size = 0.5) +
  theme_minimal() +
  ggtitle(paste0("Small ratio normalized by post op, AUC: ", round(auc, 4)))

print(p_small_ratio)
# To write the plot to disk instead, uncomment:
#png(paste("../plotting_results/ROC_curves/ROC_small_", filetype, ".png", sep = ""))
#print(p_small_ratio)
#dev.off()

In [None]:
# Normalise each sample's big/small ratio by the same patient's post-op sample
# (V2 labels table). A patient's earliest timepoint is skipped and the sample
# at the second-earliest distinct timepoint is taken as the post-op baseline.
#
# The baseline is reset for every patient, so a patient without a usable
# post-op sample gets NA normalised ratios (plus a warning) instead of
# silently inheriting the previous patient's baseline. Missing timepoints are
# ignored when picking the baseline.
patients <- as.character(unique(resv2$patient_id))

# One data frame per patient, combined once after the loop (avoids growing a
# data frame with rbind() on every iteration).
normalized_by_patient <- vector("list", length(patients))

for (idx in seq_along(patients)) {
  i <- patients[[idx]]
  res_pt <- resv2 %>% filter(patient_id == i)

  # sort() drops NA, so samples without a timepoint cannot become the baseline.
  timepoints <- sort(unique(res_pt$sample_timepoint_days_since_OP))

  post_op_sample <- NA
  pre_val_big <- NA_real_
  pre_val_small <- NA_real_

  if (length(timepoints) >= 2) {
    post_op_sample <- timepoints[2]
    baseline_rows <- which(
      res_pt$sample_timepoint_days_since_OP == post_op_sample
    )
    # If several samples share the post-op timepoint, the last one is used.
    baseline_row <- baseline_rows[length(baseline_rows)]
    pre_val_big <- res_pt$big_ratio[baseline_row]
    pre_val_small <- res_pt$small_ratio[baseline_row]
  } else {
    warning("Patient ", i, " has fewer than two distinct sample timepoints; ",
            "normalized ratios are set to NA.", call. = FALSE)
  }

  print(post_op_sample)

  normalized_by_patient[[idx]] <- res_pt %>%
    mutate(
      big_ratio_normalized = big_ratio / pre_val_big,
      small_ratio_normalized = small_ratio / pre_val_small
    )
}

res_post_op_normalized_v2 <- bind_rows(normalized_by_patient)

In [None]:
# Attach the relapse label; samples without an entry in `relapse_label` are
# dropped by the inner join.
res_post_op_normalized_v2 <- res_post_op_normalized_v2 %>%
  inner_join(relapse_label, by = "cfDNA_folder")

In [None]:
# Keep only normalised samples that carry a V2 pos/neg label.
res_post_op_normalized_v2_only_posneg <- filter(
  res_post_op_normalized_v2,
  !is.na(pos_neg_samples)
)
head(res_post_op_normalized_v2_only_posneg)
dim(res_post_op_normalized_v2_only_posneg)

In [None]:
# Drop rows whose patient_id appears in the low-quality list.
res_post_op_normalized_v2_only_posneg_pt_rm <- filter(
  res_post_op_normalized_v2_only_posneg,
  !(patient_id %in% low_qual_sample)
)

In [None]:
# ROC curve of the post-op-normalised big ratio against the V2 pos/neg labels.
pred <- prediction(
  predictions = res_post_op_normalized_v2_only_posneg_pt_rm$big_ratio_normalized,
  labels = res_post_op_normalized_v2_only_posneg_pt_rm$pos_neg_samples
)

# Area under the ROC curve.
auc <- performance(pred, measure = "auc")@y.values[[1]]

# TPR (y) against FPR (x), flattened into a tibble for ggplot.
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
perf <- tibble(FPR = perf@x.values[[1]], TPR = perf@y.values[[1]])

# The title carries the "V2" prefix, like the raw V2 plots, so this figure can
# be told apart from the V1 normalised one.
p_big_ratio <- ggplot() +
  geom_line(data = perf, mapping = aes(x = FPR, y = TPR),
            color = "firebrick", size = 1) +
  # Diagonal reference line (TPR == FPR).
  geom_abline(intercept = 0, slope = 1, color = "lightgrey", size = 0.5) +
  theme_minimal() +
  ggtitle(paste0("V2 Big ratio normalized by post op, AUC: ", round(auc, 4)))

print(p_big_ratio)
# To write the plot to disk instead, uncomment:
#png(paste("../plotting_results/ROC_curves/ROC_big_", filetype, ".png", sep = ""))
#print(p_big_ratio)
#dev.off()

In [None]:
# ROC curve of the post-op-normalised small ratio against the V2 pos/neg labels.
pred <- prediction(
  predictions = res_post_op_normalized_v2_only_posneg_pt_rm$small_ratio_normalized,
  labels = res_post_op_normalized_v2_only_posneg_pt_rm$pos_neg_samples
)

# Area under the ROC curve.
auc <- performance(pred, measure = "auc")@y.values[[1]]

# TPR (y) against FPR (x), flattened into a tibble for ggplot.
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
perf <- tibble(FPR = perf@x.values[[1]], TPR = perf@y.values[[1]])

# The title carries the "V2" prefix, like the raw V2 plots, so this figure can
# be told apart from the V1 normalised one.
p_small_ratio <- ggplot() +
  geom_line(data = perf, mapping = aes(x = FPR, y = TPR),
            color = "firebrick", size = 1) +
  # Diagonal reference line (TPR == FPR).
  geom_abline(intercept = 0, slope = 1, color = "lightgrey", size = 0.5) +
  theme_minimal() +
  ggtitle(paste0("V2 Small ratio normalized by post op, AUC: ", round(auc, 4)))

print(p_small_ratio)
# To write the plot to disk instead, uncomment:
#png(paste("../plotting_results/ROC_curves/ROC_small_", filetype, ".png", sep = ""))
#print(p_small_ratio)
#dev.off()