In [None]:
library(tidyverse)
library(RColorBrewer)
library(ggh4x)

In [None]:
# Read ../data/metadata/paramspace_phaseII.csv and keep only the columns
# pt_id and output_path_unique_kmers.
paramspace <- select(
  read.csv("../data/metadata/paramspace_phaseII.csv"),
  pt_id, output_path_unique_kmers
)
# Show the first rows and the table dimensions.
head(paramspace)
dim(paramspace)

In [None]:
# Exclusion list: every field of the first row of the header-less CSV,
# flattened into a character vector.
phase2ptex <- as.character(unlist(c(
  read.csv("../phase2_exclude3.csv", header = FALSE)[1, ]
)))

# Keep only the patients that are not on the exclusion list.
paramspace <- filter(paramspace, !(pt_id %in% phase2ptex))

In [None]:
# Every field of the first row of the header-less CSV, flattened into a
# character vector (used further down to pick axis-label colours).
phaseIpt_R <- as.character(unlist(c(
  read.csv("../phaseI_pt_R.csv", header = FALSE)[1, ]
)))

In [None]:
#' Extract the read count and the unique k-mer count from a counting log.
#'
#' Reads `filepath` line by line until end of file or until a line whose text
#' before the first ":" equals "1 of 1 steps (100%) done". On every other
#' line the text before the first ":" is compared with two fixed labels; on a
#' match, the text between the first and second ":" is stored with all spaces
#' removed. If a label occurs more than once, the last occurrence wins.
#'
#' @param filepath Path to the log file.
#' @return Character vector of length 2, `c(reads, kmers)`. An entry is `NA`
#'   when its label was not seen before reading stopped, or when nothing
#'   follows the ":" on the matching line.
processFile <- function(filepath) {
  reads_label <- "   Total no. of reads                 "
  kmers_label <- "   No. of unique counted k-mers       "
  done_label <- "1 of 1 steps (100%) done"

  con <- file(filepath, "r")
  # Release the connection even if reading fails part-way through.
  on.exit(close(con), add = TRUE)

  # Defaults, so that a log lacking a label yields NA instead of an
  # "object not found" error when the result vector is built.
  reads <- NA_character_
  kmers <- NA_character_

  while (TRUE) {
    line <- readLines(con, n = 1)
    if (length(line) == 0) {
      break
    }

    # Split once per line. strsplit() returns character(0) for an empty
    # line, so fall back to "" to keep the comparisons below well-defined.
    fields <- strsplit(line, ":", fixed = TRUE)[[1]]
    key <- if (length(fields) >= 1) fields[1] else ""

    if (key == done_label) {
      break
    }
    if (key == reads_label) {
      reads <- gsub(" ", "", fields[2], fixed = TRUE)
    }
    if (key == kmers_label) {
      kmers <- gsub(" ", "", fields[2], fixed = TRUE)
    }
  }

  c(reads, kmers)
}

In [None]:
# Build `res_germline`: one row per row of `paramspace`, holding the read
# count and unique k-mer count parsed from that patient's log file in
# ../logs/count_germline_phase_II/.
logs <- list.files("../logs/count_germline_phase_II/")

# seq_len() yields an empty sequence for a zero-row `paramspace`, whereas
# 1:nrow() would iterate over rows 1 and 0. The per-patient tibbles are
# collected in a list and bound once instead of growing a table in the loop.
res_germline <- lapply(seq_len(nrow(paramspace)), function(row) {
  patient <- as.character(paramspace[row, "pt_id"])
  folder <- paramspace[row, "output_path_unique_kmers"]

  # Match the patient id literally rather than as a regular expression, and
  # insist on exactly one log so an ambiguous or missing match is reported
  # here instead of failing obscurely when the file is opened.
  patient_log <- grep(patient, logs, value = TRUE, fixed = TRUE)
  if (length(patient_log) != 1) {
    stop(
      "Expected exactly one germline log for patient ", patient,
      ", found ", length(patient_log),
      call. = FALSE
    )
  }

  res_germ_pt <- processFile(
    paste0("../logs/count_germline_phase_II/", patient_log)
  )
  tibble(
    pt_id = patient,
    tumor_folder = folder,
    germline_reads = as.numeric(res_germ_pt[1]),
    germline_kmers = as.numeric(res_germ_pt[2])
  )
})
res_germline <- bind_rows(res_germline)
head(res_germline)

In [None]:
# Append a pseudo-patient row "union_germline" to `res_germline`; its
# germline_kmers value is the sum of column V2 of the union-germline table.
union_germline <- read.table(paste0("../data/", "phase_II_patients/plotdata_germline_union_de_novo_and_k50_and_reference_filtered.txt"))

# Convert to double BEFORE summing: sum() over an integer column returns NA
# (with a warning) once the total exceeds .Machine$integer.max, and wrapping
# the finished sum in as.numeric() cannot undo that.
union_kmers <- sum(as.numeric(union_germline$V2))

res_germ_union <- tibble(
  pt_id = "union_germline",
  tumor_folder = NA,
  germline_reads = NA,
  germline_kmers = union_kmers
)
res_germline <- bind_rows(res_germline, res_germ_union)

In [None]:
options(repr.plot.width = 15, repr.plot.height = 8)

# Axis-label colours for the per-patient germline plots: "red" for ids listed
# in `phaseIpt_R`, "darkblue" otherwise. The vector holds one entry per
# distinct plotted patient (the "union_germline" row is never plotted), in
# sorted order, because ggplot2 lays out a character x axis in sorted order
# of its distinct values rather than in data-frame row order.
# NOTE(review): vectorised colours in element_text() are not officially
# supported by ggplot2 -- confirm the labels render as intended.
a <- ifelse(
  sort(unique(res_germline$pt_id[res_germline$pt_id != "union_germline"])) %in% phaseIpt_R,
  "red",
  "darkblue"
)

# Bar chart of germline read counts per patient (union row excluded).
res_germline %>%
  filter(pt_id != "union_germline") %>%
  ggplot() +
  geom_col(aes(x = pt_id, y = germline_reads), fill = "grey") +
  ggtitle("Germline reads") +
  theme_minimal() +
  theme(axis.text.x = element_text(colour = "darkblue", angle = 60))

In [None]:
# Order the rows by ascending germline read count, then save the table as CSV
# in the working directory.
res_germline <- arrange(res_germline, germline_reads)
write.csv(res_germline, file = "res_phaseII_germline.csv")

In [None]:
# Bar chart of germline k-mer counts per patient, leaving out the
# "union_germline" row. The x-axis label colours are taken from `a`.
res_germline %>%
  filter(pt_id != "union_germline") %>%
  ggplot(aes(x = pt_id, y = germline_kmers)) +
  geom_col(fill = "grey") +
  labs(title = "Germline kmers", x = "", y = "Number of germline k-mers") +
  theme_minimal() +
  theme(axis.text.x = element_text(colour = a))

In [None]:
# Per-patient germline rows: no "union_germline" row, no excluded patients.
res_germline_without <- res_germline %>%
  filter(pt_id != "union_germline", !(pt_id %in% phase2ptex))

# Alternate two tick lengths: ids found at odd row positions get the shorter
# value, all other ids the longer one. Both columns use the same pair here.
res_germline_without <- res_germline_without %>%
  mutate(
    tick_l_y_end = ifelse(
      pt_id %in% pt_id[seq(1, length(pt_id), 2)], -150000000, -300000000
    ),
    tick_l_y_end_tumor = ifelse(
      pt_id %in% pt_id[seq(1, length(pt_id), 2)], -150000000, -300000000
    )
  )
head(res_germline_without)

In [None]:
# Distinct patient ids of res_germline_without, in reversed order of first
# appearance.
levels_order <- rev(unique(res_germline_without$pt_id))

In [None]:
# Sort the ids, reverse the sorted vector, and wrap it in a one-column data
# frame.
levels_order <- as.data.frame(rev(sort(levels_order)))

In [None]:
# Name the single column "pt_id" so the table can be joined on that key.
names(levels_order) <- "pt_id"

In [None]:
# Rebuild the per-patient germline table in the row order given by
# `levels_order` (a left join keeps the order of its left-hand table), then
# assign alternating tick lengths: ids at odd row positions get one value,
# all other ids the other.
res_germline_without <- levels_order %>%
  left_join(
    res_germline %>%
      filter(pt_id != "union_germline", !(pt_id %in% phase2ptex)),
    by = "pt_id"
  ) %>%
  mutate(
    tick_l_y_end = ifelse(
      pt_id %in% pt_id[seq(1, length(pt_id), 2)], -1400000000, -600000000
    ),
    tick_l_y_end_tumor = ifelse(
      pt_id %in% pt_id[seq(1, length(pt_id), 2)], -150000000, -300000000
    )
  )
head(res_germline_without)

In [None]:
options(repr.plot.width = 14, repr.plot.height = 18)

# Horizontal bar chart (axes flipped) of germline k-mer counts per patient.
ggplot(res_germline_without, aes(x = pt_id, y = germline_kmers)) +
  geom_col(fill = "darkgrey") +
  # Custom tick marks: one segment per patient from tick_l_y_end up to -0.1,
  # i.e. drawn below zero; clip = "off" lets them extend past the panel.
  geom_linerange(
    data = res_germline_without,
    mapping = aes(x = pt_id, ymin = tick_l_y_end, ymax = -0.1),
    size = 0.8,
    inherit.aes = FALSE
  ) +
  guides(y = guide_axis_manual(n.dodge = 2)) +
  theme_minimal() +
  theme(
    text = element_text(size = 16),
    axis.text.x = element_text(angle = 0, color = "black")
  ) +
  labs(x = "", y = "Number of germline k-mers") +
  # scale_y_continuous(expand = c(0, 0), breaks = c(0, 1000000000, 20000000000, 30000000000)) +
  coord_flip(clip = "off", ylim = c(-0.4, NA))

In [None]:
# Build `res_tumor_unique_tumor`: one row per row of `paramspace` with the
# tumor read count (parsed from the patient's log in
# ../logs/count_tumor_phase_II/) and sums derived from two per-patient
# tables. For each table, sum(V2) and sum(V2 * V1) are stored.
# NOTE(review): V1/V2 look like a count histogram (V1 = multiplicity,
# V2 = number of k-mers with that multiplicity) -- confirm against the files.
logs_tumor <- list.files("../logs/count_tumor_phase_II/")

# seq_len() is safe for a zero-row `paramspace`; rows are collected in a list
# and bound once instead of growing a table inside the loop.
res_tumor_unique_tumor <- lapply(seq_len(nrow(paramspace)), function(row) {
  patient <- as.character(paramspace[row, "pt_id"])
  print(patient)
  folder <- paramspace[row, "output_path_unique_kmers"]

  # Literal (non-regex) match on the patient id; exactly one log is required
  # so an ambiguous or missing match is reported clearly.
  log_tumor <- grep(patient, logs_tumor, value = TRUE, fixed = TRUE)
  if (length(log_tumor) != 1) {
    stop(
      "Expected exactly one tumor log for patient ", patient,
      ", found ", length(log_tumor),
      call. = FALSE
    )
  }
  reads_kmers_tumor <- processFile(
    paste0("../logs/count_tumor_phase_II/", log_tumor)
  )

  unique_kmers_tumor <- read.table(paste0("../data/phase_II_patients/", patient, "/", folder, "/final_pipeline_NEW/", "plotdata_unique_tumor_kmers_subs59_and_72_filtered.txt"))
  tumor_kmers_tumor <- read.table(paste0("../data/phase_II_patients/", patient, "/", folder, "/final_pipeline/", "plotdata_tumor_filtered.txt"))

  # Convert to double before any sum or product: integer sums/products
  # return NA (with a warning) once they exceed .Machine$integer.max.
  tumor_v1 <- as.numeric(tumor_kmers_tumor$V1)
  tumor_v2 <- as.numeric(tumor_kmers_tumor$V2)
  unique_v1 <- as.numeric(unique_kmers_tumor$V1)
  unique_v2 <- as.numeric(unique_kmers_tumor$V2)

  tibble(
    pt_id = patient,
    tumor_folder = folder,
    tumor_reads = as.numeric(reads_kmers_tumor[1]),
    tumor_kmers = sum(tumor_v2),
    tumor_kmers_total_n = sum(tumor_v2 * tumor_v1),
    unique_tumor_kmers = sum(unique_v2),
    unique_tumor_kmers_total_n = sum(unique_v2 * unique_v1),
    pipeline = "final"
  )
})
res_tumor_unique_tumor <- bind_rows(res_tumor_unique_tumor)

In [None]:
# Make `pipeline` a factor with the single level "final", force `pt_id` to
# character, and sort the rows by patient id.
res_tumor_unique_tumor <- res_tumor_unique_tumor %>%
  mutate(
    pipeline = factor(pipeline, levels = c("final")),
    pt_id = as.character(pt_id)
  ) %>%
  arrange(pt_id)
head(res_tumor_unique_tumor)


In [None]:

# Quick bar chart of tumor k-mer counts per patient, filled by pipeline.
ggplot(
  res_tumor_unique_tumor,
  aes(x = pt_id, y = tumor_kmers, fill = pipeline)
) +
  geom_col(position = "dodge2")

In [None]:
# Alternating tick lengths for the tumor k-mer plot: ids found at odd row
# positions get the first value of each pair, all other ids the second.
res_tumor_unique_tumor <- res_tumor_unique_tumor %>%
  mutate(
    tick_l_y_end = ifelse(
      pt_id %in% pt_id[seq(1, length(pt_id), 2)], -150000000, -330000000
    ),
    tick_l_y_end_tumor = ifelse(
      pt_id %in% pt_id[seq(1, length(pt_id), 2)], -0.6, -150000000
    )
  )
head(res_tumor_unique_tumor)

In [None]:
options(repr.plot.width = 14, repr.plot.height = 18)

# Horizontal bar chart (axes flipped) of tumor k-mer counts per patient.
ggplot(res_tumor_unique_tumor, aes(x = pt_id, y = tumor_kmers)) +
  geom_col(fill = "darkgrey") +
  # Custom tick marks: one segment per patient from tick_l_y_end up to -0.1;
  # clip = "off" lets them extend past the panel.
  geom_linerange(
    data = res_tumor_unique_tumor,
    mapping = aes(x = pt_id, ymin = tick_l_y_end, ymax = -0.1),
    size = 0.8,
    inherit.aes = FALSE
  ) +
  guides(y = guide_axis_manual(n.dodge = 2)) +
  theme_minimal() +
  theme(
    text = element_text(size = 16),
    axis.text.x = element_text(angle = 0, color = "black")
  ) +
  labs(x = "", y = "Number of tumor k-mers") +
  # scale_y_continuous(expand = c(0, 0), breaks = c(0, 1000000000, 20000000000, 30000000000)) +
  coord_flip(clip = "off", ylim = c(-0.4, NA))
    


In [None]:
# Re-assign the alternating tick lengths with much smaller magnitudes: ids
# found at odd row positions get the first value of each pair, all other ids
# the second.
res_tumor_unique_tumor <- res_tumor_unique_tumor %>%
  mutate(
    tick_l_y_end = ifelse(
      pt_id %in% pt_id[seq(1, length(pt_id), 2)], -1.35, -2.4
    ),
    tick_l_y_end_tumor = ifelse(
      pt_id %in% pt_id[seq(1, length(pt_id), 2)], -0.6, -1000000
    )
  )
head(res_tumor_unique_tumor)

In [None]:
options(repr.plot.width = 14, repr.plot.height = 18)

# Horizontal bar chart (axes flipped) of the natural log of the unique tumor
# k-mer count per patient. The y aesthetic is log-transformed so that the
# bars agree with the axis label "log(Number of unique tumor k-mers)" and
# with the unit-scale tick lengths held in tick_l_y_end; plotting the raw
# counts under that label misreports the values.
# NOTE(review): log(0) is -Inf, so a patient with zero unique tumor k-mers
# gets no bar -- confirm that is acceptable.
ggplot(res_tumor_unique_tumor, aes(x = pt_id, y = log(unique_tumor_kmers))) +
  geom_col(fill = "darkgrey") +
  # Custom tick marks: one segment per patient from tick_l_y_end up to -0.1;
  # clip = "off" lets them extend past the panel.
  geom_linerange(
    data = res_tumor_unique_tumor,
    mapping = aes(x = pt_id, ymin = tick_l_y_end, ymax = -0.1),
    size = 0.8,
    inherit.aes = FALSE
  ) +
  guides(y = guide_axis_manual(n.dodge = 2)) +
  theme_minimal() +
  theme(
    text = element_text(size = 16),
    axis.text.x = element_text(angle = 0, color = "black")
  ) +
  ylab("log(Number of unique tumor k-mers)") +
  xlab("") +
  # scale_y_continuous(expand = c(0, 0), breaks = c(0, 1000000000, 20000000000, 30000000000)) +
  coord_flip(clip = "off", ylim = c(-0.4, NA))

# cfDNA 

In [None]:
# Read the cfDNA parameter space and keep only pt_id, cfDNA_folder and
# unique_kmers_folder.
paramspace_cfDNA <- select(
  read.csv("../data/metadata/paramspace_cfDNA_phaseII_missing_removed.csv"),
  pt_id, cfDNA_folder, unique_kmers_folder
)
# Show the first rows and the table dimensions.
head(paramspace_cfDNA)
dim(paramspace_cfDNA)

In [None]:
# Add `sampleID`: the part of `cfDNA_folder` before its first "_" (the whole
# name when it contains no "_"). Only this first chunk is kept, so the
# remaining chunks are no longer extracted; extracting and then dropping them
# made the cell fail on any folder name with fewer than three "_"-separated
# parts.
paramspace_cfDNA <- paramspace_cfDNA %>%
  mutate(sampleID = sub("_.*$", "", as.character(cfDNA_folder)))

head(paramspace_cfDNA)
dim(paramspace_cfDNA)

In [None]:
# Load the per-sample timepoint table; later steps read its columns phase,
# sampleID and sample_timepoint_days_since_OP.
sample_timepoint <- read.csv("../data/metadata/clin_data/sample_timepoint_formatted.csv")

In [None]:
head(sample_timepoint)

# Rows whose phase is "phaseII", reduced to the sample id and its timepoint.
sample_timepoint_days <- sample_timepoint %>%
  filter(phase == "phaseII") %>%
  select(sampleID, sample_timepoint_days_since_OP)

# Display the samples that have no timepoint, then the size and a preview of
# the reduced table.
filter(sample_timepoint_days, is.na(sample_timepoint_days_since_OP))
dim(sample_timepoint_days)
head(sample_timepoint_days)

In [None]:
# Attach each sample's timepoint to the cfDNA parameter space, joining on
# sampleID (coerced to character first).
# NOTE(review): a sampleID that occurs more than once in
# sample_timepoint_days would duplicate rows here -- check dim() below.
paramspace_cfDNA <- paramspace_cfDNA %>%
  mutate(sampleID = as.character(sampleID)) %>%
  left_join(sample_timepoint_days, by = "sampleID")
head(paramspace_cfDNA)
dim(paramspace_cfDNA)

In [None]:
# Build `res_cfDNA`: one row per row of `paramspace_cfDNA` with the cfDNA
# read count (parsed from the sample's log, NA when no log is found) and sums
# derived from that sample's plotdata_cfDNA_filtered.txt table.
# NOTE(review): V1/V2 look like a count histogram (V1 = multiplicity,
# V2 = number of k-mers with that multiplicity) -- confirm against the files.
logs_cfDNA <- list.files("../logs/count_cfDNA_kmers_filtered_cfDNA/")

# seq_len() is safe for a zero-row table; rows are collected in a list and
# bound once instead of growing a table inside the loop.
res_cfDNA <- lapply(seq_len(nrow(paramspace_cfDNA)), function(row) {
  patient <- as.character(paramspace_cfDNA[row, "pt_id"])
  cfDNA_folder <- as.character(paramspace_cfDNA[row, "cfDNA_folder"])
  folder <- paramspace_cfDNA[row, "unique_kmers_folder"]

  # Literal (non-regex) matching: first by patient id, then by sample folder.
  logs_pt <- grep(patient, logs_cfDNA, value = TRUE, fixed = TRUE)
  logs_pt_cfDNA_sample <- grep(cfDNA_folder, logs_pt, value = TRUE, fixed = TRUE)
  if (length(logs_pt_cfDNA_sample) == 0) {
    # Best effort: report the sample without a log and record NA counts.
    print(cfDNA_folder)
    reads_kmers_cfDNA <- c(NA, NA)
  } else if (length(logs_pt_cfDNA_sample) > 1) {
    # Several logs cannot be opened as one file; say so explicitly.
    stop(
      "Expected at most one cfDNA log for sample ", cfDNA_folder,
      ", found ", length(logs_pt_cfDNA_sample),
      call. = FALSE
    )
  } else {
    reads_kmers_cfDNA <- processFile(
      paste0("../logs/count_cfDNA_kmers_filtered_cfDNA/", logs_pt_cfDNA_sample)
    )
  }

  cfDNA_kmers_df <- read.table(paste0("../data/phase_II_patients/", patient, "/", cfDNA_folder, "/final_pipeline/", "plotdata_cfDNA_filtered.txt"))
  #cfDNA_kmers_filtered_df <- read.table(paste("../data/", patient, "/", cfDNA_folder, "/all_germline_filtered_bams_tumor_ci5_cs1e9_filtered_cfDNA/", "plotdata_cfDNA_kmers_filtered.txt", sep = ""))

  # Convert to double BEFORE taking the subset and before any sum/product:
  # integer sums return NA (with a warning) once they exceed
  # .Machine$integer.max.
  cfDNA_kmers_df$V2 <- as.numeric(cfDNA_kmers_df$V2)
  cfDNA_kmers_df$V1 <- as.numeric(cfDNA_kmers_df$V1)
  # Same table without its first row.
  cfDNA_kmers_df_ci2 <- cfDNA_kmers_df[-c(1), ]

  tibble(
    pt_id = patient,
    tumor_folder = folder,
    cfDNA_folder = cfDNA_folder,
    cfDNA_reads = as.numeric(reads_kmers_cfDNA[1]),
    cfDNA_kmers = sum(cfDNA_kmers_df$V2),
    cfDNA_kmers_ci2 = sum(cfDNA_kmers_df_ci2$V2),
    cfDNA_kmers_total_n = sum(cfDNA_kmers_df$V2 * cfDNA_kmers_df$V1),
    #cfDNA_kmers_filtered = as.numeric(sum(cfDNA_kmers_filtered_df$V2)),
    #cfDNA_kmers_filtered_total_n = as.numeric(sum(cfDNA_kmers_filtered_df$V2 * cfDNA_kmers_filtered_df$V1)),
    sample_timepoint_days_since_OP = paramspace_cfDNA[row, "sample_timepoint_days_since_OP"],
    pipeline = "final"
  )
})
res_cfDNA <- bind_rows(res_cfDNA)

In [None]:
# For every patient (in order of first appearance) sort the cfDNA samples by
# timepoint and number them 1..k in `samples_order`. `color_palette_size`
# ends up as the largest number of samples seen for a single patient.
res_cfDNA_final <- NULL
color_palette_size <- 0
for (pt in unique(res_cfDNA$pt_id)) {
  res_cfDNA_pt <- filter(res_cfDNA, pt_id == pt)
  res_cfDNA_pt <- res_cfDNA_pt %>%
    mutate(
      sample_timepoint_days_since_OP = as.numeric(sample_timepoint_days_since_OP)
    ) %>%
    arrange(sample_timepoint_days_since_OP)
  res_cfDNA_pt$samples_order <- seq(1, nrow(res_cfDNA_pt))

  if (color_palette_size < nrow(res_cfDNA_pt)) {
    color_palette_size <- nrow(res_cfDNA_pt)
  }
  res_cfDNA_final <- rbind(res_cfDNA_final, res_cfDNA_pt)
}
head(res_cfDNA_final)

In [None]:
# Drop the samples of patients on the exclusion list.
res_cfDNA_final <- filter(res_cfDNA_final, !(pt_id %in% phase2ptex))

In [None]:
# Number of cfDNA sample rows left after the exclusion filter.
nrow(res_cfDNA_final)

In [None]:
# Assign every patient (in order of first appearance) to one of nine plot
# groups, used as facet rows further down. The patients are split as evenly
# as possible into consecutive groups, with the larger groups first; for 123
# patients this reproduces the previous hand-written vector exactly (six
# groups of 14 followed by three groups of 13), and it no longer fails when
# the number of patients changes.
res_cfDNA_final_pt <- local({
  patients <- unique(res_cfDNA_final$pt_id)
  n_groups <- 9
  # Base size for every group, plus one extra patient for the first
  # (n %% n_groups) groups.
  group_sizes <- length(patients) %/% n_groups +
    (seq_len(n_groups) <= length(patients) %% n_groups)
  data.frame(
    pt_id = patients,
    plot_group = as.numeric(rep(seq_len(n_groups), times = group_sizes)),
    stringsAsFactors = FALSE
  )
})

# Attach the plot group to every sample row.
res_cfDNA_final <- left_join(res_cfDNA_final_pt, res_cfDNA_final, by = "pt_id")

In [None]:
# Inspect the first and last rows after the plot groups were joined in.
head(res_cfDNA_final)
tail(res_cfDNA_final)

In [None]:
# Colour interpolator over the 9-class "Blues" palette.
getPalette <- colorRampPalette(brewer.pal(9, "Blues"))
# Treat the per-patient sample index as a discrete variable for plotting.
res_cfDNA_final <- res_cfDNA_final %>%
  mutate(samples_order = as.factor(samples_order))

In [None]:
# 17 colours interpolated over the 9-class "Blues" palette, without the first
# three, i.e. 14 colours.
colsi <- colorRampPalette(brewer.pal(9, "Blues"))(17)[-seq_len(3)]
# Treat the per-patient sample index as a discrete variable for plotting.
res_cfDNA_final <- res_cfDNA_final %>%
  mutate(samples_order = as.factor(samples_order))

In [None]:
options(repr.plot.width = 15, repr.plot.height = 20)

# cfDNA read counts per patient, one dodged bar per sample, one facet row per
# plot group with free scales. The first sample of each patient is
# "firebrick", later samples take the colours in `colsi`.
# NOTE(review): `a` was derived from res_germline; confirm that its length
# and order match the patients on each facet's x axis.
ggplot(res_cfDNA_final, aes(x = pt_id, y = cfDNA_reads, fill = samples_order)) +
  geom_col(position = "dodge2") +
  # ggtitle("cfDNA reads") +
  theme_minimal() +
  scale_fill_manual(values = c("firebrick", colsi)) +
  theme(axis.text.x = element_text(colour = a)) +
  facet_wrap(vars(plot_group), scales = "free", ncol = 1) +
  labs(x = "", y = "Number of cfDNA reads", fill = "Order of samples") +
  theme(
    text = element_text(size = 15),
    strip.text.x = element_text(size = 0)
  )

In [None]:
options(repr.plot.width = 15, repr.plot.height = 20)

# cfDNA k-mer counts per patient, one dodged bar per sample, one facet row
# per plot group with a free x scale. The first sample of each patient is
# "firebrick", later samples take the colours in `colsi`.
# NOTE(review): `a` was derived from res_germline; confirm that its length
# and order match the patients on each facet's x axis.
ggplot(res_cfDNA_final, aes(x = pt_id, y = cfDNA_kmers, fill = samples_order)) +
  geom_col(position = "dodge2") +
  # ggtitle("cfDNA reads") +
  theme_minimal() +
  scale_fill_manual(values = c("firebrick", colsi)) +
  theme(axis.text.x = element_text(colour = a)) +
  facet_wrap(vars(plot_group), scales = "free_x", ncol = 1) +
  labs(x = "", y = "Number of cfDNA k-mers", fill = "Order of samples") +
  theme(
    text = element_text(size = 15),
    strip.text.x = element_text(size = 0)
  )

# Intersections

In [None]:
# Build `res_intersection`: one row per row of `paramspace_cfDNA` with
# sum(V2) and sum(V2 * V1) of that sample's plotdata_intersection_filtered.txt
# table.
# NOTE(review): V1/V2 look like a count histogram (V1 = multiplicity,
# V2 = number of k-mers with that multiplicity) -- confirm against the files.
# seq_len() is safe for a zero-row table; rows are collected in a list and
# bound once instead of growing a table inside the loop.
res_intersection <- lapply(seq_len(nrow(paramspace_cfDNA)), function(row) {
  patient <- paramspace_cfDNA[row, "pt_id"]
  cfDNA_folder <- paramspace_cfDNA[row, "cfDNA_folder"]
  folder <- paramspace_cfDNA[row, "unique_kmers_folder"]

  intersection_kmers_ci1_cs1e9 <- read.table(paste0("../data/phase_II_patients/", patient, "/", cfDNA_folder, "/final_pipeline_NEW/", "plotdata_intersection_filtered.txt"))

  # Convert to double before summing or multiplying: integer sums and
  # products return NA (with a warning) once they exceed
  # .Machine$integer.max.
  v1 <- as.numeric(intersection_kmers_ci1_cs1e9$V1)
  v2 <- as.numeric(intersection_kmers_ci1_cs1e9$V2)

  tibble(
    pt_id = patient,
    tumor_folder = folder,
    cfDNA_folder = cfDNA_folder,
    intersection_kmers = sum(v2),
    intersection_kmers_total_n = sum(v2 * v1),
    sample_timepoint_days_since_OP = paramspace_cfDNA[row, "sample_timepoint_days_since_OP"],
    pipeline = "final"
  )
})
res_intersection <- bind_rows(res_intersection)
    
   

In [None]:
# For every patient (in order of first appearance) and every pipeline (only
# "final"), sort the samples by timepoint and number them 1..k in
# `samples_order`. `color_palette_size` ends up as the largest number of
# samples seen in one patient/pipeline combination.
res_intersection_final <- NULL
color_palette_size <- 0
for (pt in unique(res_intersection$pt_id)) {
  res_intersection_pt <- filter(res_intersection, pt_id == pt)
  for (pipeline_C in c("final")) {
    res_intersection_pt_pipeline <- res_intersection_pt %>%
      filter(pipeline == pipeline_C) %>%
      mutate(
        sample_timepoint_days_since_OP = as.numeric(sample_timepoint_days_since_OP)
      ) %>%
      arrange(sample_timepoint_days_since_OP)
    res_intersection_pt_pipeline$samples_order <-
      seq(1, nrow(res_intersection_pt_pipeline))

    if (color_palette_size < nrow(res_intersection_pt_pipeline)) {
      color_palette_size <- nrow(res_intersection_pt_pipeline)
    }
    res_intersection_final <- rbind(
      res_intersection_final,
      res_intersection_pt_pipeline
    )
  }
}



In [None]:
# Make the sample index discrete and `pipeline` a factor with the single
# level "final".
res_intersection_final <- res_intersection_final %>%
  mutate(
    samples_order = as.factor(samples_order),
    pipeline = factor(pipeline, levels = c("final"))
  )
head(res_intersection_final)

In [None]:
# Drop the samples of patients on the exclusion list.
res_intersection_final <- filter(
  res_intersection_final,
  !(pt_id %in% phase2ptex)
)

In [None]:
# Assign every patient (in order of first appearance) to one of nine plot
# groups, used as facet rows further down. The patients are split as evenly
# as possible into consecutive groups, with the larger groups first; for 123
# patients this reproduces the previous hand-written vector exactly (six
# groups of 14 followed by three groups of 13), and it no longer fails when
# the number of patients changes.
res_intersection_final_pt <- local({
  patients <- unique(res_intersection_final$pt_id)
  n_groups <- 9
  # Base size for every group, plus one extra patient for the first
  # (n %% n_groups) groups.
  group_sizes <- length(patients) %/% n_groups +
    (seq_len(n_groups) <= length(patients) %% n_groups)
  data.frame(
    pt_id = patients,
    plot_group = as.numeric(rep(seq_len(n_groups), times = group_sizes)),
    stringsAsFactors = FALSE
  )
})

# Attach the plot group to every sample row.
res_intersection_final <- left_join(res_intersection_final_pt, res_intersection_final, by = "pt_id")

In [None]:
# 17 colours interpolated over the 9-class "Blues" palette, without the first
# three, i.e. 14 colours.
colsi <- colorRampPalette(brewer.pal(9, "Blues"))(17)[-seq_len(3)]
# Treat the per-patient sample index as a discrete variable for plotting.
res_intersection_final <- res_intersection_final %>%
  mutate(samples_order = as.factor(samples_order))

In [None]:
options(repr.plot.width = 15, repr.plot.height = 20)

# Natural log of the intersection k-mer count per patient, one dodged bar per
# sample, one facet row per plot group with a free x scale. The y-axis label
# now states the log transform that the y aesthetic applies; it previously
# read as if raw counts were shown.
# NOTE(review): log(0) is -Inf, so a sample with zero intersection k-mers
# gets no bar -- confirm that is acceptable.
ggplot(res_intersection_final) +
  geom_col(
    aes(x = pt_id, y = log(intersection_kmers), fill = samples_order),
    position = "dodge2"
  ) +
  #ggtitle("cfDNA reads") +
  theme_minimal() +
  scale_fill_manual(values = c("firebrick", colsi)) +
  #theme(axis.text.x = element_text(colour = a))+
  facet_wrap(vars(plot_group), scales = "free_x", ncol = 1) +
  xlab("") +
  ylab("log(Number of ctDNA k-mers)") +
  theme(
    text = element_text(size = 15),
    strip.text.x = element_text(size = 0)
  ) +
  labs(fill = "Order of samples")

In [None]:
# Load the germline plot-data table from the de novo union-germline folder;
# its column V2 is summed further down.
denovo_file <- read.table("../data/phase_II_patients/creating_union_germline_de_novo/plotdata_germline_filtered.txt")

In [None]:
# Preview the first rows of the table.
head(denovo_file)

In [None]:
# Sum of column V2 of the de novo table. The column is converted to double
# first: sum() over an integer column returns NA (with a warning) once the
# total exceeds .Machine$integer.max.
de_novo_sum <- sum(as.numeric(denovo_file$V2))
de_novo_sum