# 1. Parameters

In [1]:
# Directory that contains one "cov-<coverage>" sub-directory per coverage case.
simulation_dir <- "simulations"

# Coverage cases and the directory belonging to each of them.
coverage_cases <- c(5, 10, 20, 30, 40, 50)
coverage_paths <- file.path(simulation_dir, sprintf("cov-%s", coverage_cases))

# Tree files per coverage case, named by the coverage value.
coverage_read_trees <- setNames(
    file.path(coverage_paths, "index-reads", "reads.tre"),
    coverage_cases
)
coverage_assembly_trees <- setNames(
    file.path(coverage_paths, "index-assemblies", "assemblies.tre"),
    coverage_cases
)

# Initial tree file and the output path of the tree-comparison table.
initial_tree_file <- "input/salmonella.tre"
trees_table_file <- file.path(simulation_dir, "tree-comparisons.tsv")

# 2. Compare trees

## 2.1. Load trees

In [2]:
library(ape)

# Read a tree file with ape and root the result using "reference" as outgroup.
#
# file:       path passed to ape::read.tree().
# keep.multi: forwarded unchanged to ape::read.tree().
#
# Returns the result of ape::root() called with resolve.root = TRUE.
read_and_root_tree <- function(file, keep.multi) {
    ape::root(
        ape::read.tree(file = file, keep.multi = keep.multi),
        "reference",
        resolve.root = TRUE
    )
}

# Initial tree, read with keep.multi = FALSE and rooted.
t_orig <- read_and_root_tree(file = initial_tree_file, keep.multi = FALSE)

# For each tree set: read every per-coverage file with keep.multi = TRUE,
# flatten the per-file results by one level and combine the pieces with c().
t_reads <- lapply(coverage_read_trees, read_and_root_tree, keep.multi = TRUE)
t_reads <- do.call(c, unlist(t_reads, recursive = FALSE))

t_assemblies <- lapply(coverage_assembly_trees, read_and_root_tree, keep.multi = TRUE)
t_assemblies <- do.call(c, unlist(t_assemblies, recursive = FALSE))

## 2.2. Distances between trees

In [3]:
library(treespace)

# Symmetric-difference distance between a reference tree and each tree of a
# collection, as reported by phangorn::treedist().
#
# reference_tree: tree passed as first argument to phangorn::treedist().
# other_trees:    collection of trees; each element is extracted with `[[`.
#
# Returns a numeric vector with one entry per element of `other_trees`, in
# order. Every entry is named "symmetric.difference". An empty collection
# yields an empty numeric vector.
#
# Trees are visited by position rather than by name, so collections that are
# unnamed or carry duplicate names are handled correctly.
reference_other_distances <- function(reference_tree, other_trees) {
    rf_distances <- vapply(
        seq_along(other_trees),
        function(i) {
            tree_distances <- phangorn::treedist(reference_tree, other_trees[[i]])
            tree_distances[["symmetric.difference"]]
        },
        numeric(1)
    )
    # Keep the element names callers previously received from subsetting the
    # treedist() result with "symmetric.difference".
    names(rf_distances) <- rep("symmetric.difference", length(rf_distances))

    return(rf_distances)
}

# Build a table of distances between a reference tree and two sets of trees.
#
# reference_tree: tree every other tree is compared against.
# read_trees:     named collection of trees; rows get Types = "reads".
# assembly_trees: named collection of trees; rows get Types = "assembly".
# label_name:     currently unused; kept so existing calls keep working.
#
# Returns a data.frame with one row per tree (read trees first) and columns:
#   Types       - "reads" or "assembly"
#   Coverage    - names of the tree collections
#   RF_Distance - values from reference_other_distances()
#   KC_Distance - values from treespace::refTreeDist() with lambda = 0.5
#
# Both distance columns are computed against `reference_tree`, not against
# any global tree.
tree_distances_reference <- function(reference_tree, read_trees, assembly_trees, label_name) {
    lambda <- 0.5

    types <- c(rep("reads", each = length(read_trees)),
               rep("assembly", each = length(assembly_trees)))
    labels <- c(names(read_trees), names(assembly_trees))

    kc_distances <- c(
        treespace::refTreeDist(reference_tree, read_trees, lambda = lambda),
        treespace::refTreeDist(reference_tree, assembly_trees, lambda = lambda)
    )
    rf_distances <- c(
        reference_other_distances(reference_tree, read_trees),
        reference_other_distances(reference_tree, assembly_trees)
    )

    df <- data.frame(
        Types = types,
        Coverage = labels,
        RF_Distance = rf_distances,
        KC_Distance = kc_distances
    )

    return(df)
}

# Compare the read and assembly trees against the initial tree, convert the
# coverage labels to numbers, and display the resulting table.
tree_distances_df <- tree_distances_reference(
    reference_tree = t_orig,
    read_trees = t_reads,
    assembly_trees = t_assemblies,
    label_name = "Coverage"
)
tree_distances_df[["Coverage"]] <- as.numeric(tree_distances_df[["Coverage"]])
tree_distances_df

Loading required package: ade4

code for methods in class “Rcpp_SpExtent” was not checked for suspicious field assignments (recommended package ‘codetools’ not available?)

code for methods in class “Rcpp_SpExtent” was not checked for suspicious field assignments (recommended package ‘codetools’ not available?)

code for methods in class “Rcpp_SpPoly” was not checked for suspicious field assignments (recommended package ‘codetools’ not available?)

code for methods in class “Rcpp_SpPoly” was not checked for suspicious field assignments (recommended package ‘codetools’ not available?)

code for methods in class “Rcpp_SpPolyPart” was not checked for suspicious field assignments (recommended package ‘codetools’ not available?)

code for methods in class “Rcpp_SpPolyPart” was not checked for suspicious field assignments (recommended package ‘codetools’ not available?)

code for methods in class “Rcpp_SpPolygons” was not checked for suspicious field assignments (recommended package ‘codetools’ not available?)

Types,Coverage,RF_Distance,KC_Distance
<chr>,<dbl>,<dbl>,<dbl>
reads,5,112,249.91242
reads,10,84,79.78536
reads,20,78,86.85927
reads,30,90,86.04379
reads,40,88,101.09099
reads,50,90,94.77731
assembly,5,78,74.5591
assembly,10,82,71.47704
assembly,20,84,77.11151
assembly,30,82,62.74741


In [4]:
# Write the comparison table as a tab-separated file with a header row and
# without row names, replacing any existing file.
write.table(
    x = tree_distances_df,
    file = trees_table_file,
    sep = "\t",
    dec = ".",
    row.names = FALSE,
    col.names = TRUE,
    append = FALSE
)

# 3. Visualize trees