In [None]:
library("ggplot2")
library("data.table")
library("cowplot")

#' Map aligner identifiers to their display names.
#'
#' @param align Character vector (or factor) of aligner identifiers, e.g.
#'   "muscle", "mafft", "hmmer", "clustalo" or "null".
#' @return Unnamed character vector the same length as `align`. Known
#'   identifiers are replaced by their display name; identifiers without a
#'   mapping (including `NA`) are returned unchanged instead of failing with
#'   "object 'ret' not found".
nice_name <- function(align) {
    display_names <- c(
        muscle = "Muscle",
        mafft = "MAFFT",
        hmmer = "HMMER",
        clustalo = "ClustalO",
        null = "null"
    )
    align <- as.character(align)
    ret <- unname(display_names[align])
    # Identifiers absent from the table index to NA; keep the raw value there.
    unmapped <- is.na(ret)
    ret[unmapped] <- align[unmapped]
    ret
}

# Read ./distances.csv and add a NiceName column derived from `aligner`.
results <- fread("./distances.csv")
# vapply() guarantees exactly one string per row; sapply() would silently
# change its return type on empty or irregular input.
results[, NiceName := vapply(aligner, nice_name, character(1), USE.NAMES = FALSE)]
results

In [None]:
# Notebook figure size used for the multi-panel plots that follow.
options(
    repr.plot.width = 21,
    repr.plot.height = 13
)

In [None]:
# Build one panel: an lm smooth of e_nd against the column named `x_var`,
# with one facet row per NiceName and an independent y scale per row.
#   x_var   - name of the column in `results` to put on the x axis (string)
#   x_label - x-axis label
lm_panel <- function(x_var, x_label) {
    ggplot(results, aes(x = .data[[x_var]], y = e_nd)) +
        # `scales` spelled out; the previous `scale=` relied on partial matching.
        facet_grid(rows = "NiceName", scales = "free_y") +
        geom_smooth(method = "lm") +
        theme(text = element_text(size = 15)) +
        labs(x = x_label, y = "Expected Node Distance")
}

p.nf <- lm_panel("nf", "Nick Frequency")
p.ov <- lm_panel("ov", "Overhang")
p.ds <- lm_panel("ds", "Double Strand")
p.ss <- lm_panel("ss", "Single Strand")

# Panel order (nf, ov, ss, ds) determines the automatic A-D labels.
plot_grid(p.nf, p.ov, p.ss, p.ds, labels = "AUTO")

In [None]:
# Build one panel: an lm smooth of e_nd against the column named `x_var`,
# fitted with the supplied model formula, with one facet row per NiceName and
# an independent y scale per row.
#   x_var       - name of the column in `results` to put on the x axis (string)
#   x_label     - x-axis label
#   fit_formula - formula in terms of `x` and `y` handed to geom_smooth()
spline_panel <- function(x_var, x_label, fit_formula) {
    ggplot(results, aes(x = .data[[x_var]], y = e_nd)) +
        # `scales` spelled out; the previous `scale=` relied on partial matching.
        facet_grid(rows = "NiceName", scales = "free_y") +
        geom_smooth(method = "lm", se = TRUE, formula = fit_formula) +
        theme(text = element_text(size = 15)) +
        labs(x = x_label, y = "Expected Node Distance")
}

# Each predictor keeps its own natural-spline degrees of freedom.
p.nf <- spline_panel("nf", "Nick Frequency", y ~ splines::ns(x, 3))
p.ov <- spline_panel("ov", "Overhang", y ~ splines::ns(x, 3))
p.ds <- spline_panel("ds", "Double Strand", y ~ splines::ns(x, 2))
p.ss <- spline_panel("ss", "Single Strand", y ~ splines::ns(x, 1))

# Panel order (nf, ov, ss, ds) determines the automatic A-D labels.
plot_grid(p.nf, p.ov, p.ss, p.ds, labels = "AUTO")

In [None]:
# Build one panel: a GAM smooth of e_nd against the column named `x_var`,
# with one facet row per NiceName and an independent y scale per row.
#   x_var   - name of the column in `results` to put on the x axis (string)
#   x_label - x-axis label
gam_panel <- function(x_var, x_label) {
    ggplot(results, aes(x = .data[[x_var]], y = e_nd)) +
        # `scales` spelled out; the previous `scale=` relied on partial matching.
        facet_grid(rows = "NiceName", scales = "free_y") +
        geom_smooth(method = "gam") +
        theme(text = element_text(size = 15)) +
        labs(x = x_label, y = "Expected Node Distance")
}

p.nf <- gam_panel("nf", "Nick Frequency")
p.ov <- gam_panel("ov", "Overhang")
p.ds <- gam_panel("ds", "Double Strand")
p.ss <- gam_panel("ss", "Single Strand")

# Panel order (nf, ov, ss, ds) determines the automatic A-D labels.
plot_grid(p.nf, p.ov, p.ss, p.ds, labels = "AUTO")

In [None]:
# Distinct values of the aligner column, stored as a factor and echoed.
aligners <- factor(unique(results[["aligner"]]))

aligners

In [None]:
# Fit e_nd ~ nf separately for each aligner. The list is named by aligner so
# each printed summary can be matched to the tool it belongs to.
aligner_names <- as.character(aligners)
models <- lapply(
    setNames(aligner_names, aligner_names),
    function(current_aligner) {
        lm(e_nd ~ nf, data = results[aligner == current_aligner])
    }
)
lapply(models, summary)

In [None]:
# Fit e_nd ~ ov separately for each aligner. The list is named by aligner so
# each printed summary can be matched to the tool it belongs to.
aligner_names <- as.character(aligners)
models <- lapply(
    setNames(aligner_names, aligner_names),
    function(current_aligner) {
        lm(e_nd ~ ov, data = results[aligner == current_aligner])
    }
)
lapply(models, summary)

In [None]:
# Fit e_nd ~ ss separately for each aligner. The list is named by aligner so
# each printed summary can be matched to the tool it belongs to.
aligner_names <- as.character(aligners)
models <- lapply(
    setNames(aligner_names, aligner_names),
    function(current_aligner) {
        lm(e_nd ~ ss, data = results[aligner == current_aligner])
    }
)
lapply(models, summary)

In [None]:
# Fit e_nd ~ ds separately for each aligner. The list is named by aligner so
# each printed summary can be matched to the tool it belongs to.
aligner_names <- as.character(aligners)
models <- lapply(
    setNames(aligner_names, aligner_names),
    function(current_aligner) {
        lm(e_nd ~ ds, data = results[aligner == current_aligner])
    }
)
lapply(models, summary)

In [None]:
# Density-scaled histogram of nf with a density curve drawn on top.
ggplot(results, aes(x = nf, y = after_stat(density))) +
    geom_histogram() +
    geom_density()

In [None]:
# Density-scaled histogram of ov with a density curve drawn on top.
ggplot(results, aes(x = ov, y = after_stat(density))) +
    geom_histogram() +
    geom_density()

In [None]:
# Density-scaled histogram of ds with a density curve drawn on top.
ggplot(results, aes(x = ds, y = after_stat(density))) +
    geom_histogram() +
    geom_density()

In [None]:
# Density-scaled histogram of ss with a density curve drawn on top.
ggplot(results, aes(x = ss, y = after_stat(density))) +
    geom_histogram() +
    geom_density()

In [None]:
# Set the notebook figure size for this plot.
options(
    repr.plot.width = 10,
    repr.plot.height = 12
)

# Box plot of e_nd per NiceName, filled by aligner with the fill legend hidden.
ggplot(results, aes(x = NiceName, y = e_nd, fill = aligner)) +
    geom_boxplot(outlier.alpha = 0.1, staplewidth = 0.5) +
    labs(x = "Alignment Tool", y = "eND") +
    scale_fill_brewer(guide = "none", palette = "Pastel1")