diff --git a/NEWS b/NEWS index 9bb76de0..e852473c 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,13 @@ +poppr 2.4.0.99 +=========== + +BUG FIX +------- + +* A corner case where repeat length vectors out of order would be erroneously + subset with `test_replen()` and `fix_replen()` has been fixed. See + https://github.com/grunwaldlab/poppr/issues/136 for details. + poppr 2.4.0 =========== diff --git a/R/bruvo.r b/R/bruvo.r index 99314df8..b669f9b1 100755 --- a/R/bruvo.r +++ b/R/bruvo.r @@ -179,7 +179,7 @@ #' #' # Load the nancycats dataset and construct the repeat vector. #' data(nancycats) -#' +#' names(alleles(nancycats)) <- locNames(nancycats) # small bug in this data set #' # Assume the alleles are all dinucleotide repeats. #' ssr <- rep(2, nLoc(nancycats)) #' test_replen(nancycats, ssr) # Are the repeat lengths consistent? @@ -620,11 +620,13 @@ bruvo.msn <- function (gid, replen = 1, add = TRUE, loss = TRUE, #' \code{\link{bruvo.msn}} \code{\link{bruvo.boot}} #' @author Zhian N. Kamvar #' @examples -#' data(nancycats) -#' test_replen(nancycats, rep(2, 9)) +#' data(Pram) +#' (Pram_replen <- setNames(c(3, 2, 4, 4, 4), locNames(Pram))) +#' test_replen(Pram, Pram_replen) test_replen <- function(gid, replen){ - alleles <- lapply(gid@all.names, as.numeric) - are_consistent <- vapply(1:nLoc(gid), consistent_replen, logical(1), + replen <- cromulent_replen(gid, replen) + alleles <- lapply(alleles(gid), as.numeric) + are_consistent <- vapply(locNames(gid), consistent_replen, logical(1), alleles, replen) names(are_consistent) <- locNames(gid) return(are_consistent) @@ -685,8 +687,9 @@ consistent_replen <- function(index, alleles, replen){ #' \code{\link{bruvo.msn}} \code{\link{bruvo.boot}} #' @examples #' -#' data(nancycats) -#' fix_replen(nancycats, rep(2, 9)) +#' data(Pram) +#' (Pram_replen <- setNames(c(3, 2, 4, 4, 4), locNames(Pram))) +#' fix_replen(Pram, Pram_replen) #' # Let's start with an example of a tetranucleotide repeat motif and imagine #' # that there are twenty alleles all 1 step apart: #' (x <- 1:20L * 4L) @@ -706,10 +709,7 @@ consistent_replen <- function(index, alleles, replen){ #' (PxPcr <- round(PxPc)) #' diff(PxPcr) fix_replen <- function(gid, replen, e = 1e-5, fix_some = TRUE){ - if (length(replen) != nLoc(gid)) { - stop(paste0("length of repeats (", length(replen), ") does not equal", - " the number of loci (", nLoc(gid), ").")) - } + replen <- cromulent_replen(gid, replen) consistent_reps <- test_replen(gid, replen) names(replen) <- locNames(gid) ADD <- FALSE diff --git a/R/internal.r b/R/internal.r index 1d942a4a..2b835bc5 100644 --- a/R/internal.r +++ b/R/internal.r @@ -1000,6 +1000,9 @@ match_replen_to_loci <- function(gid_loci, replen){ gid_loci <- locNames(gid_loci) } if (is.null(names(replen))){ + if (is.character(gid_loci)){ + names(replen) <- gid_loci + } return(replen) } else { return(replen[gid_loci]) @@ -2871,4 +2874,26 @@ make_psex <- function(n_encounters, p_genotype, sample_ids = NULL, n_samples){ out <- dbinom(encounters, n_samples, p_genotype) names(out) <- sample_ids return(out) +} + +cromulent_replen <- function(gid, replen){ + the_loci <- locNames(gid) + if (length(replen) != nLoc(gid) && is.null(names(replen))){ + msg <- mismatched_repeat_length_warning(replen, nLoc(gid)) + stop(msg, call. = FALSE) + } else if (length(replen) < nLoc(gid)){ + msg <- mismatched_repeat_length_warning(replen, nLoc(gid)) + stop(msg, call. = FALSE) + } else if (length(replen) > nLoc(gid)){ + msg <- trimmed_repeats_warning(replen, the_loci) + replen <- replen[names(replen) %in% the_loci] + warning(msg, call. = FALSE, immediate. = TRUE) + } + new_replen <- match_replen_to_loci(the_loci, replen) + new_replen <- new_replen[!is.na(new_replen)] + if (length(new_replen) != nLoc(gid)){ + msg <- unmatched_loci_warning(names(replen), the_loci) + stop(msg, call. = FALSE) + } + return(new_replen) } \ No newline at end of file diff --git a/R/messages.r b/R/messages.r index a4ada682..462d9aed 100644 --- a/R/messages.r +++ b/R/messages.r @@ -148,6 +148,39 @@ non_ssr_data_warning <- function(){ return(msg) } +mismatched_repeat_length_warning <- function(replen, nloci){ + paste0("length of repeats (", length(replen), ") does not equal", + " the number of loci (", nloci, ").") +} + +trimmed_repeats_warning <- function(replen, loci){ + keep <- names(replen) %in% loci + removed <- strwrap(paste(names(replen[!keep]), collapse = ", ")) + removed <- paste(removed, sep = "\n") + paste0("There are more repeat lengths (", length(replen), ") than loci ", + "(", length(loci), "). The following repeat lengths will be removed:", + "\n ", removed) +} + + +unmatched_loci_warning <- function(replen, loci){ + nr <- length(replen) + nl <- length(loci) + replen <- strwrap(paste(replen, collapse = ", "), + initial = "", + prefix = "\t ") + replen <- paste(replen, collapse = "\n") + loci <- strwrap(paste(loci, collapse = ", "), + initial = "", + prefix = "\t ") + loci <- paste(loci, collapse = "\n") + msg <- paste0("The following repeat lengths (", nr, ") do not match ", + "any of the loci (", nl, "):\n", + "\trepeat lengths... ", replen, "\n", + "\tloci............. ", loci) + return(msg) +} + #==============================================================================# # Warning message for Neighbor-Joining trees. # Public functions utilizing this function: diff --git a/man/bruvo.dist.Rd b/man/bruvo.dist.Rd index 5e6509b2..c475a643 100755 --- a/man/bruvo.dist.Rd +++ b/man/bruvo.dist.Rd @@ -126,7 +126,7 @@ Do not use missingno with this function. # Load the nancycats dataset and construct the repeat vector. data(nancycats) - +names(alleles(nancycats)) <- locNames(nancycats) # small bug in this data set # Assume the alleles are all dinucleotide repeats. ssr <- rep(2, nLoc(nancycats)) test_replen(nancycats, ssr) # Are the repeat lengths consistent? diff --git a/man/fix_replen.Rd b/man/fix_replen.Rd index 110284e2..112f76e9 100644 --- a/man/fix_replen.Rd +++ b/man/fix_replen.Rd @@ -46,8 +46,9 @@ This function is modified from the version used in } \examples{ -data(nancycats) -fix_replen(nancycats, rep(2, 9)) +data(Pram) +(Pram_replen <- setNames(c(3, 2, 4, 4, 4), locNames(Pram))) +fix_replen(Pram, Pram_replen) # Let's start with an example of a tetranucleotide repeat motif and imagine # that there are twenty alleles all 1 step apart: (x <- 1:20L * 4L) diff --git a/man/test_replen.Rd b/man/test_replen.Rd index 5b1791c3..f2ddd8c7 100644 --- a/man/test_replen.Rd +++ b/man/test_replen.Rd @@ -24,8 +24,9 @@ This function is modified from the version used in \url{http://dx.doi.org/10.5281/zenodo.13007}. } \examples{ -data(nancycats) -test_replen(nancycats, rep(2, 9)) +data(Pram) +(Pram_replen <- setNames(c(3, 2, 4, 4, 4), locNames(Pram))) +test_replen(Pram, Pram_replen) } \references{ Zhian N. Kamvar, Meg M. Larsen, Alan M. Kanaskie, Everett M. diff --git a/tests/testthat/test-replen.R b/tests/testthat/test-replen.R new file mode 100644 index 00000000..aba683ec --- /dev/null +++ b/tests/testthat/test-replen.R @@ -0,0 +1,44 @@ +context("repeat length handling") + +data("nancycats", package = "adegenet") +names(alleles(nancycats)) <- locNames(nancycats) +nanrep <- rep(2, 9) +named_nanrep <- setNames(nanrep, locNames(nancycats)) +nantest <- test_replen(nancycats, nanrep) +nanfix <- fix_replen(nancycats, nanrep) +named_nantest <- test_replen(nancycats, named_nanrep) +named_nanfix <- fix_replen(nancycats, named_nanrep) +test_that("test_replen and fix_replen works as expected for conguent vectors", { + expect_equal(sum(nantest), 5) + expect_identical(nantest, named_nantest) + expect_identical(nanfix, named_nanfix) + expect_true(all(floor(nanfix)[!nantest] == 1)) +}) + +test_that("test_replen and fix_replen work for larger length vectors", { + nanrep10 <- c(nanrep, 5) + named_nanrep11 <- c(named_nanrep, foo = 5, bar = 5) + bad_named_nanrep11 <- named_nanrep11 + names(bad_named_nanrep11)[1] <- "bob" + expect_error(test_replen(nancycats, nanrep10), "length of repeats \\(10\\)") + expect_error(fix_replen(nancycats, nanrep10), "length of repeats \\(10\\)") + expect_warning(expect_error(test_replen(nancycats, bad_named_nanrep11), "repeat lengths... fca23"), "bob") + expect_warning(nts <- test_replen(nancycats, named_nanrep11), "foo, bar") + expect_warning(nfx <- fix_replen(nancycats, named_nanrep11), "foo, bar") + expect_identical(nts, nantest) + expect_identical(nfx, nanfix) +}) + +test_that("test_replen and fix_replen will not work for short vectors", { + expect_error(test_replen(nancycats, nanrep[1:7]), "length of repeats \\(7\\)") + expect_error(test_replen(nancycats, named_nanrep[1:7]), "length of repeats \\(7\\)") +}) + +test_that("fix_replen throws errors for weird replens", { + skip_on_cran() + data(partial_clone) + expect_warning(fix_replen(partial_clone, rep(10, 10)), paste(locNames(partial_clone), collapse = ", ")) + expect_warning(fix_replen(partial_clone, rep(10, 10), fix_some = FALSE), "Original repeat lengths are being returned") + expect_warning(fix_replen(partial_clone, rep(2, 10)), "The repeat lengths for Locus_2, Locus_7, Locus_9 are not consistent.") + expect_warning(fix_replen(partial_clone, rep(2, 10)), "Repeat lengths with some modification are being returned: Locus_3") +}) diff --git a/tests/testthat/test-values.R b/tests/testthat/test-values.R index 7452d44d..f7d8d36c 100644 --- a/tests/testthat/test-values.R +++ b/tests/testthat/test-values.R @@ -357,24 +357,6 @@ test_that("samp.ia works",{ expect_equivalent(nopos.res, pos.res) }) -test_that("fix_replen works as expected", { - data(nancycats) - nanrep <- rep(2, 9) - nantest <- test_replen(nancycats, nanrep) - nanfix <- fix_replen(nancycats, nanrep) - expect_equal(sum(nantest), 5) - expect_true(all(floor(nanfix)[!nantest] == 1)) -}) - -test_that("fix_replen throws errors for weird replens", { - skip_on_cran() - data(partial_clone) - expect_warning(fix_replen(partial_clone, rep(10, 10)), paste(locNames(partial_clone), collapse = ", ")) - expect_warning(fix_replen(partial_clone, rep(10, 10), fix_some = FALSE), "Original repeat lengths are being returned") - expect_warning(fix_replen(partial_clone, rep(2, 10)), "The repeat lengths for Locus_2, Locus_7, Locus_9 are not consistent.") - expect_warning(fix_replen(partial_clone, rep(2, 10)), "Repeat lengths with some modification are being returned: Locus_3") -}) - test_that("poppr_has_parallel returns something logical", { expect_is(poppr_has_parallel(), "logical") }) \ No newline at end of file