added basic cran code for v1.3-0

boopsboops · Jan 12, 2018 · 9a14489 · 9a14489
1 parent b8f785e
commit 9a14489
Show file tree

Hide file tree

Showing 93 changed files with 3,550 additions and 0 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,15 @@
+Package: spider
+Type: Package
+Title: Species Identity and Evolution in R
+Version: 1.3-0
+Date: 2013-12-25
+Author: Samuel Brown, Rupert Collins, Stephane Boyer, Marie-Caroline Lefort, Jagoba Malumbres-Olarte, Cor Vink, Rob Cruickshank
+Maintainer: Samuel Brown <s_d_j_brown@hotmail.com>
+Description: A package for the analysis of species limits and DNA barcoding data
+License: GPL
+LazyLoad: yes
+Depends: ape, pegas
+Packaged: 2013-12-27 04:41:31 UTC; sam
+NeedsCompilation: no
+Repository: CRAN
+Date/Publication: 2013-12-27 07:30:41
diff --git a/MD5 b/MD5
@@ -0,0 +1,92 @@
+6ef850e8e432b9a07bea47191083989c *DESCRIPTION
+ccedd6712a186c4e228ed15a91f11bc1 *NAMESPACE
+3ca298078ffa7ac200abef3880f87f64 *NEWS
+e62af89c4a267ddca83f35e3fe45aee2 *R/bestCloseMatch.R
+fd8a6057196e4b37eacaf2702ffae307 *R/cgraph.R
+72989f994b8cbc46f268ae5685db274e *R/chaoHaplo.R
+4eff78217d84219aaebc735c28541926 *R/checkDNA.R
+5e3aa555d847d01f976df668706ce0ed *R/dataStat.R
+595c83f688fd9a2c296d8eb245aafa73 *R/haploAccum.R
+0279eec6c00e72eba4ceaf40df02e009 *R/heatmapSpp.R
+7589015d4d0e23e0e375a20013be8eff *R/is.ambig.R
+e1b514b3ba62439d0c880a4fc16da87d *R/localMinima.R
+1fc67e9c3058d97ddc13df5d33716480 *R/maxInDist.R
+216a150498025dd63a342afaaa2321e8 *R/monophyly.R
+464fe43648d24bfc14bfa467e3ddb65a *R/monophylyBoot.R
+c3cc009b71dbe65bbe6abd320b5a75d1 *R/nearNeighbour.R
+ab6cd5493a4d65160a222f6f2b0f4c87 *R/nonConDist.R
+5b6311a4810307002e91df28a1129b31 *R/nucDiag.R
+ddfdef735e02ae9536ff0ad3e4d366cc *R/ordinDNA.R
+db011b6dc2ec60650faedd89f2f4f3af *R/paa.R
+3e4082cf030704b680d31a8124c0542e *R/plot.haploAccum.R
+22603e5ef969a93b61cc28616cae1b37 *R/plot.ordinDNA.R
+f67592797d2df601b3b5bbc8f9aabd61 *R/plot.slidWin.R
+a2241669fe8df719b7c5236be8d14d73 *R/polyBalance.R
+81468d47d63834ba47fe57655f67bf06 *R/rankSlidWin.R
+06830956b67fa1d383f114c296bc8d5d *R/read.BOLD.R
+ceb5f7e91c8f10fe002599df70565fc3 *R/read.GB.R
+ffc7ad16cf592a257baa14cd85a77764 *R/rmSingletons.R
+01dce3948a335e691f124a160208aad6 *R/rosenberg.R
+12f6fa948324572903f7f9ba30676a39 *R/search.BOLD.R
+62452208c0f3b1f49b6c33ea7023ab65 *R/seeBarcode.R
+29ac3818d61baed21e8e78b5d57730f1 *R/seqStat.R
+5781e96e6e54232f5b4bad3e37cbc9c1 *R/slideAnalyses.R
+194ed594ea4c76d876523f2840ac582b *R/slideBoxplots.R
+03a948a9a4fd88a8dba868b2426b7d7c *R/slideNucDiag.R
+74cfd49a4b8abc658fcb8ceb63f7ba03 *R/slidingWindow.R
+3511609b716c4584fc450e51f8822014 *R/sppDist.R
+9d887177ee46fd110413143c6aaa0773 *R/sppDistMatrix.R
+6adee9b3f4f7e03f565c8dc8ab0e806b *R/stats.BOLD.R
+bc47e61e1f0c547cd7507e8d41475b2e *R/tajima.K.R
+4d17fa0332ad6b57b1d72c10009e8ede *R/tclust.R
+cfdccf7e0e59ae4440ceee670cb1b5cf *R/threshID.R
+36917b934985d79bdcd18255a7ed07e7 *R/threshOpt.R
+3b2a366119a12d521dd36e1089d668af *R/tiporder.R
+88d340c437eb3fcbc80d5b27cec8fcb2 *R/titv.R
+9c4ccbfa0a9c40518aa0fb9a55c0c680 *R/tree.comp.R
+edbf5b74d01751270e51c2440f10a279 *data/anoteropsis.rda
+a712abccd48075cbc548da442dfd926f *data/dolomedes.rda
+c6688b7a4a356ce5c708bd30cc626f5a *data/sarkar.rda
+4e86ccbe285fc2a1af25996cfc8b457f *inst/CITATION
+151dc35bb3daf70a1b274c7bb064e9c7 *man/anoteropsis.Rd
+9558e43c2f67120fd4c895a037fa1a43 *man/cgraph.Rd
+d597bc9189acf85ec1e6392dba999108 *man/chaoHaplo.Rd
+768ef7767fe1b49c6d84073a89655990 *man/checkDNA.Rd
+daf7029dc23352d4080fe069ef41f1be *man/dataStat.Rd
+e7c02820ae571e81c13adb7f068be728 *man/dolomedes.Rd
+b35336167fe6b8df5adff3b35f433cbb *man/haploAccum.Rd
+f23d99ed5511d13b7e8d9448d0da6112 *man/heatmapSpp.Rd
+49de57ec34792547dd95134e3401d29f *man/is.ambig.Rd
+060748e190f0e69b41ca5c7fe273cdb9 *man/localMinima.Rd
+e48d6963b281b10c9e3475db4b06643b *man/monophyly.Rd
+ac12c007eb3d9ffa710f6d30dec9108d *man/nearNeighbour.Rd
+a3f99dc4b41a24bbc2ab15b14d5f4122 *man/nonConDist.Rd
+7f6d54110e9c5ab22dba09a76250859f *man/nucDiag.Rd
+67bc6ecb798844864b7a704b9b61b33d *man/ordinDNA.Rd
+33d607edf25a5ffe7c249d7453b93a2e *man/paa.Rd
+c51185d75775b4412904cc5612a3bab9 *man/plot.haploAccum.Rd
+0f0c7676694d88e8a30c179cd7f9c415 *man/plot.ordinDNA.Rd
+1464a0d035a146a7ffc06186f6d888e1 *man/plot.slidWin.Rd
+596598619f59d979962f958adf55dd66 *man/polyBalance.Rd
+91212b502f4232ab8de1b32b7c7e187b *man/rankSlidWin.Rd
+197f3386d00be5bad7999ecf32189d0d *man/read.BOLD.Rd
+b04e908ea0703656331dd88d5214012b *man/read.GB.Rd
+6a877d3d9110047e4b643eba1dd84cb8 *man/rmSingletons.Rd
+d5a18ce2ab6ba74a8298eb9bb909ab8e *man/rosenberg.Rd
+e327eac7dd250d34ac50c1a6e8064cc0 *man/sarkar.Rd
+6292e0149930ef0ed84547d77e9da426 *man/seeBarcode.Rd
+0ee9121c8b61193e56943efd509977f8 *man/seqStat.Rd
+a6767e83ec9592f2d9010242c27e0acc *man/slideAnalyses.Rd
+0fd14c4652e09ef5618b371b9be57618 *man/slideBoxplots.Rd
+ea4e754f069b34fe3fc3d9e63caa9852 *man/slideNucDiag.Rd
+53579bb4209920f371ff0387f6785195 *man/slidingWindow.Rd
+ca9ac3a2da2faaa5da664ffadbda1822 *man/spider-package.Rd
+84d9cd7f8592d23479bd367b572ca17d *man/sppDist.Rd
+ba0afcd2a953082fbf615c05e0c88969 *man/sppDistMatrix.Rd
+7cb8795e205f98427cf6cce5c5489ea6 *man/sppVector.Rd
+37652708d38147697e7bf28e0966d677 *man/tajima.K.Rd
+d19607e2d857b9f66ecc031952c79900 *man/tclust.Rd
+625a1e3dc27added71281f730f25fa34 *man/threshOpt.Rd
+d64605bb7cd5b6a1dc477aca67ac7249 *man/tiporder.Rd
+d4433bc014f57a1985367cd049adde85 *man/titv.Rd
+e87a692944d243cf97d391354e99b50e *man/tree.comp.Rd
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,14 @@
+# Remove the previous line if you edit this file
+
+# Export all names
+exportPattern("^[^\\.]")
+
+# Import all packages listed as Imports or Depends
+importFrom(ape, dist.dna, base.freq, seg.sites)
+importFrom(pegas, haplotype)
+importFrom(graphics, plot)
+
+
+# S3 methods
+# Every plot method shipped in R/ is registered so dispatch does not depend
+# on the package being attached.
+S3method(plot, slidWin)
+S3method(plot, haploAccum)
+S3method(plot, ordinDNA)
diff --git a/NEWS b/NEWS
@@ -0,0 +1,66 @@
+############################################
+                                SPIDER VERSION 1.3-0
+				Released 25 December 2013
+
+
+-- read.GB: Space removed from around the pipe in the default naming scheme.
+-- chaoHaplo: 95% confidence interval calculated around the estimated number of haplotypes.
+-- New functions: cgraph, ordinDNA and plot.ordinDNA
+-- namespace issues resolved
+
+############################################
+                                SPIDER VERSION 1.2-0
+				Released 17 November 2012
+
+
+-- Functions added: heatmapSpp
+-- read.BOLD: Function completely rewritten to compensate for BOLD deprecating their eFetch system
+
+############################################
+                                SPIDER VERSION 1.1-5
+				Released 11 October 2012
+
+
+-- monophyly: Code changed from calling the ape C routine "bipartition" directly, to using prop.part()
+-- monophylyBoot: Code changed from calling the ape C routine "bipartition" directly, to using prop.part()
+-- tree.comp: Code changed from calling the ape C routine "bipartition" directly, to using prop.part()
+
+############################################
+                                SPIDER VERSION 1.1-4
+				Released 17 June 2012
+
+
+-- sarkar: The addition of a dataset containing the dummy sequences published in Sarkar et al to illustrate the different categories of diagnostic nucleotides.
+
+############################################
+                                SPIDER VERSION 1.1-3
+				Released 29 March 2012
+
+
+-- rankSlidWin: bug fixed that caused an error when multiple names were given to "criteria"
+
+############################################
+                                SPIDER VERSION 1.1-2
+				Released 10 March 2012
+
+
+-- read.GB: modified to work with eFetch version 2.0 (http://www.ncbi.nlm.nih.gov/books/NBK25499/)
+-- read.BOLD: help file updated
+-- search.BOLD: Modified to work with BOLDsystems v 3.0
+-- Functions added: stats.BOLD
+
+############################################
+                                SPIDER VERSION 1.1-1
+				Released 27 November 2011
+
+
+-- tiporder: "labels" option added to toggle between returning the labels (when "labels"=TRUE), or returning the indices (when "labels"=FALSE).
+
+############################################
+                                SPIDER VERSION 1.1-0
+				Released 6 November 2011
+
+-- Initial release onto CRAN
+-- No known issues
+
+############################################
diff --git a/R/bestCloseMatch.R b/R/bestCloseMatch.R
@@ -0,0 +1,15 @@
+# Classify each individual by the species of its nearest neighbour(s).
+#
+# Arguments:
+#   distobj   - pairwise distance matrix, or an object as.matrix() can convert.
+#   sppVector - species label of each individual, in the same order as distobj.
+#   threshold - distance above which the nearest match is not accepted.
+#
+# Returns a character vector with one entry per individual:
+#   "correct"   - every nearest neighbour belongs to the individual's species;
+#   "ambiguous" - nearest neighbours include the individual's species and others;
+#   "incorrect" - no nearest neighbour belongs to the individual's species;
+#   "no id"     - the nearest neighbour is further away than `threshold`.
+bestCloseMatch <- function(distobj, sppVector, threshold = 0.01) {
+  distobj <- as.matrix(distobj)
+  # An individual must not be matched against itself.
+  diag(distobj) <- NA
+  sppVector <- as.character(sppVector)
+  nInd <- length(sppVector)
+
+  # Distance from each individual to its closest neighbour.
+  nearestDist <- apply(distobj, MARGIN = 2, FUN = min, na.rm = TRUE)
+
+  # Species found at that distance. Built with lapply() so there is always
+  # exactly one list element per individual: apply() would collapse the
+  # result to a matrix whenever every column has the same number of ties.
+  nearestSpp <- lapply(seq_len(nInd), function(i) {
+    unique(sppVector[which(distobj[, i] == nearestDist[i])])
+  })
+
+  ownSppPresent <- vapply(
+    seq_len(nInd),
+    function(i) sppVector[i] %in% nearestSpp[[i]],
+    logical(1)
+  )
+  singleSpp <- vapply(nearestSpp, length, integer(1)) == 1L
+
+  output <- rep(NA_character_, nInd)
+  output[ownSppPresent & singleSpp] <- "correct"
+  output[ownSppPresent & !singleSpp] <- "ambiguous"
+  output[!ownSppPresent] <- "incorrect"
+  # The threshold overrides any of the classifications above.
+  output[which(nearestDist > threshold)] <- "no id"
+  output
+}
diff --git a/R/cgraph.R b/R/cgraph.R
@@ -0,0 +1,8 @@
+# Draw a complete graph: one line segment between every pair of points.
+#
+# Arguments:
+#   x   - matrix whose first two columns are the point coordinates, or the
+#         x coordinates when `y` is supplied.
+#   y   - optional y coordinates, bound to `x` with cbind().
+#   ... - passed on to segments().
+#
+# Nothing is drawn (and NULL is returned) for fewer than two points or for
+# input without dimensions.
+cgraph <- function(x, y = NULL, ...) {
+  mat <- if (is.null(y)) x else cbind(x, y)
+  nPoints <- dim(mat)[1]
+  # dim() is NULL for a plain vector, so test for that first: `NULL < 2`
+  # has length zero and would make if() fail.
+  if (is.null(nPoints) || nPoints < 2) {
+    return(invisible(NULL))
+  }
+  # Each column of `pairs` holds the row indices of one pair of points.
+  pairs <- combn(seq_len(nPoints), 2)
+  segments(mat[pairs[1, ], 1], mat[pairs[1, ], 2],
+           mat[pairs[2, ], 1], mat[pairs[2, ], 2], ...)
+}
diff --git a/R/chaoHaplo.R b/R/chaoHaplo.R
@@ -0,0 +1,22 @@
+# Estimate the total number of haplotypes in a population from a sample.
+#
+# Arguments:
+#   DNAbin - DNA alignment accepted by haplotype().
+#
+# Returns a numeric vector of length three: the estimated number of
+# haplotypes followed by the lower and upper 95% confidence limits.
+chaoHaplo <- function(DNAbin) {
+  haplo <- haplotype(DNAbin)
+  if (length(grep("[-|?|r|y|m|k|w|s|b|d|h|v|n]", DNAbin)) > 0) {
+    message("There are missing or ambiguous data, which may cause an overestimation of the number of haplotypes")
+  }
+  # Number of sequences carrying each observed haplotype.
+  nums <- vapply(attr(haplo, "index"), length, integer(1))
+  # Summing the counts gives the sample size for list and matrix input alike
+  # (dim() is NULL for a list).
+  n <- sum(nums)
+  h <- length(nums)
+  s <- sum(nums == 1) # haplotypes seen once
+  d <- sum(nums == 2) # haplotypes seen twice
+
+  # Estimated number of unseen haplotypes (From Vink et al 2011)
+  unseen <- if (d > 0) (s^2) / (2 * d) else (s * (s - 1)) / 2
+  est <- h + unseen
+
+  # Confidence intervals (Modified from Chao 1989)
+  if (unseen > 0) {
+    varest <- est / ((h / unseen) - n / est)
+    C <- exp(1.96 * sqrt(log(1 + (varest / (unseen^2)))))
+    low <- h + unseen / C
+    high <- h + unseen * C
+  } else {
+    # No unseen haplotypes are estimated: the interval collapses onto the
+    # observed count instead of evaluating 0 / 0.
+    low <- high <- est
+  }
+
+  c(est, low, high)
+}
+
+
+
+
diff --git a/R/checkDNA.R b/R/checkDNA.R
@@ -0,0 +1,8 @@
+# Count the missing bases in each sequence.
+#
+# Arguments:
+#   DNAbin        - sequences stored as raw codes: a list of sequences, a
+#                   matrix with one sequence per row, or a single sequence.
+#   gapsAsMissing - when TRUE, code 4 is counted in addition to codes 2 and
+#                   240 (presumably the gap code -- confirm against ape).
+#
+# Returns an integer vector with one count per sequence.
+checkDNA <-
+function(DNAbin, gapsAsMissing = TRUE) {
+  bases <- if (gapsAsMissing) c(2, 240, 4) else c(2, 240)
+  countMissing <- function(x) sum(as.numeric(x) %in% bases)
+  if (is.matrix(DNAbin)) {
+    output <- apply(DNAbin, MARGIN = 1, FUN = countMissing)
+  } else if (is.list(DNAbin)) {
+    output <- vapply(DNAbin, countMissing, integer(1))
+  } else {
+    # A bare vector is a single sequence; previously `output` was never set.
+    output <- countMissing(unclass(DNAbin))
+  }
+  output
+}
+
diff --git a/R/dataStat.R b/R/dataStat.R
@@ -0,0 +1,9 @@
+# Summarise the taxonomic coverage of a data set.
+#
+# Arguments:
+#   sppVector - species label of each individual.
+#   genVector - genus label of each individual.
+#   thresh    - species with fewer individuals than this are counted in
+#               the "Thresh" entry.
+#
+# Returns a named table-class vector: number of genera, number of species,
+# and the minimum, maximum, median and mean number of individuals per
+# species, plus the number of species below `thresh`. All entries are
+# rounded to whole numbers.
+dataStat <- function(sppVector, genVector, thresh = 5) {
+  species <- unique(sppVector)
+  genera <- unique(genVector)
+  # Number of individuals recorded for each species.
+  perSpecies <- sapply(species, function(sp) sum(sppVector %in% sp))
+  stats <- c(
+    length(genera),
+    length(species),
+    min(perSpecies),
+    max(perSpecies),
+    median(perSpecies),
+    mean(perSpecies),
+    sum(perSpecies < thresh, na.rm = TRUE)
+  )
+  # Start from an empty table so the result keeps the "table" class.
+  out <- table(NULL)
+  out[1:7] <- stats
+  names(out) <- c("Genera", "Species", "Min", "Max", "Median", "Mean", "Thresh" )
+  round(out, digits = 0)
+}
diff --git a/R/haploAccum.R b/R/haploAccum.R
@@ -0,0 +1,76 @@
+# Haplotype accumulation curve.
+#
+# Arguments:
+#   DNAbin       - DNA alignment stored as raw codes: a matrix with one
+#                  sequence per row, or a list that as.matrix() can convert.
+#   method       - "collector" adds sequences in the order given; "random"
+#                  averages over random orderings.
+#   permutations - number of random orderings used by method = "random".
+#   ...          - currently unused.
+#
+# Returns an object of class "haploAccum": a list with the call, the
+# method, the sequence counts 1..n, the (mean) number of haplotypes seen
+# after each sequence, and for method = "random" the standard deviation
+# and the full permutation matrix (NULL otherwise).
+haploAccum <- function(DNAbin, method = "random", permutations = 100, ...) {
+  METHODS <- c("collector", "random")
+  method <- match.arg(method, METHODS)
+
+  if (is.list(DNAbin)) DNAbin <- as.matrix(DNAbin)
+  # Only warn when ambiguity codes are actually present.
+  if (length(grep("[-|?|r|y|m|k|w|s|b|d|h|v|n]", DNAbin)) > 0) {
+    message("There are missing or ambiguous data, which may cause an overestimation of the number of haplotypes")
+  }
+
+  seq_names <- as.vector(rownames(DNAbin))
+  # One string per sequence, so identical sequences compare equal.
+  y <- unname(apply(DNAbin, 1, rawToChar))
+  n <- length(y)
+  if (n < 1) stop("DNAbin contains no sequences")
+
+  # Haplotypes are numbered in order of first appearance.
+  hapIndex <- match(y, unique(y))
+  h <- max(hapIndex)
+
+  # Indicator matrix: z[i, j] is 1 when sequence i carries haplotype j.
+  z <- matrix(0, nrow = n, ncol = h)
+  rownames(z) <- seq_names
+  z[cbind(seq_len(n), hapIndex)] <- 1
+  sequences <- seq_len(n)
+
+  # Number of distinct haplotypes seen after each sequence, taking the
+  # sequences in the order given by `sequences`.
+  accumulator <- function(x, sequences) {
+    seen <- apply(x[sequences, , drop = FALSE], 2, cumsum)
+    # apply() returns a plain vector when there is only one sequence.
+    if (is.null(dim(seen))) seen <- matrix(seen, nrow = 1)
+    rowSums(seen > 0)
+  }
+
+  haploaccum <- sdaccum <- perm <- NULL
+  if (n == 1) {
+    message("There is only 1 sequence. No accumulation was possible")
+  }
+  switch(method,
+    collector = {
+      haploaccum <- accumulator(z, sequences)
+    },
+    random = {
+      perm <- array(dim = c(n, permutations))
+      for (i in seq_len(permutations)) {
+        perm[, i] <- accumulator(z, sample(n))
+      }
+      haploaccum <- apply(perm, 1, mean)
+      sdaccum <- apply(perm, 1, sd)
+    }
+  )
+  out <- list(call = match.call(), method = method, sequences = sequences,
+              n.haplotypes = haploaccum, sd = sdaccum, perm = perm)
+  class(out) <- "haploAccum"
+  out
+}
diff --git a/R/heatmapSpp.R b/R/heatmapSpp.R
@@ -0,0 +1,12 @@
+# Draw a heat map of a distance matrix with individuals ordered by species.
+#
+# Arguments:
+#   distObj    - distance matrix, or an object as.matrix() can convert.
+#   sppVector  - species label of each individual; rows and columns are
+#                drawn in the order given by order(sppVector).
+#   col        - colours passed to image(); a built-in 12-colour palette is
+#                used when NULL.
+#   axisLabels - labels for both axes, in the same order as sppVector; the
+#                species labels are used when NULL.
+heatmapSpp <- function(distObj, sppVector, col = NULL, axisLabels = NULL) {
+  if (!is.matrix(distObj)) {
+    distObj <- as.matrix(distObj)
+  }
+
+  cols <- if (is.null(col)) {
+    c("#D33F6A", "#D95260", "#DE6355", "#E27449", "#E6833D", "#E89331",
+      "#E9A229", "#EAB12A", "#E9C037", "#E7CE4C", "#E4DC68", "#E2E6BD")
+  } else {
+    col
+  }
+
+  # Same ordering for the matrix rows, its columns and the labels.
+  sppOrder <- order(sppVector)
+  axisLabels <- if (is.null(axisLabels)) sppVector[sppOrder] else axisLabels[sppOrder]
+
+  # image() places the cells on a 0-1 scale, so the ticks are spread evenly
+  # over that range.
+  tickPos <- seq(0, 1, length.out = dim(distObj)[1])
+
+  image(distObj[sppOrder, sppOrder], col = cols, xaxt = "n", yaxt = "n")
+  axis(1, at = tickPos, labels = axisLabels, las = 2)
+  axis(2, at = tickPos, labels = axisLabels, las = 2)
+}
diff --git a/R/is.ambig.R b/R/is.ambig.R
@@ -0,0 +1,8 @@
+# Flag alignment columns containing anything other than the four raw codes
+# 136, 72, 40 and 24.
+#
+# Arguments:
+#   DNAbin - DNA alignment stored as raw codes; converted with as.matrix().
+#
+# Returns a logical vector with one entry per alignment column, TRUE where
+# at least one sequence holds a code outside that set.
+is.ambig <-
+function(DNAbin) {
+  alignment <- as.matrix(DNAbin)
+  bases <- c(136, 72, 40, 24)
+  apply(alignment, 2, FUN = function(site) any(!(as.numeric(site) %in% bases)))
+}
+
+
diff --git a/R/localMinima.R b/R/localMinima.R
@@ -0,0 +1,10 @@
+# Locate the local minima of the density of a set of distances.
+#
+# Arguments:
+#   distobj - numeric distances, passed to density().
+#
+# Prints the positions of the local minima and invisibly returns the
+# density object with three components added or replaced: `localMinima`
+# (x positions of the minima), `data.name` and `call`.
+localMinima <- function(distobj) {
+  den <- density(distobj)
+  y <- den$y
+  # Only interior points can be minima: each needs a neighbour on both sides.
+  interior <- seq_along(y)[-c(1, length(y))]
+  isMin <- y[interior - 1] > y[interior] & y[interior + 1] > y[interior]
+  # Index x with the position of the minimum itself; the previous code
+  # indexed one grid point to the left of it.
+  den$localMinima <- den$x[interior[which(isMin)]]
+  den$data.name <- deparse(substitute(distobj))
+  den$call <- paste0("density.default(", den$data.name, ")")
+  print(den$localMinima)
+  invisible(den)
+}
diff --git a/R/maxInDist.R b/R/maxInDist.R
@@ -0,0 +1,14 @@
+# Largest within-species distance for each individual.
+#
+# Arguments:
+#   distobj   - pairwise distance matrix, or an object as.matrix() can convert.
+#   sppVector - species label of each individual; when NULL the row names
+#               of the distance matrix are used as labels.
+#   propZero  - when TRUE, return the proportion of individuals whose
+#               largest within-species distance is zero.
+#   rmNA      - passed to max() as na.rm.
+#
+# Returns a numeric vector with one value per individual, or a single
+# proportion when `propZero` is TRUE. The individual's own (diagonal) entry
+# is part of the comparison, so a species with a single individual yields
+# that diagonal value.
+maxInDist <-
+function(distobj, sppVector = NULL, propZero = FALSE, rmNA = FALSE) {
+  dat <- as.matrix(distobj)
+  # Labels are kept in a local vector instead of being written into the
+  # dimnames, which fails for a matrix that has no dimnames.
+  spp <- if (length(sppVector) > 0) as.character(sppVector) else rownames(dat)
+  if (length(spp) != nrow(dat)) {
+    stop("sppVector (or the row names of distobj) must give one species label per individual")
+  }
+  maxDists <- vapply(seq_along(spp), function(i) {
+    max(dat[spp == spp[i], i], na.rm = rmNA)
+  }, numeric(1))
+  if (propZero) {
+    sum(maxDists == 0, na.rm = TRUE) / length(maxDists)
+  } else {
+    maxDists
+  }
+}