version 0.8.0

cran · May 6, 2021 · 94f19c8 · 94f19c8
1 parent ad72a05
commit 94f19c8
Show file tree

Hide file tree

Showing 6 changed files with 18 additions and 39 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -9,14 +9,14 @@ Authors@R: c(person("Scott", "Sherrill-Mix", role = c("aut", "cre"),
                      email = "shescott@upenn.edu"))
 BugReports: https://github.com/sherrillmix/taxonomizr/issues
 Description: Functions for assigning taxonomy to NCBI accession numbers and taxon IDs based on NCBI's accession2taxid and taxdump files. This package allows the user to download NCBI data dumps and create a local database for fast and local taxonomic assignment.
-Version: 0.7.1
-Date: 2021-04-22
+Version: 0.8.0
+Date: 2021-05-05
 Suggests: testthat, knitr, rmarkdown
 Depends: R (>= 3.0.0)
-Imports: RSQLite, R.utils, data.table
+Imports: RSQLite, R.utils, data.table, curl
 RoxygenNote: 7.1.1
 VignetteBuilder: knitr
 NeedsCompilation: yes
-Packaged: 2021-04-22 20:16:20 UTC; scott
+Packaged: 2021-05-05 15:26:16 UTC; scott
 Repository: CRAN
-Date/Publication: 2021-04-23 15:10:02 UTC
+Date/Publication: 2021-05-06 23:20:02 UTC
diff --git a/MD5 b/MD5
@@ -1,7 +1,7 @@
-5186759d07fd139b9c31ad797f318f11 *DESCRIPTION
+b25528851520a8dfa654383d428a48d5 *DESCRIPTION
 b234ee4d69f5fce4486a80fdaf4a4263 *LICENSE
 688088d8d0debe277691f49257653c36 *NAMESPACE
-25ac151270e37bdcfb186f9e0819a0be *R/taxa.R
+7e7469575ffb3e5e3a4649f8c3e41856 *R/taxa.R
 07a37ce6e5e561978e9fc0dc2d605722 *README.md
 72dc70c2e6a5b4d1cce5c702a45f72f3 *build/vignette.rds
 d1c065700946a6c0d1b7cde47b409a9e *inst/doc/usage.R
@@ -10,11 +10,11 @@ d1c065700946a6c0d1b7cde47b409a9e *inst/doc/usage.R
 2c6938a3b6c0303311c55f0e328ff746 *inst/testdata/fakeNamesNodes.tar.gz
 78ef5645c9c3719c5b9630c65fc46d62 *man/accessionToTaxa.Rd
 fc39f17d296a65ca038d98b5b3a69d28 *man/condenseTaxa.Rd
-5abd188f372981c90221c3f1b8caf705 *man/getAccession2taxid.Rd
+24895fc5e454a5f38201a2d56e69d589 *man/getAccession2taxid.Rd
 c43ce37f96837e90311d5cee78c678e8 *man/getAccessions.Rd
 af0074232ee13d06861fd9aac069d226 *man/getId.Rd
 f540636f46822a6c1447da68507bdd93 *man/getId2.Rd
-4dbf0c375b0cb150abae5982b9c30712 *man/getNamesAndNodes.Rd
+7153b0284b6fbe4e0ca5e7f9e7951556 *man/getNamesAndNodes.Rd
 cef82b6a160abc6bbc8a43de762df7e4 *man/getRawTaxonomy.Rd
 5324e8df544a6af8f1e0dfd874ce9610 *man/getTaxonomy.Rd
 939e0aeca4cdde8f33aa0604be8f61a5 *man/getTaxonomy2.Rd
@@ -33,5 +33,5 @@ d47fc7ed590032dbfa141b6952d998d3 *man/trimTaxa.Rd
 973c222136ffbf4008ff747d643bc6a3 *src/taxaTrim.c
 1b509a76cc18244a3847a2ab27ab9f77 *src/taxonomizr-init.c
 09570ff5fc4fe1c8f81b47295a3faafb *tests/testthat.R
-64b8a89268643477b85de702a03706ce *tests/testthat/test_taxa.R
+f4b2cac47fc0a9bf6f005801db45fbe5 *tests/testthat/test_taxa.R
 7ba485c7025dc680e494a717eaed2bcd *vignettes/usage.Rmd
diff --git a/R/taxa.R b/R/taxa.R
@@ -460,8 +460,8 @@ getParentNodes<-function(ids,sqlFile='nameNode.sqlite'){
 checkDownloadMd5<-function(url,file,errorIfNoMd5=FALSE){
   md5<-sprintf('%s.md5',url)
   tmp<-tempfile()
-  check<-tryCatch(utils::download.file(md5,tmp,mode='wb'),warning=function(xx)1,error=function(xx)1)
-  if(check!=0){
+  check<-tryCatch(curl::curl_download(md5,tmp,mode='wb',quiet=FALSE),warning=function(xx)FALSE,error=function(xx)FALSE)
+  if(check==FALSE){
     if(errorIfNoMd5)stop("Problem downloading md5 ",md5)
     else return(TRUE)
   }
@@ -782,7 +782,6 @@ condenseTaxa<-function(taxaTable,groupings=rep(1,nrow(taxaTable))){
 #' @param outDir the directory to put names.dmp and nodes.dmp in
 #' @param url the url where taxdump.tar.gz is located
 #' @param fileNames the filenames desired from the tar.gz file
-#' @param timeout time in seconds for the download to time out
 #' @return a vector of file path strings of the locations of the output files
 #' @seealso \code{\link{read.nodes.sql}}, \code{\link{read.names.sql}}
 #' @references \url{ftp://ftp.ncbi.nih.gov/pub/taxonomy/}, \url{https://www.ncbi.nlm.nih.gov/Taxonomy/taxonomyhome.html/}
@@ -791,10 +790,7 @@ condenseTaxa<-function(taxaTable,groupings=rep(1,nrow(taxaTable))){
 #' \dontrun{
 #'   getNamesAndNodes()
 #' }
-getNamesAndNodes<-function(outDir='.',url='ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz',fileNames=c('names.dmp','nodes.dmp'),timeout=36000){
-  oldTimeout<-getOption('timeout')
-  on.exit(options('timeout'=oldTimeout))
-  options('timeout'=timeout)
+getNamesAndNodes<-function(outDir='.',url='ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz',fileNames=c('names.dmp','nodes.dmp')){
   outFiles<-file.path(outDir,fileNames)
   if(all(file.exists(outFiles))){
     message(paste(outFiles,collapse=', '),' already exist. Delete to redownload')
@@ -804,7 +800,7 @@ getNamesAndNodes<-function(outDir='.',url='ftp://ftp.ncbi.nih.gov/pub/taxonomy/t
   tmp<-tempfile()
   dir.create(tmp)
   tarFile<-file.path(tmp,base)
-  utils::download.file(url,tarFile,mode='wb')
+  curl::curl_download(url,tarFile,mode='wb',quiet=FALSE)
   if(!checkDownloadMd5(url,tarFile))stop('Downloaded file does not match ',url,' File corrupted or download ended early?')
   utils::untar(tarFile,fileNames,exdir=tmp,tar='internal')
   tmpFiles<-file.path(tmp,fileNames)
@@ -822,7 +818,6 @@ getNamesAndNodes<-function(outDir='.',url='ftp://ftp.ncbi.nih.gov/pub/taxonomy/t
 #' @param outDir the directory to put the accession2taxid.gz files in
 #' @param baseUrl the url of the directory where accession2taxid.gz files are located
 #' @param types the types of accession2taxid.gz files desired where type is the prefix of xxx.accession2taxid.gz. The default is to download all nucl_ accessions. For protein accessions, try \code{types=c('prot')}.
-#' @param timeout time in seconds for the download to time out
 #' @return a vector of file path strings of the locations of the output files
 #' @seealso \code{\link{read.accession2taxid}}
 #' @references \url{ftp://ftp.ncbi.nih.gov/pub/taxonomy/}, \url{https://www.ncbi.nlm.nih.gov/Sequin/acc.html}
@@ -837,10 +832,7 @@ getNamesAndNodes<-function(outDir='.',url='ftp://ftp.ncbi.nih.gov/pub/taxonomy/t
 #'
 #'   getAccession2taxid()
 #' }
-getAccession2taxid<-function(outDir='.',baseUrl='ftp://ftp.ncbi.nih.gov/pub/taxonomy/accession2taxid/',types=c('nucl_gb','nucl_wgs'),timeout=36000){
-  oldTimeout<-getOption('timeout')
-  on.exit(options('timeout'=oldTimeout))
-  options('timeout'=timeout)
+getAccession2taxid<-function(outDir='.',baseUrl='ftp://ftp.ncbi.nih.gov/pub/taxonomy/accession2taxid/',types=c('nucl_gb','nucl_wgs')){
   message('This can be a big (several gigabytes) download. Please be patient and use a fast connection.')
   fileNames<-sprintf('%s.accession2taxid.gz',types)
   outFiles<-file.path(outDir,fileNames)
@@ -851,7 +843,7 @@ getAccession2taxid<-function(outDir='.',baseUrl='ftp://ftp.ncbi.nih.gov/pub/taxo
   if(!substring(baseUrl,nchar(baseUrl)) %in% c('/','\\'))baseUrl<-sprintf('%s/',baseUrl)
   urls<-paste(baseUrl,fileNames,sep='')
   mapply(function(xx,yy){
-    utils::download.file(xx,yy,mode='wb')
+    curl::curl_download(xx,yy,mode='wb',quiet=FALSE)
     if(!checkDownloadMd5(xx,yy))stop('Downloaded file does not match ',xx,' File corrupted or download ended early?')
   },urls,outFiles)
   return(outFiles)

diff --git a/man/getAccession2taxid.Rd b/man/getAccession2taxid.Rd
diff --git a/man/getNamesAndNodes.Rd b/man/getNamesAndNodes.Rd
diff --git a/tests/testthat/test_taxa.R b/tests/testthat/test_taxa.R
@@ -601,9 +601,6 @@ test_that("Test getNamesAndNodes",{
   tmp<-tempfile()
   with_mock(`file.copy`=function(...)TRUE,expect_error(getNamesAndNodes(tmp,fakeFile),'copying'))
   if(.Platform$OS.type == "windows")file.remove('fakeNamesNodes.tar')
-  options(timeout=46)
-  expect_error(getNamesAndNodes(tmp,fakeFile,'NOTREAL.FILE',timeout=2000))
-  expect_equal(getOption('timeout'),46)
   tmp<-tempfile()
   dir.create(tmp)
   newFake<-file.path(tmp,'fake')
@@ -628,10 +625,6 @@ test_that("Test getAccession2taxid",{
   expect_error(getAccession2taxid(tmp2,baseUrl=sprintf('file://%s',tmp),types=c('nucl_XxXx','nucl_XyXyX')),NA)
   expect_equal(sort(list.files(tmp2,'accession2taxid.gz$')),sort(targets))
   expect_message(getAccession2taxid(tmp2,baseUrl=sprintf('file://%s',tmp),types=c('nucl_XxXx','nucl_XyXyX')),'exist')
-  options(timeout=59)
-  expect_message(getAccession2taxid(tmp2,baseUrl=sprintf('file://%s',tmp),types=c('nucl_XxXx','nucl_XyXyX'),timeout=1000),'exist')
-  expect_error(getAccession2taxid(tmp2,baseUrl=sprintf('file://%s',tmp),types=c('nucl_XxXx','nucl_XyXyX')),NA)
-  expect_equal(getOption('timeout'),59)
   file.remove(list.files(tmp2,'^nucl_.*.gz$',full.names=TRUE))
   writeLines('NOTREALHASH EXTRATEXT',sprintf('%s.md5',file.path(tmp,targets[1])))
   expect_error(getAccession2taxid(tmp2,baseUrl=sprintf('file://%s',tmp),types=c('nucl_XxXx','nucl_XyXyX')),'match')