Skip to content

Commit

Permalink
version 0.8.0
Browse files Browse the repository at this point in the history
  • Loading branch information
Scott Sherrill-Mix authored and cran-robot committed May 6, 2021
1 parent ad72a05 commit 94f19c8
Show file tree
Hide file tree
Showing 6 changed files with 18 additions and 39 deletions.
10 changes: 5 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@ Authors@R: c(person("Scott", "Sherrill-Mix", role = c("aut", "cre"),
email = "shescott@upenn.edu"))
BugReports: https://github.com/sherrillmix/taxonomizr/issues
Description: Functions for assigning taxonomy to NCBI accession numbers and taxon IDs based on NCBI's accession2taxid and taxdump files. This package allows the user to download NCBI data dumps and create a local database for fast and local taxonomic assignment.
Version: 0.7.1
Date: 2021-04-22
Version: 0.8.0
Date: 2021-05-05
Suggests: testthat, knitr, rmarkdown
Depends: R (>= 3.0.0)
Imports: RSQLite, R.utils, data.table
Imports: RSQLite, R.utils, data.table, curl
RoxygenNote: 7.1.1
VignetteBuilder: knitr
NeedsCompilation: yes
Packaged: 2021-04-22 20:16:20 UTC; scott
Packaged: 2021-05-05 15:26:16 UTC; scott
Repository: CRAN
Date/Publication: 2021-04-23 15:10:02 UTC
Date/Publication: 2021-05-06 23:20:02 UTC
10 changes: 5 additions & 5 deletions MD5
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
5186759d07fd139b9c31ad797f318f11 *DESCRIPTION
b25528851520a8dfa654383d428a48d5 *DESCRIPTION
b234ee4d69f5fce4486a80fdaf4a4263 *LICENSE
688088d8d0debe277691f49257653c36 *NAMESPACE
25ac151270e37bdcfb186f9e0819a0be *R/taxa.R
7e7469575ffb3e5e3a4649f8c3e41856 *R/taxa.R
07a37ce6e5e561978e9fc0dc2d605722 *README.md
72dc70c2e6a5b4d1cce5c702a45f72f3 *build/vignette.rds
d1c065700946a6c0d1b7cde47b409a9e *inst/doc/usage.R
Expand All @@ -10,11 +10,11 @@ d1c065700946a6c0d1b7cde47b409a9e *inst/doc/usage.R
2c6938a3b6c0303311c55f0e328ff746 *inst/testdata/fakeNamesNodes.tar.gz
78ef5645c9c3719c5b9630c65fc46d62 *man/accessionToTaxa.Rd
fc39f17d296a65ca038d98b5b3a69d28 *man/condenseTaxa.Rd
5abd188f372981c90221c3f1b8caf705 *man/getAccession2taxid.Rd
24895fc5e454a5f38201a2d56e69d589 *man/getAccession2taxid.Rd
c43ce37f96837e90311d5cee78c678e8 *man/getAccessions.Rd
af0074232ee13d06861fd9aac069d226 *man/getId.Rd
f540636f46822a6c1447da68507bdd93 *man/getId2.Rd
4dbf0c375b0cb150abae5982b9c30712 *man/getNamesAndNodes.Rd
7153b0284b6fbe4e0ca5e7f9e7951556 *man/getNamesAndNodes.Rd
cef82b6a160abc6bbc8a43de762df7e4 *man/getRawTaxonomy.Rd
5324e8df544a6af8f1e0dfd874ce9610 *man/getTaxonomy.Rd
939e0aeca4cdde8f33aa0604be8f61a5 *man/getTaxonomy2.Rd
Expand All @@ -33,5 +33,5 @@ d47fc7ed590032dbfa141b6952d998d3 *man/trimTaxa.Rd
973c222136ffbf4008ff747d643bc6a3 *src/taxaTrim.c
1b509a76cc18244a3847a2ab27ab9f77 *src/taxonomizr-init.c
09570ff5fc4fe1c8f81b47295a3faafb *tests/testthat.R
64b8a89268643477b85de702a03706ce *tests/testthat/test_taxa.R
f4b2cac47fc0a9bf6f005801db45fbe5 *tests/testthat/test_taxa.R
7ba485c7025dc680e494a717eaed2bcd *vignettes/usage.Rmd
20 changes: 6 additions & 14 deletions R/taxa.R
Original file line number Diff line number Diff line change
Expand Up @@ -460,8 +460,8 @@ getParentNodes<-function(ids,sqlFile='nameNode.sqlite'){
checkDownloadMd5<-function(url,file,errorIfNoMd5=FALSE){
md5<-sprintf('%s.md5',url)
tmp<-tempfile()
check<-tryCatch(utils::download.file(md5,tmp,mode='wb'),warning=function(xx)1,error=function(xx)1)
if(check!=0){
check<-tryCatch(curl::curl_download(md5,tmp,mode='wb',quiet=FALSE),warning=function(xx)FALSE,error=function(xx)FALSE)
if(check==FALSE){
if(errorIfNoMd5)stop("Problem downloading md5 ",md5)
else return(TRUE)
}
Expand Down Expand Up @@ -782,7 +782,6 @@ condenseTaxa<-function(taxaTable,groupings=rep(1,nrow(taxaTable))){
#' @param outDir the directory to put names.dmp and nodes.dmp in
#' @param url the url where taxdump.tar.gz is located
#' @param fileNames the filenames desired from the tar.gz file
#' @param timeout time in seconds for the download to time out
#' @return a vector of file path strings of the locations of the output files
#' @seealso \code{\link{read.nodes.sql}}, \code{\link{read.names.sql}}
#' @references \url{ftp://ftp.ncbi.nih.gov/pub/taxonomy/}, \url{https://www.ncbi.nlm.nih.gov/Taxonomy/taxonomyhome.html/}
Expand All @@ -791,10 +790,7 @@ condenseTaxa<-function(taxaTable,groupings=rep(1,nrow(taxaTable))){
#' \dontrun{
#' getNamesAndNodes()
#' }
getNamesAndNodes<-function(outDir='.',url='ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz',fileNames=c('names.dmp','nodes.dmp'),timeout=36000){
oldTimeout<-getOption('timeout')
on.exit(options('timeout'=oldTimeout))
options('timeout'=timeout)
getNamesAndNodes<-function(outDir='.',url='ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz',fileNames=c('names.dmp','nodes.dmp')){
outFiles<-file.path(outDir,fileNames)
if(all(file.exists(outFiles))){
message(paste(outFiles,collapse=', '),' already exist. Delete to redownload')
Expand All @@ -804,7 +800,7 @@ getNamesAndNodes<-function(outDir='.',url='ftp://ftp.ncbi.nih.gov/pub/taxonomy/t
tmp<-tempfile()
dir.create(tmp)
tarFile<-file.path(tmp,base)
utils::download.file(url,tarFile,mode='wb')
curl::curl_download(url,tarFile,mode='wb',quiet=FALSE)
if(!checkDownloadMd5(url,tarFile))stop('Downloaded file does not match ',url,' File corrupted or download ended early?')
utils::untar(tarFile,fileNames,exdir=tmp,tar='internal')
tmpFiles<-file.path(tmp,fileNames)
Expand All @@ -822,7 +818,6 @@ getNamesAndNodes<-function(outDir='.',url='ftp://ftp.ncbi.nih.gov/pub/taxonomy/t
#' @param outDir the directory to put the accession2taxid.gz files in
#' @param baseUrl the url of the directory where accession2taxid.gz files are located
#' @param types the types of accession2taxid.gz files desired where type is the prefix of xxx.accession2taxid.gz. The default is to download all nucl_ accessions. For protein accessions, try \code{types=c('prot')}.
#' @param timeout time in seconds for the download to time out
#' @return a vector of file path strings of the locations of the output files
#' @seealso \code{\link{read.accession2taxid}}
#' @references \url{ftp://ftp.ncbi.nih.gov/pub/taxonomy/}, \url{https://www.ncbi.nlm.nih.gov/Sequin/acc.html}
Expand All @@ -837,10 +832,7 @@ getNamesAndNodes<-function(outDir='.',url='ftp://ftp.ncbi.nih.gov/pub/taxonomy/t
#'
#' getAccession2taxid()
#' }
getAccession2taxid<-function(outDir='.',baseUrl='ftp://ftp.ncbi.nih.gov/pub/taxonomy/accession2taxid/',types=c('nucl_gb','nucl_wgs'),timeout=36000){
oldTimeout<-getOption('timeout')
on.exit(options('timeout'=oldTimeout))
options('timeout'=timeout)
getAccession2taxid<-function(outDir='.',baseUrl='ftp://ftp.ncbi.nih.gov/pub/taxonomy/accession2taxid/',types=c('nucl_gb','nucl_wgs')){
message('This can be a big (several gigabytes) download. Please be patient and use a fast connection.')
fileNames<-sprintf('%s.accession2taxid.gz',types)
outFiles<-file.path(outDir,fileNames)
Expand All @@ -851,7 +843,7 @@ getAccession2taxid<-function(outDir='.',baseUrl='ftp://ftp.ncbi.nih.gov/pub/taxo
if(!substring(baseUrl,nchar(baseUrl)) %in% c('/','\\'))baseUrl<-sprintf('%s/',baseUrl)
urls<-paste(baseUrl,fileNames,sep='')
mapply(function(xx,yy){
utils::download.file(xx,yy,mode='wb')
curl::curl_download(xx,yy,mode='wb',quiet=FALSE)
if(!checkDownloadMd5(xx,yy))stop('Downloaded file does not match ',xx,' File corrupted or download ended early?')
},urls,outFiles)
return(outFiles)
Expand Down
5 changes: 1 addition & 4 deletions man/getAccession2taxid.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 1 addition & 4 deletions man/getNamesAndNodes.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 0 additions & 7 deletions tests/testthat/test_taxa.R
Original file line number Diff line number Diff line change
Expand Up @@ -601,9 +601,6 @@ test_that("Test getNamesAndNodes",{
tmp<-tempfile()
with_mock(`file.copy`=function(...)TRUE,expect_error(getNamesAndNodes(tmp,fakeFile),'copying'))
if(.Platform$OS.type == "windows")file.remove('fakeNamesNodes.tar')
options(timeout=46)
expect_error(getNamesAndNodes(tmp,fakeFile,'NOTREAL.FILE',timeout=2000))
expect_equal(getOption('timeout'),46)
tmp<-tempfile()
dir.create(tmp)
newFake<-file.path(tmp,'fake')
Expand All @@ -628,10 +625,6 @@ test_that("Test getAccession2taxid",{
expect_error(getAccession2taxid(tmp2,baseUrl=sprintf('file://%s',tmp),types=c('nucl_XxXx','nucl_XyXyX')),NA)
expect_equal(sort(list.files(tmp2,'accession2taxid.gz$')),sort(targets))
expect_message(getAccession2taxid(tmp2,baseUrl=sprintf('file://%s',tmp),types=c('nucl_XxXx','nucl_XyXyX')),'exist')
options(timeout=59)
expect_message(getAccession2taxid(tmp2,baseUrl=sprintf('file://%s',tmp),types=c('nucl_XxXx','nucl_XyXyX'),timeout=1000),'exist')
expect_error(getAccession2taxid(tmp2,baseUrl=sprintf('file://%s',tmp),types=c('nucl_XxXx','nucl_XyXyX')),NA)
expect_equal(getOption('timeout'),59)
file.remove(list.files(tmp2,'^nucl_.*.gz$',full.names=TRUE))
writeLines('NOTREALHASH EXTRATEXT',sprintf('%s.md5',file.path(tmp,targets[1])))
expect_error(getAccession2taxid(tmp2,baseUrl=sprintf('file://%s',tmp),types=c('nucl_XxXx','nucl_XyXyX')),'match')
Expand Down

0 comments on commit 94f19c8

Please sign in to comment.