Skip to content

Commit

Permalink
correction because of changing XML TOC
Browse files Browse the repository at this point in the history
  • Loading branch information
mmatyi committed Mar 25, 2024
1 parent ec4098a commit d0c1657
Show file tree
Hide file tree
Showing 8 changed files with 36 additions and 29 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
^\.Rproj\.user$
^docs$
^\.github$
^\.ipynb
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: restatapi
Type: Package
Title: Search and Retrieve Data from Eurostat Database
Date: 2024-03-14
Version: 0.22.9
Date: 2024-03-25
Version: 0.23.0
Encoding: UTF-8
Authors@R: c(person("Mátyás", "Mészáros", email = "matyas.meszaros@ec.europa.eu", role = c("aut", "cre")),
person("Sebastian", "Weinand", role = "ctb"))
Expand Down
8 changes: 6 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# restatapi 0.23.0

- correction of the `get_eurostat_toc()` function and the functions using the `check_toc` option because the content of the XML TOC has changed

# restatapi 0.22.9

- correction of caching when the DSD is downloaded with different languages
Expand Down Expand Up @@ -27,11 +31,11 @@

# restatapi 0.22.3

- additional check in the get_eurostat_dsd() and get_eurostat_codelist() for failing writing data to disk because of failing network connection
- additional check in `get_eurostat_dsd()` and `get_eurostat_codelist()` for a failure to write data to disk caused by a failing network connection

# restatapi 0.22.2

- correcting the get_compressed_sdmx() function not closing connections
- correcting the `get_compressed_sdmx()` function not closing connections
- updating examples and tests

# restatapi 0.22.1
Expand Down
2 changes: 1 addition & 1 deletion R/extract_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ extract_data<-function(xml_lf,keep_flags=FALSE,stringsAsFactors=FALSE,bulk=TRUE,
dv<-xml2::xml_attrs(xml2::xml_children(xml_lf))
if (keep_flags){
flagc<-switch(rav,"1"="OBS_STATUS","2"="OBS_FLAG")
if (check_toc) {flagc<-"OBS_STATUS"}
# if (check_toc) {flagc<-"OBS_STATUS"}
cn<-c("TIME_PERIOD","OBS_VALUE",flagc)
} else {
cn<-c("TIME_PERIOD","OBS_VALUE")
Expand Down
10 changes: 5 additions & 5 deletions R/get_eurostat_raw.R
Original file line number Diff line number Diff line change
Expand Up @@ -307,30 +307,30 @@ get_eurostat_raw <- function(id,
# }
} else if (mode=="xml"){
format<-switch(rav, "1" = "zip", "2" = "gz")
if (check_toc) {format<-"zip"}
# if (check_toc) {format<-"zip"}
if (verbose) {message("get_eurostat_raw - file format: ",format)}
sdmx_file<-restatapi::get_compressed_sdmx(bulk_url,verbose=verbose,format=format)
if(!is.null(sdmx_file)){
xml_mark<-switch(rav, "1" = ".//data:Series", "2" = ".//Series")
if (check_toc) {xml_mark<-".//data:Series"}
# if (check_toc) {xml_mark<-".//data:Series"}
xml_leafs<-xml2::xml_find_all(sdmx_file,xml_mark)
if (verbose) {message("get_eurostat_raw - class(xml_leafs): ",class(xml_leafs),
"\nget_eurostat_raw - number of nodes: ",length(xml_leafs),
"\nget_eurostat_raw - number of cores: ",getOption("restatapi_cores",1L))}
if (Sys.info()[['sysname']]=='Windows'){
if (getOption("restatapi_cores",1L)==1) {
if (verbose) message("No parallel")
restat_raw<-data.table::rbindlist(lapply(xml_leafs,extract_data,keep_flags=keep_flags,stringsAsFactors=stringsAsFactors,check_toc=check_toc))
restat_raw<-data.table::rbindlist(lapply(xml_leafs,extract_data,keep_flags=keep_flags,stringsAsFactors=stringsAsFactors))
} else {
xml_leafs<-as.character(xml_leafs)
cl<-parallel::makeCluster(getOption("restatapi_cores",1L))
parallel::clusterEvalQ(cl,require(xml2))
parallel::clusterExport(cl,c("extract_data"))
restat_raw<-data.table::rbindlist(parallel::parLapply(cl,xml_leafs,extract_data,keep_flags=keep_flags,stringsAsFactors=stringsAsFactors,check_toc=check_toc))
restat_raw<-data.table::rbindlist(parallel::parLapply(cl,xml_leafs,extract_data,keep_flags=keep_flags,stringsAsFactors=stringsAsFactors))
parallel::stopCluster(cl)
}
}else{
restat_raw<-data.table::rbindlist(parallel::mclapply(xml_leafs,extract_data,keep_flags=keep_flags,stringsAsFactors=stringsAsFactors,check_toc=check_toc,mc.cores=getOption("restatapi_cores",1L)))
restat_raw<-data.table::rbindlist(parallel::mclapply(xml_leafs,extract_data,keep_flags=keep_flags,stringsAsFactors=stringsAsFactors,mc.cores=getOption("restatapi_cores",1L)))
}
} else{
message("Could not download the SDMX file, use the verbose option to see the exact cause of the error.")
Expand Down
11 changes: 5 additions & 6 deletions R/get_eurostat_toc.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,7 @@
#' \code{metadata.sdmx}\tab The link to the metadata in SDMX format, and this column exists only if the
#' download \code{mode} is "xml"\cr
#' \code{downloadLink.tsv}\tab The link to the whole dataset/table in tab separated values format in the bulk
#' download facility and this column exists only if the download \code{mode} is "xml"\cr
#' \code{downloadLink.sdmx}\tab The link to the whole dataset/table in SDMX format in the bulk download
#' facility and this column exists only if the download \code{mode} is "xml"
#' download facility and this column exists only if the download \code{mode} is "xml"
#' }
#' @export
#' @seealso \code{\link{search_eurostat_toc}}, \code{\link{get_eurostat_dsd}}, \code{\link{get_eurostat_raw}}, \code{\link{get_eurostat_bulk}}, \code{\link{get_eurostat_data}}.
Expand Down Expand Up @@ -65,6 +63,7 @@ get_eurostat_toc<-function(mode="xml",
verbose=FALSE,...) {
toc<-xml_leafs<-NULL
tbc<-TRUE
verbose<-verbose|getOption("restatapi_verbose",FALSE)
if (verbose) {message("\nget_eurostat_toc - API version:",get("rav",envir=restatapi::.restatapi_env)," - number of cores:",getOption("restatapi_cores",1L))}
if((!exists(".restatapi_env")|(length(list(...))>0))){
if ((length(list(...))>0)) {
Expand All @@ -81,7 +80,7 @@ get_eurostat_toc<-function(mode="xml",
update_cache<-update_cache|getOption("restatapi_update",FALSE)
dmethod<-getOption("restatapi_dmethod",get("dmethod",envir=restatapi::.restatapi_env))
if(any(grepl("get_eurostat_bulk|get_eurostat_data|get_eurostat_raw",as.character(sys.calls()),perl=TRUE))) {update_cache<-FALSE}
verbose<-verbose|getOption("restatapi_verbose",FALSE)

if ((cache) & (!update_cache)) {
toc<-restatapi::get_eurostat_cache(paste0("toc.",mode,".",lang),cache_dir,verbose=verbose)
}
Expand Down Expand Up @@ -177,9 +176,9 @@ get_eurostat_toc<-function(mode="xml",
type<-as.character(unlist(lapply(xml_leafs,xml2::xml_attr,attr="type")))
toc<-cbind(toc,type)
# names(toc)<-c(sub("\\.$","",paste(xml2::xml_name(xml2::xml_children(xml_leafs[1])),sub(".*)","",as.character(xml2::xml_attrs(xml2::xml_children(xml_leafs[1])))),sep="."),perl=TRUE),"type")
keep<-c(paste0("title.",lang),"code","type","lastUpdate","lastModified","dataStart","dataEnd","values",paste0("unit.",lang),paste0("shortDescription.",lang),"metadata.html","metadata.sdmx","downloadLink.tsv","downloadLink.sdmx")
keep<-c(paste0("title.",lang),"code","type","lastUpdate","lastModified","dataStart","dataEnd","values",paste0("unit.",lang),paste0("shortDescription.",lang),"metadata.html","metadata.sdmx","downloadLink.tsv")
toc<-toc[,keep,with=FALSE]
names(toc)<-c("title","code","type","lastUpdate","lastModified","dataStart","dataEnd","values","unit","shortDescription","metadata.html","metadata.sdmx","downloadLink.tsv","downloadLink.sdmx")
names(toc)<-c("title","code","type","lastUpdate","lastModified","dataStart","dataEnd","values","unit","shortDescription","metadata.html","metadata.sdmx","downloadLink.tsv")
}
}
}
Expand Down
3 changes: 1 addition & 2 deletions R/search_eurostat_toc.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@
#' \code{shortDescription}\tab The short description of the values for tables in the language provided by the \code{lang} parameter; if the \code{type} is 'dataset' this column is empty\cr
#' \code{metadata.html}\tab The link to the metadata in html format\cr
#' \code{metadata.sdmx}\tab The link to the metadata in SDMX format\cr
#' \code{downloadLink.tsv}\tab The link to the whole dataset/table in tab separated values format in the bulk download facility \cr
#' \code{downloadLink.sdmx}\tab The link to the whole dataset/table in SDMX format in the bulk download facility
#' \code{downloadLink.tsv}\tab The link to the whole dataset/table in tab separated values format in the bulk download facility
#' }
#' The value in the \code{code} column can be used as an id in the \code{\link{get_eurostat_data}}, \code{\link{get_eurostat_bulk}}, \code{\link{get_eurostat_raw}} and \code{\link{get_eurostat_dsd}} functions.
#' If there is no hit for the search query, it returns \code{NULL}.
Expand Down
26 changes: 15 additions & 11 deletions inst/tinytest/test_restatapi.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ txt_toc<-get_eurostat_toc(mode="txt")
t2<-system.time({get_eurostat_toc()})[3]
expect_warning(get_eurostat_toc(mode="text")) # 1
if (!is.null(xml_toc)){
expect_equal(ncol(xml_toc),14) # 2
expect_equal(ncol(xml_toc),13) # 2
expect_true(exists("toc.xml.en",envir=restatapi::.restatapi_env)) # 3
if (!is.null(txt_toc)){
expect_equal(ncol(txt_toc),8) # 4
Expand Down Expand Up @@ -110,19 +110,23 @@ if (!is.null(dt1)&is.data.frame(dt1)&!is.null(dt2)&is.data.frame(dt2)){
} else {not_checked<-paste(not_checked,"18-21",sep=",")}

if (!is.null(xml_toc)){
testid3<-xml_toc$code[xml_toc$values==min(xml_toc$values)][1]
if (!is.na(testid3)){
expect_equal(nrow(get_eurostat_raw(testid3,verbose=FALSE)),min(xml_toc$values)) # 22
expect_equal(nrow(get_eurostat_raw(testid3,check_toc=TRUE,verbose=FALSE)),min(xml_toc$values)) # 23
expect_message(bt1<-get_eurostat_bulk("blabla",check_toc=TRUE,verbose=FALSE)) # 24
expect_equal(bt1,NULL) # 25
expect_equal(nrow(get_eurostat_data(testid3,verbose=FALSE)),min(xml_toc$values)) # 26
} else {not_checked<-paste(not_checked,"22-26",sep=",")}
testid3<-xml_toc$code[is.na(xml_toc$values)&is.na(xml_toc$lastUpdate)&is.na(xml_toc$downloadLink.tsv)][1]
# testid3<-xml_toc$code[(xml_toc$shortDescription=="")&is.na(xml_toc$metadata.html)&is.na(xml_toc$metadata.sdmx)][1]
if (!is.na(testid3)){
expect_message(rt1<-get_eurostat_raw(testid3,verbose=FALSE)) # 22
expect_equal(rt1,NULL) # 23
expect_message(rt2<-get_eurostat_raw(testid3,check_toc=TRUE,verbose=FALSE)) # 24
expect_equal(rt2,NULL) # 25
expect_message(bt1<-get_eurostat_bulk("blabla",check_toc=TRUE,verbose=FALSE)) # 26
expect_equal(bt1,NULL) # 27
expect_message(dt3<-get_eurostat_data(testid3,verbose=FALSE)) # 28
expect_equal(dt3,NULL) # 29
}
expect_message(rt1<-get_eurostat_raw(testid3,verbose=FALSE)) # 27
expect_message(rt2<-get_eurostat_raw(testid3,check_toc=TRUE,verbose=FALSE)) # 28
expect_message(dt3<-get_eurostat_data(testid3,verbose=FALSE)) # 29
} else {not_checked<-paste(not_checked,"26-29",sep=",")}
} else {not_checked<-paste(not_checked,"22-29",sep=",")}

rt3<-get_eurostat_raw(testid4,mode="xml",stringsAsFactors=TRUE,keep_flags=TRUE)
bt2<-get_eurostat_data(testid4,keep_flags=TRUE,stringsAsFactors=FALSE)
dt4<-get_eurostat_data(testid4,date_filter=2008,keep_flags=TRUE,stringsAsFactors=FALSE)
Expand All @@ -145,7 +149,7 @@ if (!is.null(bt3)&!is.null(bt4)){
} else {not_checked<-paste(not_checked,"32",sep=",")}
if (!is.null(rt4)&!is.null(rt5)){
expect_true(nrow(rt4)==nrow(rt5)) # 33
expect_true(ncol(rt4)+2==ncol(rt5)) # 34
expect_true(ncol(rt4)+1==ncol(rt5)) # 34
} else {not_checked<-paste(not_checked,"33-34",sep=",")}

#### test of filtering in the get_eurostat_data function
Expand Down

0 comments on commit d0c1657

Please sign in to comment.