Skip to content

Commit

Permalink
the new API is the default
Browse files Browse the repository at this point in the history
  • Loading branch information
mmatyi committed Feb 24, 2023
1 parent 130b36f commit a195920
Show file tree
Hide file tree
Showing 8 changed files with 86 additions and 65 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: restatapi
Type: Package
Title: Search and Retrieve Data from Eurostat Database
Date: 2023-02-23
Version: 0.20.2
Date: 2023-02-24
Version: 0.20.3
Encoding: UTF-8
Authors@R: person("Mátyás", "Mészáros", email = "matyas.meszaros@ec.europa.eu", role = c("aut", "cre"))
Description: Eurostat is the statistical office of the European Union and provides high quality statistics for Europe.
Expand Down
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# restatapi 0.20.3

- the new API is the default one
- adjusting tests and documentation for the new API
- temporarily disabled parallel processing under Windows


# restatapi 0.20.2

- adding additional debug messages to the `extract_data()`, `extract_dsd()`, `get_compressed_sdmx()`, `get_eurostat_dsd()`, `get_eurostat_raw()`, `get_eurostat_bulk()`, `get_eurostat_data()` and `get_eurostat_toc()` functions when `options(restatapi_verbose=TRUE)` is used
Expand Down
83 changes: 43 additions & 40 deletions R/get_eurostat_dsd.R
Original file line number Diff line number Diff line change
Expand Up @@ -141,60 +141,63 @@ get_eurostat_dsd <- function(id,
}else{
dsd<-data.frame(do.call(rbind,parallel::mclapply(concepts,restatapi::extract_dsd,dsd_xml=dsd_xml,lang=lang,mc.cores=getOption("restatapi_cores",1L))),stringsAsFactors=FALSE)
}
names(dsd)<-c("concept","code","name")
if (verbose) {message("get_eurostat_dsd - DSD NULL:",is.null(dsd))}
if (!is.null(dsd)) {names(dsd)<-c("concept","code","name")}

#get content constraint (cc)

cc_endpoint <- paste0(eval(parse(text=paste0("cfg$QUERY_BASE_URL$'",rav,"'$ESTAT$metadata$'2.1'$contentconstraint"))),"/",
eval(parse(text=paste0("cfg$QUERY_PRIOR_ID$'",rav,"'$ESTAT$metadata"))),id)
temp<-tempfile()
if (verbose) {
message("get_eurostat_dsd - Trying to download the CC from: ",cc_endpoint)
tryCatch({utils::download.file(cc_endpoint,temp,dmethod)},
error = function(e) {
message("get_eurostat_dsd - Error by the download of the CC file:",'\n',paste(unlist(e),collapse="\n"))
},
warning = function(w) {
message("get_eurostat_dsd - Warning by the download of the CC file:",'\n',paste(unlist(w),collapse="\n"))
})
if (file.size(temp)!=0) {
message("Trying to extract the CC from: ",temp)
tryCatch({cc_xml<-xml2::read_xml(temp)},
if (!is.null(dsd)){
cc_endpoint <- paste0(eval(parse(text=paste0("cfg$QUERY_BASE_URL$'",rav,"'$ESTAT$metadata$'2.1'$contentconstraint"))),"/",
eval(parse(text=paste0("cfg$QUERY_PRIOR_ID$'",rav,"'$ESTAT$metadata"))),id)
temp<-tempfile()
if (verbose) {
message("get_eurostat_dsd - Trying to download the CC from: ",cc_endpoint)
tryCatch({utils::download.file(cc_endpoint,temp,dmethod)},
error = function(e) {
message("get_eurostat_dsd - Error during the extraction of the XML from the downloaded CC file:",'\n',paste(unlist(e),collapse="\n"))
cc_xml<-NULL
message("get_eurostat_dsd - Error by the download of the CC file:",'\n',paste(unlist(e),collapse="\n"))
},
warning = function(w) {
message("get_eurostat_dsd - There is warning by the extraction of the XML from the downloaded CC file:",'\n',paste(unlist(w),collapse="\n"))
message("get_eurostat_dsd - Warning by the download of the CC file:",'\n',paste(unlist(w),collapse="\n"))
})
if (file.size(temp)!=0) {
message("Trying to extract the CC from: ",temp)
tryCatch({cc_xml<-xml2::read_xml(temp)},
error = function(e) {
message("get_eurostat_dsd - Error during the extraction of the XML from the downloaded CC file:",'\n',paste(unlist(e),collapse="\n"))
cc_xml<-NULL
},
warning = function(w) {
message("get_eurostat_dsd - There is warning by the extraction of the XML from the downloaded CC file:",'\n',paste(unlist(w),collapse="\n"))
})
} else {
cc_xml<-NULL
}
} else {
cc_xml<-NULL
}
} else {
tryCatch({utils::download.file(cc_endpoint,temp,dmethod,quiet=TRUE)},
error = function(e) {
},
warning = function(w) {
})
if (file.size(temp)!=0) {
tryCatch({cc_xml<-xml2::read_xml(temp)},
tryCatch({utils::download.file(cc_endpoint,temp,dmethod,quiet=TRUE)},
error = function(e) {
cc_xml<-NULL
},
warning = function(w) {
})
} else {
cc_xml<-NULL
if (file.size(temp)!=0) {
tryCatch({cc_xml<-xml2::read_xml(temp)},
error = function(e) {
cc_xml<-NULL
},
warning = function(w) {
})
} else {
cc_xml<-NULL
}
}
unlink(temp)
if (!is.null(cc_xml)){
cconcepts<-xml2::xml_attr(xml2::xml_find_all(cc_xml,"//c:KeyValue"),"id")
if (verbose) {message(class(cconcepts),"\nnumber of nodes: ",length(cconcepts),"\nnumber of cores: ",getOption("restatapi_cores",1L),"\n")}
}

ft_dsd<-data.frame(do.call(rbind,lapply(cconcepts,filter_dsd,cc_xml=cc_xml, dsd=dsd)),stringsAsFactors=FALSE)
dsd<-ft_dsd
}
unlink(temp)
if (!is.null(cc_xml)){
cconcepts<-xml2::xml_attr(xml2::xml_find_all(cc_xml,"//c:KeyValue"),"id")
if (verbose) {message(class(cconcepts),"\nnumber of nodes: ",length(cconcepts),"\nnumber of cores: ",getOption("restatapi_cores",1L),"\n")}
}

ft_dsd<-data.frame(do.call(rbind,lapply(cconcepts,filter_dsd,cc_xml=cc_xml, dsd=dsd)),stringsAsFactors=FALSE)
dsd<-ft_dsd

if (cache){
pl<-restatapi::put_eurostat_cache(dsd,paste0(id,".dsd"),update_cache,cache_dir,compress_file)
Expand Down
4 changes: 2 additions & 2 deletions R/get_eurostat_toc.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ get_eurostat_toc<-function(mode="xml",
verbose=FALSE,...) {
toc<-xml_leafs<-NULL
tbc<-TRUE
if (verbose) {message("\nget_eurostat_toc - API version:",get("rav",envir=restatapi::.restatapi_env))}
if (verbose) {message("\nget_eurostat_toc - API version:",get("rav",envir=restatapi::.restatapi_env)," - number of cores:",getOption("restatapi_cores",1L))}
if((!exists(".restatapi_env")|(length(list(...))>0))){
if ((length(list(...))>0)) {
if (all(names(list(...)) %in% c("api_version","load_toc","parallel","max_cores","verbose"))){
Expand All @@ -77,7 +77,7 @@ get_eurostat_toc<-function(mode="xml",
load_cfg()
}
}
if (verbose) {message("get_eurostat_toc - API version:",get("rav",envir=restatapi::.restatapi_env))}
if (verbose) {message("get_eurostat_toc - API version:",get("rav",envir=restatapi::.restatapi_env)," - number of cores:",getOption("restatapi_cores",1L))}
update_cache<-update_cache|getOption("restatapi_update",FALSE)
dmethod<-getOption("restatapi_dmethod",get("dmethod",envir=restatapi::.restatapi_env))
if(any(grepl("get_eurostat_bulk|get_eurostat_data|get_eurostat_raw",as.character(sys.calls()),perl=TRUE))) {update_cache<-FALSE}
Expand Down
5 changes: 4 additions & 1 deletion R/load_cfg.R
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,11 @@ load_cfg<-function(api_version="default",cfg_file="github",load_toc=FALSE,parall
Linux={tryCatch({as.numeric(system("awk '/MemTotal/ {print $2}' /proc/meminfo",intern=TRUE,ignore.stderr=TRUE))/1024},error=function(e){0},warning=function(w){0})}
))
if (is.null(mem_size)|length(mem_size)==0){mem_size<-0}
# if (Sys.info()[['sysname']]=='Windows'){parallel<-FALSE}
if (parallel) {
if (max_cores){
if (Sys.info()[['sysname']]=='Windows'){
options(restatapi_cores=1)
} else if (max_cores){
options(restatapi_cores=parallel::detectCores()-1)
} else {
if (max(getOption("mc.cores"),Sys.getenv("MC_CORES"))>0){
Expand Down
13 changes: 8 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,13 @@ status](https://github.com/eurostat/restatapi/workflows/R-CMD-check/badge.svg)](
[![all downloads](https://cranlogs.r-pkg.org/badges/grand-total/restatapi)](https://mmatyi.github.io/restatapi_logs/)
<!-- badges: end -->

# IMPORTANT changes with the new API
# <span style="color:red">IMPORTANT changes with the new API</span>

Version 0.20.0 enables all the functionality for the [new dissemination chain](https://wikis.ec.europa.eu/display/EUROSTATHELP/Developer%27s+corner). It has breaking changes concerning the `date_filter` as in the old dissemination the value was assigned to *the first day* of the month, quarter and year so it was enough to filter for one day to get the value. Under the new API the value belongs to the full period. If a date range does not cover the whole period no values will be returned. E.g. to get the value of the whole quarter the date filter should start at least on the first date of the quarter and end at least on the last day of the quarter. With concrete numbers to get the value for 2022/Q3, the `startDate` should be 2022-07-01 or earlier and the `endDate` 2022-09-30 or later. In the old version it was enough if the period included the day 2022-07-01 only.
Version 0.20.0 enables all the functionality for the [new dissemination chain](https://wikis.ec.europa.eu/display/EUROSTATHELP/Developer%27s+corner) and from version 0.20.3 it is the default API.

The new API has **breaking changes** concerning the `date_filter`. In the old dissemination chain the value was assigned to *the first day* of the month, quarter or year, so it was enough to filter for that one day to get the value. Under the new API the value belongs to the full period: if a date range does not cover the whole period, no values will be returned. E.g. to get the value of a whole quarter, the date filter should start on or before the first day of the quarter and end on or after the last day of the quarter. As a concrete example, to get the value for 2022/Q3, the `startDate` should be 2022-07-01 or earlier and the `endDate` 2022-09-30 or later. In the old version it was enough if the period included only the day 2022-07-01.

In addition, if the date filter is only a single day (e.g. `startDate=2007-07-02&endDate=2007-07-02`), then the new API returns the values for all the time periods in the dataset, while still applying the filters for the other concepts. But if the date filter spans more than one day (e.g. `startDate=2007-07-01&endDate=2007-07-02`), then the new API returns only those values which are covered by the range.

# restatapi
An R package to search and retrieve data from Eurostat database using SDMX
Expand Down Expand Up @@ -99,9 +102,9 @@ options(restatapi_update=TRUE)
options(restatapi_cache_dir=file.path(tempdir(),"restatapi"))
```

**Example 6:** First download the annual (`select_freq="A"`) air passenger transport data for the main airports of Montenegro (`avia_par_me`) and do not cache any of the data (`cache=FALSE`). Then from the same table download the monthly (`select_freq="M"`) and quarterly (`filters="Q...`) data for 2 specific airport pairs/routes (`filters=...ME_LYPG_HU_LHBP+ME_LYTV_UA_UKKK"`) in August 2016 and on 1 July 2017 (`date_filter=c("2016-08","2017-07-01")`). The filters are provided in the format how it is required by the [REST SDMX web service](https://wikis.ec.europa.eu/pages/viewpage.action?pageId=44165555).
Then download again the monthly and quarterly data (`filters=c("Quarterly","Monthly")`) where there is exact match in the DSD for "HU" for August 2016 and 1 March 2014 (`date_filter=c("2016-08","2014-03-01")`). This query will provide only monthly data for 2016, as the quarterly data is always assigned to the first month of the quarter and there is no data for 2014. Since there is no exact match for the "HU" pattern, it will return all the monthly data for August 2016 and put the labels (like the name of the airports and units) so the data can be easier understood (`label=TRUE`).
Finally, download only the quarterly data (`select_freq="Q"`) for several time periods (`date_filter=c("2017-03",2016,"2017-07-01",2012:2014)`, the order of the dates does not matter) where the "HU" pattern can be found anywhere, but only in the `code` column of the DSD (`filters="HU",exact_match=FALSE,name=FALSE`). The result will be all the statistics about flights from Montenegro to Hungary in the 3rd quarter of 2017, as there is no information for the other time periods.
**Example 6:** First download the annual (`select_freq="A"`) air passenger transport data for the main airports of Montenegro (`avia_par_me`) and do not cache any of the data (`cache=FALSE`). Then from the same table download the monthly (`select_freq="M"`) and quarterly (`filters="Q...`) data for 2 specific airport pairs/routes (`filters=...ME_LYPG_HU_LHBP+ME_LYTV_UA_UKKK"`) in August 2016 and on 1 July 2017 (`date_filter=c("2016-08","2017-07-01")`). The filters are provided in the format required by the [REST SDMX web service](https://wikis.ec.europa.eu/pages/viewpage.action?pageId=44165555). Under the old API this returned the values for the selected routes for the months August 2016 and July 2017 and for the 3rd quarter of 2017. Under the new API it returns all the quarterly and monthly values, as there is a single day in the `date_filter`.
Then download again the monthly and quarterly data (`filters=c("Quarterly","Monthly")`) where there is an exact match in the DSD for "HU" for August 2016 and 1 March 2014 (`date_filter=c("2016-08","2014-03-01")`). Under the old API this query provided only monthly data for 2016, as the quarterly data was always assigned to the first month of the quarter and there is no data for 2014. Since there is no exact match for the "HU" pattern, it returned all the monthly data for August 2016 and added the labels (like the names of the airports and units) so the data can be understood more easily (`label=TRUE`). Under the new API it returns all the quarterly and monthly data, as there is a single day in the `date_filter`.
Finally, download only the quarterly data (`select_freq="Q"`) for several time periods (`date_filter=c("2017-03",2016,"2017-07-01",2012:2014)`, the order of the dates does not matter) where the "HU" pattern can be found anywhere, but only in the `code` column of the DSD (`filters="HU",exact_match=FALSE,name=FALSE`). Under the old API the result was all the statistics about flights from Montenegro to Hungary in the 3rd quarter of 2017, as there is no information for the other time periods. Under the new API it returns all the quarterly data in the dataset for flights from Montenegro to Hungary, because there is a single day in the `date_filter`.

```R
dt<-get_eurostat_data("avia_par_me",select_freq="A",cache=FALSE)
Expand Down
2 changes: 1 addition & 1 deletion inst/extdata/rest_api_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"old": 1,
"new": 2,
"test": 3,
"current": 1
"current": 2
},
"REST_VERSION": {
"1": 2.1,
Expand Down
33 changes: 19 additions & 14 deletions inst/tinytest/test_restatapi.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@ if (parallel::detectCores()<=2){
options(restatapi_cores=1)
}else{
options(restatapi_cores=2)
}
}

if (Sys.info()[['sysname']]=='Windows'){
options(restatapi_cores=1)
}

if (capabilities("libcurl")){
options(restatapi_dmethod="libcurl")
}
Expand Down Expand Up @@ -230,11 +235,11 @@ if (!is.null(dsd1)&is.data.frame(dsd1)){
message("\n ########--------- 52 test of filtering in the get_eurostat_data function")
expect_equal(nr5,11)
} else {no_check<-paste(no_check,"52",sep=", ")}
nr6<-nrow(get_eurostat_data(testid6,filters="Q...ME_LYPG_HU_LHBP+ME_LYTV_UA_UKKK",date_filter=c("2017-07-01"),select_freq="M",cflags=TRUE))
# if (!is.null(nr6)){
# message("\n ########--------- 53 test of filtering in the get_eurostat_data function")
# expect_equal(nr6,48)
# } else {no_check<-paste(no_check,"53",sep=", ")}
nr6<-nrow(get_eurostat_data(testid6,filters="Q...ME_LYPG_HU_LHBP+ME_LYTV_UA_UKKK",date_filter=c("2017-07-01:2017-09-30"),select_freq="M",cflags=TRUE))
if (!is.null(nr6)){
message("\n ########--------- 53 test of filtering in the get_eurostat_data function")
expect_equal(nr6,96)
} else {no_check<-paste(no_check,"53",sep=", ")}
} else {no_check<-paste(no_check,"47-53",sep=", ")}

dsd2<-get_eurostat_dsd(testid6)
Expand Down Expand Up @@ -397,10 +402,10 @@ if (grepl("\\.amzn|-aws|5.4.109+",Sys.info()['release'])) {
message("\n ########--------- 97 additional tests for filtering in the get_eurostat_data function")
expect_equal(nr9,24)
} else {no_check<-paste(no_check,"97",sep=", ")}
nr10<-nrow(get_eurostat_data(testid6,date_filter=c(2016,"2017-03","2017-05","2017-07-01"),select_freq="Q",cflags=TRUE))
nr10<-nrow(get_eurostat_data(testid6,date_filter=c(2016,"2017-03","2017-05","2017-07-01:2017-09-30"),select_freq="Q",cflags=TRUE))
if (!is.null(nr10)){
# message("\n ########--------- 98 additional tests for filtering in the get_eurostat_data function")
# expect_equal(nr10,1232)
message("\n ########--------- 98 additional tests for filtering in the get_eurostat_data function")
expect_equal(nr10,1232)
} else {no_check<-paste(no_check,"98",sep=", ")}
dt5<-get_eurostat_data(testid6,filters="Q...ME_LYPG_HU_LHBP+ME_LYTV_UA_UKKK",date_filter=c("2016-08","2017-07-01"),select_freq="M")
dt6<-get_eurostat_data(testid6,filters=c("HU","Quarterly","Monthly"),date_filter=c("2016-08","2017-07-01"),stringsAsFactors=FALSE,label=TRUE)
Expand All @@ -419,10 +424,10 @@ if (grepl("\\.amzn|-aws|5.4.109+",Sys.info()['release'])) {
message("\n ########--------- 102 additional tests for filtering in the get_eurostat_data function")
expect_false(any(sapply(dt6,is.factor)))
} else {no_check<-paste(no_check,"102",sep=", ")}
dt8<-get_eurostat_data(testid6,filters="BE$",date_filter=c("2017-03",2016,"2017-07-01",2012:2014),select_freq="Q",label=TRUE,verbose=FALSE,name=FALSE)
dt8<-get_eurostat_data(testid6,filters="BE$",date_filter=c("2017-03",2016,"2017-07-01:2017-09-30",2012:2014),select_freq="Q",label=TRUE,verbose=FALSE,name=FALSE)
if (!is.null(dt8)){
# message("\n ########--------- 103 additional tests for filtering in the get_eurostat_data function")
# expect_true(nrow(dt8)<=5040)
message("\n ########--------- 103 additional tests for filtering in the get_eurostat_data function")
expect_true(nrow(dt8)<=5040)
message("\n ########--------- 104 additional tests for filtering in the get_eurostat_data function")
expect_true(ncol(dt8)<=5)
} else {no_check<-paste(no_check,"103-104",sep=", ")}
Expand All @@ -443,8 +448,8 @@ if (grepl("\\.amzn|-aws|5.4.109+",Sys.info()['release'])) {
if (!is.null(dsd3)&is.data.frame(dsd3)){
nr11<-nrow(get_eurostat_data(testid9,filters="Monthly",exact_match=FALSE,date_filter=c("<2018-07-01"),select_freq="A",label=TRUE,name=FALSE))
if (!is.null(nr11)){
# message("\n ########--------- 107 additional tests for filtering in the get_eurostat_data function")
# expect_equal(nr11,5565)
message("\n ########--------- 107 additional tests for filtering in the get_eurostat_data function")
expect_equal(nr11,4845)
} else {no_check<-paste(no_check,"107",sep=", ")}
} else {no_check<-paste(no_check,"107",sep=", ")}
dsd4<-get_eurostat_dsd(testid10)
Expand Down

0 comments on commit a195920

Please sign in to comment.