the new API is the default

eurostat · Feb 24, 2023 · a195920 · a195920
1 parent 130b36f
commit a195920
Show file tree

Hide file tree

Showing 8 changed files with 86 additions and 65 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: restatapi
 Type: Package
 Title: Search and Retrieve Data from Eurostat Database
-Date: 2023-02-23
-Version: 0.20.2
+Date: 2023-02-24
+Version: 0.20.3
 Encoding: UTF-8
 Authors@R: person("Mátyás", "Mészáros", email = "matyas.meszaros@ec.europa.eu", role = c("aut", "cre"))
 Description: Eurostat is the statistical office of the European Union and provides high quality statistics for Europe.

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,10 @@
+# restatapi 0.20.3
+
+- the new API is the default one
+- adjusting tests and documentation for the new API
+- temporarily disabled parallel processing under Windows
+
+
 # restatapi 0.20.2
 
 - adding additional debug messages when the `option(restatapi_verbose=TRUE)` used for the `extract_data()`, `extract_dsd()`, `get_compressed_sdmx()`,`get_eurostat_dsd()`, `get_eurostat_raw()`, `get_eurostat_bulk()`, `get_eurostat_data()`and `get_eurostat_toc()` functions

diff --git a/R/get_eurostat_dsd.R b/R/get_eurostat_dsd.R
@@ -141,60 +141,63 @@ get_eurostat_dsd <- function(id,
         }else{
           dsd<-data.frame(do.call(rbind,parallel::mclapply(concepts,restatapi::extract_dsd,dsd_xml=dsd_xml,lang=lang,mc.cores=getOption("restatapi_cores",1L))),stringsAsFactors=FALSE)
         }  
-        names(dsd)<-c("concept","code","name")
+        if (verbose) {message("get_eurostat_dsd - DSD NULL:",is.null(dsd))}
+        if (!is.null(dsd)) {names(dsd)<-c("concept","code","name")}
 
   #get content constraint (cc)
 
-        cc_endpoint <- paste0(eval(parse(text=paste0("cfg$QUERY_BASE_URL$'",rav,"'$ESTAT$metadata$'2.1'$contentconstraint"))),"/", 
-                               eval(parse(text=paste0("cfg$QUERY_PRIOR_ID$'",rav,"'$ESTAT$metadata"))),id)
-        temp<-tempfile()
-        if (verbose) {
-          message("get_eurostat_dsd - Trying to download the CC from: ",cc_endpoint)
-          tryCatch({utils::download.file(cc_endpoint,temp,dmethod)},
-                   error = function(e) {
-                     message("get_eurostat_dsd - Error by the download of the CC file:",'\n',paste(unlist(e),collapse="\n"))
-                   },
-                   warning = function(w) {
-                     message("get_eurostat_dsd - Warning by the download of the CC file:",'\n',paste(unlist(w),collapse="\n"))
-                   })
-          if (file.size(temp)!=0) {
-            message("Trying to extract the CC from: ",temp)
-            tryCatch({cc_xml<-xml2::read_xml(temp)},
+        if (!is.null(dsd)){
+          cc_endpoint <- paste0(eval(parse(text=paste0("cfg$QUERY_BASE_URL$'",rav,"'$ESTAT$metadata$'2.1'$contentconstraint"))),"/", 
+                                eval(parse(text=paste0("cfg$QUERY_PRIOR_ID$'",rav,"'$ESTAT$metadata"))),id)
+          temp<-tempfile()
+          if (verbose) {
+            message("get_eurostat_dsd - Trying to download the CC from: ",cc_endpoint)
+            tryCatch({utils::download.file(cc_endpoint,temp,dmethod)},
                      error = function(e) {
-                       message("get_eurostat_dsd - Error during the extraction of the XML from the downloaded CC file:",'\n',paste(unlist(e),collapse="\n"))
-                       cc_xml<-NULL
+                       message("get_eurostat_dsd - Error by the download of the CC file:",'\n',paste(unlist(e),collapse="\n"))
                      },
                      warning = function(w) {
-                       message("get_eurostat_dsd - There is warning by the extraction of the XML from the downloaded CC file:",'\n',paste(unlist(w),collapse="\n"))
+                       message("get_eurostat_dsd - Warning by the download of the CC file:",'\n',paste(unlist(w),collapse="\n"))
                      })
+            if (file.size(temp)!=0) {
+              message("Trying to extract the CC from: ",temp)
+              tryCatch({cc_xml<-xml2::read_xml(temp)},
+                       error = function(e) {
+                         message("get_eurostat_dsd - Error during the extraction of the XML from the downloaded CC file:",'\n',paste(unlist(e),collapse="\n"))
+                         cc_xml<-NULL
+                       },
+                       warning = function(w) {
+                         message("get_eurostat_dsd - There is warning by the extraction of the XML from the downloaded CC file:",'\n',paste(unlist(w),collapse="\n"))
+                       })
+            } else {
+              cc_xml<-NULL
+            }
           } else {
-            cc_xml<-NULL
-          }
-        } else {
-          tryCatch({utils::download.file(cc_endpoint,temp,dmethod,quiet=TRUE)},
-                   error = function(e) {
-                   },
-                   warning = function(w) {
-                   })
-          if (file.size(temp)!=0) {
-            tryCatch({cc_xml<-xml2::read_xml(temp)},
+            tryCatch({utils::download.file(cc_endpoint,temp,dmethod,quiet=TRUE)},
                      error = function(e) {
-                       cc_xml<-NULL
                      },
                      warning = function(w) {
                      })
-          } else {
-            cc_xml<-NULL
+            if (file.size(temp)!=0) {
+              tryCatch({cc_xml<-xml2::read_xml(temp)},
+                       error = function(e) {
+                         cc_xml<-NULL
+                       },
+                       warning = function(w) {
+                       })
+            } else {
+              cc_xml<-NULL
+            }
           }
+          unlink(temp)
+          if (!is.null(cc_xml)){
+            cconcepts<-xml2::xml_attr(xml2::xml_find_all(cc_xml,"//c:KeyValue"),"id")
+            if (verbose) {message(class(cconcepts),"\nnumber of nodes: ",length(cconcepts),"\nnumber of cores: ",getOption("restatapi_cores",1L),"\n")}
+          }
+
+          ft_dsd<-data.frame(do.call(rbind,lapply(cconcepts,filter_dsd,cc_xml=cc_xml, dsd=dsd)),stringsAsFactors=FALSE)
+          dsd<-ft_dsd
         }
-        unlink(temp)
-        if (!is.null(cc_xml)){
-          cconcepts<-xml2::xml_attr(xml2::xml_find_all(cc_xml,"//c:KeyValue"),"id")
-          if (verbose) {message(class(cconcepts),"\nnumber of nodes: ",length(cconcepts),"\nnumber of cores: ",getOption("restatapi_cores",1L),"\n")}
-        }
-
-        ft_dsd<-data.frame(do.call(rbind,lapply(cconcepts,filter_dsd,cc_xml=cc_xml, dsd=dsd)),stringsAsFactors=FALSE)
-        dsd<-ft_dsd
 
         if (cache){
           pl<-restatapi::put_eurostat_cache(dsd,paste0(id,".dsd"),update_cache,cache_dir,compress_file)

diff --git a/R/get_eurostat_toc.R b/R/get_eurostat_toc.R
@@ -65,7 +65,7 @@ get_eurostat_toc<-function(mode="xml",
                            verbose=FALSE,...) {
   toc<-xml_leafs<-NULL
   tbc<-TRUE
-  if (verbose)  {message("\nget_eurostat_toc - API version:",get("rav",envir=restatapi::.restatapi_env))}
+  if (verbose)  {message("\nget_eurostat_toc - API version:",get("rav",envir=restatapi::.restatapi_env)," - number of cores:",getOption("restatapi_cores",1L))}
   if((!exists(".restatapi_env")|(length(list(...))>0))){
     if ((length(list(...))>0)) {
       if (all(names(list(...)) %in% c("api_version","load_toc","parallel","max_cores","verbose"))){
@@ -77,7 +77,7 @@ get_eurostat_toc<-function(mode="xml",
       load_cfg()
     }  
   }
-  if (verbose)  {message("get_eurostat_toc - API version:",get("rav",envir=restatapi::.restatapi_env))}
+  if (verbose)  {message("get_eurostat_toc - API version:",get("rav",envir=restatapi::.restatapi_env)," - number of cores:",getOption("restatapi_cores",1L))}
   update_cache<-update_cache|getOption("restatapi_update",FALSE)
   dmethod<-getOption("restatapi_dmethod",get("dmethod",envir=restatapi::.restatapi_env))
   if(any(grepl("get_eurostat_bulk|get_eurostat_data|get_eurostat_raw",as.character(sys.calls()),perl=TRUE))) {update_cache<-FALSE}

diff --git a/R/load_cfg.R b/R/load_cfg.R
@@ -109,8 +109,11 @@ load_cfg<-function(api_version="default",cfg_file="github",load_toc=FALSE,parall
                                       Linux={tryCatch({as.numeric(system("awk '/MemTotal/ {print $2}' /proc/meminfo",intern=TRUE,ignore.stderr=TRUE))/1024},error=function(e){0},warning=function(w){0})}
     ))
     if (is.null(mem_size)|length(mem_size)==0){mem_size<-0}
+    # if (Sys.info()[['sysname']]=='Windows'){parallel<-FALSE}
     if (parallel) {
-      if (max_cores){
+      if (Sys.info()[['sysname']]=='Windows'){
+        options(restatapi_cores=1)
+      } else if (max_cores){
         options(restatapi_cores=parallel::detectCores()-1)
       } else {
         if (max(getOption("mc.cores"),Sys.getenv("MC_CORES"))>0){

diff --git a/README.md b/README.md
@@ -11,10 +11,13 @@ status](https://github.com/eurostat/restatapi/workflows/R-CMD-check/badge.svg)](
 [![all downloads](https://cranlogs.r-pkg.org/badges/grand-total/restatapi)](https://mmatyi.github.io/restatapi_logs/)
 <!-- badges: end -->
 
-# IMPORTANT changes with the new API
+# <span style="color:red">IMPORTANT changes with the new API</span>
 
-Version 0.20.0 enables all the functionality for the [new dissemination chain](https://wikis.ec.europa.eu/display/EUROSTATHELP/Developer%27s+corner). It has breaking changes concerning the `date_filter` as in the old dissemination the value was assigned to *the first day* of the month, quarter and year so it was enough to filter for one day to get the value. Under the new API the value belongs to the full period. If a date range does not cover the whole period no values will be returned. E.g. to get the value of the whole quarter the date filter should start at least on the first date of the quarter and end at least on the last day of the quarter. With concrete numbers to get the value for 2022/Q3, the `startDate` should be 2022-07-01 or earlier and the `endDate` 2022-09-30 or later. In the old version it was enough if the period included the day 2022-07-01 only. 
+Version 0.20.0 enables all the functionality for the [new dissemination chain](https://wikis.ec.europa.eu/display/EUROSTATHELP/Developer%27s+corner) and from version 0.20.3 it is the default API.
 
+The new API has **breaking changes** concerning the `date_filter`. In the old dissemination chain the value was assigned to *the first day* of the month, quarter or year, so it was enough to filter for one day to get the value. Under the new API the value belongs to the full period. If a date range does not cover the whole period, no values will be returned. E.g. to get the value of a whole quarter, the date filter should start on or before the first day of the quarter and end on or after the last day of the quarter. With concrete numbers: to get the value for 2022/Q3, the `startDate` should be 2022-07-01 or earlier and the `endDate` 2022-09-30 or later. In the old version it was enough if the period included only the day 2022-07-01. 
+
+In addition, if the date filter is only one day (e.g. `startDate=2007-07-02&endDate=2007-07-02`) then the new API will give you back the values for all the time periods in the dataset applying the filter for the other concepts. But if the time period changes to more than one day (e.g. `startDate=2007-07-01&endDate=2007-07-02`) then the new API gives back only those values which are covered by the range.  
 
 # restatapi
 An R package to search and retrieve data from Eurostat database using SDMX  
@@ -99,9 +102,9 @@ options(restatapi_update=TRUE)
 options(restatapi_cache_dir=file.path(tempdir(),"restatapi"))
 ```
 
-**Example 6:** First download the annual (`select_freq="A"`) air passenger transport data for the main airports of Montenegro (`avia_par_me`) and do not cache any of the data (`cache=FALSE`). Then from the same table download the monthly (`select_freq="M"`) and quarterly (`filters="Q...`) data for 2 specific airport pairs/routes (`filters=...ME_LYPG_HU_LHBP+ME_LYTV_UA_UKKK"`) in August 2016 and on 1 July 2017 (`date_filter=c("2016-08","2017-07-01")`). The filters are provided in the format how it is required by the [REST SDMX web service](https://wikis.ec.europa.eu/pages/viewpage.action?pageId=44165555).
-Then download again the monthly and quarterly data (`filters=c("Quarterly","Monthly")`) where there is exact match in the DSD for "HU" for August 2016 and 1 March 2014 (`date_filter=c("2016-08","2014-03-01")`). This query will provide only monthly data for 2016, as the quarterly data is always assigned to the first month of the quarter and there is no data for 2014. Since there is no exact match for the "HU" pattern, it will return all the monthly data for August 2016 and put the labels (like the name of the airports and units) so the data can be easier understood (`label=TRUE`). 
-Finally, download only the quarterly data (`select_freq="Q"`) for several time periods (`date_filter=c("2017-03",2016,"2017-07-01",2012:2014)`, the order of the dates does not matter) where the "HU" pattern can be found anywhere, but only in the `code` column of the DSD (`filters="HU",exact_match=FALSE,name=FALSE`). The result will be all the statistics about flights from Montenegro to Hungary in the 3rd quarter of 2017, as there is no information for the other time periods. 
+**Example 6:** First download the annual (`select_freq="A"`) air passenger transport data for the main airports of Montenegro (`avia_par_me`) and do not cache any of the data (`cache=FALSE`). Then from the same table download the monthly (`select_freq="M"`) and quarterly (`filters="Q...`) data for 2 specific airport pairs/routes (`filters=...ME_LYPG_HU_LHBP+ME_LYTV_UA_UKKK"`) in August 2016 and on 1 July 2017 (`date_filter=c("2016-08","2017-07-01")`). The filters are provided in the format required by the [REST SDMX web service](https://wikis.ec.europa.eu/pages/viewpage.action?pageId=44165555). Under the old API it returned the values for the selected routes for August 2016, July 2017 and the 3rd quarter of 2017. Under the new API, however, it returns all the quarterly and monthly values, as there is a single day in the `date_filter`.
+Then download again the monthly and quarterly data (`filters=c("Quarterly","Monthly")`) where there is an exact match in the DSD for "HU" for August 2016 and 1 March 2014 (`date_filter=c("2016-08","2014-03-01")`). This query will provide only monthly data for 2016, as the quarterly data is always assigned to the first month of the quarter and there is no data for 2014. Since there is no exact match for the "HU" pattern, under the old API it returned all the monthly data for August 2016 and added the labels (like the name of the airports and units) so that the data can be understood more easily (`label=TRUE`). Under the new API it returns all the quarterly and monthly data, as there is a single day in the `date_filter`.
+Finally, download only the quarterly data (`select_freq="Q"`) for several time periods (`date_filter=c("2017-03",2016,"2017-07-01",2012:2014)`, the order of the dates does not matter) where the "HU" pattern can be found anywhere, but only in the `code` column of the DSD (`filters="HU",exact_match=FALSE,name=FALSE`). Under the old API the result was all the statistics about flights from Montenegro to Hungary in the 3rd quarter of 2017, as there is no information for the other time periods. Under the new API it returns all the quarterly data in the dataset for flights from Montenegro to Hungary, because there is a single day in the `date_filter`. 
 
 ```R
 dt<-get_eurostat_data("avia_par_me",select_freq="A",cache=FALSE)

diff --git a/inst/extdata/rest_api_config.json b/inst/extdata/rest_api_config.json
@@ -12,7 +12,7 @@
     "old": 1,
     "new": 2,
     "test": 3,
-    "current": 1
+    "current": 2
   },
   "REST_VERSION": {
     "1": 2.1,

diff --git a/inst/tinytest/test_restatapi.R b/inst/tinytest/test_restatapi.R
@@ -9,7 +9,12 @@ if (parallel::detectCores()<=2){
   options(restatapi_cores=1)
 }else{
   options(restatapi_cores=2)
-}    
+}  
+
+if (Sys.info()[['sysname']]=='Windows'){
+  options(restatapi_cores=1)
+}
+
 if (capabilities("libcurl")){
   options(restatapi_dmethod="libcurl")
 }
@@ -230,11 +235,11 @@ if (!is.null(dsd1)&is.data.frame(dsd1)){
     message("\n ########--------- 52 test of filtering in the get_eurostat_data function")
     expect_equal(nr5,11)
   } else {no_check<-paste(no_check,"52",sep=", ")} 
-  nr6<-nrow(get_eurostat_data(testid6,filters="Q...ME_LYPG_HU_LHBP+ME_LYTV_UA_UKKK",date_filter=c("2017-07-01"),select_freq="M",cflags=TRUE))
-#  if (!is.null(nr6)){
-#    message("\n ########--------- 53 test of filtering in the get_eurostat_data function")
-#    expect_equal(nr6,48)
-#  } else {no_check<-paste(no_check,"53",sep=", ")} 
+  nr6<-nrow(get_eurostat_data(testid6,filters="Q...ME_LYPG_HU_LHBP+ME_LYTV_UA_UKKK",date_filter=c("2017-07-01:2017-09-30"),select_freq="M",cflags=TRUE))
+  if (!is.null(nr6)){
+    message("\n ########--------- 53 test of filtering in the get_eurostat_data function")
+    expect_equal(nr6,96)
+  } else {no_check<-paste(no_check,"53",sep=", ")}
 } else {no_check<-paste(no_check,"47-53",sep=", ")} 
 
 dsd2<-get_eurostat_dsd(testid6)
@@ -397,10 +402,10 @@ if (grepl("\\.amzn|-aws|5.4.109+",Sys.info()['release'])) {
       message("\n ########--------- 97 additional tests for filtering in the get_eurostat_data function")
       expect_equal(nr9,24)
     } else {no_check<-paste(no_check,"97",sep=", ")}
-    nr10<-nrow(get_eurostat_data(testid6,date_filter=c(2016,"2017-03","2017-05","2017-07-01"),select_freq="Q",cflags=TRUE))
+    nr10<-nrow(get_eurostat_data(testid6,date_filter=c(2016,"2017-03","2017-05","2017-07-01:2017-09-30"),select_freq="Q",cflags=TRUE))
     if (!is.null(nr10)){
-    #  message("\n ########--------- 98 additional tests for filtering in the get_eurostat_data function")
-    #  expect_equal(nr10,1232)
+      message("\n ########--------- 98 additional tests for filtering in the get_eurostat_data function")
+      expect_equal(nr10,1232)
     } else {no_check<-paste(no_check,"98",sep=", ")}
     dt5<-get_eurostat_data(testid6,filters="Q...ME_LYPG_HU_LHBP+ME_LYTV_UA_UKKK",date_filter=c("2016-08","2017-07-01"),select_freq="M")
     dt6<-get_eurostat_data(testid6,filters=c("HU","Quarterly","Monthly"),date_filter=c("2016-08","2017-07-01"),stringsAsFactors=FALSE,label=TRUE)
@@ -419,10 +424,10 @@ if (grepl("\\.amzn|-aws|5.4.109+",Sys.info()['release'])) {
       message("\n ########--------- 102 additional tests for filtering in the get_eurostat_data function")
       expect_false(any(sapply(dt6,is.factor)))
     } else {no_check<-paste(no_check,"102",sep=", ")}
-    dt8<-get_eurostat_data(testid6,filters="BE$",date_filter=c("2017-03",2016,"2017-07-01",2012:2014),select_freq="Q",label=TRUE,verbose=FALSE,name=FALSE)
+    dt8<-get_eurostat_data(testid6,filters="BE$",date_filter=c("2017-03",2016,"2017-07-01:2017-09-30",2012:2014),select_freq="Q",label=TRUE,verbose=FALSE,name=FALSE)
     if (!is.null(dt8)){
-      # message("\n ########--------- 103 additional tests for filtering in the get_eurostat_data function")
-      # expect_true(nrow(dt8)<=5040)
+      message("\n ########--------- 103 additional tests for filtering in the get_eurostat_data function")
+      expect_true(nrow(dt8)<=5040)
       message("\n ########--------- 104 additional tests for filtering in the get_eurostat_data function")
       expect_true(ncol(dt8)<=5)
     } else {no_check<-paste(no_check,"103-104",sep=", ")}
@@ -443,8 +448,8 @@ if (grepl("\\.amzn|-aws|5.4.109+",Sys.info()['release'])) {
   if (!is.null(dsd3)&is.data.frame(dsd3)){
     nr11<-nrow(get_eurostat_data(testid9,filters="Monthly",exact_match=FALSE,date_filter=c("<2018-07-01"),select_freq="A",label=TRUE,name=FALSE))
     if (!is.null(nr11)){
-    #  message("\n ########--------- 107 additional tests for filtering in the get_eurostat_data function")
-    #  expect_equal(nr11,5565)
+      message("\n ########--------- 107 additional tests for filtering in the get_eurostat_data function")
+      expect_equal(nr11,4845)
     } else {no_check<-paste(no_check,"107",sep=", ")}
   } else {no_check<-paste(no_check,"107",sep=", ")}
   dsd4<-get_eurostat_dsd(testid10)