Skip to content

Commit

Permalink
correction for check_toc=TRUE
Browse files Browse the repository at this point in the history
  • Loading branch information
mmatyi committed Mar 1, 2024
1 parent a017dc3 commit 90a7f9e
Show file tree
Hide file tree
Showing 8 changed files with 35 additions and 26 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: restatapi
Type: Package
Title: Search and Retrieve Data from Eurostat Database
Date: 2024-02-22
Version: 0.22.7
Date: 2024-03-01
Version: 0.22.8
Encoding: UTF-8
Authors@R: c(person("Mátyás", "Mészáros", email = "matyas.meszaros@ec.europa.eu", role = c("aut", "cre")),
person("Sebastian", "Weinand", role = "ctb"))
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# restatapi 0.22.8

- correction for the case when the `check_toc=TRUE` option is used

# restatapi 0.22.7

- correction of the detection of cores so that the package can be loaded in WebR
Expand Down
2 changes: 1 addition & 1 deletion R/clean_restatapi_cache.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ clean_restatapi_cache<-function(cache_dir=NULL,verbose=FALSE){
td<-ls(envir=restatapi::.restatapi_env)
td<-td[!(td %in% c("cfg","rav","cc","dmethod"))]
rm(list=td,envir=restatapi::.restatapi_env)
if (verbose){message("\nclean_restatapi_cache - All objects except from 'cfg', 'rav', 'cc' and 'dmethod' are removed from '.restatapi_env'.")}
if (verbose){message("\nclean_restatapi_cache - All objects (outside of 'cfg', 'rav', 'cc' and 'dmethod') are removed from '.restatapi_env'.")}
}
if (!is.null(cache_dir)){
if (dir.exists(cache_dir)){
Expand Down
12 changes: 6 additions & 6 deletions R/get_eurostat_bulk.R
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,11 @@ get_eurostat_bulk <- function(id,
message("The TOC is missing. Could not get the download link.")
tbc<-FALSE
} else {
if (any(grepl(id,toc$code,ignore.case=TRUE))){
udate<-toc$lastUpdate[grepl(id,toc$code,ignore.case=TRUE)]
if (id %in% toc$code){
udate<-toc$lastUpdate[toc$code %in% id]
if (verbose) {message("get_eurostat_bulk - TOC rows: ",nrow(toc),
"\nget_eurostat_bulk - bulk url: ",toc$downloadLink.tsv[grepl(id,toc$code,ignore.case=TRUE)],
"\nget_eurostat_bulk - ndata rowcount in TOC: ",toc$values[grepl(id,toc$code,ignore.case=TRUE)])}
"\nget_eurostat_bulk - tsv bulk url from TOC: ",toc$downloadLink.tsv[toc$code %in% id],
"\nget_eurostat_bulk - ndata rowcount in TOC: ",toc$values[toc$code %in% id])}
} else {
message(paste0("'",id,"' is not in the table of contents. Please check if the 'id' is correctly spelled."))
tbc<-FALSE
Expand All @@ -150,15 +150,15 @@ get_eurostat_bulk <- function(id,
if ((!cache)|is.null(restat_bulk)|(update_cache)){
if (verbose) {message("get_eurostat_bulk - class of id, cache, update_cache, cache_dir, compress_file, stringsAsFactors, keep_flags, check_toc, melt, verbose:\n", class(id)," - ",class(cache)," -",class(update_cache),
" - ",class(cache_dir)," - ",class(compress_file)," - ",class(stringsAsFactors)," - ",class(keep_flags),
" - ",class(check_toc)," - ",class(melt)," - ",class(verbose))}
" - ",class(check_toc)," - ",class(TRUE)," - ",class(verbose))}
restat_bulk<-restatapi::get_eurostat_raw(id,"txt",cache,update_cache,cache_dir,compress_file,stringsAsFactors,keep_flags,check_toc,melt=TRUE,verbose=verbose)
}
}

if (!is.null(restat_bulk)){
restat_bulk[]
drop<-NULL
if ("freq" %in% colnames(restat_bulk)) {setnames(restat_bulk,"freq","FREQ")}
if ("freq" %in% colnames(restat_bulk)) {data.table::setnames(restat_bulk,"freq","FREQ")}
if ("FREQ" %in% colnames(restat_bulk)) {drop=c("FREQ")}
if ("TIME_FORMAT" %in% colnames(restat_bulk)) {drop<-c(drop,"TIME_FORMAT")}
if (is.null(select_freq)){
Expand Down
6 changes: 3 additions & 3 deletions R/get_eurostat_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -232,9 +232,9 @@ get_eurostat_data <- function(id,
message("The TOC is missing. Could not get the download link.")
tbc<-FALSE
} else {
if (any(grepl(id,toc$code,ignore.case=TRUE))){
udate<-toc$lastUpdate[grepl(id,toc$code,ignore.case=TRUE)]
if (verbose) {message("get_eurostat_data - data TOC rows: ",nrow(toc),"\nbulk url: ",toc$downloadLink.tsv[grepl(id,toc$code,ignore.case=TRUE)],"\ndata rowcount: ",toc$values[grepl(id,toc$code,ignore.case=TRUE)])}
if (id %in% toc$code){
udate<-toc$lastUpdate[toc$code %in% id]
if (verbose) {message("get_eurostat_data - data TOC rows: ",nrow(toc),"\n\tbulk url from TOC: ",toc$downloadLink.tsv[toc$code %in% id],"\n\tdata rowcount in TOC: ",toc$values[toc$code %in% id])}
} else {
message(paste0("'",id,"' is not in the table of contents. Please check if the 'id' is correctly spelled."))
tbc<-FALSE
Expand Down
27 changes: 16 additions & 11 deletions R/get_eurostat_raw.R
Original file line number Diff line number Diff line change
Expand Up @@ -132,12 +132,16 @@ get_eurostat_raw <- function(id,
message("The TOC is missing. Could not get the download link.")
tbc<-FALSE
} else {
if (any(grepl(id,toc$code,ignore.case=TRUE))){
udate<-toc$lastUpdate[grepl(id,toc$code,ignore.case=TRUE)]
if (id %in% toc$code){
udate<-toc$lastUpdate[toc$code %in% id]
if (mode=="txt") {
bulk_url<-toc$downloadLink.tsv[grepl(id,toc$code,ignore.case=TRUE)]
bulk_url_base<-eval(parse(text=paste0("cfg$BULK_BASE_URL$'",rav,"'$ESTAT")))
bulk_url_end<- switch(rav,"1" = paste0("?file=data/",id,".tsv.gz"),"2"= paste0(id,"?format=TSV&compressed=true"))
bulk_url<-paste0(bulk_url_base,bulk_url_end)
} else if (mode=="xml") {
bulk_url<-toc$downloadLink.sdmx[grepl(id,toc$code,ignore.case=TRUE)]
bulk_url_base<-eval(parse(text=paste0("cfg$BULK_BASE_URL$'",rav,"'$ESTAT")))
bulk_url_end<- switch(rav,"1" = paste0("?file=data/",id,".sdmx.zip"),"2"= paste0(id,"?format=sdmx_2.1_structured&compressed=true"))
bulk_url<-paste0(bulk_url_base,bulk_url_end)
} else {
message("Incorrect mode:",mode,"\n It should be either 'txt' or 'xml'." )
tbc<-FALSE
Expand All @@ -147,8 +151,9 @@ get_eurostat_raw <- function(id,
tbc<-FALSE
}
if (verbose) {message("get_eurostat_raw - raws of TOC: ",nrow(toc),
"\nget_eurostat_raw - bulk url: ",bulk_url,
"\nget_eurostat_raw - data rowcount in TOC: ",toc$values[grepl(id,toc$code,ignore.case=TRUE)])}
"\nget_eurostat_raw - txt bulk url from TOC:",toc$downloadLink.tsv[toc$code %in% id],
"\nget_eurostat_raw - txt bulk url from cfg:",bulk_url,
"\nget_eurostat_raw - data rowcount in TOC: ",toc$values[toc$code %in% id])}
} else {
message(paste0("'",id,"' is not in the table of contents. Please check if the 'id' is correctly spelled."))
tbc<-FALSE
Expand Down Expand Up @@ -278,14 +283,14 @@ get_eurostat_raw <- function(id,
rm(raw)
data.table::setnames(raw_melted,2:3,c(rname,"values"))
raw_melted<-raw_melted[raw_melted$values!=":",]
if (check_toc|rav==1){
FREQ<-gsub("MD","D",gsub('[0-9\\.\\-]',"",raw_melted$time))
FREQ[FREQ==""]<-"A"
}
# if (check_toc|rav==1){
# FREQ<-gsub("MD","D",gsub('[0-9\\.\\-]',"",raw_melted$time))
# FREQ[FREQ==""]<-"A"
# }
restat_raw<-data.table::as.data.table(data.table::tstrsplit(raw_melted$bdown,",",fixed=TRUE),stringsAsFactors=stringsAsFactors)
data.table::setnames(restat_raw,cnames)
restat_raw<-data.table::data.table(restat_raw,raw_melted[,2:3],stringsAsFactors=stringsAsFactors)
if (check_toc|rav==1) {restat_raw<-data.table::data.table(FREQ,restat_raw)}
# if (check_toc|rav==1) {restat_raw<-data.table::data.table(FREQ,restat_raw)}
if (keep_flags) {restat_raw$flags<-gsub('[0-9\\.\\-\\s\\:]',"",restat_raw$values,perl=TRUE)}
restat_raw$values<-gsub('^\\:$',"",restat_raw$values,perl=TRUE)
restat_raw$values<-gsub('[^0-9\\.\\-\\:]',"",restat_raw$values,perl=TRUE)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ Next to the functions the package contains a list of country codes for different
options(restatapi_cores=3)
get_eurostat_toc()
options(restatapi_dmethod="libcurl")
get_get_eurostat_toc(mode="txt",verbose=TRUE)
get_eurostat_toc(mode="txt",verbose=TRUE)
search_eurostat_toc("energie",lang="de",ignore.case=TRUE)
```

Expand Down
4 changes: 2 additions & 2 deletions inst/tinytest/test_restatapi.R
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,7 @@ if (grepl("\\.amzn|-aws|5.4.109+|-azure ",Sys.info()['release'])) {
if (!is.null(bulk1)&!is.null(bulk2)){
kc<-colnames(bulk1)
bulk1<-bulk1[,..kc]
bulk1<-bulk2[,..kc]
bulk2<-bulk2[,..kc]
data.table::setorder(bulk1)
data.table::setorder(bulk2)
expect_true(identical(bulk1,bulk2)) # a44
Expand Down Expand Up @@ -584,7 +584,7 @@ if (grepl("\\.amzn|-aws|5.4.109+|-azure ",Sys.info()['release'])) {
expect_equal(nrow(estat_data4),nrow(bulk2)) # a79
expect_true(nrow(raw4)>nrow(estat_data4)) # a80
} else {not_checked<-paste(not_checked,"a50-a80",sep=",")}

if (tolower(testid1) %in% xml_toc$code) expect_true(!is.null(get_eurostat_data(testid1,update_cache=TRUE,check_toc=TRUE))) #a81

}

Expand Down

0 comments on commit 90a7f9e

Please sign in to comment.