version 0.1.1

cran · Sep 11, 2023 · 6fb873c · 6fb873c
1 parent f3d9db4
commit 6fb873c
Show file tree

Hide file tree

Showing 41 changed files with 1,142 additions and 509 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: censobr
 Title: Download Data from Brazil's Population Census
-Version: 0.1.0
+Version: 0.1.1
 Authors@R: 
     c(person(given="Rafael H. M.", family="Pereira", 
              email="rafa.pereira.br@gmail.com", 
@@ -25,10 +25,10 @@ VignetteBuilder: knitr
 Encoding: UTF-8
 NeedsCompilation: yes
 RoxygenNote: 7.2.3
-Packaged: 2023-09-05 18:46:04 UTC; user
+Packaged: 2023-09-10 21:37:31 UTC; user
 Author: Rafael H. M. Pereira [aut, cre]
     (<https://orcid.org/0000-0003-2125-7465>),
   Ipea - Institute for Applied Economic Research [cph, fnd]
 Maintainer: Rafael H. M. Pereira <rafa.pereira.br@gmail.com>
 Repository: CRAN
-Date/Publication: 2023-09-06 17:50:05 UTC
+Date/Publication: 2023-09-11 12:00:09 UTC
diff --git a/MD5 b/MD5
@@ -1,52 +1,58 @@
-9e1528d98491a1229fa18b51cdbbe75a *DESCRIPTION
+6823d5b910080c24437a5b8d4fb879af *DESCRIPTION
 3e30d3756ddb8c51acc9c524103f5e79 *LICENSE
 951e46f3c22cb623bfec077bbdfa1daf *NAMESPACE
-6e9ebd22224344bbcdebcc706244f72a *NEWS.md
-6072c9d9fe367166bf84887f523e3de2 *R/add_labels_emigration.R
-714e936337c2a47311721677b377290a *R/add_labels_families.R
-714cef6ea97b0d23e8eae02ffc35f397 *R/add_labels_households.R
-085e492fb8c65f21b4dac905440bbad0 *R/add_labels_mortality.R
-460107a764726638e45f28380395e400 *R/add_labels_population.R
-7023dd836734ff428b953874fe2874f0 *R/censobr.R
-0d765e212529b76c56c9d4772aaceae7 *R/censobr_cache.R
-707e04233c86afd39ba09bd6d94d0520 *R/read_emigration.R
-5d97a106754ac92199e87a27c89aa95a *R/read_families.R
-1d4c45499e3b71888704f55206503693 *R/read_households.R
-3352c981817e7e18ba04912ae92529a2 *R/read_mortality.R
-5bc51e4b3510ea44a1f49b6f383a7f44 *R/read_population.R
-290257a55583c92dd6f18678a607ce40 *R/utils.R
-625b4ed8766778b756d4e05b3498ceb4 *README.md
+a3b5b21df2dba6b0ae2b93d3e5ab8928 *NEWS.md
+50ceedf9155831fdb61be13a3ddf635b *R/add_labels_emigration.R
+147ff2b51428da402a1bfcb2dc672a25 *R/add_labels_families.R
+924ee082323f6718913ba9c5b5c6d4df *R/add_labels_households.R
+8f6ecbd7a36a79c209c57d6d540ea50a *R/add_labels_mortality.R
+bfe9f25530e79d908a730036ff54e567 *R/add_labels_population.R
+4f184a43d030cbd1bc69216ab1e3e5d1 *R/censobr.R
+a0ac9afd112c83c4f9926f3457c1ebfe *R/censobr_cache.R
+e24fb284e4ea7ad63c64bfb0c759e77f *R/onLoad.R
+a9150b36582060e1184ef7285e233842 *R/read_emigration.R
+3e39a08a8cd067098de9c5b4cb1ec137 *R/read_families.R
+57762cf5fe24ad69f7a1335132c5c3b6 *R/read_households.R
+957d45a403cc5abc7ce6e8e8fd61beb3 *R/read_mortality.R
+e177c89d81dca5ebea8473a912017a1c *R/read_population.R
+da6c798eef5276dae0a3d2e253c3d970 *R/utils.R
+97bc94616711d782d176f8385a2295b1 *README.md
 6d27e6f80558fd5db0d3119ba75b99d4 *build/vignette.rds
-318421dd44b22229f75f58fe4a0858bc *inst/doc/censobr.R
-9827d783f7348e793e8ae7d40e5d32ac *inst/doc/censobr.Rmd
-6aeafc85f83c99ba5e3edb1f4d7fcc3a *inst/doc/censobr.html
+66a71d25366c1c8055cffc66ff566654 *inst/doc/censobr.R
+82919ff3280a5fd1a1ec06af6620ba88 *inst/doc/censobr.Rmd
+f2de202ba82176e3ff420df0561c9af5 *inst/doc/censobr.html
 f28e5663dfcdd5d7274e1504032cf9f5 *inst/img/vig_output_pop1.png
 1b48e3117c78dc490ca54081b9231ef5 *man/cache_message.Rd
 36c6751d3e0287ef94deb7ed708caa5d *man/censobr.Rd
-8c80c6bd5758433cea8519d237967a1e *man/censobr_cache.Rd
+10a09fc8d92936859512e4db4c7e231e *man/censobr_cache.Rd
 248aa9b96723be11c43ad41a71ed05c0 *man/download_file.Rd
 b99bafee7c0cc94c8cf6b0f409bc3499 *man/figures/censobr_hexsticker.R
 48219fca1ec818a8f49f1fd7b04efc68 *man/figures/ipea_logo.png
 58bce978de1ff44ddff45c565fe6e4e2 *man/figures/logo.png
 4b2af118a659294ac2cf09051d69970d *man/figures/person_icon.xcf
 0122f64d230ba374dad15f0cf2f413f7 *man/figures/person_icon_white.png
-311fe2b1071e6753e12e4049bf9bf1d4 *man/read_emigration.Rd
-359305937fca359f05a49e177c83e7b7 *man/read_families.Rd
-e3969e8f7dbbf1a7e80b35057c745eee *man/read_households.Rd
-1ee3a11f431d0af0bb219daa91d26846 *man/read_mortality.Rd
-050edfc04e92cc2fb897038b93ebc1de *man/read_population.Rd
+6788763f73b67b41099049d016a4fef2 *man/read_emigration.Rd
+1011daf9c1688b80fc4ff76aef2bf080 *man/read_families.Rd
+7c8c3c6bd49734f0a01856681f5ccf05 *man/read_households.Rd
+578a147a1930aee35303bde9799e486f *man/read_mortality.Rd
+855e14827b5eab0483aa4b2e3a677a9a *man/read_population.Rd
 612385be109e4192883eb7f8aa849c87 *man/roxygen/templates/add_labels.R
 d5d889932073de579e60949cd9f841d0 *man/roxygen/templates/as_data_frame.R
 1eee990802cf2b626122a37d1807c6d0 *man/roxygen/templates/cache.R
 ae2ba005e0f39daa9c2dc28b9818ce78 *man/roxygen/templates/columns.R
 d4d9d9cd6cc072a6d02469509b649dd5 *man/roxygen/templates/showProgress.R
 82201c1a3cba4093e9246c6a630100e4 *man/roxygen/templates/year.R
-8a419b0d2f8c6e081ffb7825bbe59a7f *tests/tests_rafa/test_rafa.R
-cc349a47c388a49a388f296f48b3ec52 *tests/testthat.R
-0c76a2bd213e8255722b3b20471201c6 *tests/testthat/test_censobr_cache.R
-ef37b8cf6cd856877153b8050694d02f *tests/testthat/test_read_emigration.R
-90276ef02d7923225248d23f2fd5f1e3 *tests/testthat/test_read_families.R
-167fd72ec96af6088abe3d4c25f5e3ed *tests/testthat/test_read_households.R
-447e2c51e641fd2a89b323ff92d12517 *tests/testthat/test_read_mortality.R
-fdbb4006ba53267b8b1a0bc1d7de6a66 *tests/testthat/test_read_population.R
-9827d783f7348e793e8ae7d40e5d32ac *vignettes/censobr.Rmd
+875d97378c8f68d41ff3928c5e84fa6a *tests/tests_rafa/test_rafa.R
+09e92dd42e558cee7f874b83df70ef78 *tests/testthat.R
+cd6c09779ea978e67ddcd86ced7d4bf6 *tests/testthat/test_labels_emigration.R
+6ec3662a4a95c4f94afe60b7b3b4b8fd *tests/testthat/test_labels_families.R
+72a96831f3ca3cb7965757e876d42978 *tests/testthat/test_labels_households.R
+0d728110290d982cf047e8d297446940 *tests/testthat/test_labels_mortality.R
+8ade068614f808154cb639d2725949b1 *tests/testthat/test_labels_population.R
+854c97124ad9fcb1af57d37000327701 *tests/testthat/test_read_emigration.R
+760dcfe253fede62baaeabc60861861d *tests/testthat/test_read_families.R
+af2e9ce5b357c81251d742f5b5b9f3e8 *tests/testthat/test_read_households.R
+266b6e2dc43188e70ee7293870083f62 *tests/testthat/test_read_mortality.R
+70bd47c116ec4413085c5dc922c34aaa *tests/testthat/test_read_population.R
+166aae9cc26a67c8eab748d96586efc6 *tests/testthat/test_z_censobr_cache.R
+82919ff3280a5fd1a1ec06af6620ba88 *vignettes/censobr.Rmd
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,15 @@
+# censobr v0.1.1
+
+* Minor changes
+  * Using cache_dir and data_release as global variables. Closes [#13](https://github.com/ipeaGIT/censobr/issues/13)
+  * Running `censobr_cache(delete_file = 'all')` now also removes data from old data releases. Closes [#14](https://github.com/ipeaGIT/censobr/issues/14).
+  * Large improvement in code coverage 
+
+* Changes requested by CRAN team
+  * Changed location of cached data to directory inside tools::R_user_dir("censobr", which = "cache"). 
+  * The package now automatically deletes cached data from previous data releases that might exist from previous versions of the package
+  * Clean cache after intro vignette and testthat checks
+
 # censobr v0.1.0
 
 * Launch of **censobr** v0.1.0 on CRAN https://cran.r-project.org/package=censobr

diff --git a/R/add_labels_emigration.R b/R/add_labels_emigration.R
@@ -1,17 +1,18 @@
 # Add labels to categorical variables of emigration datasets
 #' @keywords internal
 add_labels_emigration <- function(arrw,
-                                   year = parent.frame()$year,
-                                   lang = 'pt'){
+                                  year = parent.frame()$year,
+                                  lang = 'pt'){
 
-  # check languate input
+  # check input
   checkmate::assert_string(lang, pattern = 'pt', na.ok = TRUE)
+  if (year != 2010){stop('Labels for this data are only available for the year 2010')}
 
   # names of columns present in the data
   cols <- names(arrw)
 
   ### YEAR 2010
-  if(year == 2010 & lang == 'pt'){
+  if(year == 2010 & lang == 'pt'){ # nocov start
 
 
     # urban vs rural
@@ -245,7 +246,7 @@ add_labels_emigration <- function(arrw,
         V3061 == '8000998' ~ 'Ignorado',
         V3061 == '8000999' ~ paste0('N\u00e3o sabia pa\u00eds estrangeiro')))
     }
-  }
+  }  # nocov end
 
   return(arrw)
 }
diff --git a/R/add_labels_families.R b/R/add_labels_families.R
@@ -4,13 +4,14 @@ add_labels_families <- function(arrw,
                                 year = parent.frame()$year,
                                 lang = 'pt'){
 
-  # check languate input
+  # check input
   checkmate::assert_string(lang, pattern = 'pt', na.ok = TRUE)
+  if (!(year %in% c(2000, 2010))) {stop('Labels for this data are only available for the years c(2000, 2010)')}
 
   # names of columns present in the data
   cols <- names(arrw)
 
-  if(year == 2000 & lang == 'pt'){
+  if(year == 2000 & lang == 'pt'){ # nocov start
   ### YEAR 2000
 
     # REGIÃO METROPOLITANA
@@ -128,7 +129,7 @@ add_labels_families <- function(arrw,
       CODV4615_7400 == '11' ~ 'Mais de 30 sal\u00e1rios m\u00ednimos',
       CODV4615_7400 == '12' ~ 'Sem rendimento'))
     }
-  }
+  } # nocov end
 
   return(arrw)
 }
diff --git a/R/add_labels_households.R b/R/add_labels_households.R
@@ -4,11 +4,12 @@ add_labels_households <- function(arrw,
                                   year = parent.frame()$year,
                                   lang = 'pt'){
 
-  # check languate input
+  # check input
   checkmate::assert_string(lang, pattern = 'pt', na.ok = TRUE)
+  if (!(year %in% c(2000, 2010))) {stop('Labels for this data are only available for the years c(2000, 2010)')}
 
   # names of columns present in the data
-  cols <- names(arrw)
+  cols <- names(arrw) # nocov start
 
   # ALL YEARS ------------------------------------------------------------------
 
@@ -346,7 +347,7 @@ add_labels_households <- function(arrw,
       arrw <- dplyr::mutate(arrw, dplyr::across(all_of(vars_sim_nao_present),
                                                 ~ if_else(.x == '1', 'Sim', 'N\u00e3o')
                                                 ))
-  }
+  } # nocov end
 
   return(arrw)
 }
diff --git a/R/add_labels_mortality.R b/R/add_labels_mortality.R
@@ -4,14 +4,15 @@ add_labels_mortality <- function(arrw,
                                  year = parent.frame()$year,
                                  lang = 'pt'){
 
-  # check languate input
+  # check input
   checkmate::assert_string(lang, pattern = 'pt', na.ok = TRUE)
+  if (!(year %in% c(2010))) {stop('Labels for this data are only available for the year c(2010)')}
 
   # names of columns present in the data
   cols <- names(arrw)
 
   ### YEAR 2010
-  if(year == 2010 & lang == 'pt'){
+  if(year == 2010 & lang == 'pt'){ # nocov start
     # urban vs rural
     if ('V1006' %in% cols) {
       arrw <- arrw |> mutate(V1006 = case_when(
@@ -57,7 +58,7 @@ add_labels_mortality <- function(arrw,
         V1005 == '7' ~ 'Aglomerado rural (outros)',
         V1005 == '8' ~ paste0('\u00c1rea rural exclusive aglomerado rural')))
     }
-  }
+  } # nocov end
 
   return(arrw)
 }
diff --git a/R/add_labels_population.R b/R/add_labels_population.R
@@ -4,11 +4,12 @@ add_labels_population <- function(arrw,
                                   year = parent.frame()$year,
                                   lang = 'pt'){
 
-  # check languate input
+  # check input
   checkmate::assert_string(lang, pattern = 'pt', na.ok = TRUE)
+  if (!(year %in% c(2010))) {stop('Labels for this data are only available for the year c(2010)')}
 
   # names of columns present in the data
-  cols <- names(arrw)
+  cols <- names(arrw) # nocov start
 
   # ALL YEARS ------------------------------------------------------------------
 
@@ -433,7 +434,7 @@ add_labels_population <- function(arrw,
           V1005 == '8' ~ '\u00c1rea rural exclusive aglomerado rural'))
         }
 
-    }
+    } # nocov end
 
   # YEAR 2000----------------------------------------------------------------
 

diff --git a/R/censobr.R b/R/censobr.R
@@ -15,8 +15,7 @@
 #' @keywords internal
 "_PACKAGE"
 
-
-## quiets concerns of R CMD check re: the .'s that appear in pipelines
+## quiets concerns of R CMD check:
 utils::globalVariables( c('year',
                           'temp_local_file') )
 

diff --git a/R/censobr_cache.R b/R/censobr_cache.R
@@ -10,7 +10,7 @@
 #' @return A message indicating which file exist and/or which ones have been
 #'         deleted from local cache directory.
 #' @export
-#' @family support
+#' @family Cache data
 #' @examplesIf identical(tolower(Sys.getenv("NOT_CRAN")), "true")
 #' # list all files cached
 #' censobr_cache(list_files = TRUE)
@@ -26,13 +26,10 @@ censobr_cache <- function(list_files = TRUE,
   checkmate::assert_character(delete_file, null.ok = TRUE)
 
   # find / create local dir
-  # pkgv <- paste0('censobr_', utils::packageVersion("censobr") )
-  pkgv <- paste0('censobr_', 'v0.1.0' )
-  cache_dir <- tools::R_user_dir(pkgv, which = 'cache')
-  if (!dir.exists(cache_dir)) { dir.create(cache_dir, recursive=TRUE) }
+  if (!dir.exists(censobr_env$cache_dir)) { dir.create(censobr_env$cache_dir, recursive=TRUE) }
 
   # list cached files
-  files <- list.files(cache_dir, full.names = TRUE)
+  files <- list.files(censobr_env$cache_dir, full.names = TRUE)
 
   # if the user wants to delete a file
   # delete_file = "2_families.parquet"
@@ -52,13 +49,24 @@ censobr_cache <- function(list_files = TRUE,
 
     # Delete ALL files
     if (delete_file=='all') {
+
+      # delete files from current release
       unlink(files, recursive = TRUE)
       message(paste0("All files have been removed."))
+
+      ## also delete any files from old data releases
+        # determine old cache
+        dir_above <- dirname(censobr_env$cache_dir)
+        all_cache <- list.files(dir_above, pattern = 'data_release',full.names = TRUE)
+        old_cache <- all_cache[!grepl(censobr_env$data_release, all_cache)]
+        # delete
+        unlink(old_cache, recursive = TRUE)
+
     }
   }
 
   # list cached files
-  files <- list.files(cache_dir, full.names = TRUE)
+  files <- list.files(censobr_env$cache_dir, full.names = TRUE)
 
   # print file names
   if(isTRUE(list_files)){

diff --git a/R/onLoad.R b/R/onLoad.R
@@ -0,0 +1,20 @@
+# Package-level environment holding shared state (data_release, cache_dir) set in .onAttach()
+censobr_env <- new.env(parent = emptyenv()) # nocov start
+
+.onAttach <- function(libname, pkgname){ # NOTE(review): .onAttach runs only when the package is attached -- confirm censobr_env is populated for censobr::fun() calls made without library()
+
+  # tag of the current data release; used below to name the cache directory
+  censobr_env$data_release <- 'v0.1.0'
+
+  # local cache dir: path from tools::R_user_dir() for 'censobr/data_release_<data_release>'
+  cache_d <- paste0('censobr/data_release_',censobr_env$data_release)
+  censobr_env$cache_dir <- tools::R_user_dir(cache_d, which = 'cache')
+  # gsub("\\\\", "/", censobr_env$cache_dir)
+
+  ## delete sibling 'data_release' entries whose path does not match the current data release
+  dir_above <- dirname(censobr_env$cache_dir)
+  all_cache <- list.files(dir_above, pattern = 'data_release',full.names = TRUE)
+  old_cache <- all_cache[!grepl(censobr_env$data_release, all_cache)]
+  unlink(old_cache, recursive = TRUE)
+
+} # nocov end
diff --git a/R/read_emigration.R b/R/read_emigration.R
@@ -13,7 +13,7 @@
 #'
 #' @return An arrow `Dataset` or a `"data.frame"` object.
 #' @export
-#' @family download microdata
+#' @family Microdata
 #' @examplesIf identical(tolower(Sys.getenv("NOT_CRAN")), "true")
 #' # return data as arrow Dataset
 #' df <- read_emigration(year = 2010)
@@ -32,14 +32,15 @@ read_emigration <- function(year = 2010,
   checkmate::assert_numeric(year)
   checkmate::assert_vector(columns, null.ok = TRUE)
   checkmate::assert_logical(as_data_frame)
+  checkmate::assert_string(add_labels, pattern = 'pt', null.ok = TRUE)
 
   # data available for the years:
   years <- c(2010)
   if (isFALSE(year %in% years)) { stop(paste0("Error: Data currently only available for the years ",
                                               paste(years), collapse = " "))}
 
   ### Get url
-  file_url <- paste0('https://github.com/ipeaGIT/censobr/releases/download/v0.1.0/',year,'_emigration.parquet')
+  file_url <- paste0("https://github.com/ipeaGIT/censobr/releases/download/", censobr_env$data_release, "/", year, "_emigration.parquet")
 
 
   ### Download

diff --git a/R/read_families.R b/R/read_families.R
@@ -13,7 +13,7 @@
 #'
 #' @return An arrow `Dataset` or a `"data.frame"` object.
 #' @export
-#' @family download microdata
+#' @family Microdata
 #' @examplesIf identical(tolower(Sys.getenv("NOT_CRAN")), "true")
 #' # return data as arrow Dataset
 #' df <- read_families(year = 2000)
@@ -29,14 +29,15 @@ read_families <- function(year = 2000,
   checkmate::assert_numeric(year)
   checkmate::assert_vector(columns, null.ok = TRUE)
   checkmate::assert_logical(as_data_frame)
+  checkmate::assert_string(add_labels, pattern = 'pt', null.ok = TRUE)
 
   # data available for the years:
   years <- c(2000)
   if (isFALSE(year %in% years)) { stop(paste0("Error: Data currently only available for the years ",
                                               paste(years), collapse = " "))}
 
   ### Get url
-  file_url <- paste0('https://github.com/ipeaGIT/censobr/releases/download/v0.1.0/',year,'_families.parquet')
+  file_url <- paste0("https://github.com/ipeaGIT/censobr/releases/download/", censobr_env$data_release, "/", year, "_families.parquet")
 
 
   ### Download