/
utils.R
309 lines (291 loc) · 9.51 KB
/
utils.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
#' @importFrom dplyr %>%
#' @export
dplyr::`%>%`
#' Custom CSV reading function
#'
#' @description Checks for use of memoise and then uses vroom::vroom.
#' @param file A URL or filepath to a CSV
#' @param guess_max Maximum number of records to use for guessing column types.
#' Defaults to a 1000.
#' @param ... extra parameters to be passed to vroom::vroom
#' @inheritParams message_verbose
#' @return A data table
#' @importFrom memoise memoise cache_filesystem
#' @importFrom vroom vroom
#' @importFrom dplyr tibble
#' @concept utility
csv_reader <- function(file, verbose = FALSE, guess_max = 1000, ...) {
read_csv_fun <- vroom
if (!is.null(getOption("useMemoise"))) {
if (getOption("useMemoise")) {
ch <- cache_filesystem(getOption("cache_path"))
read_csv_fun <- memoise(vroom, cache = ch)
}
}
if (verbose) {
message("Downloading data from ", file)
data <- read_csv_fun(file, progress = TRUE, ..., guess_max = guess_max)
} else {
data <- suppressWarnings(
suppressMessages(
read_csv_fun(file, progress = FALSE, ..., guess_max = guess_max)
)
)
}
return(tibble(data))
}
#' Custom JSON reading function
#'
#' @description Checks for use of memoise and then uses vroom::vroom.
#' @param file A URL or filepath to a JSON
#' @param ... extra parameters to be passed to jsonlite::fromJSON
#' @inheritParams message_verbose
#' @return A data table
#' @importFrom dplyr tibble
#' @importFrom jsonlite fromJSON
#' @concept utility
json_reader <- function(file, verbose = FALSE, ...) {
if (verbose) {
message("Downloading data from ", file)
data <- fromJSON(file, ...)
} else {
data <- suppressWarnings(
suppressMessages(
fromJSON(file, ...)
)
)
}
return(tibble(data))
}
#' Wrapper for message
#'
#' @description A wrapper for `message` that only prints output when
#' `verbose = TRUE`.
#' @param verbose Logical, defaults to `TRUE`. Should verbose processing
#' messages and warnings be returned.
#' @param ... Additional arguments passed to `message`.
#' @concept utility
message_verbose <- function(verbose = TRUE, ...) {
if (verbose) {
message(...)
}
return(invisible(NULL))
}
#' Add useMemoise to options
#' @param path Path to cache directory, defaults to a temporary directory.
#' @inheritParams message_verbose
#' @description Adds useMemoise to options meaning memoise is
#' used when reading data in.
#' @concept utility
#' @export
#' @concept utility
start_using_memoise <- function(path = tempdir(), verbose = TRUE) {
message_verbose(verbose, "Using a cache at: ", path)
options("useMemoise" = TRUE, cache_path = path)
}
#' Stop using useMemoise
#'
#' @description Sets useMemoise in options to NULL, meaning memoise isn't used
#' when reading data in
#' @concept utility
#' @export
#' @concept utility
stop_using_memoise <- function() {
if (!is.null(options("useMemoise"))) {
options("useMemoise" = NULL)
}
}
#' Reset Cache and Update all Local Data
#'
#' @return Null
#' @export
#' @importFrom memoise cache_filesystem
#' @concept utility
reset_cache <- function() {
unlink(getOption("cache_path"), recursive = TRUE)
cache_filesystem(getOption("cache_path"))
return(invisible(NULL))
}
#' Control data return
#'
#' @description Controls data return for `get_reigonal_data` and
#' `get_national_data`
#' @param obj A Class based on a `DataClass`
#' @param class Logical, defaults to FALSE. If TRUE returns the
#' `DataClass` object rather than a tibble or a list of tibbles.
#' Overrides `steps`.
#' @concept utility
return_data <- function(obj, class = FALSE) {
if (class) {
return(obj)
} else {
obj <- obj$return()
return(obj)
}
}
#' Control Grouping Variables used in process_internal
#'
#' @description Controls the grouping variables used in
#' `process_internal` based on the supported regions present in the
#' class.
#' @param level A character string indicating the current level.
#' @param all_levels A character vector indicating all the levels supported.
#' @param region_names A named list of region names named after the levels
#' supported.
#' @param region_codes A named list of region codes named after the levels
#' supported.
#' @importFrom purrr map
#' @concept utility
region_dispatch <- function(level, all_levels, region_names, region_codes) {
sel_levels <- all_levels[1:grep(level, all_levels)]
region_vars <- map(sel_levels, function(l) {
rn <- c()
if (!is.null(region_names[[l]])) {
rn <- c(region_names[[l]])
names(rn) <- glue_level(l)
}
rc <- c()
if (!is.null(region_codes[[l]])) {
rc <- c(region_codes[[l]])
names(rc) <- paste0(glue_level(l), "_code")
}
region_vars <- c(rn, rc)
return(region_vars)
})
region_vars <- unlist(region_vars)
region_vars <- region_vars[!is.null(region_vars)]
region_vars <- region_vars[!is.na(region_vars)]
return(region_vars)
}
#' Glue the spatial level into a variable name
#'
#' @inheritParams region_dispatch
#' @return A string in the form "level_1_region".
#' @concept utility
glue_level <- function(level) {
paste0("level_", level, "_region")
}
#' Checks a given level is supported
#' @param supported_levels A character vector of supported levels
#' @inheritParams region_dispatch
#' @return NULL
#' @concept utility
check_level <- function(level, supported_levels) {
if (!any(supported_levels %in% level)) {
stop(
level,
" is not a supported level check supported_levels for options"
)
}
return(invisible(NULL))
}
#' Download Excel Documents
#'
#' @param url Character string containing the full URL to the Excel document.
#' @param archive Character string naming the file name to assign in the
#' temporary directory.
#' @param transpose Logical, should the read in data be transposed
#' @param ... Additional parameters to pass to `read_excel()`.
#' @inheritParams message_verbose
#' @importFrom readxl read_excel
#' @return A `data.frame`.
#' @concept utility
download_excel <- function(url, archive, verbose = FALSE,
transpose = TRUE, ...) {
# download
archive <- file.path(tempdir(), archive)
download.file(
url = url,
destfile = archive,
mode = "wb", quiet = !(verbose)
)
# read in
dt <- suppressMessages(
read_excel(archive, ...)
)
if (transpose) {
dt <- t(dt)
}
dt <- as.data.frame(dt)
return(dt)
}
#' Create github action for a given source
#' @description Makes a github workflow yaml file for a given source to be used
#' as an action to check the data as a github action.
#' @param source character_array The name of the class to create the workflow
#' for.
#' @param workflow_path character_array The path to where the workflow file
#' should be saved. Defaults to '.github/workflows/'
#' @param cron character_array the cron time to run the tests, defaults to
#' 36 12 * * *, following the minute, hour, day(month), month and day(week)
#' format.
#' @concept utility
#' @export
make_github_workflow <- function(source,
workflow_path = paste0(
".github/workflows/", source, ".yaml"
), cron = "36 12 * * *") {
template_path <- system.file(
"github_workflow_template.yaml",
package = "covidregionaldata"
)
template <- readLines(template_path)
newfile <- gsub("_SOURCE_", source, template)
newfile <- gsub("_CRON_", paste0("'", cron, "'"), newfile)
writeLines(newfile, workflow_path)
message(
paste("workflow created for", source, "at", workflow_path)
)
}
#' Create new country class for a given source
#' @description Makes a new regional or national country class with the name
#' provided as the source. This forms a basic template for the user to fill in
#' with the specific field values and cleaning functions required. This also
#' creates a github workflow file for the same country.
#' @param source character_array The name of the class to create. Must start
#' with a capital letter (be upper camel case or an acronym in all caps such as
#' WHO).
#' @param type character_array the type of class to create, subnational or
#' National defaults to subnational. Regional classes are individual countries,
#' such as UK, Italy, India, etc. These inherit from `DataClass`, whilst
#' national classes are sources for multiple countries data, such as JRC, JHU,
#' Google, etc. These inherit from `CountryDataClass`.
#' @param newfile_path character_array the place to save the class file
#' @concept utility
#' @export
make_new_data_source <- function(source, type = "subnational",
newfile_path = paste0("R/", source, ".R")) {
if (!(type %in% c("subnational", "national"))) {
stop(
"type must be 'subnational' or 'national'"
)
}
if (!grepl("^[A-Z]", source)) {
stop(
"New countries should start with a capital letter. E.g. Italy not italy."
)
}
if (file.exists(newfile_path)) {
stop(
paste0(newfile_path, " exists, Will not overwrite. Remove manually.")
)
}
template_path <- system.file(
"CountryTemplate.R",
package = "covidregionaldata"
)
template <- readLines(template_path)
newfile <- gsub("CountryTemplate", source, template)
if (type == "national") {
newfile <- gsub("DataClass", "CountryDataClass", newfile)
newfile <- gsub("subnational", "national", newfile)
}
writeLines(newfile, newfile_path)
message(
paste(type, "Class created for", source, "at", newfile_path)
)
make_github_workflow(source)
}
# Hack to work around the fact that `where()` is not exported
# (https://github.com/r-lib/tidyselect/issues/201)
utils::globalVariables("where")