-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 414d883
Showing
42 changed files
with
1,875 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
Package: filecacher | ||
Type: Package | ||
Title: File Cacher | ||
Version: 0.2.3 | ||
Authors@R: | ||
person("Or", "Gadish", email = "orgadish@gmail.com", role = c("aut", "cre", "cph")) | ||
Description: | ||
The main functions in this package are with_cache() and cached_read(). | ||
The former is a simple way to cache an R object into a file on disk, | ||
using 'cachem'. The latter is a wrapper around any standard read function, | ||
but caches both the output and the file list info. If the input file list | ||
info hasn't changed, the cache is used; otherwise, the original files are | ||
re-read. This can save time if the original operation requires reading from | ||
many files, and/or involves lots of processing. | ||
License: MIT + file LICENSE | ||
Encoding: UTF-8 | ||
Imports: cachem, glue, here, purrr, rlang, vctrs | ||
RoxygenNote: 7.2.3 | ||
Suggests: arrow, data.table, dplyr, fs, readr, testthat (>= 3.0.0) | ||
Config/testthat/edition: 3 | ||
URL: https://github.com/orgadish/filecacher | ||
BugReports: https://github.com/orgadish/filecacher/issues | ||
NeedsCompilation: no | ||
Packaged: 2023-11-09 08:17:04 UTC; orgadish | ||
Author: Or Gadish [aut, cre, cph] | ||
Maintainer: Or Gadish <orgadish@gmail.com> | ||
Repository: CRAN | ||
Date/Publication: 2023-11-09 10:30:02 UTC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
YEAR: 2023 | ||
COPYRIGHT HOLDER: filecacher authors |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
ab5e50bc7a3736ac7b611ea2b8663d75 *DESCRIPTION | ||
a699489fbaa4c98f0be448cb32b5ce39 *LICENSE | ||
030e9df7f37ee257cff118182f32630b *NAMESPACE | ||
da6460df974ce8421fcc06a1bcca6a18 *NEWS.md | ||
3ed23ba66095a20862982211659b34b9 *R/cached_read.R | ||
db56f626aaf2ceb024cc59d2fb749043 *R/file_cache.R | ||
7cd4505a59b22a915f14ec3bd64fff52 *R/interpret_type.R | ||
57f8c3d43fca95fdb6eba3265ddde21f *R/util-dfs_equal.R | ||
47ba852b5c44ae23c92ab08a6fb43ed7 *R/util-fns_equal.R | ||
8e0b2dc83dd19e55a19f75d89c11e2dd *R/util-get_file_info.R | ||
370946680ccdc94145b2051f41eacd6d *R/vectorize_reader.R | ||
de2b0df4045c92ae0afc83c555991e67 *R/with_cache.R | ||
1e871536b2d1ba54e17007186c3a29e5 *README.md | ||
071c2fc2ab835760ab14c7ec44ab8d85 *inst/WORDLIST | ||
6087ff9f569743b5b84d6e0114aea7e5 *inst/examples/cache.R | ||
8b028e4421ceec1db98e204aba5adef2 *inst/examples/cached_read.R | ||
0be50cdfc61e4711032a55aaef73fa01 *inst/examples/vectorize_reader.R | ||
fa33100515046319623e73627fb82a50 *inst/extdata/create_extdata_from_iris.R | ||
d100dbed6c5ce6f74e0ba16127fa7e16 *inst/extdata/iris_complete.csv | ||
97a71b453f64dee5d29a4de436bbd739 *inst/extdata/iris_setosa_only.csv | ||
892af90bb2a229c30e8140658e2fc1d4 *inst/extdata/iris_versicolor_only.csv | ||
904ae0155a55b8618059a144f9e7e525 *inst/extdata/iris_virginica_only.csv | ||
a910434f5b1f1ededf32aea20efa7195 *man/cached_read.Rd | ||
f1f3a5d5d050fee4013f1a277d8baefd *man/dfs_equal.Rd | ||
abcc22cc06204a12f6067331f6711798 *man/file_cache.Rd | ||
24db7f85a969168ff6c54e66fe88cb60 *man/fns_equal.Rd | ||
ce98425ac564644c7590657217df6da2 *man/get_csv_fns.Rd | ||
1cc345250e7fa7d4cb305749701754bb *man/get_csv_read_fn.Rd | ||
d0f2ad5d3f676040e055272dfab3d664 *man/get_file_info.Rd | ||
48108a8b173d37ed634da43bd2f2947b *man/interpret_cache_type.Rd | ||
6bddb3b8d0d7c02d9c1db5da895d8d2c *man/vectorize_reader.Rd | ||
17f9bab2ff57b42f977e0dc72c5ed760 *man/with_cache.Rd | ||
7347c3a61edadfd418402153d613c76c *tests/testthat.R | ||
a943b01dbddfcd0b5067767b3d47ed6a *tests/testthat/test-cached_read.R | ||
e5321ba8ccb06d3fe9e9f0f720adfb5b *tests/testthat/test-file_cache.R | ||
f1169a22866b9364f64c0f42d71f277b *tests/testthat/test-interpret_type.R | ||
9b7423ed0fa99644850bdd05f0e3f6fc *tests/testthat/test-util-dfs_equal.R | ||
3d1ff45ccb9389492678f3a49eba2c34 *tests/testthat/test-util-fns_equal.R | ||
e11e606c90be9f3fb6d63a0ea0b60593 *tests/testthat/test-util-get_file_info.R | ||
14d1411a2e03dc3e5846f8cec10db5ae *tests/testthat/test-vectorize_reader.R | ||
5d2464789c4ea4ba83932c3125686316 *tests/testthat/test-with_cache.R |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(cached_read) | ||
export(cached_read_csv) | ||
export(file_cache) | ||
export(vectorize_reader) | ||
export(with_cache) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# filecacher 0.2.3 | ||
|
||
* Initial CRAN submission. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
#' Read files via cache of file list and contents | ||
#' | ||
#' @description | ||
#' Reads data and save to a local file for easier management and re-reading. | ||
#' | ||
#' By default, also saves the file info to determine whether the cache | ||
#' is valid, or whether the contents need to be updated because the files | ||
#' have been modified. To skip this, or force reading from scratch, set | ||
#' `skip_file_info=TRUE` or `force=TRUE`, respectively. | ||
#' | ||
#' If updating is called for, all the files are re-read. | ||
#' | ||
#' `cached_read_csv()` is a convenience function using a csv read function | ||
#' based on `read_type`. | ||
#' | ||
#' @param files A file or files to read with `read_fn`. | ||
#' @param read_fn A function which takes file(s) as its first parameter and | ||
#' reads them. To use a single-input read function such as | ||
#' `arrow::read_csv_arrow()` with multiple files, use [vectorize_reader()], | ||
#' e.g. `read_fn = vectorize_reader(arrow::read_csv_arrow)`. | ||
#' @param skip_file_info Whether to skip saving and/or checking the file info. | ||
#' Use this when just querying the file system (without opening files) is slow. | ||
#' @inheritParams with_cache | ||
#' | ||
#' @seealso [vectorize_reader()] to convert a single-input read function into a | ||
#' multiple-input function. | ||
#' | ||
#' @return The result of `read_fn(files)`. | ||
#' @export | ||
#' @example inst/examples/cached_read.R | ||
cached_read <- function(files, label, read_fn, | ||
cache = NULL, type = NULL, force = FALSE, | ||
skip_file_info = FALSE) { | ||
.cache <- file_cache(cache = cache, type = type) | ||
|
||
read_with_cache <- function() { | ||
with_cache( | ||
read_fn(files), | ||
label = label, cache = .cache, type = type, force = force | ||
) | ||
} | ||
|
||
# Option 1: If skipping file info, simply call `with_cache`. | ||
if (skip_file_info) { | ||
return(read_with_cache()) | ||
} | ||
|
||
# Option 2: Caching via file info. | ||
file_info_label <- paste0(label, "-file_info") | ||
cached_file_info <- .cache$get(file_info_label) | ||
# cached_file_info = <key_missing> if doesn't exist. | ||
new_file_info <- get_file_info(files) | ||
|
||
if (!dfs_equal(cached_file_info, new_file_info)) { | ||
.cache$set(file_info_label, new_file_info) | ||
} | ||
|
||
read_with_cache() | ||
} | ||
|
||
|
||
#' @rdname cached_read | ||
#' | ||
#' @inheritParams get_csv_read_fn | ||
#' | ||
#' @export | ||
cached_read_csv <- function(files, label, read_type = NULL, | ||
cache = NULL, type = NULL, | ||
skip_file_info = FALSE, force = FALSE) { | ||
cached_read( | ||
files = files, | ||
label = label, | ||
read_fn = get_csv_read_fn(read_type), | ||
cache = cache, | ||
type = type, | ||
skip_file_info = skip_file_info, | ||
force = force | ||
) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#' Gets or creates a `cachem` object for use with other functions. | ||
#' | ||
#' @param cache The path to an existing directory to use for caching. | ||
#' If `NULL` (default) uses the current path, using [here::here()]. | ||
#' | ||
#' For advanced use, also accepts (and passes on) an existing | ||
#' `cachem` object. If so, all other parameters are ignored. | ||
#' @inheritParams interpret_cache_type | ||
#' | ||
#' @return A [cachem::cache_disk()] object. | ||
#' @export | ||
#' | ||
#' @seealso [cachem::cache_disk()] | ||
#' | ||
#' @example inst/examples/cache.R | ||
file_cache <- function(cache = NULL, type = NULL, ext_prefix = "cache_") { | ||
if (inherits(cache, "cachem")) { | ||
return(cache) | ||
} | ||
|
||
if (is.null(cache)) { | ||
cache <- here::here() | ||
} else if (!is.character(cache) || !dir.exists(cache)) { | ||
stop( | ||
"`cache` must be an existing cache or the path to an existing directory." | ||
) | ||
} | ||
|
||
cache_type <- interpret_cache_type(type, ext_prefix = ext_prefix) | ||
cachem::cache_disk( | ||
dir = cache, | ||
max_size = Inf, | ||
read_fn = cache_type$read_fn, | ||
write_fn = cache_type$write_fn, | ||
extension = cache_type$extension | ||
) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
#' Get the CSV read/write function | ||
#' | ||
#' @description | ||
#' Read functions are vectorized. | ||
#' | ||
#' @param type Type of csv read/write functions to get. | ||
#' If `NULL`, returns the first installed. | ||
#' | ||
#' @return List of read/write functions. | ||
get_csv_fns <- function(type = NULL) { | ||
if (!is.null(type)) vctrs::vec_assert(type, character(), size = 1) | ||
|
||
rw_fns <- list( | ||
"readr" = if (rlang::is_installed("readr")) { | ||
list( | ||
read = \(f) readr::read_csv(f, id = "file_path"), | ||
write = readr::write_csv | ||
) | ||
}, | ||
"arrow" = if (rlang::is_installed("arrow")) { | ||
list( | ||
read = vectorize_reader(arrow::read_csv_arrow, "file_path"), | ||
write = arrow::write_csv_arrow | ||
) | ||
}, | ||
"data.table" = if (rlang::is_installed("data.table")) { | ||
list( | ||
read = vectorize_reader(data.table::fread, "file_path"), | ||
write = data.table::fwrite | ||
) | ||
}, | ||
"base" = list( | ||
read = vectorize_reader(utils::read.csv, "file_path"), | ||
write = utils::write.csv | ||
) | ||
) | ||
|
||
if (is.null(type)) { | ||
installed_fns <- rw_fns[!is.null(rw_fns)] | ||
fn_list <- installed_fns[[1]] | ||
} else if (type %in% names(rw_fns)) { | ||
fn_list <- rw_fns[[type]] | ||
} else { | ||
collapsed_names <- glue::glue_collapse( | ||
glue::glue("'{names(rw_fns)}'"), | ||
sep = ", ", last = ", or " | ||
) | ||
stop(glue::glue( | ||
"`type` must be NULL or one of {collapsed_names}, not '{type}'." | ||
)) | ||
} | ||
|
||
fn_list | ||
} | ||
|
||
|
||
#' Get the first CSV Read function installed | ||
#' | ||
#' @param read_type Type of csv read function to use. One of: | ||
#' * "readr": `readr::read_csv()` | ||
#' * "arrow": `vectorize_reader(arrow::read_csv_arrow)()` | ||
#' * "data.table": `vectorize_reader(data.table::fread)()` | ||
#' * "base": `vectorize_reader(utils::read.csv)()` | ||
#' * `NULL` (default): uses the first installed. | ||
#' | ||
#' @return Function that reads multiple paths to CSVs. | ||
get_csv_read_fn <- function(read_type = NULL) { | ||
if (!is.null(read_type)) vctrs::vec_assert(read_type, character(), size = 1) | ||
|
||
get_csv_fns(read_type)$read | ||
} | ||
|
||
|
||
#' Generate cache parameters from preexisting shorthand types. | ||
#' | ||
#' @param type A string describing the type of cache. | ||
#' Must be `NULL` or one of 'rds', 'parquet', or 'csv'. | ||
#' If `NULL` (default), uses 'rds'. | ||
#' @param ext_prefix The prefix to use with the file extension, | ||
#' e.g. "cache_csv", instead of "csv". | ||
#' | ||
#' @return List of `read_fn`, `write_fn`, and `extension` for use with | ||
#' [cachem::cache_disk()]. | ||
interpret_cache_type <- function(type, ext_prefix = "cache_") { | ||
if (is.null(type)) { | ||
type <- "rds" | ||
} else { | ||
vctrs::vec_assert(type, character(), size = 1) | ||
} | ||
|
||
arrow_is_installed <- rlang::is_installed("arrow") | ||
if (type == "parquet" && !arrow_is_installed) { | ||
stop("The `arrow` package must be installed to use `type='parquet'`.") | ||
} | ||
|
||
|
||
build_ext <- function(ext) { | ||
paste0(".", ext_prefix, ext) | ||
} | ||
|
||
csv_fn_list <- get_csv_fns() | ||
|
||
types <- list( | ||
"rds" = list( | ||
read_fn = NULL, | ||
write_fn = NULL, | ||
extension = build_ext("rds") | ||
), | ||
"parquet" = if (arrow_is_installed) { | ||
list( | ||
read_fn = arrow::read_parquet, | ||
write_fn = arrow::write_parquet, | ||
extension = build_ext("parquet") | ||
) | ||
}, | ||
"csv" = list( | ||
read_fn = csv_fn_list$read, | ||
write_fn = csv_fn_list$write, | ||
extension = build_ext("csv") | ||
) | ||
) | ||
|
||
if (!type %in% names(types)) { | ||
collapsed_names <- glue::glue_collapse( | ||
glue::glue("'{names(types)}'"), | ||
sep = ", ", last = ", or " | ||
) | ||
stop(glue::glue( | ||
"`type` must be NULL or one of {collapsed_names}, not '{type}'." | ||
)) | ||
} | ||
|
||
types[[type]] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
#' Compare two data frames (ignoring row order) and ensure they are equal. | ||
#' | ||
#' @description | ||
#' Similar to `dplyr::all_equal(x, y, ignore_row_order=TRUE)`, | ||
#' which is now deprecated. | ||
#' | ||
#' If either argument is not a data.frame it returns `FALSE`, | ||
#' rather than raise an error. | ||
#' | ||
#' @inheritParams base::all.equal | ||
dfs_equal <- function(target, current) { | ||
if (!is.data.frame(target) || !is.data.frame(current)) { | ||
return(FALSE) | ||
} | ||
|
||
df_sort <- function(df) df[do.call(order, df), , drop = FALSE] | ||
|
||
target_df <- df_sort(target) | ||
current_df <- df_sort(current) | ||
|
||
target_names <- names(target_df) | ||
current_names <- names(current_df) | ||
|
||
( | ||
length(target_names) == length(current_names) && | ||
all(target_names == current_names) && | ||
setequal(target_df, current_df) | ||
) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#' Check whether two function objects have the same text definition. | ||
#' | ||
#' @param x First function to compare. | ||
#' @param y Second function to compare. | ||
#' | ||
#' @return Logical | ||
fns_equal <- function(x, y) { | ||
if (!rlang::is_function(x) || !rlang::is_function(y)) { | ||
stop("`x` and `y` must be functions.") | ||
} | ||
|
||
# Replacement for `base::deparse1`, which is only available in R >= 4.0.0. | ||
deparse_1 <- function(z) paste(deparse(z), collapse = " ") | ||
|
||
deparse_1(x) == deparse_1(y) | ||
} |
Oops, something went wrong.