Skip to content

Commit

Permalink
remove file path from cached file hash and make cached file names mor…
Browse files Browse the repository at this point in the history
…e sensible

#115
  • Loading branch information
sebkopf committed Apr 22, 2020
1 parent 90de4b3 commit b2633d7
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 27 deletions.
44 changes: 17 additions & 27 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -666,42 +666,32 @@ match_to_supported_file_types <- function(filepaths_df, extensions_df) {

# caching ====

# generates the cache file paths for iso_files
# generates the cache file paths for iso_files including a hash
# hash includes the file name, the file size and last modified, as well as the read options
# does NOT include: file path, isoreader version, file contents
generate_cache_filepaths <- function(filepaths, read_options = list()) {

  # filepaths: character vector of paths to the source files
  # read_options: list of read options that are folded into each file's hash
  # returns: character vector of cache file paths, one per entry of filepaths,
  #   each located inside default("cache_dir")

  # global vars (column names referenced via non-standard evaluation below)
  size <- mtime <- filepath <- hash <- cache_file <- cache_filepath <- NULL

  # calculate the hash from file name, size, last modified and read options
  # (only the base name goes in, not the directory, so the exact same file
  # stored in a different location resolves to the same cache file)
  calculate_unf_hash <- function(filename, size, modified) {
    obj <- c(list(filename, size, modified), read_options)
    paste(unf(obj)$hash, collapse = "")
  }

  # generate cache filepaths
  file.info(filepaths) %>%
    # file.info() keeps the queried paths as row names
    tibble::rownames_to_column(var = "filepath") %>%
    dplyr::mutate(
      hash = purrr::pmap_chr(
        list(filename = basename(filepath), size = size, modified = mtime),
        calculate_unf_hash
      ),
      cache_file = sprintf("iso_file_%s_%s.rds", basename(filepath), hash),
      cache_filepath = file.path(default("cache_dir"), cache_file)
    ) %>%
    dplyr::pull(cache_filepath)
}

# Cache iso_file
cache_iso_file <- function(iso_file, cachepath) {
  # iso_file: the object to serialize
  # cachepath: path of the .rds file to write
  # returns: the value of readr::write_rds()

  # make sure the cache directory exists (including any missing parents)
  cache_dir <- default("cache_dir")
  if (!dir.exists(cache_dir)) dir.create(cache_dir, recursive = TRUE)

  # write once; the target is passed positionally because the name of
  # write_rds()'s second argument differs between readr versions (path vs. file)
  readr::write_rds(iso_file, cachepath)
}

# Load cached iso_file
Expand Down
33 changes: 33 additions & 0 deletions tests/testthat/test-utils.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
context("Utility functions")

# package version comparisons =====

test_that("package version comparisons works correctly", {

# versions < 1.0
Expand Down Expand Up @@ -29,6 +31,8 @@ test_that("package version comparisons works correctly", {

})

# example files =====

test_that("retrieving example files works correctly", {

expect_true(is.data.frame(iso_get_reader_examples()))
Expand All @@ -38,6 +42,8 @@ test_that("retrieving example files works correctly", {

})

# file extensions helpers ======

test_that("test that file extension helpers work correctly", {

# get file extension
Expand Down Expand Up @@ -350,3 +356,30 @@ test_that("test that info concatenation works", {
expect_equal(get_info_message_concat(rlang::exprs(a = xyz, b = abc), include_names = TRUE, quotes = FALSE, names_sep = " -> "), "a -> xyz, b -> abc")
expect_equal(get_info_message_concat(rlang::quos(a = xyz, b = abc), include_names = TRUE, quotes = FALSE, names_sep = " -> "), "a -> xyz, b -> abc")
})

# cached file paths ======

test_that("test that cached file path hashes work okay", {

  # for direct (interactive) testing use instead:
  # test_folder <- file.path("tests", "testthat", "test_data")
  test_folder <- "test_data"

  # four files with the same base name in different folders
  file_paths <- file.path(test_folder, c(
    "cache_test.did",
    file.path("cache_test1", "cache_test.did"),
    file.path("cache_test2", "cache_test.did"),
    file.path("cache_test3", "cache_test.did")
  ))

  cache_paths <- generate_cache_filepaths(file_paths)

  # exact same file in different locations
  expect_identical(cache_paths[1], cache_paths[2])

  # same names but different file sizes
  expect_false(identical(cache_paths[1], cache_paths[3]))

  # same names but different modified dates
  expect_false(identical(cache_paths[1], cache_paths[4]))

})

0 comments on commit b2633d7

Please sign in to comment.