Improve interface of functions input_to_output

forestgeo · Jun 20, 2018 · be23c40 · be23c40
1 parent 37e30fd
commit be23c40
Show file tree

Hide file tree

Showing 24 changed files with 1,065 additions and 428 deletions.
diff --git a/R/input_to_output.R → R/inout_dfs_to_output.R b/R/input_to_output.R → R/inout_dfs_to_output.R
@@ -1,28 +1,3 @@
-#' Read one excel workbook and map each spreadsheet to a dataframe in a list.
-#'
-#' A useful complement of this function is [dfs_to_csv()].
-#'
-#' @param path A path to an excel file.
-#'
-#' @source Adapted from an article by Jenny Bryan (https://goo.gl/ah8qkX).
-#' @return A list of dataframes.
-#'
-#' @seealso [dfs_to_csv()].
-#' @family functions to handle multiple spreadsheets of an excel workbook.
-#'
-#' @export
-#' @examples
-#' xlsheets_to_dfs(tool_example("multiple_sheets.xlsx"))
-xlsheets_to_dfs <- function(path) {
-  # Piping to avoid useless intermediate variables
-  path %>%
-    readxl::excel_sheets() %>%
-    rlang::set_names() %>%
-    purrr::map(readxl::read_excel, path = path)
-}
-
-
-
 #' Save each dataframe in a list to a different .csv file.
 #'
 #' A useful complement of this function is [xlsheets_to_dfs()].
@@ -53,10 +28,7 @@ dfs_to_csv <- function(dfs, dir, prefix = NULL) {
   }
   validate_dir(dir = dir, dir_name = "`dir`")
 
-  purrr::walk2(
-    dfs, names(dfs),
-    dfs_to_csv_, prefix = prefix, dir = dir
-  )
+  purrr::walk2(dfs, names(dfs), dfs_to_csv_, prefix = prefix, dir = dir)
 }
 
 validate_dir <- function(dir, dir_name) {

diff --git a/R/files_to_df.R → R/inout_files_to_df.R b/R/files_to_df.R → R/inout_files_to_df.R
@@ -1,14 +1,14 @@
-#' Read flat .csv or flat excel files from a directory into dataframes.
+#' Read all .csv or flat excel files from a directory into dataframes.
 #' 
 #' Read all .csv (`csv_*()`), or excel files (`xl_*()`) from a directory into a
 #' single dataframe (`*_df()`) or a list of dataframes (`*_dfs()`) -- where each
 #' element of the list is named as the source file. Excel files are treated as
 #' if they were flat -- meaning that these `xl_*()` functions can read only a
 #' single sheet (specified via the argument `sheet` passed to
-#' [readr::read_excel()] via `...`). For reading multiple all sheets in a single
+#' [readxl::read_excel()] via `...`). To read all sheets of a single
 #' file see [xlsheets_to_dfs()].
 #' 
-#' @param input_dir String giving the directory containing the excel workbooks
+#' @param dir String giving the directory containing the .csv or excel files
 #'   to read from.
 #' @param ... Arguments passed to [readr::read_csv()] (`csv_*()`) or
 #'   [readxl::read_excel()] (`xl_*()`).
@@ -26,21 +26,21 @@
 #' 
 #' xl_to_df(tool_example("multiple_workbooks"))
 #' 
-#' # Pass an argument to `reaxl::read_excel()` via `...`
+#' # Pass an argument to `read_excel()` via `...`
 #' xl_to_dfs(tool_example("multiple_workbooks"), sheet = 2)
 #' 
 #' csv_to_dfs(tool_example("multiple_csv"))
 #' 
 #' csv_to_df(tool_example("multiple_csv"))
 #' 
-#' # Pass an argument to `readr::read_csv()` via `...`
+#' # Pass an argument to `read_csv()` via `...`
 #' csv_to_dfs(tool_example("multiple_csv"), n_max = 2)
 #' @name files_to_df
 NULL
 
 files_to_df <- function(.map, .read, ext) {
-  function(input_dir, ...) {
-    files <- fs::dir_ls(input_dir, regexp = ext)
+  function(dir, ...) {
+    files <- fs::dir_ls(dir, regexp = ext)
     dfs <- .map(files, .read, ...)
     rlang::set_names(dfs, fs::path_file(names(dfs)))
   }
@@ -59,3 +59,27 @@ xl_to_df <- files_to_df(purrr::map_df,  readxl::read_excel, "xls|xlsx")
 #' @name files_to_df
 xl_to_dfs <- files_to_df(purrr::map, readxl::read_excel, "xls|xlsx")
 
+
+
+#' Read one excel workbook and map each spreadsheet to a dataframe in a list.
+#'
+#' A useful complement of this function is [dfs_to_csv()].
+#'
+#' @param path A path to an excel file.
+#'
+#' @source Adapted from an article by Jenny Bryan (https://goo.gl/ah8qkX).
+#' @return A list of dataframes.
+#'
+#' @seealso [dfs_to_csv()].
+#' @family functions to handle multiple spreadsheets of an excel workbook.
+#'
+#' @export
+#' @examples
+#' xlsheets_to_dfs(tool_example("multiple_sheets.xlsx"))
+xlsheets_to_dfs <- function(path) {
+  # Piping to avoid useless intermediate variables
+  path %>%
+    readxl::excel_sheets() %>%
+    rlang::set_names() %>%
+    purrr::map(readxl::read_excel, path = path)
+}
diff --git a/R/xlff_to_output.R → R/inout_xlff_to_output.R b/R/xlff_to_output.R → R/inout_xlff_to_output.R
@@ -22,9 +22,9 @@
 #' * Drops fake stems.
 #' * Output a common data structure of your choice.
 #'
-#' @param input_dir String giving the directory containing the excel workbooks
+#' @param dir_in String giving the directory containing the excel workbooks
 #'   to read from.
-#' @param output_dir String giving the directory where to write .csv files to.
+#' @param dir_out String giving the directory where to write .csv files to.
 #' @param first_census Use `TRUE` if this is your first census. Use `FALSE`
 #'   (default) if this is not your first census but a recensus.
 #' 
@@ -45,45 +45,45 @@
 #' 
 #' # NOT A FIRST CENSUS
 #' # Path to the folder I want to read excel files from
-#' input_dir <- dirname(tool_example("two_files/new_stem_1.xlsx"))
-#' input_dir
+#' dir_in <- dirname(tool_example("two_files/new_stem_1.xlsx"))
+#' dir_in
 #' 
 #' # Files I want to read
-#' dir(input_dir, pattern = "xlsx")
+#' dir(dir_in, pattern = "xlsx")
 #' 
 #' # Path to the folder I want to write .csv files to
-#' output_dir <- tempdir()
+#' dir_out <- tempdir()
 #' 
 #' # Output a csv file
-#' xlff_to_csv(input_dir, output_dir)
+#' xlff_to_csv(dir_in, dir_out)
 #' 
 #' # Confirm
-#' path_file(dir_ls(output_dir, regexp = "new_stem.*csv$"))
+#' path_file(dir_ls(dir_out, regexp = "new_stem.*csv$"))
 #' 
 #' # Also possible to output excel and a list of dataframes. See next section.
 #' 
 #' # FIRST CENSUS
-#' input_dir <- dirname(tool_example("first_census/census.xlsx"))
+#' dir_in <- dirname(tool_example("first_census/census.xlsx"))
 #' # As a reminder you'll get a warning of missing sheets
 #' # Output list of dataframes (one per input workbook -- here only one)
-#' dfs <- xlff_to_dfs(input_dir, first_census = TRUE)
+#' dfs <- xlff_to_dfs(dir_in, first_census = TRUE)
 #' str(dfs, give.attr = FALSE)
 #' 
 #' # Output excel
-#' xlff_to_xl(input_dir, output_dir, first_census = TRUE)
+#' xlff_to_xl(dir_in, dir_out, first_census = TRUE)
 #' # Read back
-#' filename <- path(output_dir, "census.xlsx")
+#' filename <- path(dir_out, "census.xlsx")
 #' out <- read_excel(filename)
 #' str(out, give.attr = FALSE)
 #' @name xlff_to_output
 NULL
 
 xlff_to_file <- function(ext, fun_write) {
-    function(input_dir, output_dir = "./", first_census = FALSE) {
-    check_output_dir(output_dir = output_dir, print_as = "`output_dir`")
-    dfs <- xlff_to_dfs(input_dir = input_dir, first_census = first_census)
+    function(dir_in, dir_out = "./", first_census = FALSE) {
+    check_dir_out(dir_out = dir_out, print_as = "`dir_out`")
+    dfs <- xlff_to_dfs(dir_in = dir_in, first_census = first_census)
     files <- fs::path_ext_remove(names(dfs))
-    paths <- fs::path(output_dir, fs::path_ext_set(files, ext))
+    paths <- fs::path(dir_out, fs::path_ext_set(files, ext))
     purrr::walk2(dfs, paths, fun_write)
   }
 }
@@ -98,10 +98,10 @@ xlff_to_xl <- xlff_to_file("xlsx", writexl::write_xlsx)
 
 #' @export
 #' @rdname xlff_to_output
-xlff_to_dfs <- function(input_dir, first_census = FALSE) {
-  check_input_dir(input_dir = input_dir, print_as = "`input_dir`")
+xlff_to_dfs <- function(dir_in, first_census = FALSE) {
+  check_dir_in(dir_in = dir_in, print_as = "`dir_in`")
   out <- purrr::map(
-    xl_workbooks_to_chr(input_dir), 
+    xl_workbooks_to_chr(dir_in), 
     xlff_to_dfs_, first_census = first_census
   )
   purrr::set_names(out, basename(names(out)))
@@ -222,20 +222,20 @@ join_and_date <- function(.x) {
     dplyr::left_join(date, by = "submission_id")
 }
 
-check_input_dir <- function(input_dir, print_as) {
-  stopifnot(is.character(input_dir))
-  validate_dir(input_dir, "`input_dir`")
-  msg <- "`input_dir` must contain at least one excel file."
-  file_names <- xl_workbooks_to_chr(input_dir)
+check_dir_in <- function(dir_in, print_as) {
+  stopifnot(is.character(dir_in))
+  validate_dir(dir_in, "`dir_in`")
+  msg <- "`dir_in` must contain at least one excel file."
+  file_names <- xl_workbooks_to_chr(dir_in)
   if (length(file_names) == 0) {
     abort(msg)
   }
   invisible()
 }
 
-check_output_dir <- function(output_dir, print_as) {
-  stopifnot(is.character(output_dir))
-  validate_dir(output_dir, "`output_dir`")
+check_dir_out <- function(dir_out, print_as) {
+  stopifnot(is.character(dir_out))
+  validate_dir(dir_out, "`dir_out`")
   invisible()
 }
 
@@ -252,6 +252,6 @@ validate_dir <- function(dir, dir_name) {
   }
 }
 
-xl_workbooks_to_chr <- function(input_dir) {
-  fs::dir_ls(input_dir, regexp = "\\.xls")
+xl_workbooks_to_chr <- function(dir_in) {
+  fs::dir_ls(dir_in, regexp = "\\.xls")
 }
diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -42,24 +42,23 @@ reference:
     - type_fgeo
 
   - title: Input to output
-    desc: General (not specificto ForestGEO)
+    desc: General (not specific to ForestGEO)
     contents:
-    - files_to_df
     - csv_to_df
     - csv_to_dfs
+
+    - dfs_to_csv
+    - dfs_to_df
+
     - xl_to_df
     - xl_to_dfs
 
-    - xlff_to_output
+    - xlsheets_to_dfs
+
     - xlff_to_csv
     - xlff_to_dfs
     - xlff_to_xl
 
-    - xlsheets_to_dfs
-    - dfs_to_csv
-    - ls_join_df
-
-
   - title: Pick/drop rows with matching conditions 
     desc: Specific to ForestGEO
     contents:
@@ -80,7 +79,7 @@ reference:
     - recode_subquad
 
   - title: Edit dataframe columns or vectors
-    desc: General (not specificto ForestGEO)
+    desc: General (not specific to ForestGEO)
     contents:
     - conv_unit_at
     - conv_unit
@@ -104,7 +103,7 @@ reference:
     - fgeo_habitat
 
   - title: For developers
-    desc: General (not specificto ForestGEO)
+    desc: General (not specific to ForestGEO)
     contents:
     - hide_data_of_class
     - show_data_of_class

diff --git a/docs/articles/read_df_write.html b/docs/articles/read_df_write.html