Skip to content

Commit

Permalink
Improve interface of functions input_to_output
Browse files Browse the repository at this point in the history
  • Loading branch information
maurolepore committed Jun 20, 2018
1 parent 37e30fd commit be23c40
Show file tree
Hide file tree
Showing 24 changed files with 1,065 additions and 428 deletions.
30 changes: 1 addition & 29 deletions R/input_to_output.R → R/inout_dfs_to_output.R
Original file line number Diff line number Diff line change
@@ -1,28 +1,3 @@
#' Read one excel workbook and map each spreadsheet to a dataframe in a list.
#'
#' A useful complement of this function is [dfs_to_csv()].
#'
#' @param path A path to an excel file.
#'
#' @source Adapted from an article by Jenny Bryan (https://goo.gl/ah8qkX).
#' @return A list of dataframes.
#'
#' @seealso [dfs_to_csv()].
#' @family functions to handle multiple spreadsheets of an excel workbook.
#'
#' @export
#' @examples
#' xlsheets_to_dfs(tool_example("multiple_sheets.xlsx"))
xlsheets_to_dfs <- function(path) {
  # Name each sheet after itself so the resulting list is named by sheet
  sheets <- rlang::set_names(readxl::excel_sheets(path))
  # `path` is passed by name, so each sheet name lands in the next free
  # argument of `read_excel()`
  purrr::map(sheets, readxl::read_excel, path = path)
}



#' Save each dataframe in a list to a different .csv file.
#'
#' A useful complement of this function is [xlsheets_to_dfs()].
Expand Down Expand Up @@ -53,10 +28,7 @@ dfs_to_csv <- function(dfs, dir, prefix = NULL) {
}
validate_dir(dir = dir, dir_name = "`dir`")

purrr::walk2(
dfs, names(dfs),
dfs_to_csv_, prefix = prefix, dir = dir
)
purrr::walk2(dfs, names(dfs), dfs_to_csv_, prefix = prefix, dir = dir)
}

validate_dir <- function(dir, dir_name) {
Expand Down
38 changes: 31 additions & 7 deletions R/files_to_df.R → R/inout_files_to_df.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
#' Read flat .csv or flat excel files from a directory into dataframes.
#' Read all .csv or flat excel files from a directory into dataframes.
#'
#' Read all .csv (`csv_*()`), or excel files (`xl_*()`) from a directory into a
#' single dataframe (`*_df()`) or a list of dataframes (`*_dfs()`) -- where each
#' element of the list is named as the source file. Excel files are treated as
#' if they were flat -- meaning that these `xl_*()` functions can read only a
#' single sheet (specified via the argument `sheet` passed to
#' [readr::read_excel()] via `...`). For reading multiple all sheets in a single
#' [readxl::read_excel()] via `...`). For reading all sheets in a single
#' file see [xlsheets_to_dfs()].
#'
#' @param input_dir String giving the directory containing the excel workbooks
#' @param dir String giving the directory containing the .csv or excel files
#' to read from.
#' @param ... Arguments passed to [readr::read_csv()] (`csv_*()`) or
#' [readxl::read_excel()] (`xl_*()`).
Expand All @@ -26,21 +26,21 @@
#'
#' xl_to_df(tool_example("multiple_workbooks"))
#'
#' # Pass an argument to `reaxl::read_excel()` via `...`
#' # Pass an argument to `read_excel()` via `...`
#' xl_to_dfs(tool_example("multiple_workbooks"), sheet = 2)
#'
#' csv_to_dfs(tool_example("multiple_csv"))
#'
#' csv_to_df(tool_example("multiple_csv"))
#'
#' # Pass an argument to `readr::read_csv()` via `...`
#' # Pass an argument to `read_csv()` via `...`
#' csv_to_dfs(tool_example("multiple_csv"), n_max = 2)
#' @name files_to_df
NULL

files_to_df <- function(.map, .read, ext) {
function(input_dir, ...) {
files <- fs::dir_ls(input_dir, regexp = ext)
function(dir, ...) {
files <- fs::dir_ls(dir, regexp = ext)
dfs <- .map(files, .read, ...)
rlang::set_names(dfs, fs::path_file(names(dfs)))
}
Expand All @@ -59,3 +59,27 @@ xl_to_df <- files_to_df(purrr::map_df, readxl::read_excel, "xls|xlsx")
#' @name files_to_df
xl_to_dfs <- files_to_df(purrr::map, readxl::read_excel, "xls|xlsx")



#' Read one excel workbook and map each spreadsheet to a dataframe in a list.
#'
#' A useful complement of this function is [dfs_to_csv()].
#'
#' @param path A path to an excel file.
#'
#' @source Adapted from an article by Jenny Bryan (https://goo.gl/ah8qkX).
#' @return A list of dataframes.
#'
#' @seealso [dfs_to_csv()].
#' @family functions to handle multiple spreadsheets of an excel workbook.
#'
#' @export
#' @examples
#' xlsheets_to_dfs(tool_example("multiple_sheets.xlsx"))
xlsheets_to_dfs <- function(path) {
  # Name each sheet after itself so the resulting list is named by sheet
  sheets <- rlang::set_names(readxl::excel_sheets(path))
  # `path` is passed by name, so each sheet name lands in the next free
  # argument of `read_excel()`
  purrr::map(sheets, readxl::read_excel, path = path)
}
58 changes: 29 additions & 29 deletions R/xlff_to_output.R → R/inout_xlff_to_output.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
#' * Drops fake stems.
#' * Outputs a common data structure of your choice.
#'
#' @param input_dir String giving the directory containing the excel workbooks
#' @param dir_in String giving the directory containing the excel workbooks
#' to read from.
#' @param output_dir String giving the directory where to write .csv files to.
#' @param dir_out String giving the directory where to write .csv files to.
#' @param first_census Use `TRUE` if this is your first census. Use `FALSE`
#' (default) if this is not your first census but a recensus.
#'
Expand All @@ -45,45 +45,45 @@
#'
#' # NOT A FIRST CENSUS
#' # Path to the folder I want to read excel files from
#' input_dir <- dirname(tool_example("two_files/new_stem_1.xlsx"))
#' input_dir
#' dir_in <- dirname(tool_example("two_files/new_stem_1.xlsx"))
#' dir_in
#'
#' # Files I want to read
#' dir(input_dir, pattern = "xlsx")
#' dir(dir_in, pattern = "xlsx")
#'
#' # Path to the folder I want to write .csv files to
#' output_dir <- tempdir()
#' dir_out <- tempdir()
#'
#' # Output a csv file
#' xlff_to_csv(input_dir, output_dir)
#' xlff_to_csv(dir_in, dir_out)
#'
#' # Confirm
#' path_file(dir_ls(output_dir, regexp = "new_stem.*csv$"))
#' path_file(dir_ls(dir_out, regexp = "new_stem.*csv$"))
#'
#' # It is also possible to output excel files or a list of dataframes. See next section.
#'
#' # FIRST CENSUS
#' input_dir <- dirname(tool_example("first_census/census.xlsx"))
#' dir_in <- dirname(tool_example("first_census/census.xlsx"))
#' # As a reminder, you'll get a warning about missing sheets
#' # Output list of dataframes (one per input workbook -- here only one)
#' dfs <- xlff_to_dfs(input_dir, first_census = TRUE)
#' dfs <- xlff_to_dfs(dir_in, first_census = TRUE)
#' str(dfs, give.attr = FALSE)
#'
#' # Output excel
#' xlff_to_xl(input_dir, output_dir, first_census = TRUE)
#' xlff_to_xl(dir_in, dir_out, first_census = TRUE)
#' # Read back
#' filename <- path(output_dir, "census.xlsx")
#' filename <- path(dir_out, "census.xlsx")
#' out <- read_excel(filename)
#' str(out, give.attr = FALSE)
#' @name xlff_to_output
NULL

# Function factory: given a file extension `ext` and a writer `fun_write`,
# returns a function that reads the workbooks of an input directory with
# `xlff_to_dfs()` and writes one file per resulting dataframe into an output
# directory (default "./").
# NOTE(review): this span interleaves the pre- and post-rename lines of the
# commit (`input_dir`/`output_dir` became `dir_in`/`dir_out`).
xlff_to_file <- function(ext, fun_write) {
function(input_dir, output_dir = "./", first_census = FALSE) {
check_output_dir(output_dir = output_dir, print_as = "`output_dir`")
dfs <- xlff_to_dfs(input_dir = input_dir, first_census = first_census)
function(dir_in, dir_out = "./", first_census = FALSE) {
# Validate the output directory before doing any reading
check_dir_out(dir_out = dir_out, print_as = "`dir_out`")
dfs <- xlff_to_dfs(dir_in = dir_in, first_census = first_census)
# Output file names: the names of `dfs` with their extension replaced by `ext`
files <- fs::path_ext_remove(names(dfs))
paths <- fs::path(output_dir, fs::path_ext_set(files, ext))
paths <- fs::path(dir_out, fs::path_ext_set(files, ext))
# Called for its side effect: `fun_write` receives each dataframe and its path
purrr::walk2(dfs, paths, fun_write)
}
}
Expand All @@ -98,10 +98,10 @@ xlff_to_xl <- xlff_to_file("xlsx", writexl::write_xlsx)

#' @export
#' @rdname xlff_to_output
xlff_to_dfs <- function(input_dir, first_census = FALSE) {
check_input_dir(input_dir = input_dir, print_as = "`input_dir`")
xlff_to_dfs <- function(dir_in, first_census = FALSE) {
check_dir_in(dir_in = dir_in, print_as = "`dir_in`")
out <- purrr::map(
xl_workbooks_to_chr(input_dir),
xl_workbooks_to_chr(dir_in),
xlff_to_dfs_, first_census = first_census
)
purrr::set_names(out, basename(names(out)))
Expand Down Expand Up @@ -222,20 +222,20 @@ join_and_date <- function(.x) {
dplyr::left_join(date, by = "submission_id")
}

# Validate the input directory: it must be a character, pass `validate_dir()`,
# and contain at least one file matched by `xl_workbooks_to_chr()`; otherwise
# abort with `msg`. Returns invisibly.
# NOTE(review): `print_as` is accepted but never used in the body; the argument
# name is hard-coded in the call to `validate_dir()` and in `msg`.
# NOTE(review): this span interleaves the pre- and post-rename lines of the
# commit (`input_dir` became `dir_in`).
check_input_dir <- function(input_dir, print_as) {
stopifnot(is.character(input_dir))
validate_dir(input_dir, "`input_dir`")
msg <- "`input_dir` must contain at least one excel file."
file_names <- xl_workbooks_to_chr(input_dir)
check_dir_in <- function(dir_in, print_as) {
stopifnot(is.character(dir_in))
validate_dir(dir_in, "`dir_in`")
msg <- "`dir_in` must contain at least one excel file."
file_names <- xl_workbooks_to_chr(dir_in)
# An empty listing means no excel workbook was found in the directory
if (length(file_names) == 0) {
abort(msg)
}
invisible()
}

# Validate the output directory: it must be a character and pass
# `validate_dir()`. Returns invisibly.
# NOTE(review): `print_as` is accepted but never used in the body; the argument
# name is hard-coded in the call to `validate_dir()`.
# NOTE(review): this span interleaves the pre- and post-rename lines of the
# commit (`output_dir` became `dir_out`).
check_output_dir <- function(output_dir, print_as) {
stopifnot(is.character(output_dir))
validate_dir(output_dir, "`output_dir`")
check_dir_out <- function(dir_out, print_as) {
stopifnot(is.character(dir_out))
validate_dir(dir_out, "`dir_out`")
invisible()
}

Expand All @@ -252,6 +252,6 @@ validate_dir <- function(dir, dir_name) {
}
}

# List the entries of a directory whose path matches the regular expression
# "\\.xls" (a literal ".xls").
# NOTE(review): the pattern is not anchored at the end, so it also matches
# ".xlsx" and any other path that merely contains ".xls" -- confirm this is
# intended.
# NOTE(review): this span interleaves the pre- and post-rename lines of the
# commit (`input_dir` became `dir_in`).
xl_workbooks_to_chr <- function(input_dir) {
fs::dir_ls(input_dir, regexp = "\\.xls")
xl_workbooks_to_chr <- function(dir_in) {
fs::dir_ls(dir_in, regexp = "\\.xls")
}
19 changes: 9 additions & 10 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,24 +42,23 @@ reference:
- type_fgeo

- title: Input to output
desc: General (not specificto ForestGEO)
desc: General (not specific to ForestGEO)
contents:
- files_to_df
- csv_to_df
- csv_to_dfs

- dfs_to_csv
- dfs_to_df

- xl_to_df
- xl_to_dfs

- xlff_to_output
- xlsheets_to_dfs

- xlff_to_csv
- xlff_to_dfs
- xlff_to_xl

- xlsheets_to_dfs
- dfs_to_csv
- ls_join_df


- title: Pick/drop rows with matching conditions
desc: Specific to ForestGEO
contents:
Expand All @@ -80,7 +79,7 @@ reference:
- recode_subquad

- title: Edit dataframe columns or vectors
desc: General (not specificto ForestGEO)
desc: General (not specific to ForestGEO)
contents:
- conv_unit_at
- conv_unit
Expand All @@ -104,7 +103,7 @@ reference:
- fgeo_habitat

- title: For developers
desc: General (not specificto ForestGEO)
desc: General (not specific to ForestGEO)
contents:
- hide_data_of_class
- show_data_of_class
Expand Down
9 changes: 3 additions & 6 deletions docs/articles/read_df_write.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit be23c40

Please sign in to comment.