From be23c401ad3bc1b16d1cbd642e12721c46c4c117 Mon Sep 17 00:00:00 2001 From: maurolepore Date: Wed, 20 Jun 2018 18:55:26 -0400 Subject: [PATCH] Improve interface of functions input_to_output --- ...nput_to_output.R => inout_dfs_to_output.R} | 30 +-- R/{files_to_df.R => inout_files_to_df.R} | 38 ++- ...lff_to_output.R => inout_xlff_to_output.R} | 58 ++--- _pkgdown.yml | 19 +- docs/articles/read_df_write.html | 9 +- docs/index.html | 238 ++++++++---------- docs/reference/dfs_to_csv.html | 205 +++++++++++++++ docs/reference/dfs_to_df.html | 222 ++++++++++++++++ docs/reference/files_to_df.html | 226 ++++++++++------- docs/reference/index.html | 32 +-- docs/reference/xlff_to_output.html | 32 +-- docs/reference/xlsheets_to_dfs.html | 211 ++++++++++++++++ docs/sitemap.xml | 6 +- man/dfs_to_csv.Rd | 6 +- man/dfs_to_df.Rd | 12 +- man/files_to_df.Rd | 20 +- man/xlff_to_output.Rd | 32 +-- man/xlsheets_to_dfs.Rd | 2 +- tests/testthat/test-files_to_df.R | 16 -- ...to_output.R => test-inout_dfs_to_output.R} | 31 +-- tests/testthat/test-inout_files_to_df.R | 26 ++ ...o_output.R => test-inout_xlff_to_output.R} | 10 +- tmp.R | 8 +- vignettes/read_df_write.Rmd | 4 +- 24 files changed, 1065 insertions(+), 428 deletions(-) rename R/{input_to_output.R => inout_dfs_to_output.R} (81%) rename R/{files_to_df.R => inout_files_to_df.R} (61%) rename R/{xlff_to_output.R => inout_xlff_to_output.R} (81%) create mode 100644 docs/reference/dfs_to_csv.html create mode 100644 docs/reference/dfs_to_df.html create mode 100644 docs/reference/xlsheets_to_dfs.html delete mode 100644 tests/testthat/test-files_to_df.R rename tests/testthat/{test-input_to_output.R => test-inout_dfs_to_output.R} (57%) create mode 100644 tests/testthat/test-inout_files_to_df.R rename tests/testthat/{test-xlff_to_output.R => test-inout_xlff_to_output.R} (95%) diff --git a/R/input_to_output.R b/R/inout_dfs_to_output.R similarity index 81% rename from R/input_to_output.R rename to R/inout_dfs_to_output.R index 1f7f26c1..501edbec 
100644 --- a/R/input_to_output.R +++ b/R/inout_dfs_to_output.R @@ -1,28 +1,3 @@ -#' Read one excel workbook and map each spreadsheet to a dataframe in a list. -#' -#' A useful complement of this function is [dfs_to_csv()]. -#' -#' @param path A path to an excel file. -#' -#' @source Adapted from an article by Jenny Bryan (https://goo.gl/ah8qkX). -#' @return A list of dataframes. -#' -#' @seealso [dfs_to_csv()]. -#' @family functions to handle multiple spreadsheets of an excel workbook. -#' -#' @export -#' @examples -#' xlsheets_to_dfs(tool_example("multiple_sheets.xlsx")) -xlsheets_to_dfs <- function(path) { - # Piping to avoid useless intermediate variables - path %>% - readxl::excel_sheets() %>% - rlang::set_names() %>% - purrr::map(readxl::read_excel, path = path) -} - - - #' Save each dataframe in a list to a different .csv file. #' #' A useful complement of this function is [xlsheets_to_dfs()]. @@ -53,10 +28,7 @@ dfs_to_csv <- function(dfs, dir, prefix = NULL) { } validate_dir(dir = dir, dir_name = "`dir`") - purrr::walk2( - dfs, names(dfs), - dfs_to_csv_, prefix = prefix, dir = dir - ) + purrr::walk2(dfs, names(dfs), dfs_to_csv_, prefix = prefix, dir = dir) } validate_dir <- function(dir, dir_name) { diff --git a/R/files_to_df.R b/R/inout_files_to_df.R similarity index 61% rename from R/files_to_df.R rename to R/inout_files_to_df.R index 74a42ed8..b7469ba5 100644 --- a/R/files_to_df.R +++ b/R/inout_files_to_df.R @@ -1,14 +1,14 @@ -#' Read flat .csv or flat excel files from a directory into dataframes. +#' Read all .csv or flat excel files from a directory into dataframes. #' #' Read all .csv (`csv_*()`), or excel files (`xl_*()`) from a directory into a #' single dataframe (`*_df()`) or a list of dataframes (`*_dfs()`) -- where each #' element of the list is named as the source file. 
Excel files are treated as #' if they were flat -- meaning that these `xl_*()` functions can read only a #' single sheet (specified via the argument `sheet` passed to -#' [readr::read_excel()] via `...`). For reading multiple all sheets in a single +#' [readxl::read_excel()] via `...`). For reading multiple all sheets in a single #' file see [xlsheets_to_dfs()]. #' -#' @param input_dir String giving the directory containing the excel workbooks +#' @param dir String giving the directory containing the excel workbooks #' to read from. #' @param ... Arguments passed to [readr::read_csv()] (`csv_*()`) or #' [readxl::read_excel()] (`xl_*()`). @@ -26,21 +26,21 @@ #' #' xl_to_df(tool_example("multiple_workbooks")) #' -#' # Pass an argument to `reaxl::read_excel()` via `...` +#' # Pass an argument to `read_excel()` via `...` #' xl_to_dfs(tool_example("multiple_workbooks"), sheet = 2) #' #' csv_to_dfs(tool_example("multiple_csv")) #' #' csv_to_df(tool_example("multiple_csv")) #' -#' # Pass an argument to `readr::read_csv()` via `...` +#' # Pass an argument to `read_csv()` via `...` #' csv_to_dfs(tool_example("multiple_csv"), n_max = 2) #' @name files_to_df NULL files_to_df <- function(.map, .read, ext) { - function(input_dir, ...) { - files <- fs::dir_ls(input_dir, regexp = ext) + function(dir, ...) { + files <- fs::dir_ls(dir, regexp = ext) dfs <- .map(files, .read, ...) rlang::set_names(dfs, fs::path_file(names(dfs))) } @@ -59,3 +59,27 @@ xl_to_df <- files_to_df(purrr::map_df, readxl::read_excel, "xls|xlsx") #' @name files_to_df xl_to_dfs <- files_to_df(purrr::map, readxl::read_excel, "xls|xlsx") + + +#' Read one excel workbook and map each spreadsheet to a dataframe in a list. +#' +#' A useful complement of this function is [dfs_to_csv()]. +#' +#' @param path A path to an excel file. +#' +#' @source Adapted from an article by Jenny Bryan (https://goo.gl/ah8qkX). +#' @return A list of dataframes. +#' +#' @seealso [dfs_to_csv()]. 
+#' @family functions to handle multiple spreadsheets of an excel workbook. +#' +#' @export +#' @examples +#' xlsheets_to_dfs(tool_example("multiple_sheets.xlsx")) +xlsheets_to_dfs <- function(path) { + # Piping to avoid useless intermediate variables + path %>% + readxl::excel_sheets() %>% + rlang::set_names() %>% + purrr::map(readxl::read_excel, path = path) +} diff --git a/R/xlff_to_output.R b/R/inout_xlff_to_output.R similarity index 81% rename from R/xlff_to_output.R rename to R/inout_xlff_to_output.R index 83620b76..eb2c2b02 100644 --- a/R/xlff_to_output.R +++ b/R/inout_xlff_to_output.R @@ -22,9 +22,9 @@ #' * Drops fake stems. #' * Output a common data structure of your choice. #' -#' @param input_dir String giving the directory containing the excel workbooks +#' @param dir_in String giving the directory containing the excel workbooks #' to read from. -#' @param output_dir String giving the directory where to write .csv files to. +#' @param dir_out String giving the directory where to write .csv files to. #' @param first_census Use `TRUE` if this is your first census. Use `FALSE` #' (default) if this is not your first census but a recensus. #' @@ -45,45 +45,45 @@ #' #' # NOT A FIRST CENSUS #' # Path to the folder I want to read excel files from -#' input_dir <- dirname(tool_example("two_files/new_stem_1.xlsx")) -#' input_dir +#' dir_in <- dirname(tool_example("two_files/new_stem_1.xlsx")) +#' dir_in #' #' # Files I want to read -#' dir(input_dir, pattern = "xlsx") +#' dir(dir_in, pattern = "xlsx") #' #' # Path to the folder I want to write .csv files to -#' output_dir <- tempdir() +#' dir_out <- tempdir() #' #' # Output a csv file -#' xlff_to_csv(input_dir, output_dir) +#' xlff_to_csv(dir_in, dir_out) #' #' # Confirm -#' path_file(dir_ls(output_dir, regexp = "new_stem.*csv$")) +#' path_file(dir_ls(dir_out, regexp = "new_stem.*csv$")) #' #' # Also possible to output excel and a list of dataframe. See next section. 
#' #' # FIRST CENSUS -#' input_dir <- dirname(tool_example("first_census/census.xlsx")) +#' dir_in <- dirname(tool_example("first_census/census.xlsx")) #' # As a reminder you'll get a warning of missing sheets #' # Output list of dataframes (one per input workbook -- here only one) -#' dfs <- xlff_to_dfs(input_dir, first_census = TRUE) +#' dfs <- xlff_to_dfs(dir_in, first_census = TRUE) #' str(dfs, give.attr = FALSE) #' #' # Output excel -#' xlff_to_xl(input_dir, output_dir, first_census = TRUE) +#' xlff_to_xl(dir_in, dir_out, first_census = TRUE) #' # Read back -#' filename <- path(output_dir, "census.xlsx") +#' filename <- path(dir_out, "census.xlsx") #' out <- read_excel(filename) #' str(out, give.attr = FALSE) #' @name xlff_to_output NULL xlff_to_file <- function(ext, fun_write) { - function(input_dir, output_dir = "./", first_census = FALSE) { - check_output_dir(output_dir = output_dir, print_as = "`output_dir`") - dfs <- xlff_to_dfs(input_dir = input_dir, first_census = first_census) + function(dir_in, dir_out = "./", first_census = FALSE) { + check_dir_out(dir_out = dir_out, print_as = "`dir_out`") + dfs <- xlff_to_dfs(dir_in = dir_in, first_census = first_census) files <- fs::path_ext_remove(names(dfs)) - paths <- fs::path(output_dir, fs::path_ext_set(files, ext)) + paths <- fs::path(dir_out, fs::path_ext_set(files, ext)) purrr::walk2(dfs, paths, fun_write) } } @@ -98,10 +98,10 @@ xlff_to_xl <- xlff_to_file("xlsx", writexl::write_xlsx) #' @export #' @rdname xlff_to_output -xlff_to_dfs <- function(input_dir, first_census = FALSE) { - check_input_dir(input_dir = input_dir, print_as = "`input_dir`") +xlff_to_dfs <- function(dir_in, first_census = FALSE) { + check_dir_in(dir_in = dir_in, print_as = "`dir_in`") out <- purrr::map( - xl_workbooks_to_chr(input_dir), + xl_workbooks_to_chr(dir_in), xlff_to_dfs_, first_census = first_census ) purrr::set_names(out, basename(names(out))) @@ -222,20 +222,20 @@ join_and_date <- function(.x) { dplyr::left_join(date, by = 
"submission_id") } -check_input_dir <- function(input_dir, print_as) { - stopifnot(is.character(input_dir)) - validate_dir(input_dir, "`input_dir`") - msg <- "`input_dir` must contain at least one excel file." - file_names <- xl_workbooks_to_chr(input_dir) +check_dir_in <- function(dir_in, print_as) { + stopifnot(is.character(dir_in)) + validate_dir(dir_in, "`dir_in`") + msg <- "`dir_in` must contain at least one excel file." + file_names <- xl_workbooks_to_chr(dir_in) if (length(file_names) == 0) { abort(msg) } invisible() } -check_output_dir <- function(output_dir, print_as) { - stopifnot(is.character(output_dir)) - validate_dir(output_dir, "`output_dir`") +check_dir_out <- function(dir_out, print_as) { + stopifnot(is.character(dir_out)) + validate_dir(dir_out, "`dir_out`") invisible() } @@ -252,6 +252,6 @@ validate_dir <- function(dir, dir_name) { } } -xl_workbooks_to_chr <- function(input_dir) { - fs::dir_ls(input_dir, regexp = "\\.xls") +xl_workbooks_to_chr <- function(dir_in) { + fs::dir_ls(dir_in, regexp = "\\.xls") } diff --git a/_pkgdown.yml b/_pkgdown.yml index 34d75e2e..d922b22f 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -42,24 +42,23 @@ reference: - type_fgeo - title: Input to output - desc: General (not specificto ForestGEO) + desc: General (not specific to ForestGEO) contents: - - files_to_df - csv_to_df - csv_to_dfs + + - dfs_to_csv + - dfs_to_df + - xl_to_df - xl_to_dfs - - xlff_to_output + - xlsheets_to_dfs + - xlff_to_csv - xlff_to_dfs - xlff_to_xl - - xlsheets_to_dfs - - dfs_to_csv - - ls_join_df - - - title: Pick/drop rows with matching conditions desc: Specific to ForestGEO contents: @@ -80,7 +79,7 @@ reference: - recode_subquad - title: Edit dataframe columns or vectors - desc: General (not specificto ForestGEO) + desc: General (not specific to ForestGEO) contents: - conv_unit_at - conv_unit @@ -104,7 +103,7 @@ reference: - fgeo_habitat - title: For developers - desc: General (not specificto ForestGEO) + desc: General (not specific to 
ForestGEO) contents: - hide_data_of_class - show_data_of_class diff --git a/docs/articles/read_df_write.html b/docs/articles/read_df_write.html index 4f47c524..c0031a66 100644 --- a/docs/articles/read_df_write.html +++ b/docs/articles/read_df_write.html @@ -108,12 +108,9 @@

2018-06-20

library(writexl)

If you have multiple files in a directory, you may want to combine them all into a single dataframe and export it as a .csv or excel file. Here is how you can do this and a little more.

Suppose you have these files in a directory:

-
path <- system.file("extdata", "files/01.csv", package = "fgeo.tool")
-files <- path_dir(path)
-files
-#> C:/Users/LeporeM/Documents/R/R-3.5.0/library/fgeo.tool/extdata/files
-dir(files)
-#> [1] "01.csv"  "01.xls"  "02.csv"  "02.xlsx"
+
files <- tool_example("files")
+dir(files)
+#> [1] "01.csv"  "01.xls"  "02.csv"  "02.xlsx"

Let’s read all the .csv files into a single dataframe.

combined <- csv_to_df(files)
 #> Parsed with column specification:
diff --git a/docs/index.html b/docs/index.html
index 11bd2120..632491fa 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -1,57 +1,25 @@
-
 
-
-  
-  
+
+
+
+
 
 
-
 Functions for General Purposes • fgeo.tool
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
-
-
-  
-
-  
+
+
     
-
- + +
+ - - - -
+
- +
-

Functions for general purposes

-

lifecycle Travis build status Coverage status

+ +

The goal of fgeo.tool is to provide functions for general purposes. Many of its functions are used in multiple other fgeo packages so fgeo.tool acts as a central repository of code. In particular, the packages. For general purpose functions with no external dependency see fgeo.base.

-

Installation

-
# install.packages("remotes")
-remotes::install_github("forestgeo/fgeo.tool")
+

+Installation

+
# install.packages("remotes")
+remotes::install_github("forestgeo/fgeo.tool")

For details on how to install packages from GitHub, see this article.

-

Example

+

+Example

Setup.

Manipulate data.

# Mutate a data set
 
 # Determine the status of each tree based on the status of its stems
-df <- add_status_tree(df)
+df <- add_status_tree(df)
 #> Warning: No observation has .status = D, A
 #>   * Detected values: alive, dead
 
 # Filter a data set
 
 # Filter from the head or tail of a variable
-pick_top(df, Tag)
+pick_top(df, Tag)
 #> # A tibble: 4 x 4
 #>   CensusID   Tag Status status_tree
 #>      <dbl> <dbl> <chr>  <chr>      
@@ -187,7 +149,7 @@ 

Example

#> 2 1 1 dead A #> 3 2 1 alive A #> 4 2 1 alive A -pick_top(df, Tag, -1) +pick_top(df, Tag, -1) #> # A tibble: 4 x 4 #> CensusID Tag Status status_tree #> <dbl> <dbl> <chr> <chr> @@ -196,7 +158,7 @@

Example

#> 3 2 3 dead A #> 4 2 3 dead A # Remove trees found dead in two or more censuses -drop_twice_dead(df) +drop_twice_dead(df) #> # A tibble: 12 x 4 #> CensusID Tag Status status_tree #> <dbl> <dbl> <chr> <chr> @@ -216,19 +178,19 @@

Example

You can combine fgeo.tool with dplyr.

-
edited <- add_status_tree(pick_top(df, CensusID, -1))
+
 
 
-

Acknowledgements

+

+Acknowledgements

Thanks to all partners of ForestGEO, for sharing their ideas and code.

diff --git a/docs/reference/dfs_to_csv.html b/docs/reference/dfs_to_csv.html new file mode 100644 index 00000000..d5732a10 --- /dev/null +++ b/docs/reference/dfs_to_csv.html @@ -0,0 +1,205 @@ + + + + + + + + +Save each dataframe in a list to a different .csv file. — dfs_to_csv • fgeo.tool + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+ + +
+ +

A useful complement of this function is xlsheets_to_dfs().

+ +
+ +
dfs_to_csv(dfs, dir, prefix = NULL)
+ +

Arguments

+ + + + + + + + + + + + + + +
dfs

A list of dataframes.

dir

Character; the directory where the files will be saved.

prefix

Character; a prefix to add to the file names.

+ +

Source

+ +

Adapted from an article by Jenny Bryan (https://goo.gl/ah8qkX).

+ +

See also

+ +

xlsheets_to_dfs

+

Other functions to handle multiple spreadsheets of an excel workbook: xlsheets_to_dfs

+ + +

Examples

+
dfs <- xlsheets_to_dfs(tool_example("multiple_sheets.xlsx")) + +# Saving the output to a temporary file +output <- tempdir() +dfs_to_csv(dfs, output, prefix = "myfile-") + +# Look inside the output directory to confirm it worked +dir(output, pattern = "myfile")
#> [1] "myfile-Sheet1.csv" "myfile-Sheet2.csv"
+
+ +
+ + +
+ + + + + + diff --git a/docs/reference/dfs_to_df.html b/docs/reference/dfs_to_df.html new file mode 100644 index 00000000..a0402bfe --- /dev/null +++ b/docs/reference/dfs_to_df.html @@ -0,0 +1,222 @@ + + + + + + + + +Reduce a list of dataframes into a single dataframe via dplyr::full_join() — dfs_to_df • fgeo.tool + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+ + +
+ +

This function wraps purrr::reduce() and dplyr::full_join() to reduce +all or some dataframes in a list into a single dataframe.

+ +
+ +
dfs_to_df(dfs, df_names = NULL, by = NULL)
+ +

Arguments

+ + + + + + + + + + + + + + +
dfs

A list of dataframes.

df_names

Names of the list elements to join. NULL defaults to use +all list elements.

by

A character vector of variables to join by. If NULL, the default, +*_join() will do a natural join, using all variables with common names +across the two tables. To join by different variables on x and y use a +named vector. For example, by = c("a" = "b") will match x.a to y.b. Passed +to dplyr::full_join()

+ +

Value

+ +

A dataframe.

+ +

See also

+ + + + +

Examples

+
dfs <- list( + a = data.frame(x = 1), + b = data.frame(x = 2, y = 2), + c = data.frame(x = 1, z = 3) +) + +dfs_to_df(dfs, df_names = c("a", "c"))
#> x z +#> 1 1 3
+dfs_to_df(dfs, df_names = c("b", "c"))
#> x y z +#> 1 2 2 NA +#> 2 1 NA 3
+dfs_to_df(list(data.frame(1)))
#> X1 +#> 1 1
# Use argument `by` if dataframes have no matching variable, +dfs_to_df( + list(data.frame(x = 1), data.frame(z = 2)), + by = c("x" = "z") +)
#> x +#> 1 1 +#> 2 2
+
+ +
+ + +
+ + + + + + diff --git a/docs/reference/files_to_df.html b/docs/reference/files_to_df.html index 57b59b96..877d35ea 100644 --- a/docs/reference/files_to_df.html +++ b/docs/reference/files_to_df.html @@ -6,7 +6,7 @@ -Read all files from a directory into dataframes. — files_to_df • fgeo.tool +Read all .csv or flat excel files from a directory into dataframes. — files_to_df • fgeo.tool @@ -30,11 +30,15 @@ - + - + @@ -123,34 +127,43 @@
-

Read all .csv, .xls, or .xlsx files from a directory into a single dataframe -or a list of dataframes -- where each element of the list is named as the -source file.

+

Read all .csv (csv_*()), or excel files (xl_*()) from a directory into a +single dataframe (*_df()) or a list of dataframes (*_dfs()) -- where each +element of the list is named as the source file. Excel files are treated as +if they were flat -- meaning that these xl_*() functions can read only a +single sheet (specified via the argument sheet passed to +readxl::read_excel() via ...). For reading all sheets in a single +file see xlsheets_to_dfs().

-
csv_to_df(input_dir)
+    
csv_to_df(dir, ...)
 
-csv_to_dfs(input_dir)
+csv_to_dfs(dir, ...)
 
-xl_to_df(input_dir)
+xl_to_df(dir, ...)
 
-xl_to_dfs(input_dir)
+xl_to_dfs(dir, ...)

Arguments

- + + + + +
input_dirdir

String giving the directory containing the excel workbooks to read from.

...

Arguments passed to readr::read_csv() (csv_*()) or +readxl::read_excel() (xl_*()).

@@ -164,93 +177,118 @@

See also

+ + +

Examples

-
path <- system.file("extdata", "files/01.csv", package = "fgeo.tool") -input_dir <- fs::path_dir(path) -input_dir
#> C:/Users/LeporeM/Dropbox/git_repos/fgeo.tool/inst/extdata/files
dir(input_dir)
#> [1] "01.csv" "01.xls" "02.csv" "02.xlsx"
-# Read all .csv files -csv_to_df(input_dir)
#> Parsed with column specification: +
xl_to_dfs(tool_example("multiple_workbooks"))
#> $workbook1.xlsx +#> # A tibble: 3 x 2 +#> numbers letters +#> <dbl> <chr> +#> 1 1 a +#> 2 2 b +#> 3 3 c +#> +#> $workbook2.xlsx +#> # A tibble: 3 x 2 +#> numbers letters +#> <dbl> <chr> +#> 1 1 a +#> 2 2 b +#> 3 3 c +#>
+xl_to_df(tool_example("multiple_workbooks"))
#> # A tibble: 6 x 2 +#> numbers letters +#> <dbl> <chr> +#> 1 1 a +#> 2 2 b +#> 3 3 c +#> 4 1 a +#> 5 2 b +#> 6 3 c
+# Pass an argument to `read_excel()` via `...` +xl_to_dfs(tool_example("multiple_workbooks"), sheet = 2)
#> $workbook1.xlsx +#> # A tibble: 3 x 2 +#> colors LETTERS +#> <chr> <chr> +#> 1 red A +#> 2 yellow B +#> 3 blue C +#> +#> $workbook2.xlsx +#> # A tibble: 3 x 2 +#> colors LETTERS +#> <chr> <chr> +#> 1 red A +#> 2 yellow B +#> 3 blue C +#>
+csv_to_dfs(tool_example("multiple_csv"))
#> Parsed with column specification: #> cols( -#> x = col_integer(), -#> y = col_character() +#> numbers = col_integer(), +#> letters = col_character() #> )
#> Parsed with column specification: #> cols( -#> x = col_integer(), -#> y = col_character() -#> )
#> # A tibble: 10 x 2 -#> x y -#> <int> <chr> -#> 1 1 a -#> 2 2 b -#> 3 3 c -#> 4 4 d -#> 5 5 e -#> 6 1 k -#> 7 2 l -#> 8 3 m -#> 9 4 n -#> 10 5 o
csv_to_dfs(input_dir)
#> Parsed with column specification: +#> numbers = col_integer(), +#> letters = col_character() +#> )
#> $csv1.csv +#> # A tibble: 3 x 2 +#> numbers letters +#> <int> <chr> +#> 1 1 a +#> 2 2 b +#> 3 3 c +#> +#> $csv2.csv +#> # A tibble: 3 x 2 +#> numbers letters +#> <int> <chr> +#> 1 4 a +#> 2 5 b +#> 3 6 c +#>
+csv_to_df(tool_example("multiple_csv"))
#> Parsed with column specification: #> cols( -#> x = col_integer(), -#> y = col_character() +#> numbers = col_integer(), +#> letters = col_character() #> )
#> Parsed with column specification: #> cols( -#> x = col_integer(), -#> y = col_character() -#> )
#> $`01.csv` -#> # A tibble: 5 x 2 -#> x y -#> <int> <chr> -#> 1 1 a -#> 2 2 b -#> 3 3 c -#> 4 4 d -#> 5 5 e -#> -#> $`02.csv` -#> # A tibble: 5 x 2 -#> x y -#> <int> <chr> -#> 1 1 k -#> 2 2 l -#> 3 3 m -#> 4 4 n -#> 5 5 o -#>
-# Read all .xls or .xlsx files -xl_to_dfs(input_dir)
#> $`01.xls` -#> # A tibble: 5 x 2 -#> x y -#> <dbl> <chr> -#> 1 1 a -#> 2 2 b -#> 3 3 c -#> 4 4 d -#> 5 5 e +#> numbers = col_integer(), +#> letters = col_character() +#> )
#> # A tibble: 6 x 2 +#> numbers letters +#> <int> <chr> +#> 1 1 a +#> 2 2 b +#> 3 3 c +#> 4 4 a +#> 5 5 b +#> 6 6 c
+# Pass an argument to `read_csv()` via `...` +csv_to_dfs(tool_example("multiple_csv"), n_max = 2)
#> Parsed with column specification: +#> cols( +#> numbers = col_integer(), +#> letters = col_character() +#> )
#> Parsed with column specification: +#> cols( +#> numbers = col_integer(), +#> letters = col_character() +#> )
#> $csv1.csv +#> # A tibble: 2 x 2 +#> numbers letters +#> <int> <chr> +#> 1 1 a +#> 2 2 b #> -#> $`02.xlsx` -#> # A tibble: 5 x 2 -#> x y -#> <dbl> <chr> -#> 1 1 k -#> 2 2 l -#> 3 3 m -#> 4 4 n -#> 5 5 o -#>
xl_to_df(input_dir)
#> # A tibble: 10 x 2 -#> x y -#> <dbl> <chr> -#> 1 1 a -#> 2 2 b -#> 3 3 c -#> 4 4 d -#> 5 5 e -#> 6 1 k -#> 7 2 l -#> 8 3 m -#> 9 4 n -#> 10 5 o
+#> $csv2.csv +#> # A tibble: 2 x 2 +#> numbers letters +#> <int> <chr> +#> 1 4 a +#> 2 5 b +#>
+ +
+ + +
+ + + + + + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 97e76bd4..1dfab1a0 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -18,6 +18,9 @@ https://forestgeo.github.io/fgeo.tool//reference/dfs_to_csv.html + + https://forestgeo.github.io/fgeo.tool//reference/dfs_to_df.html + https://forestgeo.github.io/fgeo.tool//reference/drop_twice_dead.html @@ -42,9 +45,6 @@ https://forestgeo.github.io/fgeo.tool//reference/lookup.html - - https://forestgeo.github.io/fgeo.tool//reference/ls_join_df.html - https://forestgeo.github.io/fgeo.tool//reference/nms.html diff --git a/man/dfs_to_csv.Rd b/man/dfs_to_csv.Rd index cded0da9..39666acd 100644 --- a/man/dfs_to_csv.Rd +++ b/man/dfs_to_csv.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/input_to_output.R +% Please edit documentation in R/inout_dfs_to_output.R \name{dfs_to_csv} \alias{dfs_to_csv} \title{Save each dataframe in a list to a different .csv file.} @@ -7,10 +7,10 @@ Adapted from an article by Jenny Bryan (https://goo.gl/ah8qkX). } \usage{ -dfs_to_csv(df_list, dir, prefix = NULL) +dfs_to_csv(dfs, dir, prefix = NULL) } \arguments{ -\item{df_list}{A list of dataframes.} +\item{dfs}{A list of dataframes.} \item{dir}{Character; the directory where the files will be saved.} diff --git a/man/dfs_to_df.Rd b/man/dfs_to_df.Rd index 586bbd4e..e6bec093 100644 --- a/man/dfs_to_df.Rd +++ b/man/dfs_to_df.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/input_to_output.R +% Please edit documentation in R/inout_dfs_to_output.R \name{dfs_to_df} \alias{dfs_to_df} \title{Reduce a list of dataframes into a single dataframe via dplyr::full_join()} @@ -7,6 +7,8 @@ dfs_to_df(dfs, df_names = NULL, by = NULL) } \arguments{ +\item{dfs}{A list of dataframes.} + \item{df_names}{Names of the list elements to join. \code{NULL} defaults to use all list elements.} @@ -15,8 +17,6 @@ all list elements.} across the two tables. 
To join by different variables on x and y use a named vector. For example, by = c("a" = "b") will match x.a to y.b. Passed to \code{\link[dplyr:full_join]{dplyr::full_join()}}} - -\item{df_list}{A list of dataframes.} } \value{ A dataframe. @@ -26,15 +26,15 @@ This function wraps \code{\link[purrr:reduce]{purrr::reduce()}} and \code{\link[ all or some dataframes in a list into a single dataframe. } \examples{ -df_list <- list( +dfs <- list( a = data.frame(x = 1), b = data.frame(x = 2, y = 2), c = data.frame(x = 1, z = 3) ) -dfs_to_df(df_list, df_names = c("a", "c")) +dfs_to_df(dfs, df_names = c("a", "c")) -dfs_to_df(df_list, df_names = c("b", "c")) +dfs_to_df(dfs, df_names = c("b", "c")) dfs_to_df(list(data.frame(1))) # Use argument `by` if dataframes have no matching variable, diff --git a/man/files_to_df.Rd b/man/files_to_df.Rd index 5c10cf69..fe92f70b 100644 --- a/man/files_to_df.Rd +++ b/man/files_to_df.Rd @@ -1,23 +1,23 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/files_to_df.R +% Please edit documentation in R/inout_files_to_df.R \name{files_to_df} \alias{files_to_df} \alias{csv_to_df} \alias{csv_to_dfs} \alias{xl_to_df} \alias{xl_to_dfs} -\title{Read flat .csv or flat excel files from a directory into dataframes.} +\title{Read all .csv or flat excel files from a directory into dataframes.} \usage{ -csv_to_df(input_dir, ...) +csv_to_df(dir, ...) -csv_to_dfs(input_dir, ...) +csv_to_dfs(dir, ...) -xl_to_df(input_dir, ...) +xl_to_df(dir, ...) -xl_to_dfs(input_dir, ...) +xl_to_dfs(dir, ...) } \arguments{ -\item{input_dir}{String giving the directory containing the excel workbooks +\item{dir}{String giving the directory containing the excel workbooks to read from.} \item{...}{Arguments passed to \code{\link[readr:read_csv]{readr::read_csv()}} (\code{csv_*()}) or @@ -33,7 +33,7 @@ single dataframe (\code{*_df()}) or a list of dataframes (\code{*_dfs()}) -- whe element of the list is named as the source file. 
Excel files are treated as if they were flat -- meaning that these \code{xl_*()} functions can read only a single sheet (specified via the argument \code{sheet} passed to -\code{\link[readr:read_excel]{readr::read_excel()}} via \code{...}). For reading multiple all sheets in a single +\code{\link[readxl:read_excel]{readxl::read_excel()}} via \code{...}). For reading multiple all sheets in a single file see \code{\link[=xlsheets_to_dfs]{xlsheets_to_dfs()}}. } \section{Acknowledgment}{ @@ -46,14 +46,14 @@ xl_to_dfs(tool_example("multiple_workbooks")) xl_to_df(tool_example("multiple_workbooks")) -# Pass an argument to `reaxl::read_excel()` via `...` +# Pass an argument to `read_excel()` via `...` xl_to_dfs(tool_example("multiple_workbooks"), sheet = 2) csv_to_dfs(tool_example("multiple_csv")) csv_to_df(tool_example("multiple_csv")) -# Pass an argument to `readr::read_csv()` via `...` +# Pass an argument to `read_csv()` via `...` csv_to_dfs(tool_example("multiple_csv"), n_max = 2) } \seealso{ diff --git a/man/xlff_to_output.Rd b/man/xlff_to_output.Rd index f5e1e19a..94628b18 100644 --- a/man/xlff_to_output.Rd +++ b/man/xlff_to_output.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/xlff_to_output.R +% Please edit documentation in R/inout_xlff_to_output.R \name{xlff_to_output} \alias{xlff_to_output} \alias{xlff_to_csv} @@ -7,17 +7,17 @@ \alias{xlff_to_dfs} \title{Read and wrangle excel-FastField and output dataframes or .csv/.xlsx files.} \usage{ -xlff_to_csv(input_dir, output_dir = "./", first_census = FALSE) +xlff_to_csv(dir_in, dir_out = "./", first_census = FALSE) -xlff_to_xl(input_dir, output_dir = "./", first_census = FALSE) +xlff_to_xl(dir_in, dir_out = "./", first_census = FALSE) -xlff_to_dfs(input_dir, first_census = FALSE) +xlff_to_dfs(dir_in, first_census = FALSE) } \arguments{ -\item{input_dir}{String giving the directory containing the excel workbooks +\item{dir_in}{String giving the directory containing the 
excel workbooks to read from.} -\item{output_dir}{String giving the directory where to write .csv files to.} +\item{dir_out}{String giving the directory where to write .csv files to.} \item{first_census}{Use \code{TRUE} if this is your first census. Use \code{FALSE} (default) if this is not your first census but a recensus.} @@ -70,34 +70,34 @@ library(readxl) # NOT A FIRST CENSUS # Path to the folder I want to read excel files from -input_dir <- dirname(tool_example("two_files/new_stem_1.xlsx")) -input_dir +dir_in <- dirname(tool_example("two_files/new_stem_1.xlsx")) +dir_in # Files I want to read -dir(input_dir, pattern = "xlsx") +dir(dir_in, pattern = "xlsx") # Path to the folder I want to write .csv files to -output_dir <- tempdir() +dir_out <- tempdir() # Output a csv file -xlff_to_csv(input_dir, output_dir) +xlff_to_csv(dir_in, dir_out) # Confirm -path_file(dir_ls(output_dir, regexp = "new_stem.*csv$")) +path_file(dir_ls(dir_out, regexp = "new_stem.*csv$")) # Also possible to output excel and a list of dataframe. See next section. 
# FIRST CENSUS -input_dir <- dirname(tool_example("first_census/census.xlsx")) +dir_in <- dirname(tool_example("first_census/census.xlsx")) # As a reminder you'll get a warning of missing sheets # Output list of dataframes (one per input workbook -- here only one) -dfs <- xlff_to_dfs(input_dir, first_census = TRUE) +dfs <- xlff_to_dfs(dir_in, first_census = TRUE) str(dfs, give.attr = FALSE) # Output excel -xlff_to_xl(input_dir, output_dir, first_census = TRUE) +xlff_to_xl(dir_in, dir_out, first_census = TRUE) # Read back -filename <- path(output_dir, "census.xlsx") +filename <- path(dir_out, "census.xlsx") out <- read_excel(filename) str(out, give.attr = FALSE) } diff --git a/man/xlsheets_to_dfs.Rd b/man/xlsheets_to_dfs.Rd index 1d590774..6b544879 100644 --- a/man/xlsheets_to_dfs.Rd +++ b/man/xlsheets_to_dfs.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/input_to_output.R +% Please edit documentation in R/inout_files_to_df.R \name{xlsheets_to_dfs} \alias{xlsheets_to_dfs} \title{Read one excel workbook and map each spreadsheet to a dataframe in a list.} diff --git a/tests/testthat/test-files_to_df.R b/tests/testthat/test-files_to_df.R deleted file mode 100644 index b9adb6d3..00000000 --- a/tests/testthat/test-files_to_df.R +++ /dev/null @@ -1,16 +0,0 @@ -context("files_to_df") - -library(fs) - -test_that("outputs expected object", { - path <- system.file("extdata", "files/01.csv", package = "fgeo.tool") - input_dir <- path_dir(path) - input_dir - dir(input_dir) - - expect_is(csv_to_df(input_dir), "data.frame") - expect_is(csv_to_dfs(input_dir), "list") - - expect_is(xl_to_df(input_dir), "data.frame") - expect_is(xl_to_dfs(input_dir), "list") -}) diff --git a/tests/testthat/test-input_to_output.R b/tests/testthat/test-inout_dfs_to_output.R similarity index 57% rename from tests/testthat/test-input_to_output.R rename to tests/testthat/test-inout_dfs_to_output.R index 30b1aa96..a9c21bbb 100644 --- 
a/tests/testthat/test-input_to_output.R +++ b/tests/testthat/test-inout_dfs_to_output.R @@ -1,31 +1,18 @@ -input <- system.file("extdata", "example.xlsx", package = "fgeo.tool") - - - -context("xlsheets_to_dfs") - -x <- xlsheets_to_dfs(input) - -test_that("input is a list of data frames", { - expect_type(x, "list") - expect_true(each_list_item_is_df(x)) -}) - - +input <- tool_example("example.xlsx") context("dfs_to_csv") -df_list <- xlsheets_to_dfs(input) +dfs <- xlsheets_to_dfs(input) output <- tempdir() test_that("errs with wrong input", { expect_error(dfs_to_csv(1, output)) expect_error(dfs_to_csv(list(1), output)) - expect_error(dfs_to_csv(df_list, 1)) - expect_error(dfs_to_csv(df_list, output, prefix = 1)) + expect_error(dfs_to_csv(dfs, 1)) + expect_error(dfs_to_csv(dfs, output, prefix = 1)) }) test_that("works as expected", { - dfs_to_csv(df_list, output, prefix = "myfile-") + dfs_to_csv(dfs, output, prefix = "myfile-") files <- dir(output) expect_true(length(files[grepl("^myfile.*csv$", files)]) > 0) }) @@ -34,7 +21,7 @@ test_that("works as expected", { context("dfs_to_df") -df_list <- list( +dfs <- list( a = data.frame(x = 1), b = data.frame(x = 2, y = 2), c = data.frame(x = 1, z = 3) @@ -43,13 +30,13 @@ df_list <- list( test_that("errs with wrong input", { expect_error(dfs_to_df(1)) expect_error(dfs_to_df(data.frame(1))) - expect_error(dfs_to_df(df_list, 1)) + expect_error(dfs_to_df(dfs, 1)) }) test_that("works as expected", { - x <- dfs_to_df(df_list, df_names = c("a", "c")) + x <- dfs_to_df(dfs, df_names = c("a", "c")) expect_equal(names(x), c("x", "z")) - x <- dfs_to_df(df_list, df_names = c("b", "c")) + x <- dfs_to_df(dfs, df_names = c("b", "c")) expect_equal(names(x), c("x", "y", "z")) expect_silent(dfs_to_df(list(data.frame(1)))) expect_silent( diff --git a/tests/testthat/test-inout_files_to_df.R b/tests/testthat/test-inout_files_to_df.R new file mode 100644 index 00000000..b34b8fa1 --- /dev/null +++ b/tests/testthat/test-inout_files_to_df.R @@ -0,0 
+1,26 @@ +input <- tool_example("example.xlsx") + +context("xlsheets_to_dfs") + +x <- xlsheets_to_dfs(input) + +test_that("input is a list of data frames", { + expect_type(x, "list") + expect_true(each_list_item_is_df(x)) +}) + + + +context("files_to_df") + +library(fs) + +test_that("outputs expected object", { + dir <- tool_example("files") + + expect_is(csv_to_df(dir), "data.frame") + expect_is(csv_to_dfs(dir), "list") + + expect_is(xl_to_df(dir), "data.frame") + expect_is(xl_to_dfs(dir), "list") +}) diff --git a/tests/testthat/test-xlff_to_output.R b/tests/testthat/test-inout_xlff_to_output.R similarity index 95% rename from tests/testthat/test-xlff_to_output.R rename to tests/testthat/test-inout_xlff_to_output.R index 2bcb2301..dd599133 100644 --- a/tests/testthat/test-xlff_to_output.R +++ b/tests/testthat/test-inout_xlff_to_output.R @@ -58,7 +58,7 @@ test_that("works as expected", { context("xlff_to_csv") -test_that("errs if input_dir does not exist", { +test_that("errs if `dir` does not exist", { expect_error( xlff_to_csv("invalid_dir"), "must match a valid directory" @@ -166,9 +166,9 @@ test_that("outputs column codes with commas replaced by semicolon (#13)", { }) test_that("allows first_census", { - input_dir <- dirname(tool_example("first_census/census.xlsx")) + `dir` <- dirname(tool_example("first_census/census.xlsx")) output_dir <- tempdir() - out <- xlff_to_dfs(input_dir, first_census = TRUE)[[1]] + out <- xlff_to_dfs(`dir`, first_census = TRUE)[[1]] nms <- c( "submission_id", "quadrat", "tag", "stem_tag", "species", @@ -180,9 +180,9 @@ test_that("allows first_census", { }) test_that("passes with input missing key sheets (#33)", { - input_dir <- dirname(tool_example("missing_key/recensus.xlsx")) + `dir` <- dirname(tool_example("missing_key/recensus.xlsx")) expect_warning( - xlff_to_dfs(input_dir), + xlff_to_dfs(`dir`), "Adding missing sheets: original_stems, new_secondary_stems, recruits, root" ) }) diff --git a/tmp.R b/tmp.R index 8fd01e08..7e770092 
100644 --- a/tmp.R +++ b/tmp.R @@ -2,6 +2,7 @@ # xxx cont here # * Replace bci data by luquillo data in map (top1quad by vft_1quad, ...) +# * Remove internal data functions_priority # Families ---------------------------------------------------------------- @@ -62,18 +63,11 @@ type_vft # dependencies. GENERAL -- NOT SPECIFIC TO FGEO ----------------------------- # Read or write -# TODO: Rename all of these functions to the format input_to_output(). -# Find common suffix? -# replace _lst by s? e.g. csv_to_df_lst() by csv_to_dfs() -# replace _ls by _lst? files_to_df csv_to_df csv_to_dfs xl_to_df xl_to_dfs - -# TODO: consider renaming xl_ to sheets when appropriate -# TODO: rename to end with output, something like df_to_csv() or dfs_to_csvs() dfs_to_csv dfs_to_df xlsheets_to_dfs diff --git a/vignettes/read_df_write.Rmd b/vignettes/read_df_write.Rmd index 66f9c0f4..7e089e47 100644 --- a/vignettes/read_df_write.Rmd +++ b/vignettes/read_df_write.Rmd @@ -30,9 +30,7 @@ If you have multiple files in a directory, you may want to combine them all into Suppose you have these files in a directory: ```{r} -path <- system.file("extdata", "files/01.csv", package = "fgeo.tool") -files <- path_dir(path) -files +files <- tool_example("files") dir(files) ```