From 1d6fc4b0a3cd637a636b1ac31e039d18c3bc700e Mon Sep 17 00:00:00 2001 From: maurolepore Date: Fri, 11 May 2018 17:46:55 -0400 Subject: [PATCH 1/7] Add example_path_dir(), files_to_df() and xl_sheets_to_output(). --- DESCRIPTION | 3 +- NAMESPACE | 11 + R/aaa.R | 6 +- R/example_path_dir.R | 10 + R/files_to_df.R | 49 ++++ R/xl_sheets_to_output.R | 187 +++++++++++++ README.md | 62 ++--- _pkgdown.yml | 9 + docs/index.html | 12 +- docs/reference/check_unique.html | 4 +- docs/reference/count_duplicated.html | 10 +- docs/reference/example_path_dir.html | 162 +++++++++++ docs/reference/exists_in_pkg.html | 3 +- docs/reference/fieldforms_output.html | 8 +- docs/reference/files_to_df.html | 258 ++++++++++++++++++ docs/reference/index.html | 42 ++- docs/reference/nms.html | 4 +- docs/reference/nms_extract_anycase.html | 5 +- docs/reference/nms_lowercase.html | 4 +- docs/reference/nms_restore_newvar.html | 4 +- docs/reference/path_example.html | 163 +++++++++++ docs/reference/reexports.html | 6 +- docs/reference/type_ensure.html | 4 +- docs/reference/xl_sheets_to_output.html | 211 ++++++++++++++ docs/sitemap.xml | 12 +- inst/extdata/files/01.csv | 6 + inst/extdata/files/01.xls | Bin 0 -> 22528 bytes inst/extdata/files/02.csv | 6 + inst/extdata/files/02.xlsx | Bin 0 -> 5107 bytes inst/extdata/new_stem_0/new_stem_0.xlsx | Bin 0 -> 16846 bytes inst/extdata/new_stem_1/new_stem_1.xlsx | Bin 0 -> 16908 bytes inst/extdata/recruits_none/recruits_none.xlsx | Bin 0 -> 16716 bytes inst/extdata/two_files/new_stem_1.xlsx | Bin 0 -> 16908 bytes inst/extdata/two_files/new_stem_2.xlsx | Bin 0 -> 16908 bytes man/example_path_dir.Rd | 20 ++ man/files_to_df.Rd | 49 ++++ man/reexports.Rd | 4 + man/xl_sheets_to_output.Rd | 58 ++++ tests/testthat/test-example_path_dir.R | 5 + tests/testthat/test-files_to_df.R | 14 + tests/testthat/test-xl_sheets_to_output.R | 165 +++++++++++ 41 files changed, 1504 insertions(+), 72 deletions(-) create mode 100644 R/example_path_dir.R create mode 100644 R/files_to_df.R create mode 100644 R/xl_sheets_to_output.R create mode 100644 docs/reference/example_path_dir.html create mode 100644 docs/reference/files_to_df.html create mode 100644 docs/reference/path_example.html create mode 100644 docs/reference/xl_sheets_to_output.html create mode 100644 inst/extdata/files/01.csv create mode 100644 inst/extdata/files/01.xls create mode 100644 inst/extdata/files/02.csv create mode 100644 inst/extdata/files/02.xlsx create mode 100644 inst/extdata/new_stem_0/new_stem_0.xlsx create mode 100644 inst/extdata/new_stem_1/new_stem_1.xlsx create mode 100644 inst/extdata/recruits_none/recruits_none.xlsx create mode 100644 inst/extdata/two_files/new_stem_1.xlsx create mode 100644 inst/extdata/two_files/new_stem_2.xlsx create mode 100644 man/example_path_dir.Rd create mode 100644 man/files_to_df.Rd create mode 100644 man/xl_sheets_to_output.Rd create mode 100644 tests/testthat/test-example_path_dir.R create mode 100644 tests/testthat/test-files_to_df.R create mode 100644 tests/testthat/test-xl_sheets_to_output.R diff --git a/DESCRIPTION b/DESCRIPTION index ef4b1f87..bb9dbac3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -23,7 +23,8 @@ Imports: rlang (>= 0.2.0), rmarkdown (>= 1.9), stringr (>= 1.3.0), - tibble (>= 1.4.2) + tibble (>= 1.4.2), + writexl Suggests: bciex (>= 0.0.0.9000), covr (>= 3.0.1), diff --git a/NAMESPACE b/NAMESPACE index 6bb4574b..339424d4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -19,6 +19,10 @@ export(check_unique) export(check_unique_vector) export(count_duplicated) export(create_habitat) +export(csv_to_df) +export(csv_to_df_lst) +export(ensure_key_sheets) +export(example_path_dir) export(exists_in_pkg) export(fieldforms_header) export(fieldforms_output) @@ -29,6 +33,7 @@ export(ls_csv_df) export(ls_join_df) export(ls_list_spreadsheets) export(ls_name_df) +export(name_df_lst) export(nms_detect) export(nms_extract1) export(nms_extract_all) @@ -54,6 +59,11 @@ export(to_recensus) export(type_ensure) export(type_taxa) export(type_vft) +export(xl_sheets_to_csv) +export(xl_sheets_to_df) +export(xl_sheets_to_xl) +export(xl_to_df) +export(xl_to_df_lst) import(rlang) importFrom(dplyr,"%>%") importFrom(dplyr,arrange) @@ -62,6 +72,7 @@ importFrom(dplyr,group_by) importFrom(dplyr,mutate) importFrom(dplyr,select) importFrom(dplyr,ungroup) +importFrom(fgeo.base,name_df_lst) importFrom(tibble,tibble) importFrom(tibble,tribble) importFrom(utils,head) diff --git a/R/aaa.R b/R/aaa.R index 7e80b15d..4c0b3ef4 100644 --- a/R/aaa.R +++ b/R/aaa.R @@ -1,5 +1,3 @@ -# Suppress R CMD check note ----------------------------------------------- - #' @importFrom dplyr filter mutate select arrange group_by ungroup %>% #' @import rlang #' @importFrom tibble tibble tribble @@ -9,4 +7,8 @@ NULL #' @export dplyr::`%>%` +#' @importFrom fgeo.base name_df_lst +#' @export +fgeo.base::name_df_lst + globalVariables(c(".data")) diff --git a/R/example_path_dir.R b/R/example_path_dir.R new file mode 100644 index 00000000..5a655342 --- /dev/null +++ b/R/example_path_dir.R @@ -0,0 +1,10 @@ +#' Path to directory containing example data. +#' +#' @param path Path to a file (with extension) from inst/extdata/. +#' +#' @return Path to directory containing example data. +#' @export +#' +#' @examples +#' example_path_dir("two_files/new_stem_1.xlsx") +example_path_dir <- fgeo.base::example_path_factory("fgeo.tool", dirname) diff --git a/R/files_to_df.R b/R/files_to_df.R new file mode 100644 index 00000000..9c6ef243 --- /dev/null +++ b/R/files_to_df.R @@ -0,0 +1,49 @@ +#' Read all .csv or excel files from a directory into dataframes. +#' +#' Read all .csv, .xls, or .xlsx files from a directory into a dataframe or a +#' list of dataframes, where each element of the list is named as the source +#' file. +#' +#' @param input_dir String giving the directory containing the excel workbooks +#' to read from. +#' +#' @return The versions ending in _df output a single dataframe. The versions +#' ending in _lst output a list of dataframes. +#' +#' @examples +#' path <- system.file("extdata", "files/01.csv", package = "fgeo.tool") +#' input_dir <- fs::path_dir(path) +#' input_dir +#' dir(input_dir) +#' +#' # Read all .csv files +#' csv_to_df(input_dir) +#' csv_to_df_lst(input_dir) +#' +#' # Read all .xls or .xlsx files +#' xl_to_df_lst(input_dir) +#' xl_to_df(input_dir) +#' @name files_to_df +NULL + +files_to_df <- function(.map, .read, ext) { + function(input_dir) { + files <- fs::dir_ls(input_dir, regexp = ext) + dfs <- .map(files, .read) + rlang::set_names(dfs, fs::path_file(names(dfs))) + } +} + +#' @export +#' @name files_to_df +csv_to_df <- files_to_df(purrr::map_df, readr::read_csv, "csv$") +#' @export +#' @name files_to_df +csv_to_df_lst <- files_to_df(purrr::map, readr::read_csv, "csv$") +#' @export +#' @name files_to_df +xl_to_df <- files_to_df(purrr::map_df, readxl::read_excel, "xls|xlsx") +#' @export +#' @name files_to_df +xl_to_df_lst <- files_to_df(purrr::map, readxl::read_excel, "xls|xlsx") + diff --git a/R/xl_sheets_to_output.R b/R/xl_sheets_to_output.R new file mode 100644 index 00000000..19a467c9 --- /dev/null +++ b/R/xl_sheets_to_output.R @@ -0,0 +1,187 @@ +#' Combine spreadsheets from excel workbooks into .csv files (one per workbook). +#' +#' This is a rigid function with a very specific goal: To process +#' data from a specific sampling software. Specifically, this is what this +#' function does: +#' * Reads each spreadsheet from each workbook and map it to a dataframe. +#' * Lowercases and links the names of each dataframe. +#' * Keeps only these dataframes: (1) original_stems; (2) new_secondary_stems; +#' and (3). +#' * Dates the data by `submission_id` (`date` comes from the spreadsheet +#' `root`). +#' * Lowercases and links the names of each dataframe-variable. +#' * Drops fake stems. +#' * Output a single .csv file which name is prefixed with the name of the +#' workbook. +#' +#' @param input_dir String giving the directory containing the excel workbooks +#' to read from. +#' @param output_dir String giving the directory where to write .csv files to. +#' +#' @return Writes one .csv file for each workbook. +#' +#' @examples +#' library(fs) +#' +#' # Path to the folder I want to read excel files from +#' input_dir <- example_path_dir("two_files/new_stem_1.xlsx") +#' # Files I want to read +#' dir(input_dir, pattern = "xlsx") +#' +#' # Path to the folder I want to write .csv files to +#' output_dir <- tempdir() +#' +#' # Do the work +#' xl_sheets_to_csv(input_dir, output_dir) +#' +#' # Confirm +#' path_file(dir_ls(output_dir, regexp = "csv$")) +#' @name xl_sheets_to_output +NULL + +xl_sheets_to_file <- function(ext, fun_write) { + function(input_dir, output_dir = "./") { + check_output_dir(output_dir = output_dir, print_as = "`output_dir`") + dfs <- xl_sheets_to_df(input_dir = input_dir) + files <- fs::path_ext_remove(names(dfs)) + paths <- fs::path(output_dir, fs::path_ext_set(files, ext)) + purrr::walk2(dfs, paths, fun_write) + } +} + +#' @export +#' @rdname xl_sheets_to_output +xl_sheets_to_csv <- xl_sheets_to_file("csv", readr::write_csv) + +#' @export +#' @rdname xl_sheets_to_output +xl_sheets_to_xl <- xl_sheets_to_file("xlsx", writexl::write_xlsx) + +#' @export +#' @rdname xl_sheets_to_output +xl_sheets_to_df <- function(input_dir) { + check_input_dir(input_dir = input_dir, print_as = "`input_dir`") + out <- purrr::map(xl_workbooks_to_chr(input_dir), xl_sheets_to_df_) + purrr::set_names(out, basename(names(out))) +} + +#' Do xl_sheets_to_df() for each excel file. +#' @noRd +xl_sheets_to_df_ <- function(file, sheets = ensure_key_sheets()) { + # Piping functions to avoid useless intermediate variables + clean_dfm_list <- fgeo.tool::ls_list_spreadsheets(file) %>% + fgeo.tool::nms_tidy() %>% + ensure_key_sheets() %>% + purrr::keep(~!purrr::is_empty(.)) %>% + lapply(fgeo.tool::nms_tidy) %>% + drop_fake_stems() %>% + fgeo.tool::ls_name_df(name = "sheet") %>% + warn_if_empty("new_secondary_stems") %>% + warn_if_empty("recruits") %>% + # Avoid merge errors + coerce_as_character() + + with_date <- join_and_date(clean_dfm_list) + # In columns matching "codes", replace commas by semicolon + .df <- purrr::modify_if( + with_date, grepl("codes", names(with_date)), ~gsub(",", ";", .x) + ) + .df +} + + + +#' Check that key spreadsheets exist. +#' +#' @param x +#' +#' @return +#' @export +#' +#' @examples +ensure_key_sheets <- function(x) { + key <- c("original_stems", "new_secondary_stems", "recruits", "root") + missing_key_sheet <- !all(key %in% names(x)) + if (missing_key_sheet) { + msg <- paste0( + "Data should contain these sheets:\n", collapse(key), "\n", + "* Missing sheets: ", commas(setdiff(key, names(x))) + ) + abort(msg) + } + + x[intersect(key, names(x))] +} + +#' Remove rows equal to cero from the spreadsheet sheet new_secondary_stem. +#' @noRd +drop_fake_stems <- function(.df) { + dropped <- purrr::modify_at( + .df, .at = "new_secondary_stems", ~.x[.x$new_stem != 0, ] + ) + dropped +} + +#' Warns if a dataframe in a list of dataframes has empty rows. +#' @noRd +warn_if_empty <- function(.x, dfm_nm) { + dfm <- .x[[dfm_nm]] + has_cero_rows <- nrow(dfm) == 0 + if (has_cero_rows) { + warn(paste0("`", dfm_nm, "`", " has cero rows.")) + } + invisible(.x) +} + +coerce_as_character <- function(.x, ...) { + purrr::map(.x, ~purrr::modify(., .f = as.character, ...)) +} + +join_and_date <- function(.x) { + # Join data from all sheets except from `root` + is_not_root <- !grepl("root", names(.x)) + not_root_dfm <- .x %>% + purrr::keep(is_not_root) %>% + fgeo.tool::ls_join_df() %>% + dplyr::mutate(unique_stem = paste0(.data$tag, "_", .data$stem_tag)) + + # From `root`, pull only `date` (plus a column to merge by) + date <- .x[["root"]][c("submission_id", "date")] + + # Add date + dplyr::left_join(not_root_dfm, date, by = "submission_id") +} + +check_input_dir <- function(input_dir, print_as) { + stopifnot(is.character(input_dir)) + validate_dir(input_dir, "`input_dir`") + msg <- "`input_dir` must contain at least one excel file." + file_names <- xl_workbooks_to_chr(input_dir) + if (length(file_names) == 0) { + abort(msg) + } + invisible() +} + +check_output_dir <- function(output_dir, print_as) { + stopifnot(is.character(output_dir)) + validate_dir(output_dir, "`output_dir`") + invisible() +} + +validate_dir <- function(dir, dir_name) { + invalid_dir <- !fs::dir_exists(dir) + if (invalid_dir) { + msg <- paste0( + dir_name, " must match a valid directory.\n", + "bad ", dir_name, ": ", "'", dir, "'" + ) + abort(msg) + } else { + invisible(dir) + } +} + +xl_workbooks_to_chr <- function(input_dir) { + fs::dir_ls(input_dir, regexp = "\\.xls") +} diff --git a/README.md b/README.md index befc0464..0a2c5aee 100644 --- a/README.md +++ b/README.md @@ -63,35 +63,35 @@ row_top(df, Tag) #> # A tibble: 4 x 4 #> CensusID Tag Status status_tree #> -#> 1 1. 1. alive A -#> 2 1. 1. dead A -#> 3 2. 1. alive A -#> 4 2. 1. alive A +#> 1 1 1 alive A +#> 2 1 1 dead A +#> 3 2 1 alive A +#> 4 2 1 alive A row_top(df, Tag, -1) #> # A tibble: 4 x 4 #> CensusID Tag Status status_tree #> -#> 1 1. 3. dead A -#> 2 1. 3. dead A -#> 3 2. 3. dead A -#> 4 2. 3. dead A +#> 1 1 3 dead A +#> 2 1 3 dead A +#> 3 2 3 dead A +#> 4 2 3 dead A # Remove trees found dead in two or more censuses row_discard_twice_dead(df) #> # A tibble: 12 x 4 #> CensusID Tag Status status_tree #> -#> 1 1. 1. alive A -#> 2 1. 1. dead A -#> 3 1. 2. dead A -#> 4 1. 2. dead A -#> 5 1. 3. dead A -#> 6 1. 3. dead A -#> 7 2. 1. alive A -#> 8 2. 1. alive A -#> 9 2. 2. alive A -#> 10 2. 2. dead A -#> 11 2. 3. dead A -#> 12 2. 3. dead A +#> 1 1 1 alive A +#> 2 1 1 dead A +#> 3 1 2 dead A +#> 4 1 2 dead A +#> 5 1 3 dead A +#> 6 1 3 dead A +#> 7 2 1 alive A +#> 8 2 1 alive A +#> 9 2 2 alive A +#> 10 2 2 dead A +#> 11 2 3 dead A +#> 12 2 3 dead A ``` Check inputs. @@ -99,11 +99,11 @@ Check inputs. ``` r # Silent means success check_crucial_names(df, "Status") +#> Error in check_crucial_names(df, "Status"): could not find function "check_crucial_names" # Errs if the data hasn't a given name check_crucial_names(df, "DBH") -#> Error: Ensure your data set has these variables: -#> DBH +#> Error in check_crucial_names(df, "DBH"): could not find function "check_crucial_names" check_unique(df, "CensusID", msg = "* Is this what you expect?") #> Warning in do.call(cond, list(customized)): Duplicated values were detected @@ -133,9 +133,9 @@ dplyr::filter( #> # A tibble: 3 x 4 #> CensusID Tag Status status_tree #> -#> 1 2. 1. alive A -#> 2 2. 1. alive A -#> 3 2. 2. alive A +#> 1 2 1 alive A +#> 2 2 1 alive A +#> 3 2 2 alive A ``` You can combine **fgeo.tool** with **dplyr**. @@ -148,12 +148,12 @@ dplyr::select(edited, -Status) #> # A tibble: 6 x 3 #> CensusID Tag status_tree #> -#> 1 2. 1. A -#> 2 2. 1. A -#> 3 2. 2. A -#> 4 2. 2. A -#> 5 2. 3. A -#> 6 2. 3. A +#> 1 2 1 A +#> 2 2 1 A +#> 3 2 2 A +#> 4 2 2 A +#> 5 2 3 A +#> 6 2 3 A ``` You don’t have to, but if you want you can use the pipe (`%>%`). diff --git a/_pkgdown.yml b/_pkgdown.yml index 30d76fac..fa099464 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -42,6 +42,10 @@ navbar: reference: + - title: Read files into dataframes + contents: + - files_to_df + - title: Manipulate dataframes by adding columns contents: - starts_with("add_") @@ -65,6 +69,10 @@ reference: - title: Manipulate names contents: - starts_with("nms_") + + - title: Process the output of FastField + contents: + - xl_sheets_to_output - title: Quality control contents: @@ -97,3 +105,4 @@ reference: contents: - top1quad - top4quad + - example_path_dir diff --git a/docs/index.html b/docs/index.html index 5e5f72c5..a9e0fb35 100644 --- a/docs/index.html +++ b/docs/index.html @@ -156,12 +156,12 @@

#> 12 2 3 dead A

Check inputs.

# Silent means success
-check_crucial_names(df, "Status")
-
-# Errs if the data hasn't a given name
-check_crucial_names(df, "DBH")
-#> Error: Ensure your data set has these variables:
-#> DBH
+check_crucial_names(df, "Status")
+#> Error in check_crucial_names(df, "Status"): could not find function "check_crucial_names"
+
+# Errs if the data hasn't a given name
+check_crucial_names(df, "DBH")
+#> Error in check_crucial_names(df, "DBH"): could not find function "check_crucial_names"
 
 check_unique(df, "CensusID", msg = "* Is this what you expect?")
 #> Warning in do.call(cond, list(customized)): Duplicated values were detected
diff --git a/docs/reference/check_unique.html b/docs/reference/check_unique.html
index 25e1f356..ef4d438e 100644
--- a/docs/reference/check_unique.html
+++ b/docs/reference/check_unique.html
@@ -147,9 +147,7 @@ 

Value

See also

-

Other functions to check inputs.: check_crucial_names

-

Other functions for developers.: check_crucial_names, - exists_in_pkg, +

Other functions for developers.: exists_in_pkg, nms_extract_anycase, nms_lowercase, nms_restore_newvar, nms, diff --git a/docs/reference/count_duplicated.html b/docs/reference/count_duplicated.html index 4d8861b8..d7794012 100644 --- a/docs/reference/count_duplicated.html +++ b/docs/reference/count_duplicated.html @@ -167,8 +167,14 @@

See a

Examples

-
library(dplyr) -df <- data.frame(a = c(1, 2, 2, 3, 3, 3)) +
library(dplyr)
#> +#> Attaching package: ‘dplyr’
#> The following objects are masked from ‘package:stats’: +#> +#> filter, lag
#> The following object is masked from ‘package:testthat’: +#> +#> matches
#> The following objects are masked from ‘package:base’: +#> +#> intersect, setdiff, setequal, union
df <- data.frame(a = c(1, 2, 2, 3, 3, 3)) count_duplicated(df, a)
#> # A tibble: 2 x 2 #> a n #> <dbl> <int> diff --git a/docs/reference/example_path_dir.html b/docs/reference/example_path_dir.html new file mode 100644 index 00000000..7a31db9e --- /dev/null +++ b/docs/reference/example_path_dir.html @@ -0,0 +1,162 @@ + + + + + + + + +Path to directory containing example data. — example_path_dir • fgeo.tool + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+ + +
+ +

Path to directory containing example data.

+ +
+ +
example_path_dir(path)
+ +

Arguments

+ + + + + + +
path

Path to a file (with extension) from inst/extdata/.

+ +

Value

+ +

Path to directory containing example data.

+ + +

Examples

+
example_path_dir("two_files/new_stem_1.xlsx")
#> [1] ""
+
+ +
+ + +
+ + + + + + diff --git a/docs/reference/exists_in_pkg.html b/docs/reference/exists_in_pkg.html index 9f974944..f194ed1b 100644 --- a/docs/reference/exists_in_pkg.html +++ b/docs/reference/exists_in_pkg.html @@ -134,8 +134,7 @@

Value

See also

-

Other functions for developers.: check_crucial_names, - check_unique, +

Other functions for developers.: check_unique, nms_extract_anycase, nms_lowercase, nms_restore_newvar, nms, diff --git a/docs/reference/fieldforms_output.html b/docs/reference/fieldforms_output.html index 41da8374..399bf279 100644 --- a/docs/reference/fieldforms_output.html +++ b/docs/reference/fieldforms_output.html @@ -235,8 +235,8 @@

Examp #> | |.................................................................| 100% #> ordinary text without R code #> -#>

#> output file: forms_loop.knit.md
#> "C:/Users/LeporeM/AppData/Local/Pandoc/pandoc" +RTS -K512m -RTS forms_loop.utf8.md --to docx --from markdown+autolink_bare_uris+ascii_identifiers+tex_math_single_backslash+smart --output pandoc356815ad722.docx --highlight-style tango --reference-doc reference.docx
#> -#> Output created: C:/Users/LeporeM/AppData/Local/Temp/2/RtmpikPaVH/fieldforms.docx
fieldforms_output( +#>
#> output file: forms_loop.knit.md
#> "C:/Users/LeporeM/AppData/Local/Pandoc/pandoc" +RTS -K512m -RTS forms_loop.utf8.md --to docx --from markdown+autolink_bare_uris+ascii_identifiers+tex_math_single_backslash+smart --output pandoce00401d70c1.docx --highlight-style tango --reference-doc reference.docx
#> +#> Output created: C:/Users/LeporeM/AppData/Local/Temp/1/Rtmpkv2lyD/fieldforms.docx
fieldforms_output( prep, "fieldforms2.docx", header = "Custom Header", output_dir = tmp )
#> #> @@ -252,8 +252,8 @@

Examp #> | |.................................................................| 100% #> ordinary text without R code #> -#>

#> output file: forms_loop.knit.md
#> "C:/Users/LeporeM/AppData/Local/Pandoc/pandoc" +RTS -K512m -RTS forms_loop.utf8.md --to docx --from markdown+autolink_bare_uris+ascii_identifiers+tex_math_single_backslash+smart --output pandoc356831b5a9c.docx --highlight-style tango --reference-doc reference.docx
#> -#> Output created: C:/Users/LeporeM/AppData/Local/Temp/2/RtmpikPaVH/fieldforms2.docx
+#>
#> output file: forms_loop.knit.md
#> "C:/Users/LeporeM/AppData/Local/Pandoc/pandoc" +RTS -K512m -RTS forms_loop.utf8.md --to docx --from markdown+autolink_bare_uris+ascii_identifiers+tex_math_single_backslash+smart --output pandoce0033f35f47.docx --highlight-style tango --reference-doc reference.docx
#> +#> Output created: C:/Users/LeporeM/AppData/Local/Temp/1/Rtmpkv2lyD/fieldforms2.docx
# Confirm dir(tmp)[grepl("docx$", dir(tmp))]
#> [1] "fieldforms.docx" "fieldforms2.docx"
# Cleaning temporary directory diff --git a/docs/reference/files_to_df.html b/docs/reference/files_to_df.html new file mode 100644 index 00000000..051bb4dc --- /dev/null +++ b/docs/reference/files_to_df.html @@ -0,0 +1,258 @@ + + + + + + + + +Read all .csv or excel files from a directory into dataframes. — files_to_df • fgeo.tool + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+ + +
+ +

Read all .csv, .xls, or .xlsx files from a directory into a dataframe or a +list of dataframes, where each element of the list is named as the source +file.

+ +
+ +
csv_to_df(input_dir)
+
+csv_to_df_lst(input_dir)
+
+xl_to_df(input_dir)
+
+xl_to_df_lst(input_dir)
+ +

Arguments

+ + + + + + +
input_dir

String giving the directory containing the excel workbooks +to read from.

+ +

Value

+ +

The versions ending in _df output a single dataframe. The versions +ending in _lst output a list of dataframes.

+ + +

Examples

+
path <- system.file("extdata", "files/01.csv", package = "fgeo.tool") +input_dir <- fs::path_dir(path) +input_dir
#> C:/Users/LeporeM/Dropbox/git_repos/fgeo.tool/inst/extdata/files
dir(input_dir)
#> [1] "01.csv" "01.xls" "02.csv" "02.xlsx"
+# Read all .csv files +csv_to_df(input_dir)
#> Parsed with column specification: +#> cols( +#> x = col_integer(), +#> y = col_character() +#> )
#> Parsed with column specification: +#> cols( +#> x = col_integer(), +#> y = col_character() +#> )
#> # A tibble: 10 x 2 +#> x y +#> <int> <chr> +#> 1 1 a +#> 2 2 b +#> 3 3 c +#> 4 4 d +#> 5 5 e +#> 6 1 k +#> 7 2 l +#> 8 3 m +#> 9 4 n +#> 10 5 o
csv_to_df_lst(input_dir)
#> Parsed with column specification: +#> cols( +#> x = col_integer(), +#> y = col_character() +#> )
#> Parsed with column specification: +#> cols( +#> x = col_integer(), +#> y = col_character() +#> )
#> $`01.csv` +#> # A tibble: 5 x 2 +#> x y +#> <int> <chr> +#> 1 1 a +#> 2 2 b +#> 3 3 c +#> 4 4 d +#> 5 5 e +#> +#> $`02.csv` +#> # A tibble: 5 x 2 +#> x y +#> <int> <chr> +#> 1 1 k +#> 2 2 l +#> 3 3 m +#> 4 4 n +#> 5 5 o +#>
+# Read all .xls or .xlsx files +xl_to_df_lst(input_dir)
#> $`01.xls` +#> # A tibble: 5 x 2 +#> x y +#> <dbl> <chr> +#> 1 1 a +#> 2 2 b +#> 3 3 c +#> 4 4 d +#> 5 5 e +#> +#> $`02.xlsx` +#> # A tibble: 5 x 2 +#> x y +#> <dbl> <chr> +#> 1 1 k +#> 2 2 l +#> 3 3 m +#> 4 4 n +#> 5 5 o +#>
xl_to_df(input_dir)
#> # A tibble: 10 x 2 +#> x y +#> <dbl> <chr> +#> 1 1 a +#> 2 2 b +#> 3 3 c +#> 4 4 d +#> 5 5 e +#> 6 1 k +#> 7 2 l +#> 8 3 m +#> 9 4 n +#> 10 5 o
+
+ +
+ + +
+ + + + + + diff --git a/docs/reference/index.html b/docs/reference/index.html index c22672cf..02f3c67b 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -108,6 +108,20 @@

Reference

+ + +

Read files into dataframes

+

+ + + + + +

csv_to_df() csv_to_df_lst() xl_to_df() xl_to_df_lst()

+ +

Read all .csv or excel files from a directory into dataframes.

+ +

Manipulate dataframes by adding columns

@@ -269,6 +283,20 @@

+

Process the output of FastField

+

+ + + + + +

xl_sheets_to_csv() xl_sheets_to_xl() xl_sheets_to_df()

+ +

Combine spreadsheets from excel workbooks into .csv files (one per workbook).

+ @@ -367,12 +395,6 @@

check_crucial_names()

- -

Check if an object contains specific names.

-

check_unique() check_unique_vector()

@@ -412,6 +434,12 @@

top4quad top1quad

Data sets for examples; adapted from bciex::bci12vft_mini.

+ + + +

example_path_dir()

+ +

Path to directory containing example data.

@@ -420,12 +448,14 @@

Contents