Skip to content
This repository has been archived by the owner on Jul 23, 2023. It is now read-only.

Commit

Permalink
Primary output is now path.
Browse files Browse the repository at this point in the history
  • Loading branch information
maurolepore committed Oct 15, 2018
1 parent 4032696 commit 04ee115
Show file tree
Hide file tree
Showing 12 changed files with 74 additions and 130 deletions.
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,5 @@

export(agb_example)
export(data_preparation)
export(list_data)
import(BIOMASS)
import(data.table)
90 changes: 37 additions & 53 deletions R/data_preparation.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@
#' Main routine to format, detect major obvious errors, and gap-fill those
#' errors in CTFS-formated data.
#'
#' @param site The full name of your site (in lower case); e.g., 'barro colorado
#' island' (TODO: Do you need to clarify that this should be a valid value
#' of `site.info$site`?).
#' @param stem `TRUE` or `FALSE`, using the stem data (`stem = TRUE`) rather
#' than the tree data (i.e. called 'full', `stem = FALSE`).
#' @param path String giving a path to a parent directory containing species
#' and census datasets.
#' @param stem `TRUE`(default) or `FALSE` reflect that your censuses are
#' ForestGEO `stem` or `tree` (aka `full`) tables, respectively.
#' @param site String giving the name of your site -- one of one of
#' `site.info$site` (e.g., 'barro colorado island'.
#' @param dbh_units set the unit ("mm" or "cm") of DBH values, by default
#' dbh_units=="mm".
#' @param WD Optional, provide an external data.frame of wood densities by
Expand All @@ -29,7 +30,11 @@
#' "path/to/file" (without extention) to output all records for trees with
#' major errors in productivity to a csv file. Defaults to not write such a
#' file.
#' @param DATA_path The pathname where the data are located.
#' @param graph_problems_to A string giving a directory with the format
#' "path/to/file" (without extention) to output graphs showing problematic
#' trees. The output may be multiple files, for example:
#' path/to/file_1.pdf,path/to/file_2.pdf, path/to/file_3.pdf, and so on.
#' Defaults to not write such files.
#' @param exclude_interval `NULL` by default. If needed a vector (e.g. c(1,2))
#' indicating which census interval(s) must be discarded from computation due,
#' for instance, to a change in measurement protocol.
Expand All @@ -40,12 +45,12 @@
#'
#' @examples
#' data_preparation(
#' DATA_path = agb_example("data"),
#' path = agb_example("data"),
#' site = "barro colorado island",
#' stem = TRUE
#' )
data_preparation <- function(stem,
DATA_path = NULL,
data_preparation <- function(path,
stem,
site,
dbh_units = "mm",
WD = NULL,
Expand All @@ -56,8 +61,8 @@ data_preparation <- function(stem,
dbh_stranglers = 500,
maxrel = 0.2,
write_errors_to = NULL,
exclude_interval = NULL,
graph_problems_to = NULL) {
graph_problems_to = NULL,
exclude_interval = NULL) {
# TODO: Rename data_preparation() to prepare_data()

# site <- tolower(site)
Expand All @@ -68,21 +73,6 @@ data_preparation <- function(stem,
# if (is.na(INDEX)) {
# stop("Site name should be one of the following: \n", paste(levels(factor(site.info$site)), collapse = " - "))
# }
# TODO: Remove this if (): Make `DATA_path` the first argument with no default
if (is.null(DATA_path)) {
path_folder <- getwd()
# TODO: Is the double assignment intentional? (i.e`<<-` instead of `<-`)
DATA_path <<- paste0(path_folder, "/data/")
}
# # TODO: Replace `DATA_path` by `.data`: A list of datasets.
# # TODO: Write a helper that creates the list of datasets given a path.
# file_names <- list.files(paste0(getwd(),"/data/"))
# lapply(file_names,load,function(x) paste0(getwd(),"/data/",x)) # doesn't work
# for (i in 1:length( file_names))
# {
# load(paste0(getwd(),"/data/",file_names[i]))
# }
## For sake of simplicity, we point toward the data stored in "data" folder (see above for automation)
site <- tolower(site)
INDEX <- match(tolower(site), site.info$site)
if (is.na(INDEX)) {
Expand All @@ -93,29 +83,20 @@ data_preparation <- function(stem,
)
}

DATA_path <- paste0(getwd(), "/data/")
path_folder <- getwd()
files <- list.files(DATA_path)
files <- list.files(path)
# TODO: Relying on string matching might be dangerous.
ifelse(
stem,
files <- files[grep("stem", files)],
files <- files[grep("full", files)]
)

# # TODO: Why not output a list of objects instead of files in a directory?
# ifelse(
# # FIXME: path_folder is undefined if user provide DATA_path, so this fails.
# # Define `path_folder` here again, or add argument `output_path`?
# !dir.exists(file.path(paste0(path_folder, "/output"))),
# dir.create(file.path(paste0(path_folder, "/output"))),
# FALSE
# )

# Create the receiving data.frame
nms <- c(
"treeID", "stemID", "tag", "StemTag", "sp", "quadrat", "gx", "gy", "dbh", "hom",
"ExactDate", "DFstatus", "codes", "date", "status", "CensusID", "year"
"treeID", "stemID", "tag", "StemTag", "sp", "quadrat", "gx", "gy", "dbh",
"hom", "ExactDate", "DFstatus", "codes", "date", "status", "CensusID",
"year"
)
# FIXME: Growing an object can be terribly slow. Instead of creating a dataframe
# with one row you should create a dataframe with as many rows as you need.
Expand All @@ -127,7 +108,7 @@ data_preparation <- function(stem,
# TODO: see seq_along()
for (i in 1:length(files)) {
# TODO: Again, this could be avoided if the censues come in a `.data` list
temp <- data.table::setDT(LOAD(paste(DATA_path, files[i], sep = "/")))
temp <- data.table::setDT(LOAD(paste(path, files[i], sep = "/")))
temp$CensusID <- i

# TODO: Too-long line. Need a meaningfully-named intermediary variable?
Expand Down Expand Up @@ -173,7 +154,10 @@ data_preparation <- function(stem,
# TODO: Reorder to match formals:
# df, use_palm_allometry, DBH = NULL, WD = NULL, H = NULL
# FIXME: This fails because WD can't be found.
df <- compute_agb(df, WD = WD, H = NULL, site = site, use_palm_allometry)
df <- compute_agb(
df, WD = WD, H = NULL, site = site, use_palm_allometry,
path = path
)

message("Step 3: AGB calculation done.")

Expand Down Expand Up @@ -440,13 +424,10 @@ compute_agb <- function(df,
DBH = NULL,
WD = NULL,
H = NULL,
site) {
if (!exists("DATA_path")) {
# TODO: `<<-` is dangerous. Are you sure you need it?
DATA_path <<- paste0(path_folder, "/data/")
}
site,
path) {
# Allocate wood density
df <- assignWD(df, site = site, WD = WD)
df <- assignWD(df, site = site, WD = WD, path = path)


# Compute biomass
Expand All @@ -462,7 +443,7 @@ compute_agb <- function(df,
# meaningful_name <- complicated-statement-you-need-insidge-if
# if (meaningful_name) ...
if (is.na(match("family", tolower(names(df))))) {
SP <- LOAD(paste0(DATA_path, list.files(DATA_path)[grep("spptable", list.files(DATA_path))]))
SP <- LOAD(paste0(path, list.files(path)[grep("spptable", list.files(path))]))
trim <- function(x) gsub("^\\s+|\\s+$", "", x)
SP$genus <- trim(substr(SP$Latin, 1, regexpr(" ", SP$Latin)))
SP$species <- trim(substr(SP$Latin, regexpr(" ", SP$Latin), 50))
Expand Down Expand Up @@ -496,13 +477,16 @@ compute_agb <- function(df,
#' @return A data.table (data.frame) with all relevant variables.
#' @keywords internal
#' @noRd
assignWD <- function(DAT, site, WD = NULL) {
if (is.null(DATA_path)) {
DATA_path <<- paste0(path_folder, "/data/")
}
assignWD <- function(DAT, site, WD = NULL, path) {

# Add genus & species to data
SP <- LOAD(paste(DATA_path, list.files(DATA_path)[grep("spptable", list.files(DATA_path))], sep = "/"))
SP <- LOAD(
paste(
path,
list.files(path)[grep("spptable", list.files(path))],
sep = "/"
)
)
# FIXME: Replace subset() with `[`. See ?subset():
# > Warning: This is a convenience function intended for use interactively.
# For programming it is better to use the standard subsetting functions like
Expand Down
22 changes: 0 additions & 22 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,28 +13,6 @@ receiving_df <- function(.names) {
stats::setNames(data.frame(na), .names)
}



# TODO: Move to its own file and test.

#' Create a list from all .rda files in a parent directory.
#'
#' @param parent String giving the parent directory containing .rda files.
#'
#' @return A list where each element is the object stores in one .rda file
#' stored in `parent`.
#'
#' @export
#'
#' @examples
#' parent <- agb_example("data")
#' str(list_data(parent), list.len = 3)
list_data <- function(parent) {
lapply( fs::dir_ls(parent), function(x) get(load(x)))
}



# TODO: Move to its own file and test.

#' Path to directory containing example data.
Expand Down
13 changes: 5 additions & 8 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,11 @@ devtools::install_github("AGBfluxes")
```{r, error=TRUE}
library(AGBfluxes)
# Make sure the name of your site is as in the database.
# FIXME: We can be more flexible here. And try match insensitive to case
sited <- site.info$site
mysite <- as.character(sited[grep("Barro", sited, ignore.case = TRUE)])
mysite
path <- agb_example("data")
dir(path)
prep <- data_preparation(site = mysite, stem = TRUE)
# Make sure the name of your `site` is one of `site.info$site`
prep <- data_preparation(path, site = "barro colorado island", stem = TRUE)
head(prep)
```
Expand Down Expand Up @@ -92,8 +90,7 @@ problems <- paste0(tmp, "/problems")
# problems <- "results/problems"
prep_3 <- data_preparation(
site = "barro colorado island",
stem = TRUE,
path, site = "barro colorado island", stem = TRUE,
write_errors_to = errors,
graph_problems_to = problems
)
Expand Down
18 changes: 8 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,13 @@ method.
``` r
library(AGBfluxes)

# Make sure the name of your site is as in the database.
# FIXME: We can be more flexible here. And try match insensitive to case
sited <- site.info$site
mysite <- as.character(sited[grep("Barro", sited, ignore.case = TRUE)])
mysite
#> [1] "barro colorado island"

prep <- data_preparation(site = mysite, stem = TRUE)
path <- agb_example("data")
dir(path)
#> [1] "bci.spptable.rda" "bci_stem_1995.rda" "bci_stem_2000.rda"
#> [4] "bci_stem_2005.rda"

# Make sure the name of your `site` is one of `site.info$site`
prep <- data_preparation(path, site = "barro colorado island", stem = TRUE)
#> Step 1: Data import done.
#> Step 2: Data consolidation done.
#> The reference dataset contains 16781 wood density values
Expand Down Expand Up @@ -141,8 +140,7 @@ problems <- paste0(tmp, "/problems")
# problems <- "results/problems"

prep_3 <- data_preparation(
site = "barro colorado island",
stem = TRUE,
path, site = "barro colorado island", stem = TRUE,
write_errors_to = errors,
graph_problems_to = problems
)
Expand Down
26 changes: 16 additions & 10 deletions man/data_preparation.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 0 additions & 22 deletions man/list_data.Rd

This file was deleted.

Binary file removed tests/testthat/data/bci.spptable.rda
Binary file not shown.
Binary file removed tests/testthat/data/bci_stem_1995.rda
Binary file not shown.
Binary file removed tests/testthat/data/bci_stem_2000.rda
Binary file not shown.
Binary file removed tests/testthat/data/bci_stem_2005.rda
Binary file not shown.
12 changes: 8 additions & 4 deletions tests/testthat/test-data_preparation.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
context("data_preparation")

prep <- data_preparation(
path = agb_example("data"),
site = "barro colorado island",
stem = TRUE,
taper_correction = TRUE,
Expand All @@ -10,9 +11,8 @@ prep <- data_preparation(
dbh_stranglers = 500,
maxrel = 0.2,
write_errors_to = NULL,
DATA_path = NULL,
exclude_interval = NULL,
graph_problems_to = NULL
graph_problems_to = NULL,
exclude_interval = NULL
)

nms <- c(
Expand All @@ -39,7 +39,11 @@ describe("data_preparation", {
})

it("works with minimum mandatory argumets", {
out <- data_preparation(site = "barro colorado island", stem = TRUE)
out <- data_preparation(
path = agb_example("data"),
site = "barro colorado island",
stem = TRUE
)
expect_is(out, "data.frame")
expect_named(out, nms)
})
Expand Down

0 comments on commit 04ee115

Please sign in to comment.