Skip to content

Commit

Permalink
Merge pull request #79 from chuhousen/chuhousen_work
Browse files Browse the repository at this point in the history
Update to CRAN comment, clean documents
  • Loading branch information
chuhousen authored Feb 8, 2022
2 parents 0d71694 + 41f9245 commit a827d3f
Show file tree
Hide file tree
Showing 85 changed files with 5,109 additions and 4,418 deletions.
4 changes: 2 additions & 2 deletions CRAN-RELEASE
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
This package was submitted to CRAN on 2022-01-28.
Once it is accepted, delete this file and tag the release (commit ee17dfe).
This package was submitted to CRAN on 2022-01-31.
Once it is accepted, delete this file and tag the release (commit 0d71694).
7 changes: 4 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: amerifluxr
Type: Package
Title: Interface to AmeriFlux Data Services
Title: Interface to 'AmeriFlux' Data Services
Version: 1.0.0
Authors@R: c(
person(
Expand All @@ -15,8 +15,9 @@ Authors@R: c(
email = "koen.hufkens@gmail.com",
role = c("ctb"),
comment = c(ORCID = "0000-0002-5070-8109")))
Description: Programmatic interface to the AmeriFlux database (<https://ameriflux.lbl.gov/>).
Provides query, download, and data summary tools.
Description: Programmatic interface to the 'AmeriFlux' database
(<https://ameriflux.lbl.gov/>). Provide query, download,
and data summary tools.
License: BSD_3_clause + file LICENSE
Encoding: UTF-8
LazyData: true
Expand Down
36 changes: 20 additions & 16 deletions R/amf_download_base.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,12 @@
#' "\item "education" (i.e., Education (Teacher or Student))
#' \item "other"
#' }
#' @param intended_use_text Enter a brief description of intended use. This will
#' be recorded in the data download log and emailed to
#' site's PI (free text).
#' @param intended_use_text Enter a brief description of intended use. This
#' will be recorded in the download log and emailed to site's PI (character).
#' @param out_dir Output directory for downloaded data, default tempdir()
#' @param verbose Show feedback on download progress (TRUE/FALSE)
#'
#' @return A vector of download filenames on the local drive
#' @return A vector of download file names on the local drive
#'
#' @export
#' @examples
Expand Down Expand Up @@ -83,19 +82,19 @@ amf_download_base <- function(user_id,
## check all inputs valid
if (!is.character(user_id) |
length(user_id) != 1) {
stop('user_id should be a string...')
stop("user_id should be a string...")
}

if (!is.character(user_email) |
length(user_email) != 1 |
!grepl("@", user_email)) {
stop('user_email not a valid email...')
stop("user_email not a valid email...")
}

# check if site_id are valid site ID
check_id <- amf_check_site_id(site_id)
## for multiple site ids
if(length(site_id) > 1){
if (length(site_id) > 1) {
if (any(!check_id)) {
warning(paste(
paste(site_id[which(!check_id)], collapse = ", "),
Expand All @@ -104,7 +103,7 @@ amf_download_base <- function(user_id,
site_id <- site_id[which(check_id)]

}
} else if (length(site_id) == 1){
} else if (length(site_id) == 1) {
## for single site id, need to work exception for AA-Flx, AA-Net
if (check_id | site_id == "AA-Flx" | site_id == "AA-Net") {
site_id <- site_id
Expand All @@ -114,7 +113,7 @@ amf_download_base <- function(user_id,

}
}
if(length(site_id) == 0){
if (length(site_id) == 0) {
stop("No valid Site ID in site_id...")
}

Expand All @@ -132,18 +131,18 @@ amf_download_base <- function(user_id,
return(intended_use_verbse)
}

if(is.null(intended_use_cat(intended_use = intended_use))){
if (is.null(intended_use_cat(intended_use = intended_use))) {
stop("Invalid intended_use input...")
}

# check if out_dir reachable
if(!dir.exists(out_dir)){
if (!dir.exists(out_dir)) {
stop("out_dir not valid...")
}

# prompt for data policy agreement
if (data_policy == "CCBY4.0") {
if (verbose){
if (verbose) {
cat("Data use guidelines for AmeriFlux CC-BY-4.0 Data Policy:\n",
fill = TRUE)
cat(
Expand Down Expand Up @@ -176,7 +175,7 @@ amf_download_base <- function(user_id,
)
}
} else if (data_policy == "LEGACY") {
if (verbose){
if (verbose) {
cat("Data use guidelines for AmeriFlux LEGACY License:\n",
fill = TRUE)
cat(
Expand Down Expand Up @@ -297,11 +296,16 @@ amf_download_base <- function(user_id,

# get zip file names
outfname <- strsplit(ftplink, c("/"))
outfname <- sapply(outfname, utils::tail, n = 1)
outfname <- unlist(lapply(outfname, utils::tail, n = 1))
outfname <-
substr(outfname,
1,
sapply(outfname, regexpr, pattern = "?=", fixed = TRUE) - 1)
unlist(lapply(
outfname,
regexpr,
pattern = "?=",
fixed = TRUE
)) - 1)

## check if any site_id has no data
if (length(outfname) < length(site_id)) {
Expand All @@ -320,7 +324,7 @@ amf_download_base <- function(user_id,
}

## check if downloaded files exist
miss_download <- which(!sapply(output_zip_file, file.exists))
miss_download <- which(!unlist(lapply(output_zip_file, file.exists)))
if (length(miss_download) > 0) {
warning(paste("Cannot download",
output_zip_file[miss_download],
Expand Down
6 changes: 3 additions & 3 deletions R/amf_download_bif.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,16 @@
#' }
#' @param intended_use_text Enter a brief description of intended use.
#' This will be recorded in the data download log and emailed to
#' site's PI (free text).
#' site's PI (character).
#' @param out_dir Output directory for downloaded data, default tempdir()
#' @param verbose Show feedback on download progress (TRUE/FALSE)
#' @param site_w_data Logical, download all registered sites (FALSE)
#' or only sites with available BASE data (TRUE)
#'
#' @return A vector of download filenames on the local drive
#' @return A vector of download file names on the local drive
#' @export
#'
#' @seealso amf_download_base
#' @seealso \code{\link{amf_download_base}}
#'
#' @examples
#'
Expand Down
27 changes: 11 additions & 16 deletions R/amf_extract_badm.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,25 @@
#' @param bif_data A data frame consists of 5 columns: SITE_ID, GROUP_ID,
#' VARIABLE_GROUP, VARIABLE, DATAVALUE, imported from function
#' \code{\link{amf_read_bif}}.
#' @param select_group A string, selected from VARIABLE_GROUP in the bif_data
#' @param select_group A string (character), selected from VARIABLE_GROUP
#' in the \code{bif_data}
#'
#' @seealso amf_read_bif
#' @seealso \code{\link{amf_read_bif}}
#'
#' @return A data frame of re-structured BADM data with the following columns:
#' \itemize{
#' \item GROUP_ID - A unique identifier for data belonging to the same instance of a reported variable group
#' \item GROUP_ID - A unique identifier for data belonging to the same
#' instance of a reported variable group
#' \item SITE_ID - Six character site identifier (CC-Sss)
#' \item VALUE - Values for all available VARIABLES in the selected group
#' \item ...
#' }
#' @export
#'
#' @examples
#' \dontrun{
#' # read the BADM BIF file, using an example data file
#' bif <- amf_read_bif(file = system.file("extdata",
#' "AMF_AA-Flx_BIF_20201218.xlsx",
#' "AMF_AA-Flx_BIF_CCBY4_20201218.xlsx",
#' package = "amerifluxr"))
#'
#' # get a list of valid VARIALBE_GROUP
Expand All @@ -33,19 +34,17 @@
#' # extract the selected VARIALBE_GROUP
#' amf_extract_badm(bif_data = bif, select_group = "GRP_FLUX_MEASUREMENTS")
#' amf_extract_badm(bif_data = bif, select_group = "GRP_IGBP")
#'}


amf_extract_badm <- function(bif_data,
select_group) {
# stop if missing bif_data parameter
if (missing(bif_data)) {
stop('bif_data not specified...')
stop("bif_data not specified...")
}

# stop if missing bif_data parameter
if (missing(select_group)) {
stop('select_group not specified...')
stop("select_group not specified...")
}

# check if the default columns exist
Expand All @@ -58,7 +57,7 @@ amf_extract_badm <- function(bif_data,
"DATAVALUE"
) %in% colnames(bif_data)
) != 5) {
stop('bif_data format unrecognized...')
stop("bif_data format unrecognized...")
}

# stop if select_group do not exist
Expand All @@ -70,15 +69,11 @@ amf_extract_badm <- function(bif_data,
} else{
# locate VARIALBE_GROUP
bif_work <-
bif_data[which(bif_data$VARIABLE_GROUP == select_group),]
bif_data[which(bif_data$VARIABLE_GROUP == select_group), ]

# get a list of VARIALBE under the specific VARIABLE_GROUP
var_ls <- unique(bif_work$VARIABLE)

# retrieve a list of GROUP_ID
entry_ls <- as.character(bif_work$GROUP_ID)
entry_ls <- unique(bif_work$GROUP_ID)

# output data frame
bif_out <- data.frame(
GROUP_ID = tapply(bif_work$GROUP_ID,
Expand All @@ -105,7 +100,7 @@ amf_extract_badm <- function(bif_data,
colnames(bif_out)[ncol(bif_out)] <- paste(var_ls[j])
}

bif_out <- bif_out[order(bif_out$SITE_ID),]
bif_out <- bif_out[order(bif_out$SITE_ID), ]

}

Expand Down
60 changes: 29 additions & 31 deletions R/amf_filter_base.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#' Filter AmeriFlux BASE data product based on plausible range
#' Filter AmeriFlux BASE data based on plausible range
#'
#' @description The function filters BASE data product based on the expected
#' @description The function filters BASE data based on the expected
#' plausible ranges specified for each variable. See AmeriFlux web site
#' \url{https://ameriflux.lbl.gov/data/data-processing-pipelines/data-qaqc/physical-range-module/}
#' for description of physical ranges.
#' \url{https://ameriflux.lbl.gov/data/data-processing-pipelines/data-qaqc/}
#' for description of the plausible ranges.
#'
#' @param data_in A data frame containing BASE data, e.g.,
#' import from \code{\link{amf_read_base}}.
Expand All @@ -15,19 +15,17 @@
#' }
#' If not specified, use \code{\link{amf_variables}} by default.
#' @param basename_decode A data frame with at least two columns:
#' #' \itemize{
#' \itemize{
#' \item variable_name: actual variable name
#' \item basename: variable base name
#' }
#' If not specified, use \code{\link{amf_parse_basename}} by default.
#' @param loose_filter A number in ratio (0-1) used to adjust the physical range
#' for filtering. Set it to 0 if not used. The default is 0.05.
#'
#' @return A data frame similar to data_in, with out-of-range data points
#' being filtered out
#' @param loose_filter A number in ratio (0-1) used to adjust the physical
#' range for filtering. Set it to 0 if not used. The default is 0.05.
#'
#' @return A data frame similar to \code{data_in} filtered out off-range points
#' @export
#' @seealso amf_read_base, amf_var_info, amf_parse_basename
#'
#' @examples
#' \dontrun{
#' # read the BASE from a csv file
Expand All @@ -51,7 +49,7 @@ amf_filter_base <- function(data_in,

# stop if missing data_in parameter
if (missing(data_in)) {
stop('data_in not specified...')
stop("data_in not specified...")
}

# unless specified, obtain limit_ls through amf_variables()
Expand All @@ -61,11 +59,11 @@ amf_filter_base <- function(data_in,

# check if the default columns exist
if (sum(c("Name", "Min", "Max") %in% colnames(limit_ls)) != 3) {
stop('limit_ls format unrecognized...')
stop("limit_ls format unrecognized...")
} else if (!is.character(limit_ls$Name) |
!is.numeric(limit_ls$Min) |
!is.numeric(limit_ls$Max)) {
stop('limit_ls format unrecognized...')
stop("limit_ls format unrecognized...")
}

# unless specified, obtain basename_decode using amf_parse_basename()
Expand All @@ -76,25 +74,25 @@ amf_filter_base <- function(data_in,
}

# check if default columns exist
if (sum(c("variable_name","basename") %in% colnames(basename_decode)) != 2) {
stop('basename_decode format unrecognized...')
if (sum(c("variable_name", "basename") %in% colnames(basename_decode)) != 2) {
stop("basename_decode format unrecognized...")
}

# check loose_filter
if (!is.numeric(loose_filter) & !is.na(loose_filter)) {
stop('loose_filter should be numeric...')
stop("loose_filter should be numeric...")
}else if (loose_filter < 0 | loose_filter > 1) {
stop('loose_filter may be unrealistic...')
stop("loose_filter may be unrealistic...")
}

## ensure data_in match the order in basename_decode
var.order <- NULL
var_order <- NULL
for (i in seq_len(nrow(basename_decode))) {
var.order <-
c(var.order,
var_order <-
c(var_order,
which(colnames(data_in) == paste(basename_decode$variable_name[i])))
}
data_in <- data_in[, var.order]
data_in <- data_in[, var_order]

## filter by data_in by limit_ls, based on matching
## basename as provided in basename_decode
Expand All @@ -105,21 +103,21 @@ amf_filter_base <- function(data_in,

if (length(limit_ls_loc) == 1) {

var.upp <- limit_ls$Max[limit_ls_loc]
var.low <- limit_ls$Min[limit_ls_loc]
var_upp <- limit_ls$Max[limit_ls_loc]
var_low <- limit_ls$Min[limit_ls_loc]

# adjust for loose filtering
if (!is.na(var.upp) & !is.na(var.low) & !is.na(loose_filter)) {
var.upp <- var.upp + loose_filter * abs(var.upp - var.low)
var.low <- var.low - loose_filter * abs(var.upp - var.low)
if (!is.na(var_upp) & !is.na(var_low) & !is.na(loose_filter)) {
var_upp <- var_upp + loose_filter * abs(var_upp - var_low)
var_low <- var_low - loose_filter * abs(var_upp - var_low)
}

if (!is.na(var.upp) & is.numeric(data_in[, l])) {
data_in[which(data_in[, l] > var.upp), l] <- NA
if (!is.na(var_upp) & is.numeric(data_in[, l])) {
data_in[which(data_in[, l] > var_upp), l] <- NA
}

if (!is.na(var.low) & is.numeric(data_in[, l])) {
data_in[which(data_in[, l] < var.low), l] <- NA
if (!is.na(var_low) & is.numeric(data_in[, l])) {
data_in[which(data_in[, l] < var_low), l] <- NA
}
}
}
Expand Down
6 changes: 3 additions & 3 deletions R/amf_list_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
#' \url{https://ameriflux.lbl.gov/data/aboutdata/data-variables/} for details
#' about the variable naming.
#'
#' @param site_set a scalar or vector of character specifying the target
#' @param site_set A scalar or vector of character specifying the target
#' AmeriFlux Site ID (CC-Sss). If not specified, it returns all sites.
#' @param var_set a scalar or vector of character specifying the target
#' @param var_set A scalar or vector of character specifying the target
#' variables as in basename. See AmeriFlux
#' page\url{https://ameriflux.lbl.gov/data/aboutdata/data-variables/#base}
#' for a list of variable names. If not specified, it returns all variables.
Expand Down Expand Up @@ -46,7 +46,7 @@ amf_list_data <- function(site_set = NULL,
# get latest data variable availability
data_aval <- utils::read.csv(
amf_server("data_variable"),
header = T,
header = TRUE,
skip = 1,
stringsAsFactors = FALSE
)
Expand Down
Loading

0 comments on commit a827d3f

Please sign in to comment.