Merge pull request #79 from chuhousen/chuhousen_work

Update to CRAN comment, clean documents
chuhousen · Feb 8, 2022 · a827d3f · a827d3f
2 parents 0d71694 + 41f9245
commit a827d3f
Show file tree

Hide file tree

Showing 85 changed files with 5,109 additions and 4,418 deletions.
diff --git a/CRAN-RELEASE b/CRAN-RELEASE
@@ -1,2 +1,2 @@
-This package was submitted to CRAN on 2022-01-28.
-Once it is accepted, delete this file and tag the release (commit ee17dfe).
+This package was submitted to CRAN on 2022-01-31.
+Once it is accepted, delete this file and tag the release (commit 0d71694).
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: amerifluxr
 Type: Package
-Title: Interface to AmeriFlux Data Services
+Title: Interface to 'AmeriFlux' Data Services
 Version: 1.0.0
 Authors@R: c(
              person(
@@ -15,8 +15,9 @@ Authors@R: c(
                email = "koen.hufkens@gmail.com",
                role = c("ctb"),
                comment = c(ORCID = "0000-0002-5070-8109")))
-Description: Programmatic interface to the AmeriFlux database (<https://ameriflux.lbl.gov/>).
-    Provides query, download, and data summary tools.
+Description: Programmatic interface to the 'AmeriFlux' database
+    (<https://ameriflux.lbl.gov/>). Provide query, download,
+    and data summary tools.
 License: BSD_3_clause + file LICENSE
 Encoding: UTF-8
 LazyData: true

diff --git a/R/amf_download_base.R b/R/amf_download_base.R
@@ -32,13 +32,12 @@
 #'   "\item "education" (i.e., Education (Teacher or Student))
 #'    \item "other"
 #'  }
-#' @param intended_use_text Enter a brief description of intended use. This will
-#' be recorded in the data download log and emailed to
-#' site's PI (free text).
+#' @param intended_use_text Enter a brief description of intended use. This
+#' will be recorded in the download log and emailed to site's PI (character).
 #' @param out_dir Output directory for downloaded data, default tempdir()
 #' @param verbose Show feedback on download progress (TRUE/FALSE)
 #'
-#' @return A vector of download filenames on the local drive
+#' @return A vector of download file names on the local drive
 #'
 #' @export
 #' @examples
@@ -83,19 +82,19 @@ amf_download_base <- function(user_id,
   ## check all inputs valid
   if (!is.character(user_id) |
       length(user_id) != 1) {
-    stop('user_id should be a string...')
+    stop("user_id should be a string...")
   }
 
   if (!is.character(user_email) |
       length(user_email) != 1 |
       !grepl("@", user_email)) {
-    stop('user_email not a valid email...')
+    stop("user_email not a valid email...")
   }
 
   # check if site_id are valid site ID
   check_id <- amf_check_site_id(site_id)
   ## for multiple site ids
-  if(length(site_id) > 1){
+  if (length(site_id) > 1) {
     if (any(!check_id)) {
       warning(paste(
         paste(site_id[which(!check_id)], collapse = ", "),
@@ -104,7 +103,7 @@ amf_download_base <- function(user_id,
       site_id <- site_id[which(check_id)]
 
     }
-  } else if (length(site_id) == 1){
+  } else if (length(site_id) == 1) {
     ## for single site id, need to work exception for AA-Flx, AA-Net
     if (check_id | site_id == "AA-Flx" | site_id == "AA-Net") {
       site_id <- site_id
@@ -114,7 +113,7 @@ amf_download_base <- function(user_id,
 
     }
   }
-  if(length(site_id) == 0){
+  if (length(site_id) == 0) {
     stop("No valid Site ID in site_id...")
   }
 
@@ -132,18 +131,18 @@ amf_download_base <- function(user_id,
     return(intended_use_verbse)
   }
 
-  if(is.null(intended_use_cat(intended_use = intended_use))){
+  if (is.null(intended_use_cat(intended_use = intended_use))) {
     stop("Invalid intended_use input...")
   }
 
   # check if out_dir reachable
-  if(!dir.exists(out_dir)){
+  if (!dir.exists(out_dir)) {
     stop("out_dir not valid...")
   }
 
   # prompt for data policy agreement
   if (data_policy == "CCBY4.0") {
-    if (verbose){
+    if (verbose) {
       cat("Data use guidelines for AmeriFlux CC-BY-4.0 Data Policy:\n",
           fill = TRUE)
       cat(
@@ -176,7 +175,7 @@ amf_download_base <- function(user_id,
       )
     }
   } else if (data_policy == "LEGACY") {
-    if (verbose){
+    if (verbose) {
       cat("Data use guidelines for AmeriFlux LEGACY License:\n",
           fill = TRUE)
       cat(
@@ -297,11 +296,16 @@ amf_download_base <- function(user_id,
 
       # get zip file names
       outfname <- strsplit(ftplink, c("/"))
-      outfname <- sapply(outfname,  utils::tail, n = 1)
+      outfname <- unlist(lapply(outfname,  utils::tail, n = 1))
       outfname <-
         substr(outfname,
                1,
-               sapply(outfname, regexpr, pattern = "?=", fixed = TRUE) - 1)
+               unlist(lapply(
+                 outfname,
+                 regexpr,
+                 pattern = "?=",
+                 fixed = TRUE
+               )) - 1)
 
       ## check if any site_id has no data
       if (length(outfname) < length(site_id)) {
@@ -320,7 +324,7 @@ amf_download_base <- function(user_id,
       }
 
       ## check if downloaded files exist
-      miss_download <- which(!sapply(output_zip_file, file.exists))
+      miss_download <- which(!unlist(lapply(output_zip_file, file.exists)))
       if (length(miss_download) > 0) {
         warning(paste("Cannot download",
                       output_zip_file[miss_download],

diff --git a/R/amf_download_bif.R b/R/amf_download_bif.R
@@ -30,16 +30,16 @@
 #'  }
 #' @param intended_use_text Enter a brief description of intended use.
 #' This will be recorded in the data download log and emailed to
-#' site's PI (free text).
+#' site's PI (character).
 #' @param out_dir Output directory for downloaded data, default tempdir()
 #' @param verbose Show feedback on download progress (TRUE/FALSE)
 #' @param site_w_data Logical, download all registered sites (FALSE)
 #' or only sites with available BASE data (TRUE)
 #'
-#' @return A vector of download filenames on the local drive
+#' @return A vector of download file names on the local drive
 #' @export
 #'
-#' @seealso amf_download_base
+#' @seealso \code{\link{amf_download_base}}
 #'
 #' @examples
 #'

diff --git a/R/amf_extract_badm.R b/R/amf_extract_badm.R
@@ -7,24 +7,25 @@
 #' @param bif_data A data frame consists of 5 columns: SITE_ID, GROUP_ID,
 #' VARIABLE_GROUP, VARIABLE, DATAVALUE, imported from function
 #' \code{\link{amf_read_bif}}.
-#' @param select_group A string, selected from VARIABLE_GROUP in the bif_data
+#' @param select_group A string (character), selected from VARIABLE_GROUP
+#'  in the \code{bif_data}
 #'
-#' @seealso amf_read_bif
+#' @seealso \code{\link{amf_read_bif}}
 #'
 #' @return A data frame of re-structured BADM data with the following columns:
 #' \itemize{
-#'   \item GROUP_ID - A unique identifier for data belonging to the same instance of a reported variable group
+#'   \item GROUP_ID - A unique identifier for data belonging to the same
+#'   instance of a reported variable group
 #'   \item SITE_ID - Six character site identifier (CC-Sss)
 #'   \item VALUE - Values for all available VARIABLES in the selected group
 #'   \item ...
 #' }
 #' @export
 #'
 #' @examples
-#' \dontrun{
 #' # read the BADM BIF file, using an example data file
 #' bif <- amf_read_bif(file = system.file("extdata",
-#'                                        "AMF_AA-Flx_BIF_20201218.xlsx",
+#'                                        "AMF_AA-Flx_BIF_CCBY4_20201218.xlsx",
 #'                                         package = "amerifluxr"))
 #'
 #' # get a list of valid VARIALBE_GROUP
@@ -33,19 +34,17 @@
 #' # extract the selected VARIALBE_GROUP
 #' amf_extract_badm(bif_data = bif, select_group = "GRP_FLUX_MEASUREMENTS")
 #' amf_extract_badm(bif_data = bif, select_group = "GRP_IGBP")
-#'}
-
 
 amf_extract_badm <- function(bif_data,
                              select_group) {
   # stop if missing bif_data parameter
   if (missing(bif_data)) {
-    stop('bif_data not specified...')
+    stop("bif_data not specified...")
   }
 
   # stop if missing bif_data parameter
   if (missing(select_group)) {
-    stop('select_group not specified...')
+    stop("select_group not specified...")
   }
 
   # check if the default columns exist
@@ -58,7 +57,7 @@ amf_extract_badm <- function(bif_data,
       "DATAVALUE"
     ) %in% colnames(bif_data)
   ) != 5) {
-    stop('bif_data format unrecognized...')
+    stop("bif_data format unrecognized...")
   }
 
   # stop if select_group do not exist
@@ -70,15 +69,11 @@ amf_extract_badm <- function(bif_data,
   } else{
     # locate VARIALBE_GROUP
     bif_work <-
-      bif_data[which(bif_data$VARIABLE_GROUP == select_group),]
+      bif_data[which(bif_data$VARIABLE_GROUP == select_group), ]
 
     # get a list of VARIALBE under the specific VARIABLE_GROUP
     var_ls <- unique(bif_work$VARIABLE)
 
-    # retrieve a list of GROUP_ID
-    entry_ls <- as.character(bif_work$GROUP_ID)
-    entry_ls <- unique(bif_work$GROUP_ID)
-
     # output data frame
     bif_out <- data.frame(
       GROUP_ID = tapply(bif_work$GROUP_ID,
@@ -105,7 +100,7 @@ amf_extract_badm <- function(bif_data,
       colnames(bif_out)[ncol(bif_out)] <- paste(var_ls[j])
     }
 
-    bif_out <- bif_out[order(bif_out$SITE_ID),]
+    bif_out <- bif_out[order(bif_out$SITE_ID), ]
 
   }
 

diff --git a/R/amf_filter_base.R b/R/amf_filter_base.R
@@ -1,9 +1,9 @@
-#' Filter AmeriFlux BASE data product based on plausible range
+#' Filter AmeriFlux BASE data based on plausible range
 #'
-#' @description The function filters BASE data product based on the expected
+#' @description The function filters BASE data based on the expected
 #' plausible ranges specified for each variable. See AmeriFlux web site
-#' \url{https://ameriflux.lbl.gov/data/data-processing-pipelines/data-qaqc/physical-range-module/}
-#' for description of physical ranges.
+#' \url{https://ameriflux.lbl.gov/data/data-processing-pipelines/data-qaqc/}
+#' for description of the plausible ranges.
 #'
 #' @param data_in A data frame containing BASE data, e.g.,
 #'  import from \code{\link{amf_read_base}}.
@@ -15,19 +15,17 @@
 #'  }
 #'  If not specified, use \code{\link{amf_variables}} by default.
 #' @param basename_decode A data frame with at least two columns:
-#' #' \itemize{
+#' \itemize{
 #'   \item variable_name: actual variable name
 #'   \item basename: variable base name
 #'  }
 #'  If not specified, use \code{\link{amf_parse_basename}} by default.
-#' @param loose_filter A number in ratio (0-1) used to adjust the physical range
-#' for filtering. Set it to 0 if not used. The default is 0.05.
-#'
-#' @return A data frame similar to data_in, with out-of-range data points
-#'  being filtered out
+#' @param loose_filter A number in ratio (0-1) used to adjust the physical
+#' range for filtering. Set it to 0 if not used. The default is 0.05.
 #'
+#' @return A data frame similar to \code{data_in} filtered out off-range points
 #' @export
-#' @seealso amf_read_base, amf_var_info, amf_parse_basename
+#'
 #' @examples
 #' \dontrun{
 #' # read the BASE from a csv file
@@ -51,7 +49,7 @@ amf_filter_base <- function(data_in,
 
   # stop if missing data_in parameter
   if (missing(data_in)) {
-    stop('data_in not specified...')
+    stop("data_in not specified...")
   }
 
   # unless specified, obtain limit_ls through amf_variables()
@@ -61,11 +59,11 @@ amf_filter_base <- function(data_in,
 
   # check if the default columns exist
   if (sum(c("Name", "Min", "Max") %in% colnames(limit_ls)) != 3) {
-    stop('limit_ls format unrecognized...')
+    stop("limit_ls format unrecognized...")
   } else if (!is.character(limit_ls$Name) |
              !is.numeric(limit_ls$Min) |
              !is.numeric(limit_ls$Max)) {
-    stop('limit_ls format unrecognized...')
+    stop("limit_ls format unrecognized...")
   }
 
   # unless specified, obtain basename_decode using amf_parse_basename()
@@ -76,25 +74,25 @@ amf_filter_base <- function(data_in,
   }
 
   # check if default columns exist
-  if (sum(c("variable_name","basename") %in% colnames(basename_decode)) != 2) {
-    stop('basename_decode format unrecognized...')
+  if (sum(c("variable_name", "basename") %in% colnames(basename_decode)) != 2) {
+    stop("basename_decode format unrecognized...")
   }
 
   # check loose_filter
   if (!is.numeric(loose_filter) & !is.na(loose_filter)) {
-    stop('loose_filter should be numeric...')
+    stop("loose_filter should be numeric...")
   }else if (loose_filter < 0 | loose_filter > 1) {
-    stop('loose_filter may be unrealistic...')
+    stop("loose_filter may be unrealistic...")
   }
 
   ## ensure data_in match the order in basename_decode
-  var.order <- NULL
+  var_order <- NULL
   for (i in seq_len(nrow(basename_decode))) {
-    var.order <-
-      c(var.order,
+    var_order <-
+      c(var_order,
         which(colnames(data_in) == paste(basename_decode$variable_name[i])))
   }
-  data_in <- data_in[, var.order]
+  data_in <- data_in[, var_order]
 
   ## filter by data_in by limit_ls, based on matching
   ##  basename as provided in basename_decode
@@ -105,21 +103,21 @@ amf_filter_base <- function(data_in,
 
     if (length(limit_ls_loc) == 1) {
 
-      var.upp <- limit_ls$Max[limit_ls_loc]
-      var.low <- limit_ls$Min[limit_ls_loc]
+      var_upp <- limit_ls$Max[limit_ls_loc]
+      var_low <- limit_ls$Min[limit_ls_loc]
 
       # adjust for loose filtering
-      if (!is.na(var.upp) & !is.na(var.low) & !is.na(loose_filter)) {
-        var.upp <- var.upp + loose_filter * abs(var.upp - var.low)
-        var.low <- var.low - loose_filter * abs(var.upp - var.low)
+      if (!is.na(var_upp) & !is.na(var_low) & !is.na(loose_filter)) {
+        var_upp <- var_upp + loose_filter * abs(var_upp - var_low)
+        var_low <- var_low - loose_filter * abs(var_upp - var_low)
       }
 
-      if (!is.na(var.upp) & is.numeric(data_in[, l])) {
-        data_in[which(data_in[, l] > var.upp), l] <- NA
+      if (!is.na(var_upp) & is.numeric(data_in[, l])) {
+        data_in[which(data_in[, l] > var_upp), l] <- NA
       }
 
-      if (!is.na(var.low) & is.numeric(data_in[, l])) {
-        data_in[which(data_in[, l] < var.low), l] <- NA
+      if (!is.na(var_low) & is.numeric(data_in[, l])) {
+        data_in[which(data_in[, l] < var_low), l] <- NA
       }
     }
   }

diff --git a/R/amf_list_data.R b/R/amf_list_data.R
@@ -5,9 +5,9 @@
 #' \url{https://ameriflux.lbl.gov/data/aboutdata/data-variables/} for details
 #' about the variable naming.
 #'
-#' @param site_set a scalar or vector of character specifying the target
+#' @param site_set A scalar or vector of character specifying the target
 #' AmeriFlux Site ID (CC-Sss). If not specified, it returns all sites.
-#' @param var_set a scalar or vector of character specifying the target
+#' @param var_set A scalar or vector of character specifying the target
 #' variables as in basename. See AmeriFlux
 #' page\url{https://ameriflux.lbl.gov/data/aboutdata/data-variables/#base}
 #' for a list of variable names. If not specified, it returns all variables.
@@ -46,7 +46,7 @@ amf_list_data <- function(site_set = NULL,
     # get latest data variable availability
     data_aval <- utils::read.csv(
       amf_server("data_variable"),
-      header = T,
+      header = TRUE,
       skip = 1,
       stringsAsFactors = FALSE
     )