version 0.1.0

cran · Aug 16, 2023 · 5e90fa8 · 5e90fa8
commit 5e90fa8
Show file tree

Hide file tree

Showing 11 changed files with 387 additions and 0 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,20 @@
+Package: lettervalue
+Type: Package
+Title: Computing Letter Values
+Version: 0.1.0
+Authors@R: c(
+    person(given = "Gilberto",
+           family = "Sassi",
+           role = c("aut", "cre"),
+           email = "sassi.pereira.gilberto@gmail.com"))
+Maintainer: Gilberto Sassi <sassi.pereira.gilberto@gmail.com>
+Imports: tibble, glue, purrr, stats
+Description: Letter Values for the course Exploratory Data Analysis at Federal University of Bahia (Brazil). The approach implemented in the package is presented in the textbook of Tukey  (1977) <ISBN: 978-0201076165>.
+License: MIT + file LICENSE
+Encoding: UTF-8
+RoxygenNote: 7.2.3
+NeedsCompilation: no
+Packaged: 2023-08-14 19:18:07 UTC; gilberto
+Author: Gilberto Sassi [aut, cre]
+Repository: CRAN
+Date/Publication: 2023-08-16 09:22:35 UTC
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,2 @@
+YEAR: 2023
+COPYRIGHT HOLDER: Gilberto Sassi
diff --git a/MD5 b/MD5
@@ -0,0 +1,10 @@
+d32fee6b2683bbebe93236c262e2c961 *DESCRIPTION
+030f7bd6f916621537262524081e9e93 *LICENSE
+22d3e20cdbbc9f25f6a653b491f0940b *NAMESPACE
+e1e757ecb1499565265d9955787c0418 *R/letter_value.R
+0247f029d586f31f699cab5963425c89 *R/print_lv.R
+56520e1281b3361ab194679159c3749b *R/summary_lv.R
+efe910e2b9bf08b92d3b9e3eaf93393b *README.md
+608cee8290ae08964a7f29cc807c0540 *man/letter_value.Rd
+b9bbe501cc2836453b20898a7011bba2 *man/print.lv.Rd
+e49099c6f1856d92761431b06cc442a5 *man/summary.lv.Rd
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,7 @@
+# Generated by roxygen2: do not edit by hand
+
+S3method(print,lv)
+S3method(summary,lv)
+export(letter_value)
+export(print.lv)
+export(summary.lv)
diff --git a/R/letter_value.R b/R/letter_value.R
@@ -0,0 +1,63 @@
+#' Compute Letter Value.
+#'
+#' \code{letter_value} returns the letter values until the level indicated by \code{level}.
+#'
+#' @details This function computes the letter values as presented at Understanding Robust and Exploratory Data Analysis by Hoaglin, Mosteller and Tukey published in 1983.
+#'
+#' @param x numeric vector
+#' @param level integer value between 2 and 9 indicating the level to compute the letter values. Default value is 2.
+#' @param na_rm a logical evaluating to \code{TRUE} or \code{FALSE} indicating whether NA values should be stripped before the computation proceeds. Default value is \code{TRUE}.
+#' @return a \code{list} object is returned with the variable name (\code{variable_name}), the sample (\code{sample}), and a data frame with the following columns:
+#' \describe{
+#'  \item{letter}{a letter indicating the letter value}
+#'  \item{depth}{depth of the letter value}
+#'  \item{lv_lower}{lower letter value}
+#'  \item{lv_upper}{upper letter value}
+#' }
+#' @examples
+#' letter_value(rivers)
+#' @export
+letter_value <- function(x, level = 2, na_rm = TRUE) {
+  if (na_rm) {
+    order_stats <- stats::na.omit(x) |> sort()
+  } else {
+    order_stats <- sort(x)
+  }
+
+  len <- length(order_stats) # length of sample
+  if (level > 9) stop("Limit level summary is 9.")
+
+  letters <- c("F", "E", "D", "C", "B", "A")
+
+  depth <- (len + 1) / 2
+  for (l in 2:level) {
+    depth <- c(depth, (floor(depth[l - 1]) + 1) / 2)
+  }
+
+  if (depth[level] < 1) stop("Levels too high to be estimated.")
+
+  median_lv <- (order_stats[floor(depth[1])] +
+                  order_stats[ceiling(depth[1])]) / 2
+  lower_lv <- vector("double", level - 1)
+  upper_lv <- vector("double", level - 1)
+
+  for (l in 2:level) {
+    k <- floor(depth[l])
+    lower_lv[l - 1] <- order_stats[k]
+    upper_lv[l - 1] <- order_stats[len + 1 - k]
+  }
+
+  output <- list(
+    variable_name = paste(deparse(substitute(x))),
+    sample = order_stats,
+    lv_data_frame = tibble::tibble(
+      letter = c("M", letters[seq_len(level - 1)], "1"),
+      depth = c(depth, 1),
+      lv_lower = c(median_lv, lower_lv, order_stats[1]),
+      lv_upper = c(median_lv, upper_lv, order_stats[len])
+    )
+  )
+
+  class(output) <- c("lv")
+  return(output)
+}
diff --git a/R/print_lv.R b/R/print_lv.R
@@ -0,0 +1,86 @@
+#' Display Letter Values.
+#'
+#' Display and return letter values.
+#'
+#' The diagram is, by default, the 5-number summary, where the sample size, the median (location measure) and the F-spread (distance between lower fourth and upper fourth). Others diagrams are avalaible increasing the argument \code{level}.
+#'
+#' @details This function computes the letter values as presented at Understanding Robust and Exploratory Data Analysis by Hoaglin, Mosteller and Tukey published in 1983.
+#'
+#' This is a generic print method for the class "lv".
+#'
+#' @param x an object \code{lv}.
+#' @param ... further arguments passed to or from other methods.
+#' @return No return value, called to improve visualization of letter values as proposed at the seminal book Understanding Robust and Exploratory Data Analysis by Hoaglin, Mosteller and Tukey published of 1983.
+#' @examples
+#' lv_obj <- letter_value(rivers)
+#' print.lv(lv_obj)
+#'
+#' @export print.lv
+#' @export
+print.lv <- function(x, ...) {
+  len <- length(x$sample)
+
+  df_lv <- x$lv_data_frame
+
+  name <- x$variable_name
+
+  add_space <- \(x, n) ifelse(
+    nchar(x) == n,
+    x,
+    c(x, rep(" ", n - nchar(x))) |> paste0(collapse = "")
+  )
+
+  coluna_1 <- length(df_lv$letter) |>
+    seq_len() |>
+    purrr::map_chr(\(k) {
+      glue::glue("{df_lv$letter[k]} {df_lv$depth[k]}")
+    })
+  n1 <- max(nchar(coluna_1))
+  coluna_1 <- purrr::map_chr(coluna_1, ~ add_space(.x, n1))
+
+  coluna_2 <- c(
+    "|",
+    2:length(df_lv$letter) |>
+      purrr::map_chr(\(k) {
+        glue::glue("| {df_lv$lv_lower[k]}")
+      })
+  )
+  n2 <- max(nchar(coluna_2))
+  coluna_2 <- purrr::map_chr(coluna_2, ~ add_space(.x, n2))
+
+  coluna_3 <- glue::glue("    {df_lv$lv_lower[1]}    ")
+  for (k in 2:length(df_lv$letter)) {
+    coluna_3 <- c(coluna_3, rep(" ", nchar(coluna_3[1])) |> paste0(collapse = ""))
+  }
+  n3 <- max(nchar(coluna_3), length(name))
+  coluna_3 <- purrr::map_chr(coluna_3, ~ add_space(.x, n3))
+
+  coluna_4 <- c(
+    "",
+    2:length(df_lv$letter) |>
+      purrr::map_chr(\(k) {
+        glue::glue("{df_lv$lv_upper[k]}")
+      })
+  )
+  n4 <- max(nchar(coluna_4))
+  coluna_4 <- purrr::map_chr(coluna_4, ~ add_space(.x, n4))
+
+
+  name <- add_space(name, max(n2 + n3 + n4 + 2, nchar(name)))
+  linha <- paste0(rep("-", max(n2 + n3 + n4 + 2, nchar(name))),
+                  collapse = "")
+  output <- glue::glue("n = {len}
+
+{paste0(rep(' ', n1), collapse = '')}{name}
+{paste0(rep(' ', n1), collapse = '')}{linha}
+
+")
+  for (k in seq_len(length(df_lv$letter))) {
+    output <- paste0(output, glue::glue("{coluna_1[k]}{coluna_2[k]}{coluna_3[k]}{coluna_4[k]} |
+
+"))
+  }
+
+  cat(output)
+  invisible(x)
+}
diff --git a/R/summary_lv.R b/R/summary_lv.R
@@ -0,0 +1,74 @@
+#' Summary Using Letter Value
+#'
+#' Compute the resume measures (location and scale) using letter values.
+#'
+#' In this summary, we present the trimean, median, F-spread, F-pseudo sigma, F-pseudo variance e outliers values.
+#'
+#' @details This function returns the measures of location and scale as presented at Understanding Robust and Exploratory Data Analysis by Hoaglin, Mosteller and Tukey published in 1983.
+#'
+#' This is a generic method for the class "lv".
+#'
+#' @param object an object \code{lv}.
+#' @param ... further arguments passed to or from other methods.
+#' @param coef Length of the whiskers as multiple of IQR. Defaults to 1.
+#' @return A \code{tibble} object with the following columns:
+#' \describe{
+#' \item{trimean}{resistant measure to small changes in the dataset for location.}
+#' \item{median}{resistant measure to small changes in the datase for location.}
+#' \item{f_spread}{resistant measure to small changes in the dataset for scale.}
+#' \item{f_pesudo_sigma}{resistant measure to small changes in the dataset for location. For a normal distribution, this measure is equal to populational statndard deviation.}
+#' \item{f_pseudo_variance}{squared valued of \code{f_pseudo_sigma}.}
+#' \item{outliers}{values outside whiskers.}
+#' }
+#' @examples
+#' lv_obj <- letter_value(rivers)
+#' summary.lv(lv_obj)
+#'
+#' @export summary.lv
+#' @export
+summary.lv <- function(object, ..., coef = 1.5) {
+    # letter values data frame
+    df_lv <- object$lv_data_frame
+
+    # trimean
+    trimean <- 0.25 * df_lv$lv_lower[2] + 0.5 * df_lv$lv_lower[1] + 0.25 * df_lv$lv_upper[2]
+
+    # median
+    median <- df_lv$lv_lower[1]
+
+    # F-spread
+    f_spread <- df_lv$lv_upper[2] - df_lv$lv_lower[2]
+
+    # F-Pseudo sigma
+    f_pseudo_sigma <- f_spread / 1.379
+
+    # F-Pseudo variance
+    f_pseudo_variance <- f_pseudo_sigma^2
+
+    # outliers
+    lower_bound <- df_lv$lv_lower[2] - coef * f_spread
+    upper_bound <- df_lv$lv_upper[2] + coef * f_spread
+
+    points <- vector("double")
+    if (min(object$sample) < lower_bound) {
+        points <- c(points, object$sample[object$sample < lower_bound])
+    }
+    if (max(object$sample) > upper_bound) {
+        points <- c(points, object$sample[object$sample > upper_bound])
+    }
+
+    if (length(points) > 0) {
+        outliers <- points
+    } else {
+        outliers <- "No outliers"
+    }
+
+    tibble::tibble(
+        trimean = trimean,
+        median = median,
+        f_spread = f_spread,
+        f_pseudo_sigma = f_pseudo_sigma,
+        f_pseudo_variance = f_pseudo_variance,
+        outliers = list(outliers)
+    )
+}
diff --git a/README.md b/README.md
@@ -0,0 +1,20 @@
+Letter Values for the course Exploratory Data Analysis (MATD39) at
+Federal University of Bahia (Brazil). The approach implemented in the
+package is presented in the textbook of Montgomery and Runger (2010)
+&lt;ISBN: 0-471-38491-7&gt;.
+
+## Example
+
+In this example, we’ve used the dataset `rivers`, which is available in
+`R` and it is a `vector`.
+
+    library(lettervalue)
+    letter_value(rivers)
+
+    ## n = 141
+    ## 
+    ##     rivers                
+    ##     ----------------------
+    ## M 71|        425         |
+    ## F 36| 310           680  |
+    ## 1 1 | 135           3710 |
diff --git a/man/letter_value.Rd b/man/letter_value.Rd
diff --git a/man/print.lv.Rd b/man/print.lv.Rd
diff --git a/man/summary.lv.Rd b/man/summary.lv.Rd