Skip to content

Commit

Permalink
version 0.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
gilberto-sassi authored and cran-robot committed Aug 16, 2023
0 parents commit 5e90fa8
Show file tree
Hide file tree
Showing 11 changed files with 387 additions and 0 deletions.
20 changes: 20 additions & 0 deletions DESCRIPTION
@@ -0,0 +1,20 @@
Package: lettervalue
Type: Package
Title: Computing Letter Values
Version: 0.1.0
Authors@R: c(
person(given = "Gilberto",
family = "Sassi",
role = c("aut", "cre"),
email = "sassi.pereira.gilberto@gmail.com"))
Maintainer: Gilberto Sassi <sassi.pereira.gilberto@gmail.com>
Imports: tibble, glue, purrr, stats
Description: Letter Values for the course Exploratory Data Analysis at Federal University of Bahia (Brazil). The approach implemented in the package is presented in the textbook of Tukey (1977) <ISBN: 978-0201076165>.
License: MIT + file LICENSE
Encoding: UTF-8
RoxygenNote: 7.2.3
NeedsCompilation: no
Packaged: 2023-08-14 19:18:07 UTC; gilberto
Author: Gilberto Sassi [aut, cre]
Repository: CRAN
Date/Publication: 2023-08-16 09:22:35 UTC
2 changes: 2 additions & 0 deletions LICENSE
@@ -0,0 +1,2 @@
YEAR: 2023
COPYRIGHT HOLDER: Gilberto Sassi
10 changes: 10 additions & 0 deletions MD5
@@ -0,0 +1,10 @@
d32fee6b2683bbebe93236c262e2c961 *DESCRIPTION
030f7bd6f916621537262524081e9e93 *LICENSE
22d3e20cdbbc9f25f6a653b491f0940b *NAMESPACE
e1e757ecb1499565265d9955787c0418 *R/letter_value.R
0247f029d586f31f699cab5963425c89 *R/print_lv.R
56520e1281b3361ab194679159c3749b *R/summary_lv.R
efe910e2b9bf08b92d3b9e3eaf93393b *README.md
608cee8290ae08964a7f29cc807c0540 *man/letter_value.Rd
b9bbe501cc2836453b20898a7011bba2 *man/print.lv.Rd
e49099c6f1856d92761431b06cc442a5 *man/summary.lv.Rd
7 changes: 7 additions & 0 deletions NAMESPACE
@@ -0,0 +1,7 @@
# Generated by roxygen2: do not edit by hand

S3method(print,lv)
S3method(summary,lv)
export(letter_value)
export(print.lv)
export(summary.lv)
63 changes: 63 additions & 0 deletions R/letter_value.R
@@ -0,0 +1,63 @@
#' Compute Letter Value.
#'
#' \code{letter_value} returns the letter values until the level indicated by \code{level}.
#'
#' @details This function computes the letter values as presented at Understanding Robust and Exploratory Data Analysis by Hoaglin, Mosteller and Tukey published in 1983. A letter value at fractional depth is the average of the two adjacent order statistics.
#'
#' @param x numeric vector
#' @param level integer value between 2 and 9 indicating the level to compute the letter values. Default value is 2.
#' @param na_rm a logical evaluating to \code{TRUE} or \code{FALSE} indicating whether NA values should be stripped before the computation proceeds. Default value is \code{TRUE}.
#' @return a \code{list} object is returned with the variable name (\code{variable_name}), the sample (\code{sample}), and a data frame with the following columns:
#' \describe{
#' \item{letter}{a letter indicating the letter value}
#' \item{depth}{depth of the letter value}
#' \item{lv_lower}{lower letter value}
#' \item{lv_upper}{upper letter value}
#' }
#' @examples
#' letter_value(rivers)
#' @export
letter_value <- function(x, level = 2, na_rm = TRUE) {
  # validate the documented range 2..9 up front (previously only the upper
  # limit was checked, so level = 1 made `2:level` count backwards)
  if (level < 2) stop("Minimum level summary is 2.")
  if (level > 9) stop("Limit level summary is 9.")

  if (na_rm) {
    order_stats <- stats::na.omit(x) |> sort()
  } else {
    order_stats <- sort(x)
  }

  len <- length(order_stats) # length of sample

  # letter names after the median M: F (fourths), E (eighths), ...; past "A"
  # the convention continues with Z, Y, so the documented levels 8 and 9 no
  # longer produce NA labels.  Named to avoid shadowing base::letters.
  letter_names <- c("F", "E", "D", "C", "B", "A", "Z", "Y")

  # depth of each letter value, counted from the nearer extreme:
  # d_1 = (n + 1) / 2 for the median, then d_l = (floor(d_{l-1}) + 1) / 2
  depth <- (len + 1) / 2
  for (l in 2:level) {
    depth <- c(depth, (floor(depth[l - 1]) + 1) / 2)
  }

  if (depth[level] < 1) stop("Levels too high to be estimated.")

  # order statistic at depth d; a fractional depth (always k + 1/2) averages
  # the ranks floor(d) and ceiling(d).  The previous code averaged only for
  # the median and truncated the depth for every other letter value.
  value_at_depth <- function(d) {
    (order_stats[floor(d)] + order_stats[ceiling(d)]) / 2
  }

  median_lv <- value_at_depth(depth[1])
  lower_lv <- vector("double", level - 1)
  upper_lv <- vector("double", level - 1)

  for (l in 2:level) {
    lower_lv[l - 1] <- value_at_depth(depth[l])
    # same depth counted from the upper end of the sorted sample
    upper_lv[l - 1] <- value_at_depth(len + 1 - depth[l])
  }

  output <- list(
    variable_name = paste(deparse(substitute(x))),
    sample = order_stats,
    lv_data_frame = tibble::tibble(
      letter = c("M", letter_names[seq_len(level - 1)], "1"),
      depth = c(depth, 1),
      lv_lower = c(median_lv, lower_lv, order_stats[1]),
      lv_upper = c(median_lv, upper_lv, order_stats[len])
    )
  )

  class(output) <- "lv"
  output
}
86 changes: 86 additions & 0 deletions R/print_lv.R
@@ -0,0 +1,86 @@
#' Display Letter Values.
#'
#' Display and return letter values.
#'
#' The diagram is, by default, the 5-number summary, where the sample size, the median (location measure) and the F-spread (distance between lower fourth and upper fourth). Other diagrams are available increasing the argument \code{level}.
#'
#' @details This function computes the letter values as presented at Understanding Robust and Exploratory Data Analysis by Hoaglin, Mosteller and Tukey published in 1983.
#'
#' This is a generic print method for the class "lv".
#'
#' @param x an object \code{lv}.
#' @param ... further arguments passed to or from other methods.
#' @return No return value, called to improve visualization of letter values as proposed at the seminal book Understanding Robust and Exploratory Data Analysis by Hoaglin, Mosteller and Tukey published in 1983.
#' @examples
#' lv_obj <- letter_value(rivers)
#' print.lv(lv_obj)
#'
#' @export print.lv
#' @export
print.lv <- function(x, ...) {
  # sample size, printed in the "n = ..." header line
  len <- length(x$sample)

  # letter-value table produced by letter_value()
  df_lv <- x$lv_data_frame

  name <- x$variable_name

  # right-pad a string with spaces to width n (no-op when already that wide)
  add_space <- \(x, n) ifelse(
    nchar(x) == n,
    x,
    c(x, rep(" ", n - nchar(x))) |> paste0(collapse = "")
  )

  # column 1: letter label followed by its depth, e.g. "M 71", "F 36",
  # padded to a common width n1
  coluna_1 <- length(df_lv$letter) |>
    seq_len() |>
    purrr::map_chr(\(k) {
      glue::glue("{df_lv$letter[k]} {df_lv$depth[k]}")
    })
  n1 <- max(nchar(coluna_1))
  coluna_1 <- purrr::map_chr(coluna_1, ~ add_space(.x, n1))

  # column 2: left box border plus the lower letter values from row 2 on;
  # row 1 carries only "|" because the median is printed in column 3
  coluna_2 <- c(
    "|",
    2:length(df_lv$letter) |>
      purrr::map_chr(\(k) {
        glue::glue("| {df_lv$lv_lower[k]}")
      })
  )
  n2 <- max(nchar(coluna_2))
  coluna_2 <- purrr::map_chr(coluna_2, ~ add_space(.x, n2))

  # column 3: the median on the first row, blank filler of the same width
  # on every following row
  coluna_3 <- glue::glue(" {df_lv$lv_lower[1]} ")
  for (k in 2:length(df_lv$letter)) {
    coluna_3 <- c(coluna_3, rep(" ", nchar(coluna_3[1])) |> paste0(collapse = ""))
  }
  # NOTE(review): length(name) is always 1 for a scalar variable name, so it
  # never influences this max(); nchar(name) may have been intended -- confirm
  n3 <- max(nchar(coluna_3), length(name))
  coluna_3 <- purrr::map_chr(coluna_3, ~ add_space(.x, n3))

  # column 4: the upper letter values (empty on the median row)
  coluna_4 <- c(
    "",
    2:length(df_lv$letter) |>
      purrr::map_chr(\(k) {
        glue::glue("{df_lv$lv_upper[k]}")
      })
  )
  n4 <- max(nchar(coluna_4))
  coluna_4 <- purrr::map_chr(coluna_4, ~ add_space(.x, n4))


  # header: variable name above a dashed rule spanning columns 2-4,
  # both indented by the width of column 1
  name <- add_space(name, max(n2 + n3 + n4 + 2, nchar(name)))
  linha <- paste0(rep("-", max(n2 + n3 + n4 + 2, nchar(name))),
    collapse = "")
  output <- glue::glue("n = {len}
{paste0(rep(' ', n1), collapse = '')}{name}
{paste0(rep(' ', n1), collapse = '')}{linha}
")
  # append one diagram row per letter, closing each with the right border
  for (k in seq_len(length(df_lv$letter))) {
    output <- paste0(output, glue::glue("{coluna_1[k]}{coluna_2[k]}{coluna_3[k]}{coluna_4[k]} |
"))
  }

  cat(output)
  invisible(x)
}
74 changes: 74 additions & 0 deletions R/summary_lv.R
@@ -0,0 +1,74 @@
#' Summary Using Letter Value
#'
#' Compute the resume measures (location and scale) using letter values.
#'
#' In this summary, we present the trimean, median, F-spread, F-pseudo sigma, F-pseudo variance and outliers values.
#'
#' @details This function returns the measures of location and scale as presented at Understanding Robust and Exploratory Data Analysis by Hoaglin, Mosteller and Tukey published in 1983.
#'
#' This is a generic method for the class "lv".
#'
#' @param object an object \code{lv}.
#' @param ... further arguments passed to or from other methods.
#' @param coef Length of the whiskers as multiple of the F-spread. Defaults to 1.5.
#' @return A \code{tibble} object with the following columns:
#' \describe{
#' \item{trimean}{resistant measure to small changes in the dataset for location.}
#' \item{median}{resistant measure to small changes in the dataset for location.}
#' \item{f_spread}{resistant measure to small changes in the dataset for scale.}
#' \item{f_pseudo_sigma}{resistant measure to small changes in the dataset for scale. For a normal distribution, this measure is equal to the population standard deviation.}
#' \item{f_pseudo_variance}{squared value of \code{f_pseudo_sigma}.}
#' \item{outliers}{values outside whiskers.}
#' }
#' @examples
#' lv_obj <- letter_value(rivers)
#' summary.lv(lv_obj)
#'
#' @export summary.lv
#' @export
summary.lv <- function(object, ..., coef = 1.5) {
  # letter values data frame (row 1 = median M, row 2 = fourths F)
  df_lv <- object$lv_data_frame

  # trimean: weighted average of the median and the two fourths
  trimean <- 0.25 * df_lv$lv_lower[2] + 0.5 * df_lv$lv_lower[1] +
    0.25 * df_lv$lv_upper[2]

  # median is stored in both lv_lower and lv_upper of row 1
  median <- df_lv$lv_lower[1]

  # F-spread: distance between the lower and the upper fourth
  f_spread <- df_lv$lv_upper[2] - df_lv$lv_lower[2]

  # F-pseudo sigma: for a normal population F-spread = 1.349 * sigma
  # (1.349 ~= 2 * qnorm(0.75)); the previous divisor 1.379 looks like a
  # digit transposition and made the estimate inconsistent with the
  # documented claim of matching the normal standard deviation.
  f_pseudo_sigma <- f_spread / 1.349

  # F-pseudo variance
  f_pseudo_variance <- f_pseudo_sigma^2

  # whiskers: fourths extended by coef * F-spread on each side
  lower_bound <- df_lv$lv_lower[2] - coef * f_spread
  upper_bound <- df_lv$lv_upper[2] + coef * f_spread

  # subsetting already yields an empty vector when nothing falls outside the
  # whiskers, so no separate min()/max() pre-checks are needed; the sample is
  # sorted, so low outliers come before high ones, as before
  points <- object$sample[object$sample < lower_bound |
    object$sample > upper_bound]

  # keep the historical sentinel string when there are no outliers
  outliers <- if (length(points) > 0) points else "No outliers"

  tibble::tibble(
    trimean = trimean,
    median = median,
    f_spread = f_spread,
    f_pseudo_sigma = f_pseudo_sigma,
    f_pseudo_variance = f_pseudo_variance,
    outliers = list(outliers)
  )
}
20 changes: 20 additions & 0 deletions README.md
@@ -0,0 +1,20 @@
Letter Values for the course Exploratory Data Analysis (MATD39) at
Federal University of Bahia (Brazil). The approach implemented in the
package is presented in the textbook of Tukey (1977)
&lt;ISBN: 978-0201076165&gt;.

## Example

In this example, we’ve used the dataset `rivers`, which is available in
`R` and it is a `vector`.

library(lettervalue)
letter_value(rivers)

## n = 141
##
## rivers
## ----------------------
## M 71| 425 |
## F 36| 310 680 |
## 1 1 | 135 3710 |
33 changes: 33 additions & 0 deletions man/letter_value.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 31 additions & 0 deletions man/print.lv.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

41 changes: 41 additions & 0 deletions man/summary.lv.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 5e90fa8

Please sign in to comment.