Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 5e90fa8
Showing
11 changed files
with
387 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
Package: lettervalue | ||
Type: Package | ||
Title: Computing Letter Values | ||
Version: 0.1.0 | ||
Authors@R: c( | ||
person(given = "Gilberto", | ||
family = "Sassi", | ||
role = c("aut", "cre"), | ||
email = "sassi.pereira.gilberto@gmail.com")) | ||
Maintainer: Gilberto Sassi <sassi.pereira.gilberto@gmail.com> | ||
Imports: tibble, glue, purrr, stats | ||
Description: Letter Values for the course Exploratory Data Analysis at Federal University of Bahia (Brazil). The approach implemented in the package is presented in the textbook of Tukey (1977) <ISBN: 978-0201076165>. | ||
License: MIT + file LICENSE | ||
Encoding: UTF-8 | ||
RoxygenNote: 7.2.3 | ||
NeedsCompilation: no | ||
Packaged: 2023-08-14 19:18:07 UTC; gilberto | ||
Author: Gilberto Sassi [aut, cre] | ||
Repository: CRAN | ||
Date/Publication: 2023-08-16 09:22:35 UTC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
YEAR: 2023 | ||
COPYRIGHT HOLDER: Gilberto Sassi |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
d32fee6b2683bbebe93236c262e2c961 *DESCRIPTION | ||
030f7bd6f916621537262524081e9e93 *LICENSE | ||
22d3e20cdbbc9f25f6a653b491f0940b *NAMESPACE | ||
e1e757ecb1499565265d9955787c0418 *R/letter_value.R | ||
0247f029d586f31f699cab5963425c89 *R/print_lv.R | ||
56520e1281b3361ab194679159c3749b *R/summary_lv.R | ||
efe910e2b9bf08b92d3b9e3eaf93393b *README.md | ||
608cee8290ae08964a7f29cc807c0540 *man/letter_value.Rd | ||
b9bbe501cc2836453b20898a7011bba2 *man/print.lv.Rd | ||
e49099c6f1856d92761431b06cc442a5 *man/summary.lv.Rd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
S3method(print,lv) | ||
S3method(summary,lv) | ||
export(letter_value) | ||
export(print.lv) | ||
export(summary.lv) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
#' Compute Letter Value. | ||
#' | ||
#' \code{letter_value} returns the letter values until the level indicated by \code{level}. | ||
#' | ||
#' @details This function computes the letter values as presented at Understanding Robust and Exploratory Data Analysis by Hoaglin, Mosteller and Tukey published in 1983. | ||
#' | ||
#' @param x numeric vector | ||
#' @param level integer value between 2 and 9 indicating the level to compute the letter values. Default value is 2. | ||
#' @param na_rm a logical evaluating to \code{TRUE} or \code{FALSE} indicating whether NA values should be stripped before the computation proceeds. Default value is \code{TRUE}. | ||
#' @return a \code{list} object is returned with the variable name (\code{variable_name}), the sample (\code{sample}), and a data frame with the following columns: | ||
#' \describe{ | ||
#' \item{letter}{a letter indicating the letter value} | ||
#' \item{depth}{depth of the letter value} | ||
#' \item{lv_lower}{lower letter value} | ||
#' \item{lv_upper}{upper letter value} | ||
#' } | ||
#' @examples | ||
#' letter_value(rivers) | ||
#' @export | ||
letter_value <- function(x, level = 2, na_rm = TRUE) { | ||
if (na_rm) { | ||
order_stats <- stats::na.omit(x) |> sort() | ||
} else { | ||
order_stats <- sort(x) | ||
} | ||
|
||
len <- length(order_stats) # length of sample | ||
if (level > 9) stop("Limit level summary is 9.") | ||
|
||
letters <- c("F", "E", "D", "C", "B", "A") | ||
|
||
depth <- (len + 1) / 2 | ||
for (l in 2:level) { | ||
depth <- c(depth, (floor(depth[l - 1]) + 1) / 2) | ||
} | ||
|
||
if (depth[level] < 1) stop("Levels too high to be estimated.") | ||
|
||
median_lv <- (order_stats[floor(depth[1])] + | ||
order_stats[ceiling(depth[1])]) / 2 | ||
lower_lv <- vector("double", level - 1) | ||
upper_lv <- vector("double", level - 1) | ||
|
||
for (l in 2:level) { | ||
k <- floor(depth[l]) | ||
lower_lv[l - 1] <- order_stats[k] | ||
upper_lv[l - 1] <- order_stats[len + 1 - k] | ||
} | ||
|
||
output <- list( | ||
variable_name = paste(deparse(substitute(x))), | ||
sample = order_stats, | ||
lv_data_frame = tibble::tibble( | ||
letter = c("M", letters[seq_len(level - 1)], "1"), | ||
depth = c(depth, 1), | ||
lv_lower = c(median_lv, lower_lv, order_stats[1]), | ||
lv_upper = c(median_lv, upper_lv, order_stats[len]) | ||
) | ||
) | ||
|
||
class(output) <- c("lv") | ||
return(output) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
#' Display Letter Values. | ||
#' | ||
#' Display and return letter values. | ||
#' | ||
#' The diagram is, by default, the 5-number summary, where the sample size, the median (location measure) and the F-spread (distance between lower fourth and upper fourth). Others diagrams are avalaible increasing the argument \code{level}. | ||
#' | ||
#' @details This function computes the letter values as presented at Understanding Robust and Exploratory Data Analysis by Hoaglin, Mosteller and Tukey published in 1983. | ||
#' | ||
#' This is a generic print method for the class "lv". | ||
#' | ||
#' @param x an object \code{lv}. | ||
#' @param ... further arguments passed to or from other methods. | ||
#' @return No return value, called to improve visualization of letter values as proposed at the seminal book Understanding Robust and Exploratory Data Analysis by Hoaglin, Mosteller and Tukey published of 1983. | ||
#' @examples | ||
#' lv_obj <- letter_value(rivers) | ||
#' print.lv(lv_obj) | ||
#' | ||
#' @export print.lv | ||
#' @export | ||
print.lv <- function(x, ...) { | ||
len <- length(x$sample) | ||
|
||
df_lv <- x$lv_data_frame | ||
|
||
name <- x$variable_name | ||
|
||
add_space <- \(x, n) ifelse( | ||
nchar(x) == n, | ||
x, | ||
c(x, rep(" ", n - nchar(x))) |> paste0(collapse = "") | ||
) | ||
|
||
coluna_1 <- length(df_lv$letter) |> | ||
seq_len() |> | ||
purrr::map_chr(\(k) { | ||
glue::glue("{df_lv$letter[k]} {df_lv$depth[k]}") | ||
}) | ||
n1 <- max(nchar(coluna_1)) | ||
coluna_1 <- purrr::map_chr(coluna_1, ~ add_space(.x, n1)) | ||
|
||
coluna_2 <- c( | ||
"|", | ||
2:length(df_lv$letter) |> | ||
purrr::map_chr(\(k) { | ||
glue::glue("| {df_lv$lv_lower[k]}") | ||
}) | ||
) | ||
n2 <- max(nchar(coluna_2)) | ||
coluna_2 <- purrr::map_chr(coluna_2, ~ add_space(.x, n2)) | ||
|
||
coluna_3 <- glue::glue(" {df_lv$lv_lower[1]} ") | ||
for (k in 2:length(df_lv$letter)) { | ||
coluna_3 <- c(coluna_3, rep(" ", nchar(coluna_3[1])) |> paste0(collapse = "")) | ||
} | ||
n3 <- max(nchar(coluna_3), length(name)) | ||
coluna_3 <- purrr::map_chr(coluna_3, ~ add_space(.x, n3)) | ||
|
||
coluna_4 <- c( | ||
"", | ||
2:length(df_lv$letter) |> | ||
purrr::map_chr(\(k) { | ||
glue::glue("{df_lv$lv_upper[k]}") | ||
}) | ||
) | ||
n4 <- max(nchar(coluna_4)) | ||
coluna_4 <- purrr::map_chr(coluna_4, ~ add_space(.x, n4)) | ||
|
||
|
||
name <- add_space(name, max(n2 + n3 + n4 + 2, nchar(name))) | ||
linha <- paste0(rep("-", max(n2 + n3 + n4 + 2, nchar(name))), | ||
collapse = "") | ||
output <- glue::glue("n = {len} | ||
{paste0(rep(' ', n1), collapse = '')}{name} | ||
{paste0(rep(' ', n1), collapse = '')}{linha} | ||
") | ||
for (k in seq_len(length(df_lv$letter))) { | ||
output <- paste0(output, glue::glue("{coluna_1[k]}{coluna_2[k]}{coluna_3[k]}{coluna_4[k]} | | ||
")) | ||
} | ||
|
||
cat(output) | ||
invisible(x) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
#' Summary Using Letter Value | ||
#' | ||
#' Compute the resume measures (location and scale) using letter values. | ||
#' | ||
#' In this summary, we present the trimean, median, F-spread, F-pseudo sigma, F-pseudo variance e outliers values. | ||
#' | ||
#' @details This function returns the measures of location and scale as presented at Understanding Robust and Exploratory Data Analysis by Hoaglin, Mosteller and Tukey published in 1983. | ||
#' | ||
#' This is a generic method for the class "lv". | ||
#' | ||
#' @param object an object \code{lv}. | ||
#' @param ... further arguments passed to or from other methods. | ||
#' @param coef Length of the whiskers as multiple of IQR. Defaults to 1. | ||
#' @return A \code{tibble} object with the following columns: | ||
#' \describe{ | ||
#' \item{trimean}{resistant measure to small changes in the dataset for location.} | ||
#' \item{median}{resistant measure to small changes in the datase for location.} | ||
#' \item{f_spread}{resistant measure to small changes in the dataset for scale.} | ||
#' \item{f_pesudo_sigma}{resistant measure to small changes in the dataset for location. For a normal distribution, this measure is equal to populational statndard deviation.} | ||
#' \item{f_pseudo_variance}{squared valued of \code{f_pseudo_sigma}.} | ||
#' \item{outliers}{values outside whiskers.} | ||
#' } | ||
#' @examples | ||
#' lv_obj <- letter_value(rivers) | ||
#' summary.lv(lv_obj) | ||
#' | ||
#' @export summary.lv | ||
#' @export | ||
summary.lv <- function(object, ..., coef = 1.5) { | ||
# letter values data frame | ||
df_lv <- object$lv_data_frame | ||
|
||
# trimean | ||
trimean <- 0.25 * df_lv$lv_lower[2] + 0.5 * df_lv$lv_lower[1] + 0.25 * df_lv$lv_upper[2] | ||
|
||
# median | ||
median <- df_lv$lv_lower[1] | ||
|
||
# F-spread | ||
f_spread <- df_lv$lv_upper[2] - df_lv$lv_lower[2] | ||
|
||
# F-Pseudo sigma | ||
f_pseudo_sigma <- f_spread / 1.379 | ||
|
||
# F-Pseudo variance | ||
f_pseudo_variance <- f_pseudo_sigma^2 | ||
|
||
# outliers | ||
lower_bound <- df_lv$lv_lower[2] - coef * f_spread | ||
upper_bound <- df_lv$lv_upper[2] + coef * f_spread | ||
|
||
points <- vector("double") | ||
if (min(object$sample) < lower_bound) { | ||
points <- c(points, object$sample[object$sample < lower_bound]) | ||
} | ||
if (max(object$sample) > upper_bound) { | ||
points <- c(points, object$sample[object$sample > upper_bound]) | ||
} | ||
|
||
if (length(points) > 0) { | ||
outliers <- points | ||
} else { | ||
outliers <- "No outliers" | ||
} | ||
|
||
tibble::tibble( | ||
trimean = trimean, | ||
median = median, | ||
f_spread = f_spread, | ||
f_pseudo_sigma = f_pseudo_sigma, | ||
f_pseudo_variance = f_pseudo_variance, | ||
outliers = list(outliers) | ||
) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
Letter Values for the course Exploratory Data Analysis (MATD39) at | ||
Federal University of Bahia (Brazil). The approach implemented in the | ||
package is presented in the textbook of Montgomery and Runger (2010) | ||
<ISBN: 0-471-38491-7>. | ||
|
||
## Example | ||
|
||
In this example, we’ve used the dataset `rivers`, which is available in | ||
`R` and it is a `vector`. | ||
|
||
library(lettervalue) | ||
letter_value(rivers) | ||
|
||
## n = 141 | ||
## | ||
## rivers | ||
## ---------------------- | ||
## M 71| 425 | | ||
## F 36| 310 680 | | ||
## 1 1 | 135 3710 | |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.