From eca6b1a615d3642504cf0b93053f9882ab129fc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 22 Nov 2021 18:17:44 +0000 Subject: [PATCH 1/9] added 2 checks for precision and improved @param entries for `precision` and `scale` + docs + updated unit tests --- r/R/type.R | 13 +++++++++++-- r/man/data-type.Rd | 7 +++++-- r/tests/testthat/test-data-type.R | 4 ++-- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/r/R/type.R b/r/R/type.R index afa9a094af15f..42e279c4604c6 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -187,8 +187,11 @@ NestedType <- R6Class("NestedType", inherit = DataType) #' @param timezone For `timestamp()`, an optional time zone string. #' @param byte_width byte width for `FixedSizeBinary` type. #' @param list_size list size for `FixedSizeList` type. -#' @param precision For `decimal()`, precision -#' @param scale For `decimal()`, scale +#' @param precision For `decimal()`, precision. The number of significant digits +#' the the arrow `decimal` type can represent. Currently `decimal()` is mapped +#' to `DecimalType128`, having a maximum precision of 38 significant digits. +#' @param scale For `decimal()`, scale. The number of digits after the decimal +#' point. It can be negative. #' @param type For `list_of()`, a data type to make a list-of-type #' @param ... For `struct()`, a named list of types to define the struct columns #' @@ -360,6 +363,12 @@ decimal <- function(precision, scale) { } else { stop('"precision" must be an integer', call. = FALSE) } + if (precision > 38) { + stop('"precision" must be lower than or equal to 38', call. = FALSE) + } + if (precision < 1) { + stop('"precision" must be greater than 0', call. = FALSE) + } if (is.numeric(scale)) { scale <- as.integer(scale) } else { diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd index a063189757334..8cb8ca5bd2a65 100644 --- a/r/man/data-type.Rd +++ b/r/man/data-type.Rd @@ -110,9 +110,12 @@ take any of those four values.} \item{timezone}{For \code{timestamp()}, an optional time zone string.} -\item{precision}{For \code{decimal()}, precision} +\item{precision}{For \code{decimal()}, precision. The number of significant digits +the the arrow \code{decimal} type can represent. Currently \code{decimal()} is mapped +to \code{DecimalType128}, having a maximum precision of 38 significant digits.} -\item{scale}{For \code{decimal()}, scale} +\item{scale}{For \code{decimal()}, scale. The number of digits after the decimal +point. It can be negative.} \item{...}{For \code{struct()}, a named list of types to define the struct columns} diff --git a/r/tests/testthat/test-data-type.R b/r/tests/testthat/test-data-type.R index a9d0879b8a06b..b69de9fa58f58 100644 --- a/r/tests/testthat/test-data-type.R +++ b/r/tests/testthat/test-data-type.R @@ -390,8 +390,8 @@ test_that("decimal type and validation", { expect_error(decimal(4)) expect_error(decimal(4, "two"), '"scale" must be an integer') expect_error(decimal(NA, 2), '"precision" must be an integer') - expect_error(decimal(0, 2), "Invalid: Decimal precision out of range: 0") - expect_error(decimal(100, 2), "Invalid: Decimal precision out of range: 100") + expect_error(decimal(0, 2), "\"precision\" must be greater than 0") + expect_error(decimal(100, 2), "\"precision\" must be lower than or equal to 38") expect_error(decimal(4, NA), '"scale" must be an integer') expect_r6_class(decimal(4, 2), "Decimal128Type") From 3b9c5c51369a4d146a382315c7403cc7c12cf4b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 22 Nov 2021 18:22:01 +0000 Subject: [PATCH 2/9] typo --- r/R/type.R | 2 +- r/man/data-type.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/r/R/type.R b/r/R/type.R index 42e279c4604c6..5e943e705320c 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -188,7 +188,7 @@ NestedType <- R6Class("NestedType", inherit = DataType) #' @param byte_width byte width for `FixedSizeBinary` type. #' @param list_size list size for `FixedSizeList` type. #' @param precision For `decimal()`, precision. The number of significant digits -#' the the arrow `decimal` type can represent. Currently `decimal()` is mapped +#' the arrow `decimal` type can represent. Currently `decimal()` is mapped #' to `DecimalType128`, having a maximum precision of 38 significant digits. #' @param scale For `decimal()`, scale. The number of digits after the decimal #' point. It can be negative. diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd index 8cb8ca5bd2a65..fa32880287eea 100644 --- a/r/man/data-type.Rd +++ b/r/man/data-type.Rd @@ -111,7 +111,7 @@ take any of those four values.} \item{timezone}{For \code{timestamp()}, an optional time zone string.} \item{precision}{For \code{decimal()}, precision. The number of significant digits -the the arrow \code{decimal} type can represent. Currently \code{decimal()} is mapped +the arrow \code{decimal} type can represent. Currently \code{decimal()} is mapped to \code{DecimalType128}, having a maximum precision of 38 significant digits.} \item{scale}{For \code{decimal()}, scale. The number of digits after the decimal From b71edd808311d23de0bedee8cd4629307fad91e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 22 Nov 2021 18:26:26 +0000 Subject: [PATCH 3/9] update docs and args def --- r/R/type.R | 4 ++-- r/man/data-type.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/r/R/type.R b/r/R/type.R index 5e943e705320c..d40669987e528 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -187,10 +187,10 @@ NestedType <- R6Class("NestedType", inherit = DataType) #' @param timezone For `timestamp()`, an optional time zone string. #' @param byte_width byte width for `FixedSizeBinary` type. #' @param list_size list size for `FixedSizeList` type. -#' @param precision For `decimal()`, precision. The number of significant digits +#' @param precision For `decimal()`, the number of significant digits #' the arrow `decimal` type can represent. Currently `decimal()` is mapped #' to `DecimalType128`, having a maximum precision of 38 significant digits. -#' @param scale For `decimal()`, scale. The number of digits after the decimal +#' @param scale For `decimal()`, the number of digits after the decimal #' point. It can be negative. #' @param type For `list_of()`, a data type to make a list-of-type #' @param ... For `struct()`, a named list of types to define the struct columns diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd index fa32880287eea..c6148e43d4e2d 100644 --- a/r/man/data-type.Rd +++ b/r/man/data-type.Rd @@ -110,11 +110,11 @@ take any of those four values.} \item{timezone}{For \code{timestamp()}, an optional time zone string.} -\item{precision}{For \code{decimal()}, precision. The number of significant digits +\item{precision}{For \code{decimal()}, the number of significant digits the arrow \code{decimal} type can represent. Currently \code{decimal()} is mapped to \code{DecimalType128}, having a maximum precision of 38 significant digits.} -\item{scale}{For \code{decimal()}, scale. The number of digits after the decimal +\item{scale}{For \code{decimal()}, the number of digits after the decimal point. It can be negative.} \item{...}{For \code{struct()}, a named list of types to define the struct columns} From ef3b446bc819549082d7b124d1796918e8c67673 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 22 Nov 2021 18:28:45 +0000 Subject: [PATCH 4/9] typo --- r/R/type.R | 2 +- r/tests/testthat/test-data-type.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/r/R/type.R b/r/R/type.R index d40669987e528..e487372b30d0e 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -364,7 +364,7 @@ decimal <- function(precision, scale) { stop('"precision" must be an integer', call. = FALSE) } if (precision > 38) { - stop('"precision" must be lower than or equal to 38', call. = FALSE) + stop('"precision" must be less than or equal to 38', call. = FALSE) } if (precision < 1) { stop('"precision" must be greater than 0', call. = FALSE) diff --git a/r/tests/testthat/test-data-type.R b/r/tests/testthat/test-data-type.R index b69de9fa58f58..a6513c6fd8da7 100644 --- a/r/tests/testthat/test-data-type.R +++ b/r/tests/testthat/test-data-type.R @@ -391,7 +391,7 @@ test_that("decimal type and validation", { expect_error(decimal(4, "two"), '"scale" must be an integer') expect_error(decimal(NA, 2), '"precision" must be an integer') expect_error(decimal(0, 2), "\"precision\" must be greater than 0") - expect_error(decimal(100, 2), "\"precision\" must be lower than or equal to 38") + expect_error(decimal(100, 2), "\"precision\" must be less than or equal to 38") expect_error(decimal(4, NA), '"scale" must be an integer') expect_r6_class(decimal(4, 2), "Decimal128Type") From 65786cc0edf85ae2e60b15aaf342d60bfad0ef02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Tue, 23 Nov 2021 12:57:58 +0000 Subject: [PATCH 5/9] added more description for decimal() --- r/R/type.R | 12 ++++++++++++ r/man/data-type.Rd | 12 ++++++++++++ 2 files changed, 24 insertions(+) diff --git a/r/R/type.R b/r/R/type.R index e487372b30d0e..9a4fc9da36bea 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -180,6 +180,18 @@ NestedType <- R6Class("NestedType", inherit = DataType) #' types, this conversion can be disabled (so that `int64` always yields a #' `bit64::integer64` object) by setting `options(arrow.int64_downcast = #' FALSE)`. +#' `decimal()` creates a `decimal128` type. Arrow decimals are fixed-point +#' decimal numbers encoded as a scalar integer. The `precision` is the number of +#' significant digits that the decimal type can represent; the `scale` is the +#' number of digits after the decimal point. For example, the number 1234.567 +#' has a precision of 7 and a scale of 3. Note that `scale` can be negative. +#' +#' As an example, `decimal(7, 3)` can exactly represent the numbers 1234.567 and +#' -1234.567 (encoded internally as the 128-bit integers 1234567 and -1234567, +#' respectively), but neither 12345.67 nor 123.4567. +#' +#' `decimal(5, -3)` can exactly represent the number 12345000 (encoded +#' internally as the 128-bit integer 12345), but neither 123450000 nor 1234500. #' #' @param unit For time/timestamp types, the time unit. `time32()` can take #' either "s" or "ms", while `time64()` can be "us" or "ns". `timestamp()` can diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd index c6148e43d4e2d..bda702289c693 100644 --- a/r/man/data-type.Rd +++ b/r/man/data-type.Rd @@ -152,6 +152,18 @@ are translated to R objects, \code{uint32} and \code{uint64} are converted to \c ("numeric") and \code{int64} is converted to \code{bit64::integer64}. For \code{int64} types, this conversion can be disabled (so that \code{int64} always yields a \code{bit64::integer64} object) by setting \code{options(arrow.int64_downcast = FALSE)}. +\code{decimal()} creates a \code{decimal128} type. Arrow decimals are fixed-point +decimal numbers encoded as a scalar integer. The \code{precision} is the number of +significant digits that the decimal type can represent; the \code{scale} is the +number of digits after the decimal point. For example, the number 1234.567 +has a precision of 7 and a scale of 3. Note that \code{scale} can be negative. + +As an example, \code{decimal(7, 3)} can exactly represent the numbers 1234.567 and +-1234.567 (encoded internally as the 128-bit integers 1234567 and -1234567, +respectively), but neither 12345.67 nor 123.4567. + +\code{decimal(5, -3)} can exactly represent the number 12345000 (encoded +internally as the 128-bit integer 12345), but neither 123450000 nor 1234500. } \examples{ \dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} From d81f406e5eef7d57ef77fa7494fe73be78aa87e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 24 Nov 2021 15:34:38 +0000 Subject: [PATCH 6/9] revert to C++ error messaging --- r/R/type.R | 6 ------ r/tests/testthat/test-data-type.R | 4 ++-- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/r/R/type.R b/r/R/type.R index 9a4fc9da36bea..6bbcd5b4618a2 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -375,12 +375,6 @@ decimal <- function(precision, scale) { } else { stop('"precision" must be an integer', call. = FALSE) } - if (precision > 38) { - stop('"precision" must be less than or equal to 38', call. = FALSE) - } - if (precision < 1) { - stop('"precision" must be greater than 0', call. = FALSE) - } if (is.numeric(scale)) { scale <- as.integer(scale) } else { diff --git a/r/tests/testthat/test-data-type.R b/r/tests/testthat/test-data-type.R index a6513c6fd8da7..a9d0879b8a06b 100644 --- a/r/tests/testthat/test-data-type.R +++ b/r/tests/testthat/test-data-type.R @@ -390,8 +390,8 @@ test_that("decimal type and validation", { expect_error(decimal(4)) expect_error(decimal(4, "two"), '"scale" must be an integer') expect_error(decimal(NA, 2), '"precision" must be an integer') - expect_error(decimal(0, 2), "\"precision\" must be greater than 0") - expect_error(decimal(100, 2), "\"precision\" must be less than or equal to 38") + expect_error(decimal(0, 2), "Invalid: Decimal precision out of range: 0") + expect_error(decimal(100, 2), "Invalid: Decimal precision out of range: 100") expect_error(decimal(4, NA), '"scale" must be an integer') expect_r6_class(decimal(4, 2), "Decimal128Type") From 930157b01a719520e6d7c071b4bcb39ade981c38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 24 Nov 2021 15:47:13 +0000 Subject: [PATCH 7/9] improve docs for decimal() --- r/R/type.R | 8 ++++++-- r/man/data-type.Rd | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/r/R/type.R b/r/R/type.R index 6bbcd5b4618a2..ac3dcf3e95f84 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -180,6 +180,7 @@ NestedType <- R6Class("NestedType", inherit = DataType) #' types, this conversion can be disabled (so that `int64` always yields a #' `bit64::integer64` object) by setting `options(arrow.int64_downcast = #' FALSE)`. +#' #' `decimal()` creates a `decimal128` type. Arrow decimals are fixed-point #' decimal numbers encoded as a scalar integer. The `precision` is the number of #' significant digits that the decimal type can represent; the `scale` is the @@ -192,6 +193,9 @@ NestedType <- R6Class("NestedType", inherit = DataType) #' #' `decimal(5, -3)` can exactly represent the number 12345000 (encoded #' internally as the 128-bit integer 12345), but neither 123450000 nor 1234500. +#' The `scale` can be thought of as an argument that controls rounding. When +#' negative, `scale` causes the number to be expressed using scientific notation +#' and power of 10. #' #' @param unit For time/timestamp types, the time unit. `time32()` can take #' either "s" or "ms", while `time64()` can be "us" or "ns". `timestamp()` can @@ -200,8 +204,8 @@ NestedType <- R6Class("NestedType", inherit = DataType) #' @param byte_width byte width for `FixedSizeBinary` type. #' @param list_size list size for `FixedSizeList` type. #' @param precision For `decimal()`, the number of significant digits -#' the arrow `decimal` type can represent. Currently `decimal()` is mapped -#' to `DecimalType128`, having a maximum precision of 38 significant digits. +#' the arrow `decimal` type can represent. The maximum precision for +#' `decimal()` is 38 significant digits. #' @param scale For `decimal()`, the number of digits after the decimal #' point. It can be negative. #' @param type For `list_of()`, a data type to make a list-of-type diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd index bda702289c693..2b2313571b26f 100644 --- a/r/man/data-type.Rd +++ b/r/man/data-type.Rd @@ -111,8 +111,8 @@ take any of those four values.} \item{timezone}{For \code{timestamp()}, an optional time zone string.} \item{precision}{For \code{decimal()}, the number of significant digits -the arrow \code{decimal} type can represent. Currently \code{decimal()} is mapped -to \code{DecimalType128}, having a maximum precision of 38 significant digits.} +the arrow \code{decimal} type can represent. The maximum precision for +\code{decimal()} is 38 significant digits.} \item{scale}{For \code{decimal()}, the number of digits after the decimal point. It can be negative.} @@ -152,6 +152,7 @@ are translated to R objects, \code{uint32} and \code{uint64} are converted to \c ("numeric") and \code{int64} is converted to \code{bit64::integer64}. For \code{int64} types, this conversion can be disabled (so that \code{int64} always yields a \code{bit64::integer64} object) by setting \code{options(arrow.int64_downcast = FALSE)}. + \code{decimal()} creates a \code{decimal128} type. Arrow decimals are fixed-point decimal numbers encoded as a scalar integer. The \code{precision} is the number of significant digits that the decimal type can represent; the \code{scale} is the @@ -164,6 +165,9 @@ respectively), but neither 12345.67 nor 123.4567. \code{decimal(5, -3)} can exactly represent the number 12345000 (encoded internally as the 128-bit integer 12345), but neither 123450000 nor 1234500. +The \code{scale} can be thought of as an argument that controls rounding. When +negative, \code{scale} causes the number to be expressed using scientific notation +and power of 10. } \examples{ \dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} From 47513b4481d3c77142fe240e503e5a4f46e32eec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Wed, 24 Nov 2021 15:56:01 +0000 Subject: [PATCH 8/9] add a comment near the tests likely to be affected by ARROW-14842 --- r/tests/testthat/test-data-type.R | 1 + 1 file changed, 1 insertion(+) diff --git a/r/tests/testthat/test-data-type.R b/r/tests/testthat/test-data-type.R index a9d0879b8a06b..b951a67cad5b9 100644 --- a/r/tests/testthat/test-data-type.R +++ b/r/tests/testthat/test-data-type.R @@ -390,6 +390,7 @@ test_that("decimal type and validation", { expect_error(decimal(4)) expect_error(decimal(4, "two"), '"scale" must be an integer') expect_error(decimal(NA, 2), '"precision" must be an integer') + # ARROW-14842 to improve messaging in C++ expect_error(decimal(0, 2), "Invalid: Decimal precision out of range: 0") expect_error(decimal(100, 2), "Invalid: Decimal precision out of range: 100") expect_error(decimal(4, NA), '"scale" must be an integer') From 2fc3564fb0986a3c46f3b56e9e4220e1b6d9848d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Thu, 25 Nov 2021 11:03:00 +0000 Subject: [PATCH 9/9] removing comment as ARROW-14842 is being addressed --- r/tests/testthat/test-data-type.R | 1 - 1 file changed, 1 deletion(-) diff --git a/r/tests/testthat/test-data-type.R b/r/tests/testthat/test-data-type.R index b951a67cad5b9..a9d0879b8a06b 100644 --- a/r/tests/testthat/test-data-type.R +++ b/r/tests/testthat/test-data-type.R @@ -390,7 +390,6 @@ test_that("decimal type and validation", { expect_error(decimal(4)) expect_error(decimal(4, "two"), '"scale" must be an integer') expect_error(decimal(NA, 2), '"precision" must be an integer') - # ARROW-14842 to improve messaging in C++ expect_error(decimal(0, 2), "Invalid: Decimal precision out of range: 0") expect_error(decimal(100, 2), "Invalid: Decimal precision out of range: 100") expect_error(decimal(4, NA), '"scale" must be an integer')