apache · dragosmg · Dec 7, 2021 · Dec 7, 2021 · Dec 7, 2021 · Dec 7, 2021
diff --git a/r/NAMESPACE b/r/NAMESPACE
@@ -215,6 +215,7 @@ export(date32)
 export(date64)
 export(decimal)
 export(decimal128)
+export(decimal256)
 export(default_memory_pool)
 export(dictionary)
 export(ends_with)

diff --git a/r/NEWS.md b/r/NEWS.md
@@ -19,11 +19,12 @@
 
 # arrow 6.0.1.9000
 
+* Added `decimal256()`. Updated `decimal()`, which now calls `decimal256()` or `decimal128()` based on the value of the `precision` argument.
 * updated `write_csv_arrow()` to follow the signature of `readr::write_csv()`. The following arguments are supported:
   * `file` identical to `sink`
   * `col_names` identical to `include_header`
   * other arguments are currently unsupported, but the function errors with a meaningful message.
-* Added `decimal128()` (identical to `decimal()`) as the name is more explicit and updated docs to encourage its use. 
+* Added `decimal128()` (~~identical to `decimal()`~~) as the name is more explicit and updated docs to encourage its use. 
 * Source builds now by default use `pkg-config` to search for system dependencies (such as `libz`) and link to them 
 if present. To retain the previous behaviour of downloading and building all dependencies, set `ARROW_DEPENDENCY_SOURCE=BUNDLED`. 
 

diff --git a/r/R/array.R b/r/R/array.R
@@ -187,8 +187,27 @@ Array$create <- function(x, type = NULL) {
     }
     return(out)
   }
-  vec_to_Array(x, type)
+
+  if (is.null(type)) {
+    return(vec_to_Array(x, type))
+  }
+
+  # a type is given, this needs more care
+  tryCatch(
+    vec_to_Array(x, type),
+    error = function(cnd) {
+      attempt <- try(vec_to_Array(x, NULL)$cast(type), silent = TRUE)
+      abort(
+        c(conditionMessage(cnd),
+          i = if (!inherits(attempt, "try-error")) {
+            "You might want to try casting manually with `Array$create(...)$cast(...)`."
+          }
+        )
+      )
+    }
+  )
 }
+
 #' @include arrowExports.R
 Array$import_from_c <- ImportArray
 

diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
diff --git a/r/R/type.R b/r/R/type.R
@@ -150,8 +150,11 @@ DecimalType <- R6Class("DecimalType",
     scale = function() DecimalType__scale(self)
   )
 )
+
 Decimal128Type <- R6Class("Decimal128Type", inherit = DecimalType)
 
+Decimal256Type <- R6Class("Decimal256Type", inherit = DecimalType)
+
 NestedType <- R6Class("NestedType", inherit = DataType)
 
 #' Apache Arrow data types
@@ -181,7 +184,7 @@ NestedType <- R6Class("NestedType", inherit = DataType)
 #' `bit64::integer64` object) by setting `options(arrow.int64_downcast =
 #' FALSE)`.
 #'
-#' `decimal128()` creates a `decimal128` type. Arrow decimals are fixed-point
+#' `decimal128()` creates a `Decimal128Type`. Arrow decimals are fixed-point
 #' decimal numbers encoded as a scalar integer. The `precision` is the number of
 #' significant digits that the decimal type can represent; the `scale` is the
 #' number of digits after the decimal point. For example, the number 1234.567
@@ -197,21 +200,30 @@ NestedType <- R6Class("NestedType", inherit = DataType)
 #' negative, `scale` causes the number to be expressed using scientific notation
 #' and power of 10.
 #'
-#' `decimal()` is identical to `decimal128()`, defined for backward compatibility.
-#' Use `decimal128()` as the name  is more informative and `decimal()` might be
-#' deprecated in the future.
+#' `decimal256()` creates a `Decimal256Type`, which allows for higher maximum
+#' precision. For most use cases, the maximum precision offered by `Decimal128Type`
+#' is sufficient, and it will result in a more compact and more efficient encoding.
+#'
+#' #' `decimal()` creates either a `Decimal128Type` or a `Decimal256Type`
+#' depending on the value for `precision`. If `precision` is greater than 38 a
+#' `Decimal256Type` is returned, otherwise a `Decimal128Type`.
+#'
+#' Use `decimal128()` or `decimal256()` as the names are more informative than
+#' `decimal()`.
 #'
 #' @param unit For time/timestamp types, the time unit. `time32()` can take
 #' either "s" or "ms", while `time64()` can be "us" or "ns". `timestamp()` can
 #' take any of those four values.
 #' @param timezone For `timestamp()`, an optional time zone string.
 #' @param byte_width byte width for `FixedSizeBinary` type.
 #' @param list_size list size for `FixedSizeList` type.
-#' @param precision For `decimal()`, `decimal128()` the number of significant
-#'    digits the arrow `decimal` type can represent. The maximum precision for
-#'    `decimal()` and `decimal128()` is 38 significant digits.
-#' @param scale For `decimal()` and `decimal128()`, the number of digits after
-#'    the decimal point. It can be negative.
+#' @param precision For `decimal()`, `decimal128()`, and `decimal256()` the
+#'    number of significant digits the arrow `decimal` type can represent. The
+#'    maximum precision for `decimal128()` is 38 significant digits, while for
+#'    `decimal256()` it is 76 digits. `decimal()` will use it to choose which
+#'    type of decimal to return.
+#' @param scale For `decimal()`, `decimal128()`, and `decimal256()` the number
+#'    of digits after the decimal point. It can be negative.
 #' @param type For `list_of()`, a data type to make a list-of-type
 #' @param ... For `struct()`, a named list of types to define the struct columns
 #'
@@ -375,25 +387,49 @@ timestamp <- function(unit = c("s", "ms", "us", "ns"), timezone = "") {
   Timestamp__initialize(unit, timezone)
 }
 
+#' @rdname data-type
+#' @export
+decimal <- function(precision, scale) {
+  args <- check_decimal_args(precision, scale)
+
+  if (args$precision > 38) {
+    decimal256(args$precision, args$scale)
+  } else {
+    decimal128(args$precision, args$scale)
+  }
+}
+
 #' @rdname data-type
 #' @export
 decimal128 <- function(precision, scale) {
+  args <- check_decimal_args(precision, scale)
+  Decimal128Type__initialize(args$precision, args$scale)
+}
+
+#' @rdname data-type
+#' @export
+decimal256 <- function(precision, scale) {
+  args <- check_decimal_args(precision, scale)
+  Decimal256Type__initialize(args$precision, args$scale)
+}
+
+check_decimal_args <- function(precision, scale) {
   if (is.numeric(precision)) {
-    precision <- as.integer(precision)
+    precision <- vec_cast(precision, to = integer())
+    vctrs::vec_assert(precision, size = 1L)
   } else {
-    stop('"precision" must be an integer', call. = FALSE)
+    stop("`precision` must be an integer", call. = FALSE)
   }
+
   if (is.numeric(scale)) {
-    scale <- as.integer(scale)
+    scale <- vec_cast(scale, to = integer())
+    vctrs::vec_assert(scale, size = 1L)
   } else {
-    stop('"scale" must be an integer', call. = FALSE)
+    stop("`scale` must be an integer", call. = FALSE)
   }
-  Decimal128Type__initialize(precision, scale)
-}
 
-#' @rdname data-type
-#' @export
-decimal <- decimal128
+  list(precision = precision, scale = scale)
+}
 
 StructType <- R6Class("StructType",
   inherit = NestedType,
@@ -496,6 +532,7 @@ canonical_type_str <- function(type_str) {
     null = "null",
     timestamp = "timestamp",
     decimal128 = "decimal128",
+    decimal256 = "decimal256",
     struct = "struct",
     list_of = "list",
     list = "list",

diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd
diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp
@@ -905,6 +905,7 @@ class Converter_Timestamp : public Converter_Time<value_type, TimestampType> {
   }
 };
 
+template <typename Type>
 class Converter_Decimal : public Converter {
  public:
   explicit Converter_Decimal(const std::shared_ptr<ChunkedArray>& chunked_array)
@@ -919,8 +920,9 @@ class Converter_Decimal : public Converter {
 
   Status Ingest_some_nulls(SEXP data, const std::shared_ptr<arrow::Array>& array,
                            R_xlen_t start, R_xlen_t n, size_t chunk_index) const {
+    //using DecimalArray = typename TypeTraits<Type>::ArrayType;
     auto p_data = REAL(data) + start;
-    const auto& decimals_arr = checked_cast<const arrow::Decimal128Array&>(*array);
+    const auto& decimals_arr = checked_cast<const arrow::DecimalArray&>(*array);
 
     auto ingest_one = [&](R_xlen_t i) {
       p_data[i] = std::stod(decimals_arr.FormatValue(i).c_str());
@@ -1217,7 +1219,10 @@ std::shared_ptr<Converter> Converter::Make(
       }
 
     case Type::DECIMAL128:
-      return std::make_shared<arrow::r::Converter_Decimal>(chunked_array);
+      return std::make_shared<arrow::r::Converter_Decimal<Decimal128Type>>(chunked_array);
+
+    case Type::DECIMAL256:
+      return std::make_shared<arrow::r::Converter_Decimal<Decimal256Type>>(chunked_array);
 
       // nested
     case Type::STRUCT:
@@ -1245,7 +1250,7 @@ std::shared_ptr<Converter> Converter::Make(
       break;
   }
 
-  cpp11::stop("cannot handle Array of type ", type->name().c_str());
+  cpp11::stop("cannot handle Array of type <%s>", type->name().c_str());
 }
 
 std::shared_ptr<ChunkedArray> to_chunks(const std::shared_ptr<Array>& array) {

diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
diff --git a/r/src/datatype.cpp b/r/src/datatype.cpp
@@ -82,6 +82,8 @@ const char* r6_class_name<arrow::DataType>::get(
 
     case Type::DECIMAL128:
       return "Decimal128Type";
+    case Type::DECIMAL256:
+      return "Decimal256Type";
 
     case Type::LIST:
       return "ListType";
@@ -180,6 +182,13 @@ std::shared_ptr<arrow::DataType> Decimal128Type__initialize(int32_t precision,
   return ValueOrStop(arrow::Decimal128Type::Make(precision, scale));
 }
 
+// [[arrow::export]]
+std::shared_ptr<arrow::DataType> Decimal256Type__initialize(int32_t precision,
+                                                            int32_t scale) {
+  // Use the builder that validates inputs
+  return ValueOrStop(arrow::Decimal256Type::Make(precision, scale));
+}
+
 // [[arrow::export]]
 std::shared_ptr<arrow::DataType> FixedSizeBinary__initialize(R_xlen_t byte_width) {
   if (byte_width == NA_INTEGER) {

diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R
@@ -206,7 +206,8 @@ test_that("ChunkedArray supports empty arrays (ARROW-13761)", {
     int8(), int16(), int32(), int64(), uint8(), uint16(), uint32(),
     uint64(), float32(), float64(), timestamp("ns"), binary(),
     large_binary(), fixed_size_binary(32), date32(), date64(),
-    decimal128(4, 2), dictionary(), struct(x = int32())
+    decimal128(4, 2), #decimal256(4, 2), # not yet supported - see below
+    dictionary(), struct(x = int32())
   )
 
   empty_filter <- ChunkedArray$create(type = bool())
@@ -228,6 +229,28 @@ test_that("ChunkedArray supports empty arrays (ARROW-13761)", {
       expect_identical(length(as.vector(zero_empty_chunks)), 1L)
     }
   }
+
+  skip("array_filter has no kernel matching input types `(array[decimal256(4, 2)], array[bool])`")
+  # once the above Jira is addressed decimal256() can be moved together with the
+  # other types. this chunk can be deleted once decimal256() is added to types
+  type <- decimal256(4, 2)
+  empty_filter <- ChunkedArray$create(type = bool())
+  one_empty_chunk <- ChunkedArray$create(type = type)
+  expect_type_equal(one_empty_chunk$type, type)
+  if (type != struct(x = int32())) {
+    expect_identical(length(one_empty_chunk), length(as.vector(one_empty_chunk)))
+  } else {
+    # struct -> tbl and length(tbl) is num_columns instead of num_rows
+    expect_identical(length(as.vector(one_empty_chunk)), 1L)
+  }
+  zero_empty_chunks <- one_empty_chunk$Filter(empty_filter)
+  expect_equal(zero_empty_chunks$num_chunks, 0)
+  expect_type_equal(zero_empty_chunks$type, type)
+  if (type != struct(x = int32())) {
+    expect_identical(length(zero_empty_chunks), length(as.vector(zero_empty_chunks)))
+  } else {
+    expect_identical(length(as.vector(zero_empty_chunks)), 1L)
+  }
 })
 
 test_that("integer types casts for ChunkedArray (ARROW-3741)", {