diff --git a/cpp/src/arrow/json/converter.cc b/cpp/src/arrow/json/converter.cc index d677be25ae492..ec9713e41fe88 100644 --- a/cpp/src/arrow/json/converter.cc +++ b/cpp/src/arrow/json/converter.cc @@ -166,10 +166,28 @@ class DecimalConverter : public PrimitiveConverter { using Builder = typename TypeTraits::BuilderType; Builder builder(out_type_, pool_); RETURN_NOT_OK(builder.Resize(dict_array.indices()->length())); + const auto& decimal_type(checked_cast(*out_type_)); + int32_t out_precision = decimal_type.precision(); + int32_t out_scale = decimal_type.scale(); - auto visit_valid = [&builder](string_view repr) { - ARROW_ASSIGN_OR_RAISE(value_type value, - TypeTraits::BuilderType::ValueType::FromString(repr)); + auto visit_valid = [&](string_view repr) { + value_type value; + int32_t precision, scale; + RETURN_NOT_OK(TypeTraits::BuilderType::ValueType::FromString( + repr, &value, &precision, &scale)); + if (precision > out_precision) { + return GenericConversionError(*out_type_, ": ", repr, " requires precision ", + precision); + } + if (scale != out_scale) { + auto result = value.Rescale(scale, out_scale); + if (ARROW_PREDICT_FALSE(!result.ok())) { + return GenericConversionError(*out_type_, ": ", repr, " requires scale ", + scale); + } else { + value = result.MoveValueUnsafe(); + } + } builder.UnsafeAppend(value); return Status::OK(); }; diff --git a/cpp/src/arrow/json/converter_test.cc b/cpp/src/arrow/json/converter_test.cc index 030f2a7bc13fe..03e02acf188bb 100644 --- a/cpp/src/arrow/json/converter_test.cc +++ b/cpp/src/arrow/json/converter_test.cc @@ -17,6 +17,7 @@ #include "arrow/json/converter.h" +#include #include #include @@ -190,9 +191,14 @@ TEST(ConverterTest, Decimal128And256) { options.explicit_schema = schema({field("", decimal_type)}); std::string json_source = R"( - {"" : "02.0000000000"} - {"" : "30.0000000000"} - )"; + {"" : "02.0000000000"} + {"" : "30.0000000000"} + {"" : "30.0123456789"} + {"" : "30.012345678900"} + {"" : "30.0123"} + {"" : "0.012345678"} + {"" : "1234567890123456789012345678.0123456789"} + )"; std::shared_ptr parse_array; ASSERT_OK(ParseFromString(options, json_source, &parse_array)); @@ -204,11 +210,59 @@ TEST(ConverterTest, Decimal128And256) { // assert equality auto expected = ArrayFromJSON(decimal_type, R"([ "02.0000000000", - "30.0000000000"])"); + "30.0000000000", + "30.0123456789", + "30.0123456789", + "30.0123000000", + "0.0123456780", + "1234567890123456789012345678.0123456789" + ])"); AssertArraysEqual(*expected, *converted); } } +TEST(ConverterTest, Decimal128And256ScaleError) { + for (auto decimal_type : {decimal128(38, 10), decimal256(38, 10)}) { + ParseOptions options; + options.explicit_schema = schema({field("", decimal_type)}); + + std::string json_source = R"( + {"" : "30.0123456789001"} + )"; + + std::shared_ptr parse_array; + ASSERT_OK(ParseFromString(options, json_source, &parse_array)); + + std::string error_msg = "Failed of conversion of JSON to " + + decimal_type->ToString() + + ": 30.0123456789001 requires scale 13"; + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, ::testing::HasSubstr(error_msg), + Convert(decimal_type, parse_array->GetFieldByName(""))); + } +} + +TEST(ConverterTest, Decimal128And256PrecisionError) { + for (auto decimal_type : {decimal128(38, 10), decimal256(38, 10)}) { + ParseOptions options; + options.explicit_schema = schema({field("", decimal_type)}); + + std::string json_source = R"( + {"" : "123456789012345678901234567890.0123456789"} + )"; + + std::shared_ptr parse_array; + ASSERT_OK(ParseFromString(options, json_source, &parse_array)); + + std::string error_msg = + "Invalid: Failed of conversion of JSON to " + decimal_type->ToString() + + ": 123456789012345678901234567890.0123456789 requires precision 40"; + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, ::testing::HasSubstr(error_msg), + Convert(decimal_type, parse_array->GetFieldByName(""))); + } +} + } // namespace json } // namespace arrow diff --git a/python/pyarrow/tests/test_json.py b/python/pyarrow/tests/test_json.py index d2972725779cf..f37019ade574c 100644 --- a/python/pyarrow/tests/test_json.py +++ b/python/pyarrow/tests/test_json.py @@ -16,6 +16,7 @@ # under the License. from collections import OrderedDict +from decimal import Decimal import io import itertools import json @@ -243,6 +244,21 @@ def test_reconcile_accross_blocks(self): # Check that the issue was exercised assert table.column("a").num_chunks > 1 + def test_explicit_schema_decimal(self): + rows = (b'{"a": 1}\n' + b'{"a": 1.45}\n' + b'{"a": -23.456}\n' + b'{}\n') + expected = { + 'a': [Decimal("1"), Decimal("1.45"), Decimal("-23.456"), None], + } + for type_factory in (pa.decimal128, pa.decimal256): + schema = pa.schema([('a', type_factory(9, 4))]) + opts = ParseOptions(explicit_schema=schema) + table = self.read_bytes(rows, parse_options=opts) + assert table.schema == schema + assert table.to_pydict() == expected + def test_explicit_schema_with_unexpected_behaviour(self): # infer by default rows = (b'{"foo": "bar", "num": 0}\n'