Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-17995: [C++] Fix json decimals not being rescaled based on the explicit schema #14380

Merged
merged 11 commits into from
Oct 15, 2022
24 changes: 21 additions & 3 deletions cpp/src/arrow/json/converter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -166,10 +166,28 @@ class DecimalConverter : public PrimitiveConverter {
using Builder = typename TypeTraits<T>::BuilderType;
Builder builder(out_type_, pool_);
RETURN_NOT_OK(builder.Resize(dict_array.indices()->length()));
const auto& decimal_type(checked_cast<const DecimalType&>(*out_type_));
int32_t out_precision = decimal_type.precision();
int32_t out_scale = decimal_type.scale();

auto visit_valid = [&builder](string_view repr) {
ARROW_ASSIGN_OR_RAISE(value_type value,
TypeTraits<T>::BuilderType::ValueType::FromString(repr));
auto visit_valid = [&](string_view repr) {
value_type value;
int32_t precision, scale;
RETURN_NOT_OK(TypeTraits<T>::BuilderType::ValueType::FromString(
repr, &value, &precision, &scale));
stiga-huang marked this conversation as resolved.
Show resolved Hide resolved
if (precision > out_precision) {
return GenericConversionError(*out_type_, ": ", repr, " requires precision ",
precision);
}
if (scale != out_scale) {
auto result = value.Rescale(scale, out_scale);
if (ARROW_PREDICT_FALSE(!result.ok())) {
return GenericConversionError(*out_type_, ": ", repr, " requires scale ",
scale);
} else {
value = result.MoveValueUnsafe();
}
}
builder.UnsafeAppend(value);
return Status::OK();
};
Expand Down
62 changes: 58 additions & 4 deletions cpp/src/arrow/json/converter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include "arrow/json/converter.h"

#include <gmock/gmock.h>
#include <gtest/gtest.h>

#include <string>
Expand Down Expand Up @@ -190,9 +191,14 @@ TEST(ConverterTest, Decimal128And256) {
options.explicit_schema = schema({field("", decimal_type)});

std::string json_source = R"(
{"" : "02.0000000000"}
{"" : "30.0000000000"}
)";
{"" : "02.0000000000"}
{"" : "30.0000000000"}
{"" : "30.0123456789"}
{"" : "30.012345678900"}
{"" : "30.0123"}
{"" : "0.012345678"}
{"" : "1234567890123456789012345678.0123456789"}
)";

std::shared_ptr<StructArray> parse_array;
ASSERT_OK(ParseFromString(options, json_source, &parse_array));
Expand All @@ -204,11 +210,59 @@ TEST(ConverterTest, Decimal128And256) {
// assert equality
auto expected = ArrayFromJSON(decimal_type, R"([
"02.0000000000",
"30.0000000000"])");
"30.0000000000",
"30.0123456789",
"30.0123456789",
"30.0123000000",
"0.0123456780",
"1234567890123456789012345678.0123456789"
])");

AssertArraysEqual(*expected, *converted);
}
}

TEST(ConverterTest, Decimal128And256ScaleError) {
for (auto decimal_type : {decimal128(38, 10), decimal256(38, 10)}) {
ParseOptions options;
options.explicit_schema = schema({field("", decimal_type)});

std::string json_source = R"(
{"" : "30.0123456789001"}
)";

std::shared_ptr<StructArray> parse_array;
ASSERT_OK(ParseFromString(options, json_source, &parse_array));

std::string error_msg = "Failed of conversion of JSON to " +
decimal_type->ToString() +
": 30.0123456789001 requires scale 13";
EXPECT_RAISES_WITH_MESSAGE_THAT(
Invalid, ::testing::HasSubstr(error_msg),
Convert(decimal_type, parse_array->GetFieldByName("")));
}
}

TEST(ConverterTest, Decimal128And256PrecisionError) {
for (auto decimal_type : {decimal128(38, 10), decimal256(38, 10)}) {
ParseOptions options;
options.explicit_schema = schema({field("", decimal_type)});

std::string json_source = R"(
{"" : "123456789012345678901234567890.0123456789"}
)";

std::shared_ptr<StructArray> parse_array;
ASSERT_OK(ParseFromString(options, json_source, &parse_array));

std::string error_msg =
"Invalid: Failed of conversion of JSON to " + decimal_type->ToString() +
": 123456789012345678901234567890.0123456789 requires precision 40";
EXPECT_RAISES_WITH_MESSAGE_THAT(
Invalid, ::testing::HasSubstr(error_msg),
Convert(decimal_type, parse_array->GetFieldByName("")));
}
}

} // namespace json
} // namespace arrow
16 changes: 16 additions & 0 deletions python/pyarrow/tests/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# under the License.

from collections import OrderedDict
from decimal import Decimal
import io
import itertools
import json
Expand Down Expand Up @@ -243,6 +244,21 @@ def test_reconcile_accross_blocks(self):
# Check that the issue was exercised
assert table.column("a").num_chunks > 1

def test_explicit_schema_decimal(self):
rows = (b'{"a": 1}\n'
b'{"a": 1.45}\n'
b'{"a": -23.456}\n'
b'{}\n')
expected = {
'a': [Decimal("1"), Decimal("1.45"), Decimal("-23.456"), None],
}
for type_factory in (pa.decimal128, pa.decimal256):
schema = pa.schema([('a', type_factory(9, 4))])
opts = ParseOptions(explicit_schema=schema)
table = self.read_bytes(rows, parse_options=opts)
assert table.schema == schema
assert table.to_pydict() == expected

def test_explicit_schema_with_unexpected_behaviour(self):
# infer by default
rows = (b'{"foo": "bar", "num": 0}\n'
Expand Down