Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-3701: [Gandiva] add op for decimal 128 #2942

Closed
wants to merge 14 commits into from
Closed
Changes from all commits
Commits
File filter...
Filter file types
Jump to…
Jump to file or symbol
Failed to load files and symbols.
+3,128 −235
Diff settings

Always

Just for now

Copy path View file
@@ -121,7 +121,6 @@ matrix:
- ARROW_TRAVIS_COVERAGE=1
- ARROW_TRAVIS_PYTHON_DOCS=1
- ARROW_TRAVIS_PYTHON_JVM=1
- ARROW_TRAVIS_PYTHON_GANDIVA=1
- ARROW_TRAVIS_OPTIONAL_INSTALL=1
- ARROW_BUILD_WARNING_LEVEL=CHECKIN
# TODO(wesm): Run the benchmarks outside of Travis
@@ -138,6 +137,8 @@ matrix:
- export PLASMA_VALGRIND=0
- $TRAVIS_BUILD_DIR/ci/travis_script_python.sh 2.7 || travis_terminate 1
- export PLASMA_VALGRIND=1
# Gandiva tests are not enabled with python 2.7
- ARROW_TRAVIS_PYTHON_GANDIVA=1
- $TRAVIS_BUILD_DIR/ci/travis_script_python.sh 3.6 || travis_terminate 1
- $TRAVIS_BUILD_DIR/ci/travis_upload_cpp_coverage.sh
- name: "[OS X] C++ w/ XCode 8.3"
@@ -466,4 +466,108 @@ TEST(Decimal128Test, TestToInteger) {
ASSERT_RAISES(Invalid, invalid_int64.ToInteger(&out2));
}

TEST(Decimal128Test, GetWholeAndFraction) {
Decimal128 value("123456");
Decimal128 whole;
Decimal128 fraction;
int32_t out;

value.GetWholeAndFraction(0, &whole, &fraction);
ASSERT_OK(whole.ToInteger(&out));
ASSERT_EQ(123456, out);
ASSERT_OK(fraction.ToInteger(&out));
ASSERT_EQ(0, out);

value.GetWholeAndFraction(1, &whole, &fraction);
ASSERT_OK(whole.ToInteger(&out));
ASSERT_EQ(12345, out);
ASSERT_OK(fraction.ToInteger(&out));
ASSERT_EQ(6, out);

value.GetWholeAndFraction(5, &whole, &fraction);
This conversation was marked as resolved by pitrou

This comment has been minimized.

Copy link
@pitrou

pitrou Jan 2, 2019

Contributor

Would be nice to test negative numbers as well.

This comment has been minimized.

Copy link
@pravindra

pravindra Jan 6, 2019

Author Contributor

done

ASSERT_OK(whole.ToInteger(&out));
ASSERT_EQ(1, out);
ASSERT_OK(fraction.ToInteger(&out));
ASSERT_EQ(23456, out);

value.GetWholeAndFraction(7, &whole, &fraction);
ASSERT_OK(whole.ToInteger(&out));
ASSERT_EQ(0, out);
ASSERT_OK(fraction.ToInteger(&out));
ASSERT_EQ(123456, out);
}

TEST(Decimal128Test, GetWholeAndFractionNegative) {
Decimal128 value("-123456");
Decimal128 whole;
Decimal128 fraction;
int32_t out;

value.GetWholeAndFraction(0, &whole, &fraction);
ASSERT_OK(whole.ToInteger(&out));
ASSERT_EQ(-123456, out);
ASSERT_OK(fraction.ToInteger(&out));
ASSERT_EQ(0, out);

value.GetWholeAndFraction(1, &whole, &fraction);
ASSERT_OK(whole.ToInteger(&out));
ASSERT_EQ(-12345, out);
ASSERT_OK(fraction.ToInteger(&out));
ASSERT_EQ(-6, out);

value.GetWholeAndFraction(5, &whole, &fraction);
ASSERT_OK(whole.ToInteger(&out));
ASSERT_EQ(-1, out);
ASSERT_OK(fraction.ToInteger(&out));
ASSERT_EQ(-23456, out);

value.GetWholeAndFraction(7, &whole, &fraction);
ASSERT_OK(whole.ToInteger(&out));
ASSERT_EQ(0, out);
ASSERT_OK(fraction.ToInteger(&out));
ASSERT_EQ(-123456, out);
}

TEST(Decimal128Test, IncreaseScale) {
Decimal128 result;
int32_t out;

result = Decimal128("1234").IncreaseScaleBy(3);
ASSERT_OK(result.ToInteger(&out));
ASSERT_EQ(1234000, out);

result = Decimal128("-1234").IncreaseScaleBy(3);
ASSERT_OK(result.ToInteger(&out));
ASSERT_EQ(-1234000, out);
}

TEST(Decimal128Test, ReduceScaleAndRound) {
Decimal128 result;
int32_t out;

result = Decimal128("123456").ReduceScaleBy(1, false);
ASSERT_OK(result.ToInteger(&out));
ASSERT_EQ(12345, out);

result = Decimal128("123456").ReduceScaleBy(1, true);
ASSERT_OK(result.ToInteger(&out));
ASSERT_EQ(12346, out);

result = Decimal128("123451").ReduceScaleBy(1, true);
ASSERT_OK(result.ToInteger(&out));
ASSERT_EQ(12345, out);

result = Decimal128("-123789").ReduceScaleBy(2, true);
ASSERT_OK(result.ToInteger(&out));
ASSERT_EQ(-1238, out);

result = Decimal128("-123749").ReduceScaleBy(2, true);
This conversation was marked as resolved by pitrou

This comment has been minimized.

Copy link
@pitrou

pitrou Jan 2, 2019

Contributor

What happens with -123750? Round to even? Something else?

This comment has been minimized.

Copy link
@pravindra

pravindra Jan 6, 2019

Author Contributor

becomes -1238 (>= 10^2/2). added test.

This comment has been minimized.

Copy link
@pitrou

pitrou Jan 6, 2019

Contributor

Ok. AFAIK, round-to-even is generally preferred for accuracy, but we can change this later.

ASSERT_OK(result.ToInteger(&out));
ASSERT_EQ(-1237, out);

result = Decimal128("-123750").ReduceScaleBy(2, true);
ASSERT_OK(result.ToInteger(&out));
ASSERT_EQ(-1238, out);
}

} // namespace arrow
Copy path View file
@@ -35,7 +35,7 @@
namespace arrow {

static const Decimal128 ScaleMultipliers[] = {
Decimal128(0LL),
Decimal128(1LL),
Decimal128(10LL),
Decimal128(100LL),
Decimal128(1000LL),
@@ -75,6 +75,47 @@ static const Decimal128 ScaleMultipliers[] = {
Decimal128(542101086242752217LL, 68739955140067328ULL),
Decimal128(5421010862427522170LL, 687399551400673280ULL)};

static const Decimal128 ScaleMultipliersHalf[] = {
Decimal128(0ULL),
Decimal128(5ULL),
Decimal128(50ULL),
Decimal128(500ULL),
Decimal128(5000ULL),
Decimal128(50000ULL),
Decimal128(500000ULL),
Decimal128(5000000ULL),
Decimal128(50000000ULL),
Decimal128(500000000ULL),
Decimal128(5000000000ULL),
Decimal128(50000000000ULL),
Decimal128(500000000000ULL),
Decimal128(5000000000000ULL),
Decimal128(50000000000000ULL),
Decimal128(500000000000000ULL),
Decimal128(5000000000000000ULL),
Decimal128(50000000000000000ULL),
Decimal128(500000000000000000ULL),
Decimal128(5000000000000000000ULL),
Decimal128(2LL, 13106511852580896768ULL),
Decimal128(27LL, 1937910009842106368ULL),
Decimal128(271LL, 932356024711512064ULL),
Decimal128(2710LL, 9323560247115120640ULL),
Decimal128(27105LL, 1001882102603448320ULL),
Decimal128(271050LL, 10018821026034483200ULL),
Decimal128(2710505LL, 7954489891797073920ULL),
Decimal128(27105054LL, 5757922623132532736ULL),
Decimal128(271050543LL, 2238994010196672512ULL),
Decimal128(2710505431LL, 3943196028257173504ULL),
Decimal128(27105054312LL, 2538472135152631808ULL),
Decimal128(271050543121LL, 6937977277816766464ULL),
Decimal128(2710505431213LL, 14039540557039009792ULL),
Decimal128(27105054312137LL, 11268197054423236608ULL),
Decimal128(271050543121376LL, 2001506101975056384ULL),
Decimal128(2710505431213761LL, 1568316946041012224ULL),
Decimal128(27105054312137610LL, 15683169460410122240ULL),
Decimal128(271050543121376108LL, 9257742014424809472ULL),
Decimal128(2710505431213761085LL, 343699775700336640ULL)};

static constexpr uint64_t kIntMask = 0xFFFFFFFF;
static constexpr auto kCarryBit = static_cast<uint64_t>(1) << static_cast<uint64_t>(32);

@@ -884,6 +925,60 @@ Status Decimal128::Rescale(int32_t original_scale, int32_t new_scale,
return Status::OK();
}

void Decimal128::GetWholeAndFraction(int scale, Decimal128* whole,
Decimal128* fraction) const {
DCHECK_GE(scale, 0);
DCHECK_LE(scale, 38);

Decimal128 multiplier(ScaleMultipliers[scale]);
This conversation was marked as resolved by pitrou

This comment has been minimized.

Copy link
@pitrou

pitrou Jan 2, 2019

Contributor

Probably want to add some DCHECKs for the scale.

This comment has been minimized.

Copy link
@pravindra

pravindra Jan 6, 2019

Author Contributor

done

DCHECK_OK(Divide(multiplier, whole, fraction));
}

const Decimal128& Decimal128::GetScaleMultiplier(int32_t scale) {
DCHECK_GE(scale, 0);
DCHECK_LE(scale, 38);

return ScaleMultipliers[scale];
This conversation was marked as resolved by pitrou

This comment has been minimized.

Copy link
@pitrou

pitrou Jan 2, 2019

Contributor

Same here.

This comment has been minimized.

Copy link
@pravindra

pravindra Jan 6, 2019

Author Contributor

done

}

Decimal128 Decimal128::IncreaseScaleBy(int32_t increase_by) const {
DCHECK_GE(increase_by, 0);
DCHECK_LE(increase_by, 38);

return (*this) * ScaleMultipliers[increase_by];
}

Decimal128 Decimal128::ReduceScaleBy(int32_t reduce_by, bool round) const {
DCHECK_GE(reduce_by, 0);
DCHECK_LE(reduce_by, 38);

Decimal128 divisor(ScaleMultipliers[reduce_by]);

This comment has been minimized.

Copy link
@wesm

wesm Jan 8, 2019

Member

Does it help to have static versions of these?

This comment has been minimized.

Copy link
@pravindra

pravindra Jan 8, 2019

Author Contributor

is it ok to add as needed ?

Decimal128 result;
Decimal128 remainder;
DCHECK_OK(Divide(divisor, &result, &remainder));
if (round) {
auto divisor_half = ScaleMultipliersHalf[reduce_by];
if (remainder.Abs() >= divisor_half) {
if (result > 0) {
result += 1;
} else {
result -= 1;
}
}
}
return result;
}

int32_t Decimal128::CountLeadingBinaryZeros() const {
DCHECK_GE(*this, Decimal128(0));

if (high_bits_ == 0) {
return BitUtil::CountLeadingZeros(low_bits_) + 64;
} else {
return BitUtil::CountLeadingZeros(static_cast<uint64_t>(high_bits_));
}
}

// Helper function used by Decimal128::FromBigEndian
static inline uint64_t UInt64FromBigEndian(const uint8_t* bytes, int32_t length) {
// We don't bounds check the length here because this is called by
Copy path View file
@@ -139,9 +139,28 @@ class ARROW_EXPORT Decimal128 {
/// \return error status if the length is an invalid value
static Status FromBigEndian(const uint8_t* data, int32_t length, Decimal128* out);

/// \brief seperate the integer and fractional parts for the given scale.
void GetWholeAndFraction(int32_t scale, Decimal128* whole, Decimal128* fraction) const;

/// \brief Scale multiplier for given scale value.
static const Decimal128& GetScaleMultiplier(int32_t scale);

/// \brief Convert Decimal128 from one scale to another
Status Rescale(int32_t original_scale, int32_t new_scale, Decimal128* out) const;

/// \brief Scale up.
Decimal128 IncreaseScaleBy(int32_t increase_by) const;

/// \brief Scale down.
/// - If 'round' is true, the right-most digits are dropped and the result value is
/// rounded up (+1 for +ve, -1 for -ve) based on the value of the dropped digits
/// (>= 10^reduce_by / 2).
/// - If 'round' is false, the right-most digits are simply dropped.
Decimal128 ReduceScaleBy(int32_t reduce_by, bool round = true) const;

/// \brief count the number of leading binary zeroes.
int32_t CountLeadingBinaryZeros() const;

/// \brief Convert to a signed integer
template <typename T, typename = internal::EnableIfIsOneOf<T, int32_t, int64_t>>
Status ToInteger(T* out) const {
@@ -46,6 +46,8 @@ set(SRC_FILES annotator.cc
bitmap_accumulator.cc
configuration.cc
context_helper.cc
decimal_ir.cc
decimal_type_util.cc
engine.cc
date_utils.cc
expr_decomposer.cc
@@ -54,6 +56,7 @@ set(SRC_FILES annotator.cc
expression_registry.cc
exported_funcs_registry.cc
filter.cc
function_ir_builder.cc
function_registry.cc
function_registry_arithmetic.cc
function_registry_datetime.cc
@@ -175,6 +178,7 @@ ADD_GANDIVA_TEST(lru_cache_test)
ADD_GANDIVA_TEST(to_date_holder_test)
ADD_GANDIVA_TEST(simple_arena_test)
ADD_GANDIVA_TEST(like_holder_test)
ADD_GANDIVA_TEST(decimal_type_util_test)

if (ARROW_GANDIVA_JAVA)
add_subdirectory(jni)
Copy path View file
@@ -35,6 +35,9 @@ using ArrayPtr = std::shared_ptr<arrow::Array>;
using DataTypePtr = std::shared_ptr<arrow::DataType>;
using DataTypeVector = std::vector<DataTypePtr>;

using Decimal128TypePtr = std::shared_ptr<arrow::Decimal128Type>;
using Decimal128TypeVector = std::vector<Decimal128TypePtr>;

using FieldPtr = std::shared_ptr<arrow::Field>;
using FieldVector = std::vector<FieldPtr>;

@@ -48,6 +51,14 @@ using ArrayDataVector = std::vector<ArrayDataPtr>;
using Status = arrow::Status;
using StatusCode = arrow::StatusCode;

static inline bool is_decimal_128(DataTypePtr type) {
if (type->id() == arrow::Type::DECIMAL) {
auto decimal_type = arrow::internal::checked_cast<arrow::DecimalType*>(type.get());
return decimal_type->byte_width() == 16;
} else {
return false;
}
}
} // namespace gandiva

#endif // GANDIVA_EXPR_ARROW_H
Copy path View file
@@ -0,0 +1,75 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#ifndef DECIMAL_FULL_H
#define DECIMAL_FULL_H

#include <cstdint>
#include <iostream>
#include <string>
#include "arrow/util/decimal.h"

namespace gandiva {

using Decimal128 = arrow::Decimal128;

/// Represents a 128-bit decimal value along with its precision and scale.
class Decimal128Full {

This comment has been minimized.

Copy link
@wesm

wesm Jan 8, 2019

Member

Hm. DecimalScalar/Decimal128Scalar might be clearer

This comment has been minimized.

Copy link
@pravindra

pravindra Jan 8, 2019

Author Contributor

agree, will change.

public:
Decimal128Full(int64_t high_bits, uint64_t low_bits, int32_t precision, int32_t scale)
: value_(high_bits, low_bits), precision_(precision), scale_(scale) {}

Decimal128Full(std::string value, int32_t precision, int32_t scale)

This comment has been minimized.

Copy link
@wesm

wesm Jan 8, 2019

Member

const std::string&

This comment has been minimized.

Copy link
@pravindra

pravindra Jan 8, 2019

Author Contributor

will fix.

: value_(value), precision_(precision), scale_(scale) {}

Decimal128Full(const Decimal128& value, int32_t precision, int32_t scale)
: value_(value), precision_(precision), scale_(scale) {}

Decimal128Full(int32_t precision, int32_t scale)
: value_(0), precision_(precision), scale_(scale) {}

This comment has been minimized.

Copy link
@wesm

wesm Jan 8, 2019

Member

Could be better to be explicit with the value in such case

This comment has been minimized.

Copy link
@pravindra

pravindra Jan 8, 2019

Author Contributor

the default constructor for Decimal128 sets the value to 0. so, i'll remove the value_(0).


uint32_t scale() const { return scale_; }

uint32_t precision() const { return precision_; }

const arrow::Decimal128& value() const { return value_; }

inline std::string ToString() const {
return value_.ToString(0) + "," + std::to_string(precision_) + "," +
std::to_string(scale_);
}

friend std::ostream& operator<<(std::ostream& os, const Decimal128Full& dec) {
os << dec.ToString();
return os;
}

private:
Decimal128 value_;

int32_t precision_;
int32_t scale_;
};

inline bool operator==(const Decimal128Full& left, const Decimal128Full& right) {
return left.value() == right.value() && left.precision() == right.precision() &&
left.scale() == right.scale();
}

} // namespace gandiva

#endif // DECIMAL_FULL_H
Oops, something went wrong.
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.