Skip to content

Commit

Permalink
ARROW-655: [C++/Python] Implement DecimalArray
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud committed Mar 23, 2017
1 parent 2406d4e commit be209e1
Show file tree
Hide file tree
Showing 40 changed files with 2,027 additions and 42 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,4 @@
*.dylib
.build_cache_dir
MANIFEST
.eggs/
3 changes: 2 additions & 1 deletion ci/travis_before_script_cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ CPP_DIR=$TRAVIS_BUILD_DIR/cpp

CMAKE_COMMON_FLAGS="\
-DARROW_BUILD_BENCHMARKS=ON \
-DCMAKE_INSTALL_PREFIX=$ARROW_CPP_INSTALL"
-DCMAKE_INSTALL_PREFIX=$ARROW_CPP_INSTALL \
-DCMAKE_CXX_STANDARD=11"

if [ $TRAVIS_OS_NAME == "linux" ]; then
cmake -DARROW_TEST_MEMCHECK=on \
Expand Down
3 changes: 2 additions & 1 deletion ci/travis_script_python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ build_parquet_cpp() {
cmake \
-DCMAKE_BUILD_TYPE=debug \
-DCMAKE_INSTALL_PREFIX=$PARQUET_HOME \
-DCMAKE_CXX_STANDARD=11 \
-DPARQUET_ARROW=on \
-DPARQUET_BUILD_BENCHMARKS=off \
-DPARQUET_BUILD_EXECUTABLES=off \
Expand Down Expand Up @@ -90,7 +91,7 @@ python_version_tests() {
# Other stuff pip install
pip install -r requirements.txt

python setup.py build_ext --inplace --with-parquet --with-jemalloc
python setup.py build_ext --inplace --with-parquet --with-jemalloc --extra-cmake-args="-DCMAKE_CXX_STANDARD=11"

python -c "import pyarrow.parquet"
python -c "import pyarrow.jemalloc"
Expand Down
1 change: 1 addition & 0 deletions cpp/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ Testing/
*.kdev4
*.log
*.swp
.idea/
5 changes: 2 additions & 3 deletions cpp/src/arrow/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,12 @@
#ifndef ARROW_ALLOCATOR_H
#define ARROW_ALLOCATOR_H

#include "arrow/memory_pool.h"
#include "arrow/status.h"
#include <cstddef>
#include <memory>
#include <utility>

#include "arrow/memory_pool.h"
#include "arrow/status.h"

namespace arrow {

template <class T>
Expand Down
182 changes: 180 additions & 2 deletions cpp/src/arrow/array-decimal-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,16 @@
// specific language governing permissions and limitations
// under the License.

#include "arrow/type.h"
#include "gtest/gtest.h"

#include "arrow/type.h"
#include "arrow/builder.h"
#include "arrow/test-util.h"
#include "arrow/util/decimal.h"

namespace arrow {

TEST(TypesTest, TestDecimalType) {
TEST(TypesTest, TestDecimal32Type) {
DecimalType t1(8, 4);

ASSERT_EQ(t1.type, Type::DECIMAL);
Expand All @@ -35,6 +38,181 @@ TEST(TypesTest, TestDecimalType) {
ASSERT_EQ(t2.type, Type::DECIMAL);
ASSERT_EQ(t2.precision, 8);
ASSERT_EQ(t2.scale, 4);

// Test properties
ASSERT_EQ(t1.byte_width(), 4);
ASSERT_EQ(t1.bit_width(), 32);
}

TEST(TypesTest, TestDecimal64Type) {
  // Precision 12 is expected to be reported as an 8-byte (64-bit) decimal.
  DecimalType original(12, 5);

  ASSERT_EQ(original.type, Type::DECIMAL);
  ASSERT_EQ(original.precision, 12);
  ASSERT_EQ(original.scale, 5);

  const std::string expected_repr("decimal(12, 5)");
  ASSERT_EQ(original.ToString(), expected_repr);

  // Copy construction must carry over the type id, precision and scale.
  DecimalType duplicate = original;
  ASSERT_EQ(duplicate.type, Type::DECIMAL);
  ASSERT_EQ(duplicate.precision, 12);
  ASSERT_EQ(duplicate.scale, 5);

  // Storage width derived from the precision.
  ASSERT_EQ(original.byte_width(), 8);
  ASSERT_EQ(original.bit_width(), 64);
}

TEST(TypesTest, TestDecimal128Type) {
  // Precision 27 is expected to be reported as a 16-byte (128-bit) decimal.
  DecimalType original(27, 7);

  ASSERT_EQ(original.type, Type::DECIMAL);
  ASSERT_EQ(original.precision, 27);
  ASSERT_EQ(original.scale, 7);

  const std::string expected_repr("decimal(27, 7)");
  ASSERT_EQ(original.ToString(), expected_repr);

  // Copy construction must carry over the type id, precision and scale.
  DecimalType duplicate = original;
  ASSERT_EQ(duplicate.type, Type::DECIMAL);
  ASSERT_EQ(duplicate.precision, 27);
  ASSERT_EQ(duplicate.scale, 7);

  // Storage width derived from the precision.
  ASSERT_EQ(original.byte_width(), 16);
  ASSERT_EQ(original.bit_width(), 128);
}

TEST(TestDecimal, ConstructString) {
  // Constructing from "-2342.139" must yield the unscaled value -2342139.
  const std::string text("-2342.139");
  Decimal64 parsed(text);
  ASSERT_EQ(parsed.value, -2342139);
}

template <typename T>
class DecimalTestBase {
public:
virtual std::vector<uint8_t> data(
const std::vector<T>& input, size_t byte_width) const = 0;

void test(int precision, const std::vector<T>& draw,
const std::vector<uint8_t>& valid_bytes) const {
std::shared_ptr<DataType> type(std::make_shared<DecimalType>(precision, 4));
auto specific_type(*std::dynamic_pointer_cast<DecimalType>(type).get());
int byte_width = specific_type.byte_width();
auto builder(std::make_shared<DecimalBuilder>(default_memory_pool(), type));
size_t null_count = 0;

size_t size = draw.size();
builder->Reserve(size);

for (size_t i = 0; i < size; ++i) {
if (valid_bytes[i]) {
builder->Append(draw[i]);
} else {
builder->AppendNull();
++null_count;
}
}

std::vector<uint8_t> raw_bytes(data(draw, byte_width));
auto expected_data(std::make_shared<Buffer>(raw_bytes.data(), size * byte_width));
auto expected_null_bitmap(test::bytes_to_null_buffer(valid_bytes));
int64_t expected_null_count = test::null_count(valid_bytes);
auto expected(std::make_shared<DecimalArray>(
type, size, expected_data, expected_null_bitmap, expected_null_count));

std::shared_ptr<Array> out;
ASSERT_OK(builder->Finish(&out));

auto result(std::dynamic_pointer_cast<DecimalArray>(out));
ASSERT_TRUE(result->Equals(*expected));
}
};

// Concrete test helper: serializes each decimal by copying the leading
// `byte_width` bytes of its in-memory `value` member.
template <typename T>
class DecimalTest : public DecimalTestBase<T> {
 public:
  std::vector<uint8_t> data(
      const std::vector<T>& input, size_t byte_width) const override {
    std::vector<uint8_t> flattened;
    flattened.reserve(input.size() * byte_width);
    // TODO(phillipc): There's probably a better way to do this
    for (const T& element : input) {
      const uint8_t* first = reinterpret_cast<const uint8_t*>(&element.value);
      flattened.insert(flattened.end(), first, first + byte_width);
    }
    return flattened;
  }
};

// Value-parameterized fixture for Decimal32 builder tests; the int parameter
// is used as the decimal precision by the TEST_P cases.
class Decimal32BuilderTest : public ::testing::TestWithParam<int>,
public DecimalTest<Decimal32> {};

// Value-parameterized fixture for Decimal64 builder tests; the int parameter
// is used as the decimal precision by the TEST_P cases.
class Decimal64BuilderTest : public ::testing::TestWithParam<int>,
public DecimalTest<Decimal64> {};

// Value-parameterized fixture for Decimal128 builder tests; the int parameter
// is used as the decimal precision by the TEST_P cases.
class Decimal128BuilderTest : public ::testing::TestWithParam<int>,
public DecimalTest<Decimal128> {};

TEST_P(Decimal32BuilderTest, NoNulls) {
  // All five slots are marked valid.
  std::vector<Decimal32> values;
  for (int raw : {1, 2, 2389, 4, -12348}) {
    values.push_back(Decimal32(raw));
  }
  const std::vector<uint8_t> validity(values.size(), true);
  this->test(GetParam(), values, validity);
}

TEST_P(Decimal64BuilderTest, NoNulls) {
  // All five slots are marked valid.
  std::vector<Decimal64> values;
  for (int raw : {1, 2, 2389, 4, -12348}) {
    values.push_back(Decimal64(raw));
  }
  const std::vector<uint8_t> validity(values.size(), true);
  this->test(GetParam(), values, validity);
}

TEST_P(Decimal128BuilderTest, NoNulls) {
  // All five slots are marked valid.
  std::vector<Decimal128> values;
  for (int raw : {1, 2, 2389, 4, -12348}) {
    values.push_back(Decimal128(raw));
  }
  const std::vector<uint8_t> validity(values.size(), true);
  this->test(GetParam(), values, validity);
}

TEST_P(Decimal32BuilderTest, WithNulls) {
  // Slots 2 and 4 are flagged as null; their payload is the placeholder -1.
  std::vector<Decimal32> values;
  for (int raw : {1, 2, -1, 4, -1}) {
    values.push_back(Decimal32(raw));
  }
  const std::vector<uint8_t> validity = {1, 1, 0, 1, 0};
  this->test(GetParam(), values, validity);
}

TEST_P(Decimal64BuilderTest, WithNulls) {
  // Slots 2 and 4 are flagged as null; their payload is the placeholder -1.
  std::vector<Decimal64> values;
  for (int raw : {1, 2, -1, 4, -1}) {
    values.push_back(Decimal64(raw));
  }
  const std::vector<uint8_t> validity = {1, 1, 0, 1, 0};
  this->test(GetParam(), values, validity);
}

TEST_P(Decimal128BuilderTest, WithNulls) {
  // Slots 2 and 4 are flagged as null; their payload is the placeholder -1.
  std::vector<Decimal128> values;
  for (int raw : {1, 2, -1, 4, -1}) {
    values.push_back(Decimal128(raw));
  }
  const std::vector<uint8_t> validity = {1, 1, 0, 1, 0};
  this->test(GetParam(), values, validity);
}

// Run every builder test once per precision supported by the backing integer
// width. ::testing::Range(begin, end) generates [begin, end), so the upper
// bound is maximum + 1 to make sure the maximum precision itself is covered.
// NOTE(review): assumes DecimalPrecision<T>::maximum is an inclusive bound --
// confirm against arrow/util/decimal.h.
INSTANTIATE_TEST_CASE_P(Decimal32BuilderTest, Decimal32BuilderTest,
    ::testing::Range(
        DecimalPrecision<int32_t>::minimum, DecimalPrecision<int32_t>::maximum + 1));
INSTANTIATE_TEST_CASE_P(Decimal64BuilderTest, Decimal64BuilderTest,
    ::testing::Range(
        DecimalPrecision<int64_t>::minimum, DecimalPrecision<int64_t>::maximum + 1));
INSTANTIATE_TEST_CASE_P(Decimal128BuilderTest, Decimal128BuilderTest,
    ::testing::Range(
        DecimalPrecision<int128_t>::minimum, DecimalPrecision<int128_t>::maximum + 1));

} // namespace arrow
6 changes: 1 addition & 5 deletions cpp/src/arrow/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,6 @@ FixedWidthBinaryArray::FixedWidthBinaryArray(const std::shared_ptr<DataType>& ty
: Array(type, length, null_bitmap, null_count, offset),
data_(data),
raw_data_(nullptr) {
DCHECK(type->type == Type::FIXED_WIDTH_BINARY);
byte_width_ = static_cast<const FixedWidthBinaryType&>(*type).byte_width();
if (data) { raw_data_ = data->data(); }
}
Expand Down Expand Up @@ -489,6 +488,7 @@ ARRAY_VISITOR_DEFAULT(DoubleArray);
ARRAY_VISITOR_DEFAULT(BinaryArray);
ARRAY_VISITOR_DEFAULT(StringArray);
ARRAY_VISITOR_DEFAULT(FixedWidthBinaryArray);
ARRAY_VISITOR_DEFAULT(DecimalArray);
ARRAY_VISITOR_DEFAULT(Date32Array);
ARRAY_VISITOR_DEFAULT(Date64Array);
ARRAY_VISITOR_DEFAULT(TimeArray);
Expand All @@ -499,10 +499,6 @@ ARRAY_VISITOR_DEFAULT(StructArray);
ARRAY_VISITOR_DEFAULT(UnionArray);
ARRAY_VISITOR_DEFAULT(DictionaryArray);

Status ArrayVisitor::Visit(const DecimalArray& array) {
return Status::NotImplemented("decimal");
}

// ----------------------------------------------------------------------
// Instantiate templates

Expand Down
17 changes: 17 additions & 0 deletions cpp/src/arrow/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,23 @@ class ARROW_EXPORT FixedWidthBinaryArray : public Array {
const uint8_t* raw_data_;
};

// ----------------------------------------------------------------------
// DecimalArray
/// Array of decimal values. It reuses the FixedWidthBinaryArray storage and
/// constructor contract and adds a typed accessor on top.
class ARROW_EXPORT DecimalArray : public FixedWidthBinaryArray {
 public:
  // The data-type class describing this array. `Type` is only the scope of
  // the type-id enumerators (e.g. Type::DECIMAL), not a data-type class, so
  // the alias must name DecimalType.
  using TypeClass = DecimalType;

  /// \param type the decimal data type of the array
  /// \param length number of slots in the array
  /// \param data buffer holding the fixed-width value bytes
  /// \param null_bitmap optional validity bitmap
  /// \param null_count number of null slots
  /// \param offset logical offset into the buffers
  DecimalArray(const std::shared_ptr<DataType>& type, int64_t length,
      const std::shared_ptr<Buffer>& data,
      const std::shared_ptr<Buffer>& null_bitmap = nullptr, int64_t null_count = 0,
      int64_t offset = 0)
      : FixedWidthBinaryArray(type, length, data, null_bitmap, null_count, offset) {}

  /// Returns slot `i` wrapped as a Decimal<T>.
  // NOTE(review): this relies on Decimal<T> being constructible from whatever
  // GetValue(i) returns; being a member template it is only checked when
  // instantiated -- confirm against arrow/util/decimal.h.
  template <typename T>
  Decimal<T> Value(int64_t i) const {
    return Decimal<T>(GetValue(i));
  }
};

// ----------------------------------------------------------------------
// Struct

Expand Down
26 changes: 25 additions & 1 deletion cpp/src/arrow/builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,28 @@ Status BooleanBuilder::Append(
return Status::OK();
}

// ----------------------------------------------------------------------
// DecimalBuilder
// Forwards the memory pool and data type unchanged to the
// FixedWidthBinaryBuilder base; no additional state is set up here.
DecimalBuilder::DecimalBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type)
: FixedWidthBinaryBuilder(pool, type) {}

// Appends one decimal by handing the raw bytes of its `value` member to the
// fixed-width binary builder.
//
// Returns the first failing Status: either from reserving the slot or from the
// underlying FixedWidthBinaryBuilder::Append. The latter used to be discarded,
// which reported success even when the value bytes were not appended.
template <typename T>
Status DecimalBuilder::Append(const Decimal<T>& val) {
  RETURN_NOT_OK(ArrayBuilder::Reserve(1));
  return FixedWidthBinaryBuilder::Append(reinterpret_cast<const uint8_t*>(&val.value));
}

// Append<T> is defined in this translation unit, so it is explicitly
// instantiated here for Decimal32, Decimal64 and Decimal128.
template Status DecimalBuilder::Append(const Decimal32& val);
template Status DecimalBuilder::Append(const Decimal64& val);
template Status DecimalBuilder::Append(const Decimal128& val);

// Wraps the accumulated value bytes, validity bitmap and counters into a
// DecimalArray and stores it in *out.
Status DecimalBuilder::Finish(std::shared_ptr<Array>* out) {
  std::shared_ptr<Buffer> values = byte_builder_.Finish();
  auto finished =
      std::make_shared<DecimalArray>(type_, length_, values, null_bitmap_, null_count_);
  *out = finished;
  return Status::OK();
}

// ----------------------------------------------------------------------
// ListBuilder

Expand Down Expand Up @@ -440,7 +462,6 @@ Status StringBuilder::Finish(std::shared_ptr<Array>* out) {
FixedWidthBinaryBuilder::FixedWidthBinaryBuilder(
MemoryPool* pool, const std::shared_ptr<DataType>& type)
: ArrayBuilder(pool, type), byte_builder_(pool) {
DCHECK(type->type == Type::FIXED_WIDTH_BINARY);
byte_width_ = static_cast<const FixedWidthBinaryType&>(*type).byte_width();
}

Expand Down Expand Up @@ -533,6 +554,9 @@ Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
BUILDER_CASE(INT64, Int64Builder);
BUILDER_CASE(DATE32, Date32Builder);
BUILDER_CASE(DATE64, Date64Builder);
case Type::DECIMAL:
out->reset(new DecimalBuilder(pool, type));
return Status::OK();
case Type::TIMESTAMP:
out->reset(new TimestampBuilder(pool, type));
return Status::OK();
Expand Down
14 changes: 12 additions & 2 deletions cpp/src/arrow/builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "arrow/type.h"
#include "arrow/type_traits.h"
#include "arrow/util/bit-util.h"
#include "arrow/util/decimal.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"

Expand Down Expand Up @@ -76,12 +77,12 @@ class ARROW_EXPORT ArrayBuilder {
Status SetNotNull(int64_t length);

/// Allocates initial capacity requirements for the builder. In most
/// cases subclasses should override and call there parent classes
/// cases subclasses should override and call their parent class's
/// method as well.
virtual Status Init(int64_t capacity);

/// Resizes the null_bitmap array. In most
/// cases subclasses should override and call there parent classes
/// cases subclasses should override and call their parent class's
/// method as well.
virtual Status Resize(int64_t new_bits);

Expand Down Expand Up @@ -409,6 +410,15 @@ class ARROW_EXPORT FixedWidthBinaryBuilder : public ArrayBuilder {
BufferBuilder byte_builder_;
};

/// Builder for decimal arrays, layered on FixedWidthBinaryBuilder.
class ARROW_EXPORT DecimalBuilder : public FixedWidthBinaryBuilder {
public:
/// \param pool memory pool passed through to the base builder
/// \param type data type passed through to the base builder
explicit DecimalBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type);

/// Appends a single decimal value.
template <typename T>
Status Append(const Decimal<T>& val);
/// Stores the finished array in *out.
Status Finish(std::shared_ptr<Array>* out) override;
};

// ----------------------------------------------------------------------
// Struct

Expand Down
Loading

0 comments on commit be209e1

Please sign in to comment.