Skip to content

Commit

Permalink
Address more review points
Browse files Browse the repository at this point in the history
  • Loading branch information
benibus committed Jun 21, 2023
1 parent af52e6e commit 22d54c2
Show file tree
Hide file tree
Showing 5 changed files with 128 additions and 68 deletions.
1 change: 1 addition & 0 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ set(ARROW_SRCS
util/debug.cc
util/decimal.cc
util/delimiting.cc
util/float16.cc
util/formatting.cc
util/future.cc
util/hashing.cc
Expand Down
28 changes: 28 additions & 0 deletions cpp/src/arrow/util/float16.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include <ostream>

#include "arrow/util/float16.h"

namespace arrow {
namespace util {

std::ostream& operator<<(std::ostream& os, Float16Base arg) { return (os << arg.bits()); }

} // namespace util
} // namespace arrow
53 changes: 36 additions & 17 deletions cpp/src/arrow/util/float16.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@
#pragma once

#include <array>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <iosfwd>
#include <limits>
#include <type_traits>

#include "arrow/util/bit_util.h"
#include "arrow/util/endian.h"
#include "arrow/util/ubsan.h"
#include "arrow/util/visibility.h"

Expand All @@ -45,38 +45,57 @@ class Float16Base {
Float16Base() = default;
constexpr explicit Float16Base(uint16_t value) : value_(value) {}

/// \brief Return the value's integer representation
constexpr uint16_t bits() const { return value_; }
constexpr explicit operator uint16_t() const { return bits(); }

/// \brief Return true if the value is negative (sign bit is set)
constexpr bool signbit() const { return (value_ & 0x8000) != 0; }

/// \brief Return true if the value is NaN
constexpr bool is_nan() const {
return (value_ & 0x7c00) == 0x7c00 && (value_ & 0x03ff) != 0;
}
/// \brief Return true if the value is positive/negative infinity
constexpr bool is_infinity() const { return (value_ & 0x7fff) == 0x7c00; }
/// \brief Return true if the value is positive/negative zero
constexpr bool is_zero() const { return (value_ & 0x7fff) == 0; }

/// \brief Copy the value's bytes in native-endian byte order
void ToBytes(uint8_t* dest) const { std::memcpy(dest, &value_, sizeof(value_)); }
/// \brief Return the value's bytes in native-endian byte order
std::array<uint8_t, 2> ToBytes() const {
std::array<uint8_t, 2> bytes;
ToBytes(bytes.data());
return bytes;
constexpr std::array<uint8_t, 2> ToBytes() const {
#if ARROW_LITTLE_ENDIAN
return ToLittleEndian();
#else
return ToBigEndian();
#endif
}

/// \brief Copy the value's bytes in little-endian byte order
void ToLittleEndian(uint8_t* dest) const {
Float16Base{bit_util::ToLittleEndian(value_)}.ToBytes(dest);
}
std::array<uint8_t, 2> ToLittleEndian() const {
return Float16Base{bit_util::ToLittleEndian(value_)}.ToBytes();
/// \brief Return the value's bytes in little-endian byte order
constexpr std::array<uint8_t, 2> ToLittleEndian() const {
#if ARROW_LITTLE_ENDIAN
return {uint8_t(value_ & 0xff), uint8_t(value_ >> 8)};
#else
return {uint8_t(value_ >> 8), uint8_t(value_ & 0xff)};
#endif
}

/// \brief Copy the value's bytes in big-endian byte order
void ToBigEndian(uint8_t* dest) const {
Float16Base{bit_util::ToBigEndian(value_)}.ToBytes(dest);
}
std::array<uint8_t, 2> ToBigEndian() const {
return Float16Base{bit_util::ToBigEndian(value_)}.ToBytes();
/// \brief Return the value's bytes in big-endian byte order
constexpr std::array<uint8_t, 2> ToBigEndian() const {
#if ARROW_LITTLE_ENDIAN
return {uint8_t(value_ >> 8), uint8_t(value_ & 0xff)};
#else
return {uint8_t(value_ & 0xff), uint8_t(value_ >> 8)};
#endif
}

friend constexpr bool operator==(Float16Base lhs, Float16Base rhs) {
Expand All @@ -98,13 +117,10 @@ class Float16Base {
return !Float16Base::CompareLt(rhs, lhs);
}
friend constexpr bool operator>=(Float16Base lhs, Float16Base rhs) {
if (lhs.is_nan() || rhs.is_nan()) return false;
return !Float16Base::CompareLt(lhs, rhs);
return rhs <= lhs;
}

friend std::ostream& operator<<(std::ostream& os, Float16Base arg) {
return (os << arg.bits());
}
ARROW_FRIEND_EXPORT friend std::ostream& operator<<(std::ostream& os, Float16Base arg);

protected:
uint16_t value_;
Expand All @@ -118,7 +134,7 @@ class Float16Base {
if (lhs.signbit()) {
if (rhs.signbit()) {
// Both are negative
return (lhs.bits() & 0x7fff) > (rhs.bits() & 0x7fff);
return lhs.bits() > rhs.bits();
} else {
// Handle +/-0
return !lhs.is_zero() || rhs.bits() != 0;
Expand All @@ -127,7 +143,7 @@ class Float16Base {
return false;
} else {
// Both are positive
return (lhs.bits() & 0x7fff) < (rhs.bits() & 0x7fff);
return lhs.bits() < rhs.bits();
}
}
};
Expand All @@ -145,9 +161,12 @@ class Float16 : public Float16Base {
return Float16(SafeLoadAs<uint16_t>(src));
}

/// \brief Read a `Float16` from memory in little-endian byte order
static Float16 FromLittleEndian(const uint8_t* src) {
return Float16(bit_util::FromLittleEndian(SafeLoadAs<uint16_t>(src)));
}

/// \brief Read a `Float16` from memory in big-endian byte order
static Float16 FromBigEndian(const uint8_t* src) {
return Float16(bit_util::FromBigEndian(SafeLoadAs<uint16_t>(src)));
}
Expand Down
40 changes: 24 additions & 16 deletions cpp/src/arrow/util/float16_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,8 @@ class Float16OperatorTest : public ::testing::Test {
const auto num_values = static_cast<int>(test_values.size());

// Check all combinations of operands in both directions
for (int offset = 0; offset < num_values; ++offset) {
int i = 0;
int j = offset;
while (i < num_values) {
for (int i = 0; i < num_values; ++i) {
for (int j = 0; j < num_values; ++j) {
ARROW_SCOPED_TRACE(i, ",", j);

auto a = test_values[i];
Expand All @@ -110,9 +108,6 @@ class Float16OperatorTest : public ::testing::Test {
// Results for float16 and float32 should be the same
auto ret = Operator{}(a, b);
ASSERT_EQ(ret.first, ret.second);

++i;
j = (j + 1) % num_values;
}
}
}
Expand All @@ -127,19 +122,32 @@ TYPED_TEST(Float16OperatorTest, Compare) { this->TestCompare(g_test_values); }

TEST(Float16Test, ToBytes) {
constexpr auto f16 = Float16(0xd01c);
auto bytes = f16.ToBytes();
ASSERT_EQ(SafeLoadAs<uint16_t>(bytes.data()), 0xd01c);
std::array<uint8_t, 2> bytes;
auto load = [&bytes]() { return SafeLoadAs<uint16_t>(bytes.data()); };

// Test native-endian
f16.ToBytes(bytes.data());
ASSERT_EQ(load(), 0xd01c);
bytes = f16.ToBytes();
ASSERT_EQ(load(), 0xd01c);

#if ARROW_LITTLE_ENDIAN
bytes = f16.ToLittleEndian();
ASSERT_EQ(SafeLoadAs<uint16_t>(bytes.data()), 0xd01c);
bytes = f16.ToBigEndian();
ASSERT_EQ(SafeLoadAs<uint16_t>(bytes.data()), 0x1cd0);
constexpr uint16_t expected_le = 0xd01c;
constexpr uint16_t expected_be = 0x1cd0;
#else
constexpr uint16_t expected_le = 0x1cd0;
constexpr uint16_t expected_be = 0xd01c;
#endif
// Test little-endian
f16.ToLittleEndian(bytes.data());
ASSERT_EQ(load(), expected_le);
bytes = f16.ToLittleEndian();
ASSERT_EQ(SafeLoadAs<uint16_t>(bytes.data()), 0x1cd0);
ASSERT_EQ(load(), expected_le);
// Test big-endian
f16.ToBigEndian(bytes.data());
ASSERT_EQ(load(), expected_be);
bytes = f16.ToBigEndian();
ASSERT_EQ(SafeLoadAs<uint16_t>(bytes.data()), 0xd01c);
#endif
ASSERT_EQ(load(), expected_be);
}

TEST(Float16Test, FromBytes) {
Expand Down
74 changes: 39 additions & 35 deletions cpp/src/parquet/statistics.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,22 +57,20 @@ constexpr int value_length(int type_length, const FLBA& value) { return type_len

// Static "constants" for normalizing float16 min/max values. These need to be expressed
// as pointers because `Float16LogicalType` represents an FLBA.
const uint8_t* float16_lowest() {
static const auto bytes = std::numeric_limits<Float16>::lowest().ToLittleEndian();
return bytes.data();
}
const uint8_t* float16_max() {
static const auto bytes = std::numeric_limits<Float16>::max().ToLittleEndian();
return bytes.data();
}
const uint8_t* float16_positive_zero() {
static const auto bytes = Float16(0).ToLittleEndian();
return bytes.data();
}
const uint8_t* float16_negative_zero() {
static const auto bytes = (-Float16(0)).ToLittleEndian();
return bytes.data();
}
struct Float16Constants {
static constexpr const uint8_t* lowest() { return lowest_.data(); }
static constexpr const uint8_t* max() { return max_.data(); }
static constexpr const uint8_t* positive_zero() { return positive_zero_.data(); }
static constexpr const uint8_t* negative_zero() { return negative_zero_.data(); }

private:
using Bytes = std::array<uint8_t, 2>;
static constexpr Bytes lowest_ =
std::numeric_limits<Float16>::lowest().ToLittleEndian();
static constexpr Bytes max_ = std::numeric_limits<Float16>::max().ToLittleEndian();
static constexpr Bytes positive_zero_ = (+Float16(0)).ToLittleEndian();
static constexpr Bytes negative_zero_ = (-Float16(0)).ToLittleEndian();
};

template <typename DType, bool is_signed>
struct CompareHelper {
Expand Down Expand Up @@ -301,12 +299,12 @@ struct CompareHelper<FLBAType, is_signed>
struct Float16CompareHelper {
using T = FLBA;

static T DefaultMin() { return T{float16_max()}; }
static T DefaultMax() { return T{float16_lowest()}; }
static T DefaultMin() { return T{Float16Constants::max()}; }
static T DefaultMax() { return T{Float16Constants::lowest()}; }

static T Coalesce(T val, T fallback) {
return val.ptr != nullptr && Float16::FromLittleEndian(val.ptr).is_nan() ? fallback
: val;
return (val.ptr == nullptr || Float16::FromLittleEndian(val.ptr).is_nan()) ? fallback
: val;
}

static inline bool Compare(int type_length, const T& a, const T& b) {
Expand Down Expand Up @@ -386,10 +384,10 @@ optional<std::pair<FLBA, FLBA>> CleanFloat16Statistic(std::pair<FLBA, FLBA> min_
}

if (min == Float16(0)) {
min_flba = FLBA{float16_negative_zero()};
min_flba = FLBA{Float16Constants::negative_zero()};
}
if (max == -Float16(0)) {
max_flba = FLBA{float16_positive_zero()};
max_flba = FLBA{Float16Constants::positive_zero()};
}

return {{min_flba, max_flba}};
Expand Down Expand Up @@ -540,13 +538,13 @@ std::pair<ByteArray, ByteArray> TypedComparatorImpl<false, ByteArrayType>::GetMi
return GetMinMaxBinaryHelper<false>(*this, values);
}

static LogicalType::Type::type LogicalTypeId(const ColumnDescriptor* descr) {
LogicalType::Type::type LogicalTypeId(const ColumnDescriptor* descr) {
if (const auto& logical_type = descr->logical_type()) {
return logical_type->type();
}
return LogicalType::Type::NONE;
}
static LogicalType::Type::type LogicalTypeId(const Statistics& stats) {
LogicalType::Type::type LogicalTypeId(const Statistics& stats) {
return LogicalTypeId(stats.descr());
}

Expand Down Expand Up @@ -613,20 +611,26 @@ class TypedStatisticsImpl : public TypedStatistics<DType> {

void IncrementNumValues(int64_t n) override { num_values_ += n; }

static bool IsMeaningfulLogicalType(LogicalType::Type::type type) {
switch (type) {
case LogicalType::Type::FLOAT16:
return true;
default:
return false;
}
}

bool Equals(const Statistics& raw_other) const override {
if (physical_type() != raw_other.physical_type()) return false;

const auto logical_id = LogicalTypeId(*this);
switch (logical_id) {
// Only compare against logical types that influence the interpretation of the
// physical type
case LogicalType::Type::FLOAT16:
if (LogicalTypeId(raw_other) != logical_id) {
return false;
}
break;
default:
break;
const auto other_logical_id = LogicalTypeId(raw_other);
// Only compare against logical types that influence the interpretation of the
// physical type
if (IsMeaningfulLogicalType(logical_id)) {
if (logical_id != other_logical_id) return false;
} else if (IsMeaningfulLogicalType(other_logical_id)) {
return false;
}

const auto& other = checked_cast<const TypedStatisticsImpl&>(raw_other);
Expand Down Expand Up @@ -886,7 +890,7 @@ std::shared_ptr<Comparator> DoMakeComparator(Type::type physical_type,
case Type::FIXED_LEN_BYTE_ARRAY:
if (logical_type == LogicalType::Type::FLOAT16) {
return std::make_shared<
TypedComparatorImpl<true, FLBAType, Float16CompareHelper>>();
TypedComparatorImpl<true, FLBAType, Float16CompareHelper>>(type_length);
}
return std::make_shared<TypedComparatorImpl<true, FLBAType>>(type_length);
default:
Expand Down

0 comments on commit 22d54c2

Please sign in to comment.