Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,7 @@ if(ARROW_COMPUTE)
compute/kernels/vector_nested.cc
compute/kernels/vector_rank.cc
compute/kernels/vector_replace.cc
compute/kernels/vector_rolling.cc
compute/kernels/vector_run_end_encode.cc
compute/kernels/vector_select_k.cc
compute/kernels/vector_sort.cc)
Expand Down
5 changes: 5 additions & 0 deletions cpp/src/arrow/array/builder_primitive.h
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,11 @@ class NumericBuilder
data_builder_.UnsafeAppend(value_type{}); // zero
}

void UnsafeAppendNulls(int64_t length) {
ArrayBuilder::UnsafeAppendToBitmap(length, false);
data_builder_.UnsafeAppend(length, value_type{}); // zero
}

std::shared_ptr<DataType> type() const override { return type_; }

protected:
Expand Down
14 changes: 14 additions & 0 deletions cpp/src/arrow/compute/api_vector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ using compute::DictionaryEncodeOptions;
using compute::FilterOptions;
using compute::NullPlacement;
using compute::RankOptions;
using compute::RollingOptions;

template <>
struct EnumTraits<FilterOptions::NullSelectionBehavior>
Expand Down Expand Up @@ -153,6 +154,10 @@ static auto kRankOptionsType = GetFunctionOptionsType<RankOptions>(
DataMember("tiebreaker", &RankOptions::tiebreaker));
static auto kPairwiseOptionsType = GetFunctionOptionsType<PairwiseOptions>(
DataMember("periods", &PairwiseOptions::periods));
static auto kRollingOptionsType = GetFunctionOptionsType<RollingOptions>(
DataMember("window_length", &RollingOptions::window_length),
DataMember("min_periods", &RollingOptions::min_periods),
DataMember("ignore_nulls", &RollingOptions::ignore_nulls));
} // namespace
} // namespace internal

Expand Down Expand Up @@ -224,6 +229,14 @@ PairwiseOptions::PairwiseOptions(int64_t periods)
: FunctionOptions(internal::kPairwiseOptionsType), periods(periods) {}
constexpr char PairwiseOptions::kTypeName[];

RollingOptions::RollingOptions(int64_t window_length, int64_t min_periods,
bool ignore_nulls)
: FunctionOptions(internal::kRollingOptionsType),
window_length(window_length),
min_periods(min_periods),
ignore_nulls(ignore_nulls) {}
constexpr char RollingOptions::kTypeName[];

namespace internal {
void RegisterVectorOptions(FunctionRegistry* registry) {
DCHECK_OK(registry->AddFunctionOptionsType(kFilterOptionsType));
Expand All @@ -237,6 +250,7 @@ void RegisterVectorOptions(FunctionRegistry* registry) {
DCHECK_OK(registry->AddFunctionOptionsType(kCumulativeOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kRankOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kPairwiseOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kRollingOptionsType));
}
} // namespace internal

Expand Down
25 changes: 21 additions & 4 deletions cpp/src/arrow/compute/api_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@
#include "arrow/result.h"
#include "arrow/type_fwd.h"

namespace arrow {
namespace compute {
namespace arrow::compute {

class ExecContext;

Expand Down Expand Up @@ -246,6 +245,25 @@ class ARROW_EXPORT PairwiseOptions : public FunctionOptions {
int64_t periods = 1;
};

/// \brief Options for rolling functions
class ARROW_EXPORT RollingOptions : public FunctionOptions {
public:
explicit RollingOptions(int64_t window_length, int64_t min_periods = 1,
bool ignore_nulls = true);
RollingOptions() : RollingOptions(1) {}
static constexpr char const kTypeName[] = "RollingOptions";

/// Length of the window, must be positive
int64_t window_length;
/// Minimum number of elements in window required to have a value
int64_t min_periods = 1;
/// Whether to ignore null values in the window
/// True: Only valid values are used to compute the result, produce valid result
/// if there are >= min_periods valid values in the window
/// False: The result is null if any value in the window is null
bool ignore_nulls = true;
};

/// @}

/// \brief Filter with a boolean selection filter
Expand Down Expand Up @@ -694,5 +712,4 @@ Result<std::shared_ptr<Array>> PairwiseDiff(const Array& array,
bool check_overflow = false,
ExecContext* ctx = NULLPTR);

} // namespace compute
} // namespace arrow
} // namespace arrow::compute
1 change: 1 addition & 0 deletions cpp/src/arrow/compute/kernels/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ add_arrow_compute_test(vector_test
vector_hash_test.cc
vector_nested_test.cc
vector_replace_test.cc
vector_rolling_test.cc
vector_run_end_encode_test.cc
select_k_test.cc
EXTRA_LINK_LIBS
Expand Down
38 changes: 38 additions & 0 deletions cpp/src/arrow/compute/kernels/codegen_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,10 @@ using internal::FirstTimeBitmapWriter;
using internal::GenerateBitsUnrolled;
using internal::VisitBitBlocks;
using internal::VisitBitBlocksVoid;
using internal::VisitTwoBitBlocks;
using internal::VisitTwoBitBlocksAllCases;
using internal::VisitTwoBitBlocksVoid;
using internal::VisitTwoBitBlocksVoidAllCases;

namespace compute {
namespace internal {
Expand Down Expand Up @@ -455,6 +458,41 @@ static void VisitTwoArrayValuesInline(const ArraySpan& arr0, const ArraySpan& ar
std::move(visit_null));
}

template <typename Arg0Type, typename Arg1Type, typename VisitBothNotNull,
typename VisitLeftNull, typename VisitRightNull, typename VisitBothNull>
static void VisitTwoArrayValuesInlineAllCases(const ArraySpan& arr0,
const ArraySpan& arr1,
VisitBothNotNull&& visit_both_not_null,
VisitLeftNull&& visit_left_null,
VisitRightNull&& visit_right_null,
VisitBothNull&& visit_both_null) {
ArrayIterator<Arg0Type> arr0_it(arr0);
ArrayIterator<Arg1Type> arr1_it(arr1);

auto forward_visit_both_not_null = [&](int64_t i) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe [&] should be replaced by [visit_both_not_null = std::move(visit_both_not_null)] so the r-value parameters are moved into the local lambdas. As they are not shared, they can be moved.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

arr0_it and arr1_it would be captured by & of course as they are shared among the different lambdas.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think capturing by reference is better here because the lambda won't need to create a variable for the func. See https://cppinsights.io/s/690fcf31 for an example.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nevermind. Nothing changes in the final assembly anyway. https://godbolt.org/z/Edaqefsc9

visit_both_not_null(GetViewType<Arg0Type>::LogicalValue(arr0_it()),
GetViewType<Arg1Type>::LogicalValue(arr1_it()));
};
auto forward_visit_left_null = [&](int64_t i) {
arr0_it();
visit_left_null(GetViewType<Arg1Type>::LogicalValue(arr1_it()));
};
auto forward_visit_right_null = [&](int64_t i) {
visit_right_null(GetViewType<Arg0Type>::LogicalValue(arr0_it()));
arr1_it();
};
auto forward_visit_both_null = [&]() {
arr0_it();
arr1_it();
visit_both_null();
};

VisitTwoBitBlocksVoidAllCases(
arr0.buffers[0].data, arr0.offset, arr1.buffers[0].data, arr1.offset, arr0.length,
std::move(forward_visit_both_not_null), std::move(forward_visit_left_null),
std::move(forward_visit_right_null), std::move(forward_visit_both_null));
}

// ----------------------------------------------------------------------
// Reusable type resolvers

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ struct MakeVectorCumulativeBinaryOpFunction {

} // namespace

void RegisterVectorCumulativeSum(FunctionRegistry* registry) {
void RegisterVectorCumulative(FunctionRegistry* registry) {
MakeVectorCumulativeBinaryOpFunction<Add, CumulativeOptions>::Call(
registry, "cumulative_sum", cumulative_sum_doc);
MakeVectorCumulativeBinaryOpFunction<AddChecked, CumulativeOptions>::Call(
Expand Down
Loading