Skip to content

Commit

Permalink
ARROW-17135: [C++] Reduce code size in compute/kernels/scalar_compare.cc (#13654)
Browse files Browse the repository at this point in the history

This "leaner" implementation reduces the generated code size of this C++ file from 2307768 bytes to 1192608 bytes (about 48% smaller) when compiled with gcc 10.3.0. The benchmarks are also faster (measured on my AVX2 laptop):

before

```
-----------------------------------------------------------------------------------------------
Benchmark                                     Time             CPU   Iterations UserCounters...
-----------------------------------------------------------------------------------------------
GreaterArrayArrayInt64/32768/10000         32.1 us         32.1 us        21533 items_per_second=1020.16M/s null_percent=0.01 size=32.768k
GreaterArrayArrayInt64/32768/100           32.1 us         32.1 us        21603 items_per_second=1019.27M/s null_percent=1 size=32.768k
GreaterArrayArrayInt64/32768/10            32.1 us         32.1 us        21479 items_per_second=1020.82M/s null_percent=10 size=32.768k
GreaterArrayArrayInt64/32768/2             32.0 us         32.0 us        21468 items_per_second=1023.12M/s null_percent=50 size=32.768k
GreaterArrayArrayInt64/32768/1             32.3 us         32.3 us        21720 items_per_second=1013.44M/s null_percent=100 size=32.768k
GreaterArrayArrayInt64/32768/0             31.6 us         31.6 us        21828 items_per_second=1036.94M/s null_percent=0 size=32.768k
GreaterArrayScalarInt64/32768/10000        20.8 us         20.8 us        33461 items_per_second=1.57238G/s null_percent=0.01 size=32.768k
GreaterArrayScalarInt64/32768/100          20.9 us         20.9 us        33625 items_per_second=1.56611G/s null_percent=1 size=32.768k
GreaterArrayScalarInt64/32768/10           20.8 us         20.8 us        33553 items_per_second=1.57338G/s null_percent=10 size=32.768k
GreaterArrayScalarInt64/32768/2            20.9 us         20.9 us        33348 items_per_second=1.5687G/s null_percent=50 size=32.768k
GreaterArrayScalarInt64/32768/1            20.9 us         20.9 us        33419 items_per_second=1.56879G/s null_percent=100 size=32.768k
GreaterArrayScalarInt64/32768/0            20.5 us         20.5 us        34116 items_per_second=1.59837G/s null_percent=0 size=32.768k
```

after

```
-----------------------------------------------------------------------------------------------
Benchmark                                     Time             CPU   Iterations UserCounters...
-----------------------------------------------------------------------------------------------
GreaterArrayArrayInt64/32768/10000         18.1 us         18.1 us        38751 items_per_second=1.81199G/s null_percent=0.01 size=32.768k
GreaterArrayArrayInt64/32768/100           17.5 us         17.5 us        39374 items_per_second=1.86821G/s null_percent=1 size=32.768k
GreaterArrayArrayInt64/32768/10            19.0 us         19.0 us        33941 items_per_second=1.72066G/s null_percent=10 size=32.768k
GreaterArrayArrayInt64/32768/2             18.0 us         18.0 us        39589 items_per_second=1.81817G/s null_percent=50 size=32.768k
GreaterArrayArrayInt64/32768/1             18.1 us         18.1 us        39061 items_per_second=1.80719G/s null_percent=100 size=32.768k
GreaterArrayArrayInt64/32768/0             17.5 us         17.5 us        39813 items_per_second=1.87031G/s null_percent=0 size=32.768k
GreaterArrayScalarInt64/32768/10000        16.3 us         16.3 us        42281 items_per_second=2.01525G/s null_percent=0.01 size=32.768k
GreaterArrayScalarInt64/32768/100          16.5 us         16.5 us        42266 items_per_second=1.98195G/s null_percent=1 size=32.768k
GreaterArrayScalarInt64/32768/10           16.5 us         16.5 us        41872 items_per_second=1.98615G/s null_percent=10 size=32.768k
GreaterArrayScalarInt64/32768/2            16.3 us         16.3 us        42130 items_per_second=2.00447G/s null_percent=50 size=32.768k
GreaterArrayScalarInt64/32768/1            16.2 us         16.2 us        42391 items_per_second=2.02296G/s null_percent=100 size=32.768k
GreaterArrayScalarInt64/32768/0            15.9 us         15.9 us        43498 items_per_second=2.0614G/s null_percent=0 size=32.768k
```

Authored-by: Wes McKinney <wesm@apache.org>
Signed-off-by: Wes McKinney <wesm@apache.org>
  • Loading branch information
wesm committed Jul 20, 2022
1 parent 8a4d611 commit 1214083
Show file tree
Hide file tree
Showing 6 changed files with 226 additions and 71 deletions.
4 changes: 0 additions & 4 deletions cpp/src/arrow/compute/kernels/codegen_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,6 @@ namespace arrow {
namespace compute {
namespace internal {

// Placeholder kernel exec function: ignores all three arguments and
// unconditionally returns a NotImplemented status reporting a malformed kernel.
Status ExecFail(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
// None of ctx, batch, or out is read or written here.
return Status::NotImplemented("This kernel is malformed");
}

const std::vector<std::shared_ptr<DataType>>& ExampleParametricTypes() {
static DataTypeVector example_parametric_types = {
decimal128(12, 2),
Expand Down
33 changes: 18 additions & 15 deletions cpp/src/arrow/compute/kernels/codegen_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -964,8 +964,6 @@ struct FailFunctor<VectorKernel::ChunkedExec> {
}
};

Status ExecFail(KernelContext* ctx, const ExecSpan& batch, ExecResult* out);

// GD for numeric types (integer and floating point)
template <template <typename...> class Generator, typename Type0,
typename KernelType = ArrayKernelExec, typename... Args>
Expand Down Expand Up @@ -1009,7 +1007,7 @@ ArrayKernelExec GenerateFloatingPoint(detail::GetTypeId get_id) {
return Generator<Type0, DoubleType, Args...>::Exec;
default:
DCHECK(false);
return ExecFail;
return nullptr;
}
}

Expand Down Expand Up @@ -1037,7 +1035,7 @@ ArrayKernelExec GenerateInteger(detail::GetTypeId get_id) {
return Generator<Type0, UInt64Type, Args...>::Exec;
default:
DCHECK(false);
return ExecFail;
return nullptr;
}
}

Expand Down Expand Up @@ -1068,7 +1066,7 @@ ArrayKernelExec GeneratePhysicalInteger(detail::GetTypeId get_id) {
return Generator<Type0, UInt64Type, Args...>::Exec;
default:
DCHECK(false);
return ExecFail;
return nullptr;
}
}

Expand Down Expand Up @@ -1104,8 +1102,9 @@ KernelType ArithmeticExecFromOp(detail::GetTypeId get_id) {
}
}

template <template <typename... Args> class Generator, typename... Args>
ArrayKernelExec GeneratePhysicalNumeric(detail::GetTypeId get_id) {
template <typename ReturnType, template <typename... Args> class Generator,
typename... Args>
ReturnType GeneratePhysicalNumericGeneric(detail::GetTypeId get_id) {
switch (get_id.id) {
case Type::INT8:
return Generator<Int8Type, Args...>::Exec;
Expand Down Expand Up @@ -1135,9 +1134,13 @@ ArrayKernelExec GeneratePhysicalNumeric(detail::GetTypeId get_id) {
return Generator<DoubleType, Args...>::Exec;
default:
DCHECK(false);
return ExecFail;
return nullptr;
}
}
// Thin wrapper over GeneratePhysicalNumericGeneric that fixes the return type
// to ArrayKernelExec; Generator, Args and get_id are forwarded unchanged.
template <template <typename... Args> class Generator, typename... Args>
ArrayKernelExec GeneratePhysicalNumeric(detail::GetTypeId get_id) {
return GeneratePhysicalNumericGeneric<ArrayKernelExec, Generator, Args...>(get_id);
}

// Generate a kernel given a templated functor for decimal types
template <template <typename... Args> class Generator, typename... Args>
Expand All @@ -1149,7 +1152,7 @@ ArrayKernelExec GenerateDecimalToDecimal(detail::GetTypeId get_id) {
return Generator<Decimal256Type, Args...>::Exec;
default:
DCHECK(false);
return ExecFail;
return nullptr;
}
}

Expand All @@ -1169,7 +1172,7 @@ ArrayKernelExec GenerateSignedInteger(detail::GetTypeId get_id) {
return Generator<Type0, Int64Type, Args...>::Exec;
default:
DCHECK(false);
return ExecFail;
return nullptr;
}
}

Expand Down Expand Up @@ -1249,7 +1252,7 @@ ArrayKernelExec GenerateVarBinaryToVarBinary(detail::GetTypeId get_id) {
return Generator<LargeStringType, Args...>::Exec;
default:
DCHECK(false);
return ExecFail;
return nullptr;
}
}

Expand All @@ -1270,7 +1273,7 @@ ArrayKernelExec GenerateVarBinaryBase(detail::GetTypeId get_id) {
return Generator<Type0, LargeBinaryType, Args...>::Exec;
default:
DCHECK(false);
return ExecFail;
return nullptr;
}
}

Expand All @@ -1288,7 +1291,7 @@ ArrayKernelExec GenerateVarBinary(detail::GetTypeId get_id) {
return Generator<Type0, LargeStringType, Args...>::Exec;
default:
DCHECK(false);
return ExecFail;
return nullptr;
}
}

Expand All @@ -1312,7 +1315,7 @@ ArrayKernelExec GenerateTemporal(detail::GetTypeId get_id) {
return Generator<Type0, TimestampType, Args...>::Exec;
default:
DCHECK(false);
return ExecFail;
return nullptr;
}
}

Expand All @@ -1328,7 +1331,7 @@ ArrayKernelExec GenerateDecimal(detail::GetTypeId get_id) {
return Generator<Type0, Decimal256Type, Args...>::Exec;
default:
DCHECK(false);
return ExecFail;
return nullptr;
}
}

Expand Down
8 changes: 4 additions & 4 deletions cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1021,7 +1021,7 @@ ArrayKernelExec TypeAgnosticBitWiseExecFromOp(detail::GetTypeId get_id) {
return KernelGenerator<UInt64Type, UInt64Type, Op>::Exec;
default:
DCHECK(false);
return ExecFail;
return nullptr;
}
}

Expand All @@ -1046,7 +1046,7 @@ ArrayKernelExec ShiftExecFromOp(detail::GetTypeId get_id) {
return KernelGenerator<UInt64Type, UInt64Type, Op>::Exec;
default:
DCHECK(false);
return ExecFail;
return nullptr;
}
}

Expand All @@ -1059,7 +1059,7 @@ ArrayKernelExec GenerateArithmeticFloatingPoint(detail::GetTypeId get_id) {
return KernelGenerator<DoubleType, DoubleType, Op>::Exec;
default:
DCHECK(false);
return ExecFail;
return nullptr;
}
}

Expand Down Expand Up @@ -1188,7 +1188,7 @@ ArrayKernelExec GenerateArithmeticWithFixedIntOutType(detail::GetTypeId get_id)
return KernelGenerator<DoubleType, DoubleType, Op>::Exec;
default:
DCHECK(false);
return ExecFail;
return nullptr;
}
}

Expand Down
Loading

0 comments on commit 1214083

Please sign in to comment.