Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed ternary logic (OR/AND/XOR) calculation in multiple batches #8718

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
66 changes: 37 additions & 29 deletions dbms/src/Functions/FunctionsLogical.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,39 +282,43 @@ template <
typename Op, template <typename, size_t> typename OperationApplierImpl, size_t N = 10>
struct OperationApplier
{
/// NOTE(review): this listing appears to be a diff rendered without +/- markers, so a removed
/// line is typically followed directly by its replacement (e.g. the two `apply` signatures
/// below) - confirm against the repository before treating any single line as current code.
///
/// Folds the columns listed in `in` into the result with Op, handling at most N columns per pass.

/// Variant taking a result column: while more than one input remains, it runs one batch into
/// result->getData() and pushes the result column back onto `in` as an input for the next batch.
template <typename Columns, typename ResultColumn>
static void apply(Columns & in, ResultColumn & result)
/// Variant taking the result data directly: one overwriting pass followed by combining passes
/// until `in` is empty.
template <typename Columns, typename ResultData>
static void apply(Columns & in, ResultData & result_data)
{
while (in.size() > 1)
{
doBatchedApply(in, result->getData());
in.push_back(result.get());
}
/// TODO: Maybe reuse this code for constants (which may form precalculated result)
/// First pass: carryResult = false, so result_data is overwritten rather than combined.
doBatchedApply<false>(in, result_data);

/// Remaining passes: carryResult = true, so each batch is combined into result_data via Op::apply.
/// A pass erases the columns it handled from `in`, so the loop ends once `in` is empty.
while (in.size() > 0)
doBatchedApply<true>(in, result_data);
}

/// Processes one batch of columns from `in` and writes into result_data.
/// carryResult selects whether the existing values of result_data are combined with the batch
/// through Op::apply (true) or simply overwritten by it (false).
template <typename Columns, typename ResultData>
template <bool carryResult, typename Columns, typename ResultData>
static void NO_INLINE doBatchedApply(Columns & in, ResultData & result_data)
{
/// Fewer than N columns remain: delegate to the applier instantiated with N - 1.
if (N > in.size())
{
OperationApplier<Op, OperationApplierImpl, N - 1>::doBatchedApply(in, result_data);
OperationApplier<Op, OperationApplierImpl, N - 1>
::template doBatchedApply<carryResult>(in, result_data);
return;
}

/// NOTE(review): presumably the applier reads the last N columns of `in`, since exactly those
/// are erased below - confirm against OperationApplierImpl.
const OperationApplierImpl<Op, N> operationApplierImpl(in);
/// i is the index handed to operationApplierImpl.apply for each successive element of result_data.
size_t i = 0;
for (auto & res : result_data)
res = operationApplierImpl.apply(i++);
if constexpr (carryResult)
res = Op::apply(res, operationApplierImpl.apply(i++));
else
res = operationApplierImpl.apply(i++);

/// Drop the last N columns from `in`.
in.erase(in.end() - N, in.end());
}
};

template <
typename Op, template <typename, size_t> typename OperationApplierImpl>
struct OperationApplier<Op, OperationApplierImpl, 1>
struct OperationApplier<Op, OperationApplierImpl, 0>
{
template <typename Columns, typename Result>
template <bool, typename Columns, typename Result>
static void NO_INLINE doBatchedApply(Columns &, Result &)
{
throw Exception(
Expand All @@ -332,7 +336,7 @@ static void executeForTernaryLogicImpl(ColumnRawPtrs arguments, ColumnWithTypeAn
const bool has_consts = extractConstColumnsTernary<Op>(arguments, const_3v_value);

/// If the constant value uniquely determines the result, return it.
if (has_consts && (arguments.empty() || (Op::isSaturable() && Op::isSaturatedValue(const_3v_value))))
if (has_consts && (arguments.empty() || Op::isSaturatedValue(const_3v_value)))
{
result_info.column = ColumnConst::create(
convertFromTernaryData(UInt8Container({const_3v_value}), result_info.type->isNullable()),
Expand All @@ -350,7 +354,7 @@ static void executeForTernaryLogicImpl(ColumnRawPtrs arguments, ColumnWithTypeAn
arguments.push_back(const_column_holder.get());
}

OperationApplier<Op, AssociativeGenericApplierImpl>::apply(arguments, result_column);
OperationApplier<Op, AssociativeGenericApplierImpl>::apply(arguments, result_column->getData());

result_info.column = convertFromTernaryData(result_column->getData(), result_info.type->isNullable());
}
Expand Down Expand Up @@ -425,32 +429,30 @@ static void basicExecuteImpl(ColumnRawPtrs arguments, ColumnWithTypeAndName & re
if (has_consts && Op::apply(const_val, 0) == 0 && Op::apply(const_val, 1) == 1)
has_consts = false;

UInt8ColumnPtrs uint8_args;

auto col_res = ColumnUInt8::create();
auto col_res = ColumnUInt8::create(input_rows_count, const_val);
UInt8Container & vec_res = col_res->getData();
if (has_consts)
{
vec_res.assign(input_rows_count, const_val);
uint8_args.push_back(col_res.get());
}
else
{
vec_res.resize(input_rows_count);
}

/// FastPath detection goes in here
if (arguments.size() == (has_consts ? 1 : 2))
{
if (has_consts)
FastApplierImpl<Op>::apply(*arguments[0], *col_res, col_res->getData());
FastApplierImpl<Op>::apply(*arguments[0], *col_res, vec_res);
else
FastApplierImpl<Op>::apply(*arguments[0], *arguments[1], col_res->getData());
FastApplierImpl<Op>::apply(*arguments[0], *arguments[1], vec_res);

result_info.column = std::move(col_res);
return;
}

UInt8ColumnPtrs uint8_args;
if (has_consts)
{
/// TODO: This may (or may not) fail after the correction, because the first pass
/// of OperationApplier now overwrites the result column
// vec_res.assign(input_rows_count, const_val);
uint8_args.push_back(col_res.get());
}

/// Convert all columns to UInt8
Columns converted_columns;
for (const IColumn * column : arguments)
Expand All @@ -466,11 +468,17 @@ static void basicExecuteImpl(ColumnRawPtrs arguments, ColumnWithTypeAndName & re
}
}

OperationApplier<Op, AssociativeApplierImpl>::apply(uint8_args, col_res);
OperationApplier<Op, AssociativeApplierImpl>::apply(uint8_args, col_res->getData());

/// TODO: The following code is obsolete and is to be removed now
///
/// This is possible if there is exactly one non-constant among the arguments, and it is of type UInt8.
/// Suppose we have all constants folded into a neutral value and there is only one non-constant column.
/// Although not all logical functions have a neutral value.
/*
if (uint8_args[0] != col_res.get())
vec_res.assign(uint8_args[0]->getData());
*/

result_info.column = std::move(col_res);
}
Expand Down
7 changes: 1 addition & 6 deletions dbms/src/Functions/FunctionsLogical.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,7 @@ struct XorImpl

static inline constexpr bool isSaturable() { return false; }
static inline constexpr bool isSaturatedValue(bool) { return false; }
/** Considering that CH uses UInt8 for representation of boolean values this function
* returns 255 as "true" but the current implementation of logical functions suggests that
* any nonzero value is "true" as well. Also the current code provides no guarantee
* for "true" to be represented with the value of 1.
*/
static inline constexpr ResultType apply(UInt8 a, UInt8 b) { return (a != b) ? Ternary::True : Ternary::False; }
static inline constexpr ResultType apply(UInt8 a, UInt8 b) { return !!a != !!b; }
static inline constexpr bool specialImplementationForNulls() { return false; }

#if USE_EMBEDDED_COMPILER
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
OK
56 changes: 56 additions & 0 deletions dbms/tests/queries/0_stateless/00552_logical_functions_simple.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@

-- Compares xor/or/and called with several arguments against the same expression built from
-- nested two-argument calls. Each SELECT returns only the rows where the two forms differ.
-- NOTE(review): presumably no rows are expected, leaving only the final 'OK' in the output -
-- confirm against the reference file.

-- Test simple logic over smaller batch of columns
SELECT
-- x1, x2, x3, x4,
-- *1 aliases use the multi-argument call, *2 aliases use nested two-argument calls.
xor(x1, x2, x3, x4) AS xor1,
xor(xor(x1, x2), xor(x3, x4)) AS xor2,

or(x1, x2, x3, x4) AS or1,
or(x1 or x2, x3 or x4) AS or2,

and(x1, x2, x3, x4) AS and1,
and(x1 and x2, x3 and x4) AS and2
FROM (
-- x1..x4 are taken from successive base-2 digits of `number`; 16 = 2^4 rows,
-- presumably to cover every combination of four 0/1 inputs.
SELECT
toUInt8(number % 2) AS x1,
toUInt8(number / 2 % 2) AS x2,
toUInt8(number / 4 % 2) AS x3,
toUInt8(number / 8 % 2) AS x4
FROM numbers(16)
)
-- Keep a row only when some multi-argument result differs from its nested counterpart.
-- NOTE(review): the parentheses keep every OR two-operand as written; presumably deliberate so
-- that the check itself does not rely on multi-argument OR - confirm.
WHERE
xor1 != xor2 OR (and1 != and2 OR or1 != or2)
;

-- Test simple logic over multiple batches of columns (currently batch spans over 10 columns)
-- Eleven arguments are used here, one more than the batch size named above.
SELECT
-- x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11,
xor(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11) AS xor1,
xor(x1, xor(xor(xor(x2, x3), xor(x4, x5)), xor(xor(x6, x7), xor(x8, xor(x9, xor(x10, x11)))))) AS xor2,

or(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11) AS or1,
or(x1, or(or(or(x2, x3), or(x4, x5)), or(or(x6, x7), or(x8, or(x9, or(x10, x11)))))) AS or2,

and(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11) AS and1,
and(x1, and((x2 and x3) and (x4 and x5), (x6 and x7) and (x8 and (x9 and (x10 and x11))))) AS and2
FROM (
-- x1..x11 are taken from successive base-2 digits of `number`; 2048 = 2^11 rows.
SELECT
toUInt8(number % 2) AS x1,
toUInt8(number / 2 % 2) AS x2,
toUInt8(number / 4 % 2) AS x3,
toUInt8(number / 8 % 2) AS x4,
toUInt8(number / 16 % 2) AS x5,
toUInt8(number / 32 % 2) AS x6,
toUInt8(number / 64 % 2) AS x7,
toUInt8(number / 128 % 2) AS x8,
toUInt8(number / 256 % 2) AS x9,
toUInt8(number / 512 % 2) AS x10,
toUInt8(number / 1024 % 2) AS x11
FROM numbers(2048)
)
-- Same mismatch filter as in the first query.
WHERE
xor1 != xor2 OR (and1 != and2 OR or1 != or2)
;


-- Marker row printed after both checks.
SELECT 'OK';
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
OK
59 changes: 59 additions & 0 deletions dbms/tests/queries/0_stateless/00552_logical_functions_ternary.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@

-- Same comparison as the simple logical-function test (multi-argument call versus nested
-- two-argument calls), but every input is Nullable so that NULL takes part in the logic.
-- Each SELECT returns only the rows where the two forms differ.
-- NOTE(review): presumably no rows are expected, leaving only the final 'OK' in the output -
-- confirm against the reference file.

-- Tests codepath for ternary logic
SELECT
-- x1, x2, x3, x4,
-- *1 aliases use the multi-argument call, *2 aliases use nested two-argument calls.
xor(x1, x2, x3, x4) AS xor1,
xor(xor(x1, x2), xor(x3, x4)) AS xor2,

or(x1, x2, x3, x4) AS or1,
or(x1 or x2, x3 or x4) AS or2,

and(x1, x2, x3, x4) AS and1,
and(x1 and x2, x3 and x4) AS and2
FROM (
-- x1..x4 are taken from successive base-3 digits of `number`; nullIf(..., 2) turns the
-- digit 2 into NULL, so each input is 0, 1 or NULL. 81 = 3^4 rows.
SELECT
nullIf(toUInt8(number % 3), 2) AS x1,
nullIf(toUInt8(number / 3 % 3), 2) AS x2,
nullIf(toUInt8(number / 9 % 3), 2) AS x3,
nullIf(toUInt8(number / 27 % 3), 2) AS x4
FROM numbers(81)
)
-- For each pair, compare both the values and the NULL-ness.
-- NOTE(review): presumably `a != b` alone does not select a row when either side is NULL,
-- hence the extra `is NULL` comparison - confirm.
WHERE
(xor1 != xor2 OR (xor1 is NULL) != (xor2 is NULL)) OR
(or1 != or2 OR (or1 is NULL) != (or2 is NULL) OR (and1 != and2 OR (and1 is NULL) != (and2 is NULL)))
;


-- Test ternary logic over multiple batches of columns (currently batch spans over 10 columns)
-- Eleven arguments are used here, one more than the batch size named above.
SELECT
-- x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11,
xor(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11) AS xor1,
xor(x1, xor(xor(xor(x2, x3), xor(x4, x5)), xor(xor(x6, x7), xor(x8, xor(x9, xor(x10, x11)))))) AS xor2,

or(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11) AS or1,
or(x1, or(or(or(x2, x3), or(x4, x5)), or(or(x6, x7), or(x8, or(x9, or(x10, x11)))))) AS or2,

and(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11) AS and1,
and(x1, and((x2 and x3) and (x4 and x5), (x6 and x7) and (x8 and (x9 and (x10 and x11))))) AS and2
FROM (
-- x1..x11 are taken from successive base-3 digits of `number`, with 2 mapped to NULL.
-- 177147 = 3^11 rows.
SELECT
nullIf(toUInt8(number % 3), 2) AS x1,
nullIf(toUInt8(number / 3 % 3), 2) AS x2,
nullIf(toUInt8(number / 9 % 3), 2) AS x3,
nullIf(toUInt8(number / 27 % 3), 2) AS x4,
nullIf(toUInt8(number / 81 % 3), 2) AS x5,
nullIf(toUInt8(number / 243 % 3), 2) AS x6,
nullIf(toUInt8(number / 729 % 3), 2) AS x7,
nullIf(toUInt8(number / 2187 % 3), 2) AS x8,
nullIf(toUInt8(number / 6561 % 3), 2) AS x9,
nullIf(toUInt8(number / 19683 % 3), 2) AS x10,
nullIf(toUInt8(number / 59049 % 3), 2) AS x11
FROM numbers(177147)
)
-- Same value-and-NULL-ness mismatch filter as in the first query.
WHERE
(xor1 != xor2 OR (xor1 is NULL) != (xor2 is NULL)) OR
(or1 != or2 OR (or1 is NULL) != (or2 is NULL) OR (and1 != and2 OR (and1 is NULL) != (and2 is NULL)))
;


-- Marker row printed after both checks.
SELECT 'OK';