Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize bit operations on FixedString when one of the arguments is constant #9454

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
61 changes: 55 additions & 6 deletions dbms/src/Functions/FunctionBinaryArithmetic.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,16 +95,65 @@ struct FixedStringOperationImpl
c[i] = Op::template apply<UInt8>(a[i], b[i]);
}

static void NO_INLINE vector_constant(const UInt8 * __restrict a, const UInt8 * __restrict b, UInt8 * __restrict c, size_t size, size_t N)
template <bool inverted>
static void NO_INLINE vector_constant_impl(const UInt8 * __restrict a, const UInt8 * __restrict b, UInt8 * __restrict c, size_t size, size_t N)
{
for (size_t i = 0; i < size; ++i)
c[i] = Op::template apply<UInt8>(a[i], b[i % N]);
/// These complications are needed to avoid integer division in inner loop.

/// Create a pattern of repeated values of b with at least 16 bytes,
/// so we can read 16 bytes of this repeated pattern starting from any offset inside b.
///
/// Example:
///
/// N = 6
/// ------
/// [abcdefabcdefabcdefabc]
/// ^^^^^^^^^^^^^^^^
/// 16 bytes starting from the last offset inside b.

const size_t b_repeated_size = N + 15;
UInt8 b_repeated[b_repeated_size];
for (size_t i = 0; i < b_repeated_size; ++i)
b_repeated[i] = b[i % N];

size_t b_offset = 0;
size_t b_increment = 16 % N;

/// Example:
///
/// At first iteration we copy 16 bytes at offset 0 from b_repeated:
/// [abcdefabcdefabcdefabc]
/// ^^^^^^^^^^^^^^^^
/// At second iteration we copy 16 bytes at offset 4 = 16 % 6 from b_repeated:
/// [abcdefabcdefabcdefabc]
/// ^^^^^^^^^^^^^^^^
/// At third iteration we copy 16 bytes at offset 2 = (16 * 2) % 6 from b_repeated:
/// [abcdefabcdefabcdefabc]
/// ^^^^^^^^^^^^^^^^

/// PaddedPODArray allows overflow for 15 bytes.
for (size_t i = 0; i < size; i += 16)
{
/// This loop is formed in a way to be vectorized into two SIMD mov.
for (size_t j = 0; j < 16; ++j)
c[i + j] = inverted
? Op::template apply<UInt8>(a[i + j], b_repeated[b_offset + j])
: Op::template apply<UInt8>(b_repeated[b_offset + j], a[i + j]);

b_offset += b_increment;
if (b_offset >= N) /// This condition is easily predictable.
b_offset -= N;
}
}

static void NO_INLINE constant_vector(const UInt8 * __restrict a, const UInt8 * __restrict b, UInt8 * __restrict c, size_t size, size_t N)
static void vector_constant(const UInt8 * __restrict a, const UInt8 * __restrict b, UInt8 * __restrict c, size_t size, size_t N)
{
for (size_t i = 0; i < size; ++i)
c[i] = Op::template apply<UInt8>(a[i % N], b[i]);
vector_constant_impl<false>(a, b, c, size, N);
}

/// Counterpart of vector_constant for the case when the first argument `a` is the one
/// passed to vector_constant_impl as the constant and `b` is passed as the data buffer.
/// The two pointers are swapped in the call, and the `inverted` flag of the implementation is set.
static void constant_vector(const UInt8 * __restrict a, const UInt8 * __restrict b, UInt8 * __restrict c, size_t size, size_t N)
{
    constexpr bool inverted = true;
    vector_constant_impl<inverted>(b, a, c, size, N);
}
};

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
aca
acagac
aca
acagac
5 changes: 5 additions & 0 deletions dbms/tests/queries/0_stateless/01090_fixed_string_bit_ops.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Bit operations on FixedString where exactly one of the two arguments is constant.
-- Each query runs over 10 rows, so the processed data is longer than 16 bytes,
-- and DISTINCT collapses the result to a single line per query.

-- Non-constant (materialized) first argument, constant second argument.
-- Expected results: 'aca' for N = 3 and 'acagac' for N = 6.
SELECT DISTINCT bitXor(materialize(toFixedString('abc', 3)), toFixedString('\x00\x01\x02', 3)) FROM numbers(10);
SELECT DISTINCT bitXor(materialize(toFixedString('abcdef', 6)), toFixedString('\x00\x01\x02\x03\x04\x05', 6)) FROM numbers(10);

-- Constant first argument, non-constant (materialized) second argument.
-- Expected results are the same as above.
SELECT DISTINCT bitXor(toFixedString('\x00\x01\x02', 3), materialize(toFixedString('abc', 3))) FROM numbers(10);
SELECT DISTINCT bitXor(toFixedString('\x00\x01\x02\x03\x04\x05', 6), materialize(toFixedString('abcdef', 6))) FROM numbers(10);