Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions be/cmake/thirdparty.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ add_thirdparty(curl)
add_thirdparty(lz4)
add_thirdparty(thrift)
add_thirdparty(thriftnb)
add_thirdparty(crc32c)

add_thirdparty(libevent_core LIBNAME "lib/libevent_core.a")
add_thirdparty(libevent_openssl LIBNAME "lib/libevent_openssl.a")
Expand Down
4 changes: 2 additions & 2 deletions be/src/exprs/block_bloom_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ class BlockBloomFilter {
// Same as above with convenience of hashing the key.
void insert(const StringRef& key) noexcept {
if (key.data) {
insert(HashUtil::crc_hash(key.data, uint32_t(key.size), _hash_seed));
insert(HashUtil::crc32c_hash(key.data, uint32_t(key.size), _hash_seed));
}
}

Expand Down Expand Up @@ -105,7 +105,7 @@ class BlockBloomFilter {
// Same as above with convenience of hashing the key.
bool find(const StringRef& key) const noexcept {
if (key.data) {
return find(HashUtil::crc_hash(key.data, uint32_t(key.size), _hash_seed));
return find(HashUtil::crc32c_hash(key.data, uint32_t(key.size), _hash_seed));
}
return false;
}
Expand Down
254 changes: 11 additions & 243 deletions be/src/util/crc32c.cpp

Large diffs are not rendered by default.

23 changes: 3 additions & 20 deletions be/src/util/crc32c.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,39 +15,22 @@
// specific language governing permissions and limitations
// under the License.

// The following code is modified from RocksDB:
// https://github.com/facebook/rocksdb/blob/master/util/crc32c.h

#pragma once

#include <stddef.h>
#include <stdint.h>

#include <vector>

#include "util/slice.h"

namespace doris {
namespace crc32c {

// Return the crc32c of concat(A, data[0,n-1]) where init_crc is the
// crc32c of some string A. Extend() is often used to maintain the
// crc32c of a stream of data.
extern uint32_t Extend(uint32_t init_crc, const char* data, size_t n);
uint32_t Extend(uint32_t crc, const char* data, size_t n);

// Return the crc32c of data[0,n-1]
inline uint32_t Value(const char* data, size_t n) {
return Extend(0, data, n);
}
uint32_t Value(const char* data, size_t n);

// Return the crc32c of data content in all slices
inline uint32_t Value(const std::vector<Slice>& slices) {
uint32_t crc = 0;
for (auto& slice : slices) {
crc = Extend(crc, slice.get_data(), slice.get_size());
}
return crc;
}
uint32_t Value(const std::vector<Slice>& slices);

} // namespace crc32c
} // namespace doris
15 changes: 9 additions & 6 deletions be/src/util/hash_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

#include "common/compiler_util.h" // IWYU pragma: keep
#include "util/cpu_info.h"
#include "util/crc32c.h"
#include "util/hash/city.h"
#include "util/murmur_hash3.h"
#include "util/sse_util.hpp"
Expand All @@ -49,7 +50,12 @@ class HashUtil {
return (uint32_t)crc32(hash, (const unsigned char*)(&INT_VALUE), 4);
}

#if defined(__SSE4_2__) || defined(__aarch64__)
// Hashes `bytes` bytes starting at `data` with CRC32C, continuing from the
// running value `hash` (used as the seed / initial CRC).
// ATTN: the result differs from zlib_crc32 because CRC32C uses a different
// polynomial. CRC32C performs better than zlib_crc32/crc_hash.
static uint32_t crc32c_hash(const void* data, uint32_t bytes, uint32_t hash) {
    // crc32c::Extend takes a char buffer, so reinterpret the opaque pointer first.
    const char* buffer = static_cast<const char*>(data);
    return crc32c::Extend(hash, buffer, bytes);
}

// Compute the Crc32 hash for data using SSE4 instructions. The input hash parameter is
// the current hash/seed value.
// This should only be called if SSE is supported.
Expand All @@ -59,6 +65,8 @@ class HashUtil {
// NOTE: Any changes made to this function need to be reflected in Codegen::GetHashFn.
// TODO: crc32 hashes with different seeds do not result in different hash functions.
// The resulting hashes are correlated.
// ATTN: Avoid using this function in new code; use crc32c_hash instead.
// It is retained because it is uncertain whether replacing it would cause compatibility issues with historical data.
static uint32_t crc_hash(const void* data, uint32_t bytes, uint32_t hash) {
if (!CpuInfo::is_supported(CpuInfo::SSE4_2)) {
return zlib_crc_hash(data, bytes, hash);
Expand Down Expand Up @@ -117,11 +125,6 @@ class HashUtil {

return converter.u64;
}
#else
static uint32_t crc_hash(const void* data, uint32_t bytes, uint32_t hash) {
return zlib_crc_hash(data, bytes, hash);
}
#endif

// refer to https://github.com/apache/commons-codec/blob/master/src/main/java/org/apache/commons/codec/digest/MurmurHash3.java
static const uint32_t MURMUR3_32_SEED = 104729;
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/columns/column_dictionary.h
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ class ColumnDictI32 final : public COWHelper<IColumn, ColumnDictI32> {
if (type == FieldType::OLAP_FIELD_TYPE_CHAR) {
len = strnlen(sv.data, sv.size);
}
uint32_t hash_val = HashUtil::crc_hash(sv.data, static_cast<uint32_t>(len), 0);
uint32_t hash_val = HashUtil::crc32c_hash(sv.data, static_cast<uint32_t>(len), 0);
_hash_values[code] = hash_val;
_compute_hash_value_flags[code] = 1;
return _hash_values[code];
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/functions/function_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -4598,7 +4598,7 @@ class FunctionNgramSearch : public IFunction {

uint32_t sub_str_hash(const char* data, int32_t length) const {
constexpr static uint32_t seed = 0;
return HashUtil::crc_hash(data, length, seed);
return HashUtil::crc32c_hash(data, length, seed);
}

template <bool column_const>
Expand Down
Loading