Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ISSUES-8317 fix negative with bloom filter #8566

Merged
merged 2 commits into from Jan 11, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
79 changes: 49 additions & 30 deletions dbms/src/Interpreters/BloomFilterHash.h
Expand Up @@ -34,45 +34,64 @@ struct BloomFilterHash
15033938188484401405ULL, 18286745649494826751ULL, 6852245486148412312ULL, 8886056245089344681ULL, 10151472371158292780ULL
};

static ColumnPtr hashWithField(const IDataType * data_type, const Field & field)
template <typename FieldGetType, typename FieldType>
static UInt64 getNumberTypeHash(const Field & field)
{
WhichDataType which(data_type);
UInt64 hash = 0;
bool unexpected_type = false;
/// For negative values, convert to FieldType before the bit cast so that the sign bit ends up in the right place.
return field.isNull() ? intHash64(0) : intHash64(ext::bit_cast<UInt64>(FieldType(field.safeGet<FieldGetType>())));
}

if (field.isNull())
static UInt64 getStringTypeHash(const Field & field)
{
if (!field.isNull())
{
if (which.isInt() || which.isUInt() || which.isEnum() || which.isDateOrDateTime() || which.isFloat())
hash = intHash64(0);
else if (which.isString())
hash = CityHash_v1_0_2::CityHash64("", 0);
else if (which.isFixedString())
{
const auto * fixed_string_type = typeid_cast<const DataTypeFixedString *>(data_type);
const std::vector<char> value(fixed_string_type->getN(), 0);
hash = CityHash_v1_0_2::CityHash64(value.data(), value.size());
}
else
unexpected_type = true;
const auto & value = field.safeGet<String>();
return CityHash_v1_0_2::CityHash64(value.data(), value.size());
}
else if (which.isUInt() || which.isDateOrDateTime())
hash = intHash64(field.safeGet<UInt64>());
else if (which.isInt() || which.isEnum())
hash = intHash64(ext::bit_cast<UInt64>(field.safeGet<Int64>()));
else if (which.isFloat32() || which.isFloat64())
hash = intHash64(ext::bit_cast<UInt64>(field.safeGet<Float64>()));
else if (which.isString() || which.isFixedString())

return CityHash_v1_0_2::CityHash64("", 0);
}

static UInt64 getFixedStringTypeHash(const Field & field, const IDataType * type)
{
if (!field.isNull())
{
const auto & value = field.safeGet<String>();
hash = CityHash_v1_0_2::CityHash64(value.data(), value.size());
return CityHash_v1_0_2::CityHash64(value.data(), value.size());
}
else
unexpected_type = true;

if (unexpected_type)
throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::BAD_ARGUMENTS);
const auto * fixed_string_type = typeid_cast<const DataTypeFixedString *>(type);
const std::vector<char> value(fixed_string_type->getN(), 0);
return CityHash_v1_0_2::CityHash64(value.data(), value.size());
}

static ColumnPtr hashWithField(const IDataType * data_type, const Field & field)
{
const auto & build_hash_column = [&](const UInt64 & hash) -> ColumnPtr
{
return ColumnConst::create(ColumnUInt64::create(1, hash), 1);
};


return ColumnConst::create(ColumnUInt64::create(1, hash), 1);
WhichDataType which(data_type);

if (which.isUInt8()) return build_hash_column(getNumberTypeHash<UInt64, UInt8>(field));
else if (which.isUInt16()) return build_hash_column(getNumberTypeHash<UInt64, UInt16>(field));
else if (which.isUInt32()) return build_hash_column(getNumberTypeHash<UInt64, UInt32>(field));
else if (which.isUInt64()) return build_hash_column(getNumberTypeHash<UInt64, UInt64>(field));
else if (which.isInt8()) return build_hash_column(getNumberTypeHash<Int64, Int8>(field));
else if (which.isInt16()) return build_hash_column(getNumberTypeHash<Int64, Int16>(field));
else if (which.isInt32()) return build_hash_column(getNumberTypeHash<Int64, Int32>(field));
else if (which.isInt64()) return build_hash_column(getNumberTypeHash<Int64, Int64>(field));
else if (which.isEnum8()) return build_hash_column(getNumberTypeHash<Int64, Int8>(field));
else if (which.isEnum16()) return build_hash_column(getNumberTypeHash<Int64, Int16>(field));
else if (which.isDate()) return build_hash_column(getNumberTypeHash<UInt64, UInt16>(field));
else if (which.isDateTime()) return build_hash_column(getNumberTypeHash<UInt64, UInt32>(field));
else if (which.isFloat32()) return build_hash_column(getNumberTypeHash<Float64, Float64>(field));
else if (which.isFloat64()) return build_hash_column(getNumberTypeHash<Float64, Float64>(field));
else if (which.isString()) return build_hash_column(getStringTypeHash(field));
else if (which.isFixedString()) return build_hash_column(getFixedStringTypeHash(field, data_type));
else throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::BAD_ARGUMENTS);
}

static ColumnPtr hashWithColumn(const DataTypePtr & data_type, const ColumnPtr & column, size_t pos, size_t limit)
Expand Down
@@ -0,0 +1,4 @@
-1 -1 -1 -1
-1 -1 -1 -1
-1 -1 -1 -1
-1 -1 -1 -1
@@ -0,0 +1,14 @@
-- Checks that equality lookups on negative values work for every signed
-- integer width when the columns are covered by a bloom_filter skipping index.
SET allow_experimental_data_skipping_indices = 1;

DROP TABLE IF EXISTS test;

CREATE TABLE test
(
    `int8` Int8,
    `int16` Int16,
    `int32` Int32,
    `int64` Int64,
    INDEX idx (`int8`, `int16`, `int32`, `int64`) TYPE bloom_filter(0.01) GRANULARITY 8192
)
ENGINE = MergeTree()
ORDER BY `int8`;

-- A single row holding -1 in every column.
INSERT INTO test VALUES (-1, -1, -1, -1);

-- One lookup per column; each is expected to return the inserted row.
SELECT * FROM test WHERE `int8` = -1;
SELECT * FROM test WHERE `int16` = -1;
SELECT * FROM test WHERE `int32` = -1;
SELECT * FROM test WHERE `int64` = -1;

DROP TABLE IF EXISTS test;