Skip to content

Commit

Permalink
Support global query hints
Browse files Browse the repository at this point in the history
* Register local / global hint in Calcite

* Support g_ prefix for global query hint name

* Translate global hint in analyzer

* Add tests

* Apply comments #1: global hint registration

* Apply comments #2: global hint flag identification

* Apply comments #3: global hint translation

* Apply comments #4: remove unnecessary virtual keyword

* Fixup a bug on allow_gpu_hashtable build hint for overlaps join

* Fixup a bug related to a query having multiple identical subqueries

* Add global hint tests related to overlaps join hashtable

* Apply comments #5: misc cleanup
  • Loading branch information
yoonminnam authored and andrewseidl committed Nov 9, 2021
1 parent 3ed2011 commit f094fec
Show file tree
Hide file tree
Showing 10 changed files with 642 additions and 138 deletions.
2 changes: 2 additions & 0 deletions QueryEngine/DataRecycler/HashtableRecycler.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include "DataRecycler.h"
#include "QueryEngine/JoinHashTable/HashJoin.h"
#include "QueryEngine/QueryHint.h"

struct QueryPlanMetaInfo {
QueryPlan query_plan_dag;
Expand All @@ -33,6 +34,7 @@ struct OverlapsHashTableMetaInfo {
// Optional metadata that accompanies a hash table handed to the hash table cache.
// Every member is an std::optional, so it stays empty unless the caller sets it.
struct HashtableCacheMetaInfo {
// Query plan information (QueryPlanMetaInfo), if provided.
std::optional<QueryPlanMetaInfo> query_plan_meta_info;
// Overlaps join hash table information (OverlapsHashTableMetaInfo), if provided.
std::optional<OverlapsHashTableMetaInfo> overlaps_meta_info;
// Query hints associated with the hash table (RegisteredQueryHint), if provided.
std::optional<RegisteredQueryHint> registered_query_hint;
};

class HashtableRecycler
Expand Down
39 changes: 29 additions & 10 deletions QueryEngine/JoinHashTable/OverlapsJoinHashTable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -588,11 +588,30 @@ void OverlapsJoinHashTable::reifyWithLayout(const HashType layout) {
<< query_hint.overlaps_keys_per_bin;
overlaps_target_entries_per_bin = query_hint.overlaps_keys_per_bin;
}
auto data_mgr = executor_->getDataMgr();
// We prioritize the CPU when building an overlaps join hash table. If a GPU is present
// and the user supplies the corresponding hint, we selectively allow the GPU to build it.
// However, if the user forces CPU as the execution device type, we must not use the GPU
// to build it, even when one is present.
auto allow_gpu_hashtable_build =
query_hint_.isHintRegistered(QueryHint::kOverlapsAllowGpuBuild) &&
query_hint_.overlaps_allow_gpu_build;
if (allow_gpu_hashtable_build) {
if (data_mgr->gpusPresent() &&
memory_level_ == Data_Namespace::MemoryLevel::GPU_LEVEL) {
VLOG(1) << "A user forces to build GPU hash table for this overlaps join operator";
} else {
allow_gpu_hashtable_build = false;
VLOG(1) << "A user forces to build GPU hash table for this overlaps join operator "
"but we "
"skip it since either GPU is not presented or CPU execution mode is set";
}
}

std::vector<ColumnsForDevice> columns_per_device;
auto data_mgr = executor_->getDataMgr();
std::vector<std::unique_ptr<CudaAllocator>> dev_buff_owners;
if (memory_level_ == Data_Namespace::MemoryLevel::GPU_LEVEL) {
if (memory_level_ == Data_Namespace::MemoryLevel::GPU_LEVEL ||
allow_gpu_hashtable_build) {
for (int device_id = 0; device_id < device_count_; ++device_id) {
dev_buff_owners.emplace_back(std::make_unique<CudaAllocator>(data_mgr, device_id));
}
Expand All @@ -615,7 +634,8 @@ void OverlapsJoinHashTable::reifyWithLayout(const HashType layout) {
const auto columns_for_device =
fetchColumnsForDevice(fragments,
device_id,
memory_level_ == Data_Namespace::MemoryLevel::GPU_LEVEL
memory_level_ == Data_Namespace::MemoryLevel::GPU_LEVEL ||
allow_gpu_hashtable_build
? dev_buff_owners[device_id].get()
: nullptr);
columns_per_device.push_back(columns_for_device);
Expand Down Expand Up @@ -1747,15 +1767,13 @@ std::set<DecodedJoinHashBufferEntry> OverlapsJoinHashTable::toSet(

// Decide the memory level on which the overlaps join hash table is built.
//
// GPU_LEVEL is returned only when all of the following hold:
//   - the kOverlapsAllowGpuBuild hint is registered and enabled,
//   - the data manager reports that GPUs are present, and
//   - this join's memory level is already GPU_LEVEL (so a CPU-only execution
//     setting is never overridden by the hint).
// In every other case CPU_LEVEL is returned. `inner_outer_pairs` is not consulted.
Data_Namespace::MemoryLevel OverlapsJoinHashTable::getEffectiveMemoryLevel(
    const std::vector<InnerOuter>& inner_outer_pairs) const {
  if (query_hint_.isHintRegistered(QueryHint::kOverlapsAllowGpuBuild) &&
      query_hint_.overlaps_allow_gpu_build &&
      this->executor_->getDataMgr()->gpusPresent() &&
      memory_level_ == Data_Namespace::MemoryLevel::GPU_LEVEL) {
    return Data_Namespace::MemoryLevel::GPU_LEVEL;
  }
  // otherwise, try to build on CPU
  return Data_Namespace::MemoryLevel::CPU_LEVEL;
}

Expand Down Expand Up @@ -1812,6 +1830,7 @@ void OverlapsJoinHashTable::putHashTableOnCpuToCache(
CHECK(hashtable_ptr && !hashtable_ptr->getGpuBuffer());
HashtableCacheMetaInfo meta_info;
meta_info.overlaps_meta_info = getOverlapsHashTableMetaInfo();
meta_info.registered_query_hint = query_hint_;
hash_table_cache_->putItemToCache(
key,
hashtable_ptr,
Expand Down
129 changes: 76 additions & 53 deletions QueryEngine/QueryHint.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#ifndef OMNISCI_QUERYHINT_H
#define OMNISCI_QUERYHINT_H

#include <algorithm>
#include <optional>

#include <boost/algorithm/string.hpp>
Expand Down Expand Up @@ -49,38 +50,46 @@ static const std::unordered_map<std::string, QueryHint> SupportedQueryHints = {
{"overlaps_no_cache", QueryHint::kOverlapsNoCache},
{"overlaps_keys_per_bin", QueryHint::kOverlapsKeysPerBin}};

// Identifies a hint by name together with a flag saying whether it is a global hint.
struct HintIdentifier {
  // Stores both arguments unchanged; the name is copied.
  HintIdentifier(bool global_hint, const std::string& hint_name)
      : global_hint{global_hint}, hint_name{hint_name} {}

  bool global_hint;       // value of the constructor's global_hint argument
  std::string hint_name;  // copy of the constructor's hint_name argument
};

class ExplainedQueryHint {
// this class represents parsed query hint's specification
// our query AST analyzer translates query hint string to understandable form which we
// called "ExplainedQueryHint"
public:
ExplainedQueryHint(QueryHint hint,
bool query_hint,
bool global_hint,
bool is_marker,
bool has_kv_type_options)
: hint_(hint)
, query_hint_(query_hint)
, global_hint_(global_hint)
, is_marker_(is_marker)
, has_kv_type_options_(has_kv_type_options) {}

ExplainedQueryHint(QueryHint hint,
bool query_hint,
bool global_hint,
bool is_marker,
bool has_kv_type_options,
std::vector<std::string>& list_options)
: hint_(hint)
, query_hint_(query_hint)
, global_hint_(global_hint)
, is_marker_(is_marker)
, has_kv_type_options_(has_kv_type_options)
, list_options_(std::move(list_options)) {}

ExplainedQueryHint(QueryHint hint,
bool query_hint,
bool global_hint,
bool is_marker,
bool has_kv_type_options,
std::unordered_map<std::string, std::string>& kv_options)
: hint_(hint)
, query_hint_(query_hint)
, global_hint_(global_hint)
, is_marker_(is_marker)
, has_kv_type_options_(has_kv_type_options)
, kv_options_(std::move(kv_options)) {}
Expand All @@ -107,7 +116,7 @@ class ExplainedQueryHint {

const QueryHint getHint() const { return hint_; }

bool isQueryHint() const { return query_hint_; }
bool isGlobalHint() const { return global_hint_; }

// Returns the is_marker_ flag unchanged.
// NOTE(review): is_marker_ is described in this class as "true if this has no extra
// options", so this accessor appears to report the opposite of what its name says.
// Confirm how callers use it before changing the returned value.
bool hasOptions() const { return is_marker_; }

Expand All @@ -117,7 +126,7 @@ class ExplainedQueryHint {
QueryHint hint_;
// Set true if this hint affects globally
// Otherwise it just affects the node which this hint is included (aka table hint)
bool query_hint_;
bool global_hint_;
// set true if this has no extra options (neither list_options nor kv_options)
bool is_marker_;
// Set true if it is not a marker and has key-value type options
Expand All @@ -136,38 +145,64 @@ struct RegisteredQueryHint {
// registered and its detailed info such as the hint's parameter values given by user
// Default state: no hint is registered.
// Every boolean hint flag starts cleared, matching registered_hint being all false.
// Value-carrying hints start from their defaults: the largest double for the bucket
// threshold, and the g_overlaps_* globals for the max size and keys-per-bin.
RegisteredQueryHint()
    : cpu_mode(false)
    , columnar_output(false)
    , rowwise_output(false)
    , overlaps_bucket_threshold(std::numeric_limits<double>::max())
    , overlaps_max_size(g_overlaps_max_table_size_bytes)
    , overlaps_allow_gpu_build(false)
    , overlaps_no_cache(false)
    , overlaps_keys_per_bin(g_overlaps_target_entries_per_bin)
    , registered_hint(QueryHint::kHintCount, false) {}

// Copy assignment: copies each hint setting and the registered_hint flags from `other`,
// one member at a time, and returns *this.
RegisteredQueryHint& operator=(const RegisteredQueryHint& other) {
cpu_mode = other.cpu_mode;
columnar_output = other.columnar_output;
rowwise_output = other.rowwise_output;
overlaps_bucket_threshold = other.overlaps_bucket_threshold;
overlaps_max_size = other.overlaps_max_size;
overlaps_allow_gpu_build = other.overlaps_allow_gpu_build;
overlaps_no_cache = other.overlaps_no_cache;
overlaps_keys_per_bin = other.overlaps_keys_per_bin;
registered_hint = other.registered_hint;
return *this;
}

RegisteredQueryHint(const RegisteredQueryHint& other) {
cpu_mode = other.cpu_mode;
columnar_output = other.columnar_output;
rowwise_output = other.rowwise_output;
overlaps_bucket_threshold = other.overlaps_bucket_threshold;
overlaps_max_size = other.overlaps_max_size;
overlaps_allow_gpu_build = other.overlaps_allow_gpu_build;
overlaps_no_cache = other.overlaps_no_cache;
overlaps_keys_per_bin = other.overlaps_keys_per_bin;
registered_hint = other.registered_hint;
// Overlay the hints registered in `global_hints` on top of this (local) hint set and
// return the combined result; neither operand is modified.
//
// A hint that is not registered globally keeps its local state. A hint that is
// registered globally is marked registered in the result: marker-style hints are
// switched on, and value-carrying hints take the global value, so the global side
// wins when both sides set the same hint.
RegisteredQueryHint operator||(const RegisteredQueryHint& global_hints) const {
  CHECK_EQ(registered_hint.size(), global_hints.registered_hint.size());
  RegisteredQueryHint merged(*this);

  const int hint_count = static_cast<int>(QueryHint::kHintCount);
  for (int hint_idx = 0; hint_idx < hint_count; ++hint_idx) {
    if (!global_hints.registered_hint.at(hint_idx)) {
      // not registered globally: leave the local state untouched
      continue;
    }
    merged.registered_hint.at(hint_idx) = true;
    switch (hint_idx) {
      case static_cast<int>(QueryHint::kCpuMode):
        merged.cpu_mode = true;
        break;
      case static_cast<int>(QueryHint::kColumnarOutput):
        merged.columnar_output = true;
        break;
      case static_cast<int>(QueryHint::kRowwiseOutput):
        merged.rowwise_output = true;
        break;
      case static_cast<int>(QueryHint::kOverlapsBucketThreshold):
        merged.overlaps_bucket_threshold = global_hints.overlaps_bucket_threshold;
        break;
      case static_cast<int>(QueryHint::kOverlapsMaxSize):
        merged.overlaps_max_size = global_hints.overlaps_max_size;
        break;
      case static_cast<int>(QueryHint::kOverlapsAllowGpuBuild):
        merged.overlaps_allow_gpu_build = true;
        break;
      case static_cast<int>(QueryHint::kOverlapsNoCache):
        merged.overlaps_no_cache = true;
        break;
      case static_cast<int>(QueryHint::kOverlapsKeysPerBin):
        merged.overlaps_keys_per_bin = global_hints.overlaps_keys_per_bin;
        break;
      default:
        // registered hint without an associated setting: only the flag is carried over
        break;
    }
  }
  return merged;
}

// general query execution
Expand All @@ -189,35 +224,23 @@ struct RegisteredQueryHint {
public:
// Map a hint name to its QueryHint value, ignoring letter case.
// The name is lowercased before the lookup in SupportedQueryHints.
// Returns QueryHint::kInvalidHint when the lowercased name is not found.
static QueryHint translateQueryHint(const std::string& hint_name) {
  const auto lowered_hint_name = boost::algorithm::to_lower_copy(hint_name);
  const auto it = SupportedQueryHints.find(lowered_hint_name);
  return it == SupportedQueryHints.end() ? QueryHint::kInvalidHint : it->second;
}

// Returns true when at least one entry of registered_hint is set, false otherwise
// (including when registered_hint is empty).
bool isAnyQueryHintDelivered() const {
  return std::any_of(registered_hint.begin(),
                     registered_hint.end(),
                     [](const bool registered) { return registered; });
}

// Mark `hint` as registered by setting its entry in registered_hint.
// The entry is accessed through the bounds-checked at(); presumably registered_hint is
// a std::vector<bool>, in which case an out-of-range hint throws std::out_of_range
// rather than being ignored silently.
void registerHint(const QueryHint hint) {
  const auto hint_class = static_cast<int>(hint);
  registered_hint.at(hint_class) = true;
}

const bool isHintRegistered(const QueryHint hint) const {
bool isHintRegistered(const QueryHint hint) const {
const auto hint_class = static_cast<int>(hint);
if (hint_class >= 0 && hint_class < QueryHint::kHintCount) {
return registered_hint[hint_class];
}
return false;
return registered_hint.at(hint_class);
}
};

Expand Down
Loading

0 comments on commit f094fec

Please sign in to comment.