From 9406207bfacd0c3afdfe412bf87c271e935414dc Mon Sep 17 00:00:00 2001 From: "Yunfeng,Wu" Date: Mon, 25 Mar 2019 15:55:31 +0800 Subject: [PATCH 01/73] Add Elasticsearch scan reader (#802) --- be/src/http/http_client.cpp | 10 +- be/src/http/http_client.h | 7 +- be/src/util/CMakeLists.txt | 5 +- be/src/util/es_scan_reader.cpp | 132 ++++++++++++++ be/src/util/es_scan_reader.h | 73 ++++++++ be/src/util/es_scroll_parser.cpp | 78 ++++++++ be/src/util/es_scroll_parser.h | 44 +++++ be/src/util/es_scroll_query.cpp | 89 +++++++++ be/src/util/es_scroll_query.h | 48 +++++ be/test/util/CMakeLists.txt | 1 + be/test/util/es_scan_reader_test.cpp | 260 +++++++++++++++++++++++++++ 11 files changed, 741 insertions(+), 6 deletions(-) create mode 100644 be/src/util/es_scan_reader.cpp create mode 100644 be/src/util/es_scan_reader.h create mode 100644 be/src/util/es_scroll_parser.cpp create mode 100644 be/src/util/es_scroll_parser.h create mode 100644 be/src/util/es_scroll_query.cpp create mode 100644 be/src/util/es_scroll_query.h create mode 100644 be/test/util/es_scan_reader_test.cpp diff --git a/be/src/http/http_client.cpp b/be/src/http/http_client.cpp index 88c4374fe6f618..ca061cfea88a0c 100644 --- a/be/src/http/http_client.cpp +++ b/be/src/http/http_client.cpp @@ -145,12 +145,18 @@ size_t HttpClient::on_response_data(const void* data, size_t length) { // return execute(callback); // } -Status HttpClient::execute_post_request(const std::string& post_data, std::string* response) { +Status HttpClient::execute_post_request(const std::string& payload, std::string* response) { set_method(POST); - set_post_body(post_data); + set_payload(payload); return execute(response); } +Status HttpClient::execute_delete_request(const std::string& payload, std::string* response) { + set_method(DELETE); + set_payload(payload); + return execute(response); +} + Status HttpClient::execute(const std::function& callback) { _callback = &callback; auto code = curl_easy_perform(_curl); diff --git 
a/be/src/http/http_client.h b/be/src/http/http_client.h index 83a27b8d63646f..d54bc680d33aaf 100644 --- a/be/src/http/http_client.h +++ b/be/src/http/http_client.h @@ -61,8 +61,7 @@ class HttpClient { curl_easy_setopt(_curl, CURLOPT_HTTPHEADER, _header_list); } - // you must set CURLOPT_POSTFIELDSIZE before CURLOPT_COPYPOSTFIELDS options, otherwise will cause request hanging up - void set_post_body(const std::string& post_body) { + void set_payload(const std::string& post_body) { curl_easy_setopt(_curl, CURLOPT_POSTFIELDSIZE, (long)post_body.length()); curl_easy_setopt(_curl, CURLOPT_COPYPOSTFIELDS, post_body.c_str()); } @@ -114,7 +113,9 @@ class HttpClient { // a file to local_path Status download(const std::string& local_path); - Status execute_post_request(const std::string& post_data, std::string* response); + Status execute_post_request(const std::string& payload, std::string* response); + + Status execute_delete_request(const std::string& payload, std::string* response); // execute a simple method, and its response is saved in response argument Status execute(std::string* response); diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt index 0c3884e4a65a11..e190ccfac2a788 100644 --- a/be/src/util/CMakeLists.txt +++ b/be/src/util/CMakeLists.txt @@ -74,7 +74,9 @@ add_library(Util STATIC aes_util.cpp string_util.cpp md5.cpp - frontend_helper.cpp + es_scan_reader.cpp + es_scroll_query.cpp + es_scroll_parser.cpp ) #ADD_BE_TEST(integer-array-test) @@ -88,3 +90,4 @@ add_library(Util STATIC #ADD_BE_TEST(bit-util-test) #ADD_BE_TEST(rle-test) ##ADD_BE_TEST(perf-counters-test) +##ADD_BE_TEST(es-scan-reader-test) diff --git a/be/src/util/es_scan_reader.cpp b/be/src/util/es_scan_reader.cpp new file mode 100644 index 00000000000000..36005c84f4e5ed --- /dev/null +++ b/be/src/util/es_scan_reader.cpp @@ -0,0 +1,132 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <map>
+#include <string>
+#include "es_scan_reader.h"
+#include "es_scroll_query.h"
+#include "common/logging.h"
+#include "common/status.h"
+#include <sstream>
+
+namespace doris {
+// query-string fragment limiting the fields elasticsearch sends back for every scroll page
+const std::string REQUEST_SCROLL_FILTER_PATH = "filter_path=_scroll_id,hits.hits._source,hits.total,_id,hits.hits._source.fields";
+const std::string REQUEST_SCROLL_PATH = "_scroll";
+const std::string REQUEST_PREFERENCE_PREFIX = "&preference=shards:";
+const std::string REQUEST_SEARCH_SCROLL_PATH = "/_search/scroll";
+const std::string REQUEST_SEPARATOR = "/";
+// keep-alive passed to elasticsearch for the scroll context
+const std::string REQUEST_SCROLL_TIME = "5m";
+
+// Builds the two request urls from `target` (scheme://host:port) and `props`.
+// KEY_INDEX and KEY_TYPE are mandatory: std::map::at throws std::out_of_range when one is absent.
+// KEY_USER_NAME, KEY_PASS_WORD, KEY_SHARDS and KEY_QUERY are optional and default to empty.
+ESScanReader::ESScanReader(const std::string& target, uint16_t size, const std::map<std::string, std::string>& props) {
+    LOG(INFO) << "ESScanReader ";
+    _target = target;
+    _batch_size = size;
+    _index = props.at(KEY_INDEX);
+    _type = props.at(KEY_TYPE);
+    // copies an optional property into *dest, one lookup per key
+    auto assign_if_present = [&props](const char* key, std::string* dest) {
+        auto it = props.find(key);
+        if (it != props.end()) {
+            *dest = it->second;
+        }
+    };
+    assign_if_present(KEY_USER_NAME, &_user_name);
+    assign_if_present(KEY_PASS_WORD, &_passwd);
+    assign_if_present(KEY_SHARDS, &_shards);
+    assign_if_present(KEY_QUERY, &_query);
+
+    _init_scroll_url = _target + REQUEST_SEPARATOR + _index + REQUEST_SEPARATOR + _type
+            + "/_search?scroll=" + REQUEST_SCROLL_TIME;
+    // only pin the request to shards when the caller named some; an empty
+    // "preference=shards:" carries no shard list at all
+    if (!_shards.empty()) {
+        _init_scroll_url += REQUEST_PREFERENCE_PREFIX + _shards;
+    }
+    _init_scroll_url += "&" + REQUEST_SCROLL_FILTER_PATH;
+    _next_scroll_url = _target + REQUEST_SEARCH_SCROLL_PATH + "?" + REQUEST_SCROLL_FILTER_PATH;
+    // defined state even when get_next() is (wrongly) called before open()
+    _is_first = true;
+    _eos = false;
+    _parser.set_batch_size(size);
+}
+
+ESScanReader::~ESScanReader() {
+}
+
+// Sends the initial search request and caches its response for the first get_next() call.
+// Returns an error when the request fails, the http status is not 200, or the body cannot be parsed.
+Status ESScanReader::open() {
+    _is_first = true;
+    RETURN_IF_ERROR(_network_client.init(_init_scroll_url));
+    _network_client.set_basic_auth(_user_name, _passwd);
+    _network_client.set_content_type("application/json");
+    // phase open, we cached the first response for `get_next` phase
+    RETURN_IF_ERROR(_network_client.execute_post_request(_query, &_cached_response));
+    long status = _network_client.get_http_status();
+    if (status != 200) {
+        LOG(WARNING) << "invalid response http status for open: " << status;
+        return Status(_cached_response);
+    }
+    VLOG(1) << "open _cached response: " << _cached_response;
+    RETURN_IF_ERROR(_parser.parse(_cached_response));
+    // remember the scroll context: the follow-up scroll and clear requests must carry it
+    _scroll_id = _parser.get_scroll_id();
+    // ScrollParser::has_next() returns the parser's end-of-stream flag (true == finished)
+    _eos = _parser.has_next();
+    return Status::OK;
+}
+
+// Hands back the next raw scroll response in *response and sets *eos.
+// When *eos comes back true on the first call, *response is left untouched.
+Status ESScanReader::get_next(bool* eos, std::string* response) {
+    // if is first scroll request, should return the cached response
+    if (_is_first) {
+        // maybe the index or shard is empty
+        if (_eos) {
+            *eos = true;
+            return Status::OK;
+        }
+        _is_first = false;
+        *eos = _eos;
+        *response = _cached_response;
+        return Status::OK;
+    }
+    RETURN_IF_ERROR(_network_client.init(_next_scroll_url));
+    _network_client.set_basic_auth(_user_name, _passwd);
+    _network_client.set_content_type("application/json");
+    _network_client.set_timeout_ms(5 * 1000);
+    RETURN_IF_ERROR(_network_client.execute_post_request(
+            ESScrollQueryBuilder::build_next_scroll_body(_scroll_id, REQUEST_SCROLL_TIME), response));
+    long status = _network_client.get_http_status();
+    if (status == 404) {
+        LOG(WARNING) << "request scroll search failure 404["
+                     << ", response: " << (response->empty() ? "empty response" : *response);
+        return Status("No search context found for " + _scroll_id);
+    }
+    if (status != 200) {
+        LOG(WARNING) << "request scroll search failure["
+                     << "http status" << status
+                     << ", response: " << (response->empty() ? "empty response" : *response);
+        return Status("request scroll search failure: " + (response->empty() ? "empty response" : *response));
+    }
+    RETURN_IF_ERROR(_parser.parse(*response));
+    // every page may carry a new scroll id; always continue with the latest one
+    _scroll_id = _parser.get_scroll_id();
+    *eos = _eos = _parser.has_next();
+    return Status::OK;
+}
+
+// Clears the scroll context on the elasticsearch side.
+// Succeeds without a request when no scroll id was ever obtained.
+Status ESScanReader::close() {
+    if (_scroll_id.empty()) {
+        // open() never produced a scroll context, nothing to release
+        return Status::OK;
+    }
+    std::string scratch_target = _target + REQUEST_SEARCH_SCROLL_PATH;
+    RETURN_IF_ERROR(_network_client.init(scratch_target));
+    _network_client.set_basic_auth(_user_name, _passwd);
+    _network_client.set_method(DELETE);
+    _network_client.set_content_type("application/json");
+    _network_client.set_timeout_ms(5 * 1000);
+    std::string response;
+    RETURN_IF_ERROR(_network_client.execute_delete_request(
+            ESScrollQueryBuilder::build_clear_scroll_body(_scroll_id), &response));
+    if (_network_client.get_http_status() == 200) {
+        return Status::OK;
+    } else {
+        return Status("es_scan_reader delete scroll context failure");
+    }
+}
+}
diff --git a/be/src/util/es_scan_reader.h b/be/src/util/es_scan_reader.h
new file mode 100644
index 00000000000000..45c413e7df3d6f
--- /dev/null
+++ b/be/src/util/es_scan_reader.h
@@ -0,0 +1,73 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <map>
+#include <string>
+#include "http/http_client.h"
+#include "es_scroll_parser.h"
+
+// NOTE(review): a using-declaration at header scope leaks into every includer;
+// kept because code outside this view may rely on it.
+using std::string;
+
+namespace doris {
+
+class Status;
+
+// Reads documents from one elasticsearch index/type through the scroll API:
+// open() sends the initial search, get_next() pages through the results and
+// close() clears the scroll context.
+class ESScanReader {
+
+public:
+    // keys understood in the `props` map handed to the constructor
+    static constexpr const char* KEY_USER_NAME = "user";
+    static constexpr const char* KEY_PASS_WORD = "passwd";
+    static constexpr const char* KEY_INDEX = "index";
+    static constexpr const char* KEY_TYPE = "type";
+    static constexpr const char* KEY_SHARDS = "shards";
+    static constexpr const char* KEY_QUERY = "query";
+    static constexpr const char* KEY_BATCH_SIZE = "batch_size";
+    // target: elasticsearch http address; size: documents per scroll batch;
+    // props: connection/query properties keyed by the KEY_* constants above
+    ESScanReader(const std::string& target, uint16_t size, const std::map<std::string, std::string>& props);
+    ~ESScanReader();
+
+    // launch the first scroll request; this method caches the first scroll response
+    // and get_next returns that cached response on its first invocation
+    Status open();
+    // invoke get_next to get the next batch of documents from elasticsearch
+    Status get_next(bool *eos, std::string* response);
+    // clear scroll context from elasticsearch
+    Status close();
+
+private:
+    std::string _target;
+    std::string _user_name;
+    std::string _passwd;
+    std::string _scroll_id;
+    HttpClient _network_client;
+    std::string _index;
+    std::string _type;
+    // push down filter
+    std::string _query;
+    // elasticsearch shards to fetch documents from
+    std::string _shards;
+    // distinguish the first scroll phase and the following scroll
+    bool _is_first = true;
+    std::string _init_scroll_url;
+    std::string _next_scroll_url;
+    bool _eos = false;
+    uint16_t _batch_size = 0;
+
+    std::string _cached_response;
+    ScrollParser _parser;
+};
+}
+
diff --git
a/be/src/util/es_scroll_parser.cpp b/be/src/util/es_scroll_parser.cpp
new file mode 100644
index 00000000000000..bd2069df98b2f0
--- /dev/null
+++ b/be/src/util/es_scroll_parser.cpp
@@ -0,0 +1,78 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#include "es_scroll_parser.h"
+#include "rapidjson/document.h"
+#include "common/logging.h"
+#include "common/status.h"
+
+namespace doris {
+
+const char* FIELD_SCROLL_ID = "_scroll_id";
+const char* FIELD_HITS = "hits";
+const char* FIELD_INNER_HITS = "hits";
+const char* FIELD_SOURCE = "_source";
+const char* FIELD_TOTAL = "total";
+
+ScrollParser::ScrollParser() {
+    _eos = false;
+    _total = 0;
+    _size = 0;
+    _batch_size = 0;
+}
+
+ScrollParser::~ScrollParser() {
+}
+
+
+// Parses one scroll response of the shape
+//     { "_scroll_id": "...", "hits": { "total": 2, "hits": [ {}, {} ] } }
+// and records the scroll id, the total hit count, the number of documents in this
+// page and whether the scroll is exhausted. Returns an error Status (instead of
+// tripping a rapidjson assertion) when the body is not the expected json.
+Status ScrollParser::parse(const std::string& scroll_result) {
+    // per-response state
+    _size = 0;
+    _eos = false;
+
+    rapidjson::Document document_node;
+    document_node.Parse<0>(scroll_result.c_str());
+    if (document_node.HasParseError() || !document_node.IsObject()) {
+        return Status("invalid response from elasticsearch");
+    }
+    if (!document_node.HasMember(FIELD_SCROLL_ID) || !document_node[FIELD_SCROLL_ID].IsString()) {
+        return Status("maybe not a scroll request");
+    }
+    _scroll_id = document_node[FIELD_SCROLL_ID].GetString();
+
+    // { hits: { total : 2, "hits" : [ {}, {}, {} ]}}
+    if (!document_node.HasMember(FIELD_HITS) || !document_node[FIELD_HITS].IsObject()) {
+        return Status("invalid response from elasticsearch");
+    }
+    rapidjson::Value &outer_hits_node = document_node[FIELD_HITS];
+    // `total` lives inside the outer `hits` object, not at the document root
+    if (!outer_hits_node.HasMember(FIELD_TOTAL) || !outer_hits_node[FIELD_TOTAL].IsInt()) {
+        return Status("invalid response from elasticsearch");
+    }
+    _total = outer_hits_node[FIELD_TOTAL].GetInt();
+    if (_total == 0) {
+        _eos = true;
+        return Status::OK;
+    }
+    VLOG(1) << "es_scan_reader total hits: " << _total << " documents";
+    if (!outer_hits_node.HasMember(FIELD_INNER_HITS)) {
+        // a page without a hit array carries no documents: the scroll is drained.
+        // NOTE(review): presumably filter_path drops the empty array - confirm against the ES version in use
+        _eos = true;
+        return Status::OK;
+    }
+    rapidjson::Value &inner_hits_node = outer_hits_node[FIELD_INNER_HITS];
+    if (!inner_hits_node.IsArray()) {
+        return Status("invalid response from elasticsearch");
+    }
+    _size = inner_hits_node.Size();
+    // Only an empty page ends the scroll. Flagging a short but non-empty page as the end
+    // would make ESScanReader::get_next skip the documents that page still carries.
+    if (_size == 0) {
+        _eos = true;
+    }
+    return Status::OK;
+}
+
+// NOTE(review): despite its name this returns the end-of-stream flag (true == no more data);
+// callers assign the result to their own `eos`, so the meaning is kept as is.
+bool ScrollParser::has_next() {
+    return _eos;
+}
+
+// NOTE(review): declared `bool` in the header, so the document count collapses to
+// "page is non-empty"; the signature is left alone for compatibility.
+bool ScrollParser::count() {
+    return _size != 0;
+}
+
+// total number of hits reported by elasticsearch in the last parsed response
+uint32_t ScrollParser::total() {
+    return static_cast<uint32_t>(_total);
+}
+
+std::string ScrollParser::get_scroll_id() {
+    return _scroll_id;
+}
+}
diff --git a/be/src/util/es_scroll_parser.h b/be/src/util/es_scroll_parser.h
new file mode 100644
index 00000000000000..bd9dbbbac42dff
--- /dev/null
+++ b/be/src/util/es_scroll_parser.h
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.
See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#pragma once
+#include <cstdint>
+#include <string>
+
+namespace doris {
+
+class Status;
+// Extracts the bookkeeping fields (scroll id, hit counts, end-of-stream flag)
+// from one elasticsearch scroll response.
+class ScrollParser {
+
+public:
+    ScrollParser();
+    ~ScrollParser();
+    // scroll id found in the last parsed response
+    std::string get_scroll_id();
+    // NOTE(review): the bool return type truncates the document count to
+    // "non-zero or not"; probably meant to be an integer - confirm with callers
+    bool count();
+    uint32_t total();
+    // parse one scroll response body
+    Status parse(const std::string& scroll_result);
+    // NOTE(review): returns the end-of-stream flag, i.e. true means there is NO further data
+    bool has_next();
+    void set_batch_size(int batch_size) {
+        _batch_size = batch_size;
+    }
+
+private:
+    std::string _scroll_id;
+    // default member initializers keep every accessor well defined before the first parse()
+    bool _eos = false;
+    int _total = 0;
+    int _size = 0;
+    int _batch_size = 0;
+};
+}
diff --git a/be/src/util/es_scroll_query.cpp b/be/src/util/es_scroll_query.cpp
new file mode 100644
index 00000000000000..57e936d2284801
--- /dev/null
+++ b/be/src/util/es_scroll_query.cpp
@@ -0,0 +1,89 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "es_scroll_query.h"
+#include <string>
+#include <vector>
+#include "common/logging.h"
+#include "rapidjson/document.h"
+#include "rapidjson/stringbuffer.h"
+#include "rapidjson/writer.h"
+
+namespace doris {
+
+namespace {
+// Serializes a rapidjson document into its compact json text.
+std::string to_json_string(const rapidjson::Document& doc) {
+    rapidjson::StringBuffer buffer;
+    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+    doc.Accept(writer);
+    return buffer.GetString();
+}
+}
+
+ESScrollQueryBuilder::ESScrollQueryBuilder() {
+
+}
+
+ESScrollQueryBuilder::~ESScrollQueryBuilder() {
+
+}
+
+// Body of a follow-up scroll request: {"scroll_id": <scroll_id>, "scroll": <scroll>}
+// where `scroll` is the keep-alive to request (for example "5m").
+std::string ESScrollQueryBuilder::build_next_scroll_body(const std::string& scroll_id, const std::string& scroll) {
+    rapidjson::Document scroll_dsl;
+    rapidjson::Document::AllocatorType &allocator = scroll_dsl.GetAllocator();
+    scroll_dsl.SetObject();
+    rapidjson::Value scroll_id_value(scroll_id.c_str(), allocator);
+    scroll_dsl.AddMember("scroll_id", scroll_id_value, allocator);
+    rapidjson::Value scroll_value(scroll.c_str(), allocator);
+    scroll_dsl.AddMember("scroll", scroll_value, allocator);
+    return to_json_string(scroll_dsl);
+}
+
+// Body of a clear-scroll request: {"scroll_id": <scroll_id>}
+std::string ESScrollQueryBuilder::build_clear_scroll_body(const std::string& scroll_id) {
+    rapidjson::Document delete_scroll_dsl;
+    rapidjson::Document::AllocatorType &allocator = delete_scroll_dsl.GetAllocator();
+    delete_scroll_dsl.SetObject();
+    rapidjson::Value scroll_id_value(scroll_id.c_str(), allocator);
+    delete_scroll_dsl.AddMember("scroll_id", scroll_id_value, allocator);
+    return to_json_string(delete_scroll_dsl);
+}
+
+
+// Builds the initial search DSL:
+//   "_source": the selected fields (omitted when none were set),
+//   "sort":    ["_doc"],
+//   "size":    the configured batch size.
+std::string ESScrollQueryBuilder::build() {
+    rapidjson::Document es_query_dsl;
+    rapidjson::Document::AllocatorType &allocator = es_query_dsl.GetAllocator();
+    es_query_dsl.SetObject();
+    if (!_fields.empty()) {
+        rapidjson::Value source_node(rapidjson::kArrayType);
+        for (const auto& field_name : _fields) {
+            rapidjson::Value field(field_name.c_str(), allocator);
+            source_node.PushBack(field, allocator);
+        }
+        es_query_dsl.AddMember("_source", source_node, allocator);
+    }
+
+    rapidjson::Value sort_node(rapidjson::kArrayType);
+    rapidjson::Value field("_doc", allocator);
+    sort_node.PushBack(field, allocator);
+    es_query_dsl.AddMember("sort", sort_node, allocator);
+
+    es_query_dsl.AddMember("size", _size, allocator);
+
+    return to_json_string(es_query_dsl);
+}
+
+}
diff --git a/be/src/util/es_scroll_query.h b/be/src/util/es_scroll_query.h
new file mode 100644
index 00000000000000..766a0e09b60574
--- /dev/null
+++ b/be/src/util/es_scroll_query.h
@@ -0,0 +1,48 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+#pragma once
+#include <cstdint>
+#include <string>
+#include <vector>
+
+namespace doris {
+
+// Builds the json bodies used by the elasticsearch scroll workflow:
+// the initial search DSL, the follow-up scroll body and the clear-scroll body.
+class ESScrollQueryBuilder {
+
+public:
+    ESScrollQueryBuilder();
+    ~ESScrollQueryBuilder();
+    // build the query DSL for elasticsearch
+    std::string build();
+
+
+    // number of documents requested per scroll page ("size" in the DSL)
+    void set_batch_size(uint16_t batch_size) {
+        _size = batch_size;
+    }
+    // fields to return in `_source`; when empty, build() emits no "_source" member
+    void set_selected_fields(const std::vector<std::string>& fields) {
+        _fields = fields;
+    }
+
+    static std::string build_next_scroll_body(const std::string& scroll_id, const std::string& scroll);
+    static std::string build_clear_scroll_body(const std::string& scroll_id);
+
+private:
+    std::vector<std::string> _fields;
+    // Initialized so build() never reads an indeterminate value.
+    // NOTE(review): 0 is only a defined placeholder - callers are expected to call set_batch_size()
+    uint16_t _size = 0;
+};
+}
diff --git a/be/test/util/CMakeLists.txt b/be/test/util/CMakeLists.txt
index 0ac6e774d4661f..ac87fbab2f3dfb 100644
--- a/be/test/util/CMakeLists.txt
+++ b/be/test/util/CMakeLists.txt
@@ -38,3 +38,4 @@ ADD_BE_TEST(uid_util_test)
 ADD_BE_TEST(arena_test)
 ADD_BE_TEST(aes_util_test)
 ADD_BE_TEST(md5_test)
+ADD_BE_TEST(es_scan_reader_test)
diff --git a/be/test/util/es_scan_reader_test.cpp b/be/test/util/es_scan_reader_test.cpp
new file mode 100644
index 00000000000000..97bf654384e63c
--- /dev/null
+++ b/be/test/util/es_scan_reader_test.cpp
@@ -0,0 +1,260 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#include "util/es_scan_reader.h"
+#include "util/es_scroll_query.h"
+#include
+#include "common/logging.h"
+#include "http/ev_http_server.h"
+#include "http/http_channel.h"
+#include "http/http_handler.h"
+#include "http/http_request.h"
+#include "rapidjson/document.h"
+#include "rapidjson/writer.h"
+#include "rapidjson/stringbuffer.h"
+#include
+#include
+#include
+
+namespace doris {
+
+// Mock of the initial search endpoint (POST /{index}/{type}/_search).
+// Replies with one document (id 1, value "1"), hits.total 10 and a `_scroll_id`
+// equal to the "size" of the posted query (1 when the body has no "size").
+class RestSearchAction : public HttpHandler {
+public:
+    void handle(HttpRequest* req) override {
+        std::string user;
+        std::string passwd;
+        // only basic-auth user "root" is accepted; everything else gets a challenge
+        if (!parse_basic_auth(*req, &user, &passwd) || user != "root") {
+            HttpChannel::send_basic_challenge(req, "abc");
+            return;
+        }
+        req->add_output_header(HttpHeaders::CONTENT_TYPE, "application/json");
+        if (req->method() == HttpMethod::POST) {
+            std::string post_body = req->get_request_body();
+            rapidjson::Document post_doc;
+            post_doc.Parse<0>(post_body.c_str());
+            int size = 1;
+            if (post_doc.HasMember("size")) {
+                rapidjson::Value& size_value = post_doc["size"];
+                size = size_value.GetInt();
+            }
+            std::string _scroll_id(std::to_string(size));
+            rapidjson::Document search_result;
+            rapidjson::Document::AllocatorType &allocator = search_result.GetAllocator();
+            search_result.SetObject();
+            rapidjson::Value scroll_id_value(_scroll_id.c_str(), allocator);
+            search_result.AddMember("_scroll_id", scroll_id_value, allocator);
+
+            // { "hits": { "total": 10, "hits": [ { "id": 1, "value": "1" } ] } }
+            rapidjson::Value outer_hits(rapidjson::kObjectType);
+            outer_hits.AddMember("total", 10, allocator);
+            rapidjson::Value inner_hits(rapidjson::kArrayType);
+            rapidjson::Value source_docuement(rapidjson::kObjectType);
+            source_docuement.AddMember("id", 1, allocator);
+            rapidjson::Value value_node("1", allocator);
+            source_docuement.AddMember("value", value_node, allocator);
+            inner_hits.PushBack(source_docuement, allocator);
+            outer_hits.AddMember("hits", inner_hits, allocator);
+            search_result.AddMember("hits", outer_hits, allocator);
+
+            rapidjson::StringBuffer buffer;
+            rapidjson::Writer writer(buffer);
+            search_result.Accept(writer);
+            // send the serialized search result as the reply
+            std::string search_result_json = buffer.GetString();
+            HttpChannel::send_reply(req, search_result_json);
+        } else {
+            std::string response = "test1";
+            HttpChannel::send_reply(req, response);
+        }
+    }
+};
+
+// Mock of the scroll endpoint (POST /_search/scroll).
+// The posted "scroll_id" is read as a numeric offset: offsets up to 10 yield one
+// document with id offset + 1 and that number as the new `_scroll_id`; larger
+// offsets yield a response whose `hits` object has a total but no hit array.
+class RestSearchScrollAction : public HttpHandler {
+public:
+    void handle(HttpRequest* req) override {
+        std::string user;
+        std::string passwd;
+        if (!parse_basic_auth(*req, &user, &passwd) || user != "root") {
+            HttpChannel::send_basic_challenge(req, "abc");
+            return;
+        }
+        if (req->method() == HttpMethod::POST) {
+            std::string post_body = req->get_request_body();
+            rapidjson::Document post_doc;
+            post_doc.Parse<0>(post_body.c_str());
+            // NOTE(review): `size` is never read in this handler
+            int size = 1;
+            std::string scroll_id;
+            if (!post_doc.HasMember("scroll_id")) {
+                HttpChannel::send_reply(req,HttpStatus::NOT_FOUND, "invalid scroll request");
+                return;
+            } else {
+                rapidjson::Value& scroll_id_value = post_doc["scroll_id"];
+                scroll_id = scroll_id_value.GetString();
+                int offset = atoi(scroll_id.c_str());
+                if (offset > 10) {
+                    // past the last document: reply without a hit array
+                    rapidjson::Document end_search_result;
+                    rapidjson::Document::AllocatorType &allocator = end_search_result.GetAllocator();
+                    end_search_result.SetObject();
+                    rapidjson::Value scroll_id_value("11", allocator);
+                    end_search_result.AddMember("_scroll_id", scroll_id_value, allocator);
+
+                    rapidjson::Value outer_hits(rapidjson::kObjectType);
+                    outer_hits.AddMember("total", 10, allocator);
+                    end_search_result.AddMember("hits", outer_hits, allocator);
+                    rapidjson::StringBuffer buffer;
+                    rapidjson::Writer writer(buffer);
+                    end_search_result.Accept(writer);
+                    // send the serialized end-of-scroll result as the reply
+                    std::string end_search_result_json = buffer.GetString();
+                    HttpChannel::send_reply(req, end_search_result_json);
+                    return;
+                } else {
+                    int start = offset + 1;
+                    rapidjson::Document search_result;
+                    rapidjson::Document::AllocatorType &allocator = search_result.GetAllocator();
+                    search_result.SetObject();
+                    rapidjson::Value scroll_id_value(std::to_string(start).c_str(), allocator);
+                    search_result.AddMember("_scroll_id", scroll_id_value, allocator);
+
+                    rapidjson::Value outer_hits(rapidjson::kObjectType);
+                    outer_hits.AddMember("total", 10, allocator);
+                    rapidjson::Value inner_hits(rapidjson::kArrayType);
+                    rapidjson::Value source_docuement(rapidjson::kObjectType);
+                    source_docuement.AddMember("id", start, allocator);
+                    rapidjson::Value value_node(std::to_string(start).c_str(), allocator);
+                    source_docuement.AddMember("value", value_node, allocator);
+                    inner_hits.PushBack(source_docuement, allocator);
+                    outer_hits.AddMember("hits", inner_hits, allocator);
+                    search_result.AddMember("hits", outer_hits, allocator);
+
+                    rapidjson::StringBuffer buffer;
+                    rapidjson::Writer writer(buffer);
+                    search_result.Accept(writer);
+                    // send the serialized scroll page as the reply
+                    std::string search_result_json = buffer.GetString();
+                    HttpChannel::send_reply(req, search_result_json);
+                    return;
+                }
+
+            }
+        }
+    }
+};
+
+// Mock of the clear-scroll endpoint (DELETE /_search/scroll).
+// Any body that carries a "scroll_id" member is acknowledged with
+// {"succeeded": true, "num_freed": 1}; otherwise a 404 is sent.
+class RestClearScrollAction : public HttpHandler {
+public:
+    void handle(HttpRequest* req) override {
+        std::string user;
+        std::string passwd;
+        if (!parse_basic_auth(*req, &user, &passwd) || user != "root") {
+            HttpChannel::send_basic_challenge(req, "abc");
+            return;
+        }
+        if (req->method() == HttpMethod::DELETE) {
+            std::string post_body = req->get_request_body();
+            rapidjson::Document post_doc;
+            post_doc.Parse<0>(post_body.c_str());
+            // NOTE(review): `size` and `scroll_id` are never read in this handler
+            int size = 1;
+            std::string scroll_id;
+            if (!post_doc.HasMember("scroll_id")) {
+                HttpChannel::send_reply(req,HttpStatus::NOT_FOUND, "invalid scroll request");
+                return;
+            } else {
+                rapidjson::Document clear_scroll_result;
+                rapidjson::Document::AllocatorType &allocator = clear_scroll_result.GetAllocator();
+                clear_scroll_result.SetObject();
+                clear_scroll_result.AddMember("succeeded", true, allocator);
+                clear_scroll_result.AddMember("num_freed", 1, allocator);
+                rapidjson::StringBuffer buffer;
+                rapidjson::Writer writer(buffer);
+                clear_scroll_result.Accept(writer);
+                std::string clear_scroll_result_json = buffer.GetString();
+                HttpChannel::send_reply(req, clear_scroll_result_json);
+                return;
+            }
+        }
+    }
+};
+
+// handlers and server shared by every test of this file
+static RestSearchAction rest_search_action = RestSearchAction();
+static RestSearchScrollAction rest_search_scroll_action = RestSearchScrollAction();
+static RestClearScrollAction rest_clear_scroll_action = RestClearScrollAction();
+static EvHttpServer* mock_es_server = nullptr;
+
+// Fixture that runs the mock elasticsearch server on port 29386 for the whole test case.
+class MockESServerTest : public testing::Test {
+public:
+    MockESServerTest() { }
+    ~MockESServerTest() override { }
+
+    static void SetUpTestCase() {
+        mock_es_server = new EvHttpServer(29386);
+        mock_es_server->register_handler(POST, "/{index}/{type}/_search", &rest_search_action);
+        mock_es_server->register_handler(POST, "/_search/scroll", &rest_search_scroll_action);
+        mock_es_server->register_handler(DELETE, "/_search/scroll", &rest_clear_scroll_action);
+        mock_es_server->start();
+    }
+
+    static void TearDownTestCase() {
+        delete mock_es_server;
+    }
+};
+
+// Opens a reader against the mock server, drains it with get_next and closes it,
+// checking for every page that the document id and value match the page's `_scroll_id`.
+TEST_F(MockESServerTest, workflow) {
+    std::string target = "http://127.0.0.1:29386";
+    ESScrollQueryBuilder scroll_query_builder;
+    scroll_query_builder.set_batch_size(1);
+    std::vector fields = {"id", "value"};
+    scroll_query_builder.set_selected_fields(fields);
+    std::map props;
+    props[ESScanReader::KEY_INDEX] = "tindex";
+    props[ESScanReader::KEY_TYPE] = "doc";
+    props[ESScanReader::KEY_USER_NAME] = "root";
+    props[ESScanReader::KEY_PASS_WORD] = "root";
+    props[ESScanReader::KEY_SHARDS] = "0";
+    props[ESScanReader::KEY_QUERY] = scroll_query_builder.build();
+    ESScanReader reader(target, 1, props);
+    auto st = reader.open();
+    // NOTE(review): the status returned by open() is never checked because the assertion below is disabled
+    // ASSERT_TRUE(st.ok());
+    bool eos = false;
+    while(!eos){
+        std::string response;
+        st = reader.get_next(&eos, &response);
+        // a response delivered together with eos == true is not inspected
+        if(eos) {
+            break;
+        }
+        // NOTE(review): the response is parsed before `st` is asserted at the end of the loop body
+        rapidjson::Document docuemnt_node;
+        docuemnt_node.Parse<0>(response.c_str());
+        rapidjson::Value &scroll_node = docuemnt_node["_scroll_id"];
+        std::string _scroll_id = scroll_node.GetString();
+        int id = atoi(_scroll_id.c_str());
+        rapidjson::Value &outer_hits_node = docuemnt_node["hits"];
+        rapidjson::Value &inner_hits_node = outer_hits_node["hits"];
+        rapidjson::Value &source_node = inner_hits_node[0];
+        rapidjson::Value &id_node = source_node["id"];
+        rapidjson::Value &value_node = source_node["value"];
+        ASSERT_EQ(id, id_node.GetInt());
+        std::string value = value_node.GetString();
+        ASSERT_EQ(id, atoi(value.c_str()));
+        ASSERT_TRUE(st.ok());
+    }
+    auto cst = reader.close();
+    ASSERT_TRUE(cst.ok());
+}
+}
+
+int main(int argc, char* argv[]) {
+    ::testing::InitGoogleTest(&argc, argv);
+    return RUN_ALL_TESTS();
+}
From 5232bd4ca40435aa8532e89251ff3c9e0dfc4ac3 Mon Sep 17 00:00:00 2001
From: lide
Date: Mon, 25 Mar 2019 16:14:12 +0800
Subject: [PATCH 02/73] Introduce ES http scan node (#807)

* Palo on Es http Init version (compile passed)
* Add EsQueryBuilder and adjust EsScanReader
* Intruduce ES Predicate process
* Fix compile issues
---
 be/CMakeLists.txt | 2 +-
 be/src/exec/CMakeLists.txt | 3 +
 be/src/exec/es_http_scan_node.cpp | 407 ++++++++++++++++++
 be/src/exec/es_http_scan_node.h | 121 ++++++
 be/src/exec/es_http_scanner.cpp | 130 ++++++
 be/src/exec/es_http_scanner.h | 107 +++++
 be/src/exec/es_predicate.cpp | 287 ++++++++++++
 be/src/exec/es_predicate.h | 140 ++++++
 be/src/exec/es_query_builder.h | 44 ++
 be/src/exec/es_scan_reader.h | 66 +++
 be/src/exec/exec_node.cpp | 6 +
 be/src/exprs/expr_context.h | 1 +
 .../org/apache/doris/planner/EsScanNode.java | 2 +-
 gensrc/thrift/PlanNodes.thrift | 3 +-
 14 files changed, 1316 insertions(+), 3 deletions(-)
 create mode 100644 be/src/exec/es_http_scan_node.cpp
 create mode 100644 be/src/exec/es_http_scan_node.h
 create mode 100644 be/src/exec/es_http_scanner.cpp
 create mode 100644 be/src/exec/es_http_scanner.h
 create mode 100644 be/src/exec/es_predicate.cpp
 create mode 100644 be/src/exec/es_predicate.h
 create mode 100644 be/src/exec/es_query_builder.h
 create mode 100644
be/src/exec/es_scan_reader.h diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index 152651a8119948..09a2487c2f3f93 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -284,7 +284,7 @@ set(CXX_GCC_FLAGS "-g -Wno-unused-local-typedefs") # Debug information is stored as dwarf2 to be as compatible as possible # -Werror: compile warnings should be errors when using the toolchain compiler. # Only enable for debug builds because this is what we test in pre-commit tests. -set(CXX_FLAGS_DEBUG "${CXX_GCC_FLAGS} -Werror -ggdb") +set(CXX_FLAGS_DEBUG "${CXX_GCC_FLAGS} -Werror -O0 -gdwarf-2") # For CMAKE_BUILD_TYPE=Release # -O3: Enable all compiler optimizations diff --git a/be/src/exec/CMakeLists.txt b/be/src/exec/CMakeLists.txt index f9ff83b1bb3312..14f499183cbe0f 100644 --- a/be/src/exec/CMakeLists.txt +++ b/be/src/exec/CMakeLists.txt @@ -63,6 +63,9 @@ set(EXEC_FILES csv_scan_node.cpp csv_scanner.cpp es_scan_node.cpp + es_http_scan_node.cpp + es_http_scanner.cpp + es_predicate.cpp spill_sort_node.cc union_node.cpp union_node_ir.cpp diff --git a/be/src/exec/es_http_scan_node.cpp b/be/src/exec/es_http_scan_node.cpp new file mode 100644 index 00000000000000..668fdbd8e5160d --- /dev/null +++ b/be/src/exec/es_http_scan_node.cpp @@ -0,0 +1,407 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "exec/es_http_scan_node.h" + +#include +#include + +#include "common/object_pool.h" +#include "exprs/expr.h" +#include "runtime/runtime_state.h" +#include "runtime/row_batch.h" +#include "runtime/dpp_sink_internal.h" +#include "service/backend_options.h" +#include "util/runtime_profile.h" +#include "exec/es_scan_reader.h" +#include "exec/es_predicate.h" + +namespace doris { + +EsHttpScanNode::EsHttpScanNode( + ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) : + ScanNode(pool, tnode, descs), + _tuple_id(tnode.es_scan_node.tuple_id), + _runtime_state(nullptr), + _tuple_desc(nullptr), + _query_builder(nullptr), + _num_running_scanners(0), + _scan_finished(false), + _eos(false), + _max_buffered_batches(1024), + _wait_scanner_timer(nullptr) { +} + +EsHttpScanNode::~EsHttpScanNode() { +} + +Status EsHttpScanNode::init(const TPlanNode& tnode, RuntimeState* state) { + RETURN_IF_ERROR(ScanNode::init(tnode)); + _properties = tnode.es_scan_node.properties; + return Status::OK; +} + +Status EsHttpScanNode::prepare(RuntimeState* state) { + VLOG_QUERY << "EsHttpScanNode prepare"; + RETURN_IF_ERROR(ScanNode::prepare(state)); + + _runtime_state = state; + _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); + if (_tuple_desc == nullptr) { + std::stringstream ss; + ss << "Failed to get tuple descriptor, _tuple_id=" << _tuple_id; + return Status(ss.str()); + } + + for (auto slot_desc : _tuple_desc->slots()) { + if (!slot_desc->is_materialized()) { + continue; + } + _column_names.push_back(slot_desc->col_name()); + } + + _wait_scanner_timer = ADD_TIMER(runtime_profile(), "WaitScannerTime"); + + return Status::OK; +} + +void EsHttpScanNode::build_predicates() { + for (int i = 0; i < _conjunct_ctxs.size(); ++i) { + std::shared_ptr predicate( + new EsPredicate(_conjunct_ctxs[i], _tuple_desc)); + if (predicate->build_disjuncts()) { + 
_predicates.push_back(predicate); + _predicate_to_conjunct.push_back(i); + } + } +} + +Status EsHttpScanNode::open(RuntimeState* state) { + SCOPED_TIMER(_runtime_profile->total_time_counter()); + RETURN_IF_ERROR(ExecNode::open(state)); + RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::OPEN)); + RETURN_IF_CANCELLED(state); + + for (int conj_idx = 0; conj_idx < _conjunct_ctxs.size(); ++conj_idx) { + // if conjunct is constant, compute direct and set eos = true + if (_conjunct_ctxs[conj_idx]->root()->is_constant()) { + void* value = _conjunct_ctxs[conj_idx]->get_value(NULL); + if (value == NULL || *reinterpret_cast(value) == false) { + _eos = true; + } + } + } + + build_predicates(); + + RETURN_IF_ERROR(start_scanners()); + + return Status::OK; +} + +Status EsHttpScanNode::start_scanners() { + { + std::unique_lock l(_batch_queue_lock); + _num_running_scanners = 1; + } + _scanner_threads.emplace_back(&EsHttpScanNode::scanner_worker, this, 0, + _scan_ranges.size()); + return Status::OK; +} + +Status EsHttpScanNode::get_next(RuntimeState* state, RowBatch* row_batch, + bool* eos) { + SCOPED_TIMER(_runtime_profile->total_time_counter()); + if (state->is_cancelled()) { + std::unique_lock l(_batch_queue_lock); + if (update_status(Status::CANCELLED)) { + _queue_writer_cond.notify_all(); + } + } + + if (_eos) { + *eos = true; + return Status::OK; + } + + if (_scan_finished.load()) { + *eos = true; + return Status::OK; + } + + std::shared_ptr scanner_batch; + { + std::unique_lock l(_batch_queue_lock); + while (_process_status.ok() && + !_runtime_state->is_cancelled() && + _num_running_scanners > 0 && + _batch_queue.empty()) { + SCOPED_TIMER(_wait_scanner_timer); + _queue_reader_cond.wait_for(l, std::chrono::seconds(1)); + } + if (!_process_status.ok()) { + // Some scanner process failed. 
+ return _process_status; + } + if (_runtime_state->is_cancelled()) { + if (update_status(Status::CANCELLED)) { + _queue_writer_cond.notify_all(); + } + return _process_status; + } + if (!_batch_queue.empty()) { + scanner_batch = _batch_queue.front(); + _batch_queue.pop_front(); + } + } + + // All scanner has been finished, and all cached batch has been read + if (scanner_batch == nullptr) { + _scan_finished.store(true); + *eos = true; + return Status::OK; + } + + // notify one scanner + _queue_writer_cond.notify_one(); + + // get scanner's batch memory + row_batch->acquire_state(scanner_batch.get()); + _num_rows_returned += row_batch->num_rows(); + COUNTER_SET(_rows_returned_counter, _num_rows_returned); + + // This is first time reach limit. + // Only valid when query 'select * from table1 limit 20' + if (reached_limit()) { + int num_rows_over = _num_rows_returned - _limit; + row_batch->set_num_rows(row_batch->num_rows() - num_rows_over); + _num_rows_returned -= num_rows_over; + COUNTER_SET(_rows_returned_counter, _num_rows_returned); + + _scan_finished.store(true); + _queue_writer_cond.notify_all(); + *eos = true; + } else { + *eos = false; + } + + if (VLOG_ROW_IS_ON) { + for (int i = 0; i < row_batch->num_rows(); ++i) { + TupleRow* row = row_batch->get_row(i); + VLOG_ROW << "EsHttpScanNode output row: " + << Tuple::to_string(row->get_tuple(0), *_tuple_desc); + } + } + + return Status::OK; +} + +Status EsHttpScanNode::close(RuntimeState* state) { + if (is_closed()) { + return Status::OK; + } + RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::CLOSE)); + SCOPED_TIMER(_runtime_profile->total_time_counter()); + _scan_finished.store(true); + _queue_writer_cond.notify_all(); + _queue_reader_cond.notify_all(); + for (int i = 0; i < _scanner_threads.size(); ++i) { + _scanner_threads[i].join(); + } + + _batch_queue.clear(); + + return ExecNode::close(state); +} + +// This function is called after plan node has been prepared. 
+Status EsHttpScanNode::set_scan_ranges(const std::vector& scan_ranges) { + _scan_ranges = scan_ranges; + return Status::OK; +} + +void EsHttpScanNode::debug_string(int ident_level, std::stringstream* out) const { + (*out) << "EsHttpScanNode"; +} + +Status EsHttpScanNode::scanner_scan( + TupleId _tuple_id, + std::map properties, + const std::vector& conjunct_ctxs, + EsScanCounter* counter) { + std::unique_ptr scanner(new EsHttpScanner( + _runtime_state, + runtime_profile(), + _tuple_id, + properties, + conjunct_ctxs, + counter)); + RETURN_IF_ERROR(scanner->open()); + bool scanner_eof = false; + + while (!scanner_eof) { + // Fill one row batch + std::shared_ptr row_batch( + new RowBatch(row_desc(), _runtime_state->batch_size(), mem_tracker())); + + // create new tuple buffer for row_batch + MemPool* tuple_pool = row_batch->tuple_data_pool(); + int tuple_buffer_size = row_batch->capacity() * _tuple_desc->byte_size(); + void* tuple_buffer = tuple_pool->allocate(tuple_buffer_size); + if (tuple_buffer == nullptr) { + return Status("Allocate memory for row batch failed."); + } + + Tuple* tuple = reinterpret_cast(tuple_buffer); + while (!scanner_eof) { + RETURN_IF_CANCELLED(_runtime_state); + // If we have finished all works + if (_scan_finished.load()) { + return Status::OK; + } + + // This row batch has been filled up, and break this + if (row_batch->is_full()) { + break; + } + + int row_idx = row_batch->add_row(); + TupleRow* row = row_batch->get_row(row_idx); + // scan node is the first tuple of tuple row + row->set_tuple(0, tuple); + memset(tuple, 0, _tuple_desc->num_null_bytes()); + + // Get from scanner + RETURN_IF_ERROR(scanner->get_next(tuple, tuple_pool, &scanner_eof)); + if (scanner_eof) { + continue; + } + + // eval conjuncts of this row. 
+ if (eval_conjuncts(&conjunct_ctxs[0], conjunct_ctxs.size(), row)) { + row_batch->commit_last_row(); + char* new_tuple = reinterpret_cast(tuple); + new_tuple += _tuple_desc->byte_size(); + tuple = reinterpret_cast(new_tuple); + counter->num_rows_returned++; + } else { + counter->num_rows_filtered++; + } + } + + // Row batch has been filled, push this to the queue + if (row_batch->num_rows() > 0) { + std::unique_lock l(_batch_queue_lock); + while (_process_status.ok() && + !_scan_finished.load() && + !_runtime_state->is_cancelled() && + _batch_queue.size() >= _max_buffered_batches) { + _queue_writer_cond.wait_for(l, std::chrono::seconds(1)); + } + // Process already set failed, so we just return OK + if (!_process_status.ok()) { + return Status::OK; + } + // Scan already finished, just return + if (_scan_finished.load()) { + return Status::OK; + } + // Runtime state is canceled, just return cancel + if (_runtime_state->is_cancelled()) { + return Status::CANCELLED; + } + // Queue size Must be samller than _max_buffered_batches + _batch_queue.push_back(row_batch); + + // Notify reader to + _queue_reader_cond.notify_one(); + } + } + + return Status::OK; +} + +static std::string get_host_port(const std::vector& es_hosts) { + + std::string host_port; + std::string localhost = BackendOptions::get_localhost(); + + TNetworkAddress host = es_hosts[0]; + for (auto& es_host : es_hosts) { + if (es_host.hostname == localhost) { + host = es_host; + break; + } + } + + host_port = host.hostname; + host_port += ":"; + host_port += std::to_string(host.port); + return host_port; +} + +void EsHttpScanNode::scanner_worker(int start_idx, int length) { + // Clone expr context + std::vector scanner_expr_ctxs; + auto status = Expr::clone_if_not_exists(_conjunct_ctxs, _runtime_state, + &scanner_expr_ctxs); + if (!status.ok()) { + LOG(WARNING) << "Clone conjuncts failed."; + } + + EsScanCounter counter; + for (int i = 0; i < length && status.ok(); ++i) { + const TEsScanRange& es_scan_range = 
+ _scan_ranges[start_idx + i].scan_range.es_scan_range; + + _properties[EsScanReader::INDEX] = es_scan_range.index; + if (es_scan_range.__isset.type) { + _properties[EsScanReader::TYPE] = es_scan_range.type; + } + _properties[EsScanReader::SHARD_ID] = std::to_string(es_scan_range.shard_id); + _properties[EsScanReader::BATCH_SIZE] = std::to_string(_runtime_state->batch_size()); + _properties[EsScanReader::HOST] = get_host_port(es_scan_range.es_hosts); + _properties[EsScanReader::QUERY] = EsQueryBuilder::build(_properties, _column_names, _predicates); + + status = scanner_scan(_tuple_id, _properties, scanner_expr_ctxs, &counter); + if (!status.ok()) { + LOG(WARNING) << "Scanner[" << start_idx + i << "] prcess failed. status=" + << status.get_error_msg(); + } + } + + // Update stats + _runtime_state->update_num_rows_load_success(counter.num_rows_returned); + _runtime_state->update_num_rows_load_filtered(counter.num_rows_filtered); + + // scanner is going to finish + { + std::lock_guard l(_batch_queue_lock); + if (!status.ok()) { + update_status(status); + } + // This scanner will finish + _num_running_scanners--; + } + _queue_reader_cond.notify_all(); + // If one scanner failed, others don't need scan any more + if (!status.ok()) { + _queue_writer_cond.notify_all(); + } + Expr::close(scanner_expr_ctxs, _runtime_state); +} +} diff --git a/be/src/exec/es_http_scan_node.h b/be/src/exec/es_http_scan_node.h new file mode 100644 index 00000000000000..3b5658d4d17524 --- /dev/null +++ b/be/src/exec/es_http_scan_node.h @@ -0,0 +1,121 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef BE_EXEC_ES_HTTP_SCAN_NODE_H +#define BE_EXEC_ES_HTTP_SCAN_NODE_H + +#include +#include +#include +#include +#include +#include +#include + +#include "common/status.h" +#include "exec/scan_node.h" +#include "exec/es_http_scanner.h" +#include "exec/es_query_builder.h" +#include "gen_cpp/PaloInternalService_types.h" + +namespace doris { + +class RuntimeState; +class PartRangeKey; +class PartitionInfo; +class EsHttpScanCounter; +class EsPredicate; + +class EsHttpScanNode : public ScanNode { +public: + EsHttpScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs); + virtual ~EsHttpScanNode(); + + virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr) override; + + virtual Status prepare(RuntimeState* state) override; + + virtual Status open(RuntimeState* state) override; + + virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) override; + + virtual Status close(RuntimeState* state) override; + + virtual Status set_scan_ranges(const std::vector& scan_ranges) override; + +protected: + // Write debug string of this into out. 
+ virtual void debug_string(int indentation_level, std::stringstream* out) const override; + +private: + // Update process status to one failed status, + // NOTE: Must hold the mutex of this scan node + bool update_status(const Status& new_status) { + if (_process_status.ok()) { + _process_status = new_status; + return true; + } + return false; + } + + // Create scanners to do scan job + Status start_scanners(); + + // One scanner worker, This scanner will hanle 'length' ranges start from start_idx + void scanner_worker(int start_idx, int length); + + // Scan one range + Status scanner_scan(TupleId _tuple_id, + std::map properties, + const std::vector& conjunct_ctxs, + EsScanCounter* counter); + +private: + + void build_predicates(); + + TupleId _tuple_id; + RuntimeState* _runtime_state; + TupleDescriptor* _tuple_desc; + std::unique_ptr _query_builder; + + int _num_running_scanners; + std::atomic _scan_finished; + bool _eos; + int _max_buffered_batches; + RuntimeProfile::Counter* _wait_scanner_timer; + + bool _all_scanners_finished; + Status _process_status; + + std::vector _scanner_threads; + std::map _properties; + std::vector _scan_ranges; + std::vector _column_names; + + std::mutex _batch_queue_lock; + std::condition_variable _queue_reader_cond; + std::condition_variable _queue_writer_cond; + std::deque> _batch_queue; + std::vector> _predicates; + + std::vector _predicate_to_conjunct; +}; + +} + +#endif diff --git a/be/src/exec/es_http_scanner.cpp b/be/src/exec/es_http_scanner.cpp new file mode 100644 index 00000000000000..a44f44297fdcd9 --- /dev/null +++ b/be/src/exec/es_http_scanner.cpp @@ -0,0 +1,130 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exec/es_http_scanner.h"
+
+#include
+#include
+
+#include "runtime/descriptors.h"
+#include "runtime/exec_env.h"
+#include "runtime/mem_tracker.h"
+#include "runtime/raw_value.h"
+#include "runtime/tuple.h"
+#include "exprs/expr.h"
+#include "exec/es_scan_reader.h"
+#include "exec/text_converter.h"
+#include "exec/text_converter.hpp"
+
+namespace doris {
+// Stores the scan parameters only; descriptor lookup and reader creation happen in open().
+EsHttpScanner::EsHttpScanner(
+            RuntimeState* state,
+            RuntimeProfile* profile,
+            TupleId tuple_id,
+            std::map properties,
+            const std::vector& conjunct_ctxs,
+            EsScanCounter* counter) :
+        _state(state),
+        _profile(profile),
+        _tuple_id(tuple_id),
+        _properties(properties),
+        _conjunct_ctxs(conjunct_ctxs),
+        _next_range(0),
+        _line_eof(false),
+#if BE_TEST
+        _mem_tracker(new MemTracker()),
+        _mem_pool(_mem_tracker.get()),
+#else
+        _mem_tracker(new MemTracker(-1, "EsHttp Scanner", state->instance_mem_tracker())),
+        _mem_pool(_state->instance_mem_tracker()), // NOTE(review): BE_TEST branch uses _mem_tracker.get() - confirm intended
+#endif
+        _tuple_desc(nullptr),
+        _counter(counter),
+        _es_reader(nullptr),
+        _rows_read_counter(nullptr),
+        _read_timer(nullptr),
+        _materialize_timer(nullptr) {
+}
+
+EsHttpScanner::~EsHttpScanner() {
+    close();
+}
+// Resolves the tuple descriptor, indexes its slots by column name, then creates and opens the ES reader.
+Status EsHttpScanner::open() {
+    _tuple_desc = _state->desc_tbl().get_tuple_descriptor(_tuple_id);
+    if (_tuple_desc == nullptr) {
+        std::stringstream ss;
+        ss << "Unknown tuple descriptor, tuple_id=" << _tuple_id;
+        return Status(ss.str());
+    }
+
+    for (auto slot : _tuple_desc->slots()) {
+        auto pair = _slots_map.emplace(slot->col_name(), slot);
+        if (!pair.second) { // emplace reports false when the column name is already present
+            std::stringstream ss;
+            ss << "Failed to insert slot, col_name=" << slot->col_name();
+            return Status(ss.str());
+        }
+    }
+
+    const std::string& host = _properties.at(EsScanReader::HOST); // at() throws if the key is missing
+    _es_reader.reset(new EsScanReader(host, _properties));
+    if (_es_reader == nullptr) { // NOTE(review): presumably unreachable, plain new does not return null
+        return Status("Es reader construct failed.");
+    }
+
+    _es_reader->open(); // NOTE(review): result is discarded - confirm whether failures should be returned
+
+    //_text_converter.reset(new(std::nothrow) TextConverter('\\'));
+    //if (_text_converter == nullptr) {
+    //    return Status("No memory error.");
+    //}
+
+    _rows_read_counter = ADD_COUNTER(_profile, "RowsRead", TUnit::UNIT);
+    _read_timer = ADD_TIMER(_profile, "TotalRawReadTime(*)");
+    _materialize_timer = ADD_TIMER(_profile, "MaterializeTupleTime(*)");
+
+    return Status::OK;
+}
+// NOTE(review): placeholder - fetching and row conversion are still commented out, and *eof is never written.
+Status EsHttpScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof) {
+    SCOPED_TIMER(_read_timer);
+    while (!eof) { // NOTE(review): tests the pointer, not *eof, so the body is skipped for any non-null eof
+        std::string batch_row_buffer;
+        if (_line_eof) {
+            //RETURN_IF_ERROR(_es_reader->get_next(&eof, &batch_row_buffer));
+        }
+        //get_next_line(&batch_row_buffer);
+        {
+            COUNTER_UPDATE(_rows_read_counter, 1);
+            SCOPED_TIMER(_materialize_timer);
+            //if (convert_one_row(Slice(ptr, size), tuple, tuple_pool)) {
+            //    break;
+            //}
+        }
+    }
+    return Status::OK;
+}
+// Closes the ES reader if one was created; the destructor calls this as well.
+void EsHttpScanner::close() {
+    if (_es_reader != nullptr) {
+        _es_reader->close();
+    }
+}
+
+}
diff --git a/be/src/exec/es_http_scanner.h b/be/src/exec/es_http_scanner.h
new file mode 100644
index 00000000000000..850db04e79a569
--- /dev/null
+++ b/be/src/exec/es_http_scanner.h
@@ -0,0 +1,107 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef BE_EXEC_ES_HTTP_SCANNER_H +#define BE_EXEC_ES_HTTP_SCANNER_H + +#include +#include +#include +#include +#include + +#include "common/status.h" +#include "common/global_types.h" +#include "gen_cpp/PlanNodes_types.h" +#include "gen_cpp/Types_types.h" +#include "runtime/mem_pool.h" +#include "util/slice.h" +#include "util/runtime_profile.h" + +namespace doris { + +class Tuple; +class SlotDescriptor; +class Slice; +class RuntimeState; +class ExprContext; +class TextConverter; +class TupleDescriptor; +class TupleRow; +class RowDescriptor; +class MemTracker; +class RuntimeProfile; +class EsScanReader; + +struct EsScanCounter { + EsScanCounter() : num_rows_returned(0), num_rows_filtered(0) { + } + + int64_t num_rows_returned; + int64_t num_rows_filtered; +}; + +class EsHttpScanner { +public: + EsHttpScanner( + RuntimeState* state, + RuntimeProfile* profile, + TupleId tuple_id, + std::map properties, + const std::vector& conjunct_ctxs, + EsScanCounter* counter); + ~EsHttpScanner(); + + Status open(); + + Status get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof); + + void close(); + +private: + + RuntimeState* _state; + RuntimeProfile* _profile; + TupleId _tuple_id; + const std::map& _properties; + const std::vector& _conjunct_ctxs; + + std::unique_ptr _text_converter; + + int _next_range; + bool _line_eof; + + std::vector _slot_descs; + std::unique_ptr _row_desc; + + 
std::unique_ptr _mem_tracker; + MemPool _mem_pool; + + const TupleDescriptor* _tuple_desc; + EsScanCounter* _counter; + std::unique_ptr _es_reader; + std::map _slots_map; + + // Profile + RuntimeProfile::Counter* _rows_read_counter; + RuntimeProfile::Counter* _read_timer; + RuntimeProfile::Counter* _materialize_timer; +}; + +} + +#endif diff --git a/be/src/exec/es_predicate.cpp b/be/src/exec/es_predicate.cpp new file mode 100644 index 00000000000000..73d6073956fcd9 --- /dev/null +++ b/be/src/exec/es_predicate.cpp @@ -0,0 +1,287 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "exec/es_predicate.h" + +#include +#include +#include +#include + +#include "common/status.h" +#include "common/logging.h" +#include "exprs/expr.h" +#include "exprs/expr_context.h" +#include "exprs/in_predicate.h" + +#include "gen_cpp/PlanNodes_types.h" +#include "olap/olap_common.h" +#include "olap/utils.h" +#include "runtime/client_cache.h" +#include "runtime/runtime_state.h" +#include "runtime/row_batch.h" +#include "runtime/string_value.h" +#include "runtime/tuple_row.h" + +#include "service/backend_options.h" +#include "util/runtime_profile.h" +#include "util/debug_util.h" + +namespace doris { + +using namespace std; + +EsPredicate::EsPredicate(ExprContext* conjunct_ctx, + const TupleDescriptor* tuple_desc) : + _context(conjunct_ctx), + _disjuncts_num(0), + _tuple_desc(tuple_desc) { +} + +EsPredicate::~EsPredicate() { +} + +bool EsPredicate::build_disjuncts() { + return build_disjuncts(_context->root(), _disjuncts); +} + +vector EsPredicate::get_predicate_list(){ + return _disjuncts; +} + +bool EsPredicate::build_disjuncts(Expr* conjunct, vector& disjuncts) { + if (TExprNodeType::BINARY_PRED == conjunct->node_type()) { + if (conjunct->children().size() != 2) { + VLOG(1) << "get disjuncts fail: number of childs is not 2"; + return false; + } + + SlotRef* slotRef = nullptr; + TExprOpcode::type op; + Expr* expr = nullptr; + if (TExprNodeType::SLOT_REF == conjunct->get_child(0)->node_type()) { + expr = conjunct->get_child(1); + slotRef = (SlotRef*)(conjunct->get_child(0)); + op = conjunct->op(); + } else if (TExprNodeType::SLOT_REF == conjunct->get_child(1)->node_type()) { + expr = conjunct->get_child(0); + slotRef = (SlotRef*)(conjunct->get_child(1)); + op = conjunct->op(); + } else { + VLOG(1) << "get disjuncts fail: no SLOT_REF child"; + return false; + } + + SlotDescriptor* slot_desc = get_slot_desc(slotRef); + if (slot_desc == nullptr) { + VLOG(1) << "get disjuncts fail: slot_desc is null"; + return false; + } + + TExtLiteral literal; + if 
(!to_ext_literal(_context, expr, &literal)) { + VLOG(1) << "get disjuncts fail: can't get literal, node_type=" + << expr->node_type(); + return false; + } + + std::unique_ptr predicate(new ExtBinaryPredicate( + TExprNodeType::BINARY_PRED, + slot_desc->col_name(), + slot_desc->type(), + op, + literal)); + + disjuncts.emplace_back(std::move(*predicate)); + return true; + } + + if (is_match_func(conjunct)) { + TExtLiteral literal; + if (!to_ext_literal(_context, conjunct->get_child(1), &literal)) { + VLOG(1) << "get disjuncts fail: can't get literal, node_type=" + << conjunct->get_child(1)->node_type(); + return false; + } + + vector query_conditions; + query_conditions.push_back(std::move(literal)); + vector cols; //TODO + + std::unique_ptr predicate(new ExtFunction( + TExprNodeType::FUNCTION_CALL, + conjunct->fn().name.function_name, + cols, + query_conditions)); + disjuncts.emplace_back(std::move(*predicate)); + + return true; + } + + if (TExprNodeType::IN_PRED == conjunct->node_type()) { + TExtInPredicate ext_in_predicate; + vector in_pred_values; + InPredicate* pred = dynamic_cast(conjunct); + ext_in_predicate.__set_is_not_in(pred->is_not_in()); + if (Expr::type_without_cast(pred->get_child(0)) != TExprNodeType::SLOT_REF) { + return false; + } + + SlotRef* slot_ref = (SlotRef*)(conjunct->get_child(0)); + SlotDescriptor* slot_desc = get_slot_desc(slot_ref); + if (slot_desc == nullptr) { + return false; + } + + for (int i = 1; i < pred->children().size(); ++i) { + // varchar, string, all of them are string type, but varchar != string + // TODO add date, datetime support? 
+ if (pred->get_child(0)->type().is_string_type()) { + if (!pred->get_child(i)->type().is_string_type()) { + return false; + } + } else { + if (pred->get_child(i)->type().type != pred->get_child(0)->type().type) { + return false; + } + } + TExtLiteral literal; + if (!to_ext_literal(_context, pred->get_child(i), &literal)) { + VLOG(1) << "get disjuncts fail: can't get literal, node_type=" + << pred->get_child(i)->node_type(); + return false; + } + in_pred_values.push_back(literal); + } + + std::unique_ptr predicate(new ExtInPredicate( + TExprNodeType::IN_PRED, + slot_desc->col_name(), + slot_desc->type(), + in_pred_values)); + + disjuncts.emplace_back(std::move(*predicate)); + + return true; + } + + if (TExprNodeType::COMPOUND_PRED == conjunct->node_type()) { + if (TExprOpcode::COMPOUND_OR != conjunct->op()) { + VLOG(1) << "get disjuncts fail: op is not COMPOUND_OR"; + return false; + } + if (!build_disjuncts(conjunct->get_child(0), disjuncts)) { + return false; + } + if (!build_disjuncts(conjunct->get_child(1), disjuncts)) { + return false; + } + + return true; + } + + // if go to here, report error + VLOG(1) << "get disjuncts fail: node type is " << conjunct->node_type() + << ", should be BINARY_PRED or COMPOUND_PRED"; + return false; +} + +bool EsPredicate::is_match_func(Expr* conjunct) { + if (TExprNodeType::FUNCTION_CALL == conjunct->node_type() + && conjunct->fn().name.function_name == "esquery") { + return true; + } + return false; +} + +SlotDescriptor* EsPredicate::get_slot_desc(SlotRef* slotRef) { + std::vector slot_ids; + slotRef->get_slot_ids(&slot_ids); + SlotDescriptor* slot_desc = nullptr; + for (SlotDescriptor* slot : _tuple_desc->slots()) { + if (slot->id() == slot_ids[0]) { + slot_desc = slot; + break; + } + } + return slot_desc; +} + +bool EsPredicate::to_ext_literal(ExprContext* _context, Expr* expr, TExtLiteral* literal) { + literal->__set_node_type(expr->node_type()); + switch (expr->node_type()) { + case TExprNodeType::BOOL_LITERAL: { + 
TBoolLiteral bool_literal; + void* value = _context->get_value(expr, NULL); + bool_literal.__set_value(*reinterpret_cast(value)); + literal->__set_bool_literal(bool_literal); + return true; + } + case TExprNodeType::DATE_LITERAL: { + void* value = _context->get_value(expr, NULL); + DateTimeValue date_value = *reinterpret_cast(value); + char str[MAX_DTVALUE_STR_LEN]; + date_value.to_string(str); + TDateLiteral date_literal; + date_literal.__set_value(str); + literal->__set_date_literal(date_literal); + return true; + } + case TExprNodeType::FLOAT_LITERAL: { + TFloatLiteral float_literal; + void* value = _context->get_value(expr, NULL); + float_literal.__set_value(*reinterpret_cast(value)); + literal->__set_float_literal(float_literal); + return true; + } + case TExprNodeType::INT_LITERAL: { + TIntLiteral int_literal; + void* value = _context->get_value(expr, NULL); + int_literal.__set_value(*reinterpret_cast(value)); + literal->__set_int_literal(int_literal); + return true; + } + case TExprNodeType::STRING_LITERAL: { + TStringLiteral string_literal; + void* value = _context->get_value(expr, NULL); + string_literal.__set_value(*reinterpret_cast(value)); + literal->__set_string_literal(string_literal); + return true; + } + case TExprNodeType::DECIMAL_LITERAL: { + TDecimalLiteral decimal_literal; + void* value = _context->get_value(expr, NULL); + decimal_literal.__set_value(reinterpret_cast(value)->to_string()); + literal->__set_decimal_literal(decimal_literal); + return true; + } + case TExprNodeType::LARGE_INT_LITERAL: { + char buf[48]; + int len = 48; + void* value = _context->get_value(expr, NULL); + char* v = LargeIntValue::to_string(*reinterpret_cast<__int128*>(value), buf, &len); + TLargeIntLiteral large_int_literal; + large_int_literal.__set_value(v); + literal->__set_large_int_literal(large_int_literal); + return true; + } + default: + return false; + } +} + +} diff --git a/be/src/exec/es_predicate.h b/be/src/exec/es_predicate.h new file mode 100644 index 
00000000000000..ee328a69c7bdf6 --- /dev/null +++ b/be/src/exec/es_predicate.h @@ -0,0 +1,140 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef BE_EXEC_ES_PREDICATE_H +#define BE_EXEC_ES_PREDICATE_H + +#include +#include + +#include "exprs/slot_ref.h" +#include "gen_cpp/Exprs_types.h" +#include "gen_cpp/Opcodes_types.h" +#include "gen_cpp/PaloExternalDataSourceService_types.h" +#include "runtime/descriptors.h" +#include "runtime/tuple.h" + +namespace doris { + +class Status; +class ExprContext; +class ExtBinaryPredicate; + +struct ExtPredicate { + ExtPredicate(TExprNodeType::type node_type) : node_type(node_type) { + } + + TExprNodeType::type node_type; +}; + +struct ExtColumnDesc { + ExtColumnDesc(std::string name, TypeDescriptor type) : + name(name), + type(type) { + } + + std::string name; + TypeDescriptor type; +}; + +struct ExtBinaryPredicate : public ExtPredicate { + ExtBinaryPredicate( + TExprNodeType::type node_type, + std::string name, + TypeDescriptor type, + TExprOpcode::type op, + TExtLiteral value) : + ExtPredicate(node_type), + col(name, type), + op(op), + value(value) { + } + + ExtColumnDesc col; + TExprOpcode::type op; + TExtLiteral value; +}; + +struct ExtInPredicate : public ExtPredicate 
{ + ExtInPredicate( + TExprNodeType::type node_type, + std::string name, + TypeDescriptor type, + vector values) : + ExtPredicate(node_type), + is_not_in(false), + col(name, type), + values(values) { + } + + bool is_not_in; + ExtColumnDesc col; + vector values; +}; + +struct ExtLikePredicate : public ExtPredicate { + ExtColumnDesc col; + TExtLiteral value; +}; + +struct ExtIsNullPredicate : public ExtPredicate { + bool is_not_null; + ExtColumnDesc col; +}; + +struct ExtFunction : public ExtPredicate { + ExtFunction( + TExprNodeType::type node_type, + string func_name, + vector cols, + vector values) : + ExtPredicate(node_type), + func_name(func_name), + cols(cols), + values(values) { + } + + string func_name; + vector cols; + vector values; +}; + +class EsPredicate { + + public: + EsPredicate(ExprContext* conjunct_ctx, + const TupleDescriptor* tuple_desc); + ~EsPredicate(); + vector get_predicate_list(); + bool build_disjuncts(); + + private: + + bool build_disjuncts(Expr* conjunct, vector& disjuncts); + bool to_ext_literal(ExprContext* context, Expr* expr, TExtLiteral* literal); + bool is_match_func(Expr* conjunct); + SlotDescriptor* get_slot_desc(SlotRef* slotRef); + + ExprContext* _context; + int _disjuncts_num; + const TupleDescriptor* _tuple_desc; + vector _disjuncts; +}; + +} + +#endif diff --git a/be/src/exec/es_query_builder.h b/be/src/exec/es_query_builder.h new file mode 100644 index 00000000000000..8d61a0ea4eb17a --- /dev/null +++ b/be/src/exec/es_query_builder.h @@ -0,0 +1,44 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include +#include + +#include "common/status.h" + +namespace doris { + +class EsPredicate; + +class EsQueryBuilder { +public: + EsQueryBuilder() {}; + ~EsQueryBuilder() {}; + + static std::string build(const std::map& properties, + const std::vector& columns, + std::vector>) { + return std::string("xxx"); + } +}; + +} + diff --git a/be/src/exec/es_scan_reader.h b/be/src/exec/es_scan_reader.h new file mode 100644 index 00000000000000..fd5516d7c0e6de --- /dev/null +++ b/be/src/exec/es_scan_reader.h @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include + +#include +#include + +#include "common/status.h" + +namespace doris { + +class EsScanReader { +public: + constexpr static const char* HOST = "host"; + constexpr static const char* INDEX = "index"; + constexpr static const char* TYPE = "type"; + constexpr static const char* SHARD_ID = "shard_id"; + constexpr static const char* BATCH_SIZE = "batch_size"; + constexpr static const char* QUERY = "query"; + + EsScanReader(const std::string& target, + const std::map& properties) : + _target(target), + _properties(properties), + _eof(false) { + } + + ~EsScanReader() {}; + + Status open() { return Status::OK; } + + Status get_next(bool* eof, std::string* buf) { + const char* json = "{\"_scroll_id\": \"DXF1ZXJ5QW5kRmV0Y2gBAAAAAAAA1ewWbEhKNHRWX1NTNG04bERuV05RUlA5Zw==\",\"hits\": {\"total\": 10,\"hits\": [{\"_source\": {\"id\": 1}},{\"_source\": {\"id\": 2}}]}}"; + buf->append(json); + *eof = true; + return Status::OK; + } + + void close() {}; + +private: + + const std::string& _target; + const std::map& _properties; + bool _eof; +}; + +} + diff --git a/be/src/exec/exec_node.cpp b/be/src/exec/exec_node.cpp index 679d42c21d9249..c934cf5fff20f7 100644 --- a/be/src/exec/exec_node.cpp +++ b/be/src/exec/exec_node.cpp @@ -31,6 +31,7 @@ #include "exec/new_partitioned_aggregation_node.h" #include "exec/csv_scan_node.h" #include "exec/es_scan_node.h" +#include "exec/es_http_scan_node.h" #include "exec/pre_aggregation_node.h" #include "exec/hash_join_node.h" #include "exec/broker_scan_node.h" @@ -366,6 +367,10 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN *node = pool->add(new EsScanNode(pool, tnode, descs)); return Status::OK; + case TPlanNodeType::ES_HTTP_SCAN_NODE: + *node = pool->add(new EsHttpScanNode(pool, tnode, descs)); + return Status::OK; + case TPlanNodeType::SCHEMA_SCAN_NODE: *node = pool->add(new SchemaScanNode(pool, tnode, descs)); return Status::OK; @@ -515,6 +520,7 @@ void 
ExecNode::collect_scan_nodes(vector* nodes) { collect_nodes(TPlanNodeType::OLAP_SCAN_NODE, nodes); collect_nodes(TPlanNodeType::BROKER_SCAN_NODE, nodes); collect_nodes(TPlanNodeType::ES_SCAN_NODE, nodes); + collect_nodes(TPlanNodeType::ES_HTTP_SCAN_NODE, nodes); } void ExecNode::init_runtime_profile(const std::string& name) { diff --git a/be/src/exprs/expr_context.h b/be/src/exprs/expr_context.h index cbf2b6ea991134..de57638857be3f 100644 --- a/be/src/exprs/expr_context.h +++ b/be/src/exprs/expr_context.h @@ -176,6 +176,7 @@ class ExprContext { friend class InPredicate; friend class OlapScanNode; friend class EsScanNode; + friend class EsPredicate; /// FunctionContexts for each registered expression. The FunctionContexts are created /// and owned by this ExprContext. diff --git a/fe/src/main/java/org/apache/doris/planner/EsScanNode.java b/fe/src/main/java/org/apache/doris/planner/EsScanNode.java index 790b976ebb0d44..38b0f3296135d4 100644 --- a/fe/src/main/java/org/apache/doris/planner/EsScanNode.java +++ b/fe/src/main/java/org/apache/doris/planner/EsScanNode.java @@ -128,7 +128,7 @@ public void finalize(Analyzer analyzer) throws UserException { @Override protected void toThrift(TPlanNode msg) { - msg.node_type = TPlanNodeType.ES_SCAN_NODE; + msg.node_type = TPlanNodeType.ES_HTTP_SCAN_NODE; Map properties = Maps.newHashMap(); properties.put(EsTable.USER, table.getUserName()); properties.put(EsTable.PASSWORD, table.getPasswd()); diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift index 69476511499d69..67fbef8f807116 100644 --- a/gensrc/thrift/PlanNodes.thrift +++ b/gensrc/thrift/PlanNodes.thrift @@ -43,7 +43,8 @@ enum TPlanNodeType { BROKER_SCAN_NODE, EMPTY_SET_NODE, UNION_NODE, - ES_SCAN_NODE + ES_SCAN_NODE, + ES_HTTP_SCAN_NODE } // phases of an execution node From 88ff41f99043aec9898d3e8f8cf379632e59cde5 Mon Sep 17 00:00:00 2001 From: "Yunfeng,Wu" Date: Mon, 25 Mar 2019 19:34:18 +0800 Subject: [PATCH 03/73] Moidfy scan interface (#809) --- 
be/src/util/es_scan_reader.cpp | 12 ++++---- be/src/util/es_scan_reader.h | 7 +++-- be/src/util/es_scroll_query.cpp | 44 +++++++++++++++++++++++----- be/src/util/es_scroll_query.h | 26 +++++++++------- be/test/util/es_scan_reader_test.cpp | 11 ++++--- 5 files changed, 66 insertions(+), 34 deletions(-) diff --git a/be/src/util/es_scan_reader.cpp b/be/src/util/es_scan_reader.cpp index 36005c84f4e5ed..097ea5d865cebb 100644 --- a/be/src/util/es_scan_reader.cpp +++ b/be/src/util/es_scan_reader.cpp @@ -31,10 +31,8 @@ const std::string REQUEST_SEARCH_SCROLL_PATH = "/_search/scroll"; const std::string REQUEST_SEPARATOR = "/"; const std::string REQUEST_SCROLL_TIME = "5m"; -ESScanReader::ESScanReader(const std::string& target, uint16_t size, const std::map& props) { - LOG(INFO) << "ESScanReader "; +ESScanReader::ESScanReader(const std::string& target, const std::map& props) { _target = target; - _batch_size = size; _index = props.at(KEY_INDEX); _type = props.at(KEY_TYPE); if (props.find(KEY_USER_NAME) != props.end()) { @@ -43,16 +41,18 @@ ESScanReader::ESScanReader(const std::string& target, uint16_t size, const std:: if (props.find(KEY_PASS_WORD) != props.end()){ _passwd = props.at(KEY_PASS_WORD); } - if (props.find(KEY_SHARDS) != props.end()) { - _shards = props.at(KEY_SHARDS); + if (props.find(KEY_SHARD) != props.end()) { + _shards = props.at(KEY_SHARD); } if (props.find(KEY_QUERY) != props.end()) { _query = props.at(KEY_QUERY); } + std::string batch_size_str = props.at(KEY_BATCH_SIZE); + _batch_size = atoi(batch_size_str.c_str()); _init_scroll_url = _target + REQUEST_SEPARATOR + _index + REQUEST_SEPARATOR + _type + "/_search?scroll=" + REQUEST_SCROLL_TIME + REQUEST_PREFERENCE_PREFIX + _shards + "&" + REUQEST_SCROLL_FILTER_PATH; _next_scroll_url = _target + REQUEST_SEARCH_SCROLL_PATH + "?" 
+ REUQEST_SCROLL_FILTER_PATH; _eos = false; - _parser.set_batch_size(size); + _parser.set_batch_size(_batch_size); } ESScanReader::~ESScanReader() { diff --git a/be/src/util/es_scan_reader.h b/be/src/util/es_scan_reader.h index 45c413e7df3d6f..8b2d13776635af 100644 --- a/be/src/util/es_scan_reader.h +++ b/be/src/util/es_scan_reader.h @@ -32,12 +32,13 @@ class ESScanReader { public: static constexpr const char* KEY_USER_NAME = "user"; static constexpr const char* KEY_PASS_WORD = "passwd"; + static constexpr const char* KEY_HOST_PORT = "host_port"; static constexpr const char* KEY_INDEX = "index"; static constexpr const char* KEY_TYPE = "type"; - static constexpr const char* KEY_SHARDS = "shards"; + static constexpr const char* KEY_SHARD = "shard_id"; static constexpr const char* KEY_QUERY = "query"; static constexpr const char* KEY_BATCH_SIZE = "batch_size"; - ESScanReader(const std::string& target, uint16_t size, const std::map& props); + ESScanReader(const std::string& target, const std::map& props); ~ESScanReader(); // launch the first scroll request, this method will cache the first scroll response, and return the this cached response when invoke get_next @@ -64,7 +65,7 @@ class ESScanReader { std::string _init_scroll_url; std::string _next_scroll_url; bool _eos; - uint16_t _batch_size; + int _batch_size; std::string _cached_response; ScrollParser _parser; diff --git a/be/src/util/es_scroll_query.cpp b/be/src/util/es_scroll_query.cpp index 57e936d2284801..2948ec31881148 100644 --- a/be/src/util/es_scroll_query.cpp +++ b/be/src/util/es_scroll_query.cpp @@ -22,7 +22,7 @@ #include "rapidjson/document.h" #include "rapidjson/stringbuffer.h" #include "rapidjson/writer.h" - +#include "util/es_scan_reader.h" namespace doris { ESScrollQueryBuilder::ESScrollQueryBuilder() { @@ -58,32 +58,60 @@ std::string ESScrollQueryBuilder::build_clear_scroll_body(const std::string& scr return buffer.GetString(); } - -std::string ESScrollQueryBuilder::build() { +std::string 
ESScrollQueryBuilder::build(const std::map& properties, + const std::vector& fields, + std::vector> predicates) { rapidjson::Document es_query_dsl; rapidjson::Document::AllocatorType &allocator = es_query_dsl.GetAllocator(); es_query_dsl.SetObject(); - if (_fields.size() > 0) { + if (fields.size() > 0) { rapidjson::Value source_node(rapidjson::kArrayType); - for (auto iter = _fields.begin(); iter != _fields.end(); iter++) { + for (auto iter = fields.begin(); iter != fields.end(); iter++) { rapidjson::Value field(iter->c_str(), allocator); source_node.PushBack(field, allocator); } es_query_dsl.AddMember("_source", source_node, allocator); } - + int size = atoi(properties.at(ESScanReader::BATCH_SIZE).c_str()); rapidjson::Value sort_node(rapidjson::kArrayType); rapidjson::Value field("_doc", allocator); sort_node.PushBack(field, allocator); es_query_dsl.AddMember("sort", sort_node, allocator); - es_query_dsl.AddMember("size", _size, allocator); + es_query_dsl.AddMember("size", size, allocator); rapidjson::StringBuffer buffer; rapidjson::Writer writer(buffer); es_query_dsl.Accept(writer); std::string es_query_dsl_json = buffer.GetString(); - return es_query_dsl_json; + return es_query_dsl_json; + } +// std::string ESScrollQueryBuilder::build() { +// rapidjson::Document es_query_dsl; +// rapidjson::Document::AllocatorType &allocator = es_query_dsl.GetAllocator(); +// es_query_dsl.SetObject(); +// if (_fields.size() > 0) { +// rapidjson::Value source_node(rapidjson::kArrayType); +// for (auto iter = _fields.begin(); iter != _fields.end(); iter++) { +// rapidjson::Value field(iter->c_str(), allocator); +// source_node.PushBack(field, allocator); +// } +// es_query_dsl.AddMember("_source", source_node, allocator); +// } + +// rapidjson::Value sort_node(rapidjson::kArrayType); +// rapidjson::Value field("_doc", allocator); +// sort_node.PushBack(field, allocator); +// es_query_dsl.AddMember("sort", sort_node, allocator); + +// es_query_dsl.AddMember("size", _size, 
allocator); + +// rapidjson::StringBuffer buffer; +// rapidjson::Writer writer(buffer); +// es_query_dsl.Accept(writer); +// std::string es_query_dsl_json = buffer.GetString(); +// return es_query_dsl_json; +// } } diff --git a/be/src/util/es_scroll_query.h b/be/src/util/es_scroll_query.h index 766a0e09b60574..f30378d30c3b1e 100644 --- a/be/src/util/es_scroll_query.h +++ b/be/src/util/es_scroll_query.h @@ -19,6 +19,7 @@ #pragma once #include #include +#include "exec/es_predicate.h" namespace doris { @@ -28,21 +29,24 @@ class ESScrollQueryBuilder { ESScrollQueryBuilder(); ~ESScrollQueryBuilder(); // build the query DSL for elasticsearch - std::string build(); + // std::string build(); - void set_batch_size(uint16_t batch_size) { - _size = batch_size; - } - void set_selected_fields(const std::vector& fields) { - _fields = fields; - } + // void set_batch_size(uint16_t batch_size) { + // _size = batch_size; + // } + // void set_selected_fields(const std::vector& fields) { + // _fields = fields; + // } static std::string build_next_scroll_body(const std::string& scroll_id, const std::string& scroll); static std::string build_clear_scroll_body(const std::string& scroll_id); - -private: - std::vector _fields; - uint16_t _size; + static std::string build(const std::map& properties, + const std::vector& fields, + std::vector>); +// private: +// std::vector _fields; +// uint16_t _size; +// }; }; } diff --git a/be/test/util/es_scan_reader_test.cpp b/be/test/util/es_scan_reader_test.cpp index 97bf654384e63c..7b1df013a19ddd 100644 --- a/be/test/util/es_scan_reader_test.cpp +++ b/be/test/util/es_scan_reader_test.cpp @@ -213,18 +213,17 @@ class MockESServerTest : public testing::Test { TEST_F(MockESServerTest, workflow) { std::string target = "http://127.0.0.1:29386"; - ESScrollQueryBuilder scroll_query_builder; - scroll_query_builder.set_batch_size(1); std::vector fields = {"id", "value"}; - scroll_query_builder.set_selected_fields(fields); std::map props; 
props[ESScanReader::KEY_INDEX] = "tindex"; props[ESScanReader::KEY_TYPE] = "doc"; props[ESScanReader::KEY_USER_NAME] = "root"; props[ESScanReader::KEY_PASS_WORD] = "root"; - props[ESScanReader::KEY_SHARDS] = "0"; - props[ESScanReader::KEY_QUERY] = scroll_query_builder.build(); - ESScanReader reader(target, 1, props); + props[ESScanReader::KEY_SHARD] = "0"; + props[ESScanReader::KEY_BATCH_SIZE] = "1"; + std::vector> predicates; + props[ESScanReader::KEY_QUERY] = ESScrollQueryBuilder::build(props, fields, predicates); + ESScanReader reader(target, props); auto st = reader.open(); // ASSERT_TRUE(st.ok()); bool eos = false; From 473739f651161a10911b09d27eb8c2f9cd43bb80 Mon Sep 17 00:00:00 2001 From: "Yunfeng,Wu" Date: Mon, 25 Mar 2019 20:24:09 +0800 Subject: [PATCH 04/73] Bug-fix for some wrong logic (#810) --- be/src/util/es_scroll_query.cpp | 2 +- be/test/util/es_scan_reader_test.cpp | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/be/src/util/es_scroll_query.cpp b/be/src/util/es_scroll_query.cpp index 2948ec31881148..abdb9f2a72463c 100644 --- a/be/src/util/es_scroll_query.cpp +++ b/be/src/util/es_scroll_query.cpp @@ -72,7 +72,7 @@ std::string ESScrollQueryBuilder::build(const std::map } es_query_dsl.AddMember("_source", source_node, allocator); } - int size = atoi(properties.at(ESScanReader::BATCH_SIZE).c_str()); + int size = atoi(properties.at(ESScanReader::KEY_BATCH_SIZE).c_str()); rapidjson::Value sort_node(rapidjson::kArrayType); rapidjson::Value field("_doc", allocator); sort_node.PushBack(field, allocator); diff --git a/be/test/util/es_scan_reader_test.cpp b/be/test/util/es_scan_reader_test.cpp index 7b1df013a19ddd..2b5d7f84b34b81 100644 --- a/be/test/util/es_scan_reader_test.cpp +++ b/be/test/util/es_scan_reader_test.cpp @@ -94,7 +94,6 @@ class RestSearchScrollAction : public HttpHandler { std::string post_body = req->get_request_body(); rapidjson::Document post_doc; post_doc.Parse<0>(post_body.c_str()); - int size = 1; std::string 
scroll_id; if (!post_doc.HasMember("scroll_id")) { HttpChannel::send_reply(req,HttpStatus::NOT_FOUND, "invalid scroll request"); @@ -166,7 +165,6 @@ class RestClearScrollAction : public HttpHandler { std::string post_body = req->get_request_body(); rapidjson::Document post_doc; post_doc.Parse<0>(post_body.c_str()); - int size = 1; std::string scroll_id; if (!post_doc.HasMember("scroll_id")) { HttpChannel::send_reply(req,HttpStatus::NOT_FOUND, "invalid scroll request"); From 40891a5cf221866c758226796dc5e7350325f99a Mon Sep 17 00:00:00 2001 From: lide Date: Tue, 26 Mar 2019 10:32:15 +0800 Subject: [PATCH 05/73] Introduce ExtLiteral and revise interface with reader (#813) --- be/src/exec/es_http_scan_node.cpp | 24 ++++--- be/src/exec/es_http_scan_node.h | 4 +- be/src/exec/es_http_scanner.cpp | 5 +- be/src/exec/es_http_scanner.h | 4 +- be/src/exec/es_predicate.cpp | 111 ++++++------------------------ be/src/exec/es_predicate.h | 28 +++++--- be/src/exec/es_query_builder.h | 44 ------------ be/src/exec/es_scan_reader.h | 66 ------------------ 8 files changed, 56 insertions(+), 230 deletions(-) delete mode 100644 be/src/exec/es_query_builder.h delete mode 100644 be/src/exec/es_scan_reader.h diff --git a/be/src/exec/es_http_scan_node.cpp b/be/src/exec/es_http_scan_node.cpp index 668fdbd8e5160d..3ca1f838709427 100644 --- a/be/src/exec/es_http_scan_node.cpp +++ b/be/src/exec/es_http_scan_node.cpp @@ -27,7 +27,8 @@ #include "runtime/dpp_sink_internal.h" #include "service/backend_options.h" #include "util/runtime_profile.h" -#include "exec/es_scan_reader.h" +#include "util/es_scan_reader.h" +#include "util/es_scroll_query.h" #include "exec/es_predicate.h" namespace doris { @@ -38,7 +39,6 @@ EsHttpScanNode::EsHttpScanNode( _tuple_id(tnode.es_scan_node.tuple_id), _runtime_state(nullptr), _tuple_desc(nullptr), - _query_builder(nullptr), _num_running_scanners(0), _scan_finished(false), _eos(false), @@ -79,11 +79,11 @@ Status EsHttpScanNode::prepare(RuntimeState* state) { return 
Status::OK; } -void EsHttpScanNode::build_predicates() { +void EsHttpScanNode::build_conjuncts_list() { for (int i = 0; i < _conjunct_ctxs.size(); ++i) { std::shared_ptr predicate( new EsPredicate(_conjunct_ctxs[i], _tuple_desc)); - if (predicate->build_disjuncts()) { + if (predicate->build_disjuncts_list()) { _predicates.push_back(predicate); _predicate_to_conjunct.push_back(i); } @@ -106,7 +106,7 @@ Status EsHttpScanNode::open(RuntimeState* state) { } } - build_predicates(); + build_conjuncts_list(); RETURN_IF_ERROR(start_scanners()); @@ -368,14 +368,16 @@ void EsHttpScanNode::scanner_worker(int start_idx, int length) { const TEsScanRange& es_scan_range = _scan_ranges[start_idx + i].scan_range.es_scan_range; - _properties[EsScanReader::INDEX] = es_scan_range.index; + _properties[ESScanReader::KEY_INDEX] = es_scan_range.index; if (es_scan_range.__isset.type) { - _properties[EsScanReader::TYPE] = es_scan_range.type; + _properties[ESScanReader::KEY_TYPE] = es_scan_range.type; } - _properties[EsScanReader::SHARD_ID] = std::to_string(es_scan_range.shard_id); - _properties[EsScanReader::BATCH_SIZE] = std::to_string(_runtime_state->batch_size()); - _properties[EsScanReader::HOST] = get_host_port(es_scan_range.es_hosts); - _properties[EsScanReader::QUERY] = EsQueryBuilder::build(_properties, _column_names, _predicates); + + _properties[ESScanReader::KEY_SHARD] = std::to_string(es_scan_range.shard_id); + _properties[ESScanReader::KEY_BATCH_SIZE] = std::to_string(_runtime_state->batch_size()); + _properties[ESScanReader::KEY_HOST_PORT] = get_host_port(es_scan_range.es_hosts); + _properties[ESScanReader::KEY_QUERY] + = ESScrollQueryBuilder::build(_properties, _column_names, _predicates); status = scanner_scan(_tuple_id, _properties, scanner_expr_ctxs, &counter); if (!status.ok()) { diff --git a/be/src/exec/es_http_scan_node.h b/be/src/exec/es_http_scan_node.h index 3b5658d4d17524..c9d077ee4ab1ae 100644 --- a/be/src/exec/es_http_scan_node.h +++ 
b/be/src/exec/es_http_scan_node.h @@ -29,7 +29,6 @@ #include "common/status.h" #include "exec/scan_node.h" #include "exec/es_http_scanner.h" -#include "exec/es_query_builder.h" #include "gen_cpp/PaloInternalService_types.h" namespace doris { @@ -86,12 +85,11 @@ class EsHttpScanNode : public ScanNode { private: - void build_predicates(); + void build_conjuncts_list(); TupleId _tuple_id; RuntimeState* _runtime_state; TupleDescriptor* _tuple_desc; - std::unique_ptr _query_builder; int _num_running_scanners; std::atomic _scan_finished; diff --git a/be/src/exec/es_http_scanner.cpp b/be/src/exec/es_http_scanner.cpp index a44f44297fdcd9..bbf0e14ed2b49c 100644 --- a/be/src/exec/es_http_scanner.cpp +++ b/be/src/exec/es_http_scanner.cpp @@ -26,7 +26,6 @@ #include "runtime/raw_value.h" #include "runtime/tuple.h" #include "exprs/expr.h" -#include "exec/es_scan_reader.h" #include "exec/text_converter.h" #include "exec/text_converter.hpp" @@ -82,8 +81,8 @@ Status EsHttpScanner::open() { } } - const std::string& host = _properties.at(EsScanReader::HOST); - _es_reader.reset(new EsScanReader(host, _properties)); + const std::string& host = _properties.at(ESScanReader::KEY_HOST_PORT); + _es_reader.reset(new ESScanReader(host, _properties)); if (_es_reader == nullptr) { return Status("Es reader construct failed."); } diff --git a/be/src/exec/es_http_scanner.h b/be/src/exec/es_http_scanner.h index 850db04e79a569..9db167cbce4e0b 100644 --- a/be/src/exec/es_http_scanner.h +++ b/be/src/exec/es_http_scanner.h @@ -30,6 +30,7 @@ #include "gen_cpp/Types_types.h" #include "runtime/mem_pool.h" #include "util/slice.h" +#include "util/es_scan_reader.h" #include "util/runtime_profile.h" namespace doris { @@ -45,7 +46,6 @@ class TupleRow; class RowDescriptor; class MemTracker; class RuntimeProfile; -class EsScanReader; struct EsScanCounter { EsScanCounter() : num_rows_returned(0), num_rows_filtered(0) { @@ -93,7 +93,7 @@ class EsHttpScanner { const TupleDescriptor* _tuple_desc; EsScanCounter* 
_counter; - std::unique_ptr _es_reader; + std::unique_ptr _es_reader; std::map _slots_map; // Profile diff --git a/be/src/exec/es_predicate.cpp b/be/src/exec/es_predicate.cpp index 73d6073956fcd9..fb35ae3e7d53f9 100644 --- a/be/src/exec/es_predicate.cpp +++ b/be/src/exec/es_predicate.cpp @@ -55,15 +55,15 @@ EsPredicate::EsPredicate(ExprContext* conjunct_ctx, EsPredicate::~EsPredicate() { } -bool EsPredicate::build_disjuncts() { - return build_disjuncts(_context->root(), _disjuncts); +bool EsPredicate::build_disjuncts_list() { + return build_disjuncts_list(_context->root(), _disjuncts); } vector EsPredicate::get_predicate_list(){ return _disjuncts; } -bool EsPredicate::build_disjuncts(Expr* conjunct, vector& disjuncts) { +bool EsPredicate::build_disjuncts_list(Expr* conjunct, vector& disjuncts) { if (TExprNodeType::BINARY_PRED == conjunct->node_type()) { if (conjunct->children().size() != 2) { VLOG(1) << "get disjuncts fail: number of childs is not 2"; @@ -92,34 +92,28 @@ bool EsPredicate::build_disjuncts(Expr* conjunct, vector& disjunct return false; } - TExtLiteral literal; - if (!to_ext_literal(_context, expr, &literal)) { - VLOG(1) << "get disjuncts fail: can't get literal, node_type=" - << expr->node_type(); - return false; - } + std::shared_ptr literal(new ExtLiteral(expr->node_type())); + literal->value = _context->get_value(expr, NULL); std::unique_ptr predicate(new ExtBinaryPredicate( TExprNodeType::BINARY_PRED, slot_desc->col_name(), slot_desc->type(), op, - literal)); + *literal)); disjuncts.emplace_back(std::move(*predicate)); return true; } if (is_match_func(conjunct)) { - TExtLiteral literal; - if (!to_ext_literal(_context, conjunct->get_child(1), &literal)) { - VLOG(1) << "get disjuncts fail: can't get literal, node_type=" - << conjunct->get_child(1)->node_type(); - return false; - } - vector query_conditions; - query_conditions.push_back(std::move(literal)); + Expr* expr = conjunct->get_child(1); + std::shared_ptr literal(new 
ExtLiteral(expr->node_type())); + literal->value = _context->get_value(expr, NULL); + + vector query_conditions; + query_conditions.push_back(std::move(*literal)); vector cols; //TODO std::unique_ptr predicate(new ExtFunction( @@ -134,7 +128,7 @@ bool EsPredicate::build_disjuncts(Expr* conjunct, vector& disjunct if (TExprNodeType::IN_PRED == conjunct->node_type()) { TExtInPredicate ext_in_predicate; - vector in_pred_values; + vector in_pred_values; InPredicate* pred = dynamic_cast(conjunct); ext_in_predicate.__set_is_not_in(pred->is_not_in()); if (Expr::type_without_cast(pred->get_child(0)) != TExprNodeType::SLOT_REF) { @@ -159,13 +153,11 @@ bool EsPredicate::build_disjuncts(Expr* conjunct, vector& disjunct return false; } } - TExtLiteral literal; - if (!to_ext_literal(_context, pred->get_child(i), &literal)) { - VLOG(1) << "get disjuncts fail: can't get literal, node_type=" - << pred->get_child(i)->node_type(); - return false; - } - in_pred_values.push_back(literal); + + Expr* expr = conjunct->get_child(i); + std::shared_ptr literal(new ExtLiteral(expr->node_type())); + literal->value = _context->get_value(expr, NULL); + in_pred_values.push_back(*literal); } std::unique_ptr predicate(new ExtInPredicate( @@ -184,10 +176,10 @@ bool EsPredicate::build_disjuncts(Expr* conjunct, vector& disjunct VLOG(1) << "get disjuncts fail: op is not COMPOUND_OR"; return false; } - if (!build_disjuncts(conjunct->get_child(0), disjuncts)) { + if (!build_disjuncts_list(conjunct->get_child(0), disjuncts)) { return false; } - if (!build_disjuncts(conjunct->get_child(1), disjuncts)) { + if (!build_disjuncts_list(conjunct->get_child(1), disjuncts)) { return false; } @@ -221,67 +213,4 @@ SlotDescriptor* EsPredicate::get_slot_desc(SlotRef* slotRef) { return slot_desc; } -bool EsPredicate::to_ext_literal(ExprContext* _context, Expr* expr, TExtLiteral* literal) { - literal->__set_node_type(expr->node_type()); - switch (expr->node_type()) { - case TExprNodeType::BOOL_LITERAL: { - TBoolLiteral 
bool_literal; - void* value = _context->get_value(expr, NULL); - bool_literal.__set_value(*reinterpret_cast(value)); - literal->__set_bool_literal(bool_literal); - return true; - } - case TExprNodeType::DATE_LITERAL: { - void* value = _context->get_value(expr, NULL); - DateTimeValue date_value = *reinterpret_cast(value); - char str[MAX_DTVALUE_STR_LEN]; - date_value.to_string(str); - TDateLiteral date_literal; - date_literal.__set_value(str); - literal->__set_date_literal(date_literal); - return true; - } - case TExprNodeType::FLOAT_LITERAL: { - TFloatLiteral float_literal; - void* value = _context->get_value(expr, NULL); - float_literal.__set_value(*reinterpret_cast(value)); - literal->__set_float_literal(float_literal); - return true; - } - case TExprNodeType::INT_LITERAL: { - TIntLiteral int_literal; - void* value = _context->get_value(expr, NULL); - int_literal.__set_value(*reinterpret_cast(value)); - literal->__set_int_literal(int_literal); - return true; - } - case TExprNodeType::STRING_LITERAL: { - TStringLiteral string_literal; - void* value = _context->get_value(expr, NULL); - string_literal.__set_value(*reinterpret_cast(value)); - literal->__set_string_literal(string_literal); - return true; - } - case TExprNodeType::DECIMAL_LITERAL: { - TDecimalLiteral decimal_literal; - void* value = _context->get_value(expr, NULL); - decimal_literal.__set_value(reinterpret_cast(value)->to_string()); - literal->__set_decimal_literal(decimal_literal); - return true; - } - case TExprNodeType::LARGE_INT_LITERAL: { - char buf[48]; - int len = 48; - void* value = _context->get_value(expr, NULL); - char* v = LargeIntValue::to_string(*reinterpret_cast<__int128*>(value), buf, &len); - TLargeIntLiteral large_int_literal; - large_int_literal.__set_value(v); - literal->__set_large_int_literal(large_int_literal); - return true; - } - default: - return false; - } -} - } diff --git a/be/src/exec/es_predicate.h b/be/src/exec/es_predicate.h index ee328a69c7bdf6..8c603b21e6be0f 100644 
--- a/be/src/exec/es_predicate.h +++ b/be/src/exec/es_predicate.h @@ -34,6 +34,7 @@ class Status; class ExprContext; class ExtBinaryPredicate; + struct ExtPredicate { ExtPredicate(TExprNodeType::type node_type) : node_type(node_type) { } @@ -41,6 +42,14 @@ struct ExtPredicate { TExprNodeType::type node_type; }; +struct ExtLiteral : public ExtPredicate { + ExtLiteral(TExprNodeType::type node_type) : + ExtPredicate(node_type) { + } + + void *value; +}; + struct ExtColumnDesc { ExtColumnDesc(std::string name, TypeDescriptor type) : name(name), @@ -57,7 +66,7 @@ struct ExtBinaryPredicate : public ExtPredicate { std::string name, TypeDescriptor type, TExprOpcode::type op, - TExtLiteral value) : + ExtLiteral value) : ExtPredicate(node_type), col(name, type), op(op), @@ -66,7 +75,7 @@ struct ExtBinaryPredicate : public ExtPredicate { ExtColumnDesc col; TExprOpcode::type op; - TExtLiteral value; + ExtLiteral value; }; struct ExtInPredicate : public ExtPredicate { @@ -74,7 +83,7 @@ struct ExtInPredicate : public ExtPredicate { TExprNodeType::type node_type, std::string name, TypeDescriptor type, - vector values) : + vector values) : ExtPredicate(node_type), is_not_in(false), col(name, type), @@ -83,12 +92,12 @@ struct ExtInPredicate : public ExtPredicate { bool is_not_in; ExtColumnDesc col; - vector values; + vector values; }; struct ExtLikePredicate : public ExtPredicate { ExtColumnDesc col; - TExtLiteral value; + ExtLiteral value; }; struct ExtIsNullPredicate : public ExtPredicate { @@ -101,7 +110,7 @@ struct ExtFunction : public ExtPredicate { TExprNodeType::type node_type, string func_name, vector cols, - vector values) : + vector values) : ExtPredicate(node_type), func_name(func_name), cols(cols), @@ -110,7 +119,7 @@ struct ExtFunction : public ExtPredicate { string func_name; vector cols; - vector values; + vector values; }; class EsPredicate { @@ -120,12 +129,11 @@ class EsPredicate { const TupleDescriptor* tuple_desc); ~EsPredicate(); vector get_predicate_list(); - 
bool build_disjuncts(); + bool build_disjuncts_list(); private: - bool build_disjuncts(Expr* conjunct, vector& disjuncts); - bool to_ext_literal(ExprContext* context, Expr* expr, TExtLiteral* literal); + bool build_disjuncts_list(Expr* conjunct, vector& disjuncts); bool is_match_func(Expr* conjunct); SlotDescriptor* get_slot_desc(SlotRef* slotRef); diff --git a/be/src/exec/es_query_builder.h b/be/src/exec/es_query_builder.h deleted file mode 100644 index 8d61a0ea4eb17a..00000000000000 --- a/be/src/exec/es_query_builder.h +++ /dev/null @@ -1,44 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#pragma once - -#include - -#include -#include - -#include "common/status.h" - -namespace doris { - -class EsPredicate; - -class EsQueryBuilder { -public: - EsQueryBuilder() {}; - ~EsQueryBuilder() {}; - - static std::string build(const std::map& properties, - const std::vector& columns, - std::vector>) { - return std::string("xxx"); - } -}; - -} - diff --git a/be/src/exec/es_scan_reader.h b/be/src/exec/es_scan_reader.h deleted file mode 100644 index fd5516d7c0e6de..00000000000000 --- a/be/src/exec/es_scan_reader.h +++ /dev/null @@ -1,66 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#pragma once - -#include - -#include -#include - -#include "common/status.h" - -namespace doris { - -class EsScanReader { -public: - constexpr static const char* HOST = "host"; - constexpr static const char* INDEX = "index"; - constexpr static const char* TYPE = "type"; - constexpr static const char* SHARD_ID = "shard_id"; - constexpr static const char* BATCH_SIZE = "batch_size"; - constexpr static const char* QUERY = "query"; - - EsScanReader(const std::string& target, - const std::map& properties) : - _target(target), - _properties(properties), - _eof(false) { - } - - ~EsScanReader() {}; - - Status open() { return Status::OK; } - - Status get_next(bool* eof, std::string* buf) { - const char* json = "{\"_scroll_id\": \"DXF1ZXJ5QW5kRmV0Y2gBAAAAAAAA1ewWbEhKNHRWX1NTNG04bERuV05RUlA5Zw==\",\"hits\": {\"total\": 10,\"hits\": [{\"_source\": {\"id\": 1}},{\"_source\": {\"id\": 2}}]}}"; - buf->append(json); - *eof = true; - return Status::OK; - } - - void close() {}; - -private: - - const std::string& _target; - const std::map& _properties; - bool _eof; -}; - -} - From 519472e3b0dd3c504dc23670ac941bda6116ba1e Mon Sep 17 00:00:00 2001 From: lide-reed Date: Tue, 26 Mar 2019 17:43:37 +0800 Subject: [PATCH 06/73] Modify ESScanReader::get_next and ScrollParser --- be/src/exec/es_http_scanner.cpp | 51 +++++++++++++++--- be/src/exec/es_http_scanner.h | 6 ++- be/src/util/es_scan_reader.cpp | 80 +++++++++++++++++----------- be/src/util/es_scan_reader.h | 5 +- be/src/util/es_scroll_parser.cpp | 62 ++++++++++++--------- be/src/util/es_scroll_parser.h | 29 +++++----- be/test/util/es_scan_reader_test.cpp | 32 +++++------ 7 files changed, 170 insertions(+), 95 deletions(-) diff --git a/be/src/exec/es_http_scanner.cpp b/be/src/exec/es_http_scanner.cpp index bbf0e14ed2b49c..183f89cc2b919e 100644 --- a/be/src/exec/es_http_scanner.cpp +++ b/be/src/exec/es_http_scanner.cpp @@ -55,6 +55,7 @@ EsHttpScanner::EsHttpScanner( _tuple_desc(nullptr), _counter(counter), _es_reader(nullptr), + 
_tuple_row(nullptr), _rows_read_counter(nullptr), _read_timer(nullptr), _materialize_timer(nullptr) { @@ -81,6 +82,10 @@ Status EsHttpScanner::open() { } } + Tuple* tuple = (Tuple*) _mem_pool.allocate(_tuple_desc->byte_size()); + _tuple_row = (TupleRow*) _mem_pool.allocate(sizeof(Tuple*)); + _tuple_row->set_tuple(0, tuple); + const std::string& host = _properties.at(ESScanReader::KEY_HOST_PORT); _es_reader.reset(new ESScanReader(host, _properties)); if (_es_reader == nullptr) { @@ -101,20 +106,54 @@ Status EsHttpScanner::open() { return Status::OK; } +bool EsHttpScanner::fill_tuple(const char* ptr, size_t size, + Tuple* tuple, MemPool* mem_pool) { + //int ctx_idx = 0; + for (auto slot_desc : _tuple_desc->slots()) { + if (!slot_desc->is_materialized()) { + continue; + } + // ExprContext* ctx = _dest_expr_ctx[ctx_idx++]; + // void* value = ctx->get_value(_tuple_row); + // if (value == nullptr) { + // if (slot_desc->is_nullable()) { + // tuple->set_null(slot_desc->null_indicator_offset()); + // continue; + // } else { + // std::stringstream error_msg; + // error_msg << "column(" << slot_desc->col_name() << ") value is null"; + // _state->append_error_msg_to_file( + // std::string(ptr, size), error_msg.str()); + // _counter->num_rows_filtered++; + // return false; + // } + // } + // tuple->set_not_null(slot_desc->null_indicator_offset()); + // void* slot = tuple->get_slot(slot_desc->tuple_offset()); + // RawValue::write(value, slot, slot_desc->type(), mem_pool); + } + return true; +} + Status EsHttpScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof) { SCOPED_TIMER(_read_timer); while (!eof) { - std::string batch_row_buffer; + ScrollParser* parser = nullptr; if (_line_eof) { - //RETURN_IF_ERROR(_es_reader->get_next(&eof, &batch_row_buffer)); + RETURN_IF_ERROR(_es_reader->get_next(eof, &parser)); + } + const char* ptr = nullptr; + size_t size = 0; + RETURN_IF_ERROR(parser->read_next_line(&ptr, &size, &_line_eof)); + if (size == 0) { + continue; } - 
//get_next_line(&batch_row_buffer); { COUNTER_UPDATE(_rows_read_counter, 1); SCOPED_TIMER(_materialize_timer); - //if (convert_one_row(Slice(ptr, size), tuple, tuple_pool)) { - // break; - //} + if (fill_tuple(ptr, size, tuple, tuple_pool)) { + break; + } } } return Status::OK; diff --git a/be/src/exec/es_http_scanner.h b/be/src/exec/es_http_scanner.h index 9db167cbce4e0b..bd438cf0c81754 100644 --- a/be/src/exec/es_http_scanner.h +++ b/be/src/exec/es_http_scanner.h @@ -29,7 +29,6 @@ #include "gen_cpp/PlanNodes_types.h" #include "gen_cpp/Types_types.h" #include "runtime/mem_pool.h" -#include "util/slice.h" #include "util/es_scan_reader.h" #include "util/runtime_profile.h" @@ -37,7 +36,6 @@ namespace doris { class Tuple; class SlotDescriptor; -class Slice; class RuntimeState; class ExprContext; class TextConverter; @@ -74,6 +72,9 @@ class EsHttpScanner { private: + bool fill_tuple(const char* ptr, size_t size, + Tuple* tuple, MemPool* mem_pool); + RuntimeState* _state; RuntimeProfile* _profile; TupleId _tuple_id; @@ -95,6 +96,7 @@ class EsHttpScanner { EsScanCounter* _counter; std::unique_ptr _es_reader; std::map _slots_map; + TupleRow* _tuple_row; // Profile RuntimeProfile::Counter* _rows_read_counter; diff --git a/be/src/util/es_scan_reader.cpp b/be/src/util/es_scan_reader.cpp index 097ea5d865cebb..17c2549dc92ff8 100644 --- a/be/src/util/es_scan_reader.cpp +++ b/be/src/util/es_scan_reader.cpp @@ -52,7 +52,6 @@ ESScanReader::ESScanReader(const std::string& target, const std::mapempty() ? "empty response" : *response); - return Status("No search context found for " + _scroll_id); + + scroll_parser = ScrollParser::parse_from_string(response); + + // maybe the index or shard is empty + if (scroll_parser == nullptr || scroll_parser->get_total() == 0) { + _eos = *scan_eos = true; + *parser = nullptr; + return Status::OK; } - if (status != 200) { - LOG(WARNING) << "request scroll search failure[" - << "http status" << status - << ", response: " << (response->empty() ? 
"empty response" : *response); - if (status == 404) { - return Status("No search context found for " + _scroll_id); - } - return Status("request scroll search failure: " + (response->empty() ? "empty response" : *response)); + + if (scroll_parser->get_size() < _batch_size) { + _eos = true; + *scan_eos = false; + } else { + _eos = *scan_eos = false; } - RETURN_IF_ERROR(_parser.parse(*response)); - *eos = _eos = _parser.has_next(); + + *parser = scroll_parser; return Status::OK; } diff --git a/be/src/util/es_scan_reader.h b/be/src/util/es_scan_reader.h index 8b2d13776635af..6579cf6fbbb64d 100644 --- a/be/src/util/es_scan_reader.h +++ b/be/src/util/es_scan_reader.h @@ -19,7 +19,7 @@ #include #include "http/http_client.h" -#include "es_scroll_parser.h" +#include "util/es_scroll_parser.h" using std::string; @@ -44,7 +44,7 @@ class ESScanReader { // launch the first scroll request, this method will cache the first scroll response, and return the this cached response when invoke get_next Status open(); // invoke get_next to get next batch documents from elasticsearch - Status get_next(bool *eos, std::string* response); + Status get_next(bool *eos, ScrollParser** parser); // clear scroll context from elasticsearch Status close(); @@ -68,7 +68,6 @@ class ESScanReader { int _batch_size; std::string _cached_response; - ScrollParser _parser; }; } diff --git a/be/src/util/es_scroll_parser.cpp b/be/src/util/es_scroll_parser.cpp index bd2069df98b2f0..42a28b060bd083 100644 --- a/be/src/util/es_scroll_parser.cpp +++ b/be/src/util/es_scroll_parser.cpp @@ -14,8 +14,9 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. 
+ #include "es_scroll_parser.h" -#include "rapidjson/document.h" + #include "common/logging.h" #include "common/status.h" @@ -27,52 +28,65 @@ const char* FIELD_INNER_HITS = "hits"; const char* FIELD_SOURCE = "_source"; const char* FIELD_TOTAL = "total"; -ScrollParser::ScrollParser() { - _eos = false; - _total = 0; +ScrollParser::ScrollParser(const std::string& scroll_id, int total, int size) : + _scroll_id(scroll_id), + _total(total), + _size(size) { } ScrollParser::~ScrollParser() { } -Status ScrollParser::parse(const std::string& scroll_result) { +ScrollParser* ScrollParser::parse_from_string(const std::string& scroll_result) { + ScrollParser* scroll_parser = nullptr; rapidjson::Document document_node; document_node.Parse<0>(scroll_result.c_str()); + if (!document_node.HasMember(FIELD_SCROLL_ID)) { - return Status("maybe not a scroll request"); + LOG(ERROR) << "maybe not a scroll request"; + return nullptr; } + rapidjson::Value &scroll_node = document_node[FIELD_SCROLL_ID]; - _scroll_id = scroll_node.GetString(); + std::string scroll_id = scroll_node.GetString(); // { hits: { total : 2, "hits" : [ {}, {}, {} ]}} rapidjson::Value &outer_hits_node = document_node[FIELD_HITS]; - rapidjson::Value &total = document_node[FIELD_TOTAL]; - _total = total.GetInt(); - if (_total == 0) { - _eos = true; - return Status::OK; + rapidjson::Value &field_total = document_node[FIELD_TOTAL]; + int total = field_total.GetInt(); + if (total == 0) { + scroll_parser = new ScrollParser(scroll_id, total, 0); + return scroll_parser; } - VLOG(1) << "es_scan_reader total hits: " << _total << " documents"; + + VLOG(1) << "es_scan_reader total hits: " << total << " documents"; rapidjson::Value &inner_hits_node = outer_hits_node[FIELD_INNER_HITS]; if (!inner_hits_node.IsArray()) { - return Status("invalid response from elasticsearch"); - } - _size = inner_hits_node.Size(); - if (_size < _batch_size) { - _eos = true; + LOG(ERROR) << "maybe not a scroll request"; + return nullptr; } - return 
Status::OK; -} -bool ScrollParser::has_next() { - return _eos; + int size = inner_hits_node.Size(); + scroll_parser = new ScrollParser(scroll_id, total, size); + return scroll_parser; } -bool ScrollParser::count() { +int ScrollParser::get_size() { return _size; } -std::string ScrollParser::get_scroll_id() { +const std::string& ScrollParser::get_scroll_id() { return _scroll_id; } + +int ScrollParser::get_total() { + return _total; +} + + +Status ScrollParser::read_next_line(const char** ptr, size_t* size, bool* line_eof) { + *line_eof = true; + return Status::OK; +} + } diff --git a/be/src/util/es_scroll_parser.h b/be/src/util/es_scroll_parser.h index bd9dbbbac42dff..f74c677dee5b76 100644 --- a/be/src/util/es_scroll_parser.h +++ b/be/src/util/es_scroll_parser.h @@ -14,31 +14,36 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. + #pragma once + #include +#include "rapidjson/document.h" namespace doris { class Status; + class ScrollParser { public: - ScrollParser(); + ScrollParser(const std::string& scroll_id, int total, int size); ~ScrollParser(); - std::string get_scroll_id(); - bool count(); - uint32_t total(); - Status parse(const std::string& scroll_result); - bool has_next(); - void set_batch_size(int batch_size) { - _batch_size = batch_size; - } + + static ScrollParser* parse_from_string(const std::string& scroll_result); + + Status read_next_line(const char** ptr, size_t* size, bool* line_eof); + + const std::string& get_scroll_id(); + int get_total(); + int get_size(); private: - std::string _scroll_id; - bool _eos; + + const std::string& _scroll_id; int _total; int _size; - int _batch_size; + + //const rapidjson::Value& _inner_hits_node; }; } diff --git a/be/test/util/es_scan_reader_test.cpp b/be/test/util/es_scan_reader_test.cpp index 2b5d7f84b34b81..f0159efc924d2f 100644 --- a/be/test/util/es_scan_reader_test.cpp +++ b/be/test/util/es_scan_reader_test.cpp @@ 
-226,25 +226,25 @@ TEST_F(MockESServerTest, workflow) { // ASSERT_TRUE(st.ok()); bool eos = false; while(!eos){ - std::string response; - st = reader.get_next(&eos, &response); + ScrollParser* parser = nullptr; + st = reader.get_next(&eos, &parser); if(eos) { break; } - rapidjson::Document docuemnt_node; - docuemnt_node.Parse<0>(response.c_str()); - rapidjson::Value &scroll_node = docuemnt_node["_scroll_id"]; - std::string _scroll_id = scroll_node.GetString(); - int id = atoi(_scroll_id.c_str()); - rapidjson::Value &outer_hits_node = docuemnt_node["hits"]; - rapidjson::Value &inner_hits_node = outer_hits_node["hits"]; - rapidjson::Value &source_node = inner_hits_node[0]; - rapidjson::Value &id_node = source_node["id"]; - rapidjson::Value &value_node = source_node["value"]; - ASSERT_EQ(id, id_node.GetInt()); - std::string value = value_node.GetString(); - ASSERT_EQ(id, atoi(value.c_str())); - ASSERT_TRUE(st.ok()); + //rapidjson::Document docuemnt_node; + //docuemnt_node.Parse<0>(response.c_str()); + //rapidjson::Value &scroll_node = docuemnt_node["_scroll_id"]; + //std::string _scroll_id = scroll_node.GetString(); + //int id = atoi(_scroll_id.c_str()); + //rapidjson::Value &outer_hits_node = docuemnt_node["hits"]; + //rapidjson::Value &inner_hits_node = outer_hits_node["hits"]; + //rapidjson::Value &source_node = inner_hits_node[0]; + //rapidjson::Value &id_node = source_node["id"]; + //rapidjson::Value &value_node = source_node["value"]; + //ASSERT_EQ(id, id_node.GetInt()); + //std::string value = value_node.GetString(); + //ASSERT_EQ(id, atoi(value.c_str())); + //ASSERT_TRUE(st.ok()); } auto cst = reader.close(); ASSERT_TRUE(cst.ok()); From e893d36f916a4d4df973a754ee913a21d535f3c3 Mon Sep 17 00:00:00 2001 From: lide-reed Date: Tue, 26 Mar 2019 22:16:31 +0800 Subject: [PATCH 07/73] Add fill_tuple to ScrollParser --- be/src/exec/es_http_scanner.cpp | 57 +---------- be/src/exec/es_http_scanner.h | 4 - be/src/util/es_scroll_parser.cpp | 171 
+++++++++++++++++++++++++++++-- be/src/util/es_scroll_parser.h | 15 ++- 4 files changed, 178 insertions(+), 69 deletions(-) diff --git a/be/src/exec/es_http_scanner.cpp b/be/src/exec/es_http_scanner.cpp index 183f89cc2b919e..6ce0cc41fa8761 100644 --- a/be/src/exec/es_http_scanner.cpp +++ b/be/src/exec/es_http_scanner.cpp @@ -55,7 +55,6 @@ EsHttpScanner::EsHttpScanner( _tuple_desc(nullptr), _counter(counter), _es_reader(nullptr), - _tuple_row(nullptr), _rows_read_counter(nullptr), _read_timer(nullptr), _materialize_timer(nullptr) { @@ -82,10 +81,6 @@ Status EsHttpScanner::open() { } } - Tuple* tuple = (Tuple*) _mem_pool.allocate(_tuple_desc->byte_size()); - _tuple_row = (TupleRow*) _mem_pool.allocate(sizeof(Tuple*)); - _tuple_row->set_tuple(0, tuple); - const std::string& host = _properties.at(ESScanReader::KEY_HOST_PORT); _es_reader.reset(new ESScanReader(host, _properties)); if (_es_reader == nullptr) { @@ -94,11 +89,6 @@ Status EsHttpScanner::open() { _es_reader->open(); - //_text_converter.reset(new(std::nothrow) TextConverter('\\')); - //if (_text_converter == nullptr) { - // return Status("No memory error."); - //} - _rows_read_counter = ADD_COUNTER(_profile, "RowsRead", TUnit::UNIT); _read_timer = ADD_TIMER(_profile, "TotalRawReadTime(*)"); _materialize_timer = ADD_TIMER(_profile, "MaterializeTupleTime(*)"); @@ -106,35 +96,6 @@ Status EsHttpScanner::open() { return Status::OK; } -bool EsHttpScanner::fill_tuple(const char* ptr, size_t size, - Tuple* tuple, MemPool* mem_pool) { - //int ctx_idx = 0; - for (auto slot_desc : _tuple_desc->slots()) { - if (!slot_desc->is_materialized()) { - continue; - } - // ExprContext* ctx = _dest_expr_ctx[ctx_idx++]; - // void* value = ctx->get_value(_tuple_row); - // if (value == nullptr) { - // if (slot_desc->is_nullable()) { - // tuple->set_null(slot_desc->null_indicator_offset()); - // continue; - // } else { - // std::stringstream error_msg; - // error_msg << "column(" << slot_desc->col_name() << ") value is null"; - // 
_state->append_error_msg_to_file( - // std::string(ptr, size), error_msg.str()); - // _counter->num_rows_filtered++; - // return false; - // } - // } - // tuple->set_not_null(slot_desc->null_indicator_offset()); - // void* slot = tuple->get_slot(slot_desc->tuple_offset()); - // RawValue::write(value, slot, slot_desc->type(), mem_pool); - } - return true; -} - Status EsHttpScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof) { SCOPED_TIMER(_read_timer); while (!eof) { @@ -142,19 +103,11 @@ Status EsHttpScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof) { if (_line_eof) { RETURN_IF_ERROR(_es_reader->get_next(eof, &parser)); } - const char* ptr = nullptr; - size_t size = 0; - RETURN_IF_ERROR(parser->read_next_line(&ptr, &size, &_line_eof)); - if (size == 0) { - continue; - } - { - COUNTER_UPDATE(_rows_read_counter, 1); - SCOPED_TIMER(_materialize_timer); - if (fill_tuple(ptr, size, tuple, tuple_pool)) { - break; - } - } + + COUNTER_UPDATE(_rows_read_counter, 1); + SCOPED_TIMER(_materialize_timer); + RETURN_IF_ERROR( + parser->fill_tuple(_tuple_desc, tuple, tuple_pool, &_line_eof)); } return Status::OK; } diff --git a/be/src/exec/es_http_scanner.h b/be/src/exec/es_http_scanner.h index bd438cf0c81754..8320df7b7aabba 100644 --- a/be/src/exec/es_http_scanner.h +++ b/be/src/exec/es_http_scanner.h @@ -72,9 +72,6 @@ class EsHttpScanner { private: - bool fill_tuple(const char* ptr, size_t size, - Tuple* tuple, MemPool* mem_pool); - RuntimeState* _state; RuntimeProfile* _profile; TupleId _tuple_id; @@ -96,7 +93,6 @@ class EsHttpScanner { EsScanCounter* _counter; std::unique_ptr _es_reader; std::map _slots_map; - TupleRow* _tuple_row; // Profile RuntimeProfile::Counter* _rows_read_counter; diff --git a/be/src/util/es_scroll_parser.cpp b/be/src/util/es_scroll_parser.cpp index 42a28b060bd083..ffe04b0f5ca1b7 100644 --- a/be/src/util/es_scroll_parser.cpp +++ b/be/src/util/es_scroll_parser.cpp @@ -17,21 +17,34 @@ #include "es_scroll_parser.h" +#include 
+#include +#include + #include "common/logging.h" #include "common/status.h" +#include "runtime/mem_pool.h" +#include "runtime/mem_tracker.h" namespace doris { -const char* FIELD_SCROLL_ID = "_scroll_id"; -const char* FIELD_HITS = "hits"; -const char* FIELD_INNER_HITS = "hits"; -const char* FIELD_SOURCE = "_source"; -const char* FIELD_TOTAL = "total"; +static const char* FIELD_SCROLL_ID = "_scroll_id"; +static const char* FIELD_HITS = "hits"; +static const char* FIELD_INNER_HITS = "hits"; +static const char* FIELD_SOURCE = "_source"; +static const char* FIELD_TOTAL = "total"; + +static const string ERROR_INVALID_COL_DATA = "Data source returned inconsistent column data. " + "Expected value of type $0 based on column metadata. This likely indicates a " + "problem with the data source library."; +static const string ERROR_MEM_LIMIT_EXCEEDED = "DataSourceScanNode::$0() failed to allocate " + "$1 bytes for $2."; ScrollParser::ScrollParser(const std::string& scroll_id, int total, int size) : _scroll_id(scroll_id), _total(total), - _size(size) { + _size(size), + _line_index(0) { } ScrollParser::~ScrollParser() { @@ -55,7 +68,7 @@ ScrollParser* ScrollParser::parse_from_string(const std::string& scroll_result) rapidjson::Value &field_total = document_node[FIELD_TOTAL]; int total = field_total.GetInt(); if (total == 0) { - scroll_parser = new ScrollParser(scroll_id, total, 0); + scroll_parser = new ScrollParser(scroll_id, total); return scroll_parser; } @@ -68,6 +81,7 @@ ScrollParser* ScrollParser::parse_from_string(const std::string& scroll_result) int size = inner_hits_node.Size(); scroll_parser = new ScrollParser(scroll_id, total, size); + scroll_parser->set_inner_hits_node(inner_hits_node); return scroll_parser; } @@ -83,10 +97,147 @@ int ScrollParser::get_total() { return _total; } +Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, + Tuple* tuple, MemPool* tuple_pool, bool* line_eof) { + if (_size <= 0 || _line_index >= _size) { + *line_eof = true; + 
return Status::OK; + } + + rapidjson::Value& obj = _inner_hits_node[_line_index++]; + rapidjson::Value& line = obj[FIELD_SOURCE]; + if (!line.IsObject()) { + *line_eof = true; + return Status("Parse inner hits failed"); + } + + tuple->init(tuple_desc->byte_size()); + for (int i = 0; i < tuple_desc->slots().size(); ++i) { + const SlotDescriptor* slot_desc = tuple_desc->slots()[i]; + + if (!slot_desc->is_materialized()) { + continue; + } + + const char* col_name = slot_desc->col_name().c_str(); + rapidjson::Value::ConstMemberIterator itr = line.FindMember(col_name); + if (itr == line.MemberEnd()) { + tuple->set_null(slot_desc->null_indicator_offset()); + continue; + } + + tuple->set_not_null(slot_desc->null_indicator_offset()); + rapidjson::Value &col = line[col_name]; + + void* slot = tuple->get_slot(slot_desc->tuple_offset()); + switch (slot_desc->type().type) { + case TYPE_CHAR: + case TYPE_VARCHAR: { + if (!col.IsString()) { + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "STRING")); + } + const std::string& val = col.GetString(); + size_t val_size = col.GetStringLength(); + char* buffer = reinterpret_cast(tuple_pool->try_allocate_unaligned(val_size)); + if (UNLIKELY(buffer == NULL)) { + string details = strings::Substitute(ERROR_MEM_LIMIT_EXCEEDED, "MaterializeNextRow", + val_size, "string slot"); + return tuple_pool->mem_tracker()->MemLimitExceeded(NULL, details, val_size); + } + memcpy(buffer, val.data(), val_size); + reinterpret_cast(slot)->ptr = buffer; + reinterpret_cast(slot)->len = val_size; + break; + } + + case TYPE_TINYINT: { + if (!col.IsNumber()) { + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "TINYINT")); + } + *reinterpret_cast(slot) = (int8_t)col.GetInt(); + break; + } + + case TYPE_SMALLINT: { + if (!col.IsNumber()) { + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "SMALLINT")); + } + *reinterpret_cast(slot) = (int16_t)col.GetInt(); + break; + } + + case TYPE_INT: { + if (!col.IsNumber()) { + return 
Status(strings::Substitute(ERROR_INVALID_COL_DATA, "INT")); + } + *reinterpret_cast(slot) = (int32_t)col.GetInt(); + break; + } + + case TYPE_BIGINT: { + if (!col.IsNumber()) { + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "BIGINT")); + } + *reinterpret_cast(slot) = col.GetInt64(); + break; + } + + case TYPE_LARGEINT: { + if (!col.IsNumber()) { + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "LARGEINT")); + } + *reinterpret_cast(slot) = col.GetInt64(); + break; + } + + case TYPE_DOUBLE: { + if (!col.IsNumber()) { + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "DOUBLE")); + } + *reinterpret_cast(slot) = col.GetDouble(); + break; + } + + case TYPE_FLOAT: { + if (!col.IsNumber()) { + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "FLOAT")); + } + *reinterpret_cast(slot) = col.GetDouble(); + break; + } + + case TYPE_BOOLEAN: { + if (!col.IsBool()) { + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "BOOLEAN")); + } + *reinterpret_cast(slot) = col.GetBool(); + break; + } + + case TYPE_DATE: { + if (!col.IsNumber() || + !reinterpret_cast(slot)->from_unixtime(col.GetInt64())) { + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATE")); + } + reinterpret_cast(slot)->cast_to_date(); + break; + } + + case TYPE_DATETIME: { + if (!col.IsNumber() || + !reinterpret_cast(slot)->from_unixtime(col.GetInt64())) { + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATETIME")); + } + reinterpret_cast(slot)->set_type(TIME_DATETIME); + break; + } + + default: + DCHECK(false); + break; + } + } -Status ScrollParser::read_next_line(const char** ptr, size_t* size, bool* line_eof) { - *line_eof = true; return Status::OK; } - } diff --git a/be/src/util/es_scroll_parser.h b/be/src/util/es_scroll_parser.h index f74c677dee5b76..22e9645cd7fa28 100644 --- a/be/src/util/es_scroll_parser.h +++ b/be/src/util/es_scroll_parser.h @@ -18,7 +18,10 @@ #pragma once #include + #include "rapidjson/document.h" +#include 
"runtime/descriptors.h" +#include "runtime/tuple.h" namespace doris { @@ -27,12 +30,17 @@ class Status; class ScrollParser { public: - ScrollParser(const std::string& scroll_id, int total, int size); + ScrollParser(const std::string& scroll_id, int total, int size = 0); ~ScrollParser(); static ScrollParser* parse_from_string(const std::string& scroll_result); - Status read_next_line(const char** ptr, size_t* size, bool* line_eof); + Status fill_tuple(const TupleDescriptor* _tuple_desc, Tuple* tuple, + MemPool* mem_pool, bool* line_eof); + + void set_inner_hits_node(rapidjson::Value& inner_hits_node) { + _inner_hits_node = inner_hits_node; + } const std::string& get_scroll_id(); int get_total(); @@ -43,7 +51,8 @@ class ScrollParser { const std::string& _scroll_id; int _total; int _size; + rapidjson::SizeType _line_index; - //const rapidjson::Value& _inner_hits_node; + rapidjson::Value _inner_hits_node; }; } From d1f679d64a61a051d916efee425e8d1bc8fb93b9 Mon Sep 17 00:00:00 2001 From: lide-reed Date: Wed, 27 Mar 2019 16:55:25 +0800 Subject: [PATCH 08/73] Add member functions to ExtLiteral --- be/src/exec/es_http_scanner.cpp | 3 +- be/src/exec/es_predicate.cpp | 90 ++++++++++++++++++++++++++++---- be/src/exec/es_predicate.h | 52 ++++++++++++------ be/src/runtime/large_int_value.h | 7 +++ be/src/runtime/string_value.cpp | 4 ++ be/src/runtime/string_value.h | 2 + 6 files changed, 129 insertions(+), 29 deletions(-) diff --git a/be/src/exec/es_http_scanner.cpp b/be/src/exec/es_http_scanner.cpp index 6ce0cc41fa8761..0ee80a4e1a6833 100644 --- a/be/src/exec/es_http_scanner.cpp +++ b/be/src/exec/es_http_scanner.cpp @@ -106,8 +106,7 @@ Status EsHttpScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof) { COUNTER_UPDATE(_rows_read_counter, 1); SCOPED_TIMER(_materialize_timer); - RETURN_IF_ERROR( - parser->fill_tuple(_tuple_desc, tuple, tuple_pool, &_line_eof)); + RETURN_IF_ERROR(parser->fill_tuple(_tuple_desc, tuple, tuple_pool, &_line_eof)); } return Status::OK; } diff 
--git a/be/src/exec/es_predicate.cpp b/be/src/exec/es_predicate.cpp index fb35ae3e7d53f9..409e8dba1041c3 100644 --- a/be/src/exec/es_predicate.cpp +++ b/be/src/exec/es_predicate.cpp @@ -34,6 +34,8 @@ #include "runtime/client_cache.h" #include "runtime/runtime_state.h" #include "runtime/row_batch.h" +#include "runtime/datetime_value.h" +#include "runtime/large_int_value.h" #include "runtime/string_value.h" #include "runtime/tuple_row.h" @@ -45,6 +47,72 @@ namespace doris { using namespace std; +ExtLiteral::~ExtLiteral(){ +} + +int8_t ExtLiteral::to_byte() { + DCHECK(_type != TYPE_TINYINT); + return *(reinterpret_cast(_value)); +} + +int16_t ExtLiteral::to_short() { + DCHECK(_type != TYPE_SMALLINT); + return *(reinterpret_cast(_value)); +} + +int32_t ExtLiteral::to_int() { + DCHECK(_type != TYPE_INT); + return *(reinterpret_cast(_value)); +} + +int64_t ExtLiteral::to_long() { + DCHECK(_type != TYPE_BIGINT); + return *(reinterpret_cast(_value)); +} + +float ExtLiteral::to_float() { + DCHECK(_type != TYPE_FLOAT); + return *(reinterpret_cast(_value)); +} + +double ExtLiteral::to_double() { + DCHECK(_type != TYPE_DOUBLE); + return *(reinterpret_cast(_value)); +} + +std::string ExtLiteral::to_string() { + DCHECK(_type != TYPE_VARCHAR && _type != TYPE_CHAR); + return (reinterpret_cast(_value))->to_string(); +} + +std::string ExtLiteral::to_date_string() { + DCHECK(_type != TYPE_DATE && _type != TYPE_DATETIME); + DateTimeValue date_value = *reinterpret_cast(_value); + char str[MAX_DTVALUE_STR_LEN]; + date_value.to_string(str); + return std::string(str, strlen(str)); +} + +bool ExtLiteral::to_bool() { + DCHECK(_type != TYPE_BOOLEAN); + return *(reinterpret_cast(_value)); +} + +std::string ExtLiteral::to_decimal_string() { + DCHECK(_type != TYPE_DECIMAL); + return reinterpret_cast(_value)->to_string(); +} + +std::string ExtLiteral::to_decimalv2_string() { + DCHECK(_type != TYPE_DECIMALV2); + return reinterpret_cast(_value)->to_string(); +} + +std::string 
ExtLiteral::to_largeint_string() { + DCHECK(_type != TYPE_LARGEINT); + return LargeIntValue::to_string(*reinterpret_cast<__int128*>(_value)); +} + EsPredicate::EsPredicate(ExprContext* conjunct_ctx, const TupleDescriptor* tuple_desc) : _context(conjunct_ctx), @@ -93,14 +161,14 @@ bool EsPredicate::build_disjuncts_list(Expr* conjunct, vector& dis } - std::shared_ptr literal(new ExtLiteral(expr->node_type())); - literal->value = _context->get_value(expr, NULL); + std::shared_ptr literal(new ExtLiteral( + expr->type().type, _context->get_value(expr, NULL))); std::unique_ptr predicate(new ExtBinaryPredicate( TExprNodeType::BINARY_PRED, slot_desc->col_name(), slot_desc->type(), op, - *literal)); + literal)); disjuncts.emplace_back(std::move(*predicate)); return true; @@ -109,11 +177,11 @@ bool EsPredicate::build_disjuncts_list(Expr* conjunct, vector& dis if (is_match_func(conjunct)) { Expr* expr = conjunct->get_child(1); - std::shared_ptr literal(new ExtLiteral(expr->node_type())); - literal->value = _context->get_value(expr, NULL); + std::shared_ptr literal(new ExtLiteral( + expr->type().type, _context->get_value(expr, NULL))); - vector query_conditions; - query_conditions.push_back(std::move(*literal)); + vector> query_conditions; + query_conditions.push_back(literal); vector cols; //TODO std::unique_ptr predicate(new ExtFunction( @@ -128,7 +196,7 @@ bool EsPredicate::build_disjuncts_list(Expr* conjunct, vector& dis if (TExprNodeType::IN_PRED == conjunct->node_type()) { TExtInPredicate ext_in_predicate; - vector in_pred_values; + vector> in_pred_values; InPredicate* pred = dynamic_cast(conjunct); ext_in_predicate.__set_is_not_in(pred->is_not_in()); if (Expr::type_without_cast(pred->get_child(0)) != TExprNodeType::SLOT_REF) { @@ -155,9 +223,9 @@ bool EsPredicate::build_disjuncts_list(Expr* conjunct, vector& dis } Expr* expr = conjunct->get_child(i); - std::shared_ptr literal(new ExtLiteral(expr->node_type())); - literal->value = _context->get_value(expr, NULL); - 
in_pred_values.push_back(*literal); + std::shared_ptr literal(new ExtLiteral( + expr->type().type, _context->get_value(expr, NULL))); + in_pred_values.push_back(literal); } std::unique_ptr predicate(new ExtInPredicate( diff --git a/be/src/exec/es_predicate.h b/be/src/exec/es_predicate.h index 8c603b21e6be0f..866eb7bc01f6f9 100644 --- a/be/src/exec/es_predicate.h +++ b/be/src/exec/es_predicate.h @@ -27,13 +27,14 @@ #include "gen_cpp/PaloExternalDataSourceService_types.h" #include "runtime/descriptors.h" #include "runtime/tuple.h" +#include "runtime/primitive_type.h" namespace doris { class Status; class ExprContext; class ExtBinaryPredicate; - +class ExtLiteral; struct ExtPredicate { ExtPredicate(TExprNodeType::type node_type) : node_type(node_type) { @@ -42,14 +43,6 @@ struct ExtPredicate { TExprNodeType::type node_type; }; -struct ExtLiteral : public ExtPredicate { - ExtLiteral(TExprNodeType::type node_type) : - ExtPredicate(node_type) { - } - - void *value; -}; - struct ExtColumnDesc { ExtColumnDesc(std::string name, TypeDescriptor type) : name(name), @@ -66,7 +59,7 @@ struct ExtBinaryPredicate : public ExtPredicate { std::string name, TypeDescriptor type, TExprOpcode::type op, - ExtLiteral value) : + std::shared_ptr value) : ExtPredicate(node_type), col(name, type), op(op), @@ -75,7 +68,7 @@ struct ExtBinaryPredicate : public ExtPredicate { ExtColumnDesc col; TExprOpcode::type op; - ExtLiteral value; + std::shared_ptr value; }; struct ExtInPredicate : public ExtPredicate { @@ -83,7 +76,7 @@ struct ExtInPredicate : public ExtPredicate { TExprNodeType::type node_type, std::string name, TypeDescriptor type, - vector values) : + vector> values) : ExtPredicate(node_type), is_not_in(false), col(name, type), @@ -92,12 +85,12 @@ struct ExtInPredicate : public ExtPredicate { bool is_not_in; ExtColumnDesc col; - vector values; + vector> values; }; struct ExtLikePredicate : public ExtPredicate { ExtColumnDesc col; - ExtLiteral value; + std::shared_ptr value; }; struct 
ExtIsNullPredicate : public ExtPredicate { @@ -110,7 +103,7 @@ struct ExtFunction : public ExtPredicate { TExprNodeType::type node_type, string func_name, vector cols, - vector values) : + vector> values) : ExtPredicate(node_type), func_name(func_name), cols(cols), @@ -119,7 +112,34 @@ struct ExtFunction : public ExtPredicate { string func_name; vector cols; - vector values; + vector> values; +}; + +class ExtLiteral { + public: + ExtLiteral(PrimitiveType type, void *value) : + _type(type), + _value(value) { + } + ~ExtLiteral(); + + int8_t to_byte(); + int16_t to_short(); + int32_t to_int(); + int64_t to_long(); + float to_float(); + double to_double(); + std::string to_string(); + std::string to_date_string(); + bool to_bool(); + std::string to_decimal_string(); + std::string to_decimalv2_string(); + std::string to_largeint_string(); + + private: + + PrimitiveType _type; + void *_value; }; class EsPredicate { diff --git a/be/src/runtime/large_int_value.h b/be/src/runtime/large_int_value.h index 4ced5bb0d43242..4b7d4f1a6df21e 100644 --- a/be/src/runtime/large_int_value.h +++ b/be/src/runtime/large_int_value.h @@ -52,6 +52,13 @@ class LargeIntValue { *len = (buffer + *len) - d; return d; } + + static std::string to_string(__int128 value) { + char buf[64] = {0}; + int len = 64; + char *str = to_string(value, buf, &len); + return std::string(str, len); + } }; std::ostream& operator<<(std::ostream& os, __int128 const& value); diff --git a/be/src/runtime/string_value.cpp b/be/src/runtime/string_value.cpp index 71a91faba50ea1..8ac089236fbc2f 100644 --- a/be/src/runtime/string_value.cpp +++ b/be/src/runtime/string_value.cpp @@ -27,6 +27,10 @@ std::string StringValue::debug_string() const { return std::string(ptr, len); } +std::string StringValue::to_string() const { + return std::string(ptr, len); +} + std::ostream& operator<<(std::ostream& os, const StringValue& string_value) { return os << string_value.debug_string(); } diff --git a/be/src/runtime/string_value.h 
b/be/src/runtime/string_value.h index f840f0d905dcb8..d03604625c4f1d 100644 --- a/be/src/runtime/string_value.h +++ b/be/src/runtime/string_value.h @@ -105,6 +105,8 @@ struct StringValue { std::string debug_string() const; + std::string to_string() const; + // Returns the substring starting at start_pos until the end of string. StringValue substring(int start_pos) const; From 231b9fd0867fd19033b7c950e00fb1bda50328bf Mon Sep 17 00:00:00 2001 From: lide-reed Date: Wed, 27 Mar 2019 18:23:55 +0800 Subject: [PATCH 09/73] Add value_to_string in ExtLiteral --- be/src/exec/es_predicate.cpp | 49 ++++++++++++++++++++++++++++++++++++ be/src/exec/es_predicate.h | 2 ++ 2 files changed, 51 insertions(+) diff --git a/be/src/exec/es_predicate.cpp b/be/src/exec/es_predicate.cpp index 409e8dba1041c3..358986c052fc14 100644 --- a/be/src/exec/es_predicate.cpp +++ b/be/src/exec/es_predicate.cpp @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -113,6 +114,54 @@ std::string ExtLiteral::to_largeint_string() { return LargeIntValue::to_string(*reinterpret_cast<__int128*>(_value)); } +std::string ExtLiteral::value_to_string() { + std::stringstream ss; + switch (_type) { + case TYPE_TINYINT: + ss << to_byte(); + break; + case TYPE_SMALLINT: + ss << to_short(); + break; + case TYPE_INT: + ss << to_int(); + break; + case TYPE_BIGINT: + ss << to_long(); + break; + case TYPE_FLOAT: + ss << to_float(); + break; + case TYPE_DOUBLE: + ss << to_double(); + break; + case TYPE_CHAR: + case TYPE_VARCHAR: + ss << to_string(); + break; + case TYPE_DATE: + case TYPE_DATETIME: + ss << to_date_string(); + break; + case TYPE_BOOLEAN: + ss << to_bool(); + break; + case TYPE_DECIMAL: + ss << to_decimal_string(); + break; + case TYPE_DECIMALV2: + ss << to_decimalv2_string(); + break; + case TYPE_LARGEINT: + ss << to_largeint_string(); + break; + default: + DCHECK(false); + break; + } + return ss.str(); +} + EsPredicate::EsPredicate(ExprContext* conjunct_ctx, const TupleDescriptor* tuple_desc) : 
_context(conjunct_ctx), diff --git a/be/src/exec/es_predicate.h b/be/src/exec/es_predicate.h index 866eb7bc01f6f9..1a310516753e8e 100644 --- a/be/src/exec/es_predicate.h +++ b/be/src/exec/es_predicate.h @@ -136,6 +136,8 @@ class ExtLiteral { std::string to_decimalv2_string(); std::string to_largeint_string(); + std::string value_to_string(); + private: PrimitiveType _type; From 70f3fa055098acdb502314ec2ab6056ec79480dc Mon Sep 17 00:00:00 2001 From: lide-reed Date: Wed, 27 Mar 2019 21:03:40 +0800 Subject: [PATCH 10/73] Fix some issues --- be/src/exec/es_http_scanner.cpp | 16 ++++++++++------ be/src/exec/es_predicate.cpp | 16 ++++++++-------- be/src/exec/es_predicate.h | 7 ++++--- be/src/util/es_scan_reader.h | 2 +- 4 files changed, 23 insertions(+), 18 deletions(-) diff --git a/be/src/exec/es_http_scanner.cpp b/be/src/exec/es_http_scanner.cpp index 0ee80a4e1a6833..bda0c6f4e4d980 100644 --- a/be/src/exec/es_http_scanner.cpp +++ b/be/src/exec/es_http_scanner.cpp @@ -98,16 +98,20 @@ Status EsHttpScanner::open() { Status EsHttpScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof) { SCOPED_TIMER(_read_timer); - while (!eof) { + do { ScrollParser* parser = nullptr; - if (_line_eof) { + if (!_line_eof) { RETURN_IF_ERROR(_es_reader->get_next(eof, &parser)); + if (*eof) break; } - COUNTER_UPDATE(_rows_read_counter, 1); - SCOPED_TIMER(_materialize_timer); - RETURN_IF_ERROR(parser->fill_tuple(_tuple_desc, tuple, tuple_pool, &_line_eof)); - } + if (parser != nullptr) { + COUNTER_UPDATE(_rows_read_counter, 1); + SCOPED_TIMER(_materialize_timer); + RETURN_IF_ERROR(parser->fill_tuple(_tuple_desc, tuple, tuple_pool, &_line_eof)); + } + } while (!*eof); + return Status::OK; } diff --git a/be/src/exec/es_predicate.cpp b/be/src/exec/es_predicate.cpp index 358986c052fc14..d91b2cceb26335 100644 --- a/be/src/exec/es_predicate.cpp +++ b/be/src/exec/es_predicate.cpp @@ -176,11 +176,11 @@ bool EsPredicate::build_disjuncts_list() { return build_disjuncts_list(_context->root(), 
_disjuncts); } -vector EsPredicate::get_predicate_list(){ +vector> EsPredicate::get_predicate_list(){ return _disjuncts; } -bool EsPredicate::build_disjuncts_list(Expr* conjunct, vector& disjuncts) { +bool EsPredicate::build_disjuncts_list(Expr* conjunct, vector>& disjuncts) { if (TExprNodeType::BINARY_PRED == conjunct->node_type()) { if (conjunct->children().size() != 2) { VLOG(1) << "get disjuncts fail: number of childs is not 2"; @@ -212,14 +212,14 @@ bool EsPredicate::build_disjuncts_list(Expr* conjunct, vector& dis std::shared_ptr literal(new ExtLiteral( expr->type().type, _context->get_value(expr, NULL))); - std::unique_ptr predicate(new ExtBinaryPredicate( + std::shared_ptr predicate(new ExtBinaryPredicate( TExprNodeType::BINARY_PRED, slot_desc->col_name(), slot_desc->type(), op, literal)); - disjuncts.emplace_back(std::move(*predicate)); + disjuncts.push_back(predicate); return true; } @@ -233,12 +233,12 @@ bool EsPredicate::build_disjuncts_list(Expr* conjunct, vector& dis query_conditions.push_back(literal); vector cols; //TODO - std::unique_ptr predicate(new ExtFunction( + std::shared_ptr predicate(new ExtFunction( TExprNodeType::FUNCTION_CALL, conjunct->fn().name.function_name, cols, query_conditions)); - disjuncts.emplace_back(std::move(*predicate)); + disjuncts.push_back(predicate); return true; } @@ -277,13 +277,13 @@ bool EsPredicate::build_disjuncts_list(Expr* conjunct, vector& dis in_pred_values.push_back(literal); } - std::unique_ptr predicate(new ExtInPredicate( + std::shared_ptr predicate(new ExtInPredicate( TExprNodeType::IN_PRED, slot_desc->col_name(), slot_desc->type(), in_pred_values)); - disjuncts.emplace_back(std::move(*predicate)); + disjuncts.push_back(predicate); return true; } diff --git a/be/src/exec/es_predicate.h b/be/src/exec/es_predicate.h index 1a310516753e8e..b941ac00df8799 100644 --- a/be/src/exec/es_predicate.h +++ b/be/src/exec/es_predicate.h @@ -150,19 +150,20 @@ class EsPredicate { EsPredicate(ExprContext* conjunct_ctx, 
const TupleDescriptor* tuple_desc); ~EsPredicate(); - vector get_predicate_list(); + vector> get_predicate_list(); bool build_disjuncts_list(); private: - bool build_disjuncts_list(Expr* conjunct, vector& disjuncts); + bool build_disjuncts_list(Expr* conjunct, + vector>& disjuncts); bool is_match_func(Expr* conjunct); SlotDescriptor* get_slot_desc(SlotRef* slotRef); ExprContext* _context; int _disjuncts_num; const TupleDescriptor* _tuple_desc; - vector _disjuncts; + vector> _disjuncts; }; } diff --git a/be/src/util/es_scan_reader.h b/be/src/util/es_scan_reader.h index 6579cf6fbbb64d..d7499ff27bafdf 100644 --- a/be/src/util/es_scan_reader.h +++ b/be/src/util/es_scan_reader.h @@ -31,7 +31,7 @@ class ESScanReader { public: static constexpr const char* KEY_USER_NAME = "user"; - static constexpr const char* KEY_PASS_WORD = "passwd"; + static constexpr const char* KEY_PASS_WORD = "password"; static constexpr const char* KEY_HOST_PORT = "host_port"; static constexpr const char* KEY_INDEX = "index"; static constexpr const char* KEY_TYPE = "type"; From 9049dc9f451811ce1ab69b885edf3235dd255725 Mon Sep 17 00:00:00 2001 From: lide-reed Date: Thu, 28 Mar 2019 14:48:02 +0800 Subject: [PATCH 11/73] Improve EsPredicate related structures --- be/src/exec/es_http_scan_node.cpp | 31 +++---- be/src/exec/es_http_scan_node.h | 3 +- be/src/exec/es_http_scanner.cpp | 2 +- be/src/exec/es_http_scanner.h | 2 +- be/src/exec/es_predicate.cpp | 54 ++++++------ be/src/exec/es_predicate.h | 141 +++++++++++++++++------------- 6 files changed, 121 insertions(+), 112 deletions(-) diff --git a/be/src/exec/es_http_scan_node.cpp b/be/src/exec/es_http_scan_node.cpp index 3ca1f838709427..081060b4925c4c 100644 --- a/be/src/exec/es_http_scan_node.cpp +++ b/be/src/exec/es_http_scan_node.cpp @@ -239,17 +239,9 @@ void EsHttpScanNode::debug_string(int ident_level, std::stringstream* out) const } Status EsHttpScanNode::scanner_scan( - TupleId _tuple_id, - std::map properties, + std::unique_ptr scanner, const 
std::vector& conjunct_ctxs, EsScanCounter* counter) { - std::unique_ptr scanner(new EsHttpScanner( - _runtime_state, - runtime_profile(), - _tuple_id, - properties, - conjunct_ctxs, - counter)); RETURN_IF_ERROR(scanner->open()); bool scanner_eof = false; @@ -368,18 +360,21 @@ void EsHttpScanNode::scanner_worker(int start_idx, int length) { const TEsScanRange& es_scan_range = _scan_ranges[start_idx + i].scan_range.es_scan_range; - _properties[ESScanReader::KEY_INDEX] = es_scan_range.index; + std::map properties(_properties); + properties[ESScanReader::KEY_INDEX] = es_scan_range.index; if (es_scan_range.__isset.type) { - _properties[ESScanReader::KEY_TYPE] = es_scan_range.type; + properties[ESScanReader::KEY_TYPE] = es_scan_range.type; } - - _properties[ESScanReader::KEY_SHARD] = std::to_string(es_scan_range.shard_id); - _properties[ESScanReader::KEY_BATCH_SIZE] = std::to_string(_runtime_state->batch_size()); - _properties[ESScanReader::KEY_HOST_PORT] = get_host_port(es_scan_range.es_hosts); - _properties[ESScanReader::KEY_QUERY] - = ESScrollQueryBuilder::build(_properties, _column_names, _predicates); + properties[ESScanReader::KEY_SHARD] = std::to_string(es_scan_range.shard_id); + properties[ESScanReader::KEY_BATCH_SIZE] = std::to_string(_runtime_state->batch_size()); + properties[ESScanReader::KEY_HOST_PORT] = get_host_port(es_scan_range.es_hosts); + properties[ESScanReader::KEY_QUERY] + = ESScrollQueryBuilder::build(properties, _column_names, _predicates); - status = scanner_scan(_tuple_id, _properties, scanner_expr_ctxs, &counter); + std::unique_ptr scanner(new EsHttpScanner( + _runtime_state, runtime_profile(), _tuple_id, + properties, scanner_expr_ctxs, &counter)); + status = scanner_scan(std::move(scanner), scanner_expr_ctxs, &counter); if (!status.ok()) { LOG(WARNING) << "Scanner[" << start_idx + i << "] prcess failed. 
status=" << status.get_error_msg(); diff --git a/be/src/exec/es_http_scan_node.h b/be/src/exec/es_http_scan_node.h index c9d077ee4ab1ae..b80bf263882e56 100644 --- a/be/src/exec/es_http_scan_node.h +++ b/be/src/exec/es_http_scan_node.h @@ -78,8 +78,7 @@ class EsHttpScanNode : public ScanNode { void scanner_worker(int start_idx, int length); // Scan one range - Status scanner_scan(TupleId _tuple_id, - std::map properties, + Status scanner_scan(std::unique_ptr scanner, const std::vector& conjunct_ctxs, EsScanCounter* counter); diff --git a/be/src/exec/es_http_scanner.cpp b/be/src/exec/es_http_scanner.cpp index bda0c6f4e4d980..60d7d82757bb96 100644 --- a/be/src/exec/es_http_scanner.cpp +++ b/be/src/exec/es_http_scanner.cpp @@ -35,7 +35,7 @@ EsHttpScanner::EsHttpScanner( RuntimeState* state, RuntimeProfile* profile, TupleId tuple_id, - std::map properties, + const std::map& properties, const std::vector& conjunct_ctxs, EsScanCounter* counter) : _state(state), diff --git a/be/src/exec/es_http_scanner.h b/be/src/exec/es_http_scanner.h index 8320df7b7aabba..5413e691b480d4 100644 --- a/be/src/exec/es_http_scanner.h +++ b/be/src/exec/es_http_scanner.h @@ -59,7 +59,7 @@ class EsHttpScanner { RuntimeState* state, RuntimeProfile* profile, TupleId tuple_id, - std::map properties, + const std::map& properties, const std::vector& conjunct_ctxs, EsScanCounter* counter); ~EsHttpScanner(); diff --git a/be/src/exec/es_predicate.cpp b/be/src/exec/es_predicate.cpp index d91b2cceb26335..311b15b75f440e 100644 --- a/be/src/exec/es_predicate.cpp +++ b/be/src/exec/es_predicate.cpp @@ -162,25 +162,29 @@ std::string ExtLiteral::value_to_string() { return ss.str(); } -EsPredicate::EsPredicate(ExprContext* conjunct_ctx, +EsPredicate::EsPredicate(ExprContext* context, const TupleDescriptor* tuple_desc) : - _context(conjunct_ctx), + _context(context), _disjuncts_num(0), _tuple_desc(tuple_desc) { } EsPredicate::~EsPredicate() { + for(int i=0; i < _disjuncts.size(); i++) { + delete _disjuncts[i]; + 
} + _disjuncts.clear(); } bool EsPredicate::build_disjuncts_list() { return build_disjuncts_list(_context->root(), _disjuncts); } -vector> EsPredicate::get_predicate_list(){ +vector EsPredicate::get_predicate_list(){ return _disjuncts; } -bool EsPredicate::build_disjuncts_list(Expr* conjunct, vector>& disjuncts) { +bool EsPredicate::build_disjuncts_list(Expr* conjunct, vector& disjuncts) { if (TExprNodeType::BINARY_PRED == conjunct->node_type()) { if (conjunct->children().size() != 2) { VLOG(1) << "get disjuncts fail: number of childs is not 2"; @@ -203,21 +207,20 @@ bool EsPredicate::build_disjuncts_list(Expr* conjunct, vector literal(new ExtLiteral( - expr->type().type, _context->get_value(expr, NULL))); - std::shared_ptr predicate(new ExtBinaryPredicate( + ExtLiteral literal(expr->type().type, _context->get_value(expr, NULL)); + ExtPredicate* predicate = new ExtBinaryPredicate( TExprNodeType::BINARY_PRED, slot_desc->col_name(), slot_desc->type(), op, - literal)); + literal); disjuncts.push_back(predicate); return true; @@ -226,18 +229,15 @@ bool EsPredicate::build_disjuncts_list(Expr* conjunct, vectorget_child(1); - std::shared_ptr literal(new ExtLiteral( - expr->type().type, _context->get_value(expr, NULL))); - - vector> query_conditions; - query_conditions.push_back(literal); + ExtLiteral literal(expr->type().type, _context->get_value(expr, NULL)); + vector query_conditions; + query_conditions.emplace_back(literal); vector cols; //TODO - - std::shared_ptr predicate(new ExtFunction( + ExtPredicate* predicate = new ExtFunction( TExprNodeType::FUNCTION_CALL, conjunct->fn().name.function_name, cols, - query_conditions)); + query_conditions); disjuncts.push_back(predicate); return true; @@ -245,7 +245,7 @@ bool EsPredicate::build_disjuncts_list(Expr* conjunct, vectornode_type()) { TExtInPredicate ext_in_predicate; - vector> in_pred_values; + vector in_pred_values; InPredicate* pred = dynamic_cast(conjunct); ext_in_predicate.__set_is_not_in(pred->is_not_in()); if 
(Expr::type_without_cast(pred->get_child(0)) != TExprNodeType::SLOT_REF) { @@ -253,7 +253,7 @@ bool EsPredicate::build_disjuncts_list(Expr* conjunct, vectorget_child(0)); - SlotDescriptor* slot_desc = get_slot_desc(slot_ref); + const SlotDescriptor* slot_desc = get_slot_desc(slot_ref); if (slot_desc == nullptr) { return false; } @@ -272,17 +272,15 @@ bool EsPredicate::build_disjuncts_list(Expr* conjunct, vectorget_child(i); - std::shared_ptr literal(new ExtLiteral( - expr->type().type, _context->get_value(expr, NULL))); - in_pred_values.push_back(literal); + ExtLiteral literal(expr->type().type, _context->get_value(expr, NULL)); + in_pred_values.emplace_back(literal); } - std::shared_ptr predicate(new ExtInPredicate( + ExtPredicate* predicate = new ExtInPredicate( TExprNodeType::IN_PRED, slot_desc->col_name(), slot_desc->type(), - in_pred_values)); - + in_pred_values); disjuncts.push_back(predicate); return true; @@ -309,7 +307,7 @@ bool EsPredicate::build_disjuncts_list(Expr* conjunct, vectornode_type() && conjunct->fn().name.function_name == "esquery") { return true; @@ -317,10 +315,10 @@ bool EsPredicate::is_match_func(Expr* conjunct) { return false; } -SlotDescriptor* EsPredicate::get_slot_desc(SlotRef* slotRef) { +const SlotDescriptor* EsPredicate::get_slot_desc(SlotRef* slotRef) { std::vector slot_ids; slotRef->get_slot_ids(&slot_ids); - SlotDescriptor* slot_desc = nullptr; + const SlotDescriptor* slot_desc = nullptr; for (SlotDescriptor* slot : _tuple_desc->slots()) { if (slot->id() == slot_ids[0]) { slot_desc = slot; diff --git a/be/src/exec/es_predicate.h b/be/src/exec/es_predicate.h index b941ac00df8799..8c16482d199286 100644 --- a/be/src/exec/es_predicate.h +++ b/be/src/exec/es_predicate.h @@ -34,32 +34,60 @@ namespace doris { class Status; class ExprContext; class ExtBinaryPredicate; -class ExtLiteral; -struct ExtPredicate { - ExtPredicate(TExprNodeType::type node_type) : node_type(node_type) { - } +class ExtLiteral { + public: + 
ExtLiteral(PrimitiveType type, void *value) : + _type(type), + _value(value) { + } + ~ExtLiteral(); - TExprNodeType::type node_type; + int8_t to_byte(); + int16_t to_short(); + int32_t to_int(); + int64_t to_long(); + float to_float(); + double to_double(); + std::string to_string(); + std::string to_date_string(); + bool to_bool(); + std::string to_decimal_string(); + std::string to_decimalv2_string(); + std::string to_largeint_string(); + + std::string value_to_string(); + + private: + + PrimitiveType _type; + void *_value; }; struct ExtColumnDesc { - ExtColumnDesc(std::string name, TypeDescriptor type) : + ExtColumnDesc(const std::string& name, const TypeDescriptor& type) : name(name), type(type) { } - std::string name; - TypeDescriptor type; + const std::string& name; + const TypeDescriptor& type; +}; + +struct ExtPredicate { + ExtPredicate(TExprNodeType::type node_type) : node_type(node_type) { + } + + TExprNodeType::type node_type; }; struct ExtBinaryPredicate : public ExtPredicate { ExtBinaryPredicate( TExprNodeType::type node_type, - std::string name, - TypeDescriptor type, + const std::string& name, + const TypeDescriptor& type, TExprOpcode::type op, - std::shared_ptr value) : + const ExtLiteral& value) : ExtPredicate(node_type), col(name, type), op(op), @@ -68,15 +96,15 @@ struct ExtBinaryPredicate : public ExtPredicate { ExtColumnDesc col; TExprOpcode::type op; - std::shared_ptr value; + const ExtLiteral& value; }; struct ExtInPredicate : public ExtPredicate { ExtInPredicate( TExprNodeType::type node_type, - std::string name, - TypeDescriptor type, - vector> values) : + const std::string& name, + const TypeDescriptor& type, + const std::vector& values) : ExtPredicate(node_type), is_not_in(false), col(name, type), @@ -85,85 +113,74 @@ struct ExtInPredicate : public ExtPredicate { bool is_not_in; ExtColumnDesc col; - vector> values; + std::vector values; }; struct ExtLikePredicate : public ExtPredicate { + ExtLikePredicate( + TExprNodeType::type node_type, 
+ const std::string& name, + const TypeDescriptor& type, + ExtLiteral value) : + ExtPredicate(node_type), + col(name, type), + value(value) { + } + ExtColumnDesc col; - std::shared_ptr value; + ExtLiteral value; }; struct ExtIsNullPredicate : public ExtPredicate { - bool is_not_null; + ExtIsNullPredicate( + TExprNodeType::type node_type, + const std::string& name, + const TypeDescriptor& type, + ExtLiteral value) : + ExtPredicate(node_type), + col(name, type), + is_not_null(false) { + } + ExtColumnDesc col; + bool is_not_null; }; struct ExtFunction : public ExtPredicate { - ExtFunction( - TExprNodeType::type node_type, - string func_name, - vector cols, - vector> values) : + ExtFunction(TExprNodeType::type node_type, + const std::string& func_name, + std::vector cols, + std::vector values) : ExtPredicate(node_type), func_name(func_name), cols(cols), values(values) { } - string func_name; - vector cols; - vector> values; -}; - -class ExtLiteral { - public: - ExtLiteral(PrimitiveType type, void *value) : - _type(type), - _value(value) { - } - ~ExtLiteral(); - - int8_t to_byte(); - int16_t to_short(); - int32_t to_int(); - int64_t to_long(); - float to_float(); - double to_double(); - std::string to_string(); - std::string to_date_string(); - bool to_bool(); - std::string to_decimal_string(); - std::string to_decimalv2_string(); - std::string to_largeint_string(); - - std::string value_to_string(); - - private: - - PrimitiveType _type; - void *_value; + const std::string& func_name; + std::vector cols; + std::vector values; }; class EsPredicate { public: - EsPredicate(ExprContext* conjunct_ctx, - const TupleDescriptor* tuple_desc); + EsPredicate(ExprContext* context, const TupleDescriptor* tuple_desc); ~EsPredicate(); - vector> get_predicate_list(); + std::vector get_predicate_list(); bool build_disjuncts_list(); private: bool build_disjuncts_list(Expr* conjunct, - vector>& disjuncts); - bool is_match_func(Expr* conjunct); - SlotDescriptor* get_slot_desc(SlotRef* 
slotRef); + std::vector& disjuncts); + bool is_match_func(const Expr* conjunct); + const SlotDescriptor* get_slot_desc(SlotRef* slotRef); ExprContext* _context; int _disjuncts_num; const TupleDescriptor* _tuple_desc; - vector> _disjuncts; + std::vector _disjuncts; }; } From 9799c809ea94c53201017a1d85a05f83087db82a Mon Sep 17 00:00:00 2001 From: lide-reed Date: Thu, 28 Mar 2019 20:12:01 +0800 Subject: [PATCH 12/73] Fix some issues and query successfully --- be/src/exec/es_http_scanner.cpp | 34 +++++++++++++------ be/src/exec/es_http_scanner.h | 2 ++ be/src/util/es_scan_reader.cpp | 12 +++---- be/src/util/es_scroll_parser.cpp | 11 +++--- be/src/util/es_scroll_parser.h | 4 +-- .../apache/doris/external/EsShardRouting.java | 2 +- 6 files changed, 41 insertions(+), 24 deletions(-) diff --git a/be/src/exec/es_http_scanner.cpp b/be/src/exec/es_http_scanner.cpp index 60d7d82757bb96..0e4e32b9231f58 100644 --- a/be/src/exec/es_http_scanner.cpp +++ b/be/src/exec/es_http_scanner.cpp @@ -45,6 +45,7 @@ EsHttpScanner::EsHttpScanner( _conjunct_ctxs(conjunct_ctxs), _next_range(0), _line_eof(false), + _batch_eof(false), #if BE_TEST _mem_tracker(new MemTracker()), _mem_pool(_mem_tracker.get()), @@ -55,6 +56,7 @@ EsHttpScanner::EsHttpScanner( _tuple_desc(nullptr), _counter(counter), _es_reader(nullptr), + _parser(nullptr), _rows_read_counter(nullptr), _read_timer(nullptr), _materialize_timer(nullptr) { @@ -98,19 +100,31 @@ Status EsHttpScanner::open() { Status EsHttpScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof) { SCOPED_TIMER(_read_timer); - do { - ScrollParser* parser = nullptr; - if (!_line_eof) { - RETURN_IF_ERROR(_es_reader->get_next(eof, &parser)); - if (*eof) break; + if (_line_eof && _batch_eof) { + *eof = true; + return Status::OK; + } + + while (!_batch_eof) { + if (_line_eof || _parser == nullptr) { + if (_parser != nullptr) { + delete _parser; + _parser = nullptr; + } + RETURN_IF_ERROR(_es_reader->get_next(&_batch_eof, &_parser)); + if (_batch_eof || 
_parser == nullptr) { + *eof = true; + return Status::OK; + } } - if (parser != nullptr) { - COUNTER_UPDATE(_rows_read_counter, 1); - SCOPED_TIMER(_materialize_timer); - RETURN_IF_ERROR(parser->fill_tuple(_tuple_desc, tuple, tuple_pool, &_line_eof)); + COUNTER_UPDATE(_rows_read_counter, 1); + SCOPED_TIMER(_materialize_timer); + RETURN_IF_ERROR(_parser->fill_tuple(_tuple_desc, tuple, tuple_pool, &_line_eof)); + if (!_line_eof) { + break; } - } while (!*eof); + } return Status::OK; } diff --git a/be/src/exec/es_http_scanner.h b/be/src/exec/es_http_scanner.h index 5413e691b480d4..a59024f9b40b28 100644 --- a/be/src/exec/es_http_scanner.h +++ b/be/src/exec/es_http_scanner.h @@ -82,6 +82,7 @@ class EsHttpScanner { int _next_range; bool _line_eof; + bool _batch_eof; std::vector _slot_descs; std::unique_ptr _row_desc; @@ -93,6 +94,7 @@ class EsHttpScanner { EsScanCounter* _counter; std::unique_ptr _es_reader; std::map _slots_map; + ScrollParser* _parser; // Profile RuntimeProfile::Counter* _rows_read_counter; diff --git a/be/src/util/es_scan_reader.cpp b/be/src/util/es_scan_reader.cpp index 17c2549dc92ff8..bfc4fed045e2fd 100644 --- a/be/src/util/es_scan_reader.cpp +++ b/be/src/util/es_scan_reader.cpp @@ -77,9 +77,9 @@ Status ESScanReader::get_next(bool* scan_eos, ScrollParser** parser) { std::string response; ScrollParser* scroll_parser = nullptr; // if is first scroll request, should return the cached response + *parser = nullptr; + *scan_eos = true; if (_eos) { - *parser = nullptr; - *scan_eos = true; return Status::OK; } @@ -111,22 +111,22 @@ Status ESScanReader::get_next(bool* scan_eos, ScrollParser** parser) { } scroll_parser = ScrollParser::parse_from_string(response); + _scroll_id = scroll_parser->get_scroll_id(); // maybe the index or shard is empty if (scroll_parser == nullptr || scroll_parser->get_total() == 0) { - _eos = *scan_eos = true; - *parser = nullptr; + _eos = true; return Status::OK; } if (scroll_parser->get_size() < _batch_size) { _eos = true; - 
*scan_eos = false; } else { - _eos = *scan_eos = false; + _eos = false; } *parser = scroll_parser; + *scan_eos = false; return Status::OK; } diff --git a/be/src/util/es_scroll_parser.cpp b/be/src/util/es_scroll_parser.cpp index ffe04b0f5ca1b7..7d92cc86688c7a 100644 --- a/be/src/util/es_scroll_parser.cpp +++ b/be/src/util/es_scroll_parser.cpp @@ -40,7 +40,7 @@ static const string ERROR_INVALID_COL_DATA = "Data source returned inconsistent static const string ERROR_MEM_LIMIT_EXCEEDED = "DataSourceScanNode::$0() failed to allocate " "$1 bytes for $2."; -ScrollParser::ScrollParser(const std::string& scroll_id, int total, int size) : +ScrollParser::ScrollParser(std::string scroll_id, int total, int size) : _scroll_id(scroll_id), _total(total), _size(size), @@ -65,7 +65,7 @@ ScrollParser* ScrollParser::parse_from_string(const std::string& scroll_result) std::string scroll_id = scroll_node.GetString(); // { hits: { total : 2, "hits" : [ {}, {}, {} ]}} rapidjson::Value &outer_hits_node = document_node[FIELD_HITS]; - rapidjson::Value &field_total = document_node[FIELD_TOTAL]; + rapidjson::Value &field_total = outer_hits_node[FIELD_TOTAL]; int total = field_total.GetInt(); if (total == 0) { scroll_parser = new ScrollParser(scroll_id, total); @@ -99,15 +99,14 @@ int ScrollParser::get_total() { Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, Tuple* tuple, MemPool* tuple_pool, bool* line_eof) { + *line_eof = true; if (_size <= 0 || _line_index >= _size) { - *line_eof = true; return Status::OK; } rapidjson::Value& obj = _inner_hits_node[_line_index++]; rapidjson::Value& line = obj[FIELD_SOURCE]; if (!line.IsObject()) { - *line_eof = true; return Status("Parse inner hits failed"); } @@ -232,12 +231,14 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, break; } - default: + default: { DCHECK(false); break; + } } } + *line_eof = false; return Status::OK; } } diff --git a/be/src/util/es_scroll_parser.h b/be/src/util/es_scroll_parser.h index 
22e9645cd7fa28..53e207c2e4fe6a 100644 --- a/be/src/util/es_scroll_parser.h +++ b/be/src/util/es_scroll_parser.h @@ -30,7 +30,7 @@ class Status; class ScrollParser { public: - ScrollParser(const std::string& scroll_id, int total, int size = 0); + ScrollParser(std::string scroll_id, int total, int size = 0); ~ScrollParser(); static ScrollParser* parse_from_string(const std::string& scroll_result); @@ -48,7 +48,7 @@ class ScrollParser { private: - const std::string& _scroll_id; + std::string _scroll_id; int _total; int _size; rapidjson::SizeType _line_index; diff --git a/fe/src/main/java/org/apache/doris/external/EsShardRouting.java b/fe/src/main/java/org/apache/doris/external/EsShardRouting.java index 721edc9a26bae7..8c5d06932fbdfe 100644 --- a/fe/src/main/java/org/apache/doris/external/EsShardRouting.java +++ b/fe/src/main/java/org/apache/doris/external/EsShardRouting.java @@ -42,7 +42,7 @@ public static EsShardRouting parseShardRoutingV55(String indexName, String shard JSONObject nodeInfo = nodesMap.getJSONObject(nodeId); String[] transportAddr = nodeInfo.getString("transport_address").split(":"); // get thrift port from node info - String thriftPort = nodeInfo.getJSONObject("attributes").getString("thrift_port"); + String thriftPort = "8200";//nodeInfo.getJSONObject("attributes").getString("thrift_port"); TNetworkAddress addr = new TNetworkAddress(transportAddr[0], Integer.valueOf(thriftPort)); boolean isPrimary = shardInfo.getBoolean("primary"); return new EsShardRouting(indexName, Integer.valueOf(shardKey), From 34efebbcaa88a5257fead63e8d921ce9dd146cf8 Mon Sep 17 00:00:00 2001 From: lide-reed Date: Mon, 1 Apr 2019 14:00:54 +0800 Subject: [PATCH 13/73] Ignore new_filter_in when pushing down --- be/src/exec/es_http_scan_node.cpp | 10 +++++++- be/src/exec/es_http_scanner.cpp | 29 ++++++++------------- be/src/exec/es_http_scanner.h | 5 +--- be/src/exec/es_predicate.cpp | 42 +++++++++++++++++++++---------- 4 files changed, 49 insertions(+), 37 deletions(-) diff --git 
a/be/src/exec/es_http_scan_node.cpp b/be/src/exec/es_http_scan_node.cpp index 081060b4925c4c..eca40a74047393 100644 --- a/be/src/exec/es_http_scan_node.cpp +++ b/be/src/exec/es_http_scan_node.cpp @@ -51,6 +51,8 @@ EsHttpScanNode::~EsHttpScanNode() { Status EsHttpScanNode::init(const TPlanNode& tnode, RuntimeState* state) { RETURN_IF_ERROR(ScanNode::init(tnode)); + + // use TEsScanNode _properties = tnode.es_scan_node.properties; return Status::OK; } @@ -67,6 +69,7 @@ Status EsHttpScanNode::prepare(RuntimeState* state) { return Status(ss.str()); } + // set up column name vector for ESScrollQueryBuilder for (auto slot_desc : _tuple_desc->slots()) { if (!slot_desc->is_materialized()) { continue; @@ -79,6 +82,7 @@ Status EsHttpScanNode::prepare(RuntimeState* state) { return Status::OK; } +// build predicate void EsHttpScanNode::build_conjuncts_list() { for (int i = 0; i < _conjunct_ctxs.size(); ++i) { std::shared_ptr predicate( @@ -96,8 +100,8 @@ Status EsHttpScanNode::open(RuntimeState* state) { RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::OPEN)); RETURN_IF_CANCELLED(state); + // if conjunct is constant, compute direct and set eos = true for (int conj_idx = 0; conj_idx < _conjunct_ctxs.size(); ++conj_idx) { - // if conjunct is constant, compute direct and set eos = true if (_conjunct_ctxs[conj_idx]->root()->is_constant()) { void* value = _conjunct_ctxs[conj_idx]->get_value(NULL); if (value == NULL || *reinterpret_cast(value) == false) { @@ -284,6 +288,7 @@ Status EsHttpScanNode::scanner_scan( } // eval conjuncts of this row. 
+ // TODO exclude those predicates which ES applied by _predicate_to_conjunct if (eval_conjuncts(&conjunct_ctxs[0], conjunct_ctxs.size(), row)) { row_batch->commit_last_row(); char* new_tuple = reinterpret_cast(tuple); @@ -327,6 +332,7 @@ Status EsHttpScanNode::scanner_scan( return Status::OK; } +// Prefer to the local host static std::string get_host_port(const std::vector& es_hosts) { std::string host_port; @@ -360,6 +366,7 @@ void EsHttpScanNode::scanner_worker(int start_idx, int length) { const TEsScanRange& es_scan_range = _scan_ranges[start_idx + i].scan_range.es_scan_range; + // Collect the informations from scan range to perperties std::map properties(_properties); properties[ESScanReader::KEY_INDEX] = es_scan_range.index; if (es_scan_range.__isset.type) { @@ -371,6 +378,7 @@ void EsHttpScanNode::scanner_worker(int start_idx, int length) { properties[ESScanReader::KEY_QUERY] = ESScrollQueryBuilder::build(properties, _column_names, _predicates); + // start scanner to scan std::unique_ptr scanner(new EsHttpScanner( _runtime_state, runtime_profile(), _tuple_id, properties, scanner_expr_ctxs, &counter)); diff --git a/be/src/exec/es_http_scanner.cpp b/be/src/exec/es_http_scanner.cpp index 0e4e32b9231f58..36eb09d57ac0fe 100644 --- a/be/src/exec/es_http_scanner.cpp +++ b/be/src/exec/es_http_scanner.cpp @@ -24,10 +24,9 @@ #include "runtime/exec_env.h" #include "runtime/mem_tracker.h" #include "runtime/raw_value.h" +#include "runtime/runtime_state.h" #include "runtime/tuple.h" #include "exprs/expr.h" -#include "exec/text_converter.h" -#include "exec/text_converter.hpp" namespace doris { @@ -56,7 +55,7 @@ EsHttpScanner::EsHttpScanner( _tuple_desc(nullptr), _counter(counter), _es_reader(nullptr), - _parser(nullptr), + _es_scroll_parser(nullptr), _rows_read_counter(nullptr), _read_timer(nullptr), _materialize_timer(nullptr) { @@ -74,15 +73,6 @@ Status EsHttpScanner::open() { return Status(ss.str()); } - for (auto slot : _tuple_desc->slots()) { - auto pair = 
_slots_map.emplace(slot->col_name(), slot); - if (!pair.second) { - std::stringstream ss; - ss << "Failed to insert slot, col_name=" << slot->col_name(); - return Status(ss.str()); - } - } - const std::string& host = _properties.at(ESScanReader::KEY_HOST_PORT); _es_reader.reset(new ESScanReader(host, _properties)); if (_es_reader == nullptr) { @@ -106,13 +96,13 @@ Status EsHttpScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof) { } while (!_batch_eof) { - if (_line_eof || _parser == nullptr) { - if (_parser != nullptr) { - delete _parser; - _parser = nullptr; + if (_line_eof || _es_scroll_parser == nullptr) { + if (_es_scroll_parser != nullptr) { + delete _es_scroll_parser; + _es_scroll_parser = nullptr; } - RETURN_IF_ERROR(_es_reader->get_next(&_batch_eof, &_parser)); - if (_batch_eof || _parser == nullptr) { + RETURN_IF_ERROR(_es_reader->get_next(&_batch_eof, &_es_scroll_parser)); + if (_batch_eof || _es_scroll_parser == nullptr) { *eof = true; return Status::OK; } @@ -120,7 +110,8 @@ Status EsHttpScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof) { COUNTER_UPDATE(_rows_read_counter, 1); SCOPED_TIMER(_materialize_timer); - RETURN_IF_ERROR(_parser->fill_tuple(_tuple_desc, tuple, tuple_pool, &_line_eof)); + RETURN_IF_ERROR(_es_scroll_parser->fill_tuple( + _tuple_desc, tuple, tuple_pool, &_line_eof)); if (!_line_eof) { break; } diff --git a/be/src/exec/es_http_scanner.h b/be/src/exec/es_http_scanner.h index a59024f9b40b28..fa5bde12a8cb9c 100644 --- a/be/src/exec/es_http_scanner.h +++ b/be/src/exec/es_http_scanner.h @@ -78,8 +78,6 @@ class EsHttpScanner { const std::map& _properties; const std::vector& _conjunct_ctxs; - std::unique_ptr _text_converter; - int _next_range; bool _line_eof; bool _batch_eof; @@ -93,8 +91,7 @@ class EsHttpScanner { const TupleDescriptor* _tuple_desc; EsScanCounter* _counter; std::unique_ptr _es_reader; - std::map _slots_map; - ScrollParser* _parser; + ScrollParser* _es_scroll_parser; // Profile 
RuntimeProfile::Counter* _rows_read_counter; diff --git a/be/src/exec/es_predicate.cpp b/be/src/exec/es_predicate.cpp index 311b15b75f440e..ff59c961e40c68 100644 --- a/be/src/exec/es_predicate.cpp +++ b/be/src/exec/es_predicate.cpp @@ -184,6 +184,16 @@ vector EsPredicate::get_predicate_list(){ return _disjuncts; } +static bool ignore_cast(const SlotDescriptor* slot, const Expr* expr) { + if (slot->type().is_date_type() && expr->type().is_date_type()) { + return true; + } + if (slot->type().is_string_type() && expr->type().is_string_type()) { + return true; + } + return false; +} + bool EsPredicate::build_disjuncts_list(Expr* conjunct, vector& disjuncts) { if (TExprNodeType::BINARY_PRED == conjunct->node_type()) { if (conjunct->children().size() != 2) { @@ -244,6 +254,13 @@ bool EsPredicate::build_disjuncts_list(Expr* conjunct, vector& di } if (TExprNodeType::IN_PRED == conjunct->node_type()) { + // the op code maybe FILTER_NEW_IN, it means there is function in list + // like col_a in (abs(1)) + if (TExprOpcode::FILTER_IN != conjunct->op() + && TExprOpcode::FILTER_NOT_IN != conjunct->op()) { + return false; + } + TExtInPredicate ext_in_predicate; vector in_pred_values; InPredicate* pred = dynamic_cast(conjunct); @@ -258,22 +275,21 @@ bool EsPredicate::build_disjuncts_list(Expr* conjunct, vector& di return false; } - for (int i = 1; i < pred->children().size(); ++i) { - // varchar, string, all of them are string type, but varchar != string - // TODO add date, datetime support? 
- if (pred->get_child(0)->type().is_string_type()) { - if (!pred->get_child(i)->type().is_string_type()) { - return false; - } - } else { - if (pred->get_child(i)->type().type != pred->get_child(0)->type().type) { - return false; - } + if (pred->get_child(0)->type().type != slot_desc->type().type) { + if (!ignore_cast(slot_desc, pred->get_child(0))) { + return false; + } + } + + HybirdSetBase::IteratorBase* iter = pred->hybird_set()->begin(); + while (iter->has_next()) { + if (nullptr == iter->get_value()) { + return false; } - Expr* expr = conjunct->get_child(i); - ExtLiteral literal(expr->type().type, _context->get_value(expr, NULL)); + ExtLiteral literal(slot_desc->type().type, const_cast(iter->get_value())); in_pred_values.emplace_back(literal); + iter->next(); } ExtPredicate* predicate = new ExtInPredicate( From 8387dba4a5ff5812c985a627b130984c4e7f232f Mon Sep 17 00:00:00 2001 From: wuyunfeng Date: Mon, 1 Apr 2019 14:40:28 +0800 Subject: [PATCH 14/73] Add PushDown filters for predicates --- be/src/exec/es_predicate.h | 5 + be/src/util/CMakeLists.txt | 1 + be/src/util/es_query_builder.cpp | 321 +++++++++++++++++++++++++ be/src/util/es_query_builder.h | 115 +++++++++ be/src/util/es_scroll_query.cpp | 42 +--- be/src/util/es_scroll_query.h | 19 +- be/test/util/CMakeLists.txt | 1 + be/test/util/es_query_builder_test.cpp | 307 +++++++++++++++++++++++ 8 files changed, 764 insertions(+), 47 deletions(-) create mode 100644 be/src/util/es_query_builder.cpp create mode 100644 be/src/util/es_query_builder.h create mode 100644 be/test/util/es_query_builder_test.cpp diff --git a/be/src/exec/es_predicate.h b/be/src/exec/es_predicate.h index 8c16482d199286..1a4400c9d022aa 100644 --- a/be/src/exec/es_predicate.h +++ b/be/src/exec/es_predicate.h @@ -169,6 +169,11 @@ class EsPredicate { ~EsPredicate(); std::vector get_predicate_list(); bool build_disjuncts_list(); + // public for tests + EsPredicate(std::vector& all_predicates) { + _disjuncts = all_predicates; + }; + private: 
diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt index e190ccfac2a788..1dce557dca9f4e 100644 --- a/be/src/util/CMakeLists.txt +++ b/be/src/util/CMakeLists.txt @@ -77,6 +77,7 @@ add_library(Util STATIC es_scan_reader.cpp es_scroll_query.cpp es_scroll_parser.cpp + es_query_builder.cpp ) #ADD_BE_TEST(integer-array-test) diff --git a/be/src/util/es_query_builder.cpp b/be/src/util/es_query_builder.cpp new file mode 100644 index 00000000000000..d9c1760fb16ab5 --- /dev/null +++ b/be/src/util/es_query_builder.cpp @@ -0,0 +1,321 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+#include +#include "util/es_query_builder.h" +#include "rapidjson/rapidjson.h" +#include "rapidjson/stringbuffer.h" +#include "rapidjson/writer.h" +#include "common/logging.h" + +namespace doris { + +ESQueryBuilder::ESQueryBuilder(const std::string& es_query_str) : _es_query_str(es_query_str) { + +} +ESQueryBuilder::ESQueryBuilder(ExtFunction* es_query) { + auto first = es_query->values.front(); + _es_query_str = first.value_to_string(); +} + +rapidjson::Value ESQueryBuilder::to_json(rapidjson::Document& docuemnt) { + rapidjson::Document draft; + draft.Parse<0>(_es_query_str.c_str()); + rapidjson::Document::AllocatorType& draft_allocator = draft.GetAllocator(); + rapidjson::Value query_key; + rapidjson::Value query_value; + //{ "term": { "dv": "2" } } + if (!draft.HasParseError()) { + for (rapidjson::Value::ConstMemberIterator itr = draft.MemberBegin(); itr != draft.MemberEnd(); itr++) { + query_key.CopyFrom(itr->name, draft_allocator); + query_value.CopyFrom(itr->value, draft_allocator); + if (query_key.IsString()) { + // if we found one key, then we should end loop + break; + } + } + } + rapidjson::Document::AllocatorType& allocator = docuemnt.GetAllocator(); + rapidjson::Value es_query(rapidjson::kObjectType); + es_query.SetObject(); + es_query.AddMember(query_key, query_value, allocator); + return es_query; +} +rapidjson::Value WildCardQueryBuilder::to_json(rapidjson::Document& document) { + rapidjson::Document::AllocatorType& allocator = document.GetAllocator(); + rapidjson::Value term_node(rapidjson::kObjectType); + term_node.SetObject(); + rapidjson::Value field_value(_field.c_str(), allocator); + rapidjson::Value term_value(_like_value.c_str(), allocator); + term_node.AddMember(field_value, term_value, allocator); + rapidjson::Value wildcard_query(rapidjson::kObjectType); + wildcard_query.SetObject(); + wildcard_query.AddMember("wildcard", term_node, allocator); + return wildcard_query; + +} +WildCardQueryBuilder::WildCardQueryBuilder(ExtLikePredicate* 
like_predicate) { + _like_value = like_predicate->value.value_to_string(); + std::replace(_like_value.begin(), _like_value.end(), '_', '?'); + std::replace(_like_value.begin(), _like_value.end(), '%', '*'); + _field = like_predicate->col.name; +} + +TermQueryBuilder::TermQueryBuilder(const std::string& field, const std::string& term) : _field(field), _term(term) { + +} + +TermQueryBuilder::TermQueryBuilder(ExtBinaryPredicate* binary_predicate) { + _field = binary_predicate->col.name; + ExtLiteral literal = binary_predicate->value; + _term = literal.value_to_string(); +} + +rapidjson::Value TermQueryBuilder::to_json(rapidjson::Document& docuemnt) { + rapidjson::Document::AllocatorType& allocator = docuemnt.GetAllocator(); + rapidjson::Value term_node(rapidjson::kObjectType); + term_node.SetObject(); + rapidjson::Value field_value(_field.c_str(), allocator); + rapidjson::Value term_value(_term.c_str(), allocator); + term_node.AddMember(field_value, term_value, allocator); + rapidjson::Value term_query(rapidjson::kObjectType); + term_query.SetObject(); + term_query.AddMember("term", term_node, allocator); + return term_query; +} + +rapidjson::Value TermsInSetQueryBuilder::to_json(rapidjson::Document& document) { + std::string field = _in_predicate->col.name; + rapidjson::Document::AllocatorType& allocator = document.GetAllocator(); + rapidjson::Value terms_node(rapidjson::kObjectType); + rapidjson::Value values_node(rapidjson::kArrayType); + for (auto value : _in_predicate->values) { + rapidjson::Value value_value(value.value_to_string().c_str(), allocator); + values_node.PushBack(value_value, allocator); + } + rapidjson::Value field_value(field.c_str(), allocator); + terms_node.AddMember(field_value, values_node, allocator); + rapidjson::Value terms_in_set_query(rapidjson::kObjectType); + terms_in_set_query.SetObject(); + terms_in_set_query.AddMember("terms", terms_node, allocator); + return terms_in_set_query; +} + 
+TermsInSetQueryBuilder::TermsInSetQueryBuilder(ExtInPredicate* in_predicate) { + _in_predicate = in_predicate; +} + +rapidjson::Value RangeQueryBuilder::to_json(rapidjson::Document& document) { + std::string field = _range_predicate->col.name; + rapidjson::Document::AllocatorType& allocator = document.GetAllocator(); + rapidjson::Value field_value(field.c_str(), allocator); + ExtLiteral b_value = _range_predicate->value; + rapidjson::Value value(b_value.value_to_string().c_str(), allocator); + rapidjson::Value op_node(rapidjson::kObjectType); + op_node.SetObject(); + switch (_range_predicate->op) + { + case TExprOpcode::LT: + op_node.AddMember("lt", value, allocator); + break; + case TExprOpcode::LE: + op_node.AddMember("le", value, allocator); + break; + case TExprOpcode::GT: + op_node.AddMember("gt", value, allocator); + break; + case TExprOpcode::GE: + op_node.AddMember("ge", value, allocator); + break; + default: + break; + } + rapidjson::Value field_node(rapidjson::kObjectType); + field_node.SetObject(); + field_node.AddMember(field_value, op_node, allocator); + + rapidjson::Value range_query(rapidjson::kObjectType); + range_query.SetObject(); + range_query.AddMember("range", field_node, allocator); + return range_query; +} + +RangeQueryBuilder::RangeQueryBuilder(ExtBinaryPredicate* range_predicate) { + _range_predicate = range_predicate; +} + +rapidjson::Value MatchAllQueryBuilder::to_json(rapidjson::Document& document) { + rapidjson::Document::AllocatorType& allocator = document.GetAllocator(); + rapidjson::Value match_all_node(rapidjson::kObjectType); + match_all_node.SetObject(); + rapidjson::Value match_all_query(rapidjson::kObjectType); + match_all_query.SetObject(); + match_all_query.AddMember("match_all", match_all_node, allocator); + return match_all_query; +} + +BooleanQueryBuilder::BooleanQueryBuilder() { + +} +BooleanQueryBuilder::~BooleanQueryBuilder() { + for (auto clause : _must_clauses) { + delete clause; + clause = nullptr; + } + for (auto 
clause : _must_not_clauses) { + delete clause; + clause = nullptr; + } + for (auto clause : _filter_clauses) { + delete clause; + clause = nullptr; + } + for (auto clause : _should_clauses) { + delete clause; + clause = nullptr; + } +} + +BooleanQueryBuilder::BooleanQueryBuilder(const std::vector& predicates) { + for (auto predicate : predicates) { + switch (predicate->node_type) { + case TExprNodeType::BINARY_PRED: { + ExtBinaryPredicate* binary_predicate = (ExtBinaryPredicate*)predicate; + switch (binary_predicate->op) + { + case TExprOpcode::EQ: { + TermQueryBuilder* term_query = new TermQueryBuilder(binary_predicate); + _should_clauses.push_back(term_query); + break; + } + case TExprOpcode::NE:{ // process NE + TermQueryBuilder* term_query = new TermQueryBuilder(binary_predicate); + BooleanQueryBuilder* bool_query = new BooleanQueryBuilder(); + bool_query->must_not(term_query); + _should_clauses.push_back(bool_query); + break; + } + case TExprOpcode::LT: + case TExprOpcode::LE: + case TExprOpcode::GT: + case TExprOpcode::GE: { + RangeQueryBuilder* range_query = new RangeQueryBuilder(binary_predicate); + _should_clauses.push_back(range_query); + break; + } + default: + break; + } + break; + } + case TExprNodeType::IN_PRED: { + ExtInPredicate* in_predicate = (ExtInPredicate *)predicate; + bool is_not_in = in_predicate->is_not_in; + if (is_not_in) { // process not in predicate + TermsInSetQueryBuilder* terms_predicate = new TermsInSetQueryBuilder(in_predicate); + BooleanQueryBuilder* bool_query = new BooleanQueryBuilder(); + bool_query->must_not(terms_predicate); + _should_clauses.push_back(bool_query); + } else { // process in predicate + TermsInSetQueryBuilder* terms_query= new TermsInSetQueryBuilder(in_predicate); + _should_clauses.push_back(terms_query); + } + break; + } + case TExprNodeType::LIKE_PRED: { + ExtLikePredicate* like_predicate = (ExtLikePredicate *)predicate; + WildCardQueryBuilder* wild_card_query = new WildCardQueryBuilder(like_predicate); + 
_should_clauses.push_back(wild_card_query); + break; + } + case TExprNodeType::FUNCTION_CALL: { + ExtFunction* function_predicate = (ExtFunction *)predicate; + if ("es_query" == function_predicate->func_name ) { + ESQueryBuilder* es_query = new ESQueryBuilder(function_predicate); + _should_clauses.push_back(es_query); + }; + break; + } + default: + break; + } + } +} + +rapidjson::Value BooleanQueryBuilder::to_json(rapidjson::Document& docuemnt) { + rapidjson::Document::AllocatorType &allocator = docuemnt.GetAllocator(); + rapidjson::Value root_node_object(rapidjson::kObjectType); + if (_filter_clauses.size() > 0) { + rapidjson::Value filter_node(rapidjson::kArrayType); + for (auto must_clause : _filter_clauses) { + filter_node.PushBack(must_clause->to_json(docuemnt), allocator); + } + root_node_object.AddMember("filter", filter_node, allocator); + } + + if (_should_clauses.size() > 0) { + rapidjson::Value should_node(rapidjson::kArrayType); + for (auto should_clause : _should_clauses) { + should_node.PushBack(should_clause->to_json(docuemnt), allocator); + } + root_node_object.AddMember("should", should_node, allocator); + } + + if (_must_not_clauses.size() > 0) { + rapidjson::Value must_not_node(rapidjson::kArrayType); + for (auto must_not_clause : _must_not_clauses) { + must_not_node.PushBack(must_not_clause->to_json(docuemnt), allocator); + } + root_node_object.AddMember("must_not", must_not_node, allocator); + } + + rapidjson::Value bool_query(rapidjson::kObjectType); + bool_query.AddMember("bool", root_node_object, allocator); + return bool_query; +} + +void BooleanQueryBuilder::should(QueryBuilder* filter) { + _should_clauses.push_back(filter); +} +void BooleanQueryBuilder::filter(QueryBuilder* filter) { + _filter_clauses.push_back(filter); +} +void BooleanQueryBuilder::must(QueryBuilder* filter) { + _filter_clauses.push_back(filter); +} +void BooleanQueryBuilder::must_not(QueryBuilder* filter) { + _must_not_clauses.push_back(filter); +} + +rapidjson::Value 
BooleanQueryBuilder::to_query(const std::vector& predicates) { + rapidjson::Document root; + rapidjson::Document::AllocatorType &allocator = root.GetAllocator(); + root.SetObject(); + BooleanQueryBuilder *bool_query = new BooleanQueryBuilder(); + for (auto es_predicate : predicates) { + vector or_predicates = es_predicate->get_predicate_list(); + BooleanQueryBuilder* inner_bool_query = new BooleanQueryBuilder(or_predicates); + bool_query->must(inner_bool_query); + } + rapidjson::Value root_value_node = bool_query->to_json(root); + // root.AddMember("query", root_value_node, allocator); + // rapidjson::StringBuffer buffer; + // rapidjson::Writer writer(buffer); + // root.Accept(writer); + // std::string es_query_dsl_json = buffer.GetString(); + return root_value_node; +} +} diff --git a/be/src/util/es_query_builder.h b/be/src/util/es_query_builder.h new file mode 100644 index 00000000000000..50402abc8ab617 --- /dev/null +++ b/be/src/util/es_query_builder.h @@ -0,0 +1,115 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+#pragma once +#include +#include +#include "rapidjson/document.h" +#include "exec/es_predicate.h" + +namespace doris { + +class QueryBuilder { + +public: + virtual rapidjson::Value to_json(rapidjson::Document& allocator) = 0; +}; + +// process esquery(fieldA, json dsl) function +class ESQueryBuilder : public QueryBuilder { +public: + ESQueryBuilder(const std::string& es_query_str); + ESQueryBuilder(ExtFunction* es_query); + rapidjson::Value to_json(rapidjson::Document& allocator) override; +private: + std::string _es_query_str; +}; + +// process field = value +class TermQueryBuilder : public QueryBuilder { + +public: + TermQueryBuilder(const std::string& field, const std::string& term); + TermQueryBuilder(ExtBinaryPredicate* binary_predicate); + rapidjson::Value to_json(rapidjson::Document& document) override; + +private: + std::string _field; + std::string _term; +}; + +// process range predicate field >= value or field < value etc. +class RangeQueryBuilder : public QueryBuilder { + +public: + rapidjson::Value to_json(rapidjson::Document& document) override; + RangeQueryBuilder(ExtBinaryPredicate* range_predicate); +private: + ExtBinaryPredicate* _range_predicate; +}; + +// process in predicate : field in [value1, value2] +class TermsInSetQueryBuilder : public QueryBuilder { + +public: + rapidjson::Value to_json(rapidjson::Document& document) override; + TermsInSetQueryBuilder(ExtInPredicate* in_predicate); +private: + ExtInPredicate* _in_predicate; +}; + +// process like predicate : field like "a%b%c_" +class WildCardQueryBuilder : public QueryBuilder { + +public: + rapidjson::Value to_json(rapidjson::Document& document) override; + WildCardQueryBuilder(ExtLikePredicate* like_predicate); + +private: + std::string _like_value; + std::string _field; +}; + +// no predicates: all doccument match +class MatchAllQueryBuilder : public QueryBuilder { + +public: + rapidjson::Value to_json(rapidjson::Document& document) override; +}; + +// proccess bool compound query, 
and play the role of a bridge for transferring predicates to es native query +class BooleanQueryBuilder : public QueryBuilder { + +public: + BooleanQueryBuilder(const std::vector& predicates); + BooleanQueryBuilder(); + ~BooleanQueryBuilder(); + rapidjson::Value to_json(rapidjson::Document& document) override; + void should(QueryBuilder* filter); + void filter(QueryBuilder* filter); + void must(QueryBuilder* filter); + void must_not(QueryBuilder* filter); + // class method for transfer predicate to es query value, invoker should enclose this value with `query` + static rapidjson::Value to_query(const std::vector& predicates); + +private: + std::vector _must_clauses; + std::vector _must_not_clauses; + std::vector _filter_clauses; + std::vector _should_clauses; +}; + +} diff --git a/be/src/util/es_scroll_query.cpp b/be/src/util/es_scroll_query.cpp index abdb9f2a72463c..7dbbdb56daafcf 100644 --- a/be/src/util/es_scroll_query.cpp +++ b/be/src/util/es_scroll_query.cpp @@ -15,14 +15,16 @@ // specific language governing permissions and limitations // under the License. -#include "es_scroll_query.h" #include #include #include "common/logging.h" +#include "es_scroll_query.h" #include "rapidjson/document.h" #include "rapidjson/stringbuffer.h" #include "rapidjson/writer.h" #include "util/es_scan_reader.h" +#include "util/es_query_builder.h" + namespace doris { ESScrollQueryBuilder::ESScrollQueryBuilder() { @@ -60,10 +62,15 @@ std::string ESScrollQueryBuilder::build_clear_scroll_body(const std::string& scr std::string ESScrollQueryBuilder::build(const std::map& properties, const std::vector& fields, - std::vector> predicates) { + std::vector& predicates) { rapidjson::Document es_query_dsl; rapidjson::Document::AllocatorType &allocator = es_query_dsl.GetAllocator(); es_query_dsl.SetObject(); + // generate the filter caluse + rapidjson::Value query_node = BooleanQueryBuilder::to_query(predicates); + // note: add `query` for this value.... 
+ es_query_dsl.AddMember("query", query_node, allocator); + // just filter the selected fields for reducing the network cost if (fields.size() > 0) { rapidjson::Value source_node(rapidjson::kArrayType); for (auto iter = fields.begin(); iter != fields.end(); iter++) { @@ -74,12 +81,12 @@ std::string ESScrollQueryBuilder::build(const std::map } int size = atoi(properties.at(ESScanReader::KEY_BATCH_SIZE).c_str()); rapidjson::Value sort_node(rapidjson::kArrayType); + // use the scroll-scan mode for scan index documents rapidjson::Value field("_doc", allocator); sort_node.PushBack(field, allocator); es_query_dsl.AddMember("sort", sort_node, allocator); - + // number of docuements returned es_query_dsl.AddMember("size", size, allocator); - rapidjson::StringBuffer buffer; rapidjson::Writer writer(buffer); es_query_dsl.Accept(writer); @@ -87,31 +94,4 @@ std::string ESScrollQueryBuilder::build(const std::map return es_query_dsl_json; } -// std::string ESScrollQueryBuilder::build() { -// rapidjson::Document es_query_dsl; -// rapidjson::Document::AllocatorType &allocator = es_query_dsl.GetAllocator(); -// es_query_dsl.SetObject(); -// if (_fields.size() > 0) { -// rapidjson::Value source_node(rapidjson::kArrayType); -// for (auto iter = _fields.begin(); iter != _fields.end(); iter++) { -// rapidjson::Value field(iter->c_str(), allocator); -// source_node.PushBack(field, allocator); -// } -// es_query_dsl.AddMember("_source", source_node, allocator); -// } - -// rapidjson::Value sort_node(rapidjson::kArrayType); -// rapidjson::Value field("_doc", allocator); -// sort_node.PushBack(field, allocator); -// es_query_dsl.AddMember("sort", sort_node, allocator); - -// es_query_dsl.AddMember("size", _size, allocator); - -// rapidjson::StringBuffer buffer; -// rapidjson::Writer writer(buffer); -// es_query_dsl.Accept(writer); -// std::string es_query_dsl_json = buffer.GetString(); -// return es_query_dsl_json; -// } - } diff --git a/be/src/util/es_scroll_query.h 
b/be/src/util/es_scroll_query.h index f30378d30c3b1e..c1e99f899f2485 100644 --- a/be/src/util/es_scroll_query.h +++ b/be/src/util/es_scroll_query.h @@ -29,24 +29,11 @@ class ESScrollQueryBuilder { ESScrollQueryBuilder(); ~ESScrollQueryBuilder(); // build the query DSL for elasticsearch - // std::string build(); - - - // void set_batch_size(uint16_t batch_size) { - // _size = batch_size; - // } - // void set_selected_fields(const std::vector& fields) { - // _fields = fields; - // } - static std::string build_next_scroll_body(const std::string& scroll_id, const std::string& scroll); static std::string build_clear_scroll_body(const std::string& scroll_id); + // @note: predicates should processed before pass it to this method, + // tie breaker for predicate wheather can push down es can reference the push-down filters static std::string build(const std::map& properties, - const std::vector& fields, - std::vector>); -// private: -// std::vector _fields; -// uint16_t _size; -// }; + const std::vector& fields, std::vector& predicates); }; } diff --git a/be/test/util/CMakeLists.txt b/be/test/util/CMakeLists.txt index ac87fbab2f3dfb..ab94d3cf2f814b 100644 --- a/be/test/util/CMakeLists.txt +++ b/be/test/util/CMakeLists.txt @@ -39,3 +39,4 @@ ADD_BE_TEST(arena_test) ADD_BE_TEST(aes_util_test) ADD_BE_TEST(md5_test) ADD_BE_TEST(es_scan_reader_test) +ADD_BE_TEST(es_query_builde_test) diff --git a/be/test/util/es_query_builder_test.cpp b/be/test/util/es_query_builder_test.cpp new file mode 100644 index 00000000000000..6b700cf0c175aa --- /dev/null +++ b/be/test/util/es_query_builder_test.cpp @@ -0,0 +1,307 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include "common/logging.h" +#include "util/es_query_builder.h" +#include "rapidjson/document.h" +#include "exec/es_predicate.h" +#include "rapidjson/rapidjson.h" +#include "rapidjson/stringbuffer.h" +#include "rapidjson/writer.h" +#include "runtime/string_value.h" +namespace doris { + +class BooleanQueryBuilderTest : public testing::Test { +public: + BooleanQueryBuilderTest() { } + virtual ~BooleanQueryBuilderTest() { } +}; +TEST_F(BooleanQueryBuilderTest, term_query) { + // content = "wyf" + char str[] = "wyf"; + StringValue value(str, 3); + ExtLiteral term_literal(TYPE_VARCHAR, &value); + TypeDescriptor type_desc = TypeDescriptor::create_varchar_type(3); + std::string name = "content"; + ExtBinaryPredicate* term_predicate = new ExtBinaryPredicate(TExprNodeType::BINARY_PRED, name, type_desc, TExprOpcode::EQ, term_literal); + TermQueryBuilder term_query(term_predicate); + rapidjson::Document document; + rapidjson::Value term_value = term_query.to_json(document); + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + term_value.Accept(writer); + std::string actual_json = buffer.GetString(); + //LOG(INFO) << "term query" << actual_json; + ASSERT_STREQ("{\"term\":{\"content\":\"wyf\"}}", actual_json.c_str()); +} + +TEST_F(BooleanQueryBuilderTest, range_query) { + // k >= a + char str[] = "a"; + StringValue value(str, 1); + ExtLiteral 
term_literal(TYPE_VARCHAR, &value); + TypeDescriptor type_desc = TypeDescriptor::create_varchar_type(1); + std::string name = "k"; + ExtBinaryPredicate* range_predicate = new ExtBinaryPredicate(TExprNodeType::BINARY_PRED, name, type_desc, TExprOpcode::GE, term_literal); + RangeQueryBuilder range_query(range_predicate); + rapidjson::Document document; + rapidjson::Value range_value = range_query.to_json(document); + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + range_value.Accept(writer); + std::string actual_json = buffer.GetString(); + //LOG(INFO) << "range query" << actual_json; + ASSERT_STREQ("{\"range\":{\"k\":{\"ge\":\"a\"}}}", actual_json.c_str()); +} + +TEST_F(BooleanQueryBuilderTest, es_query) { + // esquery('random', "{\"bool\": {\"must_not\": {\"exists\": {\"field\": \"f1\"}}}}") + char str[] = "{\"bool\": {\"must_not\": {\"exists\": {\"field\": \"f1\"}}}}"; + int length = (int)strlen(str); + TypeDescriptor type_desc = TypeDescriptor::create_varchar_type(length); + std::string name = "random"; + ExtColumnDesc col_des(name, type_desc); + std::vector cols = {col_des}; + StringValue value(str, length); + ExtLiteral term_literal(TYPE_VARCHAR, &value); + std::vector values = {term_literal}; + std::string function_name = "es_query"; + ExtFunction* function_predicate = new ExtFunction(TExprNodeType::FUNCTION_CALL, function_name, cols, values); + ESQueryBuilder es_query(function_predicate); + rapidjson::Document document; + rapidjson::Value es_query_value = es_query.to_json(document); + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + es_query_value.Accept(writer); + std::string actual_json = buffer.GetString(); + //LOG(INFO) << "es query" << actual_json; + ASSERT_STREQ("{\"bool\":{\"must_not\":{\"exists\":{\"field\":\"f1\"}}}}", actual_json.c_str()); +} + +TEST_F(BooleanQueryBuilderTest, like_query) { + // content like 'a%e%g_' + char str[] = "a%e%g_"; + int length = (int)strlen(str); + LOG(INFO) << "length " << 
length; + TypeDescriptor type_desc = TypeDescriptor::create_varchar_type(length); + StringValue value(str, length); + ExtLiteral like_literal(TYPE_VARCHAR, &value); + std::string name = "content"; + ExtLikePredicate* like_predicate = new ExtLikePredicate(TExprNodeType::LIKE_PRED, name, type_desc, like_literal); + WildCardQueryBuilder like_query(like_predicate); + rapidjson::Document document; + rapidjson::Value like_query_value = like_query.to_json(document); + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + like_query_value.Accept(writer); + std::string actual_json = buffer.GetString(); + // LOG(INFO) << "wildcard query" << actual_json; + ASSERT_STREQ("{\"wildcard\":{\"content\":\"a*e*g?\"}}", actual_json.c_str()); +} + +TEST_F(BooleanQueryBuilderTest, terms_in_query) { + // dv in ["2.0", "4.0", "8.0"] + std::string terms_in_field = "dv"; + int terms_in_field_length = terms_in_field.length(); + TypeDescriptor terms_in_col_type_desc = TypeDescriptor::create_varchar_type(terms_in_field_length); + + char value_1[] = "2.0"; + int value_1_length = (int)strlen(value_1); + StringValue string_value_1(value_1, value_1_length); + ExtLiteral term_literal_1(TYPE_VARCHAR, &string_value_1); + + char value_2[] = "4.0"; + int value_2_length = (int)strlen(value_2); + StringValue string_value_2(value_2, value_2_length); + ExtLiteral term_literal_2(TYPE_VARCHAR, &string_value_2); + + char value_3[] = "8.0"; + int value_3_length = (int)strlen(value_3); + StringValue string_value_3(value_3, value_3_length); + ExtLiteral term_literal_3(TYPE_VARCHAR, &string_value_3); + + std::vector terms_values = {term_literal_1, term_literal_2, term_literal_3}; + ExtInPredicate* in_predicate = new ExtInPredicate(TExprNodeType::IN_PRED, terms_in_field, terms_in_col_type_desc, terms_values); + TermsInSetQueryBuilder terms_query(in_predicate); + rapidjson::Document document; + rapidjson::Value in_query_value = terms_query.to_json(document); + rapidjson::StringBuffer buffer; + 
rapidjson::Writer writer(buffer); + in_query_value.Accept(writer); + std::string actual_json = buffer.GetString(); + //LOG(INFO) << "terms in sets query" << actual_json; + ASSERT_STREQ("{\"terms\":{\"dv\":[\"2.0\",\"4.0\",\"8.0\"]}}", actual_json.c_str()); +} + +TEST_F(BooleanQueryBuilderTest, match_all_query) { + // match all docs + MatchAllQueryBuilder match_all_query; + rapidjson::Document document; + rapidjson::Value match_all_query_value = match_all_query.to_json(document); + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + match_all_query_value.Accept(writer); + std::string actual_json = buffer.GetString(); + //LOG(INFO) << "match all query" << actual_json; + ASSERT_STREQ("{\"match_all\":{}}", actual_json.c_str()); +} + + +TEST_F(BooleanQueryBuilderTest, bool_query) { + // content like 'a%e%g_' + char like_value[] = "a%e%g_"; + int like_value_length = (int)strlen(like_value); + TypeDescriptor like_type_desc = TypeDescriptor::create_varchar_type(like_value_length); + StringValue like_term_value(like_value, like_value_length); + ExtLiteral like_literal(TYPE_VARCHAR, &like_term_value); + std::string like_field_name = "content"; + ExtLikePredicate* like_predicate = new ExtLikePredicate(TExprNodeType::LIKE_PRED, like_field_name, like_type_desc, like_literal); + // esquery("random", "{\"bool\": {\"must_not\": {\"exists\": {\"field\": \"f1\"}}}}") + char es_query_str[] = "{\"bool\": {\"must_not\": {\"exists\": {\"field\": \"f1\"}}}}"; + int es_query_length = (int)strlen(es_query_str); + StringValue value(es_query_str, es_query_length); + TypeDescriptor es_query_type_desc = TypeDescriptor::create_varchar_type(es_query_length); + std::string es_query_field_name = "random"; + ExtColumnDesc es_query_col_des(es_query_field_name, es_query_type_desc); + std::vector es_query_cols = {es_query_col_des}; + StringValue es_query_value(es_query_str, es_query_length); + ExtLiteral es_query_term_literal(TYPE_VARCHAR, &es_query_value); + std::vector 
es_query_values = {es_query_term_literal}; + std::string function_name = "es_query"; + ExtFunction* function_predicate = new ExtFunction(TExprNodeType::FUNCTION_CALL, function_name, es_query_cols, es_query_values); + // k >= a + char range_value_str[] = "a"; + int range_value_length = (int)strlen(range_value_str); + StringValue range_value(range_value_str, range_value_length); + ExtLiteral range_literal(TYPE_VARCHAR, &range_value); + TypeDescriptor range_type_desc = TypeDescriptor::create_varchar_type(range_value_length); + std::string range_field_name = "k"; + ExtBinaryPredicate* range_predicate = new ExtBinaryPredicate(TExprNodeType::BINARY_PRED, range_field_name, range_type_desc, TExprOpcode::GE, range_literal); + // content = "wyf" + char term_str[] = "wyf"; + int term_value_length = (int)strlen(term_str); + StringValue term_value(term_str, term_value_length); + ExtLiteral term_literal(TYPE_VARCHAR, &term_value); + TypeDescriptor term_type_desc = TypeDescriptor::create_varchar_type(term_value_length); + std::string term_field_name = "content"; + ExtBinaryPredicate* term_predicate = new ExtBinaryPredicate(TExprNodeType::BINARY_PRED, term_field_name, term_type_desc, TExprOpcode::EQ, term_literal); + + // content like 'a%e%g_' or k >= a or content = "wyf" + std::vector or_predicates = {like_predicate, function_predicate, range_predicate, term_predicate}; + BooleanQueryBuilder bool_query(or_predicates); + rapidjson::Document document; + rapidjson::Value bool_query_value = bool_query.to_json(document); + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + bool_query_value.Accept(writer); + std::string actual_json = buffer.GetString(); + std::string expected_json = "{\"bool\":{\"should\":[{\"wildcard\":{\"content\":\"a*e*g?\"}},{\"bool\":{\"must_not\":{\"exists\":{\"field\":\"f1\"}}}},{\"range\":{\"k\":{\"ge\":\"a\"}}},{\"term\":{\"content\":\"wyf\"}}]}}"; + //LOG(INFO) << "bool query" << actual_json; + ASSERT_STREQ(expected_json.c_str(), 
actual_json.c_str()); +} + +TEST_F(BooleanQueryBuilderTest, compound_bool_query) { + // content like "a%e%g_" or esquery(random, '{"bool": {"must_not": {"exists": {"field": "f1"}}}}') + char like_value[] = "a%e%g_"; + int like_value_length = (int)strlen(like_value); + TypeDescriptor like_type_desc = TypeDescriptor::create_varchar_type(like_value_length); + StringValue like_term_value(like_value, like_value_length); + ExtLiteral like_literal(TYPE_VARCHAR, &like_term_value); + std::string like_field_name = "content"; + ExtLikePredicate* like_predicate = new ExtLikePredicate(TExprNodeType::LIKE_PRED, like_field_name, like_type_desc, like_literal); + + char es_query_str[] = "{\"bool\": {\"must_not\": {\"exists\": {\"field\": \"f1\"}}}}"; + int es_query_length = (int)strlen(es_query_str); + StringValue value(es_query_str, es_query_length); + TypeDescriptor es_query_type_desc = TypeDescriptor::create_varchar_type(es_query_length); + std::string es_query_field_name = "random"; + ExtColumnDesc es_query_col_des(es_query_field_name, es_query_type_desc); + std::vector es_query_cols = {es_query_col_des}; + StringValue es_query_value(es_query_str, es_query_length); + ExtLiteral es_query_term_literal(TYPE_VARCHAR, &es_query_value); + std::vector es_query_values = {es_query_term_literal}; + std::string function_name = "es_query"; + ExtFunction* function_predicate = new ExtFunction(TExprNodeType::FUNCTION_CALL, function_name, es_query_cols, es_query_values); + std::vector bool_predicates_1 = {like_predicate, function_predicate}; + EsPredicate* bool_predicate_1 = new EsPredicate(bool_predicates_1); + + // k >= "a" + char range_value_str[] = "a"; + int range_value_length = (int)strlen(range_value_str); + StringValue range_value(range_value_str, range_value_length); + ExtLiteral range_literal(TYPE_VARCHAR, &range_value); + TypeDescriptor range_type_desc = TypeDescriptor::create_varchar_type(range_value_length); + std::string range_field_name = "k"; + ExtBinaryPredicate* 
range_predicate = new ExtBinaryPredicate(TExprNodeType::BINARY_PRED, range_field_name, range_type_desc, TExprOpcode::GE, range_literal); + + std::vector bool_predicates_2 = {range_predicate}; + EsPredicate* bool_predicate_2 = new EsPredicate(bool_predicates_2); + + // content != "wyf" + char term_str[] = "wyf"; + int term_value_length = (int)strlen(term_str); + StringValue term_value(term_str, term_value_length); + ExtLiteral term_literal(TYPE_VARCHAR, &term_value); + TypeDescriptor term_type_desc = TypeDescriptor::create_varchar_type(term_value_length); + std::string term_field_name = "content"; + ExtBinaryPredicate* term_ne_predicate = new ExtBinaryPredicate(TExprNodeType::BINARY_PRED, term_field_name, term_type_desc, TExprOpcode::NE, term_literal); + std::vector bool_predicates_3 = {term_ne_predicate}; + EsPredicate* bool_predicate_3 = new EsPredicate(bool_predicates_3); + + // fv not in [8.0, 16.0] + std::string terms_in_field = "fv"; + int terms_in_field_length = terms_in_field.length(); + TypeDescriptor terms_in_col_type_desc = TypeDescriptor::create_varchar_type(terms_in_field_length); + + char value_1[] = "8.0"; + int value_1_length = (int)strlen(value_1); + StringValue string_value_1(value_1, value_1_length); + ExtLiteral term_literal_1(TYPE_VARCHAR, &string_value_1); + + char value_2[] = "16.0"; + int value_2_length = (int)strlen(value_2); + StringValue string_value_2(value_2, value_2_length); + ExtLiteral term_literal_2(TYPE_VARCHAR, &string_value_2); + + std::vector terms_values = {term_literal_1, term_literal_2}; + ExtInPredicate* in_predicate = new ExtInPredicate(TExprNodeType::IN_PRED, terms_in_field, terms_in_col_type_desc, terms_values); + in_predicate->is_not_in = true; + std::vector bool_predicates_4 = {in_predicate}; + EsPredicate* bool_predicate_4 = new EsPredicate(bool_predicates_4); + + // (content like "a%e%g_" or esquery(random, '{"bool": {"must_not": {"exists": {"field": "f1"}}}}')) and content != "wyf" and fv not in [8.0, 16.0] + 
std::vector and_bool_predicates = {bool_predicate_1, bool_predicate_2, bool_predicate_3, bool_predicate_4}; + + rapidjson::Document document; + rapidjson::Value compound_bool_value = BooleanQueryBuilder::to_query(and_bool_predicates); + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + compound_bool_value.Accept(writer); + std::string actual_bool_json = buffer.GetString(); + std::string expected_json = "{\"bool\":{\"filter\":[{\"bool\":{\"should\":[{\"wildcard\":{\"content\":\"a*e*g?\"}},{\"bool\":{\"must_not\":{\"exists\":{\"field\":\"f1\"}}}}]}},{\"bool\":{\"should\":[{\"range\":{\"k\":{\"ge\":\"a\"}}}]}},{\"bool\":{\"should\":[{\"bool\":{\"must_not\":[{\"term\":{\"content\":\"wyf\"}}]}}]}},{\"bool\":{\"should\":[{\"bool\":{\"must_not\":[{\"terms\":{\"fv\":[\"8.0\",\"16.0\"]}}]}}]}}]}}"; + //LOG(INFO) << "compound bool query" << actual_bool_json; + ASSERT_STREQ(expected_json.c_str(), actual_bool_json.c_str()); +} +} + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} From db1434feb8932c84b43dcc6d7a3b92fd4d12c18c Mon Sep 17 00:00:00 2001 From: wuyunfeng Date: Mon, 1 Apr 2019 15:57:38 +0800 Subject: [PATCH 15/73] Add virtual destructor and delete unused variable --- be/src/util/es_query_builder.cpp | 1 - be/src/util/es_query_builder.h | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/be/src/util/es_query_builder.cpp b/be/src/util/es_query_builder.cpp index d9c1760fb16ab5..c075f27143fcc4 100644 --- a/be/src/util/es_query_builder.cpp +++ b/be/src/util/es_query_builder.cpp @@ -302,7 +302,6 @@ void BooleanQueryBuilder::must_not(QueryBuilder* filter) { rapidjson::Value BooleanQueryBuilder::to_query(const std::vector& predicates) { rapidjson::Document root; - rapidjson::Document::AllocatorType &allocator = root.GetAllocator(); root.SetObject(); BooleanQueryBuilder *bool_query = new BooleanQueryBuilder(); for (auto es_predicate : predicates) { diff --git 
a/be/src/util/es_query_builder.h b/be/src/util/es_query_builder.h index 50402abc8ab617..53af63fde3d08b 100644 --- a/be/src/util/es_query_builder.h +++ b/be/src/util/es_query_builder.h @@ -26,6 +26,8 @@ class QueryBuilder { public: virtual rapidjson::Value to_json(rapidjson::Document& allocator) = 0; + virtual ~QueryBuilder() { + }; }; // process esquery(fieldA, json dsl) function From 34bb6a3338ec68bc09c88b05ec388305f26e7612 Mon Sep 17 00:00:00 2001 From: wuyunfeng Date: Mon, 1 Apr 2019 16:26:58 +0800 Subject: [PATCH 16/73] Bug-fix for empty predicates --- be/src/util/es_query_builder.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/be/src/util/es_query_builder.cpp b/be/src/util/es_query_builder.cpp index c075f27143fcc4..6909e00c24dfa1 100644 --- a/be/src/util/es_query_builder.cpp +++ b/be/src/util/es_query_builder.cpp @@ -302,6 +302,10 @@ void BooleanQueryBuilder::must_not(QueryBuilder* filter) { rapidjson::Value BooleanQueryBuilder::to_query(const std::vector& predicates) { rapidjson::Document root; + if (predicates.size() == 0) { + MatchAllQueryBuilder match_all_query; + return match_all_query.to_json(root) + } root.SetObject(); BooleanQueryBuilder *bool_query = new BooleanQueryBuilder(); for (auto es_predicate : predicates) { From 1bb8ad598c804d6e20cf849dd5dc724c1467923e Mon Sep 17 00:00:00 2001 From: lide-reed Date: Mon, 1 Apr 2019 19:39:08 +0800 Subject: [PATCH 17/73] Fix some issues for predicate --- be/src/exec/es_http_scan_node.cpp | 8 ++++++-- be/src/exec/es_http_scan_node.h | 2 +- be/src/exec/es_predicate.cpp | 26 +++++++++++++------------- be/src/exec/es_predicate.h | 8 ++++---- 4 files changed, 24 insertions(+), 20 deletions(-) diff --git a/be/src/exec/es_http_scan_node.cpp b/be/src/exec/es_http_scan_node.cpp index eca40a74047393..b1df4001668c5b 100644 --- a/be/src/exec/es_http_scan_node.cpp +++ b/be/src/exec/es_http_scan_node.cpp @@ -85,8 +85,7 @@ Status EsHttpScanNode::prepare(RuntimeState* state) { // build predicate void 
EsHttpScanNode::build_conjuncts_list() { for (int i = 0; i < _conjunct_ctxs.size(); ++i) { - std::shared_ptr predicate( - new EsPredicate(_conjunct_ctxs[i], _tuple_desc)); + EsPredicate* predicate = new EsPredicate(_conjunct_ctxs[i], _tuple_desc); if (predicate->build_disjuncts_list()) { _predicates.push_back(predicate); _predicate_to_conjunct.push_back(i); @@ -229,6 +228,11 @@ Status EsHttpScanNode::close(RuntimeState* state) { _batch_queue.clear(); + for(int i=0; i < _predicates.size(); i++) { + delete _predicates[i]; + } + _predicates.clear(); + return ExecNode::close(state); } diff --git a/be/src/exec/es_http_scan_node.h b/be/src/exec/es_http_scan_node.h index b80bf263882e56..1779550faa62d8 100644 --- a/be/src/exec/es_http_scan_node.h +++ b/be/src/exec/es_http_scan_node.h @@ -108,7 +108,7 @@ class EsHttpScanNode : public ScanNode { std::condition_variable _queue_reader_cond; std::condition_variable _queue_writer_cond; std::deque> _batch_queue; - std::vector> _predicates; + std::vector _predicates; std::vector _predicate_to_conjunct; }; diff --git a/be/src/exec/es_predicate.cpp b/be/src/exec/es_predicate.cpp index ff59c961e40c68..39bb0b288706ec 100644 --- a/be/src/exec/es_predicate.cpp +++ b/be/src/exec/es_predicate.cpp @@ -52,42 +52,42 @@ ExtLiteral::~ExtLiteral(){ } int8_t ExtLiteral::to_byte() { - DCHECK(_type != TYPE_TINYINT); + DCHECK(_type == TYPE_TINYINT); return *(reinterpret_cast(_value)); } int16_t ExtLiteral::to_short() { - DCHECK(_type != TYPE_SMALLINT); + DCHECK(_type == TYPE_SMALLINT); return *(reinterpret_cast(_value)); } int32_t ExtLiteral::to_int() { - DCHECK(_type != TYPE_INT); + DCHECK(_type == TYPE_INT); return *(reinterpret_cast(_value)); } int64_t ExtLiteral::to_long() { - DCHECK(_type != TYPE_BIGINT); + DCHECK(_type == TYPE_BIGINT); return *(reinterpret_cast(_value)); } float ExtLiteral::to_float() { - DCHECK(_type != TYPE_FLOAT); + DCHECK(_type == TYPE_FLOAT); return *(reinterpret_cast(_value)); } double ExtLiteral::to_double() { - 
DCHECK(_type != TYPE_DOUBLE); + DCHECK(_type == TYPE_DOUBLE); return *(reinterpret_cast(_value)); } std::string ExtLiteral::to_string() { - DCHECK(_type != TYPE_VARCHAR && _type != TYPE_CHAR); + DCHECK(_type == TYPE_VARCHAR || _type == TYPE_CHAR); return (reinterpret_cast(_value))->to_string(); } std::string ExtLiteral::to_date_string() { - DCHECK(_type != TYPE_DATE && _type != TYPE_DATETIME); + DCHECK(_type == TYPE_DATE || _type == TYPE_DATETIME); DateTimeValue date_value = *reinterpret_cast(_value); char str[MAX_DTVALUE_STR_LEN]; date_value.to_string(str); @@ -95,22 +95,22 @@ std::string ExtLiteral::to_date_string() { } bool ExtLiteral::to_bool() { - DCHECK(_type != TYPE_BOOLEAN); + DCHECK(_type == TYPE_BOOLEAN); return *(reinterpret_cast(_value)); } std::string ExtLiteral::to_decimal_string() { - DCHECK(_type != TYPE_DECIMAL); + DCHECK(_type == TYPE_DECIMAL); return reinterpret_cast(_value)->to_string(); } std::string ExtLiteral::to_decimalv2_string() { - DCHECK(_type != TYPE_DECIMALV2); + DCHECK(_type == TYPE_DECIMALV2); return reinterpret_cast(_value)->to_string(); } std::string ExtLiteral::to_largeint_string() { - DCHECK(_type != TYPE_LARGEINT); + DCHECK(_type == TYPE_LARGEINT); return LargeIntValue::to_string(*reinterpret_cast<__int128*>(_value)); } @@ -180,7 +180,7 @@ bool EsPredicate::build_disjuncts_list() { return build_disjuncts_list(_context->root(), _disjuncts); } -vector EsPredicate::get_predicate_list(){ +const vector& EsPredicate::get_predicate_list(){ return _disjuncts; } diff --git a/be/src/exec/es_predicate.h b/be/src/exec/es_predicate.h index 1a4400c9d022aa..8b975809aecb50 100644 --- a/be/src/exec/es_predicate.h +++ b/be/src/exec/es_predicate.h @@ -70,8 +70,8 @@ struct ExtColumnDesc { type(type) { } - const std::string& name; - const TypeDescriptor& type; + std::string name; + TypeDescriptor type; }; struct ExtPredicate { @@ -96,7 +96,7 @@ struct ExtBinaryPredicate : public ExtPredicate { ExtColumnDesc col; TExprOpcode::type op; - const 
ExtLiteral& value; + ExtLiteral value; }; struct ExtInPredicate : public ExtPredicate { @@ -167,7 +167,7 @@ class EsPredicate { public: EsPredicate(ExprContext* context, const TupleDescriptor* tuple_desc); ~EsPredicate(); - std::vector get_predicate_list(); + const std::vector& get_predicate_list(); bool build_disjuncts_list(); // public for tests EsPredicate(std::vector& all_predicates) { From 2331dd22cbe878858e69eafb830546ed68f87d2b Mon Sep 17 00:00:00 2001 From: wuyunfeng Date: Mon, 1 Apr 2019 19:43:21 +0800 Subject: [PATCH 18/73] Change local used variable --- be/src/util/es_query_builder.cpp | 5 ++--- be/src/util/es_query_builder.h | 2 +- be/src/util/es_scroll_query.cpp | 3 ++- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/be/src/util/es_query_builder.cpp b/be/src/util/es_query_builder.cpp index 6909e00c24dfa1..3f3ba8ebe5b00b 100644 --- a/be/src/util/es_query_builder.cpp +++ b/be/src/util/es_query_builder.cpp @@ -243,7 +243,7 @@ BooleanQueryBuilder::BooleanQueryBuilder(const std::vector& predi } case TExprNodeType::FUNCTION_CALL: { ExtFunction* function_predicate = (ExtFunction *)predicate; - if ("es_query" == function_predicate->func_name ) { + if ("esquery" == function_predicate->func_name ) { ESQueryBuilder* es_query = new ESQueryBuilder(function_predicate); _should_clauses.push_back(es_query); }; @@ -300,8 +300,7 @@ void BooleanQueryBuilder::must_not(QueryBuilder* filter) { _must_not_clauses.push_back(filter); } -rapidjson::Value BooleanQueryBuilder::to_query(const std::vector& predicates) { - rapidjson::Document root; +rapidjson::Value BooleanQueryBuilder::to_query(const std::vector& predicates, rapidjson::Document& root) { if (predicates.size() == 0) { MatchAllQueryBuilder match_all_query; return match_all_query.to_json(root) diff --git a/be/src/util/es_query_builder.h b/be/src/util/es_query_builder.h index 53af63fde3d08b..f50848d0bb3e82 100644 --- a/be/src/util/es_query_builder.h +++ b/be/src/util/es_query_builder.h @@ -105,7 +105,7 @@ 
class BooleanQueryBuilder : public QueryBuilder { void must(QueryBuilder* filter); void must_not(QueryBuilder* filter); // class method for transfer predicate to es query value, invoker should enclose this value with `query` - static rapidjson::Value to_query(const std::vector& predicates); + static rapidjson::Value to_query(const std::vector& predicates, rapidjson::Document& root); private: std::vector _must_clauses; diff --git a/be/src/util/es_scroll_query.cpp b/be/src/util/es_scroll_query.cpp index 7dbbdb56daafcf..3f268c532fbba4 100644 --- a/be/src/util/es_scroll_query.cpp +++ b/be/src/util/es_scroll_query.cpp @@ -67,7 +67,8 @@ std::string ESScrollQueryBuilder::build(const std::map rapidjson::Document::AllocatorType &allocator = es_query_dsl.GetAllocator(); es_query_dsl.SetObject(); // generate the filter caluse - rapidjson::Value query_node = BooleanQueryBuilder::to_query(predicates); + rapidjson::Document scratch_document; + rapidjson::Value query_node = BooleanQueryBuilder::to_query(predicates, scratch_document); // note: add `query` for this value.... 
es_query_dsl.AddMember("query", query_node, allocator); // just filter the selected fields for reducing the network cost From 85543e21eee6cf85218752c7663e74ec2acb0afa Mon Sep 17 00:00:00 2001 From: wuyunfeng Date: Mon, 1 Apr 2019 19:49:29 +0800 Subject: [PATCH 19/73] Bug-fix: add semicolon for builder --- be/src/util/es_query_builder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/util/es_query_builder.cpp b/be/src/util/es_query_builder.cpp index 3f3ba8ebe5b00b..b34e5abd2f70b3 100644 --- a/be/src/util/es_query_builder.cpp +++ b/be/src/util/es_query_builder.cpp @@ -303,7 +303,7 @@ void BooleanQueryBuilder::must_not(QueryBuilder* filter) { rapidjson::Value BooleanQueryBuilder::to_query(const std::vector& predicates, rapidjson::Document& root) { if (predicates.size() == 0) { MatchAllQueryBuilder match_all_query; - return match_all_query.to_json(root) + return match_all_query.to_json(root); } root.SetObject(); BooleanQueryBuilder *bool_query = new BooleanQueryBuilder(); From be7748f7faa290ed2249ece1b0562ed3fb691651 Mon Sep 17 00:00:00 2001 From: wuyunfeng Date: Tue, 2 Apr 2019 07:42:48 +0800 Subject: [PATCH 20/73] Modify UT and add comment to builder --- be/src/util/es_query_builder.cpp | 4 +++- be/test/util/es_scan_reader_test.cpp | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/be/src/util/es_query_builder.cpp b/be/src/util/es_query_builder.cpp index b34e5abd2f70b3..814295956e39c4 100644 --- a/be/src/util/es_query_builder.cpp +++ b/be/src/util/es_query_builder.cpp @@ -40,10 +40,11 @@ rapidjson::Value ESQueryBuilder::to_json(rapidjson::Document& docuemnt) { //{ "term": { "dv": "2" } } if (!draft.HasParseError()) { for (rapidjson::Value::ConstMemberIterator itr = draft.MemberBegin(); itr != draft.MemberEnd(); itr++) { + // deep copy, reference http://rapidjson.org/md_doc_tutorial.html#DeepCopyValue query_key.CopyFrom(itr->name, draft_allocator); query_value.CopyFrom(itr->value, draft_allocator); if (query_key.IsString()) 
{ - // if we found one key, then we should end loop + // if we found one key, then end loop as QueryDSL only support one `query` root break; } } @@ -51,6 +52,7 @@ rapidjson::Value ESQueryBuilder::to_json(rapidjson::Document& docuemnt) { rapidjson::Document::AllocatorType& allocator = docuemnt.GetAllocator(); rapidjson::Value es_query(rapidjson::kObjectType); es_query.SetObject(); + // Move Semantics, reference http://rapidjson.org/md_doc_tutorial.html#MoveSemantics es_query.AddMember(query_key, query_value, allocator); return es_query; } diff --git a/be/test/util/es_scan_reader_test.cpp b/be/test/util/es_scan_reader_test.cpp index f0159efc924d2f..06b194d13ca4d2 100644 --- a/be/test/util/es_scan_reader_test.cpp +++ b/be/test/util/es_scan_reader_test.cpp @@ -219,7 +219,7 @@ TEST_F(MockESServerTest, workflow) { props[ESScanReader::KEY_PASS_WORD] = "root"; props[ESScanReader::KEY_SHARD] = "0"; props[ESScanReader::KEY_BATCH_SIZE] = "1"; - std::vector> predicates; + std::vector predicates; props[ESScanReader::KEY_QUERY] = ESScrollQueryBuilder::build(props, fields, predicates); ESScanReader reader(target, props); auto st = reader.open(); From 6b56de6ef97fd33d450bdcbe0900345843454c62 Mon Sep 17 00:00:00 2001 From: wuyunfeng Date: Tue, 2 Apr 2019 10:39:04 +0800 Subject: [PATCH 21/73] Bug-fix for CMake --- be/test/util/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/test/util/CMakeLists.txt b/be/test/util/CMakeLists.txt index ab94d3cf2f814b..bff2fb9b1eb0ac 100644 --- a/be/test/util/CMakeLists.txt +++ b/be/test/util/CMakeLists.txt @@ -39,4 +39,4 @@ ADD_BE_TEST(arena_test) ADD_BE_TEST(aes_util_test) ADD_BE_TEST(md5_test) ADD_BE_TEST(es_scan_reader_test) -ADD_BE_TEST(es_query_builde_test) +ADD_BE_TEST(es_query_builder_test) From 1510741917ce9085dda9eab380dd65a27e3a9d8c Mon Sep 17 00:00:00 2001 From: wuyunfeng Date: Tue, 2 Apr 2019 11:25:30 +0800 Subject: [PATCH 22/73] Replace es_query with esquery --- be/test/util/es_query_builder_test.cpp | 
6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/be/test/util/es_query_builder_test.cpp b/be/test/util/es_query_builder_test.cpp index 6b700cf0c175aa..262d3447ec0beb 100644 --- a/be/test/util/es_query_builder_test.cpp +++ b/be/test/util/es_query_builder_test.cpp @@ -82,7 +82,7 @@ TEST_F(BooleanQueryBuilderTest, es_query) { StringValue value(str, length); ExtLiteral term_literal(TYPE_VARCHAR, &value); std::vector values = {term_literal}; - std::string function_name = "es_query"; + std::string function_name = "esquery"; ExtFunction* function_predicate = new ExtFunction(TExprNodeType::FUNCTION_CALL, function_name, cols, values); ESQueryBuilder es_query(function_predicate); rapidjson::Document document; @@ -184,7 +184,7 @@ TEST_F(BooleanQueryBuilderTest, bool_query) { StringValue es_query_value(es_query_str, es_query_length); ExtLiteral es_query_term_literal(TYPE_VARCHAR, &es_query_value); std::vector es_query_values = {es_query_term_literal}; - std::string function_name = "es_query"; + std::string function_name = "esquery"; ExtFunction* function_predicate = new ExtFunction(TExprNodeType::FUNCTION_CALL, function_name, es_query_cols, es_query_values); // k >= a char range_value_str[] = "a"; @@ -237,7 +237,7 @@ TEST_F(BooleanQueryBuilderTest, compound_bool_query) { StringValue es_query_value(es_query_str, es_query_length); ExtLiteral es_query_term_literal(TYPE_VARCHAR, &es_query_value); std::vector es_query_values = {es_query_term_literal}; - std::string function_name = "es_query"; + std::string function_name = "esquery"; ExtFunction* function_predicate = new ExtFunction(TExprNodeType::FUNCTION_CALL, function_name, es_query_cols, es_query_values); std::vector bool_predicates_1 = {like_predicate, function_predicate}; EsPredicate* bool_predicate_1 = new EsPredicate(bool_predicates_1); From 06bda8e8e68eee10aec263c5413e10f8711a97a1 Mon Sep 17 00:00:00 2001 From: lide-reed Date: Wed, 3 Apr 2019 10:29:30 +0800 Subject: [PATCH 23/73] Change to pass 
predicates --- be/src/exec/es_predicate.cpp | 120 ++++++++++++------------- be/src/exec/es_predicate.h | 34 +++---- be/src/util/es_query_builder.cpp | 33 ++++--- be/src/util/es_scan_reader.cpp | 4 +- be/src/util/es_scroll_parser.cpp | 57 ++++++------ be/src/util/es_scroll_parser.h | 10 +-- be/test/util/es_query_builder_test.cpp | 2 +- 7 files changed, 128 insertions(+), 132 deletions(-) diff --git a/be/src/exec/es_predicate.cpp b/be/src/exec/es_predicate.cpp index 39bb0b288706ec..8bb6465688d860 100644 --- a/be/src/exec/es_predicate.cpp +++ b/be/src/exec/es_predicate.cpp @@ -48,45 +48,93 @@ namespace doris { using namespace std; +std::string ExtLiteral::value_to_string() { + std::stringstream ss; + switch (_type) { + case TYPE_TINYINT: + ss << get_byte(); + break; + case TYPE_SMALLINT: + ss << get_short(); + break; + case TYPE_INT: + ss << get_int(); + break; + case TYPE_BIGINT: + ss << get_long(); + break; + case TYPE_FLOAT: + ss << get_float(); + break; + case TYPE_DOUBLE: + ss << get_double(); + break; + case TYPE_CHAR: + case TYPE_VARCHAR: + ss << get_string(); + break; + case TYPE_DATE: + case TYPE_DATETIME: + ss << get_date_string(); + break; + case TYPE_BOOLEAN: + ss << get_bool(); + break; + case TYPE_DECIMAL: + ss << get_decimal_string(); + break; + case TYPE_DECIMALV2: + ss << get_decimalv2_string(); + break; + case TYPE_LARGEINT: + ss << get_largeint_string(); + break; + default: + DCHECK(false); + break; + } + return ss.str(); +} + ExtLiteral::~ExtLiteral(){ } -int8_t ExtLiteral::to_byte() { +int8_t ExtLiteral::get_byte() { DCHECK(_type == TYPE_TINYINT); return *(reinterpret_cast(_value)); } -int16_t ExtLiteral::to_short() { +int16_t ExtLiteral::get_short() { DCHECK(_type == TYPE_SMALLINT); return *(reinterpret_cast(_value)); } -int32_t ExtLiteral::to_int() { +int32_t ExtLiteral::get_int() { DCHECK(_type == TYPE_INT); return *(reinterpret_cast(_value)); } -int64_t ExtLiteral::to_long() { +int64_t ExtLiteral::get_long() { DCHECK(_type == TYPE_BIGINT); 
return *(reinterpret_cast(_value)); } -float ExtLiteral::to_float() { +float ExtLiteral::get_float() { DCHECK(_type == TYPE_FLOAT); return *(reinterpret_cast(_value)); } -double ExtLiteral::to_double() { +double ExtLiteral::get_double() { DCHECK(_type == TYPE_DOUBLE); return *(reinterpret_cast(_value)); } -std::string ExtLiteral::to_string() { +std::string ExtLiteral::get_string() { DCHECK(_type == TYPE_VARCHAR || _type == TYPE_CHAR); return (reinterpret_cast(_value))->to_string(); } -std::string ExtLiteral::to_date_string() { +std::string ExtLiteral::get_date_string() { DCHECK(_type == TYPE_DATE || _type == TYPE_DATETIME); DateTimeValue date_value = *reinterpret_cast(_value); char str[MAX_DTVALUE_STR_LEN]; @@ -94,74 +142,26 @@ std::string ExtLiteral::to_date_string() { return std::string(str, strlen(str)); } -bool ExtLiteral::to_bool() { +bool ExtLiteral::get_bool() { DCHECK(_type == TYPE_BOOLEAN); return *(reinterpret_cast(_value)); } -std::string ExtLiteral::to_decimal_string() { +std::string ExtLiteral::get_decimal_string() { DCHECK(_type == TYPE_DECIMAL); return reinterpret_cast(_value)->to_string(); } -std::string ExtLiteral::to_decimalv2_string() { +std::string ExtLiteral::get_decimalv2_string() { DCHECK(_type == TYPE_DECIMALV2); return reinterpret_cast(_value)->to_string(); } -std::string ExtLiteral::to_largeint_string() { +std::string ExtLiteral::get_largeint_string() { DCHECK(_type == TYPE_LARGEINT); return LargeIntValue::to_string(*reinterpret_cast<__int128*>(_value)); } -std::string ExtLiteral::value_to_string() { - std::stringstream ss; - switch (_type) { - case TYPE_TINYINT: - ss << to_byte(); - break; - case TYPE_SMALLINT: - ss << to_short(); - break; - case TYPE_INT: - ss << to_int(); - break; - case TYPE_BIGINT: - ss << to_long(); - break; - case TYPE_FLOAT: - ss << to_float(); - break; - case TYPE_DOUBLE: - ss << to_double(); - break; - case TYPE_CHAR: - case TYPE_VARCHAR: - ss << to_string(); - break; - case TYPE_DATE: - case TYPE_DATETIME: - ss 
<< to_date_string(); - break; - case TYPE_BOOLEAN: - ss << to_bool(); - break; - case TYPE_DECIMAL: - ss << to_decimal_string(); - break; - case TYPE_DECIMALV2: - ss << to_decimalv2_string(); - break; - case TYPE_LARGEINT: - ss << to_largeint_string(); - break; - default: - DCHECK(false); - break; - } - return ss.str(); -} - EsPredicate::EsPredicate(ExprContext* context, const TupleDescriptor* tuple_desc) : _context(context), diff --git a/be/src/exec/es_predicate.h b/be/src/exec/es_predicate.h index 8b975809aecb50..901413d4b4a23e 100644 --- a/be/src/exec/es_predicate.h +++ b/be/src/exec/es_predicate.h @@ -40,28 +40,32 @@ class ExtLiteral { ExtLiteral(PrimitiveType type, void *value) : _type(type), _value(value) { + _str = value_to_string(); } ~ExtLiteral(); + const std::string& to_string() { + return _str; + } - int8_t to_byte(); - int16_t to_short(); - int32_t to_int(); - int64_t to_long(); - float to_float(); - double to_double(); - std::string to_string(); - std::string to_date_string(); - bool to_bool(); - std::string to_decimal_string(); - std::string to_decimalv2_string(); - std::string to_largeint_string(); + private: + int8_t get_byte(); + int16_t get_short(); + int32_t get_int(); + int64_t get_long(); + float get_float(); + double get_double(); + std::string get_string(); + std::string get_date_string(); + bool get_bool(); + std::string get_decimal_string(); + std::string get_decimalv2_string(); + std::string get_largeint_string(); std::string value_to_string(); - private: - PrimitiveType _type; - void *_value; + void* _value; + std::string _str; }; struct ExtColumnDesc { diff --git a/be/src/util/es_query_builder.cpp b/be/src/util/es_query_builder.cpp index 814295956e39c4..8dec3bba62f656 100644 --- a/be/src/util/es_query_builder.cpp +++ b/be/src/util/es_query_builder.cpp @@ -28,28 +28,27 @@ ESQueryBuilder::ESQueryBuilder(const std::string& es_query_str) : _es_query_str( } ESQueryBuilder::ESQueryBuilder(ExtFunction* es_query) { auto first = 
es_query->values.front(); - _es_query_str = first.value_to_string(); + _es_query_str = first.to_string(); } -rapidjson::Value ESQueryBuilder::to_json(rapidjson::Document& docuemnt) { +rapidjson::Value ESQueryBuilder::to_json(rapidjson::Document& document) { rapidjson::Document draft; draft.Parse<0>(_es_query_str.c_str()); - rapidjson::Document::AllocatorType& draft_allocator = draft.GetAllocator(); + rapidjson::Document::AllocatorType& allocator = document.GetAllocator(); rapidjson::Value query_key; rapidjson::Value query_value; //{ "term": { "dv": "2" } } if (!draft.HasParseError()) { for (rapidjson::Value::ConstMemberIterator itr = draft.MemberBegin(); itr != draft.MemberEnd(); itr++) { // deep copy, reference http://rapidjson.org/md_doc_tutorial.html#DeepCopyValue - query_key.CopyFrom(itr->name, draft_allocator); - query_value.CopyFrom(itr->value, draft_allocator); + query_key.CopyFrom(itr->name, allocator); + query_value.CopyFrom(itr->value, allocator); if (query_key.IsString()) { // if we found one key, then end loop as QueryDSL only support one `query` root break; } } } - rapidjson::Document::AllocatorType& allocator = docuemnt.GetAllocator(); rapidjson::Value es_query(rapidjson::kObjectType); es_query.SetObject(); // Move Semantics, reference http://rapidjson.org/md_doc_tutorial.html#MoveSemantics @@ -70,7 +69,7 @@ rapidjson::Value WildCardQueryBuilder::to_json(rapidjson::Document& document) { } WildCardQueryBuilder::WildCardQueryBuilder(ExtLikePredicate* like_predicate) { - _like_value = like_predicate->value.value_to_string(); + _like_value = like_predicate->value.to_string(); std::replace(_like_value.begin(), _like_value.end(), '_', '?'); std::replace(_like_value.begin(), _like_value.end(), '%', '*'); _field = like_predicate->col.name; @@ -83,11 +82,11 @@ TermQueryBuilder::TermQueryBuilder(const std::string& field, const std::string& TermQueryBuilder::TermQueryBuilder(ExtBinaryPredicate* binary_predicate) { _field = binary_predicate->col.name; ExtLiteral 
literal = binary_predicate->value; - _term = literal.value_to_string(); + _term = literal.to_string(); } -rapidjson::Value TermQueryBuilder::to_json(rapidjson::Document& docuemnt) { - rapidjson::Document::AllocatorType& allocator = docuemnt.GetAllocator(); +rapidjson::Value TermQueryBuilder::to_json(rapidjson::Document& document) { + rapidjson::Document::AllocatorType& allocator = document.GetAllocator(); rapidjson::Value term_node(rapidjson::kObjectType); term_node.SetObject(); rapidjson::Value field_value(_field.c_str(), allocator); @@ -105,7 +104,7 @@ rapidjson::Value TermsInSetQueryBuilder::to_json(rapidjson::Document& document) rapidjson::Value terms_node(rapidjson::kObjectType); rapidjson::Value values_node(rapidjson::kArrayType); for (auto value : _in_predicate->values) { - rapidjson::Value value_value(value.value_to_string().c_str(), allocator); + rapidjson::Value value_value(value.to_string().c_str(), allocator); values_node.PushBack(value_value, allocator); } rapidjson::Value field_value(field.c_str(), allocator); @@ -125,7 +124,7 @@ rapidjson::Value RangeQueryBuilder::to_json(rapidjson::Document& document) { rapidjson::Document::AllocatorType& allocator = document.GetAllocator(); rapidjson::Value field_value(field.c_str(), allocator); ExtLiteral b_value = _range_predicate->value; - rapidjson::Value value(b_value.value_to_string().c_str(), allocator); + rapidjson::Value value(b_value.to_string().c_str(), allocator); rapidjson::Value op_node(rapidjson::kObjectType); op_node.SetObject(); switch (_range_predicate->op) @@ -257,13 +256,13 @@ BooleanQueryBuilder::BooleanQueryBuilder(const std::vector& predi } } -rapidjson::Value BooleanQueryBuilder::to_json(rapidjson::Document& docuemnt) { - rapidjson::Document::AllocatorType &allocator = docuemnt.GetAllocator(); +rapidjson::Value BooleanQueryBuilder::to_json(rapidjson::Document& document) { + rapidjson::Document::AllocatorType &allocator = document.GetAllocator(); rapidjson::Value 
root_node_object(rapidjson::kObjectType); if (_filter_clauses.size() > 0) { rapidjson::Value filter_node(rapidjson::kArrayType); for (auto must_clause : _filter_clauses) { - filter_node.PushBack(must_clause->to_json(docuemnt), allocator); + filter_node.PushBack(must_clause->to_json(document), allocator); } root_node_object.AddMember("filter", filter_node, allocator); } @@ -271,7 +270,7 @@ rapidjson::Value BooleanQueryBuilder::to_json(rapidjson::Document& docuemnt) { if (_should_clauses.size() > 0) { rapidjson::Value should_node(rapidjson::kArrayType); for (auto should_clause : _should_clauses) { - should_node.PushBack(should_clause->to_json(docuemnt), allocator); + should_node.PushBack(should_clause->to_json(document), allocator); } root_node_object.AddMember("should", should_node, allocator); } @@ -279,7 +278,7 @@ rapidjson::Value BooleanQueryBuilder::to_json(rapidjson::Document& docuemnt) { if (_must_not_clauses.size() > 0) { rapidjson::Value must_not_node(rapidjson::kArrayType); for (auto must_not_clause : _must_not_clauses) { - must_not_node.PushBack(must_not_clause->to_json(docuemnt), allocator); + must_not_node.PushBack(must_not_clause->to_json(document), allocator); } root_node_object.AddMember("must_not", must_not_node, allocator); } diff --git a/be/src/util/es_scan_reader.cpp b/be/src/util/es_scan_reader.cpp index bfc4fed045e2fd..b4d0236dc5a585 100644 --- a/be/src/util/es_scan_reader.cpp +++ b/be/src/util/es_scan_reader.cpp @@ -110,8 +110,7 @@ Status ESScanReader::get_next(bool* scan_eos, ScrollParser** parser) { } } - scroll_parser = ScrollParser::parse_from_string(response); - _scroll_id = scroll_parser->get_scroll_id(); + scroll_parser = new ScrollParser(response); // maybe the index or shard is empty if (scroll_parser == nullptr || scroll_parser->get_total() == 0) { @@ -125,6 +124,7 @@ Status ESScanReader::get_next(bool* scan_eos, ScrollParser** parser) { _eos = false; } + _scroll_id = scroll_parser->get_scroll_id(); *parser = scroll_parser; *scan_eos 
= false; return Status::OK; diff --git a/be/src/util/es_scroll_parser.cpp b/be/src/util/es_scroll_parser.cpp index 7d92cc86688c7a..3ab06455dcb640 100644 --- a/be/src/util/es_scroll_parser.cpp +++ b/be/src/util/es_scroll_parser.cpp @@ -40,49 +40,45 @@ static const string ERROR_INVALID_COL_DATA = "Data source returned inconsistent static const string ERROR_MEM_LIMIT_EXCEEDED = "DataSourceScanNode::$0() failed to allocate " "$1 bytes for $2."; -ScrollParser::ScrollParser(std::string scroll_id, int total, int size) : - _scroll_id(scroll_id), - _total(total), - _size(size), +ScrollParser::ScrollParser(const std::string& scroll_result) : + _scroll_id(""), + _total(0), + _size(0), _line_index(0) { + parsing(scroll_result); } ScrollParser::~ScrollParser() { } +void ScrollParser::parsing(const std::string scroll_result) { + _document_node.Parse(scroll_result.c_str()); -ScrollParser* ScrollParser::parse_from_string(const std::string& scroll_result) { - ScrollParser* scroll_parser = nullptr; - rapidjson::Document document_node; - document_node.Parse<0>(scroll_result.c_str()); - - if (!document_node.HasMember(FIELD_SCROLL_ID)) { + if (!_document_node.HasMember(FIELD_SCROLL_ID)) { LOG(ERROR) << "maybe not a scroll request"; - return nullptr; + return; } - rapidjson::Value &scroll_node = document_node[FIELD_SCROLL_ID]; - std::string scroll_id = scroll_node.GetString(); + const rapidjson::Value &scroll_node = _document_node[FIELD_SCROLL_ID]; + _scroll_id = scroll_node.GetString(); // { hits: { total : 2, "hits" : [ {}, {}, {} ]}} - rapidjson::Value &outer_hits_node = document_node[FIELD_HITS]; - rapidjson::Value &field_total = outer_hits_node[FIELD_TOTAL]; - int total = field_total.GetInt(); - if (total == 0) { - scroll_parser = new ScrollParser(scroll_id, total); - return scroll_parser; + const rapidjson::Value &outer_hits_node = _document_node[FIELD_HITS]; + const rapidjson::Value &field_total = outer_hits_node[FIELD_TOTAL]; + _total = field_total.GetInt(); + if (_total == 0) { 
+ return; } - VLOG(1) << "es_scan_reader total hits: " << total << " documents"; - rapidjson::Value &inner_hits_node = outer_hits_node[FIELD_INNER_HITS]; + VLOG(1) << "es_scan_reader total hits: " << _total << " documents"; + const rapidjson::Value &inner_hits_node = outer_hits_node[FIELD_INNER_HITS]; if (!inner_hits_node.IsArray()) { LOG(ERROR) << "maybe not a scroll request"; - return nullptr; + return; } - int size = inner_hits_node.Size(); - scroll_parser = new ScrollParser(scroll_id, total, size); - scroll_parser->set_inner_hits_node(inner_hits_node); - return scroll_parser; + rapidjson::Document::AllocatorType& a = _document_node.GetAllocator(); + _inner_hits_node.CopyFrom(inner_hits_node, a); + _size = _inner_hits_node.Size(); } int ScrollParser::get_size() { @@ -104,8 +100,8 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, return Status::OK; } - rapidjson::Value& obj = _inner_hits_node[_line_index++]; - rapidjson::Value& line = obj[FIELD_SOURCE]; + const rapidjson::Value& obj = _inner_hits_node[_line_index++]; + const rapidjson::Value& line = obj[FIELD_SOURCE]; if (!line.IsObject()) { return Status("Parse inner hits failed"); } @@ -118,7 +114,8 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, continue; } - const char* col_name = slot_desc->col_name().c_str(); + std::string s(slot_desc->col_name()); + const char* col_name = s.c_str(); rapidjson::Value::ConstMemberIterator itr = line.FindMember(col_name); if (itr == line.MemberEnd()) { tuple->set_null(slot_desc->null_indicator_offset()); @@ -126,7 +123,7 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, } tuple->set_not_null(slot_desc->null_indicator_offset()); - rapidjson::Value &col = line[col_name]; + const rapidjson::Value &col = line[col_name]; void* slot = tuple->get_slot(slot_desc->tuple_offset()); switch (slot_desc->type().type) { diff --git a/be/src/util/es_scroll_parser.h b/be/src/util/es_scroll_parser.h index 
53e207c2e4fe6a..820aeaa94e8979 100644 --- a/be/src/util/es_scroll_parser.h +++ b/be/src/util/es_scroll_parser.h @@ -30,29 +30,25 @@ class Status; class ScrollParser { public: - ScrollParser(std::string scroll_id, int total, int size = 0); + ScrollParser(const std::string& scroll_result); ~ScrollParser(); - static ScrollParser* parse_from_string(const std::string& scroll_result); - Status fill_tuple(const TupleDescriptor* _tuple_desc, Tuple* tuple, MemPool* mem_pool, bool* line_eof); - void set_inner_hits_node(rapidjson::Value& inner_hits_node) { - _inner_hits_node = inner_hits_node; - } - const std::string& get_scroll_id(); int get_total(); int get_size(); private: + void parsing(const std::string scroll_result); std::string _scroll_id; int _total; int _size; rapidjson::SizeType _line_index; + rapidjson::Document _document_node; rapidjson::Value _inner_hits_node; }; } diff --git a/be/test/util/es_query_builder_test.cpp b/be/test/util/es_query_builder_test.cpp index 262d3447ec0beb..b53ec1789d484b 100644 --- a/be/test/util/es_query_builder_test.cpp +++ b/be/test/util/es_query_builder_test.cpp @@ -290,7 +290,7 @@ TEST_F(BooleanQueryBuilderTest, compound_bool_query) { std::vector and_bool_predicates = {bool_predicate_1, bool_predicate_2, bool_predicate_3, bool_predicate_4}; rapidjson::Document document; - rapidjson::Value compound_bool_value = BooleanQueryBuilder::to_query(and_bool_predicates); + rapidjson::Value compound_bool_value = BooleanQueryBuilder::to_query(and_bool_predicates, document); rapidjson::StringBuffer buffer; rapidjson::Writer writer(buffer); compound_bool_value.Accept(writer); From 2d4a78493d82c1ede6025f73edcecc04b625915b Mon Sep 17 00:00:00 2001 From: wuyunfeng Date: Wed, 3 Apr 2019 20:27:46 +0800 Subject: [PATCH 24/73] Modify doris-fe support http transport mode --- be/src/util/es_query_builder.cpp | 23 +- .../org/apache/doris/catalog/EsTable.java | 33 ++- .../apache/doris/external/EsIndexState.java | 22 ++ 
.../apache/doris/external/EsMajorVersion.java | 103 +++++++++ .../org/apache/doris/external/EsNodeInfo.java | 205 ++++++++++++++++++ .../apache/doris/external/EsRestClient.java | 109 ++++++++++ .../apache/doris/external/EsShardRouting.java | 24 +- .../apache/doris/external/EsStateStore.java | 6 + .../apache/doris/external/EsTableState.java | 18 ++ .../org/apache/doris/planner/EsScanNode.java | 15 +- .../org/apache/doris/es/EsRestClientTest.java | 4 + 11 files changed, 539 insertions(+), 23 deletions(-) create mode 100644 fe/src/main/java/org/apache/doris/external/EsMajorVersion.java create mode 100644 fe/src/main/java/org/apache/doris/external/EsNodeInfo.java create mode 100644 fe/src/main/java/org/apache/doris/external/EsRestClient.java create mode 100644 fe/src/test/java/org/apache/doris/es/EsRestClientTest.java diff --git a/be/src/util/es_query_builder.cpp b/be/src/util/es_query_builder.cpp index 8dec3bba62f656..762f7021f1d6ee 100644 --- a/be/src/util/es_query_builder.cpp +++ b/be/src/util/es_query_builder.cpp @@ -14,8 +14,9 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. 
-#include #include "util/es_query_builder.h" + +#include #include "rapidjson/rapidjson.h" #include "rapidjson/stringbuffer.h" #include "rapidjson/writer.h" @@ -42,18 +43,18 @@ rapidjson::Value ESQueryBuilder::to_json(rapidjson::Document& document) { for (rapidjson::Value::ConstMemberIterator itr = draft.MemberBegin(); itr != draft.MemberEnd(); itr++) { // deep copy, reference http://rapidjson.org/md_doc_tutorial.html#DeepCopyValue query_key.CopyFrom(itr->name, allocator); - query_value.CopyFrom(itr->value, allocator); if (query_key.IsString()) { // if we found one key, then end loop as QueryDSL only support one `query` root - break; + query_value.CopyFrom(itr->value, allocator); + rapidjson::Value es_query(rapidjson::kObjectType); + es_query.SetObject(); + // Move Semantics, reference http://rapidjson.org/md_doc_tutorial.html#MoveSemantics + es_query.AddMember(query_key, query_value, allocator); + return es_query; } } } - rapidjson::Value es_query(rapidjson::kObjectType); - es_query.SetObject(); - // Move Semantics, reference http://rapidjson.org/md_doc_tutorial.html#MoveSemantics - es_query.AddMember(query_key, query_value, allocator); - return es_query; + return nullptr; } rapidjson::Value WildCardQueryBuilder::to_json(rapidjson::Document& document) { rapidjson::Document::AllocatorType& allocator = document.GetAllocator(); @@ -307,13 +308,13 @@ rapidjson::Value BooleanQueryBuilder::to_query(const std::vector& return match_all_query.to_json(root); } root.SetObject(); - BooleanQueryBuilder *bool_query = new BooleanQueryBuilder(); + BooleanQueryBuilder bool_query; for (auto es_predicate : predicates) { vector or_predicates = es_predicate->get_predicate_list(); BooleanQueryBuilder* inner_bool_query = new BooleanQueryBuilder(or_predicates); - bool_query->must(inner_bool_query); + bool_query.must(inner_bool_query); } - rapidjson::Value root_value_node = bool_query->to_json(root); + rapidjson::Value root_value_node = bool_query.to_json(root); // root.AddMember("query", 
root_value_node, allocator); // rapidjson::StringBuffer buffer; // rapidjson::Writer writer(buffer); diff --git a/fe/src/main/java/org/apache/doris/catalog/EsTable.java b/fe/src/main/java/org/apache/doris/catalog/EsTable.java index fdcb7c843adfc5..e63fed6d20a6eb 100644 --- a/fe/src/main/java/org/apache/doris/catalog/EsTable.java +++ b/fe/src/main/java/org/apache/doris/catalog/EsTable.java @@ -25,6 +25,7 @@ import java.util.Map; import java.util.zip.Adler32; +import org.apache.doris.external.EsNodeInfo; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -39,6 +40,8 @@ import com.google.common.base.Strings; import com.google.common.collect.Maps; +import javax.xml.soap.Node; + public class EsTable extends Table { private static final Logger LOG = LogManager.getLogger(EsTable.class); @@ -47,6 +50,10 @@ public class EsTable extends Table { public static final String PASSWORD = "password"; public static final String INDEX = "index"; public static final String TYPE = "type"; + public static final String TRANSPORT = "transport"; + + public static final String TRANSPORT_HTTP = "http"; + public static final String TRANSPORT_THRIFT = "thrift"; private String hosts; private String[] seeds; @@ -54,6 +61,7 @@ public class EsTable extends Table { private String passwd = ""; private String indexName; private String mappingType = "_doc"; + private String transport = "http"; // only save the partition definition, save the partition key, // partition list is got from es cluster dynamically and is saved in esTableState private PartitionInfo partitionInfo; @@ -63,7 +71,7 @@ public EsTable() { super(TableType.ELASTICSEARCH); } - public EsTable(long id, String name, List schema, + public EsTable(long id, String name, List schema, Map properties, PartitionInfo partitionInfo) throws DdlException { super(id, name, TableType.ELASTICSEARCH, schema); @@ -85,7 +93,7 @@ private void validate(Map properties) throws DdlException { hosts = 
properties.get(HOSTS).trim(); seeds = hosts.split(","); - if (!Strings.isNullOrEmpty(properties.get(USER)) + if (!Strings.isNullOrEmpty(properties.get(USER)) && !Strings.isNullOrEmpty(properties.get(USER).trim())) { userName = properties.get(USER).trim(); } @@ -106,8 +114,16 @@ private void validate(Map properties) throws DdlException { && !Strings.isNullOrEmpty(properties.get(TYPE).trim())) { mappingType = properties.get(TYPE).trim(); } + if (!Strings.isNullOrEmpty(properties.get(TRANSPORT)) + && Strings.isNullOrEmpty(properties.get(TRANSPORT).trim())) { + transport = properties.get(TRANSPORT); + if (!(TRANSPORT_HTTP.equals(transport) || TRANSPORT_THRIFT.equals(transport))) { + throw new DdlException("transport of ES table must be http(recommend) or thrift(reserved inner usage)," + + " but value is " + transport); + } + } } - + public TTableDescriptor toThrift() { TEsTable tEsTable = new TEsTable(); TTableDescriptor tTableDescriptor = new TTableDescriptor(getId(), TTableType.ES_TABLE, @@ -137,7 +153,8 @@ public int getSignature(int signatureVersion) { adler32.update(indexName.getBytes(charsetName)); // mysql table adler32.update(mappingType.getBytes(charsetName)); - + // transport + adler32.update(transport.getBytes(charsetName)); } catch (UnsupportedEncodingException e) { LOG.error("encoding error", e); return -1; @@ -156,6 +173,7 @@ public void write(DataOutput out) throws IOException { Text.writeString(out, mappingType); Text.writeString(out, partitionInfo.getType().name()); partitionInfo.write(out); + Text.writeString(out, transport); } @Override @@ -175,12 +193,13 @@ public void readFields(DataInput in) throws IOException { } else { throw new IOException("invalid partition type: " + partType); } + transport = Text.readString(in); } public String getHosts() { return hosts; } - + public String[] getSeeds() { return seeds; } @@ -201,6 +220,10 @@ public String getMappingType() { return mappingType; } + public String getTransport() { + return transport; + } + 
public PartitionInfo getPartitionInfo() { return partitionInfo; } diff --git a/fe/src/main/java/org/apache/doris/external/EsIndexState.java b/fe/src/main/java/org/apache/doris/external/EsIndexState.java index bcb692511ae9ef..4a1201e70dbc33 100644 --- a/fe/src/main/java/org/apache/doris/external/EsIndexState.java +++ b/fe/src/main/java/org/apache/doris/external/EsIndexState.java @@ -19,6 +19,7 @@ import java.util.List; import java.util.Map; +import java.util.Random; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -55,6 +56,27 @@ public EsIndexState(String indexName) { this.partitionDesc = null; this.partitionKey = null; } + + + public void addHttpAddress(Map nodesInfo) { + for (Map.Entry> entry : shardRoutings.entrySet()) { + List shardRoutings = entry.getValue(); + for (EsShardRouting shardRouting : shardRoutings) { + String nodeId = shardRouting.getNodeId(); + if (nodesInfo.containsKey(nodeId)) { + shardRouting.setHttpAddress(nodesInfo.get(nodeId).getPublishAddress()); + } else { + shardRouting.setHttpAddress(randomAddress(nodesInfo)); + } + } + } + } + + public TNetworkAddress randomAddress(Map nodesInfo) { + int seed = new Random().nextInt() % nodesInfo.size(); + EsNodeInfo[] nodeInfos = (EsNodeInfo[]) nodesInfo.values().toArray(); + return nodeInfos[seed].getPublishAddress(); + } public static EsIndexState parseIndexStateV55(String indexName, JSONObject indicesRoutingMap, JSONObject nodesMap, diff --git a/fe/src/main/java/org/apache/doris/external/EsMajorVersion.java b/fe/src/main/java/org/apache/doris/external/EsMajorVersion.java new file mode 100644 index 00000000000000..b71db8ae0fd6b4 --- /dev/null +++ b/fe/src/main/java/org/apache/doris/external/EsMajorVersion.java @@ -0,0 +1,103 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.external; + + +/** + * Elasticsearch major version information, useful to check client's query compatibility with the Rest API. + * + * reference es-hadoop: + * + */ +public class EsMajorVersion { + public static final EsMajorVersion V_5_X = new EsMajorVersion((byte) 5, "5.x"); + public static final EsMajorVersion V_6_X = new EsMajorVersion((byte) 6, "6.x"); + public static final EsMajorVersion V_7_X = new EsMajorVersion((byte) 7, "7.x"); + public static final EsMajorVersion LATEST = V_7_X; + + public final byte major; + private final String version; + + private EsMajorVersion(byte major, String version) { + this.major = major; + this.version = version; + } + + public boolean after(EsMajorVersion version) { + return version.major < major; + } + + public boolean on(EsMajorVersion version) { + return version.major == major; + } + + public boolean notOn(EsMajorVersion version) { + return !on(version); + } + + public boolean onOrAfter(EsMajorVersion version) { + return version.major <= major; + } + + public boolean before(EsMajorVersion version) { + return version.major > major; + } + + public boolean onOrBefore(EsMajorVersion version) { + return version.major >= major; + } + + public static EsMajorVersion parse(String version) throws Exception { + if (version.startsWith("5.")) { + return new EsMajorVersion((byte) 5, version); + } + if (version.startsWith("6.")) 
{ + return new EsMajorVersion((byte) 6, version); + } + if (version.startsWith("7.")) { + return new EsMajorVersion((byte) 7, version); + } + throw new Exception("Unsupported/Unknown Elasticsearch version [" + version + "]." + + "Highest supported version is [" + LATEST.version + "]. You may need to upgrade ES-Hadoop."); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + EsMajorVersion version = (EsMajorVersion) o; + + return major == version.major && + version.equals(version.version); + } + + @Override + public int hashCode() { + return major; + } + + @Override + public String toString() { + return version; + } +} diff --git a/fe/src/main/java/org/apache/doris/external/EsNodeInfo.java b/fe/src/main/java/org/apache/doris/external/EsNodeInfo.java new file mode 100644 index 00000000000000..61b513bbfacd9e --- /dev/null +++ b/fe/src/main/java/org/apache/doris/external/EsNodeInfo.java @@ -0,0 +1,205 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.external; + +import org.apache.doris.thrift.TNetworkAddress; + +import java.util.List; +import java.util.Map; + +/** + * This class represents one node with the http and potential thrift publish address + */ +public class EsNodeInfo { + private final String id; + private final String name; + private final String host; + private final String ip; + private TNetworkAddress publishAddress; + private final boolean hasHttp; + private final boolean isClient; + private final boolean isData; + private final boolean isIngest; + private boolean hasThrift; + private TNetworkAddress thriftAddress; + + public EsNodeInfo(String id, Map map) throws Exception { + this.id = id; + EsMajorVersion version = EsMajorVersion.parse((String) map.get("version")); + this.name = (String) map.get("name"); + this.host = (String) map.get("host"); + this.ip = (String) map.get("ip"); + if (version.before(EsMajorVersion.V_5_X)) { + Map attributes = (Map) map.get("attributes"); + if (attributes == null) { + this.isClient = false; + this.isData = true; + } else { + String data = (String) attributes.get("data"); + this.isClient = data == null ? true : !Boolean.parseBoolean(data); + this.isData = data == null ? 
true : Boolean.parseBoolean(data); + } + this.isIngest = false; + } else { + List roles = (List) map.get("roles"); + this.isClient = roles.contains("data") == false; + this.isData = roles.contains("data"); + this.isIngest = roles.contains("ingest"); + } + Map httpMap = (Map) map.get("http"); + if (httpMap != null) { + String address = (String) httpMap.get("publish_address"); + if (address != null) { + String[] scratch = address.split(":"); + this.publishAddress = new TNetworkAddress(scratch[0], Integer.valueOf(scratch[1])); + this.hasHttp = true; + } else { + this.publishAddress = null; + this.hasHttp = false; + } + } else { + this.publishAddress = null; + this.hasHttp = false; + } + + Map attributesMap = (Map) map.get("attributes"); + if (attributesMap != null) { + String thriftPortStr = (String) attributesMap.get("thrift_port"); + if (thriftPortStr != null) { + try { + int thriftPort = Integer.valueOf(thriftPortStr); + hasThrift = true; + thriftAddress = new TNetworkAddress(this.ip, thriftPort); + } catch (Exception e) { + hasThrift = false; + } + } else { + hasThrift = false; + } + } else { + hasThrift = false; + } + } + + public boolean hasHttp() { + return hasHttp; + } + + public boolean isClient() { + return isClient; + } + + public boolean isData() { + return isData; + } + + public boolean isIngest() { + return isIngest; + } + + public String getId() { + return id; + } + + public String getName() { + return name; + } + + public String getHost() { + return host; + } + + public TNetworkAddress getPublishAddress() { + return publishAddress; + } + + public boolean isHasThrift() { + return hasThrift; + } + + public TNetworkAddress getThriftAddress() { + return thriftAddress; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + EsNodeInfo nodeInfo = (EsNodeInfo) o; + + if (hasHttp != nodeInfo.hasHttp) { + return false; + } + if (isClient != nodeInfo.isClient) { + return 
false; + } + if (isData != nodeInfo.isData) { + return false; + } + if (!id.equals(nodeInfo.id)) { + return false; + } + if (!name.equals(nodeInfo.name)) { + return false; + } + if (!host.equals(nodeInfo.host)) { + return false; + } + if (!ip.equals(nodeInfo.ip)) { + return false; + } + if (hasThrift != nodeInfo.hasThrift) { + return false; + } + return (publishAddress != null ? publishAddress.equals(nodeInfo.publishAddress) : nodeInfo.publishAddress == null) + && (thriftAddress != null ? thriftAddress.equals(nodeInfo.thriftAddress) : nodeInfo.thriftAddress == null); + } + + @Override + public int hashCode() { + int result = id.hashCode(); + result = 31 * result + name.hashCode(); + result = 31 * result + host.hashCode(); + result = 31 * result + ip.hashCode(); + result = 31 * result + (publishAddress != null ? publishAddress.hashCode() : 0); + result = 31 * result + (thriftAddress != null ? thriftAddress.hashCode() : 0); + result = 31 * result + (hasHttp ? 1 : 0); + result = 31 * result + (hasThrift ? 1 : 0); + result = 31 * result + (isClient ? 1 : 0); + result = 31 * result + (isData ? 
1 : 0); + return result; + } + + @Override + public String toString() { + return "EsNodeInfo{" + + "id='" + id + '\'' + + ", name='" + name + '\'' + + ", host='" + host + '\'' + + ", ip='" + ip + '\'' + + ", publishAddress=" + publishAddress + + ", hasHttp=" + hasHttp + + ", isClient=" + isClient + + ", isData=" + isData + + ", isIngest=" + isIngest + + ", hasThrift=" + hasThrift + + ", thriftAddress=" + thriftAddress + + '}'; + } +} diff --git a/fe/src/main/java/org/apache/doris/external/EsRestClient.java b/fe/src/main/java/org/apache/doris/external/EsRestClient.java new file mode 100644 index 00000000000000..6b033b197f7983 --- /dev/null +++ b/fe/src/main/java/org/apache/doris/external/EsRestClient.java @@ -0,0 +1,109 @@ +package org.apache.doris.external; + +import okhttp3.Credentials; +import okhttp3.OkHttpClient; +import okhttp3.Request; +import okhttp3.Response; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.util.Strings; +import org.codehaus.jackson.JsonParser; +import org.codehaus.jackson.map.DeserializationConfig; +import org.codehaus.jackson.map.ObjectMapper; +import org.codehaus.jackson.map.SerializationConfig; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +public class EsRestClient { + private static final Logger LOG = LogManager.getLogger(EsRestClient.class); + private ObjectMapper mapper; + + { + mapper = new ObjectMapper(); + mapper.configure(DeserializationConfig.Feature.USE_ANNOTATIONS, false); + mapper.configure(SerializationConfig.Feature.USE_ANNOTATIONS, false); + } + + private int nextClient = 0; + private OkHttpClient networkClient; + private String[] nodes; + private String currentNode; + + public EsRestClient(String[] nodes, String authUser, String authPassword) { + this.nodes = nodes; + if (!Strings.isEmpty(authUser) && !Strings.isEmpty(authPassword)) { + networkClient = new 
OkHttpClient.Builder().authenticator((route, response) -> { + String credential = Credentials.basic(authUser, authPassword); + return response.request().newBuilder().header("Authorization", credential).build(); + } + ).build(); + } + selectNextNode(); + } + + private boolean selectNextNode() { + if (nextClient >= nodes.length) { + return false; + } + currentNode = nodes[nextClient++]; + return true; + } + + public Map getHttpNodes() throws Exception { + Map> nodesData = get("_nodes/http", "nodes"); + if (nodesData == null) { + return Collections.emptyMap(); + } + Map nodes = new HashMap<>(); + for (Map.Entry> entry : nodesData.entrySet()) { + EsNodeInfo node = new EsNodeInfo(entry.getKey(), entry.getValue()); + if (node.hasHttp()) { + nodes.put(node.getId(), node); + } + } + return nodes; + } + + private String execute(String path) { + selectNextNode(); + boolean nextNode; + do { + Request request = new Request.Builder() + .get() + .url(currentNode + "/" + path) + .build(); + try { + Response response = networkClient.newCall(request).execute(); + if (response.isSuccessful()) { + return response.body().string(); + } + } catch (IOException e) { + LOG.warn("request node [{}] [{}] failures {}, try next nodes", currentNode, path, e); + } + nextNode = selectNextNode(); + if (!nextNode) { + LOG.error("try all nodes [{}],no other nodes left", nodes); + } + } while (nextNode); + return null; + } + + public T get(String q, String key) { + return parseContent(execute(q), key); + } + + private T parseContent(String response, String key) { + Map map = Collections.emptyMap(); + try { + JsonParser jsonParser = mapper.getJsonFactory().createJsonParser(response); + map = mapper.readValue(jsonParser, Map.class); + } catch (IOException ex) { + LOG.error("parse es response failure: [{}]", response); + } + return (T) (key != null ? 
map.get(key) : map); + } + +} diff --git a/fe/src/main/java/org/apache/doris/external/EsShardRouting.java b/fe/src/main/java/org/apache/doris/external/EsShardRouting.java index 8c5d06932fbdfe..ba609000417ed9 100644 --- a/fe/src/main/java/org/apache/doris/external/EsShardRouting.java +++ b/fe/src/main/java/org/apache/doris/external/EsShardRouting.java @@ -28,12 +28,16 @@ public class EsShardRouting { private final int shardId; private final boolean isPrimary; private final TNetworkAddress address; + + private TNetworkAddress httpAddress; + private final String nodeId; - public EsShardRouting(String indexName, int shardId, boolean isPrimary, TNetworkAddress address) { + public EsShardRouting(String indexName, int shardId, boolean isPrimary, TNetworkAddress address, String nodeId) { this.indexName = indexName; this.shardId = shardId; this.isPrimary = isPrimary; this.address = address; + this.nodeId = nodeId; } public static EsShardRouting parseShardRoutingV55(String indexName, String shardKey, @@ -42,11 +46,11 @@ public static EsShardRouting parseShardRoutingV55(String indexName, String shard JSONObject nodeInfo = nodesMap.getJSONObject(nodeId); String[] transportAddr = nodeInfo.getString("transport_address").split(":"); // get thrift port from node info - String thriftPort = "8200";//nodeInfo.getJSONObject("attributes").getString("thrift_port"); + String thriftPort = nodeInfo.getJSONObject("attributes").getString("thrift_port"); TNetworkAddress addr = new TNetworkAddress(transportAddr[0], Integer.valueOf(thriftPort)); boolean isPrimary = shardInfo.getBoolean("primary"); - return new EsShardRouting(indexName, Integer.valueOf(shardKey), - isPrimary, addr); + return new EsShardRouting(indexName, Integer.valueOf(shardKey), + isPrimary, addr, nodeId); } public int getShardId() { @@ -64,4 +68,16 @@ public TNetworkAddress getAddress() { public String getIndexName() { return indexName; } + + public TNetworkAddress getHttpAddress() { + return httpAddress; + } + + public void 
setHttpAddress(TNetworkAddress httpAddress) { + this.httpAddress = httpAddress; + } + + public String getNodeId() { + return nodeId; + } } diff --git a/fe/src/main/java/org/apache/doris/external/EsStateStore.java b/fe/src/main/java/org/apache/doris/external/EsStateStore.java index 3a4822ce219dbf..1e7252d9300972 100644 --- a/fe/src/main/java/org/apache/doris/external/EsStateStore.java +++ b/fe/src/main/java/org/apache/doris/external/EsStateStore.java @@ -87,6 +87,12 @@ protected void runOneCycle() { for (EsTable esTable : esTables.values()) { try { EsTableState esTableState = loadEsIndexMetadataV55(esTable); + if (EsTable.TRANSPORT.equals(esTable.getTransport())) { + EsRestClient client = new EsRestClient(esTable.getSeeds(), + esTable.getUserName(), esTable.getPasswd()); + Map nodesInfo = client.getHttpNodes(); + esTableState.addHttpAddress(nodesInfo); + } if (esTableState != null) { esTable.setEsTableState(esTableState); } diff --git a/fe/src/main/java/org/apache/doris/external/EsTableState.java b/fe/src/main/java/org/apache/doris/external/EsTableState.java index a7620cfd04a9cb..59b69aa2678c2b 100644 --- a/fe/src/main/java/org/apache/doris/external/EsTableState.java +++ b/fe/src/main/java/org/apache/doris/external/EsTableState.java @@ -18,9 +18,11 @@ package org.apache.doris.external; import java.util.Map; +import java.util.Random; import org.apache.doris.catalog.PartitionInfo; import com.google.common.collect.Maps; +import org.apache.doris.thrift.TNetworkAddress; /** * save the dynamic info parsed from es cluster state such as shard routing, partition info @@ -38,6 +40,22 @@ public EsTableState() { partitionedIndexStates = Maps.newHashMap(); unPartitionedIndexStates = Maps.newHashMap(); } + + public void addHttpAddress(Map nodesInfo) { + for (EsIndexState indexState : partitionedIndexStates.values()) { + indexState.addHttpAddress(nodesInfo); + } + for (EsIndexState indexState : unPartitionedIndexStates.values()) { + indexState.addHttpAddress(nodesInfo); + } + + } 
+ + public TNetworkAddress randomAddress(Map nodesInfo) { + int seed = new Random().nextInt(nodesInfo.size()); /* bounded draw in [0, size): never a negative index */ + EsNodeInfo[] nodeInfos = (EsNodeInfo[]) nodesInfo.values().toArray(new EsNodeInfo[0]); /* typed array, so the cast cannot fail at runtime */ + return nodeInfos[seed].getPublishAddress(); + } public PartitionInfo getPartitionInfo() { return partitionInfo; diff --git a/fe/src/main/java/org/apache/doris/planner/EsScanNode.java b/fe/src/main/java/org/apache/doris/planner/EsScanNode.java index 38b0f3296135d4..ac74ae0561a5a1 100644 --- a/fe/src/main/java/org/apache/doris/planner/EsScanNode.java +++ b/fe/src/main/java/org/apache/doris/planner/EsScanNode.java @@ -128,7 +128,11 @@ public void finalize(Analyzer analyzer) throws UserException { @Override protected void toThrift(TPlanNode msg) { - msg.node_type = TPlanNodeType.ES_HTTP_SCAN_NODE; + if (EsTable.TRANSPORT_HTTP.equals(table.getTransport())) { + msg.node_type = TPlanNodeType.ES_SCAN_NODE; + } else { + msg.node_type = TPlanNodeType.ES_HTTP_SCAN_NODE; + } Map properties = Maps.newHashMap(); properties.put(EsTable.USER, table.getUserName()); properties.put(EsTable.PASSWORD, table.getPasswd()); @@ -187,8 +191,13 @@ private List getShardLocations() throws UserException { // get backends Set colocatedBes = Sets.newHashSet(); int numBe = Math.min(3, backendMap.size()); - List shardAllocations = shardRouting.stream().map(e -> e.getAddress()) - .collect(Collectors.toList()); + List shardAllocations = shardRouting.stream().map(e -> { + if (EsTable.TRANSPORT_HTTP.equals(table.getTransport())) { + return e.getHttpAddress(); + } else { + return e.getAddress(); + } + }).collect(Collectors.toList()); Collections.shuffle(shardAllocations, random); for (TNetworkAddress address : shardAllocations) { colocatedBes.addAll(backendMap.get(address.getHostname())); diff --git a/fe/src/test/java/org/apache/doris/es/EsRestClientTest.java b/fe/src/test/java/org/apache/doris/es/EsRestClientTest.java new file mode 100644 index 00000000000000..a8278d5c8a1d5a --- /dev/null +++ 
b/fe/src/test/java/org/apache/doris/es/EsRestClientTest.java @@ -0,0 +1,4 @@ +package org.apache.doris.es; + +public class EsRestClientTest { +} From 8c2cd7d9eeec093fb73215cd670e3cd589433d63 Mon Sep 17 00:00:00 2001 From: wuyunfeng Date: Thu, 4 Apr 2019 11:26:43 +0800 Subject: [PATCH 25/73] Simplify processing esquery --- be/src/util/es_query_builder.cpp | 57 ++++++++++++++++++++++---------- be/src/util/es_query_builder.h | 2 ++ 2 files changed, 41 insertions(+), 18 deletions(-) diff --git a/be/src/util/es_query_builder.cpp b/be/src/util/es_query_builder.cpp index 762f7021f1d6ee..30697269f7cd00 100644 --- a/be/src/util/es_query_builder.cpp +++ b/be/src/util/es_query_builder.cpp @@ -32,29 +32,24 @@ ESQueryBuilder::ESQueryBuilder(ExtFunction* es_query) { _es_query_str = first.to_string(); } +// note: call this function must invoke BooleanQueryBuilder::check_es_query to check validation rapidjson::Value ESQueryBuilder::to_json(rapidjson::Document& document) { - rapidjson::Document draft; - draft.Parse<0>(_es_query_str.c_str()); + rapidjson::Document scratch_document; + scratch_document.Parse(_es_query_str.c_str()); rapidjson::Document::AllocatorType& allocator = document.GetAllocator(); rapidjson::Value query_key; rapidjson::Value query_value; //{ "term": { "dv": "2" } } - if (!draft.HasParseError()) { - for (rapidjson::Value::ConstMemberIterator itr = draft.MemberBegin(); itr != draft.MemberEnd(); itr++) { - // deep copy, reference http://rapidjson.org/md_doc_tutorial.html#DeepCopyValue - query_key.CopyFrom(itr->name, allocator); - if (query_key.IsString()) { - // if we found one key, then end loop as QueryDSL only support one `query` root - query_value.CopyFrom(itr->value, allocator); - rapidjson::Value es_query(rapidjson::kObjectType); - es_query.SetObject(); - // Move Semantics, reference http://rapidjson.org/md_doc_tutorial.html#MoveSemantics - es_query.AddMember(query_key, query_value, allocator); - return es_query; - } - } - } - return nullptr; + 
rapidjson::Value es_query(rapidjson::kObjectType); + rapidjson::Value::ConstMemberIterator first = scratch_document.MemberBegin(); + // deep copy, reference http://rapidjson.org/md_doc_tutorial.html#DeepCopyValue + query_key.CopyFrom(first->name, allocator); + // if we found one key, then end loop as QueryDSL only support one `query` root + query_value.CopyFrom(first->value, allocator); + es_query.SetObject(); + // Move Semantics, reference http://rapidjson.org/md_doc_tutorial.html#MoveSemantics + es_query.AddMember(query_key, query_value, allocator); + return es_query; } rapidjson::Value WildCardQueryBuilder::to_json(rapidjson::Document& document) { rapidjson::Document::AllocatorType& allocator = document.GetAllocator(); @@ -302,6 +297,32 @@ void BooleanQueryBuilder::must_not(QueryBuilder* filter) { _must_not_clauses.push_back(filter); } +static Status BooleanQueryBuilder::check_es_query(ExtFunction extFunction) { + std::string esquery_str = extFunction.values.front().to_string(); + rapidjson::Document scratch_document; + draft.Parse(esquery_str.c_str()); + rapidjson::Document::AllocatorType& allocator = scratch_document.GetAllocator(); + rapidjson::Value query_key; + //{ "term": { "dv": "2" } } + if (!draft.HasParseError()) { + rapidjson::SizeType object_count = scratch_document.MemberCount(); + if (object_count != 1) { + return Status("esquery must only one root"); + } + // deep copy, reference http://rapidjson.org/md_doc_tutorial.html#DeepCopyValue + rapidjson::Value::ConstMemberIterator first = scratch_document.MemberBegin(); + query_key.CopyFrom(first->name, allocator); + if (!query_key.IsString()) { + // if we found one key, then end loop as QueryDSL only support one `query` root + return Status("esquery root key must be string"); + } + } else { + return Status("malformed esquery json"); + } + return Status::OK; +} + + rapidjson::Value BooleanQueryBuilder::to_query(const std::vector& predicates, rapidjson::Document& root) { if (predicates.size() == 0) { 
MatchAllQueryBuilder match_all_query; diff --git a/be/src/util/es_query_builder.h b/be/src/util/es_query_builder.h index f50848d0bb3e82..59d1bca85afa19 100644 --- a/be/src/util/es_query_builder.h +++ b/be/src/util/es_query_builder.h @@ -19,6 +19,7 @@ #include #include "rapidjson/document.h" #include "exec/es_predicate.h" +#include "common/status.h" namespace doris { @@ -106,6 +107,7 @@ class BooleanQueryBuilder : public QueryBuilder { void must_not(QueryBuilder* filter); // class method for transfer predicate to es query value, invoker should enclose this value with `query` static rapidjson::Value to_query(const std::vector& predicates, rapidjson::Document& root); + static Status check_es_query(ExtFunction extFunction); private: std::vector _must_clauses; From 1d1ebdebeeafcfea1991f55c9b1ef71eb8578cfa Mon Sep 17 00:00:00 2001 From: wuyunfeng Date: Thu, 4 Apr 2019 12:02:37 +0800 Subject: [PATCH 26/73] Modify some errors --- be/src/util/es_query_builder.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/be/src/util/es_query_builder.cpp b/be/src/util/es_query_builder.cpp index 30697269f7cd00..d5e007378baaab 100644 --- a/be/src/util/es_query_builder.cpp +++ b/be/src/util/es_query_builder.cpp @@ -297,14 +297,14 @@ void BooleanQueryBuilder::must_not(QueryBuilder* filter) { _must_not_clauses.push_back(filter); } -static Status BooleanQueryBuilder::check_es_query(ExtFunction extFunction) { +Status BooleanQueryBuilder::check_es_query(ExtFunction extFunction) { std::string esquery_str = extFunction.values.front().to_string(); rapidjson::Document scratch_document; - draft.Parse(esquery_str.c_str()); + scratch_document.Parse(esquery_str.c_str()); rapidjson::Document::AllocatorType& allocator = scratch_document.GetAllocator(); rapidjson::Value query_key; //{ "term": { "dv": "2" } } - if (!draft.HasParseError()) { + if (!scratch_document.HasParseError()) { rapidjson::SizeType object_count = scratch_document.MemberCount(); if (object_count != 1) { return 
Status("esquery must only one root"); From 19386e95369943eccf62a2c6a523cf9490ce067d Mon Sep 17 00:00:00 2001 From: lide-reed Date: Mon, 8 Apr 2019 11:26:21 +0800 Subject: [PATCH 27/73] Add unit tests for EsHttpScanNode and EsPredicate --- be/src/exec/es_http_scan_node.cpp | 2 +- be/src/exec/es_http_scanner.cpp | 2 +- be/src/exec/es_predicate.cpp | 5 + be/src/util/es_scroll_parser.cpp | 128 +++++++++++++++--- be/test/exec/CMakeLists.txt | 2 + be/test/exec/es_http_scan_node_test.cpp | 151 +++++++++++++++++++++ be/test/exec/es_predicate_test.cpp | 172 ++++++++++++++++++++++++ 7 files changed, 445 insertions(+), 17 deletions(-) create mode 100644 be/test/exec/es_http_scan_node_test.cpp create mode 100644 be/test/exec/es_predicate_test.cpp diff --git a/be/src/exec/es_http_scan_node.cpp b/be/src/exec/es_http_scan_node.cpp index b1df4001668c5b..a1d64da781028f 100644 --- a/be/src/exec/es_http_scan_node.cpp +++ b/be/src/exec/es_http_scan_node.cpp @@ -388,7 +388,7 @@ void EsHttpScanNode::scanner_worker(int start_idx, int length) { properties, scanner_expr_ctxs, &counter)); status = scanner_scan(std::move(scanner), scanner_expr_ctxs, &counter); if (!status.ok()) { - LOG(WARNING) << "Scanner[" << start_idx + i << "] prcess failed. status=" + LOG(WARNING) << "Scanner[" << start_idx + i << "] process failed. 
status=" << status.get_error_msg(); } } diff --git a/be/src/exec/es_http_scanner.cpp b/be/src/exec/es_http_scanner.cpp index 36eb09d57ac0fe..dbdbab1e877b2f 100644 --- a/be/src/exec/es_http_scanner.cpp +++ b/be/src/exec/es_http_scanner.cpp @@ -79,7 +79,7 @@ Status EsHttpScanner::open() { return Status("Es reader construct failed."); } - _es_reader->open(); + RETURN_IF_ERROR(_es_reader->open()); _rows_read_counter = ADD_COUNTER(_profile, "RowsRead", TUnit::UNIT); _read_timer = ADD_TIMER(_profile, "TotalRawReadTime(*)"); diff --git a/be/src/exec/es_predicate.cpp b/be/src/exec/es_predicate.cpp index 8bb6465688d860..3a496d857f481f 100644 --- a/be/src/exec/es_predicate.cpp +++ b/be/src/exec/es_predicate.cpp @@ -252,6 +252,11 @@ bool EsPredicate::build_disjuncts_list(Expr* conjunct, vector& di return true; } + + if (TExprNodeType::LIKE_PRED == conjunct->node_type()) { + //TODO + return true; + } if (TExprNodeType::IN_PRED == conjunct->node_type()) { // the op code maybe FILTER_NEW_IN, it means there is function in list diff --git a/be/src/util/es_scroll_parser.cpp b/be/src/util/es_scroll_parser.cpp index 3ab06455dcb640..30f7086907f8d7 100644 --- a/be/src/util/es_scroll_parser.cpp +++ b/be/src/util/es_scroll_parser.cpp @@ -25,6 +25,7 @@ #include "common/status.h" #include "runtime/mem_pool.h" #include "runtime/mem_tracker.h" +#include "util/string_parser.hpp" namespace doris { @@ -147,58 +148,155 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, } case TYPE_TINYINT: { - if (!col.IsNumber()) { + if (col.IsNumber()) { + *reinterpret_cast(slot) = (int8_t)col.GetInt(); + break; + } + + if (!col.IsString()) { + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "TINYINT")); + } + + const std::string& val = col.GetString(); + const char* data = val.c_str(); + size_t len = col.GetStringLength(); + StringParser::ParseResult result; + int8_t v = StringParser::string_to_int(data, len, &result); + if (result != StringParser::PARSE_SUCCESS) { return 
Status(strings::Substitute(ERROR_INVALID_COL_DATA, "TINYINT")); } - *reinterpret_cast(slot) = (int8_t)col.GetInt(); + *reinterpret_cast(slot) = v; break; } case TYPE_SMALLINT: { - if (!col.IsNumber()) { + if (col.IsNumber()) { + *reinterpret_cast(slot) = (int16_t)col.GetInt(); + break; + } + + if (!col.IsString()) { + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "SMALLINT")); + } + + const std::string& val = col.GetString(); + const char* data = val.c_str(); + size_t len = col.GetStringLength(); + StringParser::ParseResult result; + int16_t v = StringParser::string_to_int(data, len, &result); + if (result != StringParser::PARSE_SUCCESS) { return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "SMALLINT")); } - *reinterpret_cast(slot) = (int16_t)col.GetInt(); + *reinterpret_cast(slot) = v; break; } case TYPE_INT: { - if (!col.IsNumber()) { + if (col.IsNumber()) { + *reinterpret_cast(slot) = (int32_t)col.GetInt(); + break; + } + + if (!col.IsString()) { + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "INT")); + } + + const std::string& val = col.GetString(); + const char* data = val.c_str(); + size_t len = col.GetStringLength(); + StringParser::ParseResult result; + int32_t v = StringParser::string_to_int(data, len, &result); + if (result != StringParser::PARSE_SUCCESS) { return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "INT")); } - *reinterpret_cast(slot) = (int32_t)col.GetInt(); + *reinterpret_cast(slot) = v; break; } case TYPE_BIGINT: { - if (!col.IsNumber()) { + if (col.IsNumber()) { + *reinterpret_cast(slot) = col.GetInt64(); + break; + } + + if (!col.IsString()) { + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "BIGINT")); + } + + const std::string& val = col.GetString(); + const char* data = val.c_str(); + size_t len = col.GetStringLength(); + StringParser::ParseResult result; + int64_t v = StringParser::string_to_int(data, len, &result); + if (result != StringParser::PARSE_SUCCESS) { return 
Status(strings::Substitute(ERROR_INVALID_COL_DATA, "BIGINT")); } - *reinterpret_cast(slot) = col.GetInt64(); + *reinterpret_cast(slot) = v; break; } case TYPE_LARGEINT: { - if (!col.IsNumber()) { - return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "LARGEINT")); + if (col.IsNumber()) { + *reinterpret_cast(slot) = col.GetInt64(); + break; } - *reinterpret_cast(slot) = col.GetInt64(); + + if (!col.IsString()) { + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "LARGEINT")); + } + + const std::string& val = col.GetString(); + const char* data = val.c_str(); + size_t len = col.GetStringLength(); + StringParser::ParseResult result; + __int128 v = StringParser::string_to_int<__int128>(data, len, &result); + if (result != StringParser::PARSE_SUCCESS) { + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "LARGEINT")); + } + memcpy(slot, &v, sizeof(v)); break; } case TYPE_DOUBLE: { - if (!col.IsNumber()) { + if (col.IsNumber()) { + *reinterpret_cast(slot) = col.GetDouble(); + break; + } + + if (!col.IsString()) { + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "DOUBLE")); + } + + const std::string& val = col.GetString(); + size_t val_size = col.GetStringLength(); + StringParser::ParseResult result; + double d = StringParser::string_to_float(val.c_str(), + val_size, &result); + if (result != StringParser::PARSE_SUCCESS) { return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "DOUBLE")); } - *reinterpret_cast(slot) = col.GetDouble(); + *reinterpret_cast(slot) = d; break; } case TYPE_FLOAT: { - if (!col.IsNumber()) { + if (col.IsNumber()) { + *reinterpret_cast(slot) = col.GetFloat(); + break; + } + + if (!col.IsString()) { + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "FLOAT")); + } + + const std::string& val = col.GetString(); + size_t val_size = col.GetStringLength(); + StringParser::ParseResult result; + float f = StringParser::string_to_float(val.c_str(), val_size, &result); + if (result != StringParser::PARSE_SUCCESS) 
{ return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "FLOAT")); } - *reinterpret_cast(slot) = col.GetDouble(); + *reinterpret_cast(slot) = f; break; } diff --git a/be/test/exec/CMakeLists.txt b/be/test/exec/CMakeLists.txt index 7b683602ce0b1c..defb493151e2b9 100644 --- a/be/test/exec/CMakeLists.txt +++ b/be/test/exec/CMakeLists.txt @@ -44,6 +44,8 @@ ADD_BE_TEST(broker_reader_test) ADD_BE_TEST(broker_scanner_test) ADD_BE_TEST(broker_scan_node_test) ADD_BE_TEST(es_scan_node_test) +ADD_BE_TEST(es_http_scan_node_test) +ADD_BE_TEST(es_predicate_test) ADD_BE_TEST(olap_table_info_test) ADD_BE_TEST(olap_table_sink_test) #ADD_BE_TEST(schema_scan_node_test) diff --git a/be/test/exec/es_http_scan_node_test.cpp b/be/test/exec/es_http_scan_node_test.cpp new file mode 100644 index 00000000000000..754cd5f2ec7c0f --- /dev/null +++ b/be/test/exec/es_http_scan_node_test.cpp @@ -0,0 +1,151 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "exec/es_http_scan_node.h" + +#include +#include + +#include "common/object_pool.h" +#include "gen_cpp/PlanNodes_types.h" +#include "runtime/mem_pool.h" +#include "runtime/descriptors.h" +#include "runtime/runtime_state.h" +#include "runtime/row_batch.h" +#include "runtime/string_value.h" +#include "runtime/tuple_row.h" +#include "util/runtime_profile.h" +#include "util/debug_util.h" + +using std::vector; + +namespace doris { + +// mock +class EsHttpScanNodeTest : public testing::Test { +public: + EsHttpScanNodeTest() : _runtime_state("EsHttpScanNodeTest") { + _runtime_state._instance_mem_tracker.reset(new MemTracker()); + TDescriptorTable t_desc_table; + + // table descriptors + TTableDescriptor t_table_desc; + t_table_desc.id = 0; + t_table_desc.tableType = TTableType::ES_TABLE; + t_table_desc.numCols = 1; + t_table_desc.numClusteringCols = 0; + t_table_desc.__isset.esTable = true; + t_desc_table.tableDescriptors.push_back(t_table_desc); + t_desc_table.__isset.tableDescriptors = true; + + // TSlotDescriptor + int offset = 1; + int i = 0; + // id + { + TSlotDescriptor t_slot_desc; + t_slot_desc.__set_slotType(TypeDescriptor(TYPE_INT).to_thrift()); + t_slot_desc.__set_columnPos(i); + t_slot_desc.__set_byteOffset(offset); + t_slot_desc.__set_nullIndicatorByte(0); + t_slot_desc.__set_nullIndicatorBit(-1); + t_slot_desc.__set_slotIdx(i); + t_slot_desc.__set_isMaterialized(true); + t_desc_table.slotDescriptors.push_back(t_slot_desc); + offset += sizeof(int); + } + + TTupleDescriptor t_tuple_desc; + t_tuple_desc.id = 0; + t_tuple_desc.byteSize = offset; + t_tuple_desc.numNullBytes = 1; + t_tuple_desc.tableId = 0; + t_tuple_desc.__isset.tableId = true; + t_desc_table.__isset.slotDescriptors = true; + t_desc_table.tupleDescriptors.push_back(t_tuple_desc); + + DescriptorTbl::create(&_obj_pool, t_desc_table, &_desc_tbl); + _runtime_state.set_desc_tbl(_desc_tbl); + + // Node Id + _tnode.node_id = 0; + _tnode.node_type = TPlanNodeType::SCHEMA_SCAN_NODE; + 
_tnode.num_children = 0; + _tnode.limit = -1; + _tnode.row_tuples.push_back(0); + _tnode.nullable_tuples.push_back(false); + _tnode.es_scan_node.tuple_id = 0; + std::map properties; + _tnode.es_scan_node.__set_properties(properties); + _tnode.__isset.es_scan_node = true; + } + +protected: + virtual void SetUp() { + } + virtual void TearDown() { + } + TPlanNode _tnode; + ObjectPool _obj_pool; + DescriptorTbl* _desc_tbl; + RuntimeState _runtime_state; +}; + +TEST_F(EsHttpScanNodeTest, normal_use) { + + EsHttpScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); + Status status = scan_node.init(_tnode, &_runtime_state); + ASSERT_TRUE(status.ok()); + + status = scan_node.prepare(&_runtime_state); + ASSERT_TRUE(status.ok()); + + // scan range + TEsScanRange es_scan_range; + es_scan_range.__set_index("index1"); + es_scan_range.__set_type("docs"); + es_scan_range.__set_shard_id(0); + TNetworkAddress es_host; + es_host.__set_hostname("unknown"); + es_host.__set_port(8200); + std::vector es_hosts; + es_hosts.push_back(es_host); + es_scan_range.__set_es_hosts(es_hosts); + TScanRange scan_range; + scan_range.__set_es_scan_range(es_scan_range); + TScanRangeParams scan_range_params; + scan_range_params.__set_scan_range(scan_range); + std::vector scan_ranges; + scan_ranges.push_back(scan_range_params); + + status = scan_node.set_scan_ranges(scan_ranges); + ASSERT_TRUE(status.ok()); + + status = scan_node.open(&_runtime_state); + ASSERT_TRUE(status.ok()); + + status = scan_node.close(&_runtime_state); + ASSERT_TRUE(status.ok()); +} + +} + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + diff --git a/be/test/exec/es_predicate_test.cpp b/be/test/exec/es_predicate_test.cpp new file mode 100644 index 00000000000000..d448114336d40f --- /dev/null +++ b/be/test/exec/es_predicate_test.cpp @@ -0,0 +1,172 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "exec/es_predicate.h" + +#include +#include +#include +#include "common/logging.h" +#include "common/status.h" +#include "exprs/binary_predicate.h" +#include "gen_cpp/Exprs_types.h" +#include "util/es_query_builder.h" +#include "rapidjson/document.h" +#include "rapidjson/rapidjson.h" +#include "rapidjson/stringbuffer.h" +#include "rapidjson/writer.h" +#include "runtime/mem_tracker.h" +#include "runtime/primitive_type.h" +#include "runtime/runtime_state.h" +#include "runtime/string_value.h" + +namespace doris { + +class RuntimeState; + +class EsPredicateTest : public testing::Test { +public: + EsPredicateTest() : _runtime_state("EsPredicateTest") { + _runtime_state._instance_mem_tracker.reset(new MemTracker()); + TDescriptorTable t_desc_table; + + // table descriptors + TTableDescriptor t_table_desc; + t_table_desc.id = 0; + t_table_desc.tableType = TTableType::ES_TABLE; + t_table_desc.numCols = 1; + t_table_desc.numClusteringCols = 0; + t_table_desc.__isset.esTable = true; + t_desc_table.tableDescriptors.push_back(t_table_desc); + t_desc_table.__isset.tableDescriptors = true; + + // TSlotDescriptor + int offset = 1; + int i = 0; + // id + { + TSlotDescriptor t_slot_desc; + t_slot_desc.__set_slotType(TypeDescriptor(TYPE_INT).to_thrift()); + 
t_slot_desc.__set_columnPos(i); + t_slot_desc.__set_byteOffset(offset); + t_slot_desc.__set_nullIndicatorByte(0); + t_slot_desc.__set_nullIndicatorBit(-1); + t_slot_desc.__set_slotIdx(i); + t_slot_desc.__set_isMaterialized(true); + t_slot_desc.colName = "id"; + t_desc_table.slotDescriptors.push_back(t_slot_desc); + offset += sizeof(int); + } + + TTupleDescriptor t_tuple_desc; + t_tuple_desc.id = 0; + t_tuple_desc.byteSize = offset; + t_tuple_desc.numNullBytes = 1; + t_tuple_desc.tableId = 0; + t_tuple_desc.__isset.tableId = true; + t_desc_table.__isset.slotDescriptors = true; + t_desc_table.tupleDescriptors.push_back(t_tuple_desc); + + DescriptorTbl::create(&_obj_pool, t_desc_table, &_desc_tbl); + _runtime_state.set_desc_tbl(_desc_tbl); + } + + Status build_expr_context_list(std::vector& conjunct_ctxs); + void init(); + void SetUp() override {} + void TearDown() override {} + +private: + + ObjectPool _obj_pool; + DescriptorTbl* _desc_tbl; + RuntimeState _runtime_state; +}; + +Status EsPredicateTest::build_expr_context_list(std::vector& conjunct_ctxs) { + + TExpr texpr; + { + TExprNode node0; + node0.opcode = TExprOpcode::GT; + node0.child_type = TPrimitiveType::BIGINT; + node0.node_type = TExprNodeType::BINARY_PRED; + node0.num_children = 2; + node0.__isset.opcode = true; + node0.__isset.child_type = true; + node0.type = gen_type_desc(TPrimitiveType::BOOLEAN); + texpr.nodes.emplace_back(node0); + + TExprNode node1; + node1.node_type = TExprNodeType::SLOT_REF; + node1.type = gen_type_desc(TPrimitiveType::INT); + node1.__isset.slot_ref = true; + node1.num_children = 0; + node1.slot_ref.slot_id = 0; + node1.slot_ref.tuple_id = 0; + node1.output_column = true; + node1.__isset.output_column = true; + texpr.nodes.emplace_back(node1); + + TExprNode node2; + TIntLiteral intLiteral; + intLiteral.value = 10; + node2.node_type = TExprNodeType::INT_LITERAL; + node2.type = gen_type_desc(TPrimitiveType::BIGINT); + node2.__isset.int_literal = true; + node2.int_literal = 
intLiteral; + texpr.nodes.emplace_back(node2); + } + + std::vector conjuncts; + conjuncts.emplace_back(texpr); + Status status = Expr::create_expr_trees(&_obj_pool, conjuncts, &conjunct_ctxs); + + return status; +} + +TEST_F(EsPredicateTest, normal) { + std::vector conjunct_ctxs; + Status status = build_expr_context_list(conjunct_ctxs); + + TupleDescriptor *tuple_desc = _desc_tbl->get_tuple_descriptor(0); + std::vector predicates; + for (int i = 0; i < conjunct_ctxs.size(); ++i) { + EsPredicate* predicate = new EsPredicate(conjunct_ctxs[i], tuple_desc); + if (predicate->build_disjuncts_list()) { + predicates.push_back(predicate); + } + } + + rapidjson::Document document; + rapidjson::Value compound_bool_value = BooleanQueryBuilder::to_query(predicates, document); + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + compound_bool_value.Accept(writer); + std::string actual_bool_json = buffer.GetString(); + std::string expected_json = "{\"bool\":{\"filter\":[{\"bool\":{\"should\":[{\"range\":{\"id\":{\"gt\":\"10\"}}}]}}]}}"; + LOG(INFO) << "compound bool query" << actual_bool_json; + ASSERT_STREQ(expected_json.c_str(), actual_bool_json.c_str()); +} + + +} // end namespace doris + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} From 2ca72e9377825eab2da73b94c75792345253f1f1 Mon Sep 17 00:00:00 2001 From: wuyunfeng Date: Mon, 8 Apr 2019 14:42:14 +0800 Subject: [PATCH 28/73] Modify some FE error and Add predicate validator for BE --- be/src/util/es_query_builder.cpp | 56 +++++++- be/src/util/es_query_builder.h | 1 + be/test/util/es_query_builder_test.cpp | 135 ++++++++++++++++++ .../org/apache/doris/catalog/Catalog.java | 1 + .../org/apache/doris/catalog/EsTable.java | 4 +- .../apache/doris/external/EsShardRouting.java | 12 ++ .../apache/doris/external/EsStateStore.java | 2 +- .../org/apache/doris/planner/EsScanNode.java | 17 ++- 8 files changed, 213 insertions(+), 15 deletions(-) diff --git 
a/be/src/util/es_query_builder.cpp b/be/src/util/es_query_builder.cpp index d5e007378baaab..18187db56ceab7 100644 --- a/be/src/util/es_query_builder.cpp +++ b/be/src/util/es_query_builder.cpp @@ -305,23 +305,73 @@ Status BooleanQueryBuilder::check_es_query(ExtFunction extFunction) { rapidjson::Value query_key; //{ "term": { "dv": "2" } } if (!scratch_document.HasParseError()) { + if (!scratch_document.IsObject()) { + return Status(TStatusCode::ES_REQUEST_ERROR, "esquery must be a object"); + } rapidjson::SizeType object_count = scratch_document.MemberCount(); if (object_count != 1) { - return Status("esquery must only one root"); + return Status(TStatusCode::ES_REQUEST_ERROR, "esquery must only one root"); } // deep copy, reference http://rapidjson.org/md_doc_tutorial.html#DeepCopyValue rapidjson::Value::ConstMemberIterator first = scratch_document.MemberBegin(); query_key.CopyFrom(first->name, allocator); if (!query_key.IsString()) { // if we found one key, then end loop as QueryDSL only support one `query` root - return Status("esquery root key must be string"); + return Status(TStatusCode::ES_REQUEST_ERROR, "esquery root key must be string"); } } else { - return Status("malformed esquery json"); + return Status(TStatusCode::ES_REQUEST_ERROR, "malformed esquery json"); } return Status::OK; } +std::vector BooleanQueryBuilder::validate(const std::vector& espredicates) { + int conjunct_size = espredicates.size(); + std::vector result; + result.reserve(conjunct_size); + for (auto espredicate : espredicates) { + bool flag = true; + for (auto predicate : espredicate->get_predicate_list()) { + switch (predicate->node_type) { + case TExprNodeType::BINARY_PRED: { + ExtBinaryPredicate* binary_predicate = (ExtBinaryPredicate*)predicate; + TExprOpcode::type op = binary_predicate->op; + if (op != TExprOpcode::EQ && op != TExprOpcode::NE + && op != TExprOpcode::LT && op != TExprOpcode::LE + && op != TExprOpcode::GT && op != TExprOpcode::GE) { + flag = false; + } + break; + } + 
case TExprNodeType::LIKE_PRED: + case TExprNodeType::IN_PRED: { + break; + } + case TExprNodeType::FUNCTION_CALL: { + ExtFunction* function_predicate = (ExtFunction *)predicate; + if ("esquery" == function_predicate->func_name ) { + Status st = check_es_query(*function_predicate); + if (!st.ok()) { + flag = false; + } + } else { + flag = false; + } + break; + } + default: { + flag = false; + break; + } + } + if (!flag) { + break; + } + } + result.push_back(flag); + } + return result; +} rapidjson::Value BooleanQueryBuilder::to_query(const std::vector& predicates, rapidjson::Document& root) { if (predicates.size() == 0) { diff --git a/be/src/util/es_query_builder.h b/be/src/util/es_query_builder.h index 59d1bca85afa19..920002f862a34a 100644 --- a/be/src/util/es_query_builder.h +++ b/be/src/util/es_query_builder.h @@ -108,6 +108,7 @@ class BooleanQueryBuilder : public QueryBuilder { // class method for transfer predicate to es query value, invoker should enclose this value with `query` static rapidjson::Value to_query(const std::vector& predicates, rapidjson::Document& root); static Status check_es_query(ExtFunction extFunction); + static std::vector validate(const std::vector& espredicates); private: std::vector _must_clauses; diff --git a/be/test/util/es_query_builder_test.cpp b/be/test/util/es_query_builder_test.cpp index b53ec1789d484b..091f286cbfc052 100644 --- a/be/test/util/es_query_builder_test.cpp +++ b/be/test/util/es_query_builder_test.cpp @@ -299,6 +299,141 @@ TEST_F(BooleanQueryBuilderTest, compound_bool_query) { //LOG(INFO) << "compound bool query" << actual_bool_json; ASSERT_STREQ(expected_json.c_str(), actual_bool_json.c_str()); } +TEST_F(BooleanQueryBuilderTest, validate_esquery) { + std::string function_name = "esquery"; + char field[] = "random"; + int field_length = (int)strlen(field); + TypeDescriptor es_query_type_desc = TypeDescriptor::create_varchar_type(field_length); + ExtColumnDesc es_query_col_des(field, es_query_type_desc); + std::vector 
es_query_cols = {es_query_col_des}; + char es_query_str[] = "{\"bool\": {\"must_not\": {\"exists\": {\"field\": \"f1\"}}}}"; + int es_query_length = (int)strlen(es_query_str); + StringValue es_query_value(es_query_str, es_query_length); + ExtLiteral es_query_term_literal(TYPE_VARCHAR, &es_query_value); + std::vector es_query_values = {es_query_term_literal}; + ExtFunction legal_es_query(TExprNodeType::FUNCTION_CALL, function_name, es_query_cols, es_query_values); + auto st = BooleanQueryBuilder::check_es_query(legal_es_query); + ASSERT_TRUE(st.ok()); + char empty_query[] = "{}"; + int empty_query_length = (int)strlen(empty_query); + StringValue empty_query_value(empty_query, empty_query_length); + ExtLiteral empty_query_term_literal(TYPE_VARCHAR, &empty_query_value); + std::vector empty_query_values = {empty_query_term_literal}; + ExtFunction empty_es_query(TExprNodeType::FUNCTION_CALL, function_name, es_query_cols, empty_query_values); + st = BooleanQueryBuilder::check_es_query(empty_es_query); + ASSERT_STREQ(st.get_error_msg().c_str(), "esquery must only one root"); + //LOG(INFO) <<"error msg:" << st1.get_error_msg(); + char malformed_query[] = "{\"bool\": {\"must_not\": {\"exists\": {"; + int malformed_query_length = (int)strlen(malformed_query); + StringValue malformed_query_value(malformed_query, malformed_query_length); + ExtLiteral malformed_query_term_literal(TYPE_VARCHAR, &malformed_query_value); + std::vector malformed_query_values = {malformed_query_term_literal}; + ExtFunction malformed_es_query(TExprNodeType::FUNCTION_CALL, function_name, es_query_cols, malformed_query_values); + st = BooleanQueryBuilder::check_es_query(malformed_es_query); + ASSERT_STREQ(st.get_error_msg().c_str(), "malformed esquery json"); + char illegal_query[] = "{\"term\": {\"k1\" : \"2\"},\"match\": {\"k1\": \"3\"}}"; + int illegal_query_length = (int)strlen(illegal_query); + StringValue illegal_query_value(illegal_query, illegal_query_length); + ExtLiteral 
illegal_query_term_literal(TYPE_VARCHAR, &illegal_query_value); + std::vector illegal_query_values = {illegal_query_term_literal}; + ExtFunction illegal_es_query(TExprNodeType::FUNCTION_CALL, function_name, es_query_cols, illegal_query_values); + st = BooleanQueryBuilder::check_es_query(illegal_es_query); + ASSERT_STREQ(st.get_error_msg().c_str(), "esquery must only one root"); + char illegal_key_query[] = "[\"22\"]"; + int illegal_key_query_length = (int)strlen(illegal_key_query); + StringValue illegal_key_query_value(illegal_key_query, illegal_key_query_length); + ExtLiteral illegal_key_query_term_literal(TYPE_VARCHAR, &illegal_key_query_value); + std::vector illegal_key_query_values = {illegal_key_query_term_literal}; + ExtFunction illegal_key_es_query(TExprNodeType::FUNCTION_CALL, function_name, es_query_cols, illegal_key_query_values); + st = BooleanQueryBuilder::check_es_query(illegal_key_es_query); + ASSERT_STREQ(st.get_error_msg().c_str(), "esquery must be a object"); +} + +TEST_F(BooleanQueryBuilderTest, validate_partial) { + char like_value[] = "a%e%g_"; + int like_value_length = (int)strlen(like_value); + TypeDescriptor like_type_desc = TypeDescriptor::create_varchar_type(like_value_length); + StringValue like_term_value(like_value, like_value_length); + ExtLiteral like_literal(TYPE_VARCHAR, &like_term_value); + std::string like_field_name = "content"; + ExtLikePredicate* like_predicate = new ExtLikePredicate(TExprNodeType::LIKE_PRED, like_field_name, like_type_desc, like_literal); + + // k >= "a" + char range_value_str[] = "a"; + int range_value_length = (int)strlen(range_value_str); + StringValue range_value(range_value_str, range_value_length); + ExtLiteral range_literal(TYPE_VARCHAR, &range_value); + TypeDescriptor range_type_desc = TypeDescriptor::create_varchar_type(range_value_length); + std::string range_field_name = "k"; + ExtBinaryPredicate* range_predicate = new ExtBinaryPredicate(TExprNodeType::BINARY_PRED, range_field_name, range_type_desc, 
TExprOpcode::GE, range_literal); + + std::vector bool_predicates_1 = {like_predicate, range_predicate}; + EsPredicate* bool_predicate_1 = new EsPredicate(bool_predicates_1); + + // fv not in [8.0, 16.0] + std::string terms_in_field = "fv"; + int terms_in_field_length = terms_in_field.length(); + TypeDescriptor terms_in_col_type_desc = TypeDescriptor::create_varchar_type(terms_in_field_length); + + char value_1[] = "8.0"; + int value_1_length = (int)strlen(value_1); + StringValue string_value_1(value_1, value_1_length); + ExtLiteral term_literal_1(TYPE_VARCHAR, &string_value_1); + + char value_2[] = "16.0"; + int value_2_length = (int)strlen(value_2); + StringValue string_value_2(value_2, value_2_length); + ExtLiteral term_literal_2(TYPE_VARCHAR, &string_value_2); + + std::vector terms_values = {term_literal_1, term_literal_2}; + ExtInPredicate* in_predicate = new ExtInPredicate(TExprNodeType::IN_PRED, terms_in_field, terms_in_col_type_desc, terms_values); + in_predicate->is_not_in = true; + std::vector bool_predicates_2 = {in_predicate}; + EsPredicate* bool_predicate_2 = new EsPredicate(bool_predicates_2); + + // content != "wyf" + char term_str[] = "wyf"; + int term_value_length = (int)strlen(term_str); + StringValue term_value(term_str, term_value_length); + ExtLiteral term_literal(TYPE_VARCHAR, &term_value); + TypeDescriptor term_type_desc = TypeDescriptor::create_varchar_type(term_value_length); + std::string term_field_name = "content"; + ExtBinaryPredicate* term_ne_predicate = new ExtBinaryPredicate(TExprNodeType::BINARY_PRED, term_field_name, term_type_desc, TExprOpcode::NE, term_literal); + + char es_query_str[] = "{\"bool\": {\"must_not\": {\"exists\": {\"field\": \"f1\"}}}}"; + int es_query_length = (int)strlen(es_query_str); + StringValue value(es_query_str, es_query_length); + TypeDescriptor es_query_type_desc = TypeDescriptor::create_varchar_type(es_query_length); + std::string es_query_field_name = "random"; + ExtColumnDesc 
es_query_col_des(es_query_field_name, es_query_type_desc); + std::vector es_query_cols = {es_query_col_des}; + StringValue es_query_value(es_query_str, es_query_length); + ExtLiteral es_query_term_literal(TYPE_VARCHAR, &es_query_value); + std::vector es_query_values = {es_query_term_literal}; + std::string function_name = "esquery"; + ExtFunction* function_predicate = new ExtFunction(TExprNodeType::FUNCTION_CALL, function_name, es_query_cols, es_query_values); + std::vector bool_predicates_3 = {term_ne_predicate, function_predicate}; + EsPredicate* bool_predicate_3 = new EsPredicate(bool_predicates_3); + + std::vector and_bool_predicates = {bool_predicate_1, bool_predicate_2, bool_predicate_3}; + std::vector result = BooleanQueryBuilder::validate(and_bool_predicates); + std::vector expected = {true, true, true}; + ASSERT_TRUE(result == expected); + char illegal_query[] = "{\"term\": {\"k1\" : \"2\"},\"match\": {\"k1\": \"3\"}}"; + int illegal_query_length = (int)strlen(illegal_query); + StringValue illegal_query_value(illegal_query, illegal_query_length); + ExtLiteral illegal_query_term_literal(TYPE_VARCHAR, &illegal_query_value); + std::vector illegal_query_values = {illegal_query_term_literal}; + ExtFunction* illegal_function_preficate = new ExtFunction(TExprNodeType::FUNCTION_CALL, function_name, es_query_cols, illegal_query_values); + std::vector illegal_bool_predicates_3 = {term_ne_predicate, illegal_function_preficate}; + EsPredicate* illegal_bool_predicate_3 = new EsPredicate(illegal_bool_predicates_3); + std::vector and_bool_predicates_1 = {bool_predicate_1, bool_predicate_2, illegal_bool_predicate_3}; + result = BooleanQueryBuilder::validate(and_bool_predicates_1); + std::vector expected1 = {true, true, false}; + ASSERT_TRUE(result == expected1); +} + + } int main(int argc, char* argv[]) { diff --git a/fe/src/main/java/org/apache/doris/catalog/Catalog.java b/fe/src/main/java/org/apache/doris/catalog/Catalog.java index a4b880033d1bc2..6d571ee58a667c 100644 
--- a/fe/src/main/java/org/apache/doris/catalog/Catalog.java +++ b/fe/src/main/java/org/apache/doris/catalog/Catalog.java @@ -3947,6 +3947,7 @@ public static void getDdlStmt(Table table, List createTableStmt, List properties) throws DdlException { mappingType = properties.get(TYPE).trim(); } if (!Strings.isNullOrEmpty(properties.get(TRANSPORT)) - && Strings.isNullOrEmpty(properties.get(TRANSPORT).trim())) { - transport = properties.get(TRANSPORT); + && !Strings.isNullOrEmpty(properties.get(TRANSPORT).trim())) { + transport = properties.get(TRANSPORT).trim(); if (!(TRANSPORT_HTTP.equals(transport) || TRANSPORT_THRIFT.equals(transport))) { throw new DdlException("transport of ES table must be http(recommend) or thrift(reserved inner usage)," + " but value is " + transport); diff --git a/fe/src/main/java/org/apache/doris/external/EsShardRouting.java b/fe/src/main/java/org/apache/doris/external/EsShardRouting.java index ba609000417ed9..5f9e090a607583 100644 --- a/fe/src/main/java/org/apache/doris/external/EsShardRouting.java +++ b/fe/src/main/java/org/apache/doris/external/EsShardRouting.java @@ -80,4 +80,16 @@ public void setHttpAddress(TNetworkAddress httpAddress) { public String getNodeId() { return nodeId; } + + @Override + public String toString() { + return "EsShardRouting{" + + "indexName='" + indexName + '\'' + + ", shardId=" + shardId + + ", isPrimary=" + isPrimary + + ", address=" + address + + ", httpAddress=" + httpAddress + + ", nodeId='" + nodeId + '\'' + + '}'; + } } diff --git a/fe/src/main/java/org/apache/doris/external/EsStateStore.java b/fe/src/main/java/org/apache/doris/external/EsStateStore.java index 1e7252d9300972..8cdbfd6eedcf0f 100644 --- a/fe/src/main/java/org/apache/doris/external/EsStateStore.java +++ b/fe/src/main/java/org/apache/doris/external/EsStateStore.java @@ -87,7 +87,7 @@ protected void runOneCycle() { for (EsTable esTable : esTables.values()) { try { EsTableState esTableState = loadEsIndexMetadataV55(esTable); - if 
(EsTable.TRANSPORT.equals(esTable.getTransport())) { + if (EsTable.TRANSPORT_HTTP.equals(esTable.getTransport())) { EsRestClient client = new EsRestClient(esTable.getSeeds(), esTable.getUserName(), esTable.getPasswd()); Map nodesInfo = client.getHttpNodes(); diff --git a/fe/src/main/java/org/apache/doris/planner/EsScanNode.java b/fe/src/main/java/org/apache/doris/planner/EsScanNode.java index ac74ae0561a5a1..d599369992de3e 100644 --- a/fe/src/main/java/org/apache/doris/planner/EsScanNode.java +++ b/fe/src/main/java/org/apache/doris/planner/EsScanNode.java @@ -129,9 +129,9 @@ public void finalize(Analyzer analyzer) throws UserException { @Override protected void toThrift(TPlanNode msg) { if (EsTable.TRANSPORT_HTTP.equals(table.getTransport())) { - msg.node_type = TPlanNodeType.ES_SCAN_NODE; - } else { msg.node_type = TPlanNodeType.ES_HTTP_SCAN_NODE; + } else { + msg.node_type = TPlanNodeType.ES_SCAN_NODE; } Map properties = Maps.newHashMap(); properties.put(EsTable.USER, table.getUserName()); @@ -191,13 +191,12 @@ private List getShardLocations() throws UserException { // get backends Set colocatedBes = Sets.newHashSet(); int numBe = Math.min(3, backendMap.size()); - List shardAllocations = shardRouting.stream().map(e -> { - if (EsTable.TRANSPORT_HTTP.equals(table.getTransport())) { - return e.getHttpAddress(); - } else { - return e.getAddress(); - } - }).collect(Collectors.toList()); + List shardAllocations = new ArrayList<>(); + for (EsShardRouting item : shardRouting) { + LOG.info("shardRouting [{}]", shardRouting); + shardAllocations.add(EsTable.TRANSPORT_HTTP.equals(table.getTransport()) ? 
item.getHttpAddress() : item.getAddress()); + } + Collections.shuffle(shardAllocations, random); for (TNetworkAddress address : shardAllocations) { colocatedBes.addAll(backendMap.get(address.getHostname())); From 78d3a357964cbe8d17f9a0acb2a7238387ca2ed9 Mon Sep 17 00:00:00 2001 From: wuyunfeng Date: Mon, 8 Apr 2019 14:44:05 +0800 Subject: [PATCH 29/73] Delete unuseful debug log --- fe/src/main/java/org/apache/doris/planner/EsScanNode.java | 1 - 1 file changed, 1 deletion(-) diff --git a/fe/src/main/java/org/apache/doris/planner/EsScanNode.java b/fe/src/main/java/org/apache/doris/planner/EsScanNode.java index d599369992de3e..f54c9e0846fd30 100644 --- a/fe/src/main/java/org/apache/doris/planner/EsScanNode.java +++ b/fe/src/main/java/org/apache/doris/planner/EsScanNode.java @@ -193,7 +193,6 @@ private List getShardLocations() throws UserException { int numBe = Math.min(3, backendMap.size()); List shardAllocations = new ArrayList<>(); for (EsShardRouting item : shardRouting) { - LOG.info("shardRouting [{}]", shardRouting); shardAllocations.add(EsTable.TRANSPORT_HTTP.equals(table.getTransport()) ? 
item.getHttpAddress() : item.getAddress()); } From a225df2ffe1794e3f1ee81b201a002227c92298e Mon Sep 17 00:00:00 2001 From: wuyunfeng Date: Mon, 8 Apr 2019 14:49:38 +0800 Subject: [PATCH 30/73] Change reference for check_es_query --- be/src/util/es_query_builder.cpp | 2 +- be/src/util/es_query_builder.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/be/src/util/es_query_builder.cpp b/be/src/util/es_query_builder.cpp index 18187db56ceab7..6b36658cb6edc2 100644 --- a/be/src/util/es_query_builder.cpp +++ b/be/src/util/es_query_builder.cpp @@ -297,7 +297,7 @@ void BooleanQueryBuilder::must_not(QueryBuilder* filter) { _must_not_clauses.push_back(filter); } -Status BooleanQueryBuilder::check_es_query(ExtFunction extFunction) { +Status BooleanQueryBuilder::check_es_query(const ExtFunction& extFunction) { std::string esquery_str = extFunction.values.front().to_string(); rapidjson::Document scratch_document; scratch_document.Parse(esquery_str.c_str()); diff --git a/be/src/util/es_query_builder.h b/be/src/util/es_query_builder.h index 920002f862a34a..f5643db0040041 100644 --- a/be/src/util/es_query_builder.h +++ b/be/src/util/es_query_builder.h @@ -107,7 +107,7 @@ class BooleanQueryBuilder : public QueryBuilder { void must_not(QueryBuilder* filter); // class method for transfer predicate to es query value, invoker should enclose this value with `query` static rapidjson::Value to_query(const std::vector& predicates, rapidjson::Document& root); - static Status check_es_query(ExtFunction extFunction); + static Status check_es_query(const ExtFunction& extFunction); static std::vector validate(const std::vector& espredicates); private: From 784e55af169238b370349410e81d8baa47552734 Mon Sep 17 00:00:00 2001 From: lide-reed Date: Mon, 8 Apr 2019 19:04:17 +0800 Subject: [PATCH 31/73] Process conjuncts to ES and doris respectively --- be/src/exec/es_http_scan_node.cpp | 35 ++++++++++++++++++++++++------- be/src/exec/es_http_scan_node.h | 2 +- 
be/src/exec/es_http_scanner.cpp | 2 ++ be/src/exec/es_predicate.cpp | 16 +++++++++++--- be/src/exec/es_predicate.h | 9 ++++++-- be/src/util/es_query_builder.cpp | 2 +- be/src/util/es_scroll_parser.cpp | 22 +++++++++++++++++-- run-ut.sh | 2 ++ 8 files changed, 73 insertions(+), 17 deletions(-) diff --git a/be/src/exec/es_http_scan_node.cpp b/be/src/exec/es_http_scan_node.cpp index a1d64da781028f..2fe802715c9b35 100644 --- a/be/src/exec/es_http_scan_node.cpp +++ b/be/src/exec/es_http_scan_node.cpp @@ -29,6 +29,7 @@ #include "util/runtime_profile.h" #include "util/es_scan_reader.h" #include "util/es_scroll_query.h" +#include "util/es_query_builder.h" #include "exec/es_predicate.h" namespace doris { @@ -83,14 +84,20 @@ Status EsHttpScanNode::prepare(RuntimeState* state) { } // build predicate -void EsHttpScanNode::build_conjuncts_list() { +Status EsHttpScanNode::build_conjuncts_list() { + Status status = Status::OK; for (int i = 0; i < _conjunct_ctxs.size(); ++i) { - EsPredicate* predicate = new EsPredicate(_conjunct_ctxs[i], _tuple_desc); + EsPredicate* predicate = _pool->add( + new EsPredicate(_conjunct_ctxs[i], _tuple_desc)); if (predicate->build_disjuncts_list()) { _predicates.push_back(predicate); _predicate_to_conjunct.push_back(i); + } else if (!predicate->get_es_query_status().ok()) { + return predicate->get_es_query_status(); } } + + return status; } Status EsHttpScanNode::open(RuntimeState* state) { @@ -109,7 +116,24 @@ Status EsHttpScanNode::open(RuntimeState* state) { } } - build_conjuncts_list(); + RETURN_IF_ERROR(build_conjuncts_list()); + + // remove those predicates which ES cannot support + std::vector list = BooleanQueryBuilder::validate(_predicates); + DCHECK(list.size() == _predicate_to_conjunct.size()); + for(int i = list.size() - 1; i >= 0; i--) { + if(!list[i]) { + _predicate_to_conjunct.erase(_predicate_to_conjunct.begin() + i); + _predicates.erase(_predicates.begin() + i); + } + } + + // filter the conjuncts and ES will process them later + for 
(int i = _predicate_to_conjunct.size() - 1; i >= 0; i--) { + int conjunct_index = _predicate_to_conjunct[i]; + _conjunct_ctxs[conjunct_index]->close(_runtime_state); + _conjunct_ctxs.erase(_conjunct_ctxs.begin() + conjunct_index); + } RETURN_IF_ERROR(start_scanners()); @@ -228,11 +252,6 @@ Status EsHttpScanNode::close(RuntimeState* state) { _batch_queue.clear(); - for(int i=0; i < _predicates.size(); i++) { - delete _predicates[i]; - } - _predicates.clear(); - return ExecNode::close(state); } diff --git a/be/src/exec/es_http_scan_node.h b/be/src/exec/es_http_scan_node.h index 1779550faa62d8..31d3d2e3b8db6d 100644 --- a/be/src/exec/es_http_scan_node.h +++ b/be/src/exec/es_http_scan_node.h @@ -84,7 +84,7 @@ class EsHttpScanNode : public ScanNode { private: - void build_conjuncts_list(); + Status build_conjuncts_list(); TupleId _tuple_id; RuntimeState* _runtime_state; diff --git a/be/src/exec/es_http_scanner.cpp b/be/src/exec/es_http_scanner.cpp index dbdbab1e877b2f..ca6f7309f63a84 100644 --- a/be/src/exec/es_http_scanner.cpp +++ b/be/src/exec/es_http_scanner.cpp @@ -124,6 +124,8 @@ void EsHttpScanner::close() { if (_es_reader != nullptr) { _es_reader->close(); } + + Expr::close(_conjunct_ctxs, _state); } } diff --git a/be/src/exec/es_predicate.cpp b/be/src/exec/es_predicate.cpp index 3a496d857f481f..5bad6ee969e2dd 100644 --- a/be/src/exec/es_predicate.cpp +++ b/be/src/exec/es_predicate.cpp @@ -41,8 +41,9 @@ #include "runtime/tuple_row.h" #include "service/backend_options.h" -#include "util/runtime_profile.h" #include "util/debug_util.h" +#include "util/es_query_builder.h" +#include "util/runtime_profile.h" namespace doris { @@ -166,7 +167,8 @@ EsPredicate::EsPredicate(ExprContext* context, const TupleDescriptor* tuple_desc) : _context(context), _disjuncts_num(0), - _tuple_desc(tuple_desc) { + _tuple_desc(tuple_desc), + _es_query_status(Status::OK) { } EsPredicate::~EsPredicate() { @@ -180,6 +182,7 @@ bool EsPredicate::build_disjuncts_list() { return 
build_disjuncts_list(_context->root(), _disjuncts); } +// make sure to build by build_disjuncts_list const vector& EsPredicate::get_predicate_list(){ return _disjuncts; } @@ -248,6 +251,13 @@ bool EsPredicate::build_disjuncts_list(Expr* conjunct, vector& di conjunct->fn().name.function_name, cols, query_conditions); + if (_es_query_status.ok()) { + _es_query_status + = BooleanQueryBuilder::check_es_query(*(ExtFunction *)predicate); + if (!_es_query_status.ok()) { + return false; + } + } disjuncts.push_back(predicate); return true; @@ -255,7 +265,7 @@ bool EsPredicate::build_disjuncts_list(Expr* conjunct, vector& di if (TExprNodeType::LIKE_PRED == conjunct->node_type()) { //TODO - return true; + return false; } if (TExprNodeType::IN_PRED == conjunct->node_type()) { diff --git a/be/src/exec/es_predicate.h b/be/src/exec/es_predicate.h index 901413d4b4a23e..a789e58fc936d1 100644 --- a/be/src/exec/es_predicate.h +++ b/be/src/exec/es_predicate.h @@ -43,7 +43,7 @@ class ExtLiteral { _str = value_to_string(); } ~ExtLiteral(); - const std::string& to_string() { + const std::string& to_string() const { return _str; } @@ -163,7 +163,7 @@ struct ExtFunction : public ExtPredicate { const std::string& func_name; std::vector cols; - std::vector values; + const std::vector values; }; class EsPredicate { @@ -178,6 +178,10 @@ class EsPredicate { _disjuncts = all_predicates; }; + Status get_es_query_status() { + return _es_query_status; + } + private: @@ -190,6 +194,7 @@ class EsPredicate { int _disjuncts_num; const TupleDescriptor* _tuple_desc; std::vector _disjuncts; + Status _es_query_status; }; } diff --git a/be/src/util/es_query_builder.cpp b/be/src/util/es_query_builder.cpp index 6b36658cb6edc2..f17994a16af2fc 100644 --- a/be/src/util/es_query_builder.cpp +++ b/be/src/util/es_query_builder.cpp @@ -298,7 +298,7 @@ void BooleanQueryBuilder::must_not(QueryBuilder* filter) { } Status BooleanQueryBuilder::check_es_query(const ExtFunction& extFunction) { - std::string esquery_str = 
extFunction.values.front().to_string(); + const std::string& esquery_str = extFunction.values.front().to_string(); rapidjson::Document scratch_document; scratch_document.Parse(esquery_str.c_str()); rapidjson::Document::AllocatorType& allocator = scratch_document.GetAllocator(); diff --git a/be/src/util/es_scroll_parser.cpp b/be/src/util/es_scroll_parser.cpp index 30f7086907f8d7..1c5a85ca6e681a 100644 --- a/be/src/util/es_scroll_parser.cpp +++ b/be/src/util/es_scroll_parser.cpp @@ -301,10 +301,28 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, } case TYPE_BOOLEAN: { - if (!col.IsBool()) { + if (col.IsBool()) { + *reinterpret_cast(slot) = col.GetBool(); + break; + } + + if (col.IsNumber()) { + *reinterpret_cast(slot) = col.GetInt(); + break; + } + + if (!col.IsString()) { + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "BOOLEAN")); + } + + const std::string& val = col.GetString(); + size_t val_size = col.GetStringLength(); + StringParser::ParseResult result; + bool b = StringParser::string_to_bool(val.c_str(), val_size, &result); + if (result != StringParser::PARSE_SUCCESS) { return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "BOOLEAN")); } - *reinterpret_cast(slot) = col.GetBool(); + *reinterpret_cast(slot) = b; break; } diff --git a/run-ut.sh b/run-ut.sh index 2963013b595029..0caec5bf5d30b6 100755 --- a/run-ut.sh +++ b/run-ut.sh @@ -163,6 +163,8 @@ ${DORIS_TEST_BINARY_DIR}/exec/plain_text_line_reader_lzop_test ${DORIS_TEST_BINARY_DIR}/exec/broker_scanner_test ${DORIS_TEST_BINARY_DIR}/exec/broker_scan_node_test ${DORIS_TEST_BINARY_DIR}/exec/es_scan_node_test +${DORIS_TEST_BINARY_DIR}/exec/es_http_scan_node_test +${DORIS_TEST_BINARY_DIR}/exec/es_predicate_test ${DORIS_TEST_BINARY_DIR}/exec/olap_table_info_test ${DORIS_TEST_BINARY_DIR}/exec/olap_table_sink_test From d35f437c5890911184a95d408156fdaee85a833b Mon Sep 17 00:00:00 2001 From: lide-reed Date: Mon, 8 Apr 2019 20:10:04 +0800 Subject: [PATCH 32/73] Pushdown Like, Date 
and DateTime predicate to ES --- be/src/exec/es_http_scan_node.cpp | 1 - be/src/exec/es_predicate.cpp | 43 ++++++++++++++++++++++++-- be/src/util/es_scroll_parser.cpp | 50 +++++++++++++++++++++++++++---- 3 files changed, 84 insertions(+), 10 deletions(-) diff --git a/be/src/exec/es_http_scan_node.cpp b/be/src/exec/es_http_scan_node.cpp index 2fe802715c9b35..5834fbd76fadd6 100644 --- a/be/src/exec/es_http_scan_node.cpp +++ b/be/src/exec/es_http_scan_node.cpp @@ -430,6 +430,5 @@ void EsHttpScanNode::scanner_worker(int start_idx, int length) { if (!status.ok()) { _queue_writer_cond.notify_all(); } - Expr::close(scanner_expr_ctxs, _runtime_state); } } diff --git a/be/src/exec/es_predicate.cpp b/be/src/exec/es_predicate.cpp index 5bad6ee969e2dd..efe2842a310f36 100644 --- a/be/src/exec/es_predicate.cpp +++ b/be/src/exec/es_predicate.cpp @@ -263,9 +263,46 @@ bool EsPredicate::build_disjuncts_list(Expr* conjunct, vector& di return true; } - if (TExprNodeType::LIKE_PRED == conjunct->node_type()) { - //TODO - return false; + if (TExprNodeType::FUNCTION_CALL == conjunct->node_type()) { + std::string fname = conjunct->fn().name.function_name; + if (fname != "like") { + return false; + } + + SlotRef* slotRef = nullptr; + Expr* expr = nullptr; + if (TExprNodeType::SLOT_REF == conjunct->get_child(0)->node_type()) { + expr = conjunct->get_child(1); + slotRef = (SlotRef*)(conjunct->get_child(0)); + } else if (TExprNodeType::SLOT_REF == conjunct->get_child(1)->node_type()) { + expr = conjunct->get_child(0); + slotRef = (SlotRef*)(conjunct->get_child(1)); + } else { + VLOG(1) << "get disjuncts fail: no SLOT_REF child"; + return false; + } + + const SlotDescriptor* slot_desc = get_slot_desc(slotRef); + if (slot_desc == nullptr) { + VLOG(1) << "get disjuncts fail: slot_desc is null"; + return false; + } + + PrimitiveType type = expr->type().type; + if (type != TYPE_VARCHAR && type != TYPE_CHAR) { + VLOG(1) << "get disjuncts fail: like value is not a string"; + return false; + } + + 
ExtLiteral literal(type, _context->get_value(expr, NULL)); + ExtPredicate* predicate = new ExtLikePredicate( + TExprNodeType::LIKE_PRED, + slot_desc->col_name(), + slot_desc->type(), + literal); + + disjuncts.push_back(predicate); + return true; } if (TExprNodeType::IN_PRED == conjunct->node_type()) { diff --git a/be/src/util/es_scroll_parser.cpp b/be/src/util/es_scroll_parser.cpp index 1c5a85ca6e681a..4818072efd5799 100644 --- a/be/src/util/es_scroll_parser.cpp +++ b/be/src/util/es_scroll_parser.cpp @@ -327,20 +327,58 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, } case TYPE_DATE: { - if (!col.IsNumber() || - !reinterpret_cast(slot)->from_unixtime(col.GetInt64())) { + if (col.IsNumber()) { + if (!reinterpret_cast(slot)->from_unixtime(col.GetInt64())) { + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATE")); + } + reinterpret_cast(slot)->cast_to_date(); + break; + } + + if (!col.IsString()) { + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATE")); + } + + DateTimeValue* ts_slot = reinterpret_cast(slot); + const std::string& val = col.GetString(); + size_t val_size = col.GetStringLength(); + if (!ts_slot->from_date_str(val.c_str(), val_size)) { return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATE")); } - reinterpret_cast(slot)->cast_to_date(); + + if (ts_slot->year() < 1900) { + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATE")); + } + + ts_slot->cast_to_date(); break; } case TYPE_DATETIME: { - if (!col.IsNumber() || - !reinterpret_cast(slot)->from_unixtime(col.GetInt64())) { + if (col.IsNumber()) { + if (!reinterpret_cast(slot)->from_unixtime(col.GetInt64())) { + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATETIME")); + } + reinterpret_cast(slot)->set_type(TIME_DATETIME); + break; + } + + if (!col.IsString()) { + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATETIME")); + } + + DateTimeValue* ts_slot = reinterpret_cast(slot); + 
const std::string& val = col.GetString(); + size_t val_size = col.GetStringLength(); + if (!ts_slot->from_date_str(val.c_str(), val_size)) { return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATETIME")); } - reinterpret_cast(slot)->set_type(TIME_DATETIME); + + if (ts_slot->year() < 1900) { + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATETIME")); + } + + ts_slot->to_datetime(); break; } From 18b50a50f0e37767c4b93261d788a3c8d31736ed Mon Sep 17 00:00:00 2001 From: lide-reed Date: Tue, 9 Apr 2019 17:09:27 +0800 Subject: [PATCH 33/73] Fix shards to _shards in ESScanReader --- be/src/exec/es_http_scan_node.h | 2 -- be/src/util/es_scan_reader.cpp | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/be/src/exec/es_http_scan_node.h b/be/src/exec/es_http_scan_node.h index 31d3d2e3b8db6d..c65995ac523f8c 100644 --- a/be/src/exec/es_http_scan_node.h +++ b/be/src/exec/es_http_scan_node.h @@ -82,8 +82,6 @@ class EsHttpScanNode : public ScanNode { const std::vector& conjunct_ctxs, EsScanCounter* counter); -private: - Status build_conjuncts_list(); TupleId _tuple_id; diff --git a/be/src/util/es_scan_reader.cpp b/be/src/util/es_scan_reader.cpp index b4d0236dc5a585..e49cd92586b8e5 100644 --- a/be/src/util/es_scan_reader.cpp +++ b/be/src/util/es_scan_reader.cpp @@ -26,7 +26,7 @@ namespace doris { const std::string REUQEST_SCROLL_FILTER_PATH = "filter_path=_scroll_id,hits.hits._source,hits.total,_id,hits.hits._source.fields"; const std::string REQUEST_SCROLL_PATH = "_scroll"; -const std::string REQUEST_PREFERENCE_PREFIX = "&preference=shards:"; +const std::string REQUEST_PREFERENCE_PREFIX = "&preference=_shards:"; const std::string REQUEST_SEARCH_SCROLL_PATH = "/_search/scroll"; const std::string REQUEST_SEPARATOR = "/"; const std::string REQUEST_SCROLL_TIME = "5m"; From 59d1ac2cbd42896cc438bccacc67279288ac3bf8 Mon Sep 17 00:00:00 2001 From: lide-reed Date: Tue, 9 Apr 2019 19:42:15 +0800 Subject: [PATCH 34/73] Fix multithreads for 
EsHttpScanNode --- be/src/exec/es_http_scan_node.cpp | 58 ++++++++++++++++--------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/be/src/exec/es_http_scan_node.cpp b/be/src/exec/es_http_scan_node.cpp index 5834fbd76fadd6..95e294d37b2c9a 100644 --- a/be/src/exec/es_http_scan_node.cpp +++ b/be/src/exec/es_http_scan_node.cpp @@ -143,10 +143,13 @@ Status EsHttpScanNode::open(RuntimeState* state) { Status EsHttpScanNode::start_scanners() { { std::unique_lock l(_batch_queue_lock); - _num_running_scanners = 1; + _num_running_scanners = _scan_ranges.size(); + } + + for (int i = 0; i < _scan_ranges.size(); i++) { + _scanner_threads.emplace_back(&EsHttpScanNode::scanner_worker, this, i, + _scan_ranges.size()); } - _scanner_threads.emplace_back(&EsHttpScanNode::scanner_worker, this, 0, - _scan_ranges.size()); return Status::OK; } @@ -378,6 +381,7 @@ static std::string get_host_port(const std::vector& es_hosts) { void EsHttpScanNode::scanner_worker(int start_idx, int length) { // Clone expr context std::vector scanner_expr_ctxs; + DCHECK(start_idx < length); auto status = Expr::clone_if_not_exists(_conjunct_ctxs, _runtime_state, &scanner_expr_ctxs); if (!status.ok()) { @@ -385,31 +389,29 @@ void EsHttpScanNode::scanner_worker(int start_idx, int length) { } EsScanCounter counter; - for (int i = 0; i < length && status.ok(); ++i) { - const TEsScanRange& es_scan_range = - _scan_ranges[start_idx + i].scan_range.es_scan_range; - - // Collect the informations from scan range to perperties - std::map properties(_properties); - properties[ESScanReader::KEY_INDEX] = es_scan_range.index; - if (es_scan_range.__isset.type) { - properties[ESScanReader::KEY_TYPE] = es_scan_range.type; - } - properties[ESScanReader::KEY_SHARD] = std::to_string(es_scan_range.shard_id); - properties[ESScanReader::KEY_BATCH_SIZE] = std::to_string(_runtime_state->batch_size()); - properties[ESScanReader::KEY_HOST_PORT] = get_host_port(es_scan_range.es_hosts); - 
properties[ESScanReader::KEY_QUERY] - = ESScrollQueryBuilder::build(properties, _column_names, _predicates); - - // start scanner to scan - std::unique_ptr scanner(new EsHttpScanner( - _runtime_state, runtime_profile(), _tuple_id, - properties, scanner_expr_ctxs, &counter)); - status = scanner_scan(std::move(scanner), scanner_expr_ctxs, &counter); - if (!status.ok()) { - LOG(WARNING) << "Scanner[" << start_idx + i << "] process failed. status=" - << status.get_error_msg(); - } + const TEsScanRange& es_scan_range = + _scan_ranges[start_idx].scan_range.es_scan_range; + + // Collect the informations from scan range to perperties + std::map properties(_properties); + properties[ESScanReader::KEY_INDEX] = es_scan_range.index; + if (es_scan_range.__isset.type) { + properties[ESScanReader::KEY_TYPE] = es_scan_range.type; + } + properties[ESScanReader::KEY_SHARD] = std::to_string(es_scan_range.shard_id); + properties[ESScanReader::KEY_BATCH_SIZE] = std::to_string(_runtime_state->batch_size()); + properties[ESScanReader::KEY_HOST_PORT] = get_host_port(es_scan_range.es_hosts); + properties[ESScanReader::KEY_QUERY] + = ESScrollQueryBuilder::build(properties, _column_names, _predicates); + + // start scanner to scan + std::unique_ptr scanner(new EsHttpScanner( + _runtime_state, runtime_profile(), _tuple_id, + properties, scanner_expr_ctxs, &counter)); + status = scanner_scan(std::move(scanner), scanner_expr_ctxs, &counter); + if (!status.ok()) { + LOG(WARNING) << "Scanner[" << start_idx << "] process failed. 
status=" + << status.get_error_msg(); } // Update stats From 7e3f6203b4e53b3c1adf535ce804013438c6bcd4 Mon Sep 17 00:00:00 2001 From: lide-reed Date: Tue, 9 Apr 2019 20:02:00 +0800 Subject: [PATCH 35/73] Add error status when data has an array in EsScrollParser --- be/src/util/es_scroll_parser.cpp | 45 ++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/be/src/util/es_scroll_parser.cpp b/be/src/util/es_scroll_parser.cpp index 4818072efd5799..425d4c4c82c863 100644 --- a/be/src/util/es_scroll_parser.cpp +++ b/be/src/util/es_scroll_parser.cpp @@ -40,6 +40,8 @@ static const string ERROR_INVALID_COL_DATA = "Data source returned inconsistent "problem with the data source library."; static const string ERROR_MEM_LIMIT_EXCEEDED = "DataSourceScanNode::$0() failed to allocate " "$1 bytes for $2."; +static const string ERROR_COL_DATA_IS_ARRAY = "Data source returned an array for the type $0" + "based on column metadata."; ScrollParser::ScrollParser(const std::string& scroll_result) : _scroll_id(""), @@ -130,6 +132,9 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, switch (slot_desc->type().type) { case TYPE_CHAR: case TYPE_VARCHAR: { + if (col.IsArray()) { + return Status(strings::Substitute(ERROR_COL_DATA_IS_ARRAY, "STRING")); + } if (!col.IsString()) { return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "STRING")); } @@ -153,6 +158,10 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, break; } + if (col.IsArray()) { + return Status(strings::Substitute(ERROR_COL_DATA_IS_ARRAY, "TINYINT")); + } + if (!col.IsString()) { return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "TINYINT")); } @@ -175,6 +184,10 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, break; } + if (col.IsArray()) { + return Status(strings::Substitute(ERROR_COL_DATA_IS_ARRAY, "SMALLINT")); + } + if (!col.IsString()) { return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "SMALLINT")); } @@ -197,6 +210,10 @@ 
Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, break; } + if (col.IsArray()) { + return Status(strings::Substitute(ERROR_COL_DATA_IS_ARRAY, "INT")); + } + if (!col.IsString()) { return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "INT")); } @@ -219,6 +236,10 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, break; } + if (col.IsArray()) { + return Status(strings::Substitute(ERROR_COL_DATA_IS_ARRAY, "BIGINT")); + } + if (!col.IsString()) { return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "BIGINT")); } @@ -241,6 +262,10 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, break; } + if (col.IsArray()) { + return Status(strings::Substitute(ERROR_COL_DATA_IS_ARRAY, "LARGEINT")); + } + if (!col.IsString()) { return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "LARGEINT")); } @@ -263,6 +288,10 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, break; } + if (col.IsArray()) { + return Status(strings::Substitute(ERROR_COL_DATA_IS_ARRAY, "DOUBLE")); + } + if (!col.IsString()) { return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "DOUBLE")); } @@ -285,6 +314,10 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, break; } + if (col.IsArray()) { + return Status(strings::Substitute(ERROR_COL_DATA_IS_ARRAY, "FLOAT")); + } + if (!col.IsString()) { return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "FLOAT")); } @@ -311,6 +344,10 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, break; } + if (col.IsArray()) { + return Status(strings::Substitute(ERROR_COL_DATA_IS_ARRAY, "BOOLEAN")); + } + if (!col.IsString()) { return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "BOOLEAN")); } @@ -335,6 +372,10 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, break; } + if (col.IsArray()) { + return Status(strings::Substitute(ERROR_COL_DATA_IS_ARRAY, "TYPE_DATE")); + } + if (!col.IsString()) { return 
Status(strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATE")); } @@ -363,6 +404,10 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, break; } + if (col.IsArray()) { + return Status(strings::Substitute(ERROR_COL_DATA_IS_ARRAY, "TYPE_DATETIME")); + } + if (!col.IsString()) { return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATETIME")); } From bb02355d8bd01ae11efe231e403ba8177bf80c2c Mon Sep 17 00:00:00 2001 From: wuyunfeng Date: Thu, 11 Apr 2019 17:57:30 +0800 Subject: [PATCH 36/73] Modify FE request logic and resolve http 401 problem --- .../apache/doris/external/EsRestClient.java | 33 +++++++++++++++---- .../apache/doris/external/EsStateStore.java | 18 ++++++---- .../org/apache/doris/es/EsRestClientTest.java | 4 --- 3 files changed, 39 insertions(+), 16 deletions(-) delete mode 100644 fe/src/test/java/org/apache/doris/es/EsRestClientTest.java diff --git a/fe/src/main/java/org/apache/doris/external/EsRestClient.java b/fe/src/main/java/org/apache/doris/external/EsRestClient.java index 6b033b197f7983..92e00634ee0a5d 100644 --- a/fe/src/main/java/org/apache/doris/external/EsRestClient.java +++ b/fe/src/main/java/org/apache/doris/external/EsRestClient.java @@ -16,6 +16,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.Map; +import java.util.concurrent.TimeUnit; public class EsRestClient { private static final Logger LOG = LogManager.getLogger(EsRestClient.class); @@ -27,19 +28,26 @@ public class EsRestClient { mapper.configure(SerializationConfig.Feature.USE_ANNOTATIONS, false); } + private static OkHttpClient networkClient = new OkHttpClient.Builder() + .readTimeout(10, TimeUnit.SECONDS) + .build(); + + private String basicAuth; + private int nextClient = 0; - private OkHttpClient networkClient; + // private OkHttpClient networkClient; private String[] nodes; private String currentNode; public EsRestClient(String[] nodes, String authUser, String authPassword) { this.nodes = nodes; if 
(!Strings.isEmpty(authUser) && !Strings.isEmpty(authPassword)) { - networkClient = new OkHttpClient.Builder().authenticator((route, response) -> { - String credential = Credentials.basic(authUser, authPassword); - return response.request().newBuilder().header("Authorization", credential).build(); - } - ).build(); +// networkClient = new OkHttpClient.Builder().authenticator((route, response) -> { +// String credential = Credentials.basic(authUser, authPassword); +// return response.request().newBuilder().header("Authorization", credential).build(); +// } +// ).build(); + basicAuth = Credentials.basic(authUser, authPassword); } selectNextNode(); } @@ -67,12 +75,25 @@ public Map getHttpNodes() throws Exception { return nodes; } + public String getIndexMetaData(String indexName) { + String path = "_cluster/state?indices=" + indexName + + "&metric=routing_table,nodes,metadata&expand_wildcards=open"; + return execute(path); + + } + + /** + * execute request for specific path + * @param path the path must not leading with '/' + * @return + */ private String execute(String path) { selectNextNode(); boolean nextNode; do { Request request = new Request.Builder() .get() + .addHeader("Authorization", basicAuth) .url(currentNode + "/" + path) .build(); try { diff --git a/fe/src/main/java/org/apache/doris/external/EsStateStore.java b/fe/src/main/java/org/apache/doris/external/EsStateStore.java index 8cdbfd6eedcf0f..8bab6180fc6bce 100644 --- a/fe/src/main/java/org/apache/doris/external/EsStateStore.java +++ b/fe/src/main/java/org/apache/doris/external/EsStateStore.java @@ -86,16 +86,22 @@ public void deRegisterTable(long tableId) { protected void runOneCycle() { for (EsTable esTable : esTables.values()) { try { - EsTableState esTableState = loadEsIndexMetadataV55(esTable); + EsRestClient client = new EsRestClient(esTable.getSeeds(), + esTable.getUserName(), esTable.getPasswd()); +// EsTableState esTableState = loadEsIndexMetadataV55(esTable); + String indexMetaData = 
client.getIndexMetaData(esTable.getIndexName()); + if (indexMetaData == null) { + continue; + } + EsTableState esTableState = parseClusterState55(indexMetaData, esTable); + if (esTableState == null) { + continue; + } if (EsTable.TRANSPORT_HTTP.equals(esTable.getTransport())) { - EsRestClient client = new EsRestClient(esTable.getSeeds(), - esTable.getUserName(), esTable.getPasswd()); Map nodesInfo = client.getHttpNodes(); esTableState.addHttpAddress(nodesInfo); } - if (esTableState != null) { - esTable.setEsTableState(esTableState); - } + esTable.setEsTableState(esTableState); } catch (Throwable e) { LOG.error("errors while load table {} state from es", esTable.getName()); } diff --git a/fe/src/test/java/org/apache/doris/es/EsRestClientTest.java b/fe/src/test/java/org/apache/doris/es/EsRestClientTest.java deleted file mode 100644 index a8278d5c8a1d5a..00000000000000 --- a/fe/src/test/java/org/apache/doris/es/EsRestClientTest.java +++ /dev/null @@ -1,4 +0,0 @@ -package org.apache.doris.es; - -public class EsRestClientTest { -} From a29bde85700552d26c034c122603437ed53159af Mon Sep 17 00:00:00 2001 From: wuyunfeng Date: Thu, 11 Apr 2019 18:01:32 +0800 Subject: [PATCH 37/73] Modify FE request logic and resolve http 401 problem --- .../main/java/org/apache/doris/external/EsRestClient.java | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fe/src/main/java/org/apache/doris/external/EsRestClient.java b/fe/src/main/java/org/apache/doris/external/EsRestClient.java index 92e00634ee0a5d..279e5c824e4a32 100644 --- a/fe/src/main/java/org/apache/doris/external/EsRestClient.java +++ b/fe/src/main/java/org/apache/doris/external/EsRestClient.java @@ -35,18 +35,12 @@ public class EsRestClient { private String basicAuth; private int nextClient = 0; - // private OkHttpClient networkClient; private String[] nodes; private String currentNode; public EsRestClient(String[] nodes, String authUser, String authPassword) { this.nodes = nodes; if (!Strings.isEmpty(authUser) && 
!Strings.isEmpty(authPassword)) { -// networkClient = new OkHttpClient.Builder().authenticator((route, response) -> { -// String credential = Credentials.basic(authUser, authPassword); -// return response.request().newBuilder().header("Authorization", credential).build(); -// } -// ).build(); basicAuth = Credentials.basic(authUser, authPassword); } selectNextNode(); From 178f4bf8e6a3d2fbfa14580e43e566cb44120e01 Mon Sep 17 00:00:00 2001 From: lide-reed Date: Fri, 12 Apr 2019 16:09:54 +0800 Subject: [PATCH 38/73] Introduce is_literal_node() to avoid get_value failure --- be/src/exec/es_predicate.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/be/src/exec/es_predicate.cpp b/be/src/exec/es_predicate.cpp index efe2842a310f36..d9fb709507088a 100644 --- a/be/src/exec/es_predicate.cpp +++ b/be/src/exec/es_predicate.cpp @@ -197,6 +197,21 @@ static bool ignore_cast(const SlotDescriptor* slot, const Expr* expr) { return false; } +static bool is_literal_node(const Expr* expr) { + switch (expr->node_type()) { + case TExprNodeType::BOOL_LITERAL: + case TExprNodeType::INT_LITERAL: + case TExprNodeType::LARGE_INT_LITERAL: + case TExprNodeType::FLOAT_LITERAL: + case TExprNodeType::DECIMAL_LITERAL: + case TExprNodeType::STRING_LITERAL: + case TExprNodeType::DATE_LITERAL: + return true; + default: + return false; + } +} + bool EsPredicate::build_disjuncts_list(Expr* conjunct, vector& disjuncts) { if (TExprNodeType::BINARY_PRED == conjunct->node_type()) { if (conjunct->children().size() != 2) { @@ -226,6 +241,10 @@ bool EsPredicate::build_disjuncts_list(Expr* conjunct, vector& di return false; } + if (!is_literal_node(expr)) { + VLOG(1) << "get disjuncts fail: expr is not literal type"; + return false; + } ExtLiteral literal(expr->type().type, _context->get_value(expr, NULL)); ExtPredicate* predicate = new ExtBinaryPredicate( From 5e339bc899eb419c8c10889e9a64a57a29974a99 Mon Sep 17 00:00:00 2001 From: lide-reed Date: Fri, 12 Apr 2019 19:48:43 +0800 Subject: 
[PATCH 39/73] Fix an error that send request with empty scroll_id --- be/src/util/es_scan_reader.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/be/src/util/es_scan_reader.cpp b/be/src/util/es_scan_reader.cpp index e49cd92586b8e5..4e1c82586db85e 100644 --- a/be/src/util/es_scan_reader.cpp +++ b/be/src/util/es_scan_reader.cpp @@ -113,7 +113,13 @@ Status ESScanReader::get_next(bool* scan_eos, ScrollParser** parser) { scroll_parser = new ScrollParser(response); // maybe the index or shard is empty - if (scroll_parser == nullptr || scroll_parser->get_total() == 0) { + if (scroll_parser == nullptr) { + _eos = true; + return Status::OK; + } + + _scroll_id = scroll_parser->get_scroll_id(); + if (scroll_parser->get_total() == 0) { _eos = true; return Status::OK; } @@ -124,13 +130,16 @@ Status ESScanReader::get_next(bool* scan_eos, ScrollParser** parser) { _eos = false; } - _scroll_id = scroll_parser->get_scroll_id(); *parser = scroll_parser; *scan_eos = false; return Status::OK; } Status ESScanReader::close() { + if (_scroll_id.empty()) { + return Status::OK; + } + std::string scratch_target = _target + REQUEST_SEARCH_SCROLL_PATH; RETURN_IF_ERROR(_network_client.init(scratch_target)); _network_client.set_basic_auth(_user_name, _passwd); From 73d0b62336e5293b2cf62f924c9d52da2251d31a Mon Sep 17 00:00:00 2001 From: lide-reed Date: Mon, 15 Apr 2019 16:10:06 +0800 Subject: [PATCH 40/73] Fix a bug when converting int8_t to string --- be/src/exec/es_predicate.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/exec/es_predicate.cpp b/be/src/exec/es_predicate.cpp index d9fb709507088a..af50b2c5d3b63a 100644 --- a/be/src/exec/es_predicate.cpp +++ b/be/src/exec/es_predicate.cpp @@ -53,7 +53,7 @@ std::string ExtLiteral::value_to_string() { std::stringstream ss; switch (_type) { case TYPE_TINYINT: - ss << get_byte(); + ss << (int)get_byte(); break; case TYPE_SMALLINT: ss << get_short(); From 
40e8d2d552922c11ad5a064324d74ea95c4df647 Mon Sep 17 00:00:00 2001 From: lide-reed Date: Mon, 15 Apr 2019 19:35:41 +0800 Subject: [PATCH 41/73] Return status for scanner thread of EsHttpScanner --- be/src/exec/es_http_scan_node.cpp | 10 ++++++++-- be/src/exec/es_http_scan_node.h | 3 ++- be/src/util/es_scan_reader.cpp | 6 ++++-- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/be/src/exec/es_http_scan_node.cpp b/be/src/exec/es_http_scan_node.cpp index 95e294d37b2c9a..5afb9d1b6cad13 100644 --- a/be/src/exec/es_http_scan_node.cpp +++ b/be/src/exec/es_http_scan_node.cpp @@ -147,8 +147,12 @@ Status EsHttpScanNode::start_scanners() { } for (int i = 0; i < _scan_ranges.size(); i++) { + std::promise p; + std::future f = p.get_future(); _scanner_threads.emplace_back(&EsHttpScanNode::scanner_worker, this, i, - _scan_ranges.size()); + _scan_ranges.size(), std::ref(p)); + Status status = f.get(); + if (!status.ok()) return status; } return Status::OK; } @@ -378,7 +382,7 @@ static std::string get_host_port(const std::vector& es_hosts) { return host_port; } -void EsHttpScanNode::scanner_worker(int start_idx, int length) { +void EsHttpScanNode::scanner_worker(int start_idx, int length, std::promise& p_status) { // Clone expr context std::vector scanner_expr_ctxs; DCHECK(start_idx < length); @@ -432,5 +436,7 @@ void EsHttpScanNode::scanner_worker(int start_idx, int length) { if (!status.ok()) { _queue_writer_cond.notify_all(); } + + p_status.set_value(status); } } diff --git a/be/src/exec/es_http_scan_node.h b/be/src/exec/es_http_scan_node.h index c65995ac523f8c..46f4784e7efb08 100644 --- a/be/src/exec/es_http_scan_node.h +++ b/be/src/exec/es_http_scan_node.h @@ -25,6 +25,7 @@ #include #include #include +#include #include "common/status.h" #include "exec/scan_node.h" @@ -75,7 +76,7 @@ class EsHttpScanNode : public ScanNode { Status start_scanners(); // One scanner worker, This scanner will hanle 'length' ranges start from start_idx - void scanner_worker(int start_idx, 
int length); + void scanner_worker(int start_idx, int length, std::promise& p_status); // Scan one range Status scanner_scan(std::unique_ptr scanner, diff --git a/be/src/util/es_scan_reader.cpp b/be/src/util/es_scan_reader.cpp index 4e1c82586db85e..7f75351baf0cfd 100644 --- a/be/src/util/es_scan_reader.cpp +++ b/be/src/util/es_scan_reader.cpp @@ -66,8 +66,10 @@ Status ESScanReader::open() { _network_client.execute_post_request(_query, &_cached_response); long status = _network_client.get_http_status(); if (status != 200) { - LOG(WARNING) << "invalid response http status for open: " << status; - return Status(_cached_response); + std::stringstream ss; + ss << "invalid response http status for open: " << status; + LOG(WARNING) << ss.str(); + return Status(ss.str()); } VLOG(1) << "open _cached response: " << _cached_response; return Status::OK; From e87eee6ccae5002617205bcc84c069d11c5d5fdc Mon Sep 17 00:00:00 2001 From: lide-reed Date: Tue, 16 Apr 2019 10:53:27 +0800 Subject: [PATCH 42/73] Fix a bug of predicate when it includes is_not_in statement --- be/src/exec/es_predicate.cpp | 3 +-- be/src/exec/es_predicate.h | 6 ++++-- be/src/util/es_scan_reader.cpp | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/be/src/exec/es_predicate.cpp b/be/src/exec/es_predicate.cpp index af50b2c5d3b63a..bfa82a90de594f 100644 --- a/be/src/exec/es_predicate.cpp +++ b/be/src/exec/es_predicate.cpp @@ -332,10 +332,8 @@ bool EsPredicate::build_disjuncts_list(Expr* conjunct, vector& di return false; } - TExtInPredicate ext_in_predicate; vector in_pred_values; InPredicate* pred = dynamic_cast(conjunct); - ext_in_predicate.__set_is_not_in(pred->is_not_in()); if (Expr::type_without_cast(pred->get_child(0)) != TExprNodeType::SLOT_REF) { return false; } @@ -365,6 +363,7 @@ bool EsPredicate::build_disjuncts_list(Expr* conjunct, vector& di ExtPredicate* predicate = new ExtInPredicate( TExprNodeType::IN_PRED, + pred->is_not_in(), slot_desc->col_name(), slot_desc->type(), 
in_pred_values); diff --git a/be/src/exec/es_predicate.h b/be/src/exec/es_predicate.h index a789e58fc936d1..02d0383104fef3 100644 --- a/be/src/exec/es_predicate.h +++ b/be/src/exec/es_predicate.h @@ -106,11 +106,12 @@ struct ExtBinaryPredicate : public ExtPredicate { struct ExtInPredicate : public ExtPredicate { ExtInPredicate( TExprNodeType::type node_type, + bool is_not_in, const std::string& name, const TypeDescriptor& type, const std::vector& values) : ExtPredicate(node_type), - is_not_in(false), + is_not_in(is_not_in), col(name, type), values(values) { } @@ -140,10 +141,11 @@ struct ExtIsNullPredicate : public ExtPredicate { TExprNodeType::type node_type, const std::string& name, const TypeDescriptor& type, + bool is_not_null, ExtLiteral value) : ExtPredicate(node_type), col(name, type), - is_not_null(false) { + is_not_null(is_not_null) { } ExtColumnDesc col; diff --git a/be/src/util/es_scan_reader.cpp b/be/src/util/es_scan_reader.cpp index 7f75351baf0cfd..797e1053dbcd3e 100644 --- a/be/src/util/es_scan_reader.cpp +++ b/be/src/util/es_scan_reader.cpp @@ -67,7 +67,7 @@ Status ESScanReader::open() { long status = _network_client.get_http_status(); if (status != 200) { std::stringstream ss; - ss << "invalid response http status for open: " << status; + ss << "invalid response http status for open: " << status << ", response:" << _cached_response; LOG(WARNING) << ss.str(); return Status(ss.str()); } From 53ed7b4ae1aadbef27d0efd574f739d36b51b652 Mon Sep 17 00:00:00 2001 From: lide-reed Date: Tue, 16 Apr 2019 13:51:53 +0800 Subject: [PATCH 43/73] Fix a bug in value_to_string() by introducing to_string() --- be/src/exec/es_predicate.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/be/src/exec/es_predicate.cpp b/be/src/exec/es_predicate.cpp index bfa82a90de594f..ca867e8a78d9ca 100644 --- a/be/src/exec/es_predicate.cpp +++ b/be/src/exec/es_predicate.cpp @@ -53,22 +53,22 @@ std::string ExtLiteral::value_to_string() { std::stringstream 
ss; switch (_type) { case TYPE_TINYINT: - ss << (int)get_byte(); + ss << std::to_string(get_byte()); break; case TYPE_SMALLINT: - ss << get_short(); + ss << std::to_string(get_short()); break; case TYPE_INT: - ss << get_int(); + ss << std::to_string(get_int()); break; case TYPE_BIGINT: - ss << get_long(); + ss << std::to_string(get_long()); break; case TYPE_FLOAT: - ss << get_float(); + ss << std::to_string(get_float()); break; case TYPE_DOUBLE: - ss << get_double(); + ss << std::to_string(get_double()); break; case TYPE_CHAR: case TYPE_VARCHAR: @@ -79,7 +79,7 @@ std::string ExtLiteral::value_to_string() { ss << get_date_string(); break; case TYPE_BOOLEAN: - ss << get_bool(); + ss << std::to_string(get_bool()); break; case TYPE_DECIMAL: ss << get_decimal_string(); From f2a8c173d99473932f32ad33d1bf966f88943673 Mon Sep 17 00:00:00 2001 From: lide-reed Date: Wed, 17 Apr 2019 19:45:21 +0800 Subject: [PATCH 44/73] Fix date issue in hash join --- be/src/exec/es_predicate.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/be/src/exec/es_predicate.cpp b/be/src/exec/es_predicate.cpp index ca867e8a78d9ca..c6bc1c1ecdfa02 100644 --- a/be/src/exec/es_predicate.cpp +++ b/be/src/exec/es_predicate.cpp @@ -138,6 +138,10 @@ std::string ExtLiteral::get_string() { std::string ExtLiteral::get_date_string() { DCHECK(_type == TYPE_DATE || _type == TYPE_DATETIME); DateTimeValue date_value = *reinterpret_cast(_value); + if (_type == TYPE_DATE) { + date_value.cast_to_date(); + } + char str[MAX_DTVALUE_STR_LEN]; date_value.to_string(str); return std::string(str, strlen(str)); From b4bfbc7ae840beff2ddebfc5216f096ff2cb4079 Mon Sep 17 00:00:00 2001 From: wuyunfeng Date: Thu, 18 Apr 2019 14:10:03 +0800 Subject: [PATCH 45/73] Add http client error logic for response callback and add some test to UT --- be/src/http/http_client.cpp | 3 +++ be/test/http/http_client_test.cpp | 16 ++++++++++++++++ be/test/util/es_query_builder_test.cpp | 8 +++----- run-ut.sh | 2 ++ 4 files changed, 24 
insertions(+), 5 deletions(-) diff --git a/be/src/http/http_client.cpp b/be/src/http/http_client.cpp index ca061cfea88a0c..e6a8047a6868d2 100644 --- a/be/src/http/http_client.cpp +++ b/be/src/http/http_client.cpp @@ -161,6 +161,9 @@ Status HttpClient::execute(const std::function +#include "boost/algorithm/string.hpp" #include "common/logging.h" #include "http/ev_http_server.h" #include "http/http_channel.h" @@ -151,6 +153,20 @@ TEST_F(HttpClientTest, post_normal) { ASSERT_STREQ(response.c_str(), request_body.c_str()); } +TEST_F(HttpClientTest, post_failed) { + HttpClient client; + auto st = client.init("http://127.0.0.1:29386/simple_pos"); + ASSERT_TRUE(st.ok()); + client.set_method(POST); + client.set_basic_auth("test1", ""); + std::string response; + std::string request_body = "simple post body query"; + st = client.execute_post_request(request_body, &response); + ASSERT_FALSE(st.ok()); + std::string not_found = "404"; + ASSERT_TRUE(boost::algorithm::contains(response, not_found)); +} + } int main(int argc, char* argv[]) { diff --git a/be/test/util/es_query_builder_test.cpp b/be/test/util/es_query_builder_test.cpp index 091f286cbfc052..3755defa995789 100644 --- a/be/test/util/es_query_builder_test.cpp +++ b/be/test/util/es_query_builder_test.cpp @@ -138,7 +138,7 @@ TEST_F(BooleanQueryBuilderTest, terms_in_query) { ExtLiteral term_literal_3(TYPE_VARCHAR, &string_value_3); std::vector terms_values = {term_literal_1, term_literal_2, term_literal_3}; - ExtInPredicate* in_predicate = new ExtInPredicate(TExprNodeType::IN_PRED, terms_in_field, terms_in_col_type_desc, terms_values); + ExtInPredicate* in_predicate = new ExtInPredicate(TExprNodeType::IN_PRED, false, terms_in_field, terms_in_col_type_desc, terms_values); TermsInSetQueryBuilder terms_query(in_predicate); rapidjson::Document document; rapidjson::Value in_query_value = terms_query.to_json(document); @@ -281,8 +281,7 @@ TEST_F(BooleanQueryBuilderTest, compound_bool_query) { ExtLiteral 
term_literal_2(TYPE_VARCHAR, &string_value_2); std::vector terms_values = {term_literal_1, term_literal_2}; - ExtInPredicate* in_predicate = new ExtInPredicate(TExprNodeType::IN_PRED, terms_in_field, terms_in_col_type_desc, terms_values); - in_predicate->is_not_in = true; + ExtInPredicate* in_predicate = new ExtInPredicate(TExprNodeType::IN_PRED, true, terms_in_field, terms_in_col_type_desc, terms_values); std::vector bool_predicates_4 = {in_predicate}; EsPredicate* bool_predicate_4 = new EsPredicate(bool_predicates_4); @@ -386,8 +385,7 @@ TEST_F(BooleanQueryBuilderTest, validate_partial) { ExtLiteral term_literal_2(TYPE_VARCHAR, &string_value_2); std::vector terms_values = {term_literal_1, term_literal_2}; - ExtInPredicate* in_predicate = new ExtInPredicate(TExprNodeType::IN_PRED, terms_in_field, terms_in_col_type_desc, terms_values); - in_predicate->is_not_in = true; + ExtInPredicate* in_predicate = new ExtInPredicate(TExprNodeType::IN_PRED, true, terms_in_field, terms_in_col_type_desc, terms_values); std::vector bool_predicates_2 = {in_predicate}; EsPredicate* bool_predicate_2 = new EsPredicate(bool_predicates_2); diff --git a/run-ut.sh b/run-ut.sh index 0caec5bf5d30b6..f1a424f5687677 100755 --- a/run-ut.sh +++ b/run-ut.sh @@ -150,6 +150,8 @@ ${DORIS_TEST_BINARY_DIR}/util/byte_buffer_test2 ${DORIS_TEST_BINARY_DIR}/util/uid_util_test ${DORIS_TEST_BINARY_DIR}/util/aes_util_test ${DORIS_TEST_BINARY_DIR}/util/string_util_test +${DORIS_TEST_BINARY_DIR}/util/es_scan_reader_test +${DORIS_TEST_BINARY_DIR}/util/es_query_builder_test ## Running common Unittest ${DORIS_TEST_BINARY_DIR}/common/resource_tls_test From 1d38204784b0212507852f12d9c4598e12747052 Mon Sep 17 00:00:00 2001 From: wuyunfeng Date: Thu, 18 Apr 2019 14:13:17 +0800 Subject: [PATCH 46/73] Add http client error logic for response callback and add some test to UT --- be/src/http/http_client.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/be/src/http/http_client.cpp 
b/be/src/http/http_client.cpp index e6a8047a6868d2..1ab2043c4c7429 100644 --- a/be/src/http/http_client.cpp +++ b/be/src/http/http_client.cpp @@ -161,10 +161,9 @@ Status HttpClient::execute(const std::function Date: Thu, 18 Apr 2019 15:27:37 +0800 Subject: [PATCH 47/73] Improve error prompt when query from ES server --- be/src/http/http_client.cpp | 4 +--- be/src/util/es_scan_reader.cpp | 7 +++---- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/be/src/http/http_client.cpp b/be/src/http/http_client.cpp index 1ab2043c4c7429..f56592125d427e 100644 --- a/be/src/http/http_client.cpp +++ b/be/src/http/http_client.cpp @@ -162,9 +162,7 @@ Status HttpClient::execute(const std::function Date: Thu, 18 Apr 2019 14:10:03 +0800 Subject: [PATCH 48/73] Add http client error logic for response callback and add some test to UT --- be/src/http/http_client.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/be/src/http/http_client.cpp b/be/src/http/http_client.cpp index f56592125d427e..605a18174b8f8c 100644 --- a/be/src/http/http_client.cpp +++ b/be/src/http/http_client.cpp @@ -161,6 +161,9 @@ Status HttpClient::execute(const std::function Date: Thu, 18 Apr 2019 20:32:35 +0800 Subject: [PATCH 50/73] Add missed license head and align code --- be/CMakeLists.txt | 2 +- be/src/exec/es_predicate.cpp | 16 +-- be/src/exec/es_predicate.h | 110 +++++++++--------- be/src/util/es_query_builder.cpp | 1 + be/src/util/es_query_builder.h | 3 + be/test/http/http_client_test.cpp | 1 - be/test/util/es_query_builder_test.cpp | 3 +- be/test/util/es_scan_reader_test.cpp | 1 + .../apache/doris/external/EsRestClient.java | 17 +++ 9 files changed, 86 insertions(+), 68 deletions(-) diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index 09a2487c2f3f93..152651a8119948 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -284,7 +284,7 @@ set(CXX_GCC_FLAGS "-g -Wno-unused-local-typedefs") # Debug information is stored as dwarf2 to be as compatible as possible # -Werror: compile warnings 
should be errors when using the toolchain compiler. # Only enable for debug builds because this is what we test in pre-commit tests. -set(CXX_FLAGS_DEBUG "${CXX_GCC_FLAGS} -Werror -O0 -gdwarf-2") +set(CXX_FLAGS_DEBUG "${CXX_GCC_FLAGS} -Werror -ggdb") # For CMAKE_BUILD_TYPE=Release # -O3: Enable all compiler optimizations diff --git a/be/src/exec/es_predicate.cpp b/be/src/exec/es_predicate.cpp index c6bc1c1ecdfa02..17df4135825154 100644 --- a/be/src/exec/es_predicate.cpp +++ b/be/src/exec/es_predicate.cpp @@ -183,7 +183,7 @@ EsPredicate::~EsPredicate() { } bool EsPredicate::build_disjuncts_list() { - return build_disjuncts_list(_context->root(), _disjuncts); + return build_disjuncts_list(_context->root()); } // make sure to build by build_disjuncts_list @@ -216,7 +216,7 @@ static bool is_literal_node(const Expr* expr) { } } -bool EsPredicate::build_disjuncts_list(Expr* conjunct, vector& disjuncts) { +bool EsPredicate::build_disjuncts_list(Expr* conjunct) { if (TExprNodeType::BINARY_PRED == conjunct->node_type()) { if (conjunct->children().size() != 2) { VLOG(1) << "get disjuncts fail: number of childs is not 2"; @@ -258,7 +258,7 @@ bool EsPredicate::build_disjuncts_list(Expr* conjunct, vector& di op, literal); - disjuncts.push_back(predicate); + _disjuncts.push_back(predicate); return true; } @@ -281,7 +281,7 @@ bool EsPredicate::build_disjuncts_list(Expr* conjunct, vector& di return false; } } - disjuncts.push_back(predicate); + _disjuncts.push_back(predicate); return true; } @@ -324,7 +324,7 @@ bool EsPredicate::build_disjuncts_list(Expr* conjunct, vector& di slot_desc->type(), literal); - disjuncts.push_back(predicate); + _disjuncts.push_back(predicate); return true; } @@ -371,7 +371,7 @@ bool EsPredicate::build_disjuncts_list(Expr* conjunct, vector& di slot_desc->col_name(), slot_desc->type(), in_pred_values); - disjuncts.push_back(predicate); + _disjuncts.push_back(predicate); return true; } @@ -381,10 +381,10 @@ bool EsPredicate::build_disjuncts_list(Expr* 
conjunct, vector& di VLOG(1) << "get disjuncts fail: op is not COMPOUND_OR"; return false; } - if (!build_disjuncts_list(conjunct->get_child(0), disjuncts)) { + if (!build_disjuncts_list(conjunct->get_child(0))) { return false; } - if (!build_disjuncts_list(conjunct->get_child(1), disjuncts)) { + if (!build_disjuncts_list(conjunct->get_child(1))) { return false; } diff --git a/be/src/exec/es_predicate.h b/be/src/exec/es_predicate.h index 02d0383104fef3..cbb8f06ebee0d5 100644 --- a/be/src/exec/es_predicate.h +++ b/be/src/exec/es_predicate.h @@ -36,36 +36,36 @@ class ExprContext; class ExtBinaryPredicate; class ExtLiteral { - public: - ExtLiteral(PrimitiveType type, void *value) : - _type(type), - _value(value) { - _str = value_to_string(); - } - ~ExtLiteral(); - const std::string& to_string() const { - return _str; - } - - private: - int8_t get_byte(); - int16_t get_short(); - int32_t get_int(); - int64_t get_long(); - float get_float(); - double get_double(); - std::string get_string(); - std::string get_date_string(); - bool get_bool(); - std::string get_decimal_string(); - std::string get_decimalv2_string(); - std::string get_largeint_string(); - - std::string value_to_string(); - - PrimitiveType _type; - void* _value; - std::string _str; +public: + ExtLiteral(PrimitiveType type, void *value) : + _type(type), + _value(value) { + _str = value_to_string(); + } + ~ExtLiteral(); + const std::string& to_string() const { + return _str; + } + +private: + int8_t get_byte(); + int16_t get_short(); + int32_t get_int(); + int64_t get_long(); + float get_float(); + double get_double(); + std::string get_string(); + std::string get_date_string(); + bool get_bool(); + std::string get_decimal_string(); + std::string get_decimalv2_string(); + std::string get_largeint_string(); + + std::string value_to_string(); + + PrimitiveType _type; + void* _value; + std::string _str; }; struct ExtColumnDesc { @@ -169,34 +169,30 @@ struct ExtFunction : public ExtPredicate { }; class 
EsPredicate { +public: + EsPredicate(ExprContext* context, const TupleDescriptor* tuple_desc); + ~EsPredicate(); + const std::vector& get_predicate_list(); + bool build_disjuncts_list(); + // public for tests + EsPredicate(std::vector& all_predicates) { + _disjuncts = all_predicates; + }; + + Status get_es_query_status() { + return _es_query_status; + } + +private: + bool build_disjuncts_list(Expr* conjunct); + bool is_match_func(const Expr* conjunct); + const SlotDescriptor* get_slot_desc(SlotRef* slotRef); - public: - EsPredicate(ExprContext* context, const TupleDescriptor* tuple_desc); - ~EsPredicate(); - const std::vector& get_predicate_list(); - bool build_disjuncts_list(); - // public for tests - EsPredicate(std::vector& all_predicates) { - _disjuncts = all_predicates; - }; - - Status get_es_query_status() { - return _es_query_status; - } - - - private: - - bool build_disjuncts_list(Expr* conjunct, - std::vector& disjuncts); - bool is_match_func(const Expr* conjunct); - const SlotDescriptor* get_slot_desc(SlotRef* slotRef); - - ExprContext* _context; - int _disjuncts_num; - const TupleDescriptor* _tuple_desc; - std::vector _disjuncts; - Status _es_query_status; + ExprContext* _context; + int _disjuncts_num; + const TupleDescriptor* _tuple_desc; + std::vector _disjuncts; + Status _es_query_status; }; } diff --git a/be/src/util/es_query_builder.cpp b/be/src/util/es_query_builder.cpp index f17994a16af2fc..847395288ad9fa 100644 --- a/be/src/util/es_query_builder.cpp +++ b/be/src/util/es_query_builder.cpp @@ -14,6 +14,7 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. + #include "util/es_query_builder.h" #include diff --git a/be/src/util/es_query_builder.h b/be/src/util/es_query_builder.h index f5643db0040041..96310d7b4dae0c 100644 --- a/be/src/util/es_query_builder.h +++ b/be/src/util/es_query_builder.h @@ -14,9 +14,12 @@ // KIND, either express or implied. 
See the License for the // specific language governing permissions and limitations // under the License. + #pragma once + #include #include + #include "rapidjson/document.h" #include "exec/es_predicate.h" #include "common/status.h" diff --git a/be/test/http/http_client_test.cpp b/be/test/http/http_client_test.cpp index c883e4c30374f8..e75a299142cf21 100644 --- a/be/test/http/http_client_test.cpp +++ b/be/test/http/http_client_test.cpp @@ -17,7 +17,6 @@ #include "http/http_client.h" - #include #include "boost/algorithm/string.hpp" diff --git a/be/test/util/es_query_builder_test.cpp b/be/test/util/es_query_builder_test.cpp index 3755defa995789..6f9f8691ac0d6d 100644 --- a/be/test/util/es_query_builder_test.cpp +++ b/be/test/util/es_query_builder_test.cpp @@ -26,6 +26,7 @@ #include "rapidjson/stringbuffer.h" #include "rapidjson/writer.h" #include "runtime/string_value.h" + namespace doris { class BooleanQueryBuilderTest : public testing::Test { @@ -33,6 +34,7 @@ class BooleanQueryBuilderTest : public testing::Test { BooleanQueryBuilderTest() { } virtual ~BooleanQueryBuilderTest() { } }; + TEST_F(BooleanQueryBuilderTest, term_query) { // content = "wyf" char str[] = "wyf"; @@ -431,7 +433,6 @@ TEST_F(BooleanQueryBuilderTest, validate_partial) { ASSERT_TRUE(result == expected1); } - } int main(int argc, char* argv[]) { diff --git a/be/test/util/es_scan_reader_test.cpp b/be/test/util/es_scan_reader_test.cpp index 06b194d13ca4d2..57b38aeb4ffcb2 100644 --- a/be/test/util/es_scan_reader_test.cpp +++ b/be/test/util/es_scan_reader_test.cpp @@ -14,6 +14,7 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. 
+ #include "util/es_scan_reader.h" #include "util/es_scroll_query.h" #include diff --git a/fe/src/main/java/org/apache/doris/external/EsRestClient.java b/fe/src/main/java/org/apache/doris/external/EsRestClient.java index 279e5c824e4a32..1159cadc646f30 100644 --- a/fe/src/main/java/org/apache/doris/external/EsRestClient.java +++ b/fe/src/main/java/org/apache/doris/external/EsRestClient.java @@ -1,3 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ package org.apache.doris.external; import okhttp3.Credentials; From 5442f6441c3bf907ea0a31d0ce6b24a470727460 Mon Sep 17 00:00:00 2001 From: lide-reed Date: Thu, 18 Apr 2019 20:49:12 +0800 Subject: [PATCH 51/73] Adjust some code format --- be/src/exec/es_http_scan_node.h | 5 ----- be/src/util/es_scroll_parser.h | 2 +- be/src/util/es_scroll_query.cpp | 5 +++-- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/be/src/exec/es_http_scan_node.h b/be/src/exec/es_http_scan_node.h index 46f4784e7efb08..555a44a31bde5e 100644 --- a/be/src/exec/es_http_scan_node.h +++ b/be/src/exec/es_http_scan_node.h @@ -46,15 +46,10 @@ class EsHttpScanNode : public ScanNode { virtual ~EsHttpScanNode(); virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr) override; - virtual Status prepare(RuntimeState* state) override; - virtual Status open(RuntimeState* state) override; - virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) override; - virtual Status close(RuntimeState* state) override; - virtual Status set_scan_ranges(const std::vector& scan_ranges) override; protected: diff --git a/be/src/util/es_scroll_parser.h b/be/src/util/es_scroll_parser.h index 820aeaa94e8979..b12b9f7186988c 100644 --- a/be/src/util/es_scroll_parser.h +++ b/be/src/util/es_scroll_parser.h @@ -17,7 +17,7 @@ #pragma once -#include +#include #include "rapidjson/document.h" #include "runtime/descriptors.h" diff --git a/be/src/util/es_scroll_query.cpp b/be/src/util/es_scroll_query.cpp index 3f268c532fbba4..49aee11eba646a 100644 --- a/be/src/util/es_scroll_query.cpp +++ b/be/src/util/es_scroll_query.cpp @@ -15,7 +15,9 @@ // specific language governing permissions and limitations // under the License. 
-#include +#include "util/es_query_builder.h" + +#include #include #include "common/logging.h" #include "es_scroll_query.h" @@ -23,7 +25,6 @@ #include "rapidjson/stringbuffer.h" #include "rapidjson/writer.h" #include "util/es_scan_reader.h" -#include "util/es_query_builder.h" namespace doris { From 73b5f0ad8693a847c043a843f0bd2d62bf181b85 Mon Sep 17 00:00:00 2001 From: lide-reed Date: Fri, 19 Apr 2019 11:15:02 +0800 Subject: [PATCH 52/73] Improve the readability of code by adding const --- be/src/exec/es_predicate.cpp | 6 +++--- be/src/exec/es_predicate.h | 6 +++--- be/src/exprs/in_predicate.h | 2 +- be/src/util/es_scroll_parser.cpp | 2 +- be/src/util/es_scroll_parser.h | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/be/src/exec/es_predicate.cpp b/be/src/exec/es_predicate.cpp index 17df4135825154..7ccb066ef99807 100644 --- a/be/src/exec/es_predicate.cpp +++ b/be/src/exec/es_predicate.cpp @@ -216,7 +216,7 @@ static bool is_literal_node(const Expr* expr) { } } -bool EsPredicate::build_disjuncts_list(Expr* conjunct) { +bool EsPredicate::build_disjuncts_list(const Expr* conjunct) { if (TExprNodeType::BINARY_PRED == conjunct->node_type()) { if (conjunct->children().size() != 2) { VLOG(1) << "get disjuncts fail: number of childs is not 2"; @@ -337,7 +337,7 @@ bool EsPredicate::build_disjuncts_list(Expr* conjunct) { } vector in_pred_values; - InPredicate* pred = dynamic_cast(conjunct); + const InPredicate* pred = dynamic_cast(conjunct); if (Expr::type_without_cast(pred->get_child(0)) != TExprNodeType::SLOT_REF) { return false; } @@ -405,7 +405,7 @@ bool EsPredicate::is_match_func(const Expr* conjunct) { return false; } -const SlotDescriptor* EsPredicate::get_slot_desc(SlotRef* slotRef) { +const SlotDescriptor* EsPredicate::get_slot_desc(const SlotRef* slotRef) { std::vector slot_ids; slotRef->get_slot_ids(&slot_ids); const SlotDescriptor* slot_desc = nullptr; diff --git a/be/src/exec/es_predicate.h b/be/src/exec/es_predicate.h index 
cbb8f06ebee0d5..a697a0f967fdeb 100644 --- a/be/src/exec/es_predicate.h +++ b/be/src/exec/es_predicate.h @@ -175,7 +175,7 @@ class EsPredicate { const std::vector& get_predicate_list(); bool build_disjuncts_list(); // public for tests - EsPredicate(std::vector& all_predicates) { + EsPredicate(const std::vector& all_predicates) { _disjuncts = all_predicates; }; @@ -184,9 +184,9 @@ class EsPredicate { } private: - bool build_disjuncts_list(Expr* conjunct); + bool build_disjuncts_list(const Expr* conjunct); bool is_match_func(const Expr* conjunct); - const SlotDescriptor* get_slot_desc(SlotRef* slotRef); + const SlotDescriptor* get_slot_desc(const SlotRef* slotRef); ExprContext* _context; int _disjuncts_num; diff --git a/be/src/exprs/in_predicate.h b/be/src/exprs/in_predicate.h index aec176730f764b..4b3c6fa5729bb2 100644 --- a/be/src/exprs/in_predicate.h +++ b/be/src/exprs/in_predicate.h @@ -55,7 +55,7 @@ class InPredicate : public Predicate { // if add to children, when List is long, copy is a expensive op. 
void insert(void* value); - HybirdSetBase* hybird_set() { + HybirdSetBase* hybird_set() const { return _hybird_set.get(); } diff --git a/be/src/util/es_scroll_parser.cpp b/be/src/util/es_scroll_parser.cpp index 425d4c4c82c863..c672604fda90da 100644 --- a/be/src/util/es_scroll_parser.cpp +++ b/be/src/util/es_scroll_parser.cpp @@ -54,7 +54,7 @@ ScrollParser::ScrollParser(const std::string& scroll_result) : ScrollParser::~ScrollParser() { } -void ScrollParser::parsing(const std::string scroll_result) { +void ScrollParser::parsing(const std::string& scroll_result) { _document_node.Parse(scroll_result.c_str()); if (!_document_node.HasMember(FIELD_SCROLL_ID)) { diff --git a/be/src/util/es_scroll_parser.h b/be/src/util/es_scroll_parser.h index b12b9f7186988c..494fef23b280f1 100644 --- a/be/src/util/es_scroll_parser.h +++ b/be/src/util/es_scroll_parser.h @@ -41,7 +41,7 @@ class ScrollParser { int get_size(); private: - void parsing(const std::string scroll_result); + void parsing(const std::string& scroll_result); std::string _scroll_id; int _total; From a79f38ea04b5f57492facd3fab6c042db5acd89b Mon Sep 17 00:00:00 2001 From: wuyunfeng Date: Fri, 19 Apr 2019 19:31:25 +0800 Subject: [PATCH 53/73] Optimize es_query_builder function call --- be/src/exec/es_http_scan_node.cpp | 3 +- be/src/util/es_query_builder.cpp | 202 +++++++++++-------------- be/src/util/es_query_builder.h | 47 +++--- be/src/util/es_scroll_query.cpp | 4 +- be/test/exec/es_predicate_test.cpp | 4 +- be/test/util/es_query_builder_test.cpp | 51 ++++--- 6 files changed, 159 insertions(+), 152 deletions(-) diff --git a/be/src/exec/es_http_scan_node.cpp b/be/src/exec/es_http_scan_node.cpp index 5afb9d1b6cad13..3ca91dde784111 100644 --- a/be/src/exec/es_http_scan_node.cpp +++ b/be/src/exec/es_http_scan_node.cpp @@ -119,7 +119,8 @@ Status EsHttpScanNode::open(RuntimeState* state) { RETURN_IF_ERROR(build_conjuncts_list()); // remove those predicates which ES cannot support - std::vector list = 
BooleanQueryBuilder::validate(_predicates); + std::vector list; + BooleanQueryBuilder::validate(_predicates, &list); DCHECK(list.size() == _predicate_to_conjunct.size()); for(int i = list.size() - 1; i >= 0; i--) { if(!list[i]) { diff --git a/be/src/util/es_query_builder.cpp b/be/src/util/es_query_builder.cpp index 847395288ad9fa..dc0b572fa600dc 100644 --- a/be/src/util/es_query_builder.cpp +++ b/be/src/util/es_query_builder.cpp @@ -28,104 +28,60 @@ namespace doris { ESQueryBuilder::ESQueryBuilder(const std::string& es_query_str) : _es_query_str(es_query_str) { } -ESQueryBuilder::ESQueryBuilder(ExtFunction* es_query) { - auto first = es_query->values.front(); +ESQueryBuilder::ESQueryBuilder(const ExtFunction& es_query) { + auto first = es_query.values.front(); _es_query_str = first.to_string(); } // note: call this function must invoke BooleanQueryBuilder::check_es_query to check validation -rapidjson::Value ESQueryBuilder::to_json(rapidjson::Document& document) { +void ESQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* query) { rapidjson::Document scratch_document; scratch_document.Parse(_es_query_str.c_str()); - rapidjson::Document::AllocatorType& allocator = document.GetAllocator(); + rapidjson::Document::AllocatorType& allocator = document->GetAllocator(); rapidjson::Value query_key; rapidjson::Value query_value; //{ "term": { "dv": "2" } } - rapidjson::Value es_query(rapidjson::kObjectType); rapidjson::Value::ConstMemberIterator first = scratch_document.MemberBegin(); // deep copy, reference http://rapidjson.org/md_doc_tutorial.html#DeepCopyValue query_key.CopyFrom(first->name, allocator); // if we found one key, then end loop as QueryDSL only support one `query` root query_value.CopyFrom(first->value, allocator); - es_query.SetObject(); // Move Semantics, reference http://rapidjson.org/md_doc_tutorial.html#MoveSemantics - es_query.AddMember(query_key, query_value, allocator); - return es_query; -} -rapidjson::Value 
WildCardQueryBuilder::to_json(rapidjson::Document& document) { - rapidjson::Document::AllocatorType& allocator = document.GetAllocator(); - rapidjson::Value term_node(rapidjson::kObjectType); - term_node.SetObject(); - rapidjson::Value field_value(_field.c_str(), allocator); - rapidjson::Value term_value(_like_value.c_str(), allocator); - term_node.AddMember(field_value, term_value, allocator); - rapidjson::Value wildcard_query(rapidjson::kObjectType); - wildcard_query.SetObject(); - wildcard_query.AddMember("wildcard", term_node, allocator); - return wildcard_query; - -} -WildCardQueryBuilder::WildCardQueryBuilder(ExtLikePredicate* like_predicate) { - _like_value = like_predicate->value.to_string(); - std::replace(_like_value.begin(), _like_value.end(), '_', '?'); - std::replace(_like_value.begin(), _like_value.end(), '%', '*'); - _field = like_predicate->col.name; + query->AddMember(query_key, query_value, allocator); } TermQueryBuilder::TermQueryBuilder(const std::string& field, const std::string& term) : _field(field), _term(term) { } -TermQueryBuilder::TermQueryBuilder(ExtBinaryPredicate* binary_predicate) { - _field = binary_predicate->col.name; - ExtLiteral literal = binary_predicate->value; - _term = literal.to_string(); +TermQueryBuilder::TermQueryBuilder(const ExtBinaryPredicate& binary_predicate) { + _field = binary_predicate.col.name; + _term = binary_predicate.value.to_string(); } -rapidjson::Value TermQueryBuilder::to_json(rapidjson::Document& document) { - rapidjson::Document::AllocatorType& allocator = document.GetAllocator(); +void TermQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* query) { + rapidjson::Document::AllocatorType& allocator = document->GetAllocator(); rapidjson::Value term_node(rapidjson::kObjectType); term_node.SetObject(); rapidjson::Value field_value(_field.c_str(), allocator); rapidjson::Value term_value(_term.c_str(), allocator); term_node.AddMember(field_value, term_value, allocator); - rapidjson::Value 
term_query(rapidjson::kObjectType); - term_query.SetObject(); - term_query.AddMember("term", term_node, allocator); - return term_query; + query->AddMember("term", term_node, allocator); } -rapidjson::Value TermsInSetQueryBuilder::to_json(rapidjson::Document& document) { - std::string field = _in_predicate->col.name; - rapidjson::Document::AllocatorType& allocator = document.GetAllocator(); - rapidjson::Value terms_node(rapidjson::kObjectType); - rapidjson::Value values_node(rapidjson::kArrayType); - for (auto value : _in_predicate->values) { - rapidjson::Value value_value(value.to_string().c_str(), allocator); - values_node.PushBack(value_value, allocator); - } - rapidjson::Value field_value(field.c_str(), allocator); - terms_node.AddMember(field_value, values_node, allocator); - rapidjson::Value terms_in_set_query(rapidjson::kObjectType); - terms_in_set_query.SetObject(); - terms_in_set_query.AddMember("terms", terms_node, allocator); - return terms_in_set_query; +RangeQueryBuilder::RangeQueryBuilder(const ExtBinaryPredicate& range_predicate) { + _field = range_predicate.col.name; + _value = range_predicate.value.to_string(); + _op = range_predicate.op; } -TermsInSetQueryBuilder::TermsInSetQueryBuilder(ExtInPredicate* in_predicate) { - _in_predicate = in_predicate; -} - -rapidjson::Value RangeQueryBuilder::to_json(rapidjson::Document& document) { - std::string field = _range_predicate->col.name; - rapidjson::Document::AllocatorType& allocator = document.GetAllocator(); - rapidjson::Value field_value(field.c_str(), allocator); - ExtLiteral b_value = _range_predicate->value; - rapidjson::Value value(b_value.to_string().c_str(), allocator); +void RangeQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* query) { + rapidjson::Document::AllocatorType& allocator = document->GetAllocator(); + rapidjson::Value field_value(_field.c_str(), allocator); + rapidjson::Value value(_value.c_str(), allocator); rapidjson::Value op_node(rapidjson::kObjectType); 
op_node.SetObject(); - switch (_range_predicate->op) - { + switch (_op) { case TExprOpcode::LT: op_node.AddMember("lt", value, allocator); break; @@ -144,25 +100,50 @@ rapidjson::Value RangeQueryBuilder::to_json(rapidjson::Document& document) { rapidjson::Value field_node(rapidjson::kObjectType); field_node.SetObject(); field_node.AddMember(field_value, op_node, allocator); + query->AddMember("range", field_node, allocator); +} - rapidjson::Value range_query(rapidjson::kObjectType); - range_query.SetObject(); - range_query.AddMember("range", field_node, allocator); - return range_query; +void WildCardQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* query) { + rapidjson::Document::AllocatorType& allocator = document->GetAllocator(); + rapidjson::Value term_node(rapidjson::kObjectType); + term_node.SetObject(); + rapidjson::Value field_value(_field.c_str(), allocator); + rapidjson::Value term_value(_like_value.c_str(), allocator); + term_node.AddMember(field_value, term_value, allocator); + query->AddMember("wildcard", term_node, allocator); +} +WildCardQueryBuilder::WildCardQueryBuilder(const ExtLikePredicate& like_predicate) { + _like_value = like_predicate.value.to_string(); + std::replace(_like_value.begin(), _like_value.end(), '_', '?'); + std::replace(_like_value.begin(), _like_value.end(), '%', '*'); + _field = like_predicate.col.name; } -RangeQueryBuilder::RangeQueryBuilder(ExtBinaryPredicate* range_predicate) { - _range_predicate = range_predicate; +void TermsInSetQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* query) { + rapidjson::Document::AllocatorType& allocator = document->GetAllocator(); + rapidjson::Value terms_node(rapidjson::kObjectType); + rapidjson::Value values_node(rapidjson::kArrayType); + for (auto value : _values) { + rapidjson::Value value_value(value.c_str(), allocator); + values_node.PushBack(value_value, allocator); + } + rapidjson::Value field_value(_field.c_str(), allocator); + 
terms_node.AddMember(field_value, values_node, allocator); + query->AddMember("terms", terms_node, allocator); } -rapidjson::Value MatchAllQueryBuilder::to_json(rapidjson::Document& document) { - rapidjson::Document::AllocatorType& allocator = document.GetAllocator(); +TermsInSetQueryBuilder::TermsInSetQueryBuilder(const ExtInPredicate& in_predicate) { + _field = in_predicate.col.name; + for (auto value : in_predicate.values) { + _values.push_back(value.to_string()); + } +} + +void MatchAllQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* query) { + rapidjson::Document::AllocatorType& allocator = document->GetAllocator(); rapidjson::Value match_all_node(rapidjson::kObjectType); match_all_node.SetObject(); - rapidjson::Value match_all_query(rapidjson::kObjectType); - match_all_query.SetObject(); - match_all_query.AddMember("match_all", match_all_node, allocator); - return match_all_query; + query->AddMember("match_all", match_all_node, allocator); } BooleanQueryBuilder::BooleanQueryBuilder() { @@ -192,15 +173,14 @@ BooleanQueryBuilder::BooleanQueryBuilder(const std::vector& predi switch (predicate->node_type) { case TExprNodeType::BINARY_PRED: { ExtBinaryPredicate* binary_predicate = (ExtBinaryPredicate*)predicate; - switch (binary_predicate->op) - { + switch (binary_predicate->op) { case TExprOpcode::EQ: { - TermQueryBuilder* term_query = new TermQueryBuilder(binary_predicate); + TermQueryBuilder* term_query = new TermQueryBuilder(*binary_predicate); _should_clauses.push_back(term_query); break; } case TExprOpcode::NE:{ // process NE - TermQueryBuilder* term_query = new TermQueryBuilder(binary_predicate); + TermQueryBuilder* term_query = new TermQueryBuilder(*binary_predicate); BooleanQueryBuilder* bool_query = new BooleanQueryBuilder(); bool_query->must_not(term_query); _should_clauses.push_back(bool_query); @@ -210,7 +190,7 @@ BooleanQueryBuilder::BooleanQueryBuilder(const std::vector& predi case TExprOpcode::LE: case TExprOpcode::GT: case 
TExprOpcode::GE: { - RangeQueryBuilder* range_query = new RangeQueryBuilder(binary_predicate); + RangeQueryBuilder* range_query = new RangeQueryBuilder(*binary_predicate); _should_clauses.push_back(range_query); break; } @@ -223,26 +203,26 @@ BooleanQueryBuilder::BooleanQueryBuilder(const std::vector& predi ExtInPredicate* in_predicate = (ExtInPredicate *)predicate; bool is_not_in = in_predicate->is_not_in; if (is_not_in) { // process not in predicate - TermsInSetQueryBuilder* terms_predicate = new TermsInSetQueryBuilder(in_predicate); + TermsInSetQueryBuilder* terms_predicate = new TermsInSetQueryBuilder(*in_predicate); BooleanQueryBuilder* bool_query = new BooleanQueryBuilder(); bool_query->must_not(terms_predicate); _should_clauses.push_back(bool_query); } else { // process in predicate - TermsInSetQueryBuilder* terms_query= new TermsInSetQueryBuilder(in_predicate); + TermsInSetQueryBuilder* terms_query= new TermsInSetQueryBuilder(*in_predicate); _should_clauses.push_back(terms_query); } break; } case TExprNodeType::LIKE_PRED: { ExtLikePredicate* like_predicate = (ExtLikePredicate *)predicate; - WildCardQueryBuilder* wild_card_query = new WildCardQueryBuilder(like_predicate); + WildCardQueryBuilder* wild_card_query = new WildCardQueryBuilder(*like_predicate); _should_clauses.push_back(wild_card_query); break; } case TExprNodeType::FUNCTION_CALL: { ExtFunction* function_predicate = (ExtFunction *)predicate; if ("esquery" == function_predicate->func_name ) { - ESQueryBuilder* es_query = new ESQueryBuilder(function_predicate); + ESQueryBuilder* es_query = new ESQueryBuilder(*function_predicate); _should_clauses.push_back(es_query); }; break; @@ -253,13 +233,16 @@ BooleanQueryBuilder::BooleanQueryBuilder(const std::vector& predi } } -rapidjson::Value BooleanQueryBuilder::to_json(rapidjson::Document& document) { - rapidjson::Document::AllocatorType &allocator = document.GetAllocator(); +void BooleanQueryBuilder::to_json(rapidjson::Document* document, 
rapidjson::Value* query) { + rapidjson::Document::AllocatorType &allocator = document->GetAllocator(); rapidjson::Value root_node_object(rapidjson::kObjectType); if (_filter_clauses.size() > 0) { rapidjson::Value filter_node(rapidjson::kArrayType); for (auto must_clause : _filter_clauses) { - filter_node.PushBack(must_clause->to_json(document), allocator); + rapidjson::Value must_clause_query(rapidjson::kObjectType); + must_clause_query.SetObject(); + must_clause->to_json(document, &must_clause_query); + filter_node.PushBack(must_clause_query, allocator); } root_node_object.AddMember("filter", filter_node, allocator); } @@ -267,7 +250,10 @@ rapidjson::Value BooleanQueryBuilder::to_json(rapidjson::Document& document) { if (_should_clauses.size() > 0) { rapidjson::Value should_node(rapidjson::kArrayType); for (auto should_clause : _should_clauses) { - should_node.PushBack(should_clause->to_json(document), allocator); + rapidjson::Value should_clause_query(rapidjson::kObjectType); + should_clause_query.SetObject(); + should_clause->to_json(document, &should_clause_query); + should_node.PushBack(should_clause_query, allocator); } root_node_object.AddMember("should", should_node, allocator); } @@ -275,14 +261,14 @@ rapidjson::Value BooleanQueryBuilder::to_json(rapidjson::Document& document) { if (_must_not_clauses.size() > 0) { rapidjson::Value must_not_node(rapidjson::kArrayType); for (auto must_not_clause : _must_not_clauses) { - must_not_node.PushBack(must_not_clause->to_json(document), allocator); + rapidjson::Value must_not_clause_query(rapidjson::kObjectType); + must_not_clause_query.SetObject(); + must_not_clause->to_json(document, &must_not_clause_query); + must_not_node.PushBack(must_not_clause_query, allocator); } root_node_object.AddMember("must_not", must_not_node, allocator); } - - rapidjson::Value bool_query(rapidjson::kObjectType); - bool_query.AddMember("bool", root_node_object, allocator); - return bool_query; + query->AddMember("bool", 
root_node_object, allocator); } void BooleanQueryBuilder::should(QueryBuilder* filter) { @@ -304,7 +290,7 @@ Status BooleanQueryBuilder::check_es_query(const ExtFunction& extFunction) { scratch_document.Parse(esquery_str.c_str()); rapidjson::Document::AllocatorType& allocator = scratch_document.GetAllocator(); rapidjson::Value query_key; - //{ "term": { "dv": "2" } } + // { "term": { "dv": "2" } } if (!scratch_document.HasParseError()) { if (!scratch_document.IsObject()) { return Status(TStatusCode::ES_REQUEST_ERROR, "esquery must be a object"); @@ -326,10 +312,9 @@ Status BooleanQueryBuilder::check_es_query(const ExtFunction& extFunction) { return Status::OK; } -std::vector BooleanQueryBuilder::validate(const std::vector& espredicates) { +void BooleanQueryBuilder::validate(const std::vector& espredicates, std::vector* result) { int conjunct_size = espredicates.size(); - std::vector result; - result.reserve(conjunct_size); + result->reserve(conjunct_size); for (auto espredicate : espredicates) { bool flag = true; for (auto predicate : espredicate->get_predicate_list()) { @@ -357,7 +342,7 @@ std::vector BooleanQueryBuilder::validate(const std::vector& } } else { flag = false; - } + } break; } default: { @@ -369,29 +354,22 @@ std::vector BooleanQueryBuilder::validate(const std::vector& break; } } - result.push_back(flag); + result->push_back(flag); } - return result; } -rapidjson::Value BooleanQueryBuilder::to_query(const std::vector& predicates, rapidjson::Document& root) { +void BooleanQueryBuilder::to_query(const std::vector& predicates, rapidjson::Document* root, rapidjson::Value* query) { if (predicates.size() == 0) { MatchAllQueryBuilder match_all_query; - return match_all_query.to_json(root); + match_all_query.to_json(root, query); + return; } - root.SetObject(); + root->SetObject(); BooleanQueryBuilder bool_query; for (auto es_predicate : predicates) { vector or_predicates = es_predicate->get_predicate_list(); BooleanQueryBuilder* inner_bool_query = new 
BooleanQueryBuilder(or_predicates); bool_query.must(inner_bool_query); } - rapidjson::Value root_value_node = bool_query.to_json(root); - // root.AddMember("query", root_value_node, allocator); - // rapidjson::StringBuffer buffer; - // rapidjson::Writer writer(buffer); - // root.Accept(writer); - // std::string es_query_dsl_json = buffer.GetString(); - return root_value_node; -} + bool_query.to_json(root, query);} } diff --git a/be/src/util/es_query_builder.h b/be/src/util/es_query_builder.h index 96310d7b4dae0c..2575063f43335a 100644 --- a/be/src/util/es_query_builder.h +++ b/be/src/util/es_query_builder.h @@ -29,7 +29,7 @@ namespace doris { class QueryBuilder { public: - virtual rapidjson::Value to_json(rapidjson::Document& allocator) = 0; + virtual void to_json(rapidjson::Document* document, rapidjson::Value* query) = 0; virtual ~QueryBuilder() { }; }; @@ -38,8 +38,8 @@ class QueryBuilder { class ESQueryBuilder : public QueryBuilder { public: ESQueryBuilder(const std::string& es_query_str); - ESQueryBuilder(ExtFunction* es_query); - rapidjson::Value to_json(rapidjson::Document& allocator) override; + ESQueryBuilder(const ExtFunction& es_query); + void to_json(rapidjson::Document* document, rapidjson::Value* query) override; private: std::string _es_query_str; }; @@ -49,8 +49,8 @@ class TermQueryBuilder : public QueryBuilder { public: TermQueryBuilder(const std::string& field, const std::string& term); - TermQueryBuilder(ExtBinaryPredicate* binary_predicate); - rapidjson::Value to_json(rapidjson::Document& document) override; + TermQueryBuilder(const ExtBinaryPredicate& binary_predicate); + void to_json(rapidjson::Document* document, rapidjson::Value* query) override; private: std::string _field; @@ -61,28 +61,31 @@ class TermQueryBuilder : public QueryBuilder { class RangeQueryBuilder : public QueryBuilder { public: - rapidjson::Value to_json(rapidjson::Document& document) override; - RangeQueryBuilder(ExtBinaryPredicate* range_predicate); + 
RangeQueryBuilder(const ExtBinaryPredicate& range_predicate); + void to_json(rapidjson::Document* document, rapidjson::Value* query) override; private: - ExtBinaryPredicate* _range_predicate; + std::string _field; + std::string _value; + TExprOpcode::type _op; }; // process in predicate : field in [value1, value2] class TermsInSetQueryBuilder : public QueryBuilder { public: - rapidjson::Value to_json(rapidjson::Document& document) override; - TermsInSetQueryBuilder(ExtInPredicate* in_predicate); + TermsInSetQueryBuilder(const ExtInPredicate& in_predicate); + void to_json(rapidjson::Document* document, rapidjson::Value* query) override; private: - ExtInPredicate* _in_predicate; + std::string _field; + std::vector _values; }; // process like predicate : field like "a%b%c_" class WildCardQueryBuilder : public QueryBuilder { public: - rapidjson::Value to_json(rapidjson::Document& document) override; - WildCardQueryBuilder(ExtLikePredicate* like_predicate); + WildCardQueryBuilder(const ExtLikePredicate& like_predicate); + void to_json(rapidjson::Document* document, rapidjson::Value* query) override; private: std::string _like_value; @@ -93,7 +96,7 @@ class WildCardQueryBuilder : public QueryBuilder { class MatchAllQueryBuilder : public QueryBuilder { public: - rapidjson::Value to_json(rapidjson::Document& document) override; + void to_json(rapidjson::Document* document, rapidjson::Value* query) override; }; // proccess bool compound query, and play the role of a bridge for transferring predicates to es native query @@ -103,17 +106,21 @@ class BooleanQueryBuilder : public QueryBuilder { BooleanQueryBuilder(const std::vector& predicates); BooleanQueryBuilder(); ~BooleanQueryBuilder(); - rapidjson::Value to_json(rapidjson::Document& document) override; + // class method for transfer predicate to es query value, invoker should enclose this value with `query` + static void to_query(const std::vector& predicates, rapidjson::Document* root, rapidjson::Value* query); + // 
validate esquery syntax + static Status check_es_query(const ExtFunction& extFunction); + // decide which predicate can process + static void validate(const std::vector& espredicates, std::vector* result); + +private: + // add child query void should(QueryBuilder* filter); void filter(QueryBuilder* filter); void must(QueryBuilder* filter); void must_not(QueryBuilder* filter); - // class method for transfer predicate to es query value, invoker should enclose this value with `query` - static rapidjson::Value to_query(const std::vector& predicates, rapidjson::Document& root); - static Status check_es_query(const ExtFunction& extFunction); - static std::vector validate(const std::vector& espredicates); + void to_json(rapidjson::Document* document, rapidjson::Value* query) override; -private: std::vector _must_clauses; std::vector _must_not_clauses; std::vector _filter_clauses; diff --git a/be/src/util/es_scroll_query.cpp b/be/src/util/es_scroll_query.cpp index 49aee11eba646a..e12f0d4c0fe79c 100644 --- a/be/src/util/es_scroll_query.cpp +++ b/be/src/util/es_scroll_query.cpp @@ -69,7 +69,9 @@ std::string ESScrollQueryBuilder::build(const std::map es_query_dsl.SetObject(); // generate the filter caluse rapidjson::Document scratch_document; - rapidjson::Value query_node = BooleanQueryBuilder::to_query(predicates, scratch_document); + rapidjson::Value query_node(rapidjson::kObjectType); + query_node.SetObject(); + BooleanQueryBuilder::to_query(predicates, &scratch_document, &query_node); // note: add `query` for this value.... 
es_query_dsl.AddMember("query", query_node, allocator); // just filter the selected fields for reducing the network cost diff --git a/be/test/exec/es_predicate_test.cpp b/be/test/exec/es_predicate_test.cpp index d448114336d40f..85a245a1cf5fce 100644 --- a/be/test/exec/es_predicate_test.cpp +++ b/be/test/exec/es_predicate_test.cpp @@ -153,7 +153,9 @@ TEST_F(EsPredicateTest, normal) { } rapidjson::Document document; - rapidjson::Value compound_bool_value = BooleanQueryBuilder::to_query(predicates, document); + rapidjson::Value compound_bool_value(rapidjson::kObjectType); + compound_bool_value.SetObject(); + BooleanQueryBuilder::to_query(predicates, &document, &compound_bool_value); rapidjson::StringBuffer buffer; rapidjson::Writer writer(buffer); compound_bool_value.Accept(writer); diff --git a/be/test/util/es_query_builder_test.cpp b/be/test/util/es_query_builder_test.cpp index 6f9f8691ac0d6d..d2a6277f1c914a 100644 --- a/be/test/util/es_query_builder_test.cpp +++ b/be/test/util/es_query_builder_test.cpp @@ -42,10 +42,12 @@ TEST_F(BooleanQueryBuilderTest, term_query) { ExtLiteral term_literal(TYPE_VARCHAR, &value); TypeDescriptor type_desc = TypeDescriptor::create_varchar_type(3); std::string name = "content"; - ExtBinaryPredicate* term_predicate = new ExtBinaryPredicate(TExprNodeType::BINARY_PRED, name, type_desc, TExprOpcode::EQ, term_literal); + ExtBinaryPredicate term_predicate(TExprNodeType::BINARY_PRED, name, type_desc, TExprOpcode::EQ, term_literal); TermQueryBuilder term_query(term_predicate); rapidjson::Document document; - rapidjson::Value term_value = term_query.to_json(document); + rapidjson::Value term_value(rapidjson::kObjectType); + term_value.SetObject(); + term_query.to_json(&document, &term_value); rapidjson::StringBuffer buffer; rapidjson::Writer writer(buffer); term_value.Accept(writer); @@ -61,10 +63,12 @@ TEST_F(BooleanQueryBuilderTest, range_query) { ExtLiteral term_literal(TYPE_VARCHAR, &value); TypeDescriptor type_desc = 
TypeDescriptor::create_varchar_type(1); std::string name = "k"; - ExtBinaryPredicate* range_predicate = new ExtBinaryPredicate(TExprNodeType::BINARY_PRED, name, type_desc, TExprOpcode::GE, term_literal); + ExtBinaryPredicate range_predicate(TExprNodeType::BINARY_PRED, name, type_desc, TExprOpcode::GE, term_literal); RangeQueryBuilder range_query(range_predicate); rapidjson::Document document; - rapidjson::Value range_value = range_query.to_json(document); + rapidjson::Value range_value(rapidjson::kObjectType); + range_value.SetObject(); + range_query.to_json(&document, &range_value); rapidjson::StringBuffer buffer; rapidjson::Writer writer(buffer); range_value.Accept(writer); @@ -85,10 +89,12 @@ TEST_F(BooleanQueryBuilderTest, es_query) { ExtLiteral term_literal(TYPE_VARCHAR, &value); std::vector values = {term_literal}; std::string function_name = "esquery"; - ExtFunction* function_predicate = new ExtFunction(TExprNodeType::FUNCTION_CALL, function_name, cols, values); + ExtFunction function_predicate(TExprNodeType::FUNCTION_CALL, function_name, cols, values); ESQueryBuilder es_query(function_predicate); rapidjson::Document document; - rapidjson::Value es_query_value = es_query.to_json(document); + rapidjson::Value es_query_value(rapidjson::kObjectType); + es_query_value.SetObject(); + es_query.to_json(&document, &es_query_value); rapidjson::StringBuffer buffer; rapidjson::Writer writer(buffer); es_query_value.Accept(writer); @@ -106,10 +112,12 @@ TEST_F(BooleanQueryBuilderTest, like_query) { StringValue value(str, length); ExtLiteral like_literal(TYPE_VARCHAR, &value); std::string name = "content"; - ExtLikePredicate* like_predicate = new ExtLikePredicate(TExprNodeType::LIKE_PRED, name, type_desc, like_literal); + ExtLikePredicate like_predicate(TExprNodeType::LIKE_PRED, name, type_desc, like_literal); WildCardQueryBuilder like_query(like_predicate); rapidjson::Document document; - rapidjson::Value like_query_value = like_query.to_json(document); + 
rapidjson::Value like_query_value(rapidjson::kObjectType); + like_query_value.SetObject(); + like_query.to_json(&document, &like_query_value); rapidjson::StringBuffer buffer; rapidjson::Writer writer(buffer); like_query_value.Accept(writer); @@ -140,10 +148,12 @@ TEST_F(BooleanQueryBuilderTest, terms_in_query) { ExtLiteral term_literal_3(TYPE_VARCHAR, &string_value_3); std::vector terms_values = {term_literal_1, term_literal_2, term_literal_3}; - ExtInPredicate* in_predicate = new ExtInPredicate(TExprNodeType::IN_PRED, false, terms_in_field, terms_in_col_type_desc, terms_values); + ExtInPredicate in_predicate(TExprNodeType::IN_PRED, false, terms_in_field, terms_in_col_type_desc, terms_values); TermsInSetQueryBuilder terms_query(in_predicate); rapidjson::Document document; - rapidjson::Value in_query_value = terms_query.to_json(document); + rapidjson::Value in_query_value(rapidjson::kObjectType); + in_query_value.SetObject(); + terms_query.to_json(&document, &in_query_value); rapidjson::StringBuffer buffer; rapidjson::Writer writer(buffer); in_query_value.Accept(writer); @@ -156,7 +166,9 @@ TEST_F(BooleanQueryBuilderTest, match_all_query) { // match all docs MatchAllQueryBuilder match_all_query; rapidjson::Document document; - rapidjson::Value match_all_query_value = match_all_query.to_json(document); + rapidjson::Value match_all_query_value(rapidjson::kObjectType); + match_all_query_value.SetObject(); + match_all_query.to_json(&document, &match_all_query_value); rapidjson::StringBuffer buffer; rapidjson::Writer writer(buffer); match_all_query_value.Accept(writer); @@ -209,7 +221,9 @@ TEST_F(BooleanQueryBuilderTest, bool_query) { std::vector or_predicates = {like_predicate, function_predicate, range_predicate, term_predicate}; BooleanQueryBuilder bool_query(or_predicates); rapidjson::Document document; - rapidjson::Value bool_query_value = bool_query.to_json(document); + rapidjson::Value bool_query_value(rapidjson::kObjectType); + bool_query_value.SetObject(); + 
bool_query.to_json(&document, &bool_query_value); rapidjson::StringBuffer buffer; rapidjson::Writer writer(buffer); bool_query_value.Accept(writer); @@ -291,7 +305,9 @@ TEST_F(BooleanQueryBuilderTest, compound_bool_query) { std::vector and_bool_predicates = {bool_predicate_1, bool_predicate_2, bool_predicate_3, bool_predicate_4}; rapidjson::Document document; - rapidjson::Value compound_bool_value = BooleanQueryBuilder::to_query(and_bool_predicates, document); + rapidjson::Value compound_bool_value(rapidjson::kObjectType); + compound_bool_value.SetObject(); + BooleanQueryBuilder::to_query(and_bool_predicates, &document, &compound_bool_value); rapidjson::StringBuffer buffer; rapidjson::Writer writer(buffer); compound_bool_value.Accept(writer); @@ -416,7 +432,8 @@ TEST_F(BooleanQueryBuilderTest, validate_partial) { EsPredicate* bool_predicate_3 = new EsPredicate(bool_predicates_3); std::vector and_bool_predicates = {bool_predicate_1, bool_predicate_2, bool_predicate_3}; - std::vector result = BooleanQueryBuilder::validate(and_bool_predicates); + std::vector result; + BooleanQueryBuilder::validate(and_bool_predicates, &result); std::vector expected = {true, true, true}; ASSERT_TRUE(result == expected); char illegal_query[] = "{\"term\": {\"k1\" : \"2\"},\"match\": {\"k1\": \"3\"}}"; @@ -428,11 +445,11 @@ TEST_F(BooleanQueryBuilderTest, validate_partial) { std::vector illegal_bool_predicates_3 = {term_ne_predicate, illegal_function_preficate}; EsPredicate* illegal_bool_predicate_3 = new EsPredicate(illegal_bool_predicates_3); std::vector and_bool_predicates_1 = {bool_predicate_1, bool_predicate_2, illegal_bool_predicate_3}; - result = BooleanQueryBuilder::validate(and_bool_predicates_1); + std::vector result1; + BooleanQueryBuilder::validate(and_bool_predicates_1, &result1); std::vector expected1 = {true, true, false}; - ASSERT_TRUE(result == expected1); + ASSERT_TRUE(result1 == expected1); } - } int main(int argc, char* argv[]) { From 
290af1c6f4536f08c77ee9aea42a6f5883844cac Mon Sep 17 00:00:00 2001 From: lide-reed Date: Mon, 22 Apr 2019 11:08:24 +0800 Subject: [PATCH 54/73] Fix some format issues --- be/src/exec/es_http_scan_node.cpp | 1 - be/test/util/es_scan_reader_test.cpp | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/be/src/exec/es_http_scan_node.cpp b/be/src/exec/es_http_scan_node.cpp index 3ca91dde784111..c485291ba0c41a 100644 --- a/be/src/exec/es_http_scan_node.cpp +++ b/be/src/exec/es_http_scan_node.cpp @@ -319,7 +319,6 @@ Status EsHttpScanNode::scanner_scan( } // eval conjuncts of this row. - // TODO exclude those predicates which ES applied by _predicate_to_conjunct if (eval_conjuncts(&conjunct_ctxs[0], conjunct_ctxs.size(), row)) { row_batch->commit_last_row(); char* new_tuple = reinterpret_cast(tuple); diff --git a/be/test/util/es_scan_reader_test.cpp b/be/test/util/es_scan_reader_test.cpp index 57b38aeb4ffcb2..5df79d15306929 100644 --- a/be/test/util/es_scan_reader_test.cpp +++ b/be/test/util/es_scan_reader_test.cpp @@ -171,8 +171,8 @@ class RestClearScrollAction : public HttpHandler { HttpChannel::send_reply(req,HttpStatus::NOT_FOUND, "invalid scroll request"); return; } else { - rapidjson::Document clear_scroll_result; - rapidjson::Document::AllocatorType &allocator = clear_scroll_result.GetAllocator(); + rapidjson::Document clear_scroll_result; + rapidjson::Document::AllocatorType &allocator = clear_scroll_result.GetAllocator(); clear_scroll_result.SetObject(); clear_scroll_result.AddMember("succeeded", true, allocator); clear_scroll_result.AddMember("num_freed", 1, allocator); From be5be89475fbf31badef679768d423f52b2c372b Mon Sep 17 00:00:00 2001 From: lide-reed Date: Mon, 22 Apr 2019 11:18:23 +0800 Subject: [PATCH 55/73] Fix a bug when show a ES table --- be/test/util/es_scan_reader_test.cpp | 14 -------------- .../java/org/apache/doris/catalog/Catalog.java | 2 +- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git 
a/be/test/util/es_scan_reader_test.cpp b/be/test/util/es_scan_reader_test.cpp index 5df79d15306929..a1eb0882e73c93 100644 --- a/be/test/util/es_scan_reader_test.cpp +++ b/be/test/util/es_scan_reader_test.cpp @@ -232,20 +232,6 @@ TEST_F(MockESServerTest, workflow) { if(eos) { break; } - //rapidjson::Document docuemnt_node; - //docuemnt_node.Parse<0>(response.c_str()); - //rapidjson::Value &scroll_node = docuemnt_node["_scroll_id"]; - //std::string _scroll_id = scroll_node.GetString(); - //int id = atoi(_scroll_id.c_str()); - //rapidjson::Value &outer_hits_node = docuemnt_node["hits"]; - //rapidjson::Value &inner_hits_node = outer_hits_node["hits"]; - //rapidjson::Value &source_node = inner_hits_node[0]; - //rapidjson::Value &id_node = source_node["id"]; - //rapidjson::Value &value_node = source_node["value"]; - //ASSERT_EQ(id, id_node.GetInt()); - //std::string value = value_node.GetString(); - //ASSERT_EQ(id, atoi(value.c_str())); - //ASSERT_TRUE(st.ok()); } auto cst = reader.close(); ASSERT_TRUE(cst.ok()); diff --git a/fe/src/main/java/org/apache/doris/catalog/Catalog.java b/fe/src/main/java/org/apache/doris/catalog/Catalog.java index 6d571ee58a667c..da9d5253b4d740 100644 --- a/fe/src/main/java/org/apache/doris/catalog/Catalog.java +++ b/fe/src/main/java/org/apache/doris/catalog/Catalog.java @@ -3946,7 +3946,7 @@ public static void getDdlStmt(Table table, List createTableStmt, List Date: Tue, 23 Apr 2019 15:12:11 +0800 Subject: [PATCH 56/73] Relocate es related source --- be/src/exec/CMakeLists.txt | 6 ++++++ be/src/{util => exec/es}/es_query_builder.cpp | 2 +- be/src/{util => exec/es}/es_query_builder.h | 0 be/src/{util => exec/es}/es_scan_reader.cpp | 5 +++-- be/src/{util => exec/es}/es_scan_reader.h | 0 be/src/{util => exec/es}/es_scroll_parser.cpp | 5 ++--- be/src/{util => exec/es}/es_scroll_parser.h | 2 -- be/src/{util => exec/es}/es_scroll_query.cpp | 4 ++-- be/src/{util => exec/es}/es_scroll_query.h | 3 +-- be/src/util/CMakeLists.txt | 4 ---- 
be/test/exec/CMakeLists.txt | 2 ++ be/test/{util => exec/es}/es_query_builder_test.cpp | 5 +++-- be/test/{util => exec/es}/es_scan_reader_test.cpp | 11 ++++++----- be/test/util/CMakeLists.txt | 2 -- 14 files changed, 26 insertions(+), 25 deletions(-) rename be/src/{util => exec/es}/es_query_builder.cpp (99%) rename be/src/{util => exec/es}/es_query_builder.h (100%) rename be/src/{util => exec/es}/es_scan_reader.cpp (99%) rename be/src/{util => exec/es}/es_scan_reader.h (100%) rename be/src/{util => exec/es}/es_scroll_parser.cpp (99%) rename be/src/{util => exec/es}/es_scroll_parser.h (99%) rename be/src/{util => exec/es}/es_scroll_query.cpp (99%) rename be/src/{util => exec/es}/es_scroll_query.h (99%) rename be/test/{util => exec/es}/es_query_builder_test.cpp (99%) rename be/test/{util => exec/es}/es_scan_reader_test.cpp (99%) diff --git a/be/src/exec/CMakeLists.txt b/be/src/exec/CMakeLists.txt index 14f499183cbe0f..6a6d4ecd29362d 100644 --- a/be/src/exec/CMakeLists.txt +++ b/be/src/exec/CMakeLists.txt @@ -66,6 +66,10 @@ set(EXEC_FILES es_http_scan_node.cpp es_http_scanner.cpp es_predicate.cpp + es/es_scan_reader.cpp + es/es_scroll_query.cpp + es/es_scroll_parser.cpp + es/es_query_builder.cpp spill_sort_node.cc union_node.cpp union_node_ir.cpp @@ -104,6 +108,8 @@ add_library(Exec STATIC ) # TODO: why is this test disabled? +#ADD_BE_TEST(es/es_query_builder_test) +#ADD_BE_TEST(es/es_scan_reader_test) #ADD_BE_TEST(new_olap_scan_node_test) #ADD_BE_TEST(pre_aggregation_node_test) #ADD_BE_TEST(hash_table_test) diff --git a/be/src/util/es_query_builder.cpp b/be/src/exec/es/es_query_builder.cpp similarity index 99% rename from be/src/util/es_query_builder.cpp rename to be/src/exec/es/es_query_builder.cpp index dc0b572fa600dc..da927391ad9e62 100644 --- a/be/src/util/es_query_builder.cpp +++ b/be/src/exec/es/es_query_builder.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-#include "util/es_query_builder.h" +#include "exec/es/es_query_builder.h" #include #include "rapidjson/rapidjson.h" diff --git a/be/src/util/es_query_builder.h b/be/src/exec/es/es_query_builder.h similarity index 100% rename from be/src/util/es_query_builder.h rename to be/src/exec/es/es_query_builder.h diff --git a/be/src/util/es_scan_reader.cpp b/be/src/exec/es/es_scan_reader.cpp similarity index 99% rename from be/src/util/es_scan_reader.cpp rename to be/src/exec/es/es_scan_reader.cpp index ec9a007e8f3535..517c6ef2333f37 100644 --- a/be/src/util/es_scan_reader.cpp +++ b/be/src/exec/es/es_scan_reader.cpp @@ -15,13 +15,14 @@ // specific language governing permissions and limitations // under the License. +#include "exec/es/es_scan_reader.h" + #include #include -#include "es_scan_reader.h" +#include #include "es_scroll_query.h" #include "common/logging.h" #include "common/status.h" -#include namespace doris { const std::string REUQEST_SCROLL_FILTER_PATH = "filter_path=_scroll_id,hits.hits._source,hits.total,_id,hits.hits._source.fields"; diff --git a/be/src/util/es_scan_reader.h b/be/src/exec/es/es_scan_reader.h similarity index 100% rename from be/src/util/es_scan_reader.h rename to be/src/exec/es/es_scan_reader.h diff --git a/be/src/util/es_scroll_parser.cpp b/be/src/exec/es/es_scroll_parser.cpp similarity index 99% rename from be/src/util/es_scroll_parser.cpp rename to be/src/exec/es/es_scroll_parser.cpp index c672604fda90da..1f932dc9771699 100644 --- a/be/src/util/es_scroll_parser.cpp +++ b/be/src/exec/es/es_scroll_parser.cpp @@ -15,12 +15,11 @@ // specific language governing permissions and limitations // under the License. 
-#include "es_scroll_parser.h" +#include "exec/es/es_scroll_parser.h" -#include #include #include - +#include #include "common/logging.h" #include "common/status.h" #include "runtime/mem_pool.h" diff --git a/be/src/util/es_scroll_parser.h b/be/src/exec/es/es_scroll_parser.h similarity index 99% rename from be/src/util/es_scroll_parser.h rename to be/src/exec/es/es_scroll_parser.h index 494fef23b280f1..3802afdaf7eb2c 100644 --- a/be/src/util/es_scroll_parser.h +++ b/be/src/exec/es/es_scroll_parser.h @@ -16,9 +16,7 @@ // under the License. #pragma once - #include - #include "rapidjson/document.h" #include "runtime/descriptors.h" #include "runtime/tuple.h" diff --git a/be/src/util/es_scroll_query.cpp b/be/src/exec/es/es_scroll_query.cpp similarity index 99% rename from be/src/util/es_scroll_query.cpp rename to be/src/exec/es/es_scroll_query.cpp index e12f0d4c0fe79c..e97fc90c3ef917 100644 --- a/be/src/util/es_scroll_query.cpp +++ b/be/src/exec/es/es_scroll_query.cpp @@ -15,10 +15,10 @@ // specific language governing permissions and limitations // under the License. -#include "util/es_query_builder.h" +#include "exec/es/es_query_builder.h" -#include #include +#include #include "common/logging.h" #include "es_scroll_query.h" #include "rapidjson/document.h" diff --git a/be/src/util/es_scroll_query.h b/be/src/exec/es/es_scroll_query.h similarity index 99% rename from be/src/util/es_scroll_query.h rename to be/src/exec/es/es_scroll_query.h index c1e99f899f2485..4ef584711c9067 100644 --- a/be/src/util/es_scroll_query.h +++ b/be/src/exec/es/es_scroll_query.h @@ -15,11 +15,10 @@ // specific language governing permissions and limitations // under the License. 
- #pragma once +#include "exec/es_predicate.h" #include #include -#include "exec/es_predicate.h" namespace doris { diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt index 1dce557dca9f4e..e7206b80d7fdb6 100644 --- a/be/src/util/CMakeLists.txt +++ b/be/src/util/CMakeLists.txt @@ -74,10 +74,6 @@ add_library(Util STATIC aes_util.cpp string_util.cpp md5.cpp - es_scan_reader.cpp - es_scroll_query.cpp - es_scroll_parser.cpp - es_query_builder.cpp ) #ADD_BE_TEST(integer-array-test) diff --git a/be/test/exec/CMakeLists.txt b/be/test/exec/CMakeLists.txt index defb493151e2b9..18771ff43417ed 100644 --- a/be/test/exec/CMakeLists.txt +++ b/be/test/exec/CMakeLists.txt @@ -62,3 +62,5 @@ ADD_BE_TEST(olap_table_sink_test) #ADD_BE_TEST(schema_scanner/schema_engines_scanner_test) #ADD_BE_TEST(schema_scanner/schema_collations_scanner_test) #ADD_BE_TEST(schema_scanner/schema_charsets_scanner_test) +#ADD_BE_TEST(es/es_scan_reader_test) +#ADD_BE_TEST(es/es_query_builder_test) diff --git a/be/test/util/es_query_builder_test.cpp b/be/test/exec/es/es_query_builder_test.cpp similarity index 99% rename from be/test/util/es_query_builder_test.cpp rename to be/test/exec/es/es_query_builder_test.cpp index d2a6277f1c914a..be33978837f13b 100644 --- a/be/test/util/es_query_builder_test.cpp +++ b/be/test/exec/es/es_query_builder_test.cpp @@ -18,10 +18,11 @@ #include #include #include + #include "common/logging.h" -#include "util/es_query_builder.h" -#include "rapidjson/document.h" +#include "exec/es/es_query_builder.h" #include "exec/es_predicate.h" +#include "rapidjson/document.h" #include "rapidjson/rapidjson.h" #include "rapidjson/stringbuffer.h" #include "rapidjson/writer.h" diff --git a/be/test/util/es_scan_reader_test.cpp b/be/test/exec/es/es_scan_reader_test.cpp similarity index 99% rename from be/test/util/es_scan_reader_test.cpp rename to be/test/exec/es/es_scan_reader_test.cpp index a1eb0882e73c93..2f822ba176e774 100644 --- a/be/test/util/es_scan_reader_test.cpp +++ 
b/be/test/exec/es/es_scan_reader_test.cpp @@ -15,10 +15,13 @@ // specific language governing permissions and limitations // under the License. -#include "util/es_scan_reader.h" -#include "util/es_scroll_query.h" +#include +#include #include +#include + #include "common/logging.h" +#include "exec/es/es_scroll_query.h" #include "http/ev_http_server.h" #include "http/http_channel.h" #include "http/http_handler.h" @@ -26,9 +29,7 @@ #include "rapidjson/document.h" #include "rapidjson/writer.h" #include "rapidjson/stringbuffer.h" -#include -#include -#include +#include "util/es_scan_reader.h" namespace doris { diff --git a/be/test/util/CMakeLists.txt b/be/test/util/CMakeLists.txt index bff2fb9b1eb0ac..0ac6e774d4661f 100644 --- a/be/test/util/CMakeLists.txt +++ b/be/test/util/CMakeLists.txt @@ -38,5 +38,3 @@ ADD_BE_TEST(uid_util_test) ADD_BE_TEST(arena_test) ADD_BE_TEST(aes_util_test) ADD_BE_TEST(md5_test) -ADD_BE_TEST(es_scan_reader_test) -ADD_BE_TEST(es_query_builder_test) From 3fa5b1cc426bdb4b4c44ef9e491958b12678207f Mon Sep 17 00:00:00 2001 From: lide-reed Date: Tue, 23 Apr 2019 18:40:23 +0800 Subject: [PATCH 57/73] Optimize codes --- be/src/exec/CMakeLists.txt | 2 +- be/src/exec/{ => es}/es_predicate.cpp | 4 +- be/src/exec/{ => es}/es_predicate.h | 0 be/src/exec/es/es_query_builder.h | 2 +- be/src/exec/es/es_scan_reader.cpp | 5 +- be/src/exec/es/es_scan_reader.h | 3 +- be/src/exec/es/es_scroll_parser.cpp | 238 ++++++++++---------------- be/src/exec/es/es_scroll_parser.h | 2 + be/src/exec/es/es_scroll_query.cpp | 7 +- be/src/exec/es/es_scroll_query.h | 4 +- be/src/exec/es_http_scan_node.cpp | 8 +- be/src/exec/es_http_scanner.h | 2 +- 12 files changed, 114 insertions(+), 163 deletions(-) rename be/src/exec/{ => es}/es_predicate.cpp (99%) rename be/src/exec/{ => es}/es_predicate.h (100%) diff --git a/be/src/exec/CMakeLists.txt b/be/src/exec/CMakeLists.txt index 6a6d4ecd29362d..554ecc13afdbac 100644 --- a/be/src/exec/CMakeLists.txt +++ b/be/src/exec/CMakeLists.txt @@ 
-65,7 +65,7 @@ set(EXEC_FILES es_scan_node.cpp es_http_scan_node.cpp es_http_scanner.cpp - es_predicate.cpp + es/es_predicate.cpp es/es_scan_reader.cpp es/es_scroll_query.cpp es/es_scroll_parser.cpp diff --git a/be/src/exec/es_predicate.cpp b/be/src/exec/es/es_predicate.cpp similarity index 99% rename from be/src/exec/es_predicate.cpp rename to be/src/exec/es/es_predicate.cpp index 7ccb066ef99807..60077634c3d5e4 100644 --- a/be/src/exec/es_predicate.cpp +++ b/be/src/exec/es/es_predicate.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "exec/es_predicate.h" +#include "exec/es/es_predicate.h" #include #include @@ -25,6 +25,7 @@ #include "common/status.h" #include "common/logging.h" +#include "exec/es/es_query_builder.h" #include "exprs/expr.h" #include "exprs/expr_context.h" #include "exprs/in_predicate.h" @@ -42,7 +43,6 @@ #include "service/backend_options.h" #include "util/debug_util.h" -#include "util/es_query_builder.h" #include "util/runtime_profile.h" namespace doris { diff --git a/be/src/exec/es_predicate.h b/be/src/exec/es/es_predicate.h similarity index 100% rename from be/src/exec/es_predicate.h rename to be/src/exec/es/es_predicate.h diff --git a/be/src/exec/es/es_query_builder.h b/be/src/exec/es/es_query_builder.h index 2575063f43335a..e7c5e563356069 100644 --- a/be/src/exec/es/es_query_builder.h +++ b/be/src/exec/es/es_query_builder.h @@ -21,7 +21,7 @@ #include #include "rapidjson/document.h" -#include "exec/es_predicate.h" +#include "exec/es/es_predicate.h" #include "common/status.h" namespace doris { diff --git a/be/src/exec/es/es_scan_reader.cpp b/be/src/exec/es/es_scan_reader.cpp index 517c6ef2333f37..7df7fbc3da5acd 100644 --- a/be/src/exec/es/es_scan_reader.cpp +++ b/be/src/exec/es/es_scan_reader.cpp @@ -17,12 +17,13 @@ #include "exec/es/es_scan_reader.h" +#include #include #include -#include -#include "es_scroll_query.h" + #include "common/logging.h" #include "common/status.h" 
+#include "exec/es/es_scroll_query.h" namespace doris { const std::string REUQEST_SCROLL_FILTER_PATH = "filter_path=_scroll_id,hits.hits._source,hits.total,_id,hits.hits._source.fields"; diff --git a/be/src/exec/es/es_scan_reader.h b/be/src/exec/es/es_scan_reader.h index d7499ff27bafdf..449c2686909a9d 100644 --- a/be/src/exec/es/es_scan_reader.h +++ b/be/src/exec/es/es_scan_reader.h @@ -18,8 +18,9 @@ #pragma once #include + +#include "exec/es/es_scroll_parser.h" #include "http/http_client.h" -#include "util/es_scroll_parser.h" using std::string; diff --git a/be/src/exec/es/es_scroll_parser.cpp b/be/src/exec/es/es_scroll_parser.cpp index 1f932dc9771699..7e15047c074f1b 100644 --- a/be/src/exec/es/es_scroll_parser.cpp +++ b/be/src/exec/es/es_scroll_parser.cpp @@ -20,6 +20,7 @@ #include #include #include + #include "common/logging.h" #include "common/status.h" #include "runtime/mem_pool.h" @@ -42,6 +43,29 @@ static const string ERROR_MEM_LIMIT_EXCEEDED = "DataSourceScanNode::$0() failed static const string ERROR_COL_DATA_IS_ARRAY = "Data source returned an array for the type $0" "based on column metadata."; +#define RETURN_ERROR_IF_COL_IS_ARRAY(col, type) \ + do { \ + if (col.IsArray()) { \ + return Status(strings::Substitute(ERROR_COL_DATA_IS_ARRAY, type_to_string(type))); \ + } \ + } while (false) + + +#define RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type) \ + do { \ + if (!col.IsString()) { \ + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, type_to_string(type))); \ + } \ + } while (false) + + +#define RETURN_ERROR_IF_PARSING_FAILED(result, type) \ + do { \ + if (result != StringParser::PARSE_SUCCESS) { \ + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, type_to_string(type))); \ + } \ + } while (false) + ScrollParser::ScrollParser(const std::string& scroll_result) : _scroll_id(""), _total(0), @@ -55,6 +79,10 @@ ScrollParser::~ScrollParser() { void ScrollParser::parsing(const std::string& scroll_result) { 
_document_node.Parse(scroll_result.c_str()); + if (_document_node.HasParseError()) { + LOG(ERROR) << "Parsing json error, json is: " << scroll_result; + return; + } if (!_document_node.HasMember(FIELD_SCROLL_ID)) { LOG(ERROR) << "maybe not a scroll request"; @@ -116,8 +144,7 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, continue; } - std::string s(slot_desc->col_name()); - const char* col_name = s.c_str(); + const char* col_name = slot_desc->col_name().c_str(); rapidjson::Value::ConstMemberIterator itr = line.FindMember(col_name); if (itr == line.MemberEnd()) { tuple->set_null(slot_desc->null_indicator_offset()); @@ -128,15 +155,13 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, const rapidjson::Value &col = line[col_name]; void* slot = tuple->get_slot(slot_desc->tuple_offset()); - switch (slot_desc->type().type) { + PrimitiveType type = slot_desc->type().type; + switch (type) { case TYPE_CHAR: case TYPE_VARCHAR: { - if (col.IsArray()) { - return Status(strings::Substitute(ERROR_COL_DATA_IS_ARRAY, "STRING")); - } - if (!col.IsString()) { - return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "STRING")); - } + RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); + const std::string& val = col.GetString(); size_t val_size = col.GetStringLength(); char* buffer = reinterpret_cast(tuple_pool->try_allocate_unaligned(val_size)); @@ -157,22 +182,15 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, break; } - if (col.IsArray()) { - return Status(strings::Substitute(ERROR_COL_DATA_IS_ARRAY, "TINYINT")); - } - - if (!col.IsString()) { - return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "TINYINT")); - } + RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); + StringParser::ParseResult result; const std::string& val = col.GetString(); - const char* data = val.c_str(); size_t len = col.GetStringLength(); - 
StringParser::ParseResult result; - int8_t v = StringParser::string_to_int(data, len, &result); - if (result != StringParser::PARSE_SUCCESS) { - return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "TINYINT")); - } + int8_t v = + StringParser::string_to_int(val.c_str(), len, &result); + RETURN_ERROR_IF_PARSING_FAILED(result, type); *reinterpret_cast(slot) = v; break; } @@ -183,22 +201,15 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, break; } - if (col.IsArray()) { - return Status(strings::Substitute(ERROR_COL_DATA_IS_ARRAY, "SMALLINT")); - } + RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); - if (!col.IsString()) { - return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "SMALLINT")); - } - const std::string& val = col.GetString(); - const char* data = val.c_str(); size_t len = col.GetStringLength(); StringParser::ParseResult result; - int16_t v = StringParser::string_to_int(data, len, &result); - if (result != StringParser::PARSE_SUCCESS) { - return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "SMALLINT")); - } + int16_t v = + StringParser::string_to_int(val.c_str(), len, &result); + RETURN_ERROR_IF_PARSING_FAILED(result, type); *reinterpret_cast(slot) = v; break; } @@ -209,22 +220,15 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, break; } - if (col.IsArray()) { - return Status(strings::Substitute(ERROR_COL_DATA_IS_ARRAY, "INT")); - } - - if (!col.IsString()) { - return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "INT")); - } + RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); const std::string& val = col.GetString(); - const char* data = val.c_str(); size_t len = col.GetStringLength(); StringParser::ParseResult result; - int32_t v = StringParser::string_to_int(data, len, &result); - if (result != StringParser::PARSE_SUCCESS) { - return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "INT")); - } + int32_t v = + 
StringParser::string_to_int(val.c_str(), len, &result); + RETURN_ERROR_IF_PARSING_FAILED(result, type); *reinterpret_cast(slot) = v; break; } @@ -235,22 +239,15 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, break; } - if (col.IsArray()) { - return Status(strings::Substitute(ERROR_COL_DATA_IS_ARRAY, "BIGINT")); - } - - if (!col.IsString()) { - return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "BIGINT")); - } + RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); const std::string& val = col.GetString(); - const char* data = val.c_str(); size_t len = col.GetStringLength(); StringParser::ParseResult result; - int64_t v = StringParser::string_to_int(data, len, &result); - if (result != StringParser::PARSE_SUCCESS) { - return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "BIGINT")); - } + int64_t v = + StringParser::string_to_int(val.c_str(), len, &result); + RETURN_ERROR_IF_PARSING_FAILED(result, type); *reinterpret_cast(slot) = v; break; } @@ -261,22 +258,15 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, break; } - if (col.IsArray()) { - return Status(strings::Substitute(ERROR_COL_DATA_IS_ARRAY, "LARGEINT")); - } - - if (!col.IsString()) { - return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "LARGEINT")); - } + RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); const std::string& val = col.GetString(); - const char* data = val.c_str(); size_t len = col.GetStringLength(); StringParser::ParseResult result; - __int128 v = StringParser::string_to_int<__int128>(data, len, &result); - if (result != StringParser::PARSE_SUCCESS) { - return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "LARGEINT")); - } + __int128 v = + StringParser::string_to_int<__int128>(val.c_str(), len, &result); + RETURN_ERROR_IF_PARSING_FAILED(result, type); memcpy(slot, &v, sizeof(v)); break; } @@ -287,22 +277,15 @@ Status ScrollParser::fill_tuple(const 
TupleDescriptor* tuple_desc, break; } - if (col.IsArray()) { - return Status(strings::Substitute(ERROR_COL_DATA_IS_ARRAY, "DOUBLE")); - } - - if (!col.IsString()) { - return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "DOUBLE")); - } + RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); const std::string& val = col.GetString(); size_t val_size = col.GetStringLength(); StringParser::ParseResult result; - double d = StringParser::string_to_float(val.c_str(), - val_size, &result); - if (result != StringParser::PARSE_SUCCESS) { - return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "DOUBLE")); - } + double d = + StringParser::string_to_float(val.c_str(), val_size, &result); + RETURN_ERROR_IF_PARSING_FAILED(result, type); *reinterpret_cast(slot) = d; break; } @@ -313,21 +296,15 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, break; } - if (col.IsArray()) { - return Status(strings::Substitute(ERROR_COL_DATA_IS_ARRAY, "FLOAT")); - } - - if (!col.IsString()) { - return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "FLOAT")); - } + RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); const std::string& val = col.GetString(); size_t val_size = col.GetStringLength(); StringParser::ParseResult result; - float f = StringParser::string_to_float(val.c_str(), val_size, &result); - if (result != StringParser::PARSE_SUCCESS) { - return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "FLOAT")); - } + float f = + StringParser::string_to_float(val.c_str(), val_size, &result); + RETURN_ERROR_IF_PARSING_FAILED(result, type); *reinterpret_cast(slot) = f; break; } @@ -343,86 +320,53 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, break; } - if (col.IsArray()) { - return Status(strings::Substitute(ERROR_COL_DATA_IS_ARRAY, "BOOLEAN")); - } - - if (!col.IsString()) { - return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "BOOLEAN")); - } + 
RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); const std::string& val = col.GetString(); size_t val_size = col.GetStringLength(); StringParser::ParseResult result; - bool b = StringParser::string_to_bool(val.c_str(), val_size, &result); - if (result != StringParser::PARSE_SUCCESS) { - return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "BOOLEAN")); - } + bool b = + StringParser::string_to_bool(val.c_str(), val_size, &result); + RETURN_ERROR_IF_PARSING_FAILED(result, type); *reinterpret_cast(slot) = b; break; } - case TYPE_DATE: { + case TYPE_DATE: + case TYPE_DATETIME: { if (col.IsNumber()) { if (!reinterpret_cast(slot)->from_unixtime(col.GetInt64())) { - return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATE")); + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, type_to_string(type))); } - reinterpret_cast(slot)->cast_to_date(); - break; - } - - if (col.IsArray()) { - return Status(strings::Substitute(ERROR_COL_DATA_IS_ARRAY, "TYPE_DATE")); - } - - if (!col.IsString()) { - return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATE")); - } - - DateTimeValue* ts_slot = reinterpret_cast(slot); - const std::string& val = col.GetString(); - size_t val_size = col.GetStringLength(); - if (!ts_slot->from_date_str(val.c_str(), val_size)) { - return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATE")); - } - - if (ts_slot->year() < 1900) { - return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATE")); - } - ts_slot->cast_to_date(); - break; - } - - case TYPE_DATETIME: { - if (col.IsNumber()) { - if (!reinterpret_cast(slot)->from_unixtime(col.GetInt64())) { - return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATETIME")); + if (type == TYPE_DATE) { + reinterpret_cast(slot)->cast_to_date(); + } else { + reinterpret_cast(slot)->set_type(TIME_DATETIME); } - reinterpret_cast(slot)->set_type(TIME_DATETIME); break; } - if (col.IsArray()) { - return 
Status(strings::Substitute(ERROR_COL_DATA_IS_ARRAY, "TYPE_DATETIME")); - } - - if (!col.IsString()) { - return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATETIME")); - } + RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); DateTimeValue* ts_slot = reinterpret_cast(slot); const std::string& val = col.GetString(); size_t val_size = col.GetStringLength(); if (!ts_slot->from_date_str(val.c_str(), val_size)) { - return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATETIME")); + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, type_to_string(type))); } if (ts_slot->year() < 1900) { - return Status(strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATETIME")); + return Status(strings::Substitute(ERROR_INVALID_COL_DATA, type_to_string(type))); } - ts_slot->to_datetime(); + if (type == TYPE_DATE) { + ts_slot->cast_to_date(); + } else { + ts_slot->to_datetime(); + } break; } diff --git a/be/src/exec/es/es_scroll_parser.h b/be/src/exec/es/es_scroll_parser.h index 3802afdaf7eb2c..494fef23b280f1 100644 --- a/be/src/exec/es/es_scroll_parser.h +++ b/be/src/exec/es/es_scroll_parser.h @@ -16,7 +16,9 @@ // under the License. #pragma once + #include + #include "rapidjson/document.h" #include "runtime/descriptors.h" #include "runtime/tuple.h" diff --git a/be/src/exec/es/es_scroll_query.cpp b/be/src/exec/es/es_scroll_query.cpp index e97fc90c3ef917..1c405136e749d3 100644 --- a/be/src/exec/es/es_scroll_query.cpp +++ b/be/src/exec/es/es_scroll_query.cpp @@ -15,16 +15,17 @@ // specific language governing permissions and limitations // under the License. 
-#include "exec/es/es_query_builder.h" +#include "exec/es/es_scroll_query.h" #include #include + #include "common/logging.h" -#include "es_scroll_query.h" +#include "exec/es/es_query_builder.h" +#include "exec/es/es_scan_reader.h" #include "rapidjson/document.h" #include "rapidjson/stringbuffer.h" #include "rapidjson/writer.h" -#include "util/es_scan_reader.h" namespace doris { diff --git a/be/src/exec/es/es_scroll_query.h b/be/src/exec/es/es_scroll_query.h index 4ef584711c9067..0f6c20457ad713 100644 --- a/be/src/exec/es/es_scroll_query.h +++ b/be/src/exec/es/es_scroll_query.h @@ -16,10 +16,12 @@ // under the License. #pragma once -#include "exec/es_predicate.h" + #include #include +#include "exec/es/es_predicate.h" + namespace doris { class ESScrollQueryBuilder { diff --git a/be/src/exec/es_http_scan_node.cpp b/be/src/exec/es_http_scan_node.cpp index c485291ba0c41a..0081b6c447a061 100644 --- a/be/src/exec/es_http_scan_node.cpp +++ b/be/src/exec/es_http_scan_node.cpp @@ -21,16 +21,16 @@ #include #include "common/object_pool.h" +#include "exec/es/es_predicate.h" +#include "exec/es/es_query_builder.h" +#include "exec/es/es_scan_reader.h" +#include "exec/es/es_scroll_query.h" #include "exprs/expr.h" #include "runtime/runtime_state.h" #include "runtime/row_batch.h" #include "runtime/dpp_sink_internal.h" #include "service/backend_options.h" #include "util/runtime_profile.h" -#include "util/es_scan_reader.h" -#include "util/es_scroll_query.h" -#include "util/es_query_builder.h" -#include "exec/es_predicate.h" namespace doris { diff --git a/be/src/exec/es_http_scanner.h b/be/src/exec/es_http_scanner.h index fa5bde12a8cb9c..1b91ae70950997 100644 --- a/be/src/exec/es_http_scanner.h +++ b/be/src/exec/es_http_scanner.h @@ -26,10 +26,10 @@ #include "common/status.h" #include "common/global_types.h" +#include "exec/es/es_scan_reader.h" #include "gen_cpp/PlanNodes_types.h" #include "gen_cpp/Types_types.h" #include "runtime/mem_pool.h" -#include "util/es_scan_reader.h" #include 
"util/runtime_profile.h" namespace doris { From b23a5d3033376ccd828d342af2a3945108e81539 Mon Sep 17 00:00:00 2001 From: lide-reed Date: Tue, 23 Apr 2019 19:15:22 +0800 Subject: [PATCH 58/73] Optimize some codes --- be/src/exec/es/es_predicate.cpp | 4 +--- be/src/exec/es_http_scan_node.cpp | 7 +++++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/be/src/exec/es/es_predicate.cpp b/be/src/exec/es/es_predicate.cpp index 60077634c3d5e4..38f380c1c2c1a1 100644 --- a/be/src/exec/es/es_predicate.cpp +++ b/be/src/exec/es/es_predicate.cpp @@ -406,11 +406,9 @@ bool EsPredicate::is_match_func(const Expr* conjunct) { } const SlotDescriptor* EsPredicate::get_slot_desc(const SlotRef* slotRef) { - std::vector slot_ids; - slotRef->get_slot_ids(&slot_ids); const SlotDescriptor* slot_desc = nullptr; for (SlotDescriptor* slot : _tuple_desc->slots()) { - if (slot->id() == slot_ids[0]) { + if (slot->id() == slotRef->slot_id()) { slot_desc = slot; break; } diff --git a/be/src/exec/es_http_scan_node.cpp b/be/src/exec/es_http_scan_node.cpp index 0081b6c447a061..111767594ee12a 100644 --- a/be/src/exec/es_http_scan_node.cpp +++ b/be/src/exec/es_http_scan_node.cpp @@ -92,8 +92,11 @@ Status EsHttpScanNode::build_conjuncts_list() { if (predicate->build_disjuncts_list()) { _predicates.push_back(predicate); _predicate_to_conjunct.push_back(i); - } else if (!predicate->get_es_query_status().ok()) { - return predicate->get_es_query_status(); + } else { + status = predicate->get_es_query_status(); + if (!status.ok()) { + LOG(WARNING) << "Build es_query failed: " << status.get_error_msg(); + } } } From df2a7e03304eca6d8b2bcabdb8d3362f52dbc2ab Mon Sep 17 00:00:00 2001 From: lide-reed Date: Tue, 23 Apr 2019 19:39:02 +0800 Subject: [PATCH 59/73] Adjust unit test ES http related accordingly --- be/test/exec/CMakeLists.txt | 6 +++--- be/test/exec/{ => es}/es_predicate_test.cpp | 4 ++-- be/test/exec/es/es_query_builder_test.cpp | 2 +- be/test/exec/es/es_scan_reader_test.cpp | 2 +- run-ut.sh 
| 6 +++--- 5 files changed, 10 insertions(+), 10 deletions(-) rename be/test/exec/{ => es}/es_predicate_test.cpp (98%) diff --git a/be/test/exec/CMakeLists.txt b/be/test/exec/CMakeLists.txt index 18771ff43417ed..e97e822fe48cd1 100644 --- a/be/test/exec/CMakeLists.txt +++ b/be/test/exec/CMakeLists.txt @@ -45,7 +45,9 @@ ADD_BE_TEST(broker_scanner_test) ADD_BE_TEST(broker_scan_node_test) ADD_BE_TEST(es_scan_node_test) ADD_BE_TEST(es_http_scan_node_test) -ADD_BE_TEST(es_predicate_test) +ADD_BE_TEST(es/es_predicate_test) +ADD_BE_TEST(es/es_query_builder_test) +ADD_BE_TEST(es/es_scan_reader_test) ADD_BE_TEST(olap_table_info_test) ADD_BE_TEST(olap_table_sink_test) #ADD_BE_TEST(schema_scan_node_test) @@ -62,5 +64,3 @@ ADD_BE_TEST(olap_table_sink_test) #ADD_BE_TEST(schema_scanner/schema_engines_scanner_test) #ADD_BE_TEST(schema_scanner/schema_collations_scanner_test) #ADD_BE_TEST(schema_scanner/schema_charsets_scanner_test) -#ADD_BE_TEST(es/es_scan_reader_test) -#ADD_BE_TEST(es/es_query_builder_test) diff --git a/be/test/exec/es_predicate_test.cpp b/be/test/exec/es/es_predicate_test.cpp similarity index 98% rename from be/test/exec/es_predicate_test.cpp rename to be/test/exec/es/es_predicate_test.cpp index 85a245a1cf5fce..72d4ec428eee9d 100644 --- a/be/test/exec/es_predicate_test.cpp +++ b/be/test/exec/es/es_predicate_test.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-#include "exec/es_predicate.h" +#include "exec/es/es_predicate.h" #include #include @@ -24,7 +24,7 @@ #include "common/status.h" #include "exprs/binary_predicate.h" #include "gen_cpp/Exprs_types.h" -#include "util/es_query_builder.h" +#include "exec/es/es_query_builder.h" #include "rapidjson/document.h" #include "rapidjson/rapidjson.h" #include "rapidjson/stringbuffer.h" diff --git a/be/test/exec/es/es_query_builder_test.cpp b/be/test/exec/es/es_query_builder_test.cpp index be33978837f13b..f6d7938a9ff702 100644 --- a/be/test/exec/es/es_query_builder_test.cpp +++ b/be/test/exec/es/es_query_builder_test.cpp @@ -21,7 +21,7 @@ #include "common/logging.h" #include "exec/es/es_query_builder.h" -#include "exec/es_predicate.h" +#include "exec/es/es_predicate.h" #include "rapidjson/document.h" #include "rapidjson/rapidjson.h" #include "rapidjson/stringbuffer.h" diff --git a/be/test/exec/es/es_scan_reader_test.cpp b/be/test/exec/es/es_scan_reader_test.cpp index 2f822ba176e774..7765a6d5fca912 100644 --- a/be/test/exec/es/es_scan_reader_test.cpp +++ b/be/test/exec/es/es_scan_reader_test.cpp @@ -21,6 +21,7 @@ #include #include "common/logging.h" +#include "exec/es/es_scan_reader.h" #include "exec/es/es_scroll_query.h" #include "http/ev_http_server.h" #include "http/http_channel.h" @@ -29,7 +30,6 @@ #include "rapidjson/document.h" #include "rapidjson/writer.h" #include "rapidjson/stringbuffer.h" -#include "util/es_scan_reader.h" namespace doris { diff --git a/run-ut.sh b/run-ut.sh index f1a424f5687677..83152274a0731f 100755 --- a/run-ut.sh +++ b/run-ut.sh @@ -150,8 +150,6 @@ ${DORIS_TEST_BINARY_DIR}/util/byte_buffer_test2 ${DORIS_TEST_BINARY_DIR}/util/uid_util_test ${DORIS_TEST_BINARY_DIR}/util/aes_util_test ${DORIS_TEST_BINARY_DIR}/util/string_util_test -${DORIS_TEST_BINARY_DIR}/util/es_scan_reader_test -${DORIS_TEST_BINARY_DIR}/util/es_query_builder_test ## Running common Unittest ${DORIS_TEST_BINARY_DIR}/common/resource_tls_test @@ -166,7 +164,9 @@ 
${DORIS_TEST_BINARY_DIR}/exec/broker_scanner_test ${DORIS_TEST_BINARY_DIR}/exec/broker_scan_node_test ${DORIS_TEST_BINARY_DIR}/exec/es_scan_node_test ${DORIS_TEST_BINARY_DIR}/exec/es_http_scan_node_test -${DORIS_TEST_BINARY_DIR}/exec/es_predicate_test +${DORIS_TEST_BINARY_DIR}/exec/es/es_predicate_test +${DORIS_TEST_BINARY_DIR}/exec/es/es_scan_reader_test +${DORIS_TEST_BINARY_DIR}/exec/es/es_query_builder_test ${DORIS_TEST_BINARY_DIR}/exec/olap_table_info_test ${DORIS_TEST_BINARY_DIR}/exec/olap_table_sink_test From dedbda554f961d4900b9f23900d528ddc2b4ca04 Mon Sep 17 00:00:00 2001 From: lide-reed Date: Wed, 24 Apr 2019 10:05:56 +0800 Subject: [PATCH 60/73] Adjust unit test ES http related accordingly --- be/test/exec/CMakeLists.txt | 6 +++--- be/test/exec/{es => }/es_predicate_test.cpp | 0 be/test/exec/{es => }/es_query_builder_test.cpp | 0 be/test/exec/{es => }/es_scan_reader_test.cpp | 0 run-ut.sh | 6 +++--- 5 files changed, 6 insertions(+), 6 deletions(-) rename be/test/exec/{es => }/es_predicate_test.cpp (100%) rename be/test/exec/{es => }/es_query_builder_test.cpp (100%) rename be/test/exec/{es => }/es_scan_reader_test.cpp (100%) diff --git a/be/test/exec/CMakeLists.txt b/be/test/exec/CMakeLists.txt index e97e822fe48cd1..79e33f5b8f516a 100644 --- a/be/test/exec/CMakeLists.txt +++ b/be/test/exec/CMakeLists.txt @@ -45,9 +45,9 @@ ADD_BE_TEST(broker_scanner_test) ADD_BE_TEST(broker_scan_node_test) ADD_BE_TEST(es_scan_node_test) ADD_BE_TEST(es_http_scan_node_test) -ADD_BE_TEST(es/es_predicate_test) -ADD_BE_TEST(es/es_query_builder_test) -ADD_BE_TEST(es/es_scan_reader_test) +ADD_BE_TEST(es_predicate_test) +ADD_BE_TEST(es_query_builder_test) +ADD_BE_TEST(es_scan_reader_test) ADD_BE_TEST(olap_table_info_test) ADD_BE_TEST(olap_table_sink_test) #ADD_BE_TEST(schema_scan_node_test) diff --git a/be/test/exec/es/es_predicate_test.cpp b/be/test/exec/es_predicate_test.cpp similarity index 100% rename from be/test/exec/es/es_predicate_test.cpp rename to 
be/test/exec/es_predicate_test.cpp diff --git a/be/test/exec/es/es_query_builder_test.cpp b/be/test/exec/es_query_builder_test.cpp similarity index 100% rename from be/test/exec/es/es_query_builder_test.cpp rename to be/test/exec/es_query_builder_test.cpp diff --git a/be/test/exec/es/es_scan_reader_test.cpp b/be/test/exec/es_scan_reader_test.cpp similarity index 100% rename from be/test/exec/es/es_scan_reader_test.cpp rename to be/test/exec/es_scan_reader_test.cpp diff --git a/run-ut.sh b/run-ut.sh index 83152274a0731f..bd6a6983a3db4a 100755 --- a/run-ut.sh +++ b/run-ut.sh @@ -164,9 +164,9 @@ ${DORIS_TEST_BINARY_DIR}/exec/broker_scanner_test ${DORIS_TEST_BINARY_DIR}/exec/broker_scan_node_test ${DORIS_TEST_BINARY_DIR}/exec/es_scan_node_test ${DORIS_TEST_BINARY_DIR}/exec/es_http_scan_node_test -${DORIS_TEST_BINARY_DIR}/exec/es/es_predicate_test -${DORIS_TEST_BINARY_DIR}/exec/es/es_scan_reader_test -${DORIS_TEST_BINARY_DIR}/exec/es/es_query_builder_test +${DORIS_TEST_BINARY_DIR}/exec/es_predicate_test +${DORIS_TEST_BINARY_DIR}/exec/es_scan_reader_test +${DORIS_TEST_BINARY_DIR}/exec/es_query_builder_test ${DORIS_TEST_BINARY_DIR}/exec/olap_table_info_test ${DORIS_TEST_BINARY_DIR}/exec/olap_table_sink_test From 10d6f97a7d8faeac9b5b522ff76490024d9998fa Mon Sep 17 00:00:00 2001 From: lide-reed Date: Wed, 24 Apr 2019 15:50:43 +0800 Subject: [PATCH 61/73] Reconstruct ScrollParser --- be/src/exec/es/es_scan_reader.cpp | 10 +- be/src/exec/es/es_scroll_parser.cpp | 186 ++++++++++++---------------- be/src/exec/es/es_scroll_parser.h | 4 +- 3 files changed, 85 insertions(+), 115 deletions(-) diff --git a/be/src/exec/es/es_scan_reader.cpp b/be/src/exec/es/es_scan_reader.cpp index 7df7fbc3da5acd..94b433257ba28b 100644 --- a/be/src/exec/es/es_scan_reader.cpp +++ b/be/src/exec/es/es_scan_reader.cpp @@ -113,12 +113,12 @@ Status ESScanReader::get_next(bool* scan_eos, ScrollParser** parser) { } } - scroll_parser = new ScrollParser(response); - - // maybe the index or shard is empty - if 
(scroll_parser == nullptr) { + scroll_parser = new ScrollParser(); + Status status = scroll_parser->parse(response); + if (!status.ok()){ _eos = true; - return Status::OK; + LOG(WARNING) << status.get_error_msg(); + return status; } _scroll_id = scroll_parser->get_scroll_id(); diff --git a/be/src/exec/es/es_scroll_parser.cpp b/be/src/exec/es/es_scroll_parser.cpp index 7e15047c074f1b..e1ee317f777cf5 100644 --- a/be/src/exec/es/es_scroll_parser.cpp +++ b/be/src/exec/es/es_scroll_parser.cpp @@ -66,27 +66,73 @@ static const string ERROR_COL_DATA_IS_ARRAY = "Data source returned an array for } \ } while (false) -ScrollParser::ScrollParser(const std::string& scroll_result) : +template +static Status get_int_value(const rapidjson::Value &col, PrimitiveType type, void* slot) { + if (col.IsNumber()) { + *reinterpret_cast(slot) = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64()); + return Status::OK; + } + + RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); + + StringParser::ParseResult result; + const std::string& val = col.GetString(); + size_t len = col.GetStringLength(); + T v = StringParser::string_to_int(val.c_str(), len, &result); + RETURN_ERROR_IF_PARSING_FAILED(result, type); + + if (sizeof(T) < 16) { + *reinterpret_cast(slot) = v; + } else { + DCHECK(sizeof(T) == 16); + memcpy(slot, &v, sizeof(v)); + } + + return Status::OK; +} + +template +static Status get_float_value(const rapidjson::Value &col, PrimitiveType type, void* slot) { + DCHECK(sizeof(T) == 4 || sizeof(T) == 8); + if (col.IsNumber()) { + *reinterpret_cast(slot) = (T)(sizeof(T) == 4 ? 
col.GetFloat() : col.GetDouble()); + return Status::OK; + } + + RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); + + StringParser::ParseResult result; + const std::string& val = col.GetString(); + size_t len = col.GetStringLength(); + T v = StringParser::string_to_float(val.c_str(), len, &result); + RETURN_ERROR_IF_PARSING_FAILED(result, type); + *reinterpret_cast(slot) = v; + + return Status::OK; +} + +ScrollParser::ScrollParser() : _scroll_id(""), _total(0), _size(0), _line_index(0) { - parsing(scroll_result); } ScrollParser::~ScrollParser() { } -void ScrollParser::parsing(const std::string& scroll_result) { +Status ScrollParser::parse(const std::string& scroll_result) { _document_node.Parse(scroll_result.c_str()); if (_document_node.HasParseError()) { - LOG(ERROR) << "Parsing json error, json is: " << scroll_result; - return; + std::stringstream ss; + ss << "Parsing json error, json is: " << scroll_result; + return Status(ss.str()); } if (!_document_node.HasMember(FIELD_SCROLL_ID)) { - LOG(ERROR) << "maybe not a scroll request"; - return; + return Status("Document has not a scroll id field"); } const rapidjson::Value &scroll_node = _document_node[FIELD_SCROLL_ID]; @@ -96,19 +142,20 @@ void ScrollParser::parsing(const std::string& scroll_result) { const rapidjson::Value &field_total = outer_hits_node[FIELD_TOTAL]; _total = field_total.GetInt(); if (_total == 0) { - return; + return Status::OK; } VLOG(1) << "es_scan_reader total hits: " << _total << " documents"; const rapidjson::Value &inner_hits_node = outer_hits_node[FIELD_INNER_HITS]; if (!inner_hits_node.IsArray()) { - LOG(ERROR) << "maybe not a scroll request"; - return; + return Status("inner hits node is not an array"); } rapidjson::Document::AllocatorType& a = _document_node.GetAllocator(); _inner_hits_node.CopyFrom(inner_hits_node, a); _size = _inner_hits_node.Size(); + + return Status::OK; } int ScrollParser::get_size() { @@ -177,135 +224,58 @@ Status 
ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, } case TYPE_TINYINT: { - if (col.IsNumber()) { - *reinterpret_cast(slot) = (int8_t)col.GetInt(); - break; + Status status = get_int_value(col, type, slot); + if (!status.ok()) { + return status; } - - RETURN_ERROR_IF_COL_IS_ARRAY(col, type); - RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); - - StringParser::ParseResult result; - const std::string& val = col.GetString(); - size_t len = col.GetStringLength(); - int8_t v = - StringParser::string_to_int(val.c_str(), len, &result); - RETURN_ERROR_IF_PARSING_FAILED(result, type); - *reinterpret_cast(slot) = v; break; } case TYPE_SMALLINT: { - if (col.IsNumber()) { - *reinterpret_cast(slot) = (int16_t)col.GetInt(); - break; + Status status = get_int_value(col, type, slot); + if (!status.ok()) { + return status; } - - RETURN_ERROR_IF_COL_IS_ARRAY(col, type); - RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); - - const std::string& val = col.GetString(); - size_t len = col.GetStringLength(); - StringParser::ParseResult result; - int16_t v = - StringParser::string_to_int(val.c_str(), len, &result); - RETURN_ERROR_IF_PARSING_FAILED(result, type); - *reinterpret_cast(slot) = v; break; } case TYPE_INT: { - if (col.IsNumber()) { - *reinterpret_cast(slot) = (int32_t)col.GetInt(); - break; + Status status = get_int_value(col, type, slot); + if (!status.ok()) { + return status; } - - RETURN_ERROR_IF_COL_IS_ARRAY(col, type); - RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); - - const std::string& val = col.GetString(); - size_t len = col.GetStringLength(); - StringParser::ParseResult result; - int32_t v = - StringParser::string_to_int(val.c_str(), len, &result); - RETURN_ERROR_IF_PARSING_FAILED(result, type); - *reinterpret_cast(slot) = v; break; } case TYPE_BIGINT: { - if (col.IsNumber()) { - *reinterpret_cast(slot) = col.GetInt64(); - break; + Status status = get_int_value(col, type, slot); + if (!status.ok()) { + return status; } - - RETURN_ERROR_IF_COL_IS_ARRAY(col, type); - 
RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); - - const std::string& val = col.GetString(); - size_t len = col.GetStringLength(); - StringParser::ParseResult result; - int64_t v = - StringParser::string_to_int(val.c_str(), len, &result); - RETURN_ERROR_IF_PARSING_FAILED(result, type); - *reinterpret_cast(slot) = v; break; } case TYPE_LARGEINT: { - if (col.IsNumber()) { - *reinterpret_cast(slot) = col.GetInt64(); - break; + Status status = get_int_value<__int128>(col, type, slot); + if (!status.ok()) { + return status; } - - RETURN_ERROR_IF_COL_IS_ARRAY(col, type); - RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); - - const std::string& val = col.GetString(); - size_t len = col.GetStringLength(); - StringParser::ParseResult result; - __int128 v = - StringParser::string_to_int<__int128>(val.c_str(), len, &result); - RETURN_ERROR_IF_PARSING_FAILED(result, type); - memcpy(slot, &v, sizeof(v)); break; } case TYPE_DOUBLE: { - if (col.IsNumber()) { - *reinterpret_cast(slot) = col.GetDouble(); - break; + Status status = get_float_value(col, type, slot); + if (!status.ok()) { + return status; } - - RETURN_ERROR_IF_COL_IS_ARRAY(col, type); - RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); - - const std::string& val = col.GetString(); - size_t val_size = col.GetStringLength(); - StringParser::ParseResult result; - double d = - StringParser::string_to_float(val.c_str(), val_size, &result); - RETURN_ERROR_IF_PARSING_FAILED(result, type); - *reinterpret_cast(slot) = d; break; } case TYPE_FLOAT: { - if (col.IsNumber()) { - *reinterpret_cast(slot) = col.GetFloat(); - break; + Status status = get_float_value(col, type, slot); + if (!status.ok()) { + return status; } - - RETURN_ERROR_IF_COL_IS_ARRAY(col, type); - RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); - - const std::string& val = col.GetString(); - size_t val_size = col.GetStringLength(); - StringParser::ParseResult result; - float f = - StringParser::string_to_float(val.c_str(), val_size, &result); - 
RETURN_ERROR_IF_PARSING_FAILED(result, type); - *reinterpret_cast(slot) = f; break; } diff --git a/be/src/exec/es/es_scroll_parser.h b/be/src/exec/es/es_scroll_parser.h index 494fef23b280f1..5af75a85eec2aa 100644 --- a/be/src/exec/es/es_scroll_parser.h +++ b/be/src/exec/es/es_scroll_parser.h @@ -30,9 +30,10 @@ class Status; class ScrollParser { public: - ScrollParser(const std::string& scroll_result); + ScrollParser(); ~ScrollParser(); + Status parse(const std::string& scroll_result); Status fill_tuple(const TupleDescriptor* _tuple_desc, Tuple* tuple, MemPool* mem_pool, bool* line_eof); @@ -41,7 +42,6 @@ class ScrollParser { int get_size(); private: - void parsing(const std::string& scroll_result); std::string _scroll_id; int _total; From 6b7b4050b6335757a8707081aa298a4ce70e8192 Mon Sep 17 00:00:00 2001 From: lide-reed Date: Wed, 24 Apr 2019 16:51:44 +0800 Subject: [PATCH 62/73] Change build_disjuncts_list to return error message --- be/src/exec/es/es_predicate.cpp | 69 ++++++++++++++---------------- be/src/exec/es/es_predicate.h | 4 +- be/src/exec/es_http_scan_node.cpp | 9 ++-- be/test/exec/es_predicate_test.cpp | 2 +- 4 files changed, 41 insertions(+), 43 deletions(-) diff --git a/be/src/exec/es/es_predicate.cpp b/be/src/exec/es/es_predicate.cpp index 38f380c1c2c1a1..56f7c97a20f66e 100644 --- a/be/src/exec/es/es_predicate.cpp +++ b/be/src/exec/es/es_predicate.cpp @@ -182,7 +182,7 @@ EsPredicate::~EsPredicate() { _disjuncts.clear(); } -bool EsPredicate::build_disjuncts_list() { +Status EsPredicate::build_disjuncts_list() { return build_disjuncts_list(_context->root()); } @@ -216,11 +216,10 @@ static bool is_literal_node(const Expr* expr) { } } -bool EsPredicate::build_disjuncts_list(const Expr* conjunct) { +Status EsPredicate::build_disjuncts_list(const Expr* conjunct) { if (TExprNodeType::BINARY_PRED == conjunct->node_type()) { if (conjunct->children().size() != 2) { - VLOG(1) << "get disjuncts fail: number of childs is not 2"; - return false; + return 
Status("build disjuncts failed: number of childs is not 2"); } SlotRef* slotRef = nullptr; @@ -235,19 +234,16 @@ bool EsPredicate::build_disjuncts_list(const Expr* conjunct) { slotRef = (SlotRef*)(conjunct->get_child(1)); op = conjunct->op(); } else { - VLOG(1) << "get disjuncts fail: no SLOT_REF child"; - return false; + return Status("build disjuncts failed: no SLOT_REF child"); } const SlotDescriptor* slot_desc = get_slot_desc(slotRef); if (slot_desc == nullptr) { - VLOG(1) << "get disjuncts fail: slot_desc is null"; - return false; + return Status("build disjuncts failed: slot_desc is null"); } if (!is_literal_node(expr)) { - VLOG(1) << "get disjuncts fail: expr is not literal type"; - return false; + return Status("build disjuncts failed: expr is not literal type"); } ExtLiteral literal(expr->type().type, _context->get_value(expr, NULL)); @@ -259,11 +255,10 @@ bool EsPredicate::build_disjuncts_list(const Expr* conjunct) { literal); _disjuncts.push_back(predicate); - return true; + return Status::OK; } if (is_match_func(conjunct)) { - Expr* expr = conjunct->get_child(1); ExtLiteral literal(expr->type().type, _context->get_value(expr, NULL)); vector query_conditions; @@ -278,18 +273,18 @@ bool EsPredicate::build_disjuncts_list(const Expr* conjunct) { _es_query_status = BooleanQueryBuilder::check_es_query(*(ExtFunction *)predicate); if (!_es_query_status.ok()) { - return false; + return _es_query_status; } } _disjuncts.push_back(predicate); - return true; + return Status::OK; } if (TExprNodeType::FUNCTION_CALL == conjunct->node_type()) { std::string fname = conjunct->fn().name.function_name; if (fname != "like") { - return false; + return Status("build disjuncts failed: function name is not like"); } SlotRef* slotRef = nullptr; @@ -301,20 +296,17 @@ bool EsPredicate::build_disjuncts_list(const Expr* conjunct) { expr = conjunct->get_child(0); slotRef = (SlotRef*)(conjunct->get_child(1)); } else { - VLOG(1) << "get disjuncts fail: no SLOT_REF child"; - return 
false; + return Status("build disjuncts failed: no SLOT_REF child"); } const SlotDescriptor* slot_desc = get_slot_desc(slotRef); if (slot_desc == nullptr) { - VLOG(1) << "get disjuncts fail: slot_desc is null"; - return false; + return Status("build disjuncts failed: slot_desc is null"); } PrimitiveType type = expr->type().type; if (type != TYPE_VARCHAR && type != TYPE_CHAR) { - VLOG(1) << "get disjuncts fail: like value is not a string"; - return false; + return Status("build disjuncts failed: like value is not a string"); } ExtLiteral literal(type, _context->get_value(expr, NULL)); @@ -325,7 +317,7 @@ bool EsPredicate::build_disjuncts_list(const Expr* conjunct) { literal); _disjuncts.push_back(predicate); - return true; + return Status::OK; } if (TExprNodeType::IN_PRED == conjunct->node_type()) { @@ -333,31 +325,32 @@ bool EsPredicate::build_disjuncts_list(const Expr* conjunct) { // like col_a in (abs(1)) if (TExprOpcode::FILTER_IN != conjunct->op() && TExprOpcode::FILTER_NOT_IN != conjunct->op()) { - return false; + return Status("build disjuncts failed: " + "opcode in IN_PRED is neither FILTER_IN nor FILTER_NOT_IN"); } vector in_pred_values; const InPredicate* pred = dynamic_cast(conjunct); if (Expr::type_without_cast(pred->get_child(0)) != TExprNodeType::SLOT_REF) { - return false; + return Status("build disjuncts failed"); } SlotRef* slot_ref = (SlotRef*)(conjunct->get_child(0)); const SlotDescriptor* slot_desc = get_slot_desc(slot_ref); if (slot_desc == nullptr) { - return false; + return Status("build disjuncts failed: slot_desc is null"); } if (pred->get_child(0)->type().type != slot_desc->type().type) { if (!ignore_cast(slot_desc, pred->get_child(0))) { - return false; + return Status("build disjuncts failed"); } } HybirdSetBase::IteratorBase* iter = pred->hybird_set()->begin(); while (iter->has_next()) { if (nullptr == iter->get_value()) { - return false; + return Status("build disjuncts failed: hybird set has a null value"); } ExtLiteral 
literal(slot_desc->type().type, const_cast(iter->get_value())); @@ -373,28 +366,30 @@ bool EsPredicate::build_disjuncts_list(const Expr* conjunct) { in_pred_values); _disjuncts.push_back(predicate); - return true; + return Status::OK; } if (TExprNodeType::COMPOUND_PRED == conjunct->node_type()) { if (TExprOpcode::COMPOUND_OR != conjunct->op()) { - VLOG(1) << "get disjuncts fail: op is not COMPOUND_OR"; - return false; + return Status("build disjuncts failed: op is not COMPOUND_OR"); } - if (!build_disjuncts_list(conjunct->get_child(0))) { - return false; + Status status = build_disjuncts_list(conjunct->get_child(0)); + if (!status.ok()) { + return status; } - if (!build_disjuncts_list(conjunct->get_child(1))) { - return false; + status = build_disjuncts_list(conjunct->get_child(1)); + if (!status.ok()) { + return status; } - return true; + return Status::OK; } // if go to here, report error - VLOG(1) << "get disjuncts fail: node type is " << conjunct->node_type() + std::stringstream ss; + ss << "build disjuncts failed: node type is " << conjunct->node_type() << ", should be BINARY_PRED or COMPOUND_PRED"; - return false; + return Status(ss.str()); } bool EsPredicate::is_match_func(const Expr* conjunct) { diff --git a/be/src/exec/es/es_predicate.h b/be/src/exec/es/es_predicate.h index a697a0f967fdeb..6b24754e6710a5 100644 --- a/be/src/exec/es/es_predicate.h +++ b/be/src/exec/es/es_predicate.h @@ -173,7 +173,7 @@ class EsPredicate { EsPredicate(ExprContext* context, const TupleDescriptor* tuple_desc); ~EsPredicate(); const std::vector& get_predicate_list(); - bool build_disjuncts_list(); + Status build_disjuncts_list(); // public for tests EsPredicate(const std::vector& all_predicates) { _disjuncts = all_predicates; @@ -184,7 +184,7 @@ class EsPredicate { } private: - bool build_disjuncts_list(const Expr* conjunct); + Status build_disjuncts_list(const Expr* conjunct); bool is_match_func(const Expr* conjunct); const SlotDescriptor* get_slot_desc(const SlotRef* 
slotRef); diff --git a/be/src/exec/es_http_scan_node.cpp b/be/src/exec/es_http_scan_node.cpp index 111767594ee12a..5467b4966d4d20 100644 --- a/be/src/exec/es_http_scan_node.cpp +++ b/be/src/exec/es_http_scan_node.cpp @@ -89,18 +89,21 @@ Status EsHttpScanNode::build_conjuncts_list() { for (int i = 0; i < _conjunct_ctxs.size(); ++i) { EsPredicate* predicate = _pool->add( new EsPredicate(_conjunct_ctxs[i], _tuple_desc)); - if (predicate->build_disjuncts_list()) { + status = predicate->build_disjuncts_list(); + if (status.ok()) { _predicates.push_back(predicate); _predicate_to_conjunct.push_back(i); } else { + VLOG(1) << status.get_error_msg(); status = predicate->get_es_query_status(); if (!status.ok()) { - LOG(WARNING) << "Build es_query failed: " << status.get_error_msg(); + LOG(WARNING) << status.get_error_msg(); + return status; } } } - return status; + return Status::OK; } Status EsHttpScanNode::open(RuntimeState* state) { diff --git a/be/test/exec/es_predicate_test.cpp b/be/test/exec/es_predicate_test.cpp index 72d4ec428eee9d..3c18bf1af4c454 100644 --- a/be/test/exec/es_predicate_test.cpp +++ b/be/test/exec/es_predicate_test.cpp @@ -147,7 +147,7 @@ TEST_F(EsPredicateTest, normal) { std::vector predicates; for (int i = 0; i < conjunct_ctxs.size(); ++i) { EsPredicate* predicate = new EsPredicate(conjunct_ctxs[i], tuple_desc); - if (predicate->build_disjuncts_list()) { + if (predicate->build_disjuncts_list().ok()) { predicates.push_back(predicate); } } From 7431d34eed52de35166fd2b0ed7dfb3cef6befc6 Mon Sep 17 00:00:00 2001 From: lide-reed Date: Wed, 24 Apr 2019 19:04:48 +0800 Subject: [PATCH 63/73] Fix a memory leak in ESScanReader --- be/src/exec/es/es_predicate.cpp | 3 +-- be/src/exec/es/es_scan_reader.cpp | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/be/src/exec/es/es_predicate.cpp b/be/src/exec/es/es_predicate.cpp index 56f7c97a20f66e..472dd95feba29f 100644 --- a/be/src/exec/es/es_predicate.cpp +++ b/be/src/exec/es/es_predicate.cpp @@ 
-387,8 +387,7 @@ Status EsPredicate::build_disjuncts_list(const Expr* conjunct) { // if go to here, report error std::stringstream ss; - ss << "build disjuncts failed: node type is " << conjunct->node_type() - << ", should be BINARY_PRED or COMPOUND_PRED"; + ss << "build disjuncts failed: node type " << conjunct->node_type() << " is not supported"; return Status(ss.str()); } diff --git a/be/src/exec/es/es_scan_reader.cpp b/be/src/exec/es/es_scan_reader.cpp index 94b433257ba28b..4ad7b6f3b75f75 100644 --- a/be/src/exec/es/es_scan_reader.cpp +++ b/be/src/exec/es/es_scan_reader.cpp @@ -78,7 +78,6 @@ Status ESScanReader::open() { Status ESScanReader::get_next(bool* scan_eos, ScrollParser** parser) { std::string response; - ScrollParser* scroll_parser = nullptr; // if is first scroll request, should return the cached response *parser = nullptr; *scan_eos = true; @@ -113,7 +112,7 @@ Status ESScanReader::get_next(bool* scan_eos, ScrollParser** parser) { } } - scroll_parser = new ScrollParser(); + std::unique_ptr scroll_parser(new ScrollParser()); Status status = scroll_parser->parse(response); if (!status.ok()){ _eos = true; @@ -133,7 +132,7 @@ Status ESScanReader::get_next(bool* scan_eos, ScrollParser** parser) { _eos = false; } - *parser = scroll_parser; + *parser = std::move(scroll_parser.get()); *scan_eos = false; return Status::OK; } From 15fada7241ab851226dbe81b73d898b815dc74ce Mon Sep 17 00:00:00 2001 From: lide-reed Date: Wed, 24 Apr 2019 20:29:28 +0800 Subject: [PATCH 64/73] Avoid memory issue by introducing unique_ptr --- be/src/exec/es/es_scan_reader.cpp | 6 ++---- be/src/exec/es/es_scan_reader.h | 2 +- be/src/exec/es_http_scanner.cpp | 8 ++------ be/src/exec/es_http_scanner.h | 2 +- 4 files changed, 6 insertions(+), 12 deletions(-) diff --git a/be/src/exec/es/es_scan_reader.cpp b/be/src/exec/es/es_scan_reader.cpp index 4ad7b6f3b75f75..e2f17a0908d1a7 100644 --- a/be/src/exec/es/es_scan_reader.cpp +++ b/be/src/exec/es/es_scan_reader.cpp @@ -76,10 +76,9 @@ Status 
ESScanReader::open() { return Status::OK; } -Status ESScanReader::get_next(bool* scan_eos, ScrollParser** parser) { +Status ESScanReader::get_next(bool* scan_eos, std::unique_ptr& scroll_parser) { std::string response; // if is first scroll request, should return the cached response - *parser = nullptr; *scan_eos = true; if (_eos) { return Status::OK; @@ -112,7 +111,7 @@ Status ESScanReader::get_next(bool* scan_eos, ScrollParser** parser) { } } - std::unique_ptr scroll_parser(new ScrollParser()); + scroll_parser.reset(new ScrollParser()); Status status = scroll_parser->parse(response); if (!status.ok()){ _eos = true; @@ -132,7 +131,6 @@ Status ESScanReader::get_next(bool* scan_eos, ScrollParser** parser) { _eos = false; } - *parser = std::move(scroll_parser.get()); *scan_eos = false; return Status::OK; } diff --git a/be/src/exec/es/es_scan_reader.h b/be/src/exec/es/es_scan_reader.h index 449c2686909a9d..b03701c11e772c 100644 --- a/be/src/exec/es/es_scan_reader.h +++ b/be/src/exec/es/es_scan_reader.h @@ -45,7 +45,7 @@ class ESScanReader { // launch the first scroll request, this method will cache the first scroll response, and return the this cached response when invoke get_next Status open(); // invoke get_next to get next batch documents from elasticsearch - Status get_next(bool *eos, ScrollParser** parser); + Status get_next(bool *eos, std::unique_ptr& parser); // clear scroll context from elasticsearch Status close(); diff --git a/be/src/exec/es_http_scanner.cpp b/be/src/exec/es_http_scanner.cpp index ca6f7309f63a84..331ede8902ea2e 100644 --- a/be/src/exec/es_http_scanner.cpp +++ b/be/src/exec/es_http_scanner.cpp @@ -97,12 +97,8 @@ Status EsHttpScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof) { while (!_batch_eof) { if (_line_eof || _es_scroll_parser == nullptr) { - if (_es_scroll_parser != nullptr) { - delete _es_scroll_parser; - _es_scroll_parser = nullptr; - } - RETURN_IF_ERROR(_es_reader->get_next(&_batch_eof, &_es_scroll_parser)); - if 
(_batch_eof || _es_scroll_parser == nullptr) { + RETURN_IF_ERROR(_es_reader->get_next(&_batch_eof, _es_scroll_parser)); + if (_batch_eof) { *eof = true; return Status::OK; } diff --git a/be/src/exec/es_http_scanner.h b/be/src/exec/es_http_scanner.h index 1b91ae70950997..ed4cf9bd8b4294 100644 --- a/be/src/exec/es_http_scanner.h +++ b/be/src/exec/es_http_scanner.h @@ -91,7 +91,7 @@ class EsHttpScanner { const TupleDescriptor* _tuple_desc; EsScanCounter* _counter; std::unique_ptr _es_reader; - ScrollParser* _es_scroll_parser; + std::unique_ptr _es_scroll_parser; // Profile RuntimeProfile::Counter* _rows_read_counter; From 29a6b4dc424adbfe331cab67d7613474227402d7 Mon Sep 17 00:00:00 2001 From: lide-reed Date: Thu, 25 Apr 2019 10:13:18 +0800 Subject: [PATCH 65/73] Fix unit test accordingly --- be/test/exec/es_scan_reader_test.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/be/test/exec/es_scan_reader_test.cpp b/be/test/exec/es_scan_reader_test.cpp index 7765a6d5fca912..2da96c025f8355 100644 --- a/be/test/exec/es_scan_reader_test.cpp +++ b/be/test/exec/es_scan_reader_test.cpp @@ -227,15 +227,15 @@ TEST_F(MockESServerTest, workflow) { auto st = reader.open(); // ASSERT_TRUE(st.ok()); bool eos = false; - while(!eos){ - ScrollParser* parser = nullptr; - st = reader.get_next(&eos, &parser); + std::unique_ptr parser = nullptr; + while(!eos){ + st = reader.get_next(&eos, parser); if(eos) { - break; + break; } - } - auto cst = reader.close(); - ASSERT_TRUE(cst.ok()); + } + auto cst = reader.close(); + ASSERT_TRUE(cst.ok()); } } From fc6a955c3f9f64385f6fb6de58d687a874caf4fd Mon Sep 17 00:00:00 2001 From: lide-reed Date: Thu, 25 Apr 2019 10:41:52 +0800 Subject: [PATCH 66/73] Suppress ASAN error --- be/src/exec/es/es_scroll_parser.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/be/src/exec/es/es_scroll_parser.cpp b/be/src/exec/es/es_scroll_parser.cpp index e1ee317f777cf5..2e4d4e737c4656 100644 ---
a/be/src/exec/es/es_scroll_parser.cpp +++ b/be/src/exec/es/es_scroll_parser.cpp @@ -191,7 +191,8 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, continue; } - const char* col_name = slot_desc->col_name().c_str(); + std::string s = slot_desc->col_name(); + const char* col_name = s.c_str(); rapidjson::Value::ConstMemberIterator itr = line.FindMember(col_name); if (itr == line.MemberEnd()) { tuple->set_null(slot_desc->null_indicator_offset()); From bac4634cd7263c2dc14971822c4e6db3f8176efb Mon Sep 17 00:00:00 2001 From: lide-reed Date: Thu, 25 Apr 2019 11:00:28 +0800 Subject: [PATCH 67/73] Suppress ASAN error --- be/src/exec/es/es_scroll_parser.cpp | 3 +-- be/src/runtime/descriptors.h | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/be/src/exec/es/es_scroll_parser.cpp b/be/src/exec/es/es_scroll_parser.cpp index 2e4d4e737c4656..e1ee317f777cf5 100644 --- a/be/src/exec/es/es_scroll_parser.cpp +++ b/be/src/exec/es/es_scroll_parser.cpp @@ -191,7 +191,7 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, continue; } - std::string s = slot_desc->col_name(); - const char* col_name = s.c_str(); + const char* col_name = slot_desc->col_name().c_str(); rapidjson::Value::ConstMemberIterator itr = line.FindMember(col_name); if (itr == line.MemberEnd()) { tuple->set_null(slot_desc->null_indicator_offset()); diff --git a/be/src/runtime/descriptors.h b/be/src/runtime/descriptors.h index 15219e09ec2944..e88cfb44783209 100644 --- a/be/src/runtime/descriptors.h +++ b/be/src/runtime/descriptors.h @@ -124,7 +124,7 @@ class SlotDescriptor { return _slot_size; } - std::string col_name() const { + const std::string& col_name() const { return _col_name; } From b997c7cd20d17e5127e577d2dabe9cb6a9fcc672 Mon Sep 17 00:00:00 2001 From: lide-reed Date: Thu, 25 Apr 2019 11:28:43 +0800 Subject: [PATCH 68/73] Add missed delete --- be/src/exec/es/es_predicate.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/be/src/exec/es/es_predicate.cpp
b/be/src/exec/es/es_predicate.cpp index 472dd95feba29f..aa482338a34088 100644 --- a/be/src/exec/es/es_predicate.cpp +++ b/be/src/exec/es/es_predicate.cpp @@ -273,6 +273,7 @@ Status EsPredicate::build_disjuncts_list(const Expr* conjunct) { _es_query_status = BooleanQueryBuilder::check_es_query(*(ExtFunction *)predicate); if (!_es_query_status.ok()) { + delete predicate; return _es_query_status; } } From 0d492fa9515f3da0e97d7ae37d0251a75d69bdae Mon Sep 17 00:00:00 2001 From: lide-reed Date: Thu, 25 Apr 2019 15:15:53 +0800 Subject: [PATCH 69/73] Fix a memory issue --- be/src/exec/es/es_predicate.cpp | 9 +++++---- be/src/exec/es/es_predicate.h | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/be/src/exec/es/es_predicate.cpp b/be/src/exec/es/es_predicate.cpp index aa482338a34088..d8ce5cf0d8a37e 100644 --- a/be/src/exec/es/es_predicate.cpp +++ b/be/src/exec/es/es_predicate.cpp @@ -336,8 +336,7 @@ Status EsPredicate::build_disjuncts_list(const Expr* conjunct) { return Status("build disjuncts failed"); } - SlotRef* slot_ref = (SlotRef*)(conjunct->get_child(0)); - const SlotDescriptor* slot_desc = get_slot_desc(slot_ref); + const SlotDescriptor* slot_desc = get_slot_desc(conjunct->get_child(0)); if (slot_desc == nullptr) { return Status("build disjuncts failed: slot_desc is null"); } @@ -400,10 +399,12 @@ bool EsPredicate::is_match_func(const Expr* conjunct) { return false; } -const SlotDescriptor* EsPredicate::get_slot_desc(const SlotRef* slotRef) { +const SlotDescriptor* EsPredicate::get_slot_desc(const Expr* expr) { + std::vector slot_ids; + expr->get_slot_ids(&slot_ids); const SlotDescriptor* slot_desc = nullptr; for (SlotDescriptor* slot : _tuple_desc->slots()) { - if (slot->id() == slotRef->slot_id()) { + if (slot->id() == slot_ids[0]) { slot_desc = slot; break; } diff --git a/be/src/exec/es/es_predicate.h b/be/src/exec/es/es_predicate.h index 6b24754e6710a5..6bf4500c6048c5 100644 --- a/be/src/exec/es/es_predicate.h +++ 
b/be/src/exec/es/es_predicate.h @@ -186,7 +186,7 @@ class EsPredicate { private: Status build_disjuncts_list(const Expr* conjunct); bool is_match_func(const Expr* conjunct); - const SlotDescriptor* get_slot_desc(const SlotRef* slotRef); + const SlotDescriptor* get_slot_desc(const Expr* expr); ExprContext* _context; int _disjuncts_num; From c215fbab0ead6ae0f89ad37151e835114349747f Mon Sep 17 00:00:00 2001 From: lide-reed Date: Sun, 28 Apr 2019 10:48:46 +0800 Subject: [PATCH 70/73] =?UTF-8?q?Introduce=20expr=5Fwithout=5Fcast()=20?= =?UTF-8?q?=C3=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- be/src/exec/es/es_predicate.cpp | 29 ++++++++++++++--------------- be/src/exec/es/es_predicate.h | 2 +- be/src/exec/es/es_query_builder.cpp | 2 +- be/src/exprs/expr.cpp | 7 +++++++ be/src/exprs/expr.h | 2 ++ 5 files changed, 25 insertions(+), 17 deletions(-) diff --git a/be/src/exec/es/es_predicate.cpp b/be/src/exec/es/es_predicate.cpp index d8ce5cf0d8a37e..a7f0bc1f453929 100644 --- a/be/src/exec/es/es_predicate.cpp +++ b/be/src/exec/es/es_predicate.cpp @@ -222,22 +222,22 @@ Status EsPredicate::build_disjuncts_list(const Expr* conjunct) { return Status("build disjuncts failed: number of childs is not 2"); } - SlotRef* slotRef = nullptr; + SlotRef* slot_ref = nullptr; TExprOpcode::type op; Expr* expr = nullptr; if (TExprNodeType::SLOT_REF == conjunct->get_child(0)->node_type()) { expr = conjunct->get_child(1); - slotRef = (SlotRef*)(conjunct->get_child(0)); + slot_ref = (SlotRef*)(conjunct->get_child(0)); op = conjunct->op(); } else if (TExprNodeType::SLOT_REF == conjunct->get_child(1)->node_type()) { expr = conjunct->get_child(0); - slotRef = (SlotRef*)(conjunct->get_child(1)); + slot_ref = (SlotRef*)(conjunct->get_child(1)); op = conjunct->op(); } else { return Status("build disjuncts failed: no SLOT_REF child"); } - const SlotDescriptor* slot_desc = get_slot_desc(slotRef); + const SlotDescriptor* slot_desc = 
get_slot_desc(slot_ref); if (slot_desc == nullptr) { return Status("build disjuncts failed: slot_desc is null"); } @@ -288,19 +288,19 @@ Status EsPredicate::build_disjuncts_list(const Expr* conjunct) { return Status("build disjuncts failed: function name is not like"); } - SlotRef* slotRef = nullptr; + SlotRef* slot_ref = nullptr; Expr* expr = nullptr; if (TExprNodeType::SLOT_REF == conjunct->get_child(0)->node_type()) { expr = conjunct->get_child(1); - slotRef = (SlotRef*)(conjunct->get_child(0)); + slot_ref = (SlotRef*)(conjunct->get_child(0)); } else if (TExprNodeType::SLOT_REF == conjunct->get_child(1)->node_type()) { expr = conjunct->get_child(0); - slotRef = (SlotRef*)(conjunct->get_child(1)); + slot_ref = (SlotRef*)(conjunct->get_child(1)); } else { return Status("build disjuncts failed: no SLOT_REF child"); } - const SlotDescriptor* slot_desc = get_slot_desc(slotRef); + const SlotDescriptor* slot_desc = get_slot_desc(slot_ref); if (slot_desc == nullptr) { return Status("build disjuncts failed: slot_desc is null"); } @@ -332,11 +332,12 @@ Status EsPredicate::build_disjuncts_list(const Expr* conjunct) { vector in_pred_values; const InPredicate* pred = dynamic_cast(conjunct); - if (Expr::type_without_cast(pred->get_child(0)) != TExprNodeType::SLOT_REF) { - return Status("build disjuncts failed"); + const Expr* expr = Expr::expr_without_cast(pred->get_child(0)); + if (expr->node_type() != TExprNodeType::SLOT_REF) { + return Status("build disjuncts failed: node type is not slot ref"); } - const SlotDescriptor* slot_desc = get_slot_desc(conjunct->get_child(0)); + const SlotDescriptor* slot_desc = get_slot_desc((const SlotRef *)expr); if (slot_desc == nullptr) { return Status("build disjuncts failed: slot_desc is null"); } @@ -399,12 +400,10 @@ bool EsPredicate::is_match_func(const Expr* conjunct) { return false; } -const SlotDescriptor* EsPredicate::get_slot_desc(const Expr* expr) { - std::vector slot_ids; - expr->get_slot_ids(&slot_ids); +const SlotDescriptor* 
EsPredicate::get_slot_desc(const SlotRef* slotRef) { const SlotDescriptor* slot_desc = nullptr; for (SlotDescriptor* slot : _tuple_desc->slots()) { - if (slot->id() == slot_ids[0]) { + if (slot->id() == slotRef->slot_id()) { slot_desc = slot; break; } diff --git a/be/src/exec/es/es_predicate.h b/be/src/exec/es/es_predicate.h index 6bf4500c6048c5..6b24754e6710a5 100644 --- a/be/src/exec/es/es_predicate.h +++ b/be/src/exec/es/es_predicate.h @@ -186,7 +186,7 @@ class EsPredicate { private: Status build_disjuncts_list(const Expr* conjunct); bool is_match_func(const Expr* conjunct); - const SlotDescriptor* get_slot_desc(const Expr* expr); + const SlotDescriptor* get_slot_desc(const SlotRef* slotRef); ExprContext* _context; int _disjuncts_num; diff --git a/be/src/exec/es/es_query_builder.cpp b/be/src/exec/es/es_query_builder.cpp index da927391ad9e62..fc6bfcce386e5b 100644 --- a/be/src/exec/es/es_query_builder.cpp +++ b/be/src/exec/es/es_query_builder.cpp @@ -134,7 +134,7 @@ void TermsInSetQueryBuilder::to_json(rapidjson::Document* document, rapidjson::V TermsInSetQueryBuilder::TermsInSetQueryBuilder(const ExtInPredicate& in_predicate) { _field = in_predicate.col.name; - for (auto value : in_predicate.values) { + for (auto& value : in_predicate.values) { _values.push_back(value.to_string()); } } diff --git a/be/src/exprs/expr.cpp b/be/src/exprs/expr.cpp index 439885f66b5ef9..9ca09ca9e573eb 100644 --- a/be/src/exprs/expr.cpp +++ b/be/src/exprs/expr.cpp @@ -700,6 +700,13 @@ TExprNodeType::type Expr::type_without_cast(const Expr* expr) { return expr->_node_type; } +const Expr* Expr::expr_without_cast(const Expr* expr) { + if (expr->_opcode == TExprOpcode::CAST) { + return expr_without_cast(expr->_children[0]); + } + return expr; +} + doris_udf::AnyVal* Expr::get_const_val(ExprContext* context) { if (!is_constant()) { return NULL; diff --git a/be/src/exprs/expr.h b/be/src/exprs/expr.h index 7d1118acdcc2c1..a32bfd3bbd4e65 100644 --- a/be/src/exprs/expr.h +++ 
b/be/src/exprs/expr.h @@ -181,6 +181,8 @@ class Expr { static TExprNodeType::type type_without_cast(const Expr* expr); + static const Expr* expr_without_cast(const Expr* expr); + // Returns true if expr doesn't contain slotrefs, ie, can be evaluated // with get_value(NULL). The default implementation returns true if all of // the children are constant. From b97ac21965802996bc980303c29e2a12830c3505 Mon Sep 17 00:00:00 2001 From: "Yunfeng,Wu" Date: Sun, 28 Apr 2019 13:44:08 +0800 Subject: [PATCH 71/73] Add reference for for-each (#1055) * Add reference for for-each * Add reference for for-each * Add reference for for-each --- be/src/exec/es/es_query_builder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/exec/es/es_query_builder.cpp b/be/src/exec/es/es_query_builder.cpp index fc6bfcce386e5b..8fc260c6715601 100644 --- a/be/src/exec/es/es_query_builder.cpp +++ b/be/src/exec/es/es_query_builder.cpp @@ -123,7 +123,7 @@ void TermsInSetQueryBuilder::to_json(rapidjson::Document* document, rapidjson::V rapidjson::Document::AllocatorType& allocator = document->GetAllocator(); rapidjson::Value terms_node(rapidjson::kObjectType); rapidjson::Value values_node(rapidjson::kArrayType); - for (auto value : _values) { + for (auto& value : _values) { rapidjson::Value value_value(value.c_str(), allocator); values_node.PushBack(value_value, allocator); } From 0c9e00e8cb580d8ba0fc57377b7c4e3420fe1a29 Mon Sep 17 00:00:00 2001 From: lide Date: Sun, 28 Apr 2019 16:23:38 +0800 Subject: [PATCH 72/73] Remove obsolete code --- be/src/http/http_client.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/be/src/http/http_client.cpp b/be/src/http/http_client.cpp index 605a18174b8f8c..f56592125d427e 100644 --- a/be/src/http/http_client.cpp +++ b/be/src/http/http_client.cpp @@ -161,9 +161,6 @@ Status HttpClient::execute(const std::function