Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove stringstream performance on very short queries #391

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions dbms/include/DB/Interpreters/LogicalExpressionsOptimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,12 @@ class LogicalExpressionsOptimizer final
*/
struct OrWithExpression
{
OrWithExpression(ASTFunction * or_function_, const std::string & expression_,
OrWithExpression(ASTFunction * or_function_, const IAST::Hash & expression_,
const std::string & alias_);
bool operator<(const OrWithExpression & rhs) const;

ASTFunction * or_function;
const std::string expression;
const IAST::Hash expression;
const std::string alias;
};

Expand Down
16 changes: 14 additions & 2 deletions dbms/include/DB/Parsers/IAST.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
#include <DB/Parsers/StringRange.h>


class SipHash;


namespace DB
{

Expand All @@ -25,6 +28,8 @@ class IAST;
using ASTPtr = std::shared_ptr<IAST>;
using ASTs = std::vector<ASTPtr>;

class WriteBuffer;


/** Элемент синтаксического дерева (в дальнейшем - направленного ациклического графа с элементами семантики)
*/
Expand Down Expand Up @@ -64,10 +69,17 @@ class IAST
/** Получить глубокую копию дерева. */
virtual ASTPtr clone() const = 0;

/** Получить текст, который идентифицирует этот элемент и всё поддерево.
* Обычно он содержит идентификатор элемента и getTreeID от всех детей.
/** Get the text that describes and identifies this element and its subtree.
* Usually it consists of the element's id and the getTreeID of all children.
*/
String getTreeID() const;
void getTreeIDImpl(WriteBuffer & out) const;

/** Get hash code, identifying this element and its subtree.
*/
using Hash = std::pair<UInt64, UInt64>;
Hash getTreeHash() const;
void getTreeHashImpl(SipHash & hash_state) const;

void dumpTree(std::ostream & ostr, size_t indent = 0) const
{
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Analyzers/CollectAliases.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ static void processImpl(const ASTPtr & ast, CollectAliases::Aliases & aliases, C
{
auto it_inserted = aliases.emplace(alias, CollectAliases::AliasInfo(ast, kind));

if (!it_inserted.second && ast->getTreeID() != it_inserted.first->second.node->getTreeID())
if (!it_inserted.second && ast->getTreeHash() != it_inserted.first->second.node->getTreeHash())
{
std::stringstream message;
message << "Different expressions with the same alias " << backQuoteIfNeed(alias) << ":\n";
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Interpreters/ExpressionAnalyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -658,7 +658,7 @@ void ExpressionAnalyzer::addASTAliases(ASTPtr & ast, int ignore_levels)
String alias = ast->tryGetAlias();
if (!alias.empty())
{
if (aliases.count(alias) && ast->getTreeID() != aliases[alias]->getTreeID())
if (aliases.count(alias) && ast->getTreeHash() != aliases[alias]->getTreeHash())
throw Exception("Different expressions with the same alias " + alias, ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS);

aliases[alias] = ast;
Expand Down
4 changes: 2 additions & 2 deletions dbms/src/Interpreters/LogicalExpressionsOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ namespace ErrorCodes


LogicalExpressionsOptimizer::OrWithExpression::OrWithExpression(ASTFunction * or_function_,
const std::string & expression_, const std::string & alias_)
const IAST::Hash & expression_, const std::string & alias_)
: or_function(or_function_), expression(expression_), alias(alias_)
{
}
Expand Down Expand Up @@ -130,7 +130,7 @@ void LogicalExpressionsOptimizer::collectDisjunctiveEqualityChains()
auto literal = typeid_cast<ASTLiteral *>(&*(equals_expression_list->children[1]));
if (literal != nullptr)
{
auto expr_lhs = equals_expression_list->children[0]->getTreeID();
auto expr_lhs = equals_expression_list->children[0]->getTreeHash();
OrWithExpression or_with_expression{function, expr_lhs, function->tryGetAlias()};
disjunctive_equality_chains_map[or_with_expression].functions.push_back(equals);
found_chain = true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1250,7 +1250,7 @@ bool equals(const DB::ASTPtr & lhs, const DB::ASTPtr & rhs)
DB::ASTPtr rhs_reordered = rhs->clone();
reorder(&*rhs_reordered);

return lhs_reordered->getTreeID() == rhs_reordered->getTreeID();
return lhs_reordered->getTreeHash() == rhs_reordered->getTreeHash();
}

void reorder(DB::IAST * ast)
Expand All @@ -1267,7 +1267,7 @@ void reorder(DB::IAST * ast)

std::sort(children.begin(), children.end(), [](const DB::ASTPtr & lhs, const DB::ASTPtr & rhs)
{
return lhs->getTreeID() < rhs->getTreeID();
return lhs->getTreeHash() < rhs->getTreeHash();
});
}

Expand Down
4 changes: 2 additions & 2 deletions dbms/src/Interpreters/tests/logical_expressions_optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ bool equals(const DB::ASTPtr & lhs, const DB::ASTPtr & rhs)
DB::ASTPtr rhs_reordered = rhs->clone();
reorder(&*rhs_reordered);

return lhs_reordered->getTreeID() == rhs_reordered->getTreeID();
return lhs_reordered->getTreeHash() == rhs_reordered->getTreeHash();
}

void reorderImpl(DB::IAST * ast)
Expand All @@ -269,7 +269,7 @@ void reorderImpl(DB::IAST * ast)

std::sort(children.begin(), children.end(), [](const DB::ASTPtr & lhs, const DB::ASTPtr & rhs)
{
return lhs->getTreeID() < rhs->getTreeID();
return lhs->getTreeHash() < rhs->getTreeHash();
});
}

Expand Down
47 changes: 40 additions & 7 deletions dbms/src/Parsers/IAST.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#include <DB/IO/WriteBufferFromOStream.h>
#include <DB/IO/WriteBufferFromString.h>
#include <DB/IO/WriteHelpers.h>
#include <DB/IO/Operators.h>
#include <DB/Common/SipHash.h>
#include <DB/Parsers/IAST.h>


Expand Down Expand Up @@ -61,22 +63,53 @@ size_t IAST::checkSize(size_t max_size) const

String IAST::getTreeID() const
{
std::stringstream s;
s << getID();
String res;
{
WriteBufferFromString out(res);
getTreeIDImpl(out);
}
return res;
}


void IAST::getTreeIDImpl(WriteBuffer & out) const
{
out << getID();

if (!children.empty())
{
s << "(";
out << '(';
for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it)
{
if (it != children.begin())
s << ", ";
s << (*it)->getTreeID();
out << ", ";
(*it)->getTreeIDImpl(out);
}
s << ")";
out << ')';
}
}


return s.str();
/** Compute the hash that identifies this element together with its subtree.
  * A fresh SipHash state is filled by getTreeHashImpl, and the value obtained
  * from get128 is returned as the two halves of IAST::Hash.
  */
IAST::Hash IAST::getTreeHash() const
{
    SipHash state;
    getTreeHashImpl(state);

    IAST::Hash result;
    state.get128(result.first, result.second);
    return result;
}


/** Feed this element and its subtree into an existing hash state.
  * The order is: the bytes of getID(), then the raw bytes of the child count,
  * then every child in order, recursively.
  */
void IAST::getTreeHashImpl(SipHash & hash_state) const
{
    const auto node_id = getID();
    hash_state.update(node_id.data(), node_id.size());

    /// The count is hashed as the in-memory bytes of a size_t.
    const size_t child_count = children.size();
    hash_state.update(reinterpret_cast<const char *>(&child_count), sizeof(child_count));

    for (auto it = children.begin(); it != children.end(); ++it)
        (*it)->getTreeHashImpl(hash_state);
}


Expand Down