Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parse Tuple as literal if possible #9740

Merged
merged 6 commits into from
Mar 28, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
69 changes: 45 additions & 24 deletions dbms/src/Interpreters/evaluateConstantExpression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,9 @@ namespace
using Conjunction = ColumnsWithTypeAndName;
using Disjunction = std::vector<Conjunction>;

Disjunction analyzeEquals(const ASTIdentifier * identifier, const ASTLiteral * literal, const ExpressionActionsPtr & expr)
Disjunction analyzeEquals(const ASTIdentifier * identifier, const Field & value, const ExpressionActionsPtr & expr)
{
if (!identifier || !literal)
if (!identifier || value.isNull())
{
return {};
}
Expand All @@ -116,10 +116,10 @@ namespace
if (name == identifier->name)
{
ColumnWithTypeAndName column;
Field value = convertFieldToType(literal->value, *type);
if (!literal->value.isNull() && value.isNull())
Field converted = convertFieldToType(value, *type);
if (converted.isNull())
return {};
column.column = type->createColumnConst(1, value);
column.column = type->createColumnConst(1, converted);
column.name = name;
column.type = type;
return {{std::move(column)}};
Expand All @@ -129,6 +129,16 @@ namespace
return {};
}

/// Convenience overload for an AST literal node.
/// Yields an empty disjunction when either pointer is null; otherwise passes
/// the literal's stored value on to the overload of analyzeEquals that accepts it.
Disjunction analyzeEquals(const ASTIdentifier * identifier, const ASTLiteral * literal, const ExpressionActionsPtr & expr)
{
    if (identifier && literal)
        return analyzeEquals(identifier, literal->value, expr);

    return {};
}

Disjunction andDNF(const Disjunction & left, const Disjunction & right)
{
if (left.empty())
Expand Down Expand Up @@ -174,33 +184,44 @@ namespace
const auto * left = fn->arguments->children.front().get();
const auto * right = fn->arguments->children.back().get();
const auto * identifier = left->as<ASTIdentifier>();
const auto * inner_fn = right->as<ASTFunction>();

if (!inner_fn)
{
return {};
}

const auto * tuple = inner_fn->children.front()->as<ASTExpressionList>();
Disjunction result;

if (!tuple)
if (const auto * tuple_func = right->as<ASTFunction>(); tuple_func && tuple_func->name == "tuple")
{
return {};
}
const auto * tuple_elements = tuple_func->children.front()->as<ASTExpressionList>();
for (const auto & child : tuple_elements->children)
{
const auto * literal = child->as<ASTLiteral>();
const auto dnf = analyzeEquals(identifier, literal, expr);

Disjunction result;
if (dnf.empty())
{
return {};
}

for (const auto & child : tuple->children)
result.insert(result.end(), dnf.begin(), dnf.end());
}
}
else if (const auto * tuple_literal = right->as<ASTLiteral>();
tuple_literal && tuple_literal->value.getType() == Field::Types::Tuple)
{
const auto * literal = child->as<ASTLiteral>();
const auto dnf = analyzeEquals(identifier, literal, expr);

if (dnf.empty())
const auto & tuple = tuple_literal->value.get<const Tuple &>();
for (const auto & child : tuple)
{
return {};
}
const auto dnf = analyzeEquals(identifier, child, expr);

result.insert(result.end(), dnf.begin(), dnf.end());
if (dnf.empty())
{
return {};
}

result.insert(result.end(), dnf.begin(), dnf.end());
}
}
else
{
return {};
}

return result;
Expand Down
12 changes: 8 additions & 4 deletions dbms/src/Parsers/ASTLiteral.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,21 @@ void ASTLiteral::updateTreeHashImpl(SipHash & hash_state) const

void ASTLiteral::appendColumnNameImpl(WriteBuffer & ostr) const
{
/// Special case for very large arrays. Instead of listing all elements, will use hash of them.
/// 100 - just arbitrary value.
constexpr auto min_elements_for_hashing = 100;

/// Special case for very large arrays and tuples. Instead of listing all elements, will use hash of them.
/// (Otherwise column name will be too long, that will lead to significant slowdown of expression analysis.)
if (value.getType() == Field::Types::Array
&& value.get<const Array &>().size() > 100) /// 100 - just arbitrary value.
auto type = value.getType();
if ((type == Field::Types::Array && value.get<const Array &>().size() > min_elements_for_hashing)
|| (type == Field::Types::Tuple && value.get<const Tuple &>().size() > min_elements_for_hashing))
{
SipHash hash;
applyVisitor(FieldVisitorHash(hash), value);
UInt64 low, high;
hash.get128(low, high);

writeCString("__array_", ostr);
writeCString(type == Field::Types::Array ? "__array_" : "__tuple_", ostr);
writeText(low, ostr);
ostr.write('_');
writeText(high, ostr);
Expand Down
26 changes: 16 additions & 10 deletions dbms/src/Parsers/ExpressionElementParsers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -990,27 +990,31 @@ bool ParserStringLiteral::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte
return true;
}


bool ParserArrayOfLiterals::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
template <typename Collection>
bool ParserCollectionOfLiterals<Collection>::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
if (pos->type != TokenType::OpeningSquareBracket)
if (pos->type != opening_bracket)
return false;

Pos literal_begin = pos;

Array arr;

Collection arr;
ParserLiteral literal_p;

++pos;

while (pos.isValid())
{
if (!arr.empty())
{
if (pos->type == TokenType::ClosingSquareBracket)
if (pos->type == closing_bracket)
{
auto literal = std::make_shared<ASTLiteral>(arr);
std::shared_ptr<ASTLiteral> literal;

/// Parse one-element tuples (e.g. (1)) later as single values for backward compatibility.
if (std::is_same_v<Collection, Tuple> && arr.size() == 1)
return false;

literal = std::make_shared<ASTLiteral>(arr);
literal->begin = literal_begin;
literal->end = ++pos;
node = literal;
Expand All @@ -1022,7 +1026,8 @@ bool ParserArrayOfLiterals::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
}
else
{
expected.add(pos, "comma or closing square bracket");
String message = String("comma or ") + getTokenName(closing_bracket);
expected.add(pos, message.c_str());
return false;
}
}
Expand All @@ -1034,7 +1039,7 @@ bool ParserArrayOfLiterals::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
arr.push_back(literal_node->as<ASTLiteral &>().value);
}

expected.add(pos, "closing square bracket");
expected.add(pos, getTokenName(closing_bracket));
return false;
}

Expand Down Expand Up @@ -1235,6 +1240,7 @@ bool ParserSubstitution::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
bool ParserExpressionElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
return ParserSubquery().parse(pos, node, expected)
|| ParserTupleOfLiterals().parse(pos, node, expected)
|| ParserParenthesisExpression().parse(pos, node, expected)
|| ParserArrayOfLiterals().parse(pos, node, expected)
|| ParserArray().parse(pos, node, expected)
Expand Down
46 changes: 40 additions & 6 deletions dbms/src/Parsers/ExpressionElementParsers.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once

#include <Core/Field.h>
#include <Parsers/IParserBase.h>


Expand Down Expand Up @@ -217,17 +218,50 @@ class ParserStringLiteral : public IParserBase
};


/** An array of literals.
* Arrays can also be parsed as an application of [] operator.
* But parsing the whole array as a whole constant seriously speeds up the analysis of expressions in the case of very large arrays.
* We try to parse the array as an array of literals first (fast path),
* and if it did not work out (when the array consists of complex expressions) - parse as an application of [] operator (slow path).
/** An array or tuple of literals.
* Arrays can also be parsed as an application of the [] operator, and tuples as an application of the 'tuple' function.
* But parsing the whole array/tuple as a single constant seriously speeds up the analysis of expressions in the case of very large collections.
* We try to parse the array or tuple as a collection of literals first (fast path),
* and if that does not work out (when the collection consists of complex expressions),
* we parse it as an application of the [] operator or the 'tuple' function (slow path).
*/
template <typename Collection>
class ParserCollectionOfLiterals : public IParserBase
{
public:
ParserCollectionOfLiterals(TokenType opening_bracket_, TokenType closing_bracket_)
: opening_bracket(opening_bracket_), closing_bracket(closing_bracket_) {}
protected:
const char * getName() const override { return "collection of literals"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
private:
TokenType opening_bracket;
TokenType closing_bracket;
};

/// Parser for a tuple of literals written in round brackets.
/// All parsing is delegated to a ParserCollectionOfLiterals<Tuple> member.
/// NOTE(review): an earlier comment described the literals as having the "same type";
/// no such check is visible in this class - confirm against the collection parser.
class ParserTupleOfLiterals : public IParserBase
{
public:
    ParserCollectionOfLiterals<Tuple> tuple_parser{TokenType::OpeningRoundBracket, TokenType::ClosingRoundBracket};

protected:
    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override
    {
        const bool parsed = tuple_parser.parse(pos, node, expected);
        return parsed;
    }

    const char * getName() const override
    {
        return "tuple";
    }
};

class ParserArrayOfLiterals : public IParserBase
{
public:
ParserCollectionOfLiterals<Array> array_parser{TokenType::OpeningSquareBracket, TokenType::ClosingSquareBracket};
protected:
const char * getName() const override { return "array"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override
{
return array_parser.parse(pos, node, expected);
}
};


Expand Down
27 changes: 20 additions & 7 deletions dbms/src/Parsers/ParserPartition.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,34 @@ bool ParserPartition::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
String fields_str;

const auto * tuple_ast = value->as<ASTFunction>();
bool surrounded_by_parens = false;
if (tuple_ast && tuple_ast->name == "tuple")
{
surrounded_by_parens = true;
const auto * arguments_ast = tuple_ast->arguments->as<ASTExpressionList>();
if (arguments_ast)
fields_count = arguments_ast->children.size();
else
fields_count = 0;
}
else if (auto literal = value->as<ASTLiteral>())
{
if (literal->value.getType() == Field::Types::Tuple)
{
surrounded_by_parens = true;
fields_count = literal->value.get<Tuple &>().size();
}
else
{
fields_count = 1;
fields_str = String(begin->begin, pos->begin - begin->begin);
}
}
else
return false;

if (surrounded_by_parens)
{
Pos left_paren = begin;
Pos right_paren = pos;

Expand All @@ -61,13 +81,6 @@ bool ParserPartition::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)

fields_str = String(left_paren->end, right_paren->begin - left_paren->end);
}
else if (value->as<ASTLiteral>())
{
fields_count = 1;
fields_str = String(begin->begin, pos->begin - begin->begin);
}
else
return false;

partition->value = value;
partition->children.push_back(value);
Expand Down