Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better ambiguous column detection #7358

Merged
merged 12 commits into from
Oct 24, 2019
3 changes: 2 additions & 1 deletion dbms/src/Interpreters/CollectJoinOnKeysVisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,8 @@ size_t CollectJoinOnKeysMatcher::getTableForIdentifiers(std::vector<const ASTIde

/// Column name could be cropped to a short form in TranslateQualifiedNamesVisitor.
/// In this case it saves membership in IdentifierSemantic.
size_t membership = IdentifierSemantic::getMembership(*identifier);
auto opt = IdentifierSemantic::getMembership(*identifier);
size_t membership = opt ? (*opt + 1) : 0;

if (!membership)
{
Expand Down
53 changes: 16 additions & 37 deletions dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,12 @@ class CheckExpressionVisitorData
using TypeToVisit = const ASTFunction;

CheckExpressionVisitorData(const std::vector<JoinedTable> & tables_)
: tables(tables_)
: joined_tables(tables_)
, ands_only(true)
{}
{
for (auto & joined : joined_tables)
tables.push_back(joined.table);
}

void visit(const ASTFunction & node, const ASTPtr & ast)
{
Expand Down Expand Up @@ -156,7 +159,8 @@ class CheckExpressionVisitorData
}

private:
const std::vector<JoinedTable> & tables;
const std::vector<JoinedTable> & joined_tables;
std::vector<DatabaseAndTableWithAlias> tables;
std::map<size_t, std::vector<ASTPtr>> asts_to_join_on;
bool ands_only;

Expand All @@ -180,52 +184,27 @@ class CheckExpressionVisitorData
/// @return table position to attach expression to or 0.
size_t checkIdentifiers(const ASTIdentifier & left, const ASTIdentifier & right)
{
/// {best_match, best_table_pos}
std::pair<size_t, size_t> left_best{0, 0};
std::pair<size_t, size_t> right_best{0, 0};
size_t left_table_pos = 0;
bool left_match = IdentifierSemantic::chooseTable(left, tables, left_table_pos);

for (size_t i = 0; i < tables.size(); ++i)
{
size_t match = IdentifierSemantic::canReferColumnToTable(left, tables[i].table);
if (match > left_best.first)
{
left_best.first = match;
left_best.second = i;
}

match = IdentifierSemantic::canReferColumnToTable(right, tables[i].table);
if (match > right_best.first)
{
right_best.first = match;
right_best.second = i;
}
}
size_t right_table_pos = 0;
bool right_match = IdentifierSemantic::chooseTable(right, tables, right_table_pos);

if (left_best.first && right_best.first && (left_best.second != right_best.second))
if (left_match && right_match && (left_table_pos != right_table_pos))
{
size_t table_pos = std::max(left_best.second, right_best.second);
if (tables[table_pos].canAttachOnExpression())
size_t table_pos = std::max(left_table_pos, right_table_pos);
if (joined_tables[table_pos].canAttachOnExpression())
return table_pos;
}
return 0;
}

size_t checkIdentifier(const ASTIdentifier & identifier)
{
size_t best_match = 0;
size_t best_table_pos = 0;
bool match = IdentifierSemantic::chooseTable(identifier, tables, best_table_pos);

for (size_t i = 0; i < tables.size(); ++i)
{
size_t match = IdentifierSemantic::canReferColumnToTable(identifier, tables[i].table);
if (match > best_match)
{
best_match = match;
best_table_pos = i;
}
}

if (best_match && tables[best_table_pos].canAttachOnExpression())
if (match && joined_tables[best_table_pos].canAttachOnExpression())
return best_table_pos;
return 0;
}
Expand Down
14 changes: 3 additions & 11 deletions dbms/src/Interpreters/FindIdentifierBestTableVisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,9 @@ void FindIdentifierBestTableData::visit(ASTIdentifier & identifier, ASTPtr &)
}
else
{
// FIXME: make a better matcher using `names`?
size_t best_match = 0;
for (const auto & table_names : tables)
{
if (size_t match = IdentifierSemantic::canReferColumnToTable(identifier, table_names.first))
if (match > best_match)
{
best_match = match;
best_table = &table_names.first;
}
}
size_t best_table_pos = 0;
if (IdentifierSemantic::chooseTable(identifier, tables, best_table_pos))
best_table = &tables[best_table_pos].first;
}

identifier_table.emplace_back(&identifier, best_table);
Expand Down
150 changes: 108 additions & 42 deletions dbms/src/Interpreters/IdentifierSemantic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,61 @@
namespace DB
{

namespace ErrorCodes
{
extern const int AMBIGUOUS_COLUMN_NAME;
}

namespace
{

const DatabaseAndTableWithAlias & extractTable(const DatabaseAndTableWithAlias & table)
{
return table;
}

const DatabaseAndTableWithAlias & extractTable(const TableWithColumnNames & table)
{
return table.first;
}

template <typename T>
IdentifierSemantic::ColumnMatch tryChooseTable(const ASTIdentifier & identifier, const std::vector<T> & tables,
size_t & best_table_pos, bool allow_ambiguous)
{
using ColumnMatch = IdentifierSemantic::ColumnMatch;

best_table_pos = 0;
auto best_match = ColumnMatch::NoMatch;
size_t same_match = 0;

for (size_t i = 0; i < tables.size(); ++i)
{
auto match = IdentifierSemantic::canReferColumnToTable(identifier, extractTable(tables[i]));
if (match != ColumnMatch::NoMatch)
{
if (match > best_match)
{
best_match = match;
best_table_pos = i;
same_match = 0;
}
else if (match == best_match)
++same_match;
}
}

if ((best_match != ColumnMatch::NoMatch) && same_match)
{
if (!allow_ambiguous)
throw Exception("Ambiguous column '" + identifier.name + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME);
return ColumnMatch::Ambiguous;
}
return best_match;
}

}

std::optional<String> IdentifierSemantic::getColumnName(const ASTIdentifier & node)
{
if (!node.semantic->special)
Expand Down Expand Up @@ -37,26 +92,36 @@ std::optional<String> IdentifierSemantic::getTableName(const ASTPtr & ast)
return {};
}

void IdentifierSemantic::setNeedLongName(ASTIdentifier & identifier, bool value)
{
identifier.semantic->need_long_name = value;
}

bool IdentifierSemantic::canBeAlias(const ASTIdentifier & identifier)
{
return identifier.semantic->can_be_alias;
}

void IdentifierSemantic::setMembership(ASTIdentifier & identifier, size_t table_no)
void IdentifierSemantic::setMembership(ASTIdentifier & identifier, size_t table_pos)
{
identifier.semantic->membership = table_no;
identifier.semantic->membership = table_pos;
identifier.semantic->can_be_alias = false;
}

size_t IdentifierSemantic::getMembership(const ASTIdentifier & identifier)
std::optional<size_t> IdentifierSemantic::getMembership(const ASTIdentifier & identifier)
{
return identifier.semantic->membership;
}

bool IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const std::vector<DatabaseAndTableWithAlias> & tables,
size_t & best_table_pos, bool ambiguous)
{
static constexpr auto no_match = IdentifierSemantic::ColumnMatch::NoMatch;
return tryChooseTable<DatabaseAndTableWithAlias>(identifier, tables, best_table_pos, ambiguous) != no_match;
}

bool IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const std::vector<TableWithColumnNames> & tables,
size_t & best_table_pos, bool ambiguous)
{
static constexpr auto no_match = IdentifierSemantic::ColumnMatch::NoMatch;
return tryChooseTable<TableWithColumnNames>(identifier, tables, best_table_pos, ambiguous) != no_match;
}

std::pair<String, String> IdentifierSemantic::extractDatabaseAndTable(const ASTIdentifier & identifier)
{
if (identifier.name_parts.size() > 2)
Expand Down Expand Up @@ -84,24 +149,49 @@ bool IdentifierSemantic::doesIdentifierBelongTo(const ASTIdentifier & identifier
return false;
}

size_t IdentifierSemantic::canReferColumnToTable(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table)
IdentifierSemantic::ColumnMatch IdentifierSemantic::canReferColumnToTable(const ASTIdentifier & identifier,
const DatabaseAndTableWithAlias & db_and_table)
{
/// database.table.column
if (doesIdentifierBelongTo(identifier, db_and_table.database, db_and_table.table))
return 2;
return ColumnMatch::DbAndTable;

/// alias.column
if (doesIdentifierBelongTo(identifier, db_and_table.alias))
return ColumnMatch::TableAlias;

/// table.column or alias.column.
if (doesIdentifierBelongTo(identifier, db_and_table.table) ||
doesIdentifierBelongTo(identifier, db_and_table.alias))
return 1;
/// table.column
if (doesIdentifierBelongTo(identifier, db_and_table.table))
{
if (!db_and_table.alias.empty())
return ColumnMatch::AliasedTableName;
else
return ColumnMatch::TableName;
}

return 0;
return ColumnMatch::NoMatch;
}

/// Checks that ast is ASTIdentifier and remove num_qualifiers_to_strip components from left.
/// Example: 'database.table.name' -> (num_qualifiers_to_strip = 2) -> 'name'.
void IdentifierSemantic::setColumnShortName(ASTIdentifier & identifier, size_t to_strip)
/// Strip qualificators from left side of column name.
/// Example: 'database.table.name' -> 'name'.
void IdentifierSemantic::setColumnShortName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table)
{
auto match = IdentifierSemantic::canReferColumnToTable(identifier, db_and_table);
size_t to_strip = 0;
switch (match)
{
case ColumnMatch::TableName:
case ColumnMatch::AliasedTableName:
case ColumnMatch::TableAlias:
to_strip = 1;
break;
case ColumnMatch::DbAndTable:
to_strip = 2;
break;
default:
break;
}

if (!to_strip)
return;

Expand All @@ -117,18 +207,6 @@ void IdentifierSemantic::setColumnShortName(ASTIdentifier & identifier, size_t t
identifier.name.swap(new_name);
}

void IdentifierSemantic::setColumnNormalName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table)
{
size_t match = IdentifierSemantic::canReferColumnToTable(identifier, db_and_table);

setColumnShortName(identifier, match);
if (match)
identifier.semantic->can_be_alias = false;

if (identifier.semantic->need_long_name)
setColumnLongName(identifier, db_and_table);
}

void IdentifierSemantic::setColumnLongName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table)
{
String prefix = db_and_table.getQualifiedNamePrefix();
Expand All @@ -141,16 +219,4 @@ void IdentifierSemantic::setColumnLongName(ASTIdentifier & identifier, const Dat
}
}

String IdentifierSemantic::columnNormalName(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table)
{
ASTPtr copy = identifier.clone();
setColumnNormalName(copy->as<ASTIdentifier &>(), db_and_table);
return copy->getAliasOrColumnName();
}

String IdentifierSemantic::columnLongName(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table)
{
return db_and_table.getQualifiedNamePrefix() + identifier.shortName();
}

}
31 changes: 21 additions & 10 deletions dbms/src/Interpreters/IdentifierSemantic.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#pragma once

#include <optional>

#include <Parsers/ASTIdentifier.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>

Expand All @@ -9,14 +11,23 @@ namespace DB
struct IdentifierSemanticImpl
{
bool special = false; /// for now it's 'not a column': tables, subselects and some special stuff like FORMAT
bool need_long_name = false;/// if column presents in multiple tables we need qualified names
bool can_be_alias = true; /// if it's a cropped name it could not be an alias
size_t membership = 0; /// table position in join (starting from 1) detected by qualifier or 0 if not detected.
std::optional<size_t> membership; /// table position in join
};

/// Static calss to manipulate IdentifierSemanticImpl via ASTIdentifier
struct IdentifierSemantic
{
enum class ColumnMatch
{
NoMatch,
AliasedTableName, /// column qualified with table name (but table has an alias so its priority is lower than TableName)
TableName, /// column qualified with table name
DbAndTable, /// column qualified with database and table name
TableAlias, /// column qualified with table alias
Ambiguous,
};

/// @returns name for column identifiers
static std::optional<String> getColumnName(const ASTIdentifier & node);
static std::optional<String> getColumnName(const ASTPtr & ast);
Expand All @@ -26,20 +37,20 @@ struct IdentifierSemantic
static std::optional<String> getTableName(const ASTPtr & ast);
static std::pair<String, String> extractDatabaseAndTable(const ASTIdentifier & identifier);

static size_t canReferColumnToTable(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table);
static String columnNormalName(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table);
static String columnLongName(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table);
static void setColumnNormalName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table);
static ColumnMatch canReferColumnToTable(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table);
static void setColumnShortName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table);
static void setColumnLongName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table);
static void setNeedLongName(ASTIdentifier & identifier, bool); /// if set setColumnNormalName makes qualified name
static bool canBeAlias(const ASTIdentifier & identifier);
static void setMembership(ASTIdentifier & identifier, size_t table_no);
static size_t getMembership(const ASTIdentifier & identifier);
static void setMembership(ASTIdentifier &, size_t table_no);
static std::optional<size_t> getMembership(const ASTIdentifier & identifier);
static bool chooseTable(const ASTIdentifier &, const std::vector<DatabaseAndTableWithAlias> & tables, size_t & best_table_pos,
bool ambiguous = false);
static bool chooseTable(const ASTIdentifier &, const std::vector<TableWithColumnNames> & tables, size_t & best_table_pos,
bool ambiguous = false);

private:
static bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & database, const String & table);
static bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & table);
static void setColumnShortName(ASTIdentifier & identifier, size_t match);
};

}