Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improvement and fix for use_structure_from_insertion_table_in_table_functions #47962

Merged
merged 17 commits into from
Apr 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
141 changes: 135 additions & 6 deletions src/Analyzer/Passes/QueryAnalysisPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>

#include <TableFunctions/TableFunctionFactory.h>
#include <Formats/FormatFactory.h>

#include <Databases/IDatabase.h>

Expand Down Expand Up @@ -75,6 +76,7 @@
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/QueryTreeBuilder.h>
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/Identifier.h>

namespace ProfileEvents
{
Expand Down Expand Up @@ -112,6 +114,8 @@ namespace ErrorCodes
extern const int ALIAS_REQUIRED;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int UNKNOWN_TABLE;
extern const int ILLEGAL_COLUMN;
extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
}

/** Query analyzer implementation overview. Please check documentation in QueryAnalysisPass.h before.
Expand Down Expand Up @@ -6079,6 +6083,18 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table
scope.table_expression_node_to_data.emplace(table_expression_node, std::move(table_expression_data));
}

/// Checks whether the arguments of `function` contain an identifier.
/// Returns true if any argument is an IdentifierNode, or is a FunctionNode
/// for which this check (applied recursively to its own arguments) succeeds;
/// returns false otherwise.
bool findIdentifier(const FunctionNode & function)
{
for (const auto & argument_node : function.getArguments())
{
/// A direct identifier argument settles the answer immediately.
const bool is_identifier = argument_node->as<IdentifierNode>() != nullptr;
if (is_identifier)
return true;

/// Otherwise descend into nested function calls, if this argument is one.
const auto * nested_function = argument_node->as<FunctionNode>();
if (nested_function != nullptr && findIdentifier(*nested_function))
return true;
}

return false;
}

/// Resolve table function node in scope
void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
IdentifierResolveScope & scope,
Expand All @@ -6090,12 +6106,11 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
if (!nested_table_function)
expressions_visitor.visit(table_function_node_typed.getArgumentsNode());

const auto & table_function_factory = TableFunctionFactory::instance();
const auto & table_function_name = table_function_node_typed.getTableFunctionName();

auto & scope_context = scope.context;

TableFunctionPtr table_function_ptr = table_function_factory.tryGet(table_function_name, scope_context);
TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().tryGet(table_function_name, scope_context);
if (!table_function_ptr)
{
auto hints = TableFunctionFactory::instance().getHints(table_function_name);
Expand All @@ -6110,17 +6125,131 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
table_function_name);
}

uint64_t use_structure_from_insertion_table_in_table_functions = scope_context->getSettingsRef().use_structure_from_insertion_table_in_table_functions;
if (!nested_table_function &&
scope_context->getSettingsRef().use_structure_from_insertion_table_in_table_functions &&
use_structure_from_insertion_table_in_table_functions &&
scope_context->hasInsertionTable() &&
table_function_ptr->needStructureHint())
{
const auto & insertion_table = scope_context->getInsertionTable();
if (!insertion_table.empty())
{
auto insertion_table_storage = DatabaseCatalog::instance().getTable(insertion_table, scope_context);
const auto & structure_hint = insertion_table_storage->getInMemoryMetadataPtr()->columns;
table_function_ptr->setStructureHint(structure_hint);
const auto & insert_structure = DatabaseCatalog::instance().getTable(insertion_table, scope_context)->getInMemoryMetadataPtr()->getColumns();
DB::ColumnsDescription structure_hint;

bool use_columns_from_insert_query = true;

/// Insert table matches columns against SELECT expression by position, so we want to map
/// insert table columns to table function columns through names from SELECT expression.

auto insert_column = insert_structure.begin();
auto insert_structure_end = insert_structure.end(); /// end iterator of the range covered by possible asterisk
auto virtual_column_names = table_function_ptr->getVirtualsToCheckBeforeUsingStructureHint();
bool asterisk = false;
const auto & expression_list = scope.scope_node->as<QueryNode &>().getProjection();
auto expression = expression_list.begin();

/// Walk the SELECT expression list and match each expression to the insert-table column
/// whose type will be used as a hint for the file structure inference.
for (; expression != expression_list.end() && insert_column != insert_structure_end; ++expression)
{
if (auto * identifier_node = (*expression)->as<IdentifierNode>())
{

if (!virtual_column_names.contains(identifier_node->getIdentifier().getFullName()))
{
if (asterisk)
{
if (use_structure_from_insertion_table_in_table_functions == 1)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Asterisk cannot be mixed with column list in INSERT SELECT query.");

use_columns_from_insert_query = false;
break;
}

structure_hint.add({ identifier_node->getIdentifier().getFullName(), insert_column->type });
}

/// Once we hit asterisk we want to find end of the range covered by asterisk
/// contributing every further SELECT expression to the tail of insert structure
if (asterisk)
--insert_structure_end;
else
++insert_column;
}
else if (auto * matcher_node = (*expression)->as<MatcherNode>(); matcher_node && matcher_node->getMatcherType() == MatcherNodeType::ASTERISK)
{
if (asterisk)
{
if (use_structure_from_insertion_table_in_table_functions == 1)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Only one asterisk can be used in INSERT SELECT query.");

use_columns_from_insert_query = false;
break;
}
if (!structure_hint.empty())
{
if (use_structure_from_insertion_table_in_table_functions == 1)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Asterisk cannot be mixed with column list in INSERT SELECT query.");

use_columns_from_insert_query = false;
break;
}

asterisk = true;
}
else if (auto * function = (*expression)->as<FunctionNode>())
{
if (use_structure_from_insertion_table_in_table_functions == 2 && findIdentifier(*function))
{
use_columns_from_insert_query = false;
break;
}

/// Once we hit asterisk we want to find end of the range covered by asterisk
/// contributing every further SELECT expression to the tail of insert structure
if (asterisk)
--insert_structure_end;
else
++insert_column;
}
else
{
/// Once we hit asterisk we want to find end of the range covered by asterisk
/// contributing every further SELECT expression to the tail of insert structure
if (asterisk)
--insert_structure_end;
else
++insert_column;
}
}

if (use_structure_from_insertion_table_in_table_functions == 2 && !asterisk)
{
/// For input function we should check if input format supports reading subset of columns.
if (table_function_ptr->getName() == "input")
use_columns_from_insert_query = FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(scope.context->getInsertFormat());
else
use_columns_from_insert_query = table_function_ptr->supportsReadingSubsetOfColumns();
}

if (use_columns_from_insert_query)
{
if (expression == expression_list.end())
{
/// Append tail of insert structure to the hint
if (asterisk)
{
for (; insert_column != insert_structure_end; ++insert_column)
structure_hint.add({ insert_column->name, insert_column->type });
}

if (!structure_hint.empty())
table_function_ptr->setStructureHint(structure_hint);

} else if (use_structure_from_insertion_table_in_table_functions == 1)
throw Exception(ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH, "Number of columns in insert table less than required by SELECT expression.");
}
}
}

Expand Down
141 changes: 111 additions & 30 deletions src/Interpreters/Context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,8 @@ namespace ErrorCodes
extern const int UNKNOWN_READ_METHOD;
extern const int NOT_IMPLEMENTED;
extern const int UNKNOWN_FUNCTION;
extern const int ILLEGAL_COLUMN;
extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
}


Expand Down Expand Up @@ -1395,6 +1397,22 @@ void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String
}
}

/// Checks whether the arguments of `function` contain an identifier.
/// Returns false for a null `function`, for a function without arguments, and
/// when the arguments node is not an ASTExpressionList. Otherwise returns true
/// if any argument is an ASTIdentifier, or is an ASTFunction for which this
/// check (applied recursively) succeeds.
static bool findIdentifier(const ASTFunction * function)
{
if (function == nullptr)
return false;

if (!function->arguments)
return false;

const auto * argument_list = function->arguments->as<ASTExpressionList>();
if (argument_list == nullptr)
return false;

for (const auto & child : argument_list->children)
{
/// A direct identifier argument settles the answer immediately.
if (child->as<ASTIdentifier>() != nullptr)
return true;

/// Otherwise descend into nested function calls, if this argument is one.
const auto * nested_function = child->as<ASTFunction>();
if (nested_function != nullptr && findIdentifier(nested_function))
return true;
}

return false;
}

StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const ASTSelectQuery * select_query_hint)
{
Expand Down Expand Up @@ -1441,62 +1459,125 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
}
throw;
}
if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint() && hasInsertionTable())

uint64_t use_structure_from_insertion_table_in_table_functions = getSettingsRef().use_structure_from_insertion_table_in_table_functions;
if (use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint() && hasInsertionTable())
{
const auto & structure_hint = DatabaseCatalog::instance().getTable(getInsertionTable(), shared_from_this())->getInMemoryMetadataPtr()->getColumns();
const auto & insert_structure = DatabaseCatalog::instance().getTable(getInsertionTable(), shared_from_this())->getInMemoryMetadataPtr()->getColumns();
DB::ColumnsDescription structure_hint;

bool use_columns_from_insert_query = true;

/// use_structure_from_insertion_table_in_table_functions=2 means `auto`
if (select_query_hint && getSettingsRef().use_structure_from_insertion_table_in_table_functions == 2)
/// Insert table matches columns against SELECT expression by position, so we want to map
/// insert table columns to table function columns through names from SELECT expression.

auto insert_column = insert_structure.begin();
auto insert_structure_end = insert_structure.end(); /// end iterator of the range covered by possible asterisk
auto virtual_column_names = table_function_ptr->getVirtualsToCheckBeforeUsingStructureHint();
bool asterisk = false;
const auto & expression_list = select_query_hint->select()->as<ASTExpressionList>()->children;
const auto * expression = expression_list.begin();

/// Walk the SELECT expression list and match each expression to the insert-table column
/// whose type will be used as a hint for the file structure inference.
for (; expression != expression_list.end() && insert_column != insert_structure_end; ++expression)
{
const auto * expression_list = select_query_hint->select()->as<ASTExpressionList>();
std::unordered_set<String> virtual_column_names = table_function_ptr->getVirtualsToCheckBeforeUsingStructureHint();
Names columns_names;
bool have_asterisk = false;
/// First, check if we have only identifiers, asterisk and literals in select expression,
/// and if no, we cannot use the structure from insertion table.
for (const auto & expression : expression_list->children)
if (auto * identifier = (*expression)->as<ASTIdentifier>())
{
if (auto * identifier = expression->as<ASTIdentifier>())
if (!virtual_column_names.contains(identifier->name()))
{
columns_names.push_back(identifier->name());
if (asterisk)
{
if (use_structure_from_insertion_table_in_table_functions == 1)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Asterisk cannot be mixed with column list in INSERT SELECT query.");

use_columns_from_insert_query = false;
break;
}

structure_hint.add({ identifier->name(), insert_column->type });
}
else if (expression->as<ASTAsterisk>())

/// Once we hit asterisk we want to find end of the range covered by asterisk
/// contributing every further SELECT expression to the tail of insert structure
if (asterisk)
--insert_structure_end;
Avogar marked this conversation as resolved.
Show resolved Hide resolved
else
++insert_column;
}
else if ((*expression)->as<ASTAsterisk>())
{
if (asterisk)
{
have_asterisk = true;
if (use_structure_from_insertion_table_in_table_functions == 1)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Only one asterisk can be used in INSERT SELECT query.");

use_columns_from_insert_query = false;
break;
}
else if (!expression->as<ASTLiteral>())
if (!structure_hint.empty())
{
if (use_structure_from_insertion_table_in_table_functions == 1)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Asterisk cannot be mixed with column list in INSERT SELECT query.");

use_columns_from_insert_query = false;
break;
}
}

/// Check that all identifiers are column names from insertion table and not virtual column names from storage.
for (const auto & column_name : columns_names)
asterisk = true;
}
else if (auto * function = (*expression)->as<ASTFunction>())
{
if (!structure_hint.has(column_name) || virtual_column_names.contains(column_name))
if (use_structure_from_insertion_table_in_table_functions == 2 && findIdentifier(function))
{
use_columns_from_insert_query = false;
break;
}
}

/// If we don't have asterisk but only subset of columns, we should use
/// structure from insertion table only in case when table function
/// supports reading subset of columns from data.
if (use_columns_from_insert_query && !have_asterisk && !columns_names.empty())
/// Once we hit asterisk we want to find end of the range covered by asterisk
/// contributing every further SELECT expression to the tail of insert structure
if (asterisk)
--insert_structure_end;
else
++insert_column;
}
else
{
/// For input function we should check if input format supports reading subset of columns.
if (table_function_ptr->getName() == "input")
use_columns_from_insert_query = FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(getInsertFormat());
/// Once we hit asterisk we want to find end of the range covered by asterisk
/// contributing every further SELECT expression to the tail of insert structure
if (asterisk)
--insert_structure_end;
else
use_columns_from_insert_query = table_function_ptr->supportsReadingSubsetOfColumns();
++insert_column;
Avogar marked this conversation as resolved.
Show resolved Hide resolved
}
}

if (use_structure_from_insertion_table_in_table_functions == 2 && !asterisk)
{
/// For input function we should check if input format supports reading subset of columns.
if (table_function_ptr->getName() == "input")
use_columns_from_insert_query = FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(getInsertFormat());
else
use_columns_from_insert_query = table_function_ptr->supportsReadingSubsetOfColumns();
}

if (use_columns_from_insert_query)
table_function_ptr->setStructureHint(structure_hint);
{
if (expression == expression_list.end())
{
/// Append tail of insert structure to the hint
if (asterisk)
{
for (; insert_column != insert_structure_end; ++insert_column)
structure_hint.add({ insert_column->name, insert_column->type });
}

if (!structure_hint.empty())
table_function_ptr->setStructureHint(structure_hint);
Avogar marked this conversation as resolved.
Show resolved Hide resolved

} else if (use_structure_from_insertion_table_in_table_functions == 1)
throw Exception(ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH, "Number of columns in insert table less than required by SELECT expression.");
}
}

res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
\N 0
\N 1
1 2
\N 42
\N 42
\N 42
\N 42
42
42
\N
\N
\N