Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restrict to use SELECT in columns default expressions. #9481

Merged
merged 7 commits into from
Mar 6, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
9 changes: 9 additions & 0 deletions dbms/src/Core/BlockInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,4 +74,13 @@ const BlockMissingValues::RowsBitMask & BlockMissingValues::getDefaultsBitmask(s
return none;
}

bool BlockMissingValues::hasDefaultBits(size_t column_idx) const
{
auto it = rows_mask_by_column_id.find(column_idx);
if (it == rows_mask_by_column_id.end())
return false;

const auto & col_mask = it->second;
return std::find(col_mask.begin(), col_mask.end(), true) != col_mask.end();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok but it can be terribly slow.

}
}
3 changes: 3 additions & 0 deletions dbms/src/Core/BlockInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,10 @@ class BlockMissingValues
public:
using RowsBitMask = std::vector<bool>; /// a bit per row for a column

/// Get mask for column, column_idx is index inside corresponding block
const RowsBitMask & getDefaultsBitmask(size_t column_idx) const;
/// Check that we have to replace default value at least in one of columns
bool hasDefaultBits(size_t column_idx) const;
void setBit(size_t column_idx, size_t row_idx);
bool empty() const { return rows_mask_by_column_id.empty(); }
size_t size() const { return rows_mask_by_column_id.size(); }
Expand Down
11 changes: 10 additions & 1 deletion dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,20 @@ Block AddingDefaultsBlockInputStream::readImpl()
if (block_missing_values.empty())
return res;

/// res block alredy has all columns values, with default value for type
/// (not value specified in table). We identify which columns we need to
/// recalculate with help of block_missing_values.
Block evaluate_block{res};
/// remove columns for recalculation
for (const auto & column : column_defaults)
{
if (evaluate_block.has(column.first))
evaluate_block.erase(column.first);
{
size_t column_idx = res.getPositionByName(column.first);
if (block_missing_values.hasDefaultBits(column_idx))
evaluate_block.erase(column.first);
}
}

if (!evaluate_block.columns())
evaluate_block.insert({ColumnConst::create(ColumnUInt8::create(1, 0), res.rows()), std::make_shared<DataTypeUInt8>(), "_dummy"});
Expand Down
11 changes: 1 addition & 10 deletions dbms/src/Interpreters/InterpreterCreateQuery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ namespace ErrorCodes
extern const int UNKNOWN_DATABASE_ENGINE;
extern const int DUPLICATE_COLUMN;
extern const int DATABASE_ALREADY_EXISTS;
extern const int THERE_IS_NO_DEFAULT_VALUE;
extern const int BAD_DATABASE_FOR_TEMPORARY_TABLE;
extern const int SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY;
extern const int DICTIONARY_ALREADY_EXISTS;
Expand Down Expand Up @@ -315,15 +314,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTExpres
Block defaults_sample_block;
/// set missing types and wrap default_expression's in a conversion-function if necessary
if (!default_expr_list->children.empty())
{
auto syntax_analyzer_result = SyntaxAnalyzer(context).analyze(default_expr_list, column_names_and_types);
const auto actions = ExpressionAnalyzer(default_expr_list, syntax_analyzer_result, context).getActions(true);
for (auto & action : actions->getActions())
if (action.type == ExpressionAction::Type::JOIN || action.type == ExpressionAction::Type::ARRAY_JOIN)
throw Exception("Cannot CREATE table. Unsupported default value that requires ARRAY JOIN or JOIN action", ErrorCodes::THERE_IS_NO_DEFAULT_VALUE);

defaults_sample_block = actions->getSampleBlock();
}
defaults_sample_block = validateColumnsDefaultsAndGetSampleBlock(default_expr_list, column_names_and_types, context);

ColumnsDescription res;
auto name_type_it = column_names_and_types.begin();
Expand Down
15 changes: 13 additions & 2 deletions dbms/src/Interpreters/inplaceBlockConversions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <Parsers/ASTFunction.h>
#include <utility>
#include <DataTypes/DataTypesNumber.h>
#include <Interpreters/RequiredSourceColumnsVisitor.h>


namespace DB
Expand All @@ -32,10 +33,20 @@ ASTPtr defaultRequiredExpressions(Block & block, const NamesAndTypesList & requi

const auto it = column_defaults.find(column.name);

/// expressions must be cloned to prevent modification by the ExpressionAnalyzer
if (it != column_defaults.end())
{
auto cast_func = makeASTFunction("CAST", it->second.expression->clone(), std::make_shared<ASTLiteral>(column.type->getName()));
/// expressions must be cloned to prevent modification by the ExpressionAnalyzer
auto column_default_expr = it->second.expression->clone();
RequiredSourceColumnsVisitor::Data columns_context;
RequiredSourceColumnsVisitor(columns_context).visit(column_default_expr);
NameSet required_columns_names = columns_context.requiredColumns();

for (const auto & required_column_name : required_columns_names)
if (auto rit = column_defaults.find(required_column_name);
rit != column_defaults.end() && rit->second.kind == ColumnDefaultKind::Alias)
default_expr_list->children.emplace_back(setAlias(rit->second.expression->clone(), required_column_name));

auto cast_func = makeASTFunction("CAST", column_default_expr, std::make_shared<ASTLiteral>(column.type->getName()));
default_expr_list->children.emplace_back(setAlias(cast_func, it->first));
}
}
Expand Down
87 changes: 46 additions & 41 deletions dbms/src/Storages/AlterCommands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <DataTypes/NestedUtils.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/addTypeConversionToAST.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/SyntaxAnalyzer.h>
#include <Parsers/ASTAlterQuery.h>
Expand Down Expand Up @@ -664,6 +665,8 @@ void AlterCommands::prepare(const StorageInMemoryMetadata & metadata)
void AlterCommands::validate(const StorageInMemoryMetadata & metadata, const Context & context) const
{
auto all_columns = metadata.columns;
/// Default expression for all added/modified columns
ASTPtr default_expr_list = std::make_shared<ASTExpressionList>();
for (size_t i = 0; i < size(); ++i)
{
auto & command = (*this)[i];
Expand All @@ -684,9 +687,6 @@ void AlterCommands::validate(const StorageInMemoryMetadata & metadata, const Con
throw Exception{"Data type have to be specified for column " + backQuote(column_name) + " to add",
ErrorCodes::BAD_ARGUMENTS};

if (command.default_expression)
validateDefaultExpressionForColumn(command.default_expression, column_name, command.data_type, all_columns, context);

all_columns.add(ColumnDescription(column_name, command.data_type, false));
}
else if (command.type == AlterCommand::MODIFY_COLUMN)
Expand All @@ -699,22 +699,6 @@ void AlterCommands::validate(const StorageInMemoryMetadata & metadata, const Con
else
continue;
}

auto column_in_table = metadata.columns.get(column_name);
if (command.default_expression)
{
if (!command.data_type)
validateDefaultExpressionForColumn(
command.default_expression, column_name, column_in_table.type, all_columns, context);
else
validateDefaultExpressionForColumn(
command.default_expression, column_name, command.data_type, all_columns, context);
}
else if (column_in_table.default_desc.expression && command.data_type)
{
validateDefaultExpressionForColumn(
column_in_table.default_desc.expression, column_name, command.data_type, all_columns, context);
}
}
else if (command.type == AlterCommand::DROP_COLUMN)
{
Expand Down Expand Up @@ -756,31 +740,52 @@ void AlterCommands::validate(const StorageInMemoryMetadata & metadata, const Con
if (metadata.settings_ast == nullptr)
throw Exception{"Cannot alter settings, because table engine doesn't support settings changes", ErrorCodes::BAD_ARGUMENTS};
}
}
}

void AlterCommands::validateDefaultExpressionForColumn(
const ASTPtr default_expression,
const String & column_name,
const DataTypePtr column_type,
const ColumnsDescription & all_columns,
const Context & context) const
{
/// Collect default expressions for MODIFY and ADD comands
if (command.type == AlterCommand::MODIFY_COLUMN || command.type == AlterCommand::ADD_COLUMN)
{
if (command.default_expression)
{
/// If we modify default, but not type
if (!command.data_type)
{
default_expr_list->children.emplace_back(setAlias(command.default_expression->clone(), column_name));
}
else
{
const auto & final_column_name = column_name;
const auto tmp_column_name = final_column_name + "_tmp";
const auto data_type_ptr = command.data_type;

try
{
String tmp_column_name = "__tmp" + column_name;
auto copy_expression = default_expression->clone();
auto default_with_cast = makeASTFunction("CAST", copy_expression, std::make_shared<ASTLiteral>(column_type->getName()));
auto query_with_alias = setAlias(default_with_cast, tmp_column_name);
auto syntax_result = SyntaxAnalyzer(context).analyze(query_with_alias, all_columns.getAll());
ExpressionAnalyzer(query_with_alias, syntax_result, context).getActions(true);
}
catch (Exception & ex)
{
ex.addMessage("default expression and column type are incompatible. Cannot alter column " + backQuote(column_name));
throw;

default_expr_list->children.emplace_back(setAlias(
addTypeConversionToAST(std::make_shared<ASTIdentifier>(tmp_column_name), data_type_ptr->getName()),
final_column_name));

default_expr_list->children.emplace_back(setAlias(command.default_expression->clone(), tmp_column_name));
}
} /// if we change data type for column with default
else if (metadata.columns.has(column_name) && command.data_type)
{
auto column_in_table = metadata.columns.get(column_name);
/// Column doesn't have a default, nothing to check
if (!column_in_table.default_desc.expression)
continue;

const auto & final_column_name = column_name;
const auto tmp_column_name = final_column_name + "_tmp";
const auto data_type_ptr = command.data_type;


default_expr_list->children.emplace_back(setAlias(
addTypeConversionToAST(std::make_shared<ASTIdentifier>(tmp_column_name), data_type_ptr->getName()), final_column_name));

default_expr_list->children.emplace_back(setAlias(column_in_table.default_desc.expression->clone(), tmp_column_name));
}
}
}

validateColumnsDefaultsAndGetSampleBlock(default_expr_list, all_columns.getAll(), context);
}

bool AlterCommands::isModifyingData() const
Expand Down
10 changes: 0 additions & 10 deletions dbms/src/Storages/AlterCommands.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,16 +127,6 @@ class AlterCommands : public std::vector<AlterCommand>
{
private:
bool prepared = false;
private:

/// Validate that default expression and type are compatible, i.e. default
/// expression result can be casted to column_type
void validateDefaultExpressionForColumn(
const ASTPtr default_expression,
const String & column_name,
const DataTypePtr column_type,
const ColumnsDescription & all_columns,
const Context & context) const;

public:
/// Validate that commands can be applied to metadata.
Expand Down
32 changes: 31 additions & 1 deletion dbms/src/Storages/ColumnsDescription.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <IO/WriteBuffer.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadBuffer.h>
Expand All @@ -20,7 +23,9 @@
#include <Storages/IStorage.h>
#include <Common/typeid_cast.h>
#include <Compression/CompressionFactory.h>

#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/SyntaxAnalyzer.h>
#include <Interpreters/ExpressionActions.h>

namespace DB
{
Expand All @@ -30,6 +35,7 @@ namespace ErrorCodes
extern const int NO_SUCH_COLUMN_IN_TABLE;
extern const int ILLEGAL_COLUMN;
extern const int CANNOT_PARSE_TEXT;
extern const int THERE_IS_NO_DEFAULT_VALUE;
}

ColumnDescription::ColumnDescription(String name_, DataTypePtr type_, bool is_virtual_)
Expand Down Expand Up @@ -421,4 +427,28 @@ ColumnsDescription ColumnsDescription::parse(const String & str)
return result;
}


Block validateColumnsDefaultsAndGetSampleBlock(ASTPtr default_expr_list, const NamesAndTypesList & all_columns, const Context & context)
{
for (const auto & child : default_expr_list->children)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@4ertus2 I think this check is enough, but maybe I have missed something.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Generally ASTSelectQuery appears inside of ASTSelectWithUnionQuery. So we'd better add both or none of them in check.

if (child->as<ASTSelectQuery>() || child->as<ASTSelectWithUnionQuery>() || child->as<ASTSubquery>())
throw Exception("Select query is not allowed in columns DEFAULT expression", ErrorCodes::THERE_IS_NO_DEFAULT_VALUE);

try
{
auto syntax_analyzer_result = SyntaxAnalyzer(context).analyze(default_expr_list, all_columns);
const auto actions = ExpressionAnalyzer(default_expr_list, syntax_analyzer_result, context).getActions(true);
for (auto & action : actions->getActions())
if (action.type == ExpressionAction::Type::JOIN || action.type == ExpressionAction::Type::ARRAY_JOIN)
throw Exception("Unsupported default value that requires ARRAY JOIN or JOIN action", ErrorCodes::THERE_IS_NO_DEFAULT_VALUE);

return actions->getSampleBlock();
}
catch (Exception & ex)
{
ex.addMessage("default expression and column type are incompatible.");
throw;
}
}

}
5 changes: 5 additions & 0 deletions dbms/src/Storages/ColumnsDescription.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,4 +114,9 @@ class ColumnsDescription
Container columns;
};

/// Validate default expressions and corresponding types compatibility, i.e.
/// default expression result can be casted to column_type. Also checks, that we
/// don't have strange constructions in default expression like SELECT query or
/// arrayJoin function.
Block validateColumnsDefaultsAndGetSampleBlock(ASTPtr default_expr_list, const NamesAndTypesList & all_columns, const Context & context);
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ col3 UInt64 MATERIALIZED col1 + 2
col4 UInt64 ALIAS col1 + 3
10 11
12 13
99
payload String
date Date MATERIALIZED today()
key UInt64 MATERIALIZED 0 * rand()
Expand Down
5 changes: 1 addition & 4 deletions dbms/tests/queries/0_stateless/00079_defaulted_columns.sql
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,7 @@ select * from defaulted;
select col3, col4 from defaulted;
drop table defaulted;

create table defaulted (col1 Int8, col2 UInt64 default (SELECT dummy+99 from system.one)) engine=Memory;
insert into defaulted (col1) values (0);
select col2 from defaulted;
drop table defaulted;
create table defaulted (col1 Int8, col2 UInt64 default (SELECT dummy+99 from system.one)) engine=Memory; --{serverError 116}

create table defaulted (payload String, date materialized today(), key materialized 0 * rand()) engine=MergeTree(date, key, 8192);
desc table defaulted;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,8 @@ echo
echo
${CLICKHOUSE_LOCAL} -q "CREATE TABLE sophisticated_default
(
a UInt8 DEFAULT
(
SELECT number FROM system.numbers LIMIT 3,1
),
b UInt8 ALIAS
(
SELECT dummy+9 FROM system.one
),
a UInt8 DEFAULT 3,
b UInt8 ALIAS a + 5,
c UInt8
) ENGINE = Memory; SELECT count() FROM system.tables WHERE name='sophisticated_default';"

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
1 1
1 1 1
2 2 4
2 2 2 4
3 3 9
3 3 3 9 27
30 changes: 30 additions & 0 deletions dbms/tests/queries/0_stateless/01084_defaults_on_aliases.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
DROP TABLE IF EXISTS table_with_defaults_on_aliases;

CREATE TABLE table_with_defaults_on_aliases (col1 UInt32, col2 ALIAS col1, col3 DEFAULT col2) Engine = MergeTree() ORDER BY tuple();

INSERT INTO table_with_defaults_on_aliases (col1) VALUES (1);

SELECT * FROM table_with_defaults_on_aliases WHERE col1 = 1;

SELECT col1, col2, col3 FROM table_with_defaults_on_aliases WHERE col1 = 1;

ALTER TABLE table_with_defaults_on_aliases ADD COLUMN col4 UInt64 DEFAULT col2 * col3;

INSERT INTO table_with_defaults_on_aliases (col1) VALUES (2);

SELECT * FROM table_with_defaults_on_aliases WHERE col1 = 2;

SELECT col1, col2, col3, col4 FROM table_with_defaults_on_aliases WHERE col1 = 2;

ALTER TABLE table_with_defaults_on_aliases ADD COLUMN col5 UInt64 ALIAS col2 * col4;

INSERT INTO table_with_defaults_on_aliases (col1) VALUES (3);

SELECT * FROM table_with_defaults_on_aliases WHERE col1 = 3;

SELECT col1, col2, col3, col4, col5 FROM table_with_defaults_on_aliases WHERE col1 = 3;


ALTER TABLE table_with_defaults_on_aliases ADD COLUMN col6 UInt64 MATERIALIZED col2 * col4;

DROP TABLE IF EXISTS table_with_defaults_on_aliases;
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
0 0 test0
ClickHouse is great ClickHouse is fast