Skip to content
Permalink
Browse files
[Bug][Vectorized] Fix insert bimmap column with nullable column (#9408)
Co-authored-by: lihaopeng <lihaopeng@baidu.com>
  • Loading branch information
HappenLee and lihaopeng committed May 17, 2022
1 parent 1cc9653 commit 536d8ca1ed2c5897859fd6d029555d1bbf8ddad2
Showing 3 changed files with 132 additions and 111 deletions.
@@ -393,6 +393,7 @@ Status VOlapTableSink::send(RuntimeState* state, vectorized::Block* input_block)
// because of "data unqualified"
return Status::EndOfFile("Encountered unqualified data, stop processing");
}
_convert_to_dest_desc_block(&block);
}

BlockRow block_row;
@@ -474,132 +475,105 @@ Status VOlapTableSink::_validate_data(RuntimeState* state, vectorized::Block* bl
block->get_by_position(i).column->convert_to_full_column_if_const();
const auto& column = block->get_by_position(i).column;

if (desc->type() == TYPE_OBJECT && column->is_nullable()) {
const auto& null_map =
vectorized::check_and_get_column<vectorized::ColumnNullable>(*column)
->get_null_map_data();
fmt::format_to(error_msg, "null is not allowed for bitmap column, column_name: {}; ",
desc->col_name());
auto column_ptr = vectorized::check_and_get_column<vectorized::ColumnNullable>(*column);
auto& real_column_ptr =
column_ptr == nullptr ? column : (column_ptr->get_nested_column_ptr());

switch (desc->type().type) {
case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_STRING: {
const auto column_string =
assert_cast<const vectorized::ColumnString*>(real_column_ptr.get());

size_t limit = std::min(config::string_type_length_soft_limit_bytes, desc->type().len);
for (int j = 0; j < num_rows; ++j) {
if (!filter_bitmap->Get(j)) {
if (null_map[j]) {
RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
auto str_val = column_string->get_data_at(j);
bool invalid = str_val.size > limit;

error_msg.clear();
if (str_val.size > desc->type().len) {
fmt::format_to(error_msg, "{}",
"the length of input is too long than schema. ");
fmt::format_to(error_msg, "column_name: {}; ", desc->col_name());
fmt::format_to(error_msg, "input str: [{}] ", str_val.to_prefix(10));
fmt::format_to(error_msg, "schema length: {}; ", desc->type().len);
fmt::format_to(error_msg, "actual length: {}; ", str_val.size);
} else if (str_val.size > limit) {
fmt::format_to(error_msg, "{}",
"the length of input string is too long than vec schema. ");
fmt::format_to(error_msg, "column_name: {}; ", desc->col_name());
fmt::format_to(error_msg, "input str: [{}] ", str_val.to_prefix(10));
fmt::format_to(error_msg, "schema length: {}; ", desc->type().len);
fmt::format_to(error_msg, "limit length: {}; ", limit);
fmt::format_to(error_msg, "actual length: {}; ", str_val.size);
}
}
}
} else {
auto column_ptr = vectorized::check_and_get_column<vectorized::ColumnNullable>(*column);
auto& real_column_ptr =
column_ptr == nullptr ? column : (column_ptr->get_nested_column_ptr());

switch (desc->type().type) {
case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_STRING: {
const auto column_string =
assert_cast<const vectorized::ColumnString*>(real_column_ptr.get());

size_t limit =
std::min(config::string_type_length_soft_limit_bytes, desc->type().len);
for (int j = 0; j < num_rows; ++j) {
if (!filter_bitmap->Get(j)) {
auto str_val = column_string->get_data_at(j);
bool invalid = str_val.size > limit;

error_msg.clear();
if (str_val.size > desc->type().len) {
fmt::format_to(error_msg, "{}",
"the length of input is too long than schema. ");
fmt::format_to(error_msg, "column_name: {}; ", desc->col_name());
fmt::format_to(error_msg, "input str: [{}] ", str_val.to_prefix(10));
fmt::format_to(error_msg, "schema length: {}; ", desc->type().len);
fmt::format_to(error_msg, "actual length: {}; ", str_val.size);
} else if (str_val.size > limit) {
fmt::format_to(
error_msg, "{}",
"the length of input string is too long than vec schema. ");
fmt::format_to(error_msg, "column_name: {}; ", desc->col_name());
fmt::format_to(error_msg, "input str: [{}] ", str_val.to_prefix(10));
fmt::format_to(error_msg, "schema length: {}; ", desc->type().len);
fmt::format_to(error_msg, "limit length: {}; ", limit);
fmt::format_to(error_msg, "actual length: {}; ", str_val.size);
}

if (invalid) {
RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
}
if (invalid) {
RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
}
}
break;
}
case TYPE_DECIMALV2: {
auto column_decimal = const_cast<
vectorized::ColumnDecimal<vectorized::Decimal128>*>(
assert_cast<const vectorized::ColumnDecimal<vectorized::Decimal128>*>(
real_column_ptr.get()));

for (int j = 0; j < num_rows; ++j) {
if (!filter_bitmap->Get(j)) {
auto dec_val = binary_cast<vectorized::Int128, DecimalV2Value>(
column_decimal->get_data()[j]);
error_msg.clear();
bool invalid = false;

if (dec_val.greater_than_scale(desc->type().scale)) {
auto code = dec_val.round(&dec_val, desc->type().scale, HALF_UP);
column_decimal->get_data()[j] =
binary_cast<DecimalV2Value, vectorized::Int128>(dec_val);

if (code != E_DEC_OK) {
fmt::format_to(error_msg, "round one decimal failed.value={}; ",
dec_val.to_string());
invalid = true;
}
}
if (dec_val > _max_decimalv2_val[i] || dec_val < _min_decimalv2_val[i]) {
fmt::format_to(error_msg,
"decimal value is not valid for definition, column={}",
desc->col_name());
fmt::format_to(error_msg, ", value={}", dec_val.to_string());
fmt::format_to(error_msg, ", precision={}, scale={}; ",
desc->type().precision, desc->type().scale);
break;
}
case TYPE_DECIMALV2: {
auto column_decimal = const_cast<vectorized::ColumnDecimal<vectorized::Decimal128>*>(
assert_cast<const vectorized::ColumnDecimal<vectorized::Decimal128>*>(
real_column_ptr.get()));

for (int j = 0; j < num_rows; ++j) {
if (!filter_bitmap->Get(j)) {
auto dec_val = binary_cast<vectorized::Int128, DecimalV2Value>(
column_decimal->get_data()[j]);
error_msg.clear();
bool invalid = false;

if (dec_val.greater_than_scale(desc->type().scale)) {
auto code = dec_val.round(&dec_val, desc->type().scale, HALF_UP);
column_decimal->get_data()[j] =
binary_cast<DecimalV2Value, vectorized::Int128>(dec_val);

if (code != E_DEC_OK) {
fmt::format_to(error_msg, "round one decimal failed.value={}; ",
dec_val.to_string());
invalid = true;
}
}
if (dec_val > _max_decimalv2_val[i] || dec_val < _min_decimalv2_val[i]) {
fmt::format_to(error_msg,
"decimal value is not valid for definition, column={}",
desc->col_name());
fmt::format_to(error_msg, ", value={}", dec_val.to_string());
fmt::format_to(error_msg, ", precision={}, scale={}; ",
desc->type().precision, desc->type().scale);
invalid = true;
}

if (invalid) {
RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
}
if (invalid) {
RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
}
}
break;
}
default:
break;
}

// Dispose the nullable column not match problem here, convert to nullable column
if (desc->is_nullable() && !column_ptr) {
block->get_by_position(i).column = vectorized::make_nullable(column);
block->get_by_position(i).type =
vectorized::make_nullable(block->get_by_position(i).type);
}
break;
}
default:
break;
}

// Dispose the nullable column not match problem here, convert to not nullable column
if (!desc->is_nullable() && column_ptr) {
const auto& null_map = column_ptr->get_null_map_data();
for (int j = 0; j < null_map.size(); ++j) {
fmt::format_to(error_msg, "null value for not null column, column={}; ",
desc->col_name());
if (null_map[j] && !filter_bitmap->Get(j)) {
RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
}
// Dispose the the column should do not contain the NULL value
// Only tow case:
// 1. column is nullable but the desc is not nullable
// 2. desc->type is BITMAP
if ((!desc->is_nullable() || desc->type() == TYPE_OBJECT) && column_ptr) {
const auto& null_map = column_ptr->get_null_map_data();
for (int j = 0; j < null_map.size(); ++j) {
fmt::format_to(error_msg,
"null value for not null column/or bitmap column, column={}; ",
desc->col_name());
if (null_map[j] && !filter_bitmap->Get(j)) {
RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
}
block->get_by_position(i).column = column_ptr->get_nested_column_ptr();
block->get_by_position(i).type =
(reinterpret_cast<const vectorized::DataTypeNullable*>(
block->get_by_position(i).type.get()))
->get_nested_type();
}
}
}
@@ -611,5 +585,26 @@ Status VOlapTableSink::_validate_data(RuntimeState* state, vectorized::Block* bl
return Status::OK();
}

void VOlapTableSink::_convert_to_dest_desc_block(doris::vectorized::Block* block) {
for (int i = 0; i < _output_tuple_desc->slots().size(); ++i) {
SlotDescriptor* desc = _output_tuple_desc->slots()[i];
if (desc->is_nullable() != block->get_by_position(i).type->is_nullable()) {
if (desc->is_nullable()) {
block->get_by_position(i).type =
vectorized::make_nullable(block->get_by_position(i).type);
block->get_by_position(i).column =
vectorized::make_nullable(block->get_by_position(i).column);
} else {
block->get_by_position(i).type = assert_cast<const vectorized::DataTypeNullable&>(
*block->get_by_position(i).type)
.get_nested_type();
block->get_by_position(i).column = assert_cast<const vectorized::ColumnNullable&>(
*block->get_by_position(i).column)
.get_nested_column_ptr();
}
}
}
}

} // namespace stream_load
} // namespace doris
@@ -102,9 +102,13 @@ class VOlapTableSink : public OlapTableSink {
Status _validate_data(RuntimeState* state, vectorized::Block* block, Bitmap* filter_bitmap,
int* filtered_rows, bool* stop_processing);

// some output column of output expr may have different nullable property with dest slot desc
// so here need to do the convert operation
void _convert_to_dest_desc_block(vectorized::Block* block);

VOlapTablePartitionParam* _vpartition = nullptr;
std::vector<vectorized::VExprContext*> _output_vexpr_ctxs;
};

} // namespace stream_load
} // namespace doris
} // namespace doris
@@ -67,6 +67,28 @@ suite("load") {
}
sql "insert into ${dbName}.test select * from ${dbName}.baseall where k1 <= 3"

// table for compaction
sql """
CREATE TABLE compaction_tbl
(
user_id LARGEINT NOT NULL,
date DATE NOT NULL,
city VARCHAR(20),
age SMALLINT,
sex TINYINT,
last_visit_date DATETIME REPLACE DEFAULT "1970-01-01 00:00:00",
last_update_date DATETIME REPLACE_IF_NOT_NULL DEFAULT "1970-01-01 00:00:00",
last_visit_date_not_null DATETIME REPLACE NOT NULL DEFAULT "1970-01-01 00:00:00",
cost BIGINT SUM DEFAULT "0",
max_dwell_time INT MAX DEFAULT "0",
min_dwell_time INT MIN DEFAULT "99999",
hll_col HLL HLL_UNION NOT NULL,
bitmap_col Bitmap BITMAP_UNION NOT NULL
) AGGREGATE KEY(user_id, date, city, age, sex)
DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1");"""

sql """insert into compaction_tbl values(123,"1999-10-10",'aaa',123,123,"1970-01-01 00:00:00","1970-01-01 00:00:00","1970-01-01 00:00:00",123,123,123,hll_hash(""),bitmap_from_string(""));"""

def baseall_count = sql "select count(*) from ${dbName}.baseall"
assertEquals(16, baseall_count[0][0])
def test_count = sql "select count(*) from ${dbName}.test"

0 comments on commit 536d8ca

Please sign in to comment.