-
Notifications
You must be signed in to change notification settings - Fork 3.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[branch-2.1] Picks "[opt](partial update) Allow to only specify key columns in partial update #40736" #40863
Conversation
Thank you for your contribution to Apache Doris. Since 2024-03-18, the documentation has been moved to doris-website.
run buildall |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
clang-tidy made some suggestions
@@ -359,13 +359,8 @@ void SegmentWriter::_serialize_block_to_row_column(vectorized::Block& block) { | |||
// 3. set columns to data convertor and then write all columns | |||
Status SegmentWriter::append_block_with_partial_content(const vectorized::Block* block, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
warning: function 'append_block_with_partial_content' has cognitive complexity of 62 (threshold 50) [readability-function-cognitive-complexity]
Status SegmentWriter::append_block_with_partial_content(const vectorized::Block* block,
^
Additional context
be/src/olap/rowset/segment_v2/segment_writer.cpp:362: +1, including nesting penalty of 0, nesting level increased to 1
if (block->columns() < _tablet_schema->num_key_columns() ||
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:383: +1, including nesting penalty of 0, nesting level increased to 1
RETURN_IF_ERROR(_olap_data_convertor->set_source_content_with_specifid_columns(
^
be/src/common/status.h:619: expanded from macro 'RETURN_IF_ERROR'
do { \
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:383: +2, including nesting penalty of 1, nesting level increased to 2
RETURN_IF_ERROR(_olap_data_convertor->set_source_content_with_specifid_columns(
^
be/src/common/status.h:621: expanded from macro 'RETURN_IF_ERROR'
if (UNLIKELY(!_status_.ok())) { \
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:415: +1, including nesting penalty of 0, nesting level increased to 1
if (const vectorized::ColumnWithTypeAndName* delete_sign_column =
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:420: +2, including nesting penalty of 1, nesting level increased to 2
if (delete_sign_col.size() >= row_pos + num_rows) {
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:429: +1, including nesting penalty of 0, nesting level increased to 1
if (specified_rowsets.size() != _mow_context->rowset_ids.size()) {
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:438: +2, including nesting penalty of 1, nesting level increased to 2
if (_opts.rowset_ctx->partial_update_info->is_strict_mode) {
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:452: +1, including nesting penalty of 0, nesting level increased to 1
for (size_t block_pos = row_pos; block_pos < row_pos + num_rows; block_pos++) {
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:463: +2, including nesting penalty of 1, nesting level increased to 2
if (have_input_seq_column) {
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:469: +2, including nesting penalty of 1, nesting level increased to 2
if (!_tablet_schema->has_sequence_col() || have_input_seq_column) {
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:470: +3, including nesting penalty of 2, nesting level increased to 3
RETURN_IF_ERROR(_primary_key_index_builder->add_item(key));
^
be/src/common/status.h:619: expanded from macro 'RETURN_IF_ERROR'
do { \
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:470: +4, including nesting penalty of 3, nesting level increased to 4
RETURN_IF_ERROR(_primary_key_index_builder->add_item(key));
^
be/src/common/status.h:621: expanded from macro 'RETURN_IF_ERROR'
if (UNLIKELY(!_status_.ok())) { \
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:475: +1
(delete_sign_column_data != nullptr && delete_sign_column_data[block_pos] != 0);
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:483: +2, including nesting penalty of 1, nesting level increased to 2
if (st.is<KEY_NOT_FOUND>()) {
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:484: +3, including nesting penalty of 2, nesting level increased to 3
if (_opts.rowset_ctx->partial_update_info->is_strict_mode) {
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:491: +1, nesting level increased to 3
} else {
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:492: +4, including nesting penalty of 3, nesting level increased to 4
if (!_opts.rowset_ctx->partial_update_info->can_insert_new_rows_in_partial_update &&
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:492: +1
if (!_opts.rowset_ctx->partial_update_info->can_insert_new_rows_in_partial_update &&
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:515: +2, including nesting penalty of 1, nesting level increased to 2
if (!st.ok() && !st.is<KEY_ALREADY_EXISTS>()) {
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:525: +2, including nesting penalty of 1, nesting level increased to 2
if (have_delete_sign && !_tablet_schema->has_sequence_col()) {
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:528: +1, nesting level increased to 2
} else {
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:535: +2, including nesting penalty of 1, nesting level increased to 2
if (st.is<KEY_ALREADY_EXISTS>()) {
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:542: +1, nesting level increased to 2
} else {
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:550: +1, including nesting penalty of 0, nesting level increased to 1
if (config::enable_merge_on_write_correctness_check) {
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:557: +1, including nesting penalty of 0, nesting level increased to 1
RETURN_IF_ERROR(fill_missing_columns(mutable_full_columns, use_default_or_null_flag,
^
be/src/common/status.h:619: expanded from macro 'RETURN_IF_ERROR'
do { \
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:557: +2, including nesting penalty of 1, nesting level increased to 2
RETURN_IF_ERROR(fill_missing_columns(mutable_full_columns, use_default_or_null_flag,
^
be/src/common/status.h:621: expanded from macro 'RETURN_IF_ERROR'
if (UNLIKELY(!_status_.ok())) { \
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:562: +1, including nesting penalty of 0, nesting level increased to 1
if (_tablet_schema->store_row_column()) {
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:568: +1, including nesting penalty of 0, nesting level increased to 1
RETURN_IF_ERROR(_olap_data_convertor->set_source_content_with_specifid_columns(
^
be/src/common/status.h:619: expanded from macro 'RETURN_IF_ERROR'
do { \
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:568: +2, including nesting penalty of 1, nesting level increased to 2
RETURN_IF_ERROR(_olap_data_convertor->set_source_content_with_specifid_columns(
^
be/src/common/status.h:621: expanded from macro 'RETURN_IF_ERROR'
if (UNLIKELY(!_status_.ok())) { \
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:589: +1, including nesting penalty of 0, nesting level increased to 1
if (_tablet_schema->has_sequence_col() && !have_input_seq_column) {
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:596: +2, including nesting penalty of 1, nesting level increased to 2
if (_num_rows_written != row_pos ||
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:603: +2, including nesting penalty of 1, nesting level increased to 2
for (size_t block_pos = row_pos; block_pos < row_pos + num_rows; block_pos++) {
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:606: +3, including nesting penalty of 2, nesting level increased to 3
RETURN_IF_ERROR(_primary_key_index_builder->add_item(key));
^
be/src/common/status.h:619: expanded from macro 'RETURN_IF_ERROR'
do { \
^
be/src/olap/rowset/segment_v2/segment_writer.cpp:606: +4, including nesting penalty of 3, nesting level increased to 4
RETURN_IF_ERROR(_primary_key_index_builder->add_item(key));
^
be/src/common/status.h:621: expanded from macro 'RETURN_IF_ERROR'
if (UNLIKELY(!_status_.ok())) { \
^
TeamCity be ut coverage result: |
…key columns in partial update apache#40736" (apache#40863) picks apache#40736
…update apache#39619 pick [opt](partial update) Remove unnecessary lock and refactor some code for partial update (apache#40062) 1. apache#34112 let partial update fetch rowsets in the initialization of RowsetBuilder rather than flush phase. So we can remove that tablet header lock. 2. refactor some partial update code fix compile pick [Fix](partial update) Fix __DORIS_SEQUENCE_COL__ is not set for newly inserted rows in partial update apache#40272 picks apache#40272 pick [Cherry-pick](branch-2.1) Pick "[Featrue](default value) Support bitmap_empty default value (apache#40364)" (apache#40487) Pick apache#40364 <!--Describe your changes.--> pick [Feature](partial update) Support flexible partial update in stream load with json files (apache#39756) This PR add the ability to update different columns for each row in one stream load Doc: apache/doris-website#1140 ```sql MySQL root@127.1:d1> CREATE TABLE t1 ( -> `k` int(11) NULL, -> `v1` BIGINT NULL, -> `v2` BIGINT NULL DEFAULT "9876", -> `v3` BIGINT NOT NULL, -> `v4` BIGINT NOT NULL DEFAULT "1234", -> `v5` BIGINT NULL -> ) UNIQUE KEY(`k`) DISTRIBUTED BY HASH(`k`) BUCKETS 1 -> PROPERTIES( -> "replication_num" = "1", -> "enable_unique_key_merge_on_write" = "true"); Query OK, 0 rows affected Time: 0.013s MySQL root@127.1:d1> insert into t1 select number, number, number, number, number, number from numbers("number" = "6"); Query OK, 6 rows affected Time: 0.107s MySQL root@127.1:d1> select * from t1; +---+----+----+----+----+----+ | k | v1 | v2 | v3 | v4 | v5 | +---+----+----+----+----+----+ | 0 | 0 | 0 | 0 | 0 | 0 | | 1 | 1 | 1 | 1 | 1 | 1 | | 2 | 2 | 2 | 2 | 2 | 2 | | 3 | 3 | 3 | 3 | 3 | 3 | | 4 | 4 | 4 | 4 | 4 | 4 | | 5 | 5 | 5 | 5 | 5 | 5 | +---+----+----+----+----+----+ ``` test1.json: ```json {"k": 1, "v1": 10} {"k": 2, "v2": 20, "v5": 25} {"k": 3, "v3": 30} {"k": 4, "v4": 20, "v1": 43, "v3": 99} {"k": 5, "v5": null} {"k": 6, "v1": 999, "v3": 777} {"k": 2, "v4": 222} {"k": 1, "v2": 111, "v3": 111} ``` ```bash curl 
--location-trusted -u root: \ -H "strict_mode:false" \ -H "format:json" \ -H "read_json_by_line:true" \ -H "unique_key_update_mode:UPDATE_FLEXIBLE_COLUMNS" \ -T test1.json \ -XPUT http://<host>:<http_port>/api/d1/t1/_stream_load ``` ```sql MySQL root@127.1:d1> select * from t1; +---+-----+------+-----+------+--------+ | k | v1 | v2 | v3 | v4 | v5 | +---+-----+------+-----+------+--------+ | 0 | 0 | 0 | 0 | 0 | 0 | | 1 | 10 | 111 | 111 | 1 | 1 | | 2 | 2 | 20 | 2 | 222 | 25 | | 3 | 3 | 3 | 30 | 3 | 3 | | 4 | 43 | 4 | 99 | 20 | 4 | | 5 | 5 | 5 | 5 | 5 | <null> | | 6 | 999 | 9876 | 777 | 1234 | <null> | +---+-----+------+-----+------+--------+ ``` fix compile pick [branch-2.1] Picks "[opt](partial update) Allow to only specify key columns in partial update apache#40736" (apache#40863) picks apache#40736 fix
…update apache#39619 pick [opt](partial update) Remove unnecessary lock and refactor some code for partial update (apache#40062) 1. apache#34112 let partial update fetch rowsets in the initialization of RowsetBuilder rather than flush phase. So we can remove that tablet header lock. 2. refactor some partial update code fix compile pick [Fix](partial update) Fix __DORIS_SEQUENCE_COL__ is not set for newly inserted rows in partial update apache#40272 picks apache#40272 pick [Cherry-pick](branch-2.1) Pick "[Featrue](default value) Support bitmap_empty default value (apache#40364)" (apache#40487) Pick apache#40364 <!--Describe your changes.--> pick [Feature](partial update) Support flexible partial update in stream load with json files (apache#39756) This PR add the ability to update different columns for each row in one stream load Doc: apache/doris-website#1140 ```sql MySQL root@127.1:d1> CREATE TABLE t1 ( -> `k` int(11) NULL, -> `v1` BIGINT NULL, -> `v2` BIGINT NULL DEFAULT "9876", -> `v3` BIGINT NOT NULL, -> `v4` BIGINT NOT NULL DEFAULT "1234", -> `v5` BIGINT NULL -> ) UNIQUE KEY(`k`) DISTRIBUTED BY HASH(`k`) BUCKETS 1 -> PROPERTIES( -> "replication_num" = "1", -> "enable_unique_key_merge_on_write" = "true"); Query OK, 0 rows affected Time: 0.013s MySQL root@127.1:d1> insert into t1 select number, number, number, number, number, number from numbers("number" = "6"); Query OK, 6 rows affected Time: 0.107s MySQL root@127.1:d1> select * from t1; +---+----+----+----+----+----+ | k | v1 | v2 | v3 | v4 | v5 | +---+----+----+----+----+----+ | 0 | 0 | 0 | 0 | 0 | 0 | | 1 | 1 | 1 | 1 | 1 | 1 | | 2 | 2 | 2 | 2 | 2 | 2 | | 3 | 3 | 3 | 3 | 3 | 3 | | 4 | 4 | 4 | 4 | 4 | 4 | | 5 | 5 | 5 | 5 | 5 | 5 | +---+----+----+----+----+----+ ``` test1.json: ```json {"k": 1, "v1": 10} {"k": 2, "v2": 20, "v5": 25} {"k": 3, "v3": 30} {"k": 4, "v4": 20, "v1": 43, "v3": 99} {"k": 5, "v5": null} {"k": 6, "v1": 999, "v3": 777} {"k": 2, "v4": 222} {"k": 1, "v2": 111, "v3": 111} ``` ```bash curl 
--location-trusted -u root: \ -H "strict_mode:false" \ -H "format:json" \ -H "read_json_by_line:true" \ -H "unique_key_update_mode:UPDATE_FLEXIBLE_COLUMNS" \ -T test1.json \ -XPUT http://<host>:<http_port>/api/d1/t1/_stream_load ``` ```sql MySQL root@127.1:d1> select * from t1; +---+-----+------+-----+------+--------+ | k | v1 | v2 | v3 | v4 | v5 | +---+-----+------+-----+------+--------+ | 0 | 0 | 0 | 0 | 0 | 0 | | 1 | 10 | 111 | 111 | 1 | 1 | | 2 | 2 | 20 | 2 | 222 | 25 | | 3 | 3 | 3 | 30 | 3 | 3 | | 4 | 43 | 4 | 99 | 20 | 4 | | 5 | 5 | 5 | 5 | 5 | <null> | | 6 | 999 | 9876 | 777 | 1234 | <null> | +---+-----+------+-----+------+--------+ ``` fix compile pick [branch-2.1] Picks "[opt](partial update) Allow to only specify key columns in partial update apache#40736" (apache#40863) picks apache#40736 fix
…update apache#39619 pick [opt](partial update) Remove unnecessary lock and refactor some code for partial update (apache#40062) 1. apache#34112 let partial update fetch rowsets in the initialization of RowsetBuilder rather than flush phase. So we can remove that tablet header lock. 2. refactor some partial update code fix compile pick [Fix](partial update) Fix __DORIS_SEQUENCE_COL__ is not set for newly inserted rows in partial update apache#40272 picks apache#40272 pick [Cherry-pick](branch-2.1) Pick "[Featrue](default value) Support bitmap_empty default value (apache#40364)" (apache#40487) Pick apache#40364 <!--Describe your changes.--> pick [Feature](partial update) Support flexible partial update in stream load with json files (apache#39756) This PR add the ability to update different columns for each row in one stream load Doc: apache/doris-website#1140 ```sql MySQL root@127.1:d1> CREATE TABLE t1 ( -> `k` int(11) NULL, -> `v1` BIGINT NULL, -> `v2` BIGINT NULL DEFAULT "9876", -> `v3` BIGINT NOT NULL, -> `v4` BIGINT NOT NULL DEFAULT "1234", -> `v5` BIGINT NULL -> ) UNIQUE KEY(`k`) DISTRIBUTED BY HASH(`k`) BUCKETS 1 -> PROPERTIES( -> "replication_num" = "1", -> "enable_unique_key_merge_on_write" = "true"); Query OK, 0 rows affected Time: 0.013s MySQL root@127.1:d1> insert into t1 select number, number, number, number, number, number from numbers("number" = "6"); Query OK, 6 rows affected Time: 0.107s MySQL root@127.1:d1> select * from t1; +---+----+----+----+----+----+ | k | v1 | v2 | v3 | v4 | v5 | +---+----+----+----+----+----+ | 0 | 0 | 0 | 0 | 0 | 0 | | 1 | 1 | 1 | 1 | 1 | 1 | | 2 | 2 | 2 | 2 | 2 | 2 | | 3 | 3 | 3 | 3 | 3 | 3 | | 4 | 4 | 4 | 4 | 4 | 4 | | 5 | 5 | 5 | 5 | 5 | 5 | +---+----+----+----+----+----+ ``` test1.json: ```json {"k": 1, "v1": 10} {"k": 2, "v2": 20, "v5": 25} {"k": 3, "v3": 30} {"k": 4, "v4": 20, "v1": 43, "v3": 99} {"k": 5, "v5": null} {"k": 6, "v1": 999, "v3": 777} {"k": 2, "v4": 222} {"k": 1, "v2": 111, "v3": 111} ``` ```bash curl 
--location-trusted -u root: \ -H "strict_mode:false" \ -H "format:json" \ -H "read_json_by_line:true" \ -H "unique_key_update_mode:UPDATE_FLEXIBLE_COLUMNS" \ -T test1.json \ -XPUT http://<host>:<http_port>/api/d1/t1/_stream_load ``` ```sql MySQL root@127.1:d1> select * from t1; +---+-----+------+-----+------+--------+ | k | v1 | v2 | v3 | v4 | v5 | +---+-----+------+-----+------+--------+ | 0 | 0 | 0 | 0 | 0 | 0 | | 1 | 10 | 111 | 111 | 1 | 1 | | 2 | 2 | 20 | 2 | 222 | 25 | | 3 | 3 | 3 | 30 | 3 | 3 | | 4 | 43 | 4 | 99 | 20 | 4 | | 5 | 5 | 5 | 5 | 5 | <null> | | 6 | 999 | 9876 | 777 | 1234 | <null> | +---+-----+------+-----+------+--------+ ``` fix compile pick [branch-2.1] Picks "[opt](partial update) Allow to only specify key columns in partial update apache#40736" (apache#40863) picks apache#40736 fix
…update apache#39619 pick [opt](partial update) Remove unnecessary lock and refactor some code for partial update (apache#40062) 1. apache#34112 let partial update fetch rowsets in the initialization of RowsetBuilder rather than flush phase. So we can remove that tablet header lock. 2. refactor some partial update code fix compile pick [Fix](partial update) Fix __DORIS_SEQUENCE_COL__ is not set for newly inserted rows in partial update apache#40272 picks apache#40272 pick [Cherry-pick](branch-2.1) Pick "[Featrue](default value) Support bitmap_empty default value (apache#40364)" (apache#40487) Pick apache#40364 <!--Describe your changes.--> pick [Feature](partial update) Support flexible partial update in stream load with json files (apache#39756) This PR add the ability to update different columns for each row in one stream load Doc: apache/doris-website#1140 ```sql MySQL root@127.1:d1> CREATE TABLE t1 ( -> `k` int(11) NULL, -> `v1` BIGINT NULL, -> `v2` BIGINT NULL DEFAULT "9876", -> `v3` BIGINT NOT NULL, -> `v4` BIGINT NOT NULL DEFAULT "1234", -> `v5` BIGINT NULL -> ) UNIQUE KEY(`k`) DISTRIBUTED BY HASH(`k`) BUCKETS 1 -> PROPERTIES( -> "replication_num" = "1", -> "enable_unique_key_merge_on_write" = "true"); Query OK, 0 rows affected Time: 0.013s MySQL root@127.1:d1> insert into t1 select number, number, number, number, number, number from numbers("number" = "6"); Query OK, 6 rows affected Time: 0.107s MySQL root@127.1:d1> select * from t1; +---+----+----+----+----+----+ | k | v1 | v2 | v3 | v4 | v5 | +---+----+----+----+----+----+ | 0 | 0 | 0 | 0 | 0 | 0 | | 1 | 1 | 1 | 1 | 1 | 1 | | 2 | 2 | 2 | 2 | 2 | 2 | | 3 | 3 | 3 | 3 | 3 | 3 | | 4 | 4 | 4 | 4 | 4 | 4 | | 5 | 5 | 5 | 5 | 5 | 5 | +---+----+----+----+----+----+ ``` test1.json: ```json {"k": 1, "v1": 10} {"k": 2, "v2": 20, "v5": 25} {"k": 3, "v3": 30} {"k": 4, "v4": 20, "v1": 43, "v3": 99} {"k": 5, "v5": null} {"k": 6, "v1": 999, "v3": 777} {"k": 2, "v4": 222} {"k": 1, "v2": 111, "v3": 111} ``` ```bash curl 
--location-trusted -u root: \ -H "strict_mode:false" \ -H "format:json" \ -H "read_json_by_line:true" \ -H "unique_key_update_mode:UPDATE_FLEXIBLE_COLUMNS" \ -T test1.json \ -XPUT http://<host>:<http_port>/api/d1/t1/_stream_load ``` ```sql MySQL root@127.1:d1> select * from t1; +---+-----+------+-----+------+--------+ | k | v1 | v2 | v3 | v4 | v5 | +---+-----+------+-----+------+--------+ | 0 | 0 | 0 | 0 | 0 | 0 | | 1 | 10 | 111 | 111 | 1 | 1 | | 2 | 2 | 20 | 2 | 222 | 25 | | 3 | 3 | 3 | 30 | 3 | 3 | | 4 | 43 | 4 | 99 | 20 | 4 | | 5 | 5 | 5 | 5 | 5 | <null> | | 6 | 999 | 9876 | 777 | 1234 | <null> | +---+-----+------+-----+------+--------+ ``` fix compile pick [branch-2.1] Picks "[opt](partial update) Allow to only specify key columns in partial update apache#40736" (apache#40863) picks apache#40736 fix
…key columns in partial update apache#40736" (apache#40863) picks apache#40736
picks #40736