Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions be/src/vec/columns/column_array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -566,14 +566,22 @@ void ColumnArray::replicate(const uint32_t* counts, size_t target_size, IColumn&
if (col_size == 0) {
return;
}

IColumn::Offsets replicate_offsets(col_size);
// |---------------------|-------------------------|-------------------------|
// [0, begin) [begin, begin + count_sz) [begin + count_sz, size())
// do not need to copy copy counts[n] times do not need to copy
IColumn::Offsets replicate_offsets(get_offsets().size(), 0);
size_t cur_offset = 0;
size_t end = begin + col_size;
// copy original data at offset n counts[n] times
for (size_t i = begin; i < end; ++i) {
cur_offset += counts[i];
replicate_offsets[i - begin] = cur_offset;
replicate_offsets[i] = cur_offset;
}
// rows in [end, size()) are replicated zero times: carry the previous
// cumulative offset forward so their replicate count stays 0
for (size_t i = end; i < size(); ++i) {
replicate_offsets[i] = replicate_offsets[i - 1];
}

if (cur_offset != target_size) {
LOG(WARNING) << "ColumnArray replicate input target_size:" << target_size
<< " not equal SUM(counts):" << cur_offset;
Expand Down Expand Up @@ -765,7 +773,9 @@ ColumnPtr ColumnArray::replicate_generic(const IColumn::Offsets& replicate_offse
size_t size_to_replicate = replicate_offsets[i] - prev_offset;
prev_offset = replicate_offsets[i];

for (size_t j = 0; j < size_to_replicate; ++j) res_concrete.insert_from(*this, i);
for (size_t j = 0; j < size_to_replicate; ++j) {
res_concrete.insert_from(*this, i);
}
}

return res;
Expand Down
26 changes: 26 additions & 0 deletions regression-test/data/load/insert/test_insert_nested_array.out
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,29 @@
6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]]
6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]]

-- !select --
1 [] 1 []
1 [] 1 []
1 [] 1 []
1 [] 1 []
2 [NULL] 2 [NULL]
2 [NULL] 2 [NULL]
2 [NULL] 2 [NULL]
2 [NULL] 2 [NULL]
3 [[]] 3 [[]]
3 [[]] 3 [[]]
3 [[]] 3 [[]]
3 [[]] 3 [[]]
4 [[NULL]] 4 [[NULL]]
4 [[NULL]] 4 [[NULL]]
4 [[NULL]] 4 [[NULL]]
4 [[NULL]] 4 [[NULL]]
5 [[[]]] 5 [[[]]]
5 [[[]]] 5 [[[]]]
5 [[[]]] 5 [[[]]]
5 [[[]]] 5 [[[]]]
6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]] 6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]]
6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]] 6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]]
6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]] 6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]]
6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]] 6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]]

Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !array_nested_with_join --
1 F AIR 1992-01-16 AIR []
1 F AIR 1992-01-16 AIR []
1 F AIR 1992-01-16 AIR []
1 F AIR 1992-01-19 AIR []
1 F AIR 1992-01-19 AIR []
1 F AIR 1992-01-19 AIR []
1 F AIR 1992-01-23 AIR []
1 F AIR 1992-01-23 AIR []
1 F AIR 1992-01-23 AIR []
1 F AIR 1992-01-26 AIR []

-- !array_nested_with_join_2 --
1 F AIR 1992-01-16 AIR []
1 F AIR 1992-01-16 AIR []
1 F AIR 1992-01-16 AIR []
1 F AIR 1992-01-19 AIR []
1 F AIR 1992-01-19 AIR []
1 F AIR 1992-01-19 AIR []
1 F AIR 1992-01-23 AIR []
1 F AIR 1992-01-23 AIR []
1 F AIR 1992-01-23 AIR []
1 F AIR 1992-01-26 AIR []

Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ suite("test_insert_nested_array", "load") {
(6, [[[null]], [[1], [2, 3]], [[4, 5, 6], null, null]])
"""
qt_select "select * from ${tableName} order by `key`"
qt_select "select * from ${tableName} as t1 right join ${tableName} as t2 on t1.`key` = t2.`key` order by t1.`key`"
}

test_nested_array_2_depths.call(false)
Expand Down
46 changes: 44 additions & 2 deletions regression-test/suites/query_p0/set_operations/load.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ suite("load") {
for (String table in tables) {
sql new File("""${context.file.parent}/ddl/${table}.sql""").text
}

for (String tableName in tables) {
streamLoad {
// you can skip declare db, because a default db already specify in ${DORIS_HOME}/conf/regression-conf.groovy
Expand Down Expand Up @@ -59,4 +59,46 @@ suite("load") {
}
}
}
}

// nested array with join
// Creates a table whose `value` column is a depth-2 nested array
// (ARRAY<ARRAY<INT>>) and loads the same six logical rows three times
// (six single-row INSERTs plus two identical multi-row INSERTs), so that
// joins against this table force column replication of nested arrays.
// NOTE(review): the two identical batch INSERTs appear intentional — they
// produce duplicate keys so a join replicates each array row; confirm with
// the expected-output files before deduplicating.
def test_nested_array_2_depths = {
def tableName = "nested_array_test_2_vectorized"

// Recreate the table from scratch so reruns are deterministic.
sql "DROP TABLE IF EXISTS ${tableName}"
sql """
CREATE TABLE IF NOT EXISTS ${tableName} (
`key` INT,
value ARRAY<ARRAY<INT>>
) DUPLICATE KEY (`key`) DISTRIBUTED BY HASH (`key`) BUCKETS 1
PROPERTIES ('replication_num' = '1')
"""

// Single-row inserts covering the edge shapes: empty array, [null],
// nested empty array, plain nested values, and nulls at both levels.
sql "INSERT INTO ${tableName} VALUES (1, [])"
sql "INSERT INTO ${tableName} VALUES (2, [null])"
sql "INSERT INTO ${tableName} VALUES (3, [[]])"
sql "INSERT INTO ${tableName} VALUES (4, [[1, 2, 3], [4, 5, 6]])"
sql "INSERT INTO ${tableName} VALUES (5, [[1, 2, 3], null, [4, 5, 6]])"
sql "INSERT INTO ${tableName} VALUES (6, [[1, 2, null], null, [4, null, 6], null, [null, 8, 9]])"

// First multi-row batch of the same six rows (duplicates the keys).
sql """
INSERT INTO ${tableName} VALUES
(1, []),
(2, [null]),
(3, [[]]),
(4, [[1, 2, 3], [4, 5, 6]]),
(5, [[1, 2, 3], null, [4, 5, 6]]),
(6, [[1, 2, null], null, [4, null, 6], null, [null, 8, 9]])
"""

// Second identical batch — triples each key so replication counts > 1.
sql """
INSERT INTO ${tableName} VALUES
(1, []),
(2, [null]),
(3, [[]]),
(4, [[1, 2, 3], [4, 5, 6]]),
(5, [[1, 2, 3], null, [4, 5, 6]]),
(6, [[1, 2, null], null, [4, null, 6], null, [null, 8, 9]])
"""
}
test_nested_array_2_depths.call()
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- Right-join the nested-array table against tpch_tiny_lineitem to force
-- ColumnArray replication of the ARRAY<ARRAY<INT>> `value` column; the
-- query is run twice on purpose so both cold and repeated execution paths
-- are covered by the regression output.
select /*+ SET_VAR(query_timeout = 600) */ ref_52.`key` as k, ref_54.`linestatus` as c0, ref_54.`shipmode` as c1, ref_54.`shipdate` as c2, ref_54.`shipmode` as c3, ref_52.`value` as c4
from nested_array_test_2_vectorized as ref_52 right join tpch_tiny_lineitem as ref_54 on (ref_52.`key` = ref_54.`linenumber` )
where ref_52.`value` is not NULL order by 1, 2, 3, 4, 5 limit 10;

select /*+ SET_VAR(query_timeout = 600) */ ref_52.`key` as k, ref_54.`linestatus` as c0, ref_54.`shipmode` as c1, ref_54.`shipdate` as c2, ref_54.`shipmode` as c3, ref_52.`value` as c4
from nested_array_test_2_vectorized as ref_52 right join tpch_tiny_lineitem as ref_54 on (ref_52.`key` = ref_54.`linenumber` )
where ref_52.`value` is not NULL order by 1, 2, 3, 4, 5 limit 10;