Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 22 additions & 5 deletions be/src/olap/rowset/segment_v2/binary_plain_page.h
Original file line number Diff line number Diff line change
Expand Up @@ -248,12 +248,14 @@ class BinaryPlainPageDecoder : public PageDecoder {
return Status::OK();
}
const size_t max_fetch = std::min(*n, static_cast<size_t>(_num_elems - _cur_idx));

uint32_t len_array[max_fetch];
uint32_t start_offset_array[max_fetch];
for (int i = 0; i < max_fetch; i++, _cur_idx++) {
const uint32_t start_offset = offset(_cur_idx);
uint32_t len = offset(_cur_idx + 1) - start_offset;

uint32_t last_offset = guarded_offset(_cur_idx);
for (int i = 0; i < max_fetch - 1; i++, _cur_idx++) {
const uint32_t start_offset = last_offset;
last_offset = guarded_offset(_cur_idx + 1);
uint32_t len = last_offset - start_offset;
len_array[i] = len;
start_offset_array[i] = start_offset;
if constexpr (Type == OLAP_FIELD_TYPE_OBJECT) {
Expand All @@ -262,6 +264,14 @@ class BinaryPlainPageDecoder : public PageDecoder {
}
}
}
_cur_idx++;
len_array[max_fetch - 1] = offset(_cur_idx) - last_offset;
start_offset_array[max_fetch - 1] = last_offset;
if constexpr (Type == OLAP_FIELD_TYPE_OBJECT) {
if (_options.need_check_bitmap) {
RETURN_IF_ERROR(BitmapTypeCode::validate(*(_data.data + last_offset)));
}
}
dst->insert_many_binary_data(_data.mutable_data(), len_array, start_offset_array,
max_fetch);

Expand Down Expand Up @@ -340,13 +350,20 @@ class BinaryPlainPageDecoder : public PageDecoder {
}

private:
// Size in bytes of one offset-table entry. offset() and guarded_offset() multiply an
// element index by this value to locate that element's entry after '_offsets_pos'.
static constexpr size_t SIZE_OF_INT32 = sizeof(uint32_t);
// Return the offset within '_data' where the string value with index 'idx' can be found.
//
// For 'idx' >= '_num_elems' there is no offset-table entry to read, so '_offsets_pos' is
// returned instead; callers use that value as the end offset of the last element when
// computing its length.
uint32_t offset(size_t idx) const {
    if (idx >= _num_elems) {
        return _offsets_pos;
    }
    // Entry 'idx' sits SIZE_OF_INT32 * idx bytes past the start of the offset table.
    const uint8_t* p =
            reinterpret_cast<const uint8_t*>(&_data[_offsets_pos + idx * SIZE_OF_INT32]);
    return decode_fixed32_le(p);
}

// Decode the offset-table entry for element 'idx' via decode_fixed32_le.
//
// Unlike offset(), this performs no 'idx >= _num_elems' check: the caller must
// guarantee that 'idx' refers to an existing entry.
uint32_t guarded_offset(size_t idx) const {
    const auto entry_pos = _offsets_pos + idx * SIZE_OF_INT32;
    return decode_fixed32_le(reinterpret_cast<const uint8_t*>(&_data[entry_pos]));
}

Expand Down
13 changes: 9 additions & 4 deletions be/src/olap/rowset/segment_v2/segment_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1150,8 +1150,11 @@ Status SegmentIterator::next_batch(vectorized::Block* block) {
}

if (!_lazy_materialization_read) {
Status ret = _output_column_by_sel_idx(block, _first_read_column_ids, sel_rowid_idx,
selected_size);
Status ret = Status::OK();
if (selected_size > 0) {
ret = _output_column_by_sel_idx(block, _first_read_column_ids, sel_rowid_idx,
selected_size);
}
if (!ret.ok()) {
return ret;
}
Expand All @@ -1176,8 +1179,10 @@ Status SegmentIterator::next_batch(vectorized::Block* block) {
// when lazy materialization enables, _first_read_column_ids = distinct(_short_cir_pred_column_ids + _vec_pred_column_ids)
// see _vec_init_lazy_materialization
// todo(wb) need to tell input columnids from output columnids
RETURN_IF_ERROR(_output_column_by_sel_idx(block, _first_read_column_ids, sel_rowid_idx,
selected_size));
if (selected_size > 0) {
RETURN_IF_ERROR(_output_column_by_sel_idx(block, _first_read_column_ids, sel_rowid_idx,
selected_size));
}
}

// shrink char_type suffix zero data
Expand Down
49 changes: 49 additions & 0 deletions be/src/vec/columns/column_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,55 @@ class ColumnString final : public COWHelper<IColumn, ColumnString> {
}
}

// Append 'num' strings to this column, merging strings whose bytes are adjacent in memory
// into runs so that each run is copied into 'chars' with a single memcpy.
//
// 'strings' must point to at least 'num' StringRef entries. 'num' is expected to be
// non-zero (asserted in debug builds); in release builds a zero 'num' is a no-op rather
// than an out-of-bounds read of strings[0].
void insert_many_continuous_strings(const StringRef* strings, size_t num) {
    DCHECK_NE(num, 0);
    if (num == 0) {
        return;
    }
    offsets.reserve(offsets.size() + num);

    // Parallel arrays: the i-th contiguous source run is [start_points[i], end_points[i]).
    std::vector<const char*> start_points;
    std::vector<const char*> end_points;

    const auto& head = strings[0];
    start_points.push_back(head.data);
    size_t new_size = head.size;
    // 'cursor' is one past the last byte of the run currently being extended.
    const char* cursor = head.data + new_size;

    const size_t old_size = chars.size();
    size_t offset = old_size + new_size;
    offsets.push_back(offset);

    for (size_t i = 1; i < num; i++) {
        const auto& str = strings[i];
        if (cursor != str.data) {
            // Not adjacent to the previous string: close the current run, open a new one.
            end_points.push_back(cursor);
            start_points.push_back(str.data);
            cursor = str.data;
        }
        const size_t sz = str.size;
        offset += sz;
        new_size += sz;
        cursor += sz;
        // Capacity for all 'num' offsets was reserved above.
        offsets.push_back_without_reserve(offset);
    }
    // Close the final run (this is the only run when num == 1).
    end_points.push_back(cursor);
    DCHECK_EQ(end_points.size(), start_points.size());

    chars.resize(old_size + new_size);

    Char* data = chars.data();
    size_t write_pos = old_size;
    const size_t num_range = start_points.size();
    for (size_t i = 0; i < num_range; i++) {
        // Keep the run length in size_t so it matches the size_t total used for resize().
        const size_t len = static_cast<size_t>(end_points[i] - start_points[i]);
        if (len) {
            memcpy(data + write_pos, start_points[i], len);
            write_pos += len;
        }
    }
}

void insert_many_dict_data(const int32_t* data_array, size_t start_index, const StringRef* dict,
size_t num, uint32_t /*dict_num*/) override {
size_t offset_size = offsets.size();
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/columns/predicate_column.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ class PredicateColumnType final : public COWHelper<IColumn, PredicateColumnType<
refs[i].data = sv.ptr;
refs[i].size = sv.len;
}
res_ptr->insert_many_strings(refs, sel_size);
res_ptr->insert_many_continuous_strings(refs, sel_size);
}

void insert_decimal_to_res_column(const uint16_t* sel, size_t sel_size,
Expand Down