From 7f21734b8dcbbbf0b7d1992bda9c91c04ec17641 Mon Sep 17 00:00:00 2001 From: csun5285 Date: Sun, 24 May 2026 15:27:23 +0800 Subject: [PATCH] [feat](be) add PLAIN_ENCODING_V3 binary plain page with contiguous data + lengths trailer V3 layout: |data1..dataN|varuint_len1..varuint_lenN|data_block_size(u32)|num_elems(u32)| Compared to V2 (length and data interleaved per entry), V3 lets the pre-decoder memcpy the entire binary payload in a single shot and walk the contiguous varuint length block once to fill the V1 offsets array, with no data-pointer-vs-length-pointer dependency between the two passes. Backward-compat: V3 is registered as a new EncodingTypePB (= 9). Existing segments persist their per-column encoding meta (V1=2, V2=8, V3=9), so the read path dispatches to the matching pre-decoder. Old V2 segments continue to be served by BinaryPlainPageV2PreDecoder. Wired through the entire write path: - encoding_info.cpp: Hook 1b mirrors the V2 hook, rewriting PLAIN_ENCODING to PLAIN_ENCODING_V3 when the schema preference is BINARY_PLAIN_ENCODING_V3. - segment_writer.cpp + vertical_segment_writer.cpp: row-store-column path now switches over BinaryPlainEncodingTypePB to pick PLAIN_ENCODING_V3. - binary_dict_page.cpp: dict word page and fallback binary page use a small shared helper to map the preference to the on-disk encoding. - tablet_meta.cpp: new TStorageFormat::V3 tablets default to BINARY_PLAIN_ENCODING_V3 for both data schema and row binlog schema. Tests: 15 BinaryPlainPageV3Test cases covering encode/decode roundtrip, seek, read_by_rowids, empty page, page_full, large N, mixed lengths (including unicode), reset, varint length boundaries (127/128/16383/16384 byte values across 1/2/3-byte varint bands), raw trailer layout assertions, and two corruption-rejection cases. All pass under ASAN. 
Benchmark (15-rep x 2s median, V3 / V2 speedup at 256 KiB page): 8B: 3.56x 16B: 3.07x 32B: 2.46x 64B: 2.63x 128B: 2.25x 256B: 1.39x 512B: 1.22x 1024B: 1.11x 4096B: 1.01x V3 is never slower than V2 anywhere on the tested grid. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../benchmark_binary_plain_page_v2.hpp | 233 +++++++++++ be/benchmark/benchmark_main.cpp | 34 +- be/src/storage/segment/binary_dict_page.cpp | 30 +- be/src/storage/segment/binary_plain_page_v3.h | 167 ++++++++ .../binary_plain_page_v3_pre_decoder.h | 134 ++++++ be/src/storage/segment/encoding_info.cpp | 53 +++ be/src/storage/segment/segment_writer.cpp | 17 +- .../segment/vertical_segment_writer.cpp | 17 +- be/src/storage/tablet/tablet_meta.cpp | 6 +- .../segment/binary_plain_page_v3_test.cpp | 393 ++++++++++++++++++ gensrc/proto/olap_file.proto | 1 + gensrc/proto/segment_v2.proto | 1 + 12 files changed, 1063 insertions(+), 23 deletions(-) create mode 100644 be/benchmark/benchmark_binary_plain_page_v2.hpp create mode 100644 be/src/storage/segment/binary_plain_page_v3.h create mode 100644 be/src/storage/segment/binary_plain_page_v3_pre_decoder.h create mode 100644 be/test/storage/segment/binary_plain_page_v3_test.cpp diff --git a/be/benchmark/benchmark_binary_plain_page_v2.hpp b/be/benchmark/benchmark_binary_plain_page_v2.hpp new file mode 100644 index 00000000000000..53f15b6dbe0df2 --- /dev/null +++ b/be/benchmark/benchmark_binary_plain_page_v2.hpp @@ -0,0 +1,233 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include + +#include "storage/cache/page_cache.h" +#include "storage/segment/binary_plain_page_v2.h" +#include "storage/segment/binary_plain_page_v2_pre_decoder.h" +#include "storage/segment/binary_plain_page_v3.h" +#include "storage/segment/binary_plain_page_v3_pre_decoder.h" +#include "storage/segment/options.h" +#include "storage/segment/page_builder.h" +#include "storage/types.h" +#include "util/slice.h" + +namespace doris { +namespace segment_v2 { + +// Build a fixed corpus of strings of `value_len` bytes each. We reuse the same +// corpus across V2 and V3 so the only variable left is the on-disk layout. A +// deterministic RNG keeps results comparable across runs. +inline std::vector make_corpus(size_t num_elems, size_t value_len) { + std::mt19937 rng(0xC0FFEEu); + std::uniform_int_distribution dist('a', 'z'); + std::vector corpus; + corpus.reserve(num_elems); + for (size_t i = 0; i < num_elems; ++i) { + std::string s(value_len, '\0'); + for (size_t j = 0; j < value_len; ++j) { + s[j] = static_cast(dist(rng)); + } + corpus.emplace_back(std::move(s)); + } + return corpus; +} + +template