Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions be/src/storage/segment/segment_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,12 @@ Status SegmentWriter::_create_column_writer(uint32_t cid, const TabletColumn& co
auto page_size = _tablet_schema->row_store_page_size();
opts.data_page_size =
(page_size > 0) ? page_size : segment_v2::ROW_STORE_PAGE_SIZE_DEFAULT_VALUE;
// Row store data is already serialized as a single blob. Keep it on plain pages
// to avoid introducing dictionary pages for the hidden row store column.
opts.meta->set_encoding(_tablet_schema->binary_plain_encoding_default_impl() ==
BinaryPlainEncodingTypePB::BINARY_PLAIN_ENCODING_V2
? PLAIN_ENCODING_V2
: PLAIN_ENCODING);
}

opts.rowset_ctx = _opts.rowset_ctx;
Expand Down
6 changes: 6 additions & 0 deletions be/src/storage/segment/vertical_segment_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,12 @@ Status VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo
auto page_size = _tablet_schema->row_store_page_size();
opts.data_page_size =
(page_size > 0) ? page_size : segment_v2::ROW_STORE_PAGE_SIZE_DEFAULT_VALUE;
// Row store data is already serialized as a single blob. Keep it on plain pages
// to avoid introducing dictionary pages for the hidden row store column.
opts.meta->set_encoding(_tablet_schema->binary_plain_encoding_default_impl() ==
BinaryPlainEncodingTypePB::BINARY_PLAIN_ENCODING_V2
? PLAIN_ENCODING_V2
: PLAIN_ENCODING);
}

opts.rowset_ctx = _opts.rowset_ctx;
Expand Down
71 changes: 71 additions & 0 deletions be/test/storage/segment/column_meta_accessor_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <string>
#include <vector>

#include "common/consts.h"
#include "core/field.h"
#include "io/fs/local_file_system.h"
#include "storage/segment/segment.h"
Expand All @@ -45,6 +46,19 @@ std::string make_test_file_path(const std::string& file_name) {
return std::string(kTestDir) + "/" + file_name;
}

// Builds a test column named BeConsts::ROW_STORE_COL with the given unique id.
// The column is a nullable, non-key STRING column with no aggregation.
TabletColumnPtr create_row_store_test_column(int32_t id) {
    auto row_store = std::make_shared<TabletColumn>();
    TabletColumn& col = *row_store;

    // Identity.
    col._col_name = BeConsts::ROW_STORE_COL;
    col._unique_id = id;

    // Type and value semantics.
    col._type = FieldType::OLAP_FIELD_TYPE_STRING;
    col._aggregation = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE;
    col._is_nullable = true;
    col._is_key = false;

    // Sizing.
    col._index_length = 4;
    col._length = 2147483643;

    return row_store;
}

// Helper to write segment footer trailer (footer + metadata)
Status append_footer_trailer(io::FileWriter* fw, SegmentFooterPB* footer) {
std::string footer_buf;
Expand Down Expand Up @@ -677,6 +691,63 @@ TEST(ColumnMetaAccessorTest, FooterSizeWithManyColumnsExternalVsInline) {
EXPECT_LT(external_footer_size, inline_footer_size / 10);
}

// Writes a small segment whose schema is an INT key column followed by the
// row-store column, then reads the footer back and checks that the row-store
// column's recorded encoding is PLAIN_ENCODING_V2 rather than DICT_ENCODING.
TEST(ColumnMetaAccessorTest, RowStoreColumnDoesNotUseDictEncoding) {
    constexpr int32_t kRowStoreUid = 1;

    // Recreate the scratch directory; the result of the delete is intentionally ignored.
    auto local_fs = io::global_local_filesystem();
    static_cast<void>(local_fs->delete_directory(kTestDir));
    ASSERT_TRUE(local_fs->create_directory(kTestDir).ok());

    SegmentWriterOptions writer_opts;
    writer_opts.enable_unique_key_merge_on_write = false;

    // Column 0: non-nullable INT key.
    auto k0 = std::make_shared<TabletColumn>();
    k0->_col_name = "k0";
    k0->_unique_id = 0;
    k0->_type = FieldType::OLAP_FIELD_TYPE_INT;
    k0->_is_nullable = false;
    k0->_is_key = true;
    k0->_index_length = 4;
    k0->_length = 4;

    // Column 1: the row-store column under test.
    std::vector<TabletColumnPtr> schema_columns;
    schema_columns.emplace_back(std::move(k0));
    schema_columns.emplace_back(create_row_store_test_column(kRowStoreUid));

    // The schema selects the V2 binary plain encoding, which is what the
    // footer is expected to report for the row-store column below.
    auto schema = create_schema(schema_columns, UNIQUE_KEYS);
    schema->set_binary_plain_encoding_default_impl(
            BinaryPlainEncodingTypePB::BINARY_PLAIN_ENCODING_V2);

    // Cell generator: the key column gets the row id, every other column gets
    // a string derived from the row id.
    auto fill_cell = [](size_t rid, int cid, Field& field) {
        if (cid != 0) {
            field = Field::create_field<TYPE_STRING>("row-store-" + std::to_string(rid));
        } else {
            field = Field::create_field<TYPE_INT>(static_cast<int32_t>(rid));
        }
    };

    std::string written_path;
    std::shared_ptr<Segment> built_segment;
    build_segment(writer_opts, schema,
                  /*segment_id=*/0, schema,
                  /*nrows=*/8, fill_cell, &built_segment, std::string(kTestDir), &written_path);
    ASSERT_NE(built_segment, nullptr);

    // Re-open the written file and parse its footer directly.
    io::FileReaderOptions open_opts;
    io::FileReaderSPtr file_reader;
    ASSERT_TRUE(local_fs->open_file(written_path, &file_reader, &open_opts).ok());

    SegmentFooterPB parsed_footer;
    ASSERT_TRUE(read_footer_from_file(file_reader, &parsed_footer).ok());
    ASSERT_EQ(2, parsed_footer.columns_size());

    // Footer column 1 corresponds to the row-store column.
    const auto& meta = parsed_footer.columns(1);
    EXPECT_EQ(kRowStoreUid, meta.unique_id());
    EXPECT_EQ(static_cast<int>(FieldType::OLAP_FIELD_TYPE_STRING), meta.type());
    EXPECT_EQ(PLAIN_ENCODING_V2, meta.encoding());
    EXPECT_NE(DICT_ENCODING, meta.encoding());
}

// Test repeated access. ColumnMetaAccessor itself does not guarantee thread safety,
// so this only checks that multiple sequential calls work correctly.
TEST(ColumnMetaAccessorTest, MultipleSequentialAccesses) {
Expand Down
Loading