diff --git a/be/src/tools/meta_tool.cpp b/be/src/tools/meta_tool.cpp index 1bdc7a613d7ec3..7857b3a5ad7a8d 100644 --- a/be/src/tools/meta_tool.cpp +++ b/be/src/tools/meta_tool.cpp @@ -20,13 +20,17 @@ #include #include +#include +#include #include #include +#include #include #include #include #include +#include "common/cpp/private_member_accessor.hpp" #include "common/status.h" #include "core/assert_cast.h" #include "core/column/column.h" @@ -66,7 +70,8 @@ using doris::io::FileReaderSPtr; DEFINE_string(root_path, "", "storage root path"); DEFINE_string(operation, "get_meta", - "valid operation: get_meta, flag, load_meta, delete_meta, show_meta"); + "valid operation: get_meta, flag, load_meta, delete_meta, show_meta, " + "show_segment_footer, show_segment_data"); DEFINE_int64(tablet_id, 0, "tablet_id for tablet meta"); DEFINE_int32(schema_hash, 0, "schema_hash for tablet meta"); DEFINE_string(json_meta_path, "", "absolute json meta file path"); @@ -89,6 +94,7 @@ std::string get_usage(const std::string& progname) { ss << "./meta_tool --operation=delete_meta --tablet_file=file_path\n"; ss << "./meta_tool --operation=show_meta --pb_meta_path=path\n"; ss << "./meta_tool --operation=show_segment_footer --file=/path/to/segment/file\n"; + ss << "./meta_tool --operation=show_segment_data --file=/path/to/segment/file\n"; return ss.str(); } @@ -319,6 +325,580 @@ void show_segment_footer(const std::string& file_name) { return; } +// Helper function to get field type string +std::string get_field_type_string(doris::FieldType type) { + switch (type) { + case doris::FieldType::OLAP_FIELD_TYPE_TINYINT: + return "TINYINT"; + case doris::FieldType::OLAP_FIELD_TYPE_SMALLINT: + return "SMALLINT"; + case doris::FieldType::OLAP_FIELD_TYPE_INT: + return "INT"; + case doris::FieldType::OLAP_FIELD_TYPE_BIGINT: + return "BIGINT"; + case doris::FieldType::OLAP_FIELD_TYPE_LARGEINT: + return "LARGEINT"; + case doris::FieldType::OLAP_FIELD_TYPE_FLOAT: + return "FLOAT"; + case 
doris::FieldType::OLAP_FIELD_TYPE_DOUBLE: + return "DOUBLE"; + case doris::FieldType::OLAP_FIELD_TYPE_DECIMAL: + return "DECIMAL"; + case doris::FieldType::OLAP_FIELD_TYPE_DECIMAL32: + return "DECIMAL32"; + case doris::FieldType::OLAP_FIELD_TYPE_DECIMAL64: + return "DECIMAL64"; + case doris::FieldType::OLAP_FIELD_TYPE_DECIMAL128I: + return "DECIMAL128I"; + case doris::FieldType::OLAP_FIELD_TYPE_CHAR: + return "CHAR"; + case doris::FieldType::OLAP_FIELD_TYPE_VARCHAR: + return "VARCHAR"; + case doris::FieldType::OLAP_FIELD_TYPE_STRING: + return "STRING"; + case doris::FieldType::OLAP_FIELD_TYPE_DATE: + return "DATE"; + case doris::FieldType::OLAP_FIELD_TYPE_DATETIME: + return "DATETIME"; + case doris::FieldType::OLAP_FIELD_TYPE_DATEV2: + return "DATEV2"; + case doris::FieldType::OLAP_FIELD_TYPE_DATETIMEV2: + return "DATETIMEV2"; + case doris::FieldType::OLAP_FIELD_TYPE_BOOL: + return "BOOLEAN"; + case doris::FieldType::OLAP_FIELD_TYPE_STRUCT: + return "STRUCT"; + case doris::FieldType::OLAP_FIELD_TYPE_ARRAY: + return "ARRAY"; + case doris::FieldType::OLAP_FIELD_TYPE_MAP: + return "MAP"; + case doris::FieldType::OLAP_FIELD_TYPE_JSONB: + return "JSONB"; + case doris::FieldType::OLAP_FIELD_TYPE_HLL: + return "HLL"; + case doris::FieldType::OLAP_FIELD_TYPE_BITMAP: + return "BITMAP"; + case doris::FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE: + return "QUANTILE_STATE"; + case doris::FieldType::OLAP_FIELD_TYPE_AGG_STATE: + return "AGG_STATE"; + case doris::FieldType::OLAP_FIELD_TYPE_VARIANT: + return "VARIANT"; + default: + return "UNKNOWN"; + } +} + +// Helper function to get encoding type string +std::string get_encoding_string(doris::segment_v2::EncodingTypePB encoding) { + switch (encoding) { + case doris::segment_v2::PLAIN_ENCODING: + return "PLAIN"; + case doris::segment_v2::PREFIX_ENCODING: + return "PREFIX"; + case doris::segment_v2::RLE: + return "RLE"; + case doris::segment_v2::DICT_ENCODING: + return "DICT_ENCODING"; + case doris::segment_v2::BIT_SHUFFLE: + return 
"BIT_SHUFFLE"; + case doris::segment_v2::FOR_ENCODING: + return "FOR_ENCODING"; + case doris::segment_v2::PLAIN_ENCODING_V2: + return "PLAIN_ENCODING_V2"; + default: + return "UNKNOWN"; + } +} + +// Helper function to get compression type string +std::string get_compression_string(doris::segment_v2::CompressionTypePB compression) { + switch (compression) { + case doris::segment_v2::NO_COMPRESSION: + return "NONE"; + case doris::segment_v2::SNAPPY: + return "SNAPPY"; + case doris::segment_v2::LZ4: + return "LZ4"; + case doris::segment_v2::LZ4F: + return "LZ4F"; + case doris::segment_v2::ZLIB: + return "ZLIB"; + case doris::segment_v2::ZSTD: + return "ZSTD"; + case doris::segment_v2::LZ4HC: + return "LZ4HC"; + default: + return "UNKNOWN"; + } +} + +// Helper function to format a single value from a column +std::string format_column_value(const doris::IColumn& column, size_t row, + doris::FieldType field_type) { + using namespace doris; + + try { + switch (field_type) { + case FieldType::OLAP_FIELD_TYPE_BOOL: { + return column.get_bool(row) ? 
"true" : "false"; + } + case FieldType::OLAP_FIELD_TYPE_TINYINT: + case FieldType::OLAP_FIELD_TYPE_SMALLINT: + case FieldType::OLAP_FIELD_TYPE_INT: + case FieldType::OLAP_FIELD_TYPE_BIGINT: { + return std::to_string(column.get_int(row)); + } + case FieldType::OLAP_FIELD_TYPE_LARGEINT: { + // LargeInt is stored as Int128 + const StringRef& data = column.get_data_at(row); + if (data.size == sizeof(__int128)) { + __int128 val = *reinterpret_cast(data.data); + return doris::LargeIntValue::to_string(val); + } + return ""; + } + case FieldType::OLAP_FIELD_TYPE_FLOAT: { + const StringRef& data = column.get_data_at(row); + if (data.size == sizeof(float)) { + float val = *reinterpret_cast(data.data); + return std::to_string(val); + } + return ""; + } + case FieldType::OLAP_FIELD_TYPE_DOUBLE: { + const StringRef& data = column.get_data_at(row); + if (data.size == sizeof(double)) { + double val = *reinterpret_cast(data.data); + return std::to_string(val); + } + return ""; + } + case FieldType::OLAP_FIELD_TYPE_DATE: + case FieldType::OLAP_FIELD_TYPE_DATEV2: { + const StringRef& data = column.get_data_at(row); + if (data.size == sizeof(uint32_t)) { + uint32_t val = *reinterpret_cast(data.data); + return std::to_string(val); + } + return ""; + } + case FieldType::OLAP_FIELD_TYPE_DATETIME: + case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: { + const StringRef& data = column.get_data_at(row); + if (data.size == sizeof(uint64_t)) { + uint64_t val = *reinterpret_cast(data.data); + return std::to_string(val); + } + return ""; + } + case FieldType::OLAP_FIELD_TYPE_CHAR: + case FieldType::OLAP_FIELD_TYPE_VARCHAR: + case FieldType::OLAP_FIELD_TYPE_STRING: + case FieldType::OLAP_FIELD_TYPE_HLL: + case FieldType::OLAP_FIELD_TYPE_BITMAP: + case FieldType::OLAP_FIELD_TYPE_JSONB: + case FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE: { + const StringRef& str = column.get_data_at(row); + std::string result = "'"; + for (size_t i = 0; i < str.size && i < 50; ++i) { + // Escape quotes and special 
characters + char c = str.data[i]; + if (c == '\0') { + result += "\\0"; + } else if (c == '\n') { + result += "\\n"; + } else if (c == '\r') { + result += "\\r"; + } else if (c == '\t') { + result += "\\t"; + } else if (c == '\'') { + result += "\\'"; + } else if (c == '\\') { + result += "\\\\"; + } else if (static_cast(c) < 32) { + // Other control characters + char buf[8]; + snprintf(buf, sizeof(buf), "\\x%02x", static_cast(c)); + result += buf; + } else { + result += c; + } + } + if (str.size > 50) { + result += "..."; + } + result += "'"; + return result; + } + case FieldType::OLAP_FIELD_TYPE_DECIMAL: + case FieldType::OLAP_FIELD_TYPE_DECIMAL32: + case FieldType::OLAP_FIELD_TYPE_DECIMAL64: + case FieldType::OLAP_FIELD_TYPE_DECIMAL128I: { + const StringRef& data = column.get_data_at(row); + if (data.size == sizeof(__int128)) { + __int128 val = *reinterpret_cast(data.data); + return doris::LargeIntValue::to_string(val); + } + return ""; + } + default: + return ""; + } + } catch (const std::exception& e) { + return ""; + } +} + +// Read and print column data values +void print_column_data_values(const doris::segment_v2::ColumnMetaPB& column_meta, + const FileReaderSPtr& file_reader, uint64_t num_segment_rows, + int indent_level) { + using namespace doris::segment_v2; + + std::string indent(indent_level * 2, ' '); + + doris::FieldType field_type = static_cast(column_meta.type()); + + // Skip complex types for now + if (!doris::is_scalar_type(field_type)) { + std::cout << indent << "(Complex type - cannot display values)" << std::endl; + return; + } + + if (num_segment_rows == 0) { + std::cout << indent << "(No data)" << std::endl; + return; + } + + // Create a virtual TabletColumn for the column + doris::TabletColumn tablet_column; + tablet_column.set_aggregation_method( + doris::FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE); + tablet_column.set_type(field_type); + tablet_column.set_is_nullable(column_meta.is_nullable()); + tablet_column.set_length(0); // 
Default length + tablet_column.set_unique_id(column_meta.column_id()); + + // Create column reader + ColumnReaderOptions reader_opts; + reader_opts.verify_checksum = false; // Don't verify checksum for performance + + std::shared_ptr column_reader; + Status status = ColumnReader::create(reader_opts, column_meta, num_segment_rows, file_reader, + &column_reader); + if (!status.ok()) { + std::cout << indent << "(Failed to create column reader: " << status.to_string() << ")" + << std::endl; + return; + } + + // Create column iterator + ColumnIteratorUPtr iterator; + status = column_reader->new_iterator(&iterator, &tablet_column); + if (!status.ok()) { + std::cout << indent << "(Failed to create column iterator: " << status.to_string() << ")" + << std::endl; + return; + } + + // Initialize iterator + ColumnIteratorOptions iter_opts; + iter_opts.file_reader = file_reader.get(); + doris::OlapReaderStatistics stats; // Dummy statistics + iter_opts.stats = &stats; + + status = iterator->init(iter_opts); + if (!status.ok()) { + std::cout << indent << "(Failed to initialize column iterator: " << status.to_string() + << ")" << std::endl; + return; + } + + // Seek to the beginning + status = iterator->seek_to_ordinal(0); + if (!status.ok()) { + std::cout << indent << "(Failed to seek to ordinal 0: " << status.to_string() << ")" + << std::endl; + return; + } + + // Create destination column for reading data + auto data_type = doris::DataTypeFactory::instance().create_data_type(column_meta); + if (!data_type) { + std::cout << indent << "(Failed to create data type for field type " + << static_cast(field_type) << ")" << std::endl; + return; + } + + MutableColumnPtr dst_column = data_type->create_column(); + + // Determine how many rows to display (max 10 rows for readability) + const size_t max_display_rows = 10; + size_t rows_to_read = std::min(static_cast(num_segment_rows), max_display_rows); + size_t rows_read = rows_to_read; + + status = iterator->next_batch(&rows_read, 
dst_column); + if (!status.ok()) { + std::cout << indent << "(Failed to read column data: " << status.to_string() << ")" + << std::endl; + return; + } + + if (rows_read == 0) { + std::cout << indent << "(No data read)" << std::endl; + return; + } + + // Print the values + std::cout << indent << "Data Values (" << rows_read << " of " << num_segment_rows + << " rows, showing first " << std::min(rows_read, max_display_rows) + << "):" << std::endl; + + for (size_t i = 0; i < rows_read; ++i) { + std::cout << indent << " [" << i << "] "; + if (column_meta.is_nullable()) { + const auto& nullable_col = assert_cast(*dst_column); + if (nullable_col.is_null_at(i)) { + std::cout << "NULL"; + } else { + const IColumn& nested_col = nullable_col.get_nested_column(); + std::cout << format_column_value(nested_col, i, field_type); + } + } else { + std::cout << format_column_value(*dst_column, i, field_type); + } + std::cout << std::endl; + } + + if (num_segment_rows > max_display_rows) { + std::cout << indent << " ... (" << (num_segment_rows - max_display_rows) << " more rows)" + << std::endl; + } +} + +// Helper function to print column metadata +void print_column_meta(const doris::segment_v2::ColumnMetaPB& column_meta, + const FileReaderSPtr& file_reader, uint64_t num_segment_rows, + int indent_level) { + std::string indent(indent_level * 2, ' '); + std::string column_name; + if (column_meta.has_column_path_info() && column_meta.column_path_info().has_path()) { + column_name = column_meta.column_path_info().path(); + } else { + column_name = "column_id_" + std::to_string(column_meta.column_id()); + } + + doris::FieldType field_type = static_cast(column_meta.type()); + std::cout << indent << "=== " << column_name << ": type=" << get_field_type_string(field_type) + << ", nullable=" << (column_meta.is_nullable() ? 
"true" : "false") + << ", encoding=" << get_encoding_string(column_meta.encoding()) + << " ===" << std::endl; + + // Print size info + if (column_meta.has_compressed_data_bytes()) { + std::cout << indent << "Data Size (Compressed): " << column_meta.compressed_data_bytes() + << " bytes" << std::endl; + } + if (column_meta.has_uncompressed_data_bytes()) { + std::cout << indent << "Data Size (Uncompressed): " << column_meta.uncompressed_data_bytes() + << " bytes" << std::endl; + } + if (column_meta.has_raw_data_bytes()) { + std::cout << indent << "Raw Data Size: " << column_meta.raw_data_bytes() << " bytes" + << std::endl; + } + + // Print dict page info + if (column_meta.has_dict_page()) { + const auto& dict_page = column_meta.dict_page(); + std::cout << indent << "Dictionary Page: offset=" << dict_page.offset() + << ", size=" << dict_page.size() << " bytes" << std::endl; + } + + // Print indexes info + if (column_meta.indexes_size() > 0) { + std::cout << indent << "Indexes: "; + for (int i = 0; i < column_meta.indexes_size(); ++i) { + if (i > 0) std::cout << ", "; + const auto& index_meta = column_meta.indexes(i); + if (index_meta.has_type()) { + switch (index_meta.type()) { + case doris::segment_v2::ORDINAL_INDEX: + std::cout << "ORDINAL"; + break; + case doris::segment_v2::ZONE_MAP_INDEX: + std::cout << "ZONE_MAP"; + break; + case doris::segment_v2::BLOOM_FILTER_INDEX: + std::cout << "BLOOM_FILTER"; + break; + case doris::segment_v2::BITMAP_INDEX: + std::cout << "BITMAP"; + break; + default: + std::cout << "UNKNOWN"; + break; + } + } + } + std::cout << std::endl; + } + + // Handle complex types recursively + if (column_meta.children_columns_size() > 0) { + std::cout << indent << "Sub-columns: " << column_meta.children_columns_size() << std::endl; + for (int i = 0; i < column_meta.children_columns_size(); ++i) { + print_column_meta(column_meta.children_columns(i), file_reader, num_segment_rows, + indent_level + 1); + } + return; + } + + // Print column data values 
for scalar types + if (doris::is_scalar_type(field_type)) { + print_column_data_values(column_meta, file_reader, num_segment_rows, indent_level); + } else { + std::cout << indent << "(Complex type - cannot display values)" << std::endl; + } +} + +// Register hijacked accessors +ACCESS_PRIVATE_FIELD(ExecEnv_encoding_info_resolver, ExecEnv, + doris::segment_v2::EncodingInfoResolver*, _encoding_info_resolver); +ACCESS_PRIVATE_FIELD(ExecEnv_orphan_mem_tracker, ExecEnv, std::shared_ptr, + _orphan_mem_tracker); +ACCESS_PRIVATE_STATIC_FIELD(ExecEnv_tracking_memory, ExecEnv, std::atomic_bool, _s_tracking_memory); + +void show_segment_data(const std::string& file_name) { + // Initialize ExecEnv components needed for ColumnReader + // Use macro to access private members temporarily + auto* exec_env = doris::ExecEnv::GetInstance(); + + auto resolver = GET_PRIVATE_FIELD(ExecEnv_encoding_info_resolver); + auto mem_tracker = GET_PRIVATE_FIELD(ExecEnv_orphan_mem_tracker); + auto tracking_memory = GET_PRIVATE_STATIC_FIELD(ExecEnv_tracking_memory); + // Initialize encoding info resolver for ColumnReader + if (exec_env.*resolver == nullptr) { + exec_env.*resolver = new doris::segment_v2::EncodingInfoResolver(); + } + // Initialize mem tracker limiter pool and orphan mem tracker for ThreadMemTrackerMgr + if (exec_env->mem_tracker_limiter_pool.empty()) { + exec_env->mem_tracker_limiter_pool.resize(doris::MEM_TRACKER_GROUP_NUM, + doris::TrackerLimiterGroup()); + tracking_memory->store(true, std::memory_order_release); + exec_env.*mem_tracker = doris::MemTrackerLimiter::create_shared( + doris::MemTrackerLimiter::Type::GLOBAL, "Orphan"); + } + + doris::io::FileReaderSPtr file_reader; + Status status = doris::io::global_local_filesystem()->open_file(file_name, &file_reader); + if (!status.ok()) { + std::cout << "open file failed: " << status << std::endl; + return; + } + + SegmentFooterPB footer; + status = get_segment_footer(file_reader.get(), &footer); + if (!status.ok()) { + std::cout 
<< "get footer failed: " << status.to_string() << std::endl; + return; + } + + // Print basic info + std::cout << "\n=== Segment File Info ===" << std::endl; + std::cout << "File: " << file_name << std::endl; + std::cout << "Num Rows: " << footer.num_rows() << std::endl; + std::cout << "Num Columns: " << footer.columns_size() << std::endl; + std::cout << "Compression: " << get_compression_string(footer.compress_type()) << std::endl; + if (footer.has_version()) { + std::cout << "Version: " << footer.version() << std::endl; + } + std::cout << std::endl; + + // Collect statistics + uint64_t total_compressed_data_bytes = 0; + uint64_t total_uncompressed_data_bytes = 0; + uint64_t total_raw_data_bytes = 0; + uint32_t total_ordinal_indexes = 0; + uint32_t total_zone_map_indexes = 0; + uint32_t total_bloom_filter_indexes = 0; + uint32_t columns_with_dict = 0; + + // Print each column + for (int i = 0; i < footer.columns_size(); ++i) { + const auto& column_meta = footer.columns(i); + print_column_meta(column_meta, file_reader, footer.num_rows(), 0); + + // Collect statistics + if (column_meta.has_compressed_data_bytes()) { + total_compressed_data_bytes += column_meta.compressed_data_bytes(); + } + if (column_meta.has_uncompressed_data_bytes()) { + total_uncompressed_data_bytes += column_meta.uncompressed_data_bytes(); + } + if (column_meta.has_raw_data_bytes()) { + total_raw_data_bytes += column_meta.raw_data_bytes(); + } + + // Count indexes + for (int j = 0; j < column_meta.indexes_size(); ++j) { + const auto& index_meta = column_meta.indexes(j); + if (index_meta.has_type()) { + switch (index_meta.type()) { + case doris::segment_v2::ORDINAL_INDEX: + total_ordinal_indexes++; + break; + case doris::segment_v2::ZONE_MAP_INDEX: + total_zone_map_indexes++; + break; + case doris::segment_v2::BLOOM_FILTER_INDEX: + total_bloom_filter_indexes++; + break; + default: + break; + } + } + } + + if (column_meta.has_dict_page()) { + columns_with_dict++; + } + + std::cout << std::endl; + 
} + + // Print statistics + std::cout << "\n=== Statistics ===" << std::endl; + uint32_t total_indexes = + total_ordinal_indexes + total_zone_map_indexes + total_bloom_filter_indexes; + + std::cout << "Total Columns: " << footer.columns_size() << std::endl; + std::cout << "Columns with Dictionary: " << columns_with_dict << std::endl; + std::cout << "Total Indexes: " << total_indexes << std::endl; + std::cout << " - Ordinal Indexes: " << total_ordinal_indexes << std::endl; + std::cout << " - Zone Map Indexes: " << total_zone_map_indexes << std::endl; + std::cout << " - Bloom Filter Indexes: " << total_bloom_filter_indexes << std::endl; + std::cout << "Total Data Size (Compressed): " << total_compressed_data_bytes << " bytes (" + << std::fixed << std::setprecision(2) << (total_compressed_data_bytes / 1024.0) + << " KB)" << std::endl; + std::cout << "Total Data Size (Uncompressed): " << total_uncompressed_data_bytes << " bytes (" + << std::fixed << std::setprecision(2) << (total_uncompressed_data_bytes / 1024.0) + << " KB)" << std::endl; + std::cout << "Total Raw Data Size: " << total_raw_data_bytes << " bytes (" << std::fixed + << std::setprecision(2) << (total_raw_data_bytes / 1024.0) << " KB)" << std::endl; + if (footer.has_index_footprint()) { + std::cout << "Index Footprint: " << footer.index_footprint() << " bytes (" << std::fixed + << std::setprecision(2) << (footer.index_footprint() / 1024.0) << " KB)" + << std::endl; + } + if (footer.has_data_footprint()) { + std::cout << "Data Footprint: " << footer.data_footprint() << " bytes (" << std::fixed + << std::setprecision(2) << (footer.data_footprint() / 1024.0) << " KB)" + << std::endl; + } +} + int main(int argc, char** argv) { SCOPED_INIT_THREAD_CONTEXT(); std::string usage = get_usage(argv[0]); @@ -344,6 +924,12 @@ int main(int argc, char** argv) { return -1; } show_segment_footer(FLAGS_file); + } else if (FLAGS_operation == "show_segment_data") { + if (FLAGS_file == "") { + std::cout << "no file flag for 
show_segment_data" << std::endl; + return -1; + } + show_segment_data(FLAGS_file); } else { // operations that need root path should be written here std::set valid_operations = {"get_meta", "load_meta", "delete_meta"}; diff --git a/common/cpp/private_member_accessor.hpp b/common/cpp/private_member_accessor.hpp new file mode 100644 index 00000000000000..4b87f50330dc31 --- /dev/null +++ b/common/cpp/private_member_accessor.hpp @@ -0,0 +1,109 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
#pragma once

/* ==================== USAGE EXAMPLE ====================
 *
 * Suppose we have a class with private members:
 *
 *   class MyClass {
 *   private:
 *       int secret_value = 42;
 *       std::string name = "test";
 *       static int count;
 *   };
 *
 *   int MyClass::count = 10086;
 *
 * In a.cpp, we can access the private members like this:
 *
 *   // First, declare access to the private members (at namespace scope).
 *   // Static members must use the *_STATIC_* variant.
 *   ACCESS_PRIVATE_FIELD(MyClass_secret, MyClass, int, secret_value)
 *   ACCESS_PRIVATE_FIELD(MyClass_name, MyClass, std::string, name)
 *   ACCESS_PRIVATE_STATIC_FIELD(MyClass_count, MyClass, int, count)
 *
 *   // Later in code, use the hijacked pointers:
 *   void example() {
 *       MyClass obj;
 *
 *       // Get pointer to member
 *       auto secret_ptr = GET_PRIVATE_FIELD(MyClass_secret);
 *       auto name_ptr = GET_PRIVATE_FIELD(MyClass_name);
 *       auto count_ptr = GET_PRIVATE_STATIC_FIELD(MyClass_count);
 *
 *       // Access private members
 *       int value = obj.*secret_ptr;        // value = 42
 *       std::string& name = obj.*name_ptr;  // name = "test"
 *       int& count = *count_ptr;            // 10086
 *
 *       // Modify private members
 *       obj.*secret_ptr = 100;
 *       obj.*name_ptr = "modified";
 *       *count_ptr = 10010;
 *   }
 *
 * When the object is held through a pointer, use `->*` instead of `.*`:
 *
 *       MyClass* p = &obj;
 *       p->*secret_ptr = 7;
 *
 * Note: This technique bypasses access control and should only
 * be used for debugging, testing, or legacy code workarounds.
 *
 * Note: the stored pointer is filled in by a dynamically initialised
 * static object. Read it from code that runs after static initialisation
 * (e.g. from main() or anything it calls); a read from another
 * translation unit's static initialiser may still observe nullptr.
 * ====================================================== */

/// A global container for storing hijacked member pointers.
///
/// One specialisation exists per tag. `Tag` must declare a nested
/// `MemberType` alias: a pointer-to-member type for non-static fields, or a
/// plain pointer type for static fields.
template <typename Tag>
struct AccessStorage {
    using Type = typename Tag::MemberType;
    /// Captured pointer; zero-initialised (null) until the matching
    /// AccessRobber<Tag, M>::Initer constructor has run.
    static Type ptr;
};

// Definition of the per-tag storage slot.
template <typename Tag>
typename Tag::MemberType AccessStorage<Tag>::ptr;

/// Hijacker template.
///
/// `M` is the address of the target member. Naming a private member in the
/// argument list of an *explicit instantiation* is permitted because the
/// usual access checks do not apply there; that is the only place the
/// private name is ever spelled.
template <typename Tag, typename Tag::MemberType M>
struct AccessRobber {
    /// Helper whose constructor publishes `M` into AccessStorage<Tag>::ptr.
    struct Initer {
        Initer() { AccessStorage<Tag>::ptr = M; }
    };
    /// Instantiated (and therefore constructed during static initialisation)
    /// by the explicit instantiation emitted from the ACCESS_PRIVATE_* macros.
    static Initer initer;
};

template <typename Tag, typename Tag::MemberType M>
typename AccessRobber<Tag, M>::Initer AccessRobber<Tag, M>::initer;

/**
 * Universal macro: Injects hijacking logic for a non-static data member.
 * Must be used at namespace scope.
 * @param TagName Custom tag name (must be unique)
 * @param Class   Target class name
 * @param Type    Member variable type
 * @param Member  Member variable name
 */
#define ACCESS_PRIVATE_FIELD(TagName, Class, Type, Member) \
    struct TagName {                                       \
        using MemberType = Type Class::*;                  \
    };                                                     \
    template struct AccessRobber<TagName, &Class::Member>;

// Similar to ACCESS_PRIVATE_FIELD but for private static field: the captured
// value is an ordinary pointer (Type*) rather than a pointer-to-member.
#define ACCESS_PRIVATE_STATIC_FIELD(TagName, Class, Type, Member) \
    struct TagName {                                              \
        using MemberType = Type*;                                 \
    };                                                            \
    template struct AccessRobber<TagName, &Class::Member>;

// Convenience macro for retrieving hijacked pointers.
// Yields a pointer-to-member; apply it with `obj.*ptr` or `obj_ptr->*ptr`.
#define GET_PRIVATE_FIELD(TagName) AccessStorage<TagName>::ptr

// Similar to GET_PRIVATE_FIELD but for private static field.
// Yields a plain pointer; dereference it with `*ptr`.
#define GET_PRIVATE_STATIC_FIELD(TagName) AccessStorage<TagName>::ptr