diff --git a/cpp/examples/c_examples/demo_write.c b/cpp/examples/c_examples/demo_write.c index 444cbe662..326cfdcf9 100644 --- a/cpp/examples/c_examples/demo_write.c +++ b/cpp/examples/c_examples/demo_write.c @@ -27,6 +27,14 @@ // This example shows you how to write tsfile. ERRNO write_tsfile() { ERRNO code = 0; + code = set_global_compression(TS_COMPRESSION_LZ4); + if (code != RET_OK) { + return code; + } + code = set_datatype_encoding(TS_DATATYPE_INT32, TS_ENCODING_TS_2DIFF); + if (code != RET_OK) { + return code; + } char* table_name = "table1"; // Create table schema to describe a table in a tsfile. diff --git a/cpp/src/common/global.h b/cpp/src/common/global.h index 50ca8c8a8..564f30c66 100644 --- a/cpp/src/common/global.h +++ b/cpp/src/common/global.h @@ -40,7 +40,8 @@ FORCE_INLINE int set_global_time_data_type(uint8_t data_type) { FORCE_INLINE int set_global_time_encoding(uint8_t encoding) { - ASSERT(encoding >= PLAIN && encoding <= FREQ); - if (encoding != TS_2DIFF && encoding != PLAIN) { + ASSERT(encoding >= PLAIN && encoding <= SPRINTZ); + if (encoding != TS_2DIFF && encoding != PLAIN && encoding != GORILLA && + encoding != ZIGZAG && encoding != RLE && encoding != SPRINTZ) { return E_NOT_SUPPORT; } g_config_value_.time_encoding_type_ = static_cast(encoding); @@ -49,7 +50,8 @@ FORCE_INLINE int set_global_time_encoding(uint8_t encoding) { FORCE_INLINE int set_global_time_compression(uint8_t compression) { ASSERT(compression >= UNCOMPRESSED && compression <= LZ4); - if (compression != UNCOMPRESSED && compression != LZ4) { + if (compression != UNCOMPRESSED && compression != SNAPPY && + compression != GZIP && compression != LZO && compression != LZ4) { return E_NOT_SUPPORT; } g_config_value_.time_compress_type_ = @@ -58,51 +60,52 @@ FORCE_INLINE int set_global_time_compression(uint8_t compression) { } FORCE_INLINE int set_datatype_encoding(uint8_t data_type, uint8_t encoding) { - TSDataType dtype = static_cast(data_type); + const TSDataType dtype = static_cast(data_type); + const TSEncoding encoding_type =
static_cast(encoding); + + // Range-check both enum arguments ASSERT(dtype >= BOOLEAN && dtype <= STRING); - TSEncoding encoding_type = static_cast(encoding); - ASSERT(encoding >= PLAIN && encoding <= FREQ); + ASSERT(encoding >= PLAIN && encoding <= SPRINTZ); + + // Validate per type; DATE reuses the INT64 slot and TEXT the STRING slot switch (dtype) { case BOOLEAN: - if (encoding_type != PLAIN) { - return E_NOT_SUPPORT; - } + if (encoding_type != PLAIN) return E_NOT_SUPPORT; g_config_value_.boolean_encoding_type_ = encoding_type; break; + case INT32: - if (encoding_type != PLAIN && encoding_type != TS_2DIFF && - encoding_type != GORILLA) { - return E_NOT_SUPPORT; - } - g_config_value_.int32_encoding_type_ = encoding_type; - break; + case DATE: case INT64: if (encoding_type != PLAIN && encoding_type != TS_2DIFF && - encoding_type != GORILLA) { - return E_NOT_SUPPORT; - } - g_config_value_.int64_encoding_type_ = encoding_type; - break; - case STRING: - if (encoding_type != PLAIN) { + encoding_type != GORILLA && encoding_type != ZIGZAG && + encoding_type != RLE && encoding_type != SPRINTZ) { return E_NOT_SUPPORT; } - g_config_value_.string_encoding_type_ = encoding_type; + dtype == INT32 + ? g_config_value_.int32_encoding_type_ = encoding_type + : g_config_value_.int64_encoding_type_ = encoding_type; break; + case FLOAT: + case DOUBLE: if (encoding_type != PLAIN && encoding_type != TS_2DIFF && - encoding_type != GORILLA) { + encoding_type != GORILLA && encoding_type != SPRINTZ) { return E_NOT_SUPPORT; } - g_config_value_.float_encoding_type_ = encoding_type; + dtype == FLOAT + ?
g_config_value_.float_encoding_type_ = encoding_type + : g_config_value_.double_encoding_type_ = encoding_type; break; - case DOUBLE: - if (encoding_type != PLAIN && encoding_type != TS_2DIFF && - encoding_type != GORILLA) { + + case STRING: + case TEXT: + if (encoding_type != PLAIN && encoding_type != DICTIONARY) { return E_NOT_SUPPORT; } - g_config_value_.double_encoding_type_ = encoding_type; + g_config_value_.string_encoding_type_ = encoding_type; break; + default: break; } @@ -111,7 +114,8 @@ FORCE_INLINE int set_datatype_encoding(uint8_t data_type, uint8_t encoding) { FORCE_INLINE int set_global_compression(uint8_t compression) { ASSERT(compression >= UNCOMPRESSED && compression <= LZ4); - if (compression != UNCOMPRESSED && compression != LZ4) { + if (compression != UNCOMPRESSED && compression != SNAPPY && + compression != GZIP && compression != LZO && compression != LZ4) { return E_NOT_SUPPORT; } g_config_value_.default_compression_type_ = @@ -119,6 +123,46 @@ FORCE_INLINE int set_global_compression(uint8_t compression) { return E_OK; } +FORCE_INLINE uint8_t get_global_time_encoding() { + return static_cast(g_config_value_.time_encoding_type_); +} + +FORCE_INLINE uint8_t get_global_time_compression() { + return static_cast(g_config_value_.time_compress_type_); +} + +FORCE_INLINE uint8_t get_datatype_encoding(uint8_t data_type) { + const TSDataType dtype = static_cast(data_type); + + // Range-check the data type argument + ASSERT(dtype >= BOOLEAN && dtype <= STRING); + + switch (dtype) { + case BOOLEAN: + return static_cast(g_config_value_.boolean_encoding_type_); + case INT32: + return static_cast(g_config_value_.int32_encoding_type_); + case INT64: + return static_cast(g_config_value_.int64_encoding_type_); + case FLOAT: + return static_cast(g_config_value_.float_encoding_type_); + case DOUBLE: + return static_cast(g_config_value_.double_encoding_type_); + case STRING: + case TEXT: + return static_cast(g_config_value_.string_encoding_type_); + case DATE: + return
static_cast(g_config_value_.int64_encoding_type_); + default: + return static_cast( + PLAIN); // Types without a dedicated slot report PLAIN + } +} + +FORCE_INLINE uint8_t get_global_compression() { + return static_cast(g_config_value_.default_compression_type_); +} + extern int init_common(); extern bool is_timestamp_column_name(const char *time_col_name); extern void cols_to_json(ByteStream *byte_stream, diff --git a/cpp/src/cwrapper/tsfile_cwrapper.cc b/cpp/src/cwrapper/tsfile_cwrapper.cc index 7b09f26d7..e6e15dd48 100644 --- a/cpp/src/cwrapper/tsfile_cwrapper.cc +++ b/cpp/src/cwrapper/tsfile_cwrapper.cc @@ -42,6 +42,36 @@ void init_tsfile_config() { } } +uint8_t get_global_time_encoding() { + return common::get_global_time_encoding(); +} + +uint8_t get_global_time_compression() { + return common::get_global_time_compression(); +} + +uint8_t get_datatype_encoding(uint8_t data_type) { + return common::get_datatype_encoding(data_type); +} + +uint8_t get_global_compression() { return common::get_global_compression(); } + +int set_global_time_encoding(uint8_t encoding) { + return common::set_global_time_encoding(encoding); +} + +int set_global_time_compression(uint8_t compression) { + return common::set_global_time_compression(compression); +} + +int set_datatype_encoding(uint8_t data_type, uint8_t encoding) { + return common::set_datatype_encoding(data_type, encoding); +} + +int set_global_compression(uint8_t compression) { + return common::set_global_compression(compression); +} + WriteFile write_file_new(const char *pathname, ERRNO *err_code) { int ret; init_tsfile_config(); diff --git a/cpp/src/cwrapper/tsfile_cwrapper.h b/cpp/src/cwrapper/tsfile_cwrapper.h index 1f651f5d1..75dc03643 100644 --- a/cpp/src/cwrapper/tsfile_cwrapper.h +++ b/cpp/src/cwrapper/tsfile_cwrapper.h @@ -119,6 +119,80 @@ typedef void* ResultSet; typedef int32_t ERRNO; typedef int64_t Timestamp; +/** + * @brief Get the encoding type for global time column + * + * @return uint8_t Time encoding
type enum value (cast to uint8_t) + */ +uint8_t get_global_time_encoding(void); + +/** + * @brief Get the compression type for global time column + * + * @return uint8_t Time compression type enum value (cast to uint8_t) + */ +uint8_t get_global_time_compression(void); + +/** + * @brief Get the encoding type for specified data type + * + * @param data_type The data type to query encoding for + * @return uint8_t Encoding enum value as uint8_t; PLAIN for unhandled types + */ +uint8_t get_datatype_encoding(uint8_t data_type); + +/** + * @brief Get the global default compression type + * + * @return uint8_t Compression type enum value (cast to uint8_t) + */ +uint8_t get_global_compression(void); + +/** + * @brief Sets the global time column encoding method + * + * Validates and sets the encoding type for time series timestamps. + * Supported encodings: TS_2DIFF, PLAIN, GORILLA, ZIGZAG, RLE, SPRINTZ + * + * @param encoding The encoding type to set (as uint8_t) + * @return int E_OK on success, E_NOT_SUPPORT for unsupported encoding + */ +int set_global_time_encoding(uint8_t encoding); + +/** + * @brief Sets the global time column compression method + * + * Validates and sets the compression type for time series timestamps.
+ * Supported compressions: UNCOMPRESSED, SNAPPY, GZIP, LZO, LZ4 + * + * @param compression The compression type to set (as uint8_t) + * @return int E_OK on success, E_NOT_SUPPORT for unsupported compression + */ +int set_global_time_compression(uint8_t compression); + +/** + * @brief Set encoding type for specific data type + * @param data_type The data type to configure + * @param encoding The encoding type to set + * @return E_OK if success, E_NOT_SUPPORT if encoding is not supported for the + * data type + * @note Supported encodings per data type: + * - BOOLEAN: PLAIN only + * - INT32: PLAIN, TS_2DIFF, GORILLA, ZIGZAG, RLE, SPRINTZ + * - INT64/DATE (one shared setting): same list as INT32 + * - FLOAT/DOUBLE: PLAIN, TS_2DIFF, GORILLA, SPRINTZ + * - STRING/TEXT (one shared setting): PLAIN, DICTIONARY + */ +int set_datatype_encoding(uint8_t data_type, uint8_t encoding); + +/** + * @brief Set the global default compression type + * @param compression Compression type to set + * @return E_OK if success, E_NOT_SUPPORT if compression is not supported + * @note Supported compressions: UNCOMPRESSED, SNAPPY, GZIP, LZO, LZ4 + */ +int set_global_compression(uint8_t compression); + /*--------------------------TsFile Reader and Writer------------------------ */ /** diff --git a/cpp/test/writer/table_view/tsfile_writer_table_test.cc b/cpp/test/writer/table_view/tsfile_writer_table_test.cc index 2bc9fd9af..8c373a3cb 100644 --- a/cpp/test/writer/table_view/tsfile_writer_table_test.cc +++ b/cpp/test/writer/table_view/tsfile_writer_table_test.cc @@ -987,3 +987,115 @@ TEST_F(TsFileWriterTableTest, DiffCodecTypes) { ASSERT_EQ(reader.close(), common::E_OK); delete[] literal; } + +TEST_F(TsFileWriterTableTest, EncodingConfigIntegration) { + // 1. Test setting global compression type + ASSERT_EQ(E_OK, set_global_compression(SNAPPY)); + + // 2.
Test setting encoding types for different data types + ASSERT_EQ(E_OK, set_datatype_encoding(INT32, SPRINTZ)); + ASSERT_EQ(E_OK, set_datatype_encoding(INT64, TS_2DIFF)); + ASSERT_EQ(E_OK, set_datatype_encoding(FLOAT, GORILLA)); + ASSERT_EQ(E_OK, set_datatype_encoding(DOUBLE, GORILLA)); + ASSERT_EQ(E_OK, set_datatype_encoding(STRING, DICTIONARY)); + ASSERT_EQ(E_OK, set_datatype_encoding(DATE, PLAIN)); // DATE -> INT64 slot + ASSERT_EQ(E_OK, + set_datatype_encoding(TEXT, DICTIONARY)); // TEXT -> STRING slot + + // 3. Create a schema that names the same encodings explicitly + std::vector measurement_schemas; + std::vector column_categories; + + std::vector measurement_names = { + "int32_sprintz", "int64_ts2diff", "float_gorilla", "double_gorilla", + "string_dict", "date_plain", "text_dict"}; + + std::vector data_types = {INT32, INT64, FLOAT, DOUBLE, + STRING, DATE, TEXT}; + + std::vector encodings = { + SPRINTZ, TS_2DIFF, GORILLA, GORILLA, DICTIONARY, PLAIN, DICTIONARY}; + + // Create measurement schemas with the listed encodings and compression + for (size_t i = 0; i < measurement_names.size(); i++) { + measurement_schemas.emplace_back(new MeasurementSchema( + measurement_names[i], data_types[i], encodings[i], SNAPPY)); + column_categories.emplace_back(ColumnCategory::FIELD); + } + + // 4.
Write and verify data + auto table_schema = new TableSchema("configTestTable", measurement_schemas, + column_categories); + auto tsfile_table_writer = + std::make_shared(&write_file_, table_schema); + + // Create test data tablet + Tablet tablet(table_schema->get_measurement_names(), + table_schema->get_data_types(), 10); + char* literal = new char[std::strlen("test_str") + 1]; + std::strcpy(literal, "test_str"); + String literal_str(literal, std::strlen("test_str")); + + // Prepare DATE and TEXT values + std::time_t now = std::time(nullptr); + std::tm* local_time = std::localtime(&now); + std::tm today = {}; + today.tm_year = local_time->tm_year; + today.tm_mon = local_time->tm_mon; + today.tm_mday = local_time->tm_mday; + char* text_literal = new char[std::strlen("sample_text") + 1]; + std::strcpy(text_literal, "sample_text"); + String text_str(text_literal, std::strlen("sample_text")); + + // Fill tablet with test values + for (int i = 0; i < 10; i++) { + tablet.add_timestamp(i, static_cast(i)); + tablet.add_value(i, 0, (int32_t)32); // INT32 with SPRINTZ encoding + tablet.add_value(i, 1, (int64_t)64); // INT64 with TS_2DIFF encoding + tablet.add_value(i, 2, (float)1.0); // FLOAT with GORILLA encoding + tablet.add_value(i, 3, (double)2.0); // DOUBLE with GORILLA encoding + tablet.add_value(i, 4, literal_str); // STRING with DICTIONARY encoding + tablet.add_value(i, 5, today); // DATE with PLAIN encoding + tablet.add_value(i, 6, + text_str); // TEXT with DICTIONARY encoding + } + + // Write and flush data + ASSERT_EQ(tsfile_table_writer->write_table(tablet), E_OK); + ASSERT_EQ(tsfile_table_writer->flush(), E_OK); + ASSERT_EQ(tsfile_table_writer->close(), E_OK); + + // 5.
Verify read data matches what was written + auto reader = TsFileReader(); + reader.open(write_file_.get_file_path()); + ResultSet* ret = nullptr; + int ret_value = + reader.query("configTestTable", measurement_names, 0, 10, ret); + ASSERT_EQ(common::E_OK, ret_value); + + auto table_result_set = (TableResultSet*)ret; + bool has_next = false; + while (IS_SUCC(table_result_set->next(has_next)) && has_next) { + // Verify all values were correctly encoded/decoded + ASSERT_EQ(table_result_set->get_value(2), 32); // INT32 + ASSERT_EQ(table_result_set->get_value(3), 64); // INT64 + ASSERT_FLOAT_EQ(table_result_set->get_value(4), 1.0f); // FLOAT + ASSERT_DOUBLE_EQ(table_result_set->get_value(5), + 2.0); // DOUBLE + ASSERT_EQ(table_result_set->get_value(6)->compare( + literal_str), + 0); // STRING + ASSERT_TRUE(DateConverter::is_tm_ymd_equal( + table_result_set->get_value(7), today)); + ASSERT_EQ( + table_result_set->get_value(8)->compare(text_str), + 0); // TEXT + } + + // 6. Clean up resources + reader.destroy_query_data_set(table_result_set); + ASSERT_EQ(reader.close(), common::E_OK); + delete[] literal; + delete[] text_literal; + delete table_schema; +}