From aaa1f5f41f32f2c8d95428cabac7eb4db54b734d Mon Sep 17 00:00:00 2001 From: "menglingda.mld" Date: Tue, 26 May 2026 18:20:01 +0800 Subject: [PATCH] feat: add StringUtils, DateTimeUtils, PathUtil, OptionsUtils, RapidJsonUtil, and Jsonizable utilities --- src/paimon/common/utils/date_time_utils.h | 228 ++++++++ .../common/utils/date_time_utils_test.cpp | 337 ++++++++++++ src/paimon/common/utils/jsonizable.h | 61 +++ src/paimon/common/utils/jsonizable_test.cpp | 194 +++++++ src/paimon/common/utils/options_utils.h | 105 ++++ .../common/utils/options_utils_test.cpp | 72 +++ src/paimon/common/utils/path_util.cpp | 156 ++++++ src/paimon/common/utils/path_util.h | 58 +++ src/paimon/common/utils/path_util_test.cpp | 152 ++++++ src/paimon/common/utils/rapidjson_util.h | 441 ++++++++++++++++ .../common/utils/rapidjson_util_test.cpp | 147 ++++++ src/paimon/common/utils/string_utils.cpp | 220 ++++++++ src/paimon/common/utils/string_utils.h | 209 ++++++++ src/paimon/common/utils/string_utils_test.cpp | 487 ++++++++++++++++++ src/paimon/testing/utils/timezone_guard.h | 54 ++ 15 files changed, 2921 insertions(+) create mode 100644 src/paimon/common/utils/date_time_utils.h create mode 100644 src/paimon/common/utils/date_time_utils_test.cpp create mode 100644 src/paimon/common/utils/jsonizable.h create mode 100644 src/paimon/common/utils/jsonizable_test.cpp create mode 100644 src/paimon/common/utils/options_utils.h create mode 100644 src/paimon/common/utils/options_utils_test.cpp create mode 100644 src/paimon/common/utils/path_util.cpp create mode 100644 src/paimon/common/utils/path_util.h create mode 100644 src/paimon/common/utils/path_util_test.cpp create mode 100644 src/paimon/common/utils/rapidjson_util.h create mode 100644 src/paimon/common/utils/rapidjson_util_test.cpp create mode 100644 src/paimon/common/utils/string_utils.cpp create mode 100644 src/paimon/common/utils/string_utils.h create mode 100644 src/paimon/common/utils/string_utils_test.cpp create mode 100644 
src/paimon/testing/utils/timezone_guard.h diff --git a/src/paimon/common/utils/date_time_utils.h b/src/paimon/common/utils/date_time_utils.h new file mode 100644 index 0000000..652c312 --- /dev/null +++ b/src/paimon/common/utils/date_time_utils.h @@ -0,0 +1,228 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "arrow/api.h" +#include "arrow/compute/api.h" +#include "arrow/vendored/datetime.h" +#include "fmt/format.h" +#include "paimon/common/utils/arrow/status_utils.h" +#include "paimon/data/timestamp.h" +#include "paimon/result.h" +namespace paimon { +/// Utils for date time. +class DateTimeUtils { + public: + DateTimeUtils() = delete; + ~DateTimeUtils() = delete; + + /// The number of milliseconds in a day. + /// + /// This is the modulo 'mask' used when converting TIMESTAMP values to DATE and TIME values. 
+ static constexpr int64_t MILLIS_PER_DAY = 86400000l; // = 24 * 60 * 60 * 1000 + static constexpr int64_t SECONDS_PER_DAY = 86400l; // = 24 * 60 * 60 + static constexpr int64_t NANOS_PER_MILLIS = 1000000l; + enum TimeType { + SECOND = 0, + MILLISECOND = 1, + MICROSECOND = 2, + NANOSECOND = 3, + }; + constexpr static int64_t CONVERSION_FACTORS[] = {1L, 1000L, 1000000L, 1000000000L}; + + // convert a timestamp of a certain type into a combination of two specified types + // e.g., src_timestamp = 12345678, src_type = ns, dst_first_type = ms, dst_second_type = ns + // return: {12, 345678} + static std::pair TimestampConverter(int64_t src_timestamp, + const TimeType& src_type, + const TimeType& dst_first_type, + const TimeType& dst_second_type) { + if (src_type <= dst_first_type) { + // e.g., ms -> {us, ns} or {ms, ns} or {us, us} or {ns, ms} + int64_t conversion_factor_to_first_type = + CONVERSION_FACTORS[dst_first_type] / CONVERSION_FACTORS[src_type]; + // TODO(jinli.zjw): maybe overflow int64 + assert(src_timestamp * conversion_factor_to_first_type < + std::numeric_limits::max()); + return std::make_pair(src_timestamp * conversion_factor_to_first_type, 0L); + } else { + // e.g., ns -> {ms, ns} or {ms, s} or {ms, us} + int64_t conversion_factor_to_first_type = + CONVERSION_FACTORS[src_type] / CONVERSION_FACTORS[dst_first_type]; + double conversion_factor_to_second_type = + static_cast(CONVERSION_FACTORS[dst_second_type]) / + CONVERSION_FACTORS[src_type]; + + int64_t first_value = src_timestamp / conversion_factor_to_first_type; + int64_t second_value = src_timestamp % conversion_factor_to_first_type; + if (second_value < 0) { + second_value += conversion_factor_to_first_type; + first_value--; + } + second_value = conversion_factor_to_second_type * second_value; + return std::make_pair(first_value, second_value); + } + } + + static int64_t TimestampToInteger(const Timestamp& timestamp, const TimeType& dst_type) { + if (dst_type == TimeType::SECOND) { + return 
timestamp.GetMillisecond() / CONVERSION_FACTORS[MILLISECOND]; + } else if (dst_type == TimeType::MILLISECOND) { + return timestamp.GetMillisecond(); + } else if (dst_type == TimeType::MICROSECOND) { + return timestamp.ToMicrosecond(); + } + return timestamp.ToNanosecond(); + } + + static inline uint64_t GetCurrentUTCTimeUs() { + struct timeval ts; + gettimeofday(&ts, nullptr); + return static_cast(ts.tv_sec) * 1000000ULL + static_cast(ts.tv_usec); + } + + static inline Result ToLocalTimestamp(const Timestamp& utc_timestamp) { + int64_t utc_micro = utc_timestamp.ToMicrosecond(); + auto utc_ts_scalar = std::make_shared( + utc_micro, arrow::TimeUnit::MICRO, GetLocalTimezoneName()); + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW( + arrow::Datum local_micro, arrow::compute::LocalTimestamp(arrow::Datum(utc_ts_scalar))); + auto local_ts_scalar = + std::dynamic_pointer_cast(local_micro.scalar()); + auto [millisecond, nano_of_millisecond] = DateTimeUtils::TimestampConverter( + *(static_cast(local_ts_scalar->data())), + DateTimeUtils::TimeType::MICROSECOND, DateTimeUtils::TimeType::MILLISECOND, + DateTimeUtils::TimeType::NANOSECOND); + return Timestamp(millisecond, nano_of_millisecond); + } + + static inline Result GetCurrentLocalTimeUs() { + auto [millisecond, nano_of_millisecond] = DateTimeUtils::TimestampConverter( + GetCurrentUTCTimeUs(), DateTimeUtils::TimeType::MICROSECOND, + DateTimeUtils::TimeType::MILLISECOND, DateTimeUtils::TimeType::NANOSECOND); + Timestamp utc_timestamp(millisecond, nano_of_millisecond); + PAIMON_ASSIGN_OR_RAISE(Timestamp local_timestamp, ToLocalTimestamp(utc_timestamp)); + return local_timestamp.ToMicrosecond(); + } + + static inline Result GetCurrentLocalHour() { + PAIMON_ASSIGN_OR_RAISE(uint64_t local_us, GetCurrentLocalTimeUs()); + auto local_seconds = static_cast(local_us / 1000000); + std::tm local_tm{}; + gmtime_r(&local_seconds, &local_tm); + return local_tm.tm_hour; + } + + static inline int32_t GetPrecisionFromType( + const std::shared_ptr& 
timestamp_type) { + int32_t precision = Timestamp::MAX_PRECISION; + if (timestamp_type->unit() == arrow::TimeUnit::type::SECOND) { + precision = Timestamp::MIN_PRECISION; + } else if (timestamp_type->unit() == arrow::TimeUnit::type::MILLI) { + precision = Timestamp::MILLIS_PRECISION; + } else if (timestamp_type->unit() == arrow::TimeUnit::type::MICRO) { + precision = Timestamp::DEFAULT_PRECISION; + } + return precision; + } + + static inline TimeType GetTimeTypeFromArrowType( + const std::shared_ptr& timestamp_type) { + if (timestamp_type->unit() == arrow::TimeUnit::type::SECOND) { + return TimeType::SECOND; + } else if (timestamp_type->unit() == arrow::TimeUnit::type::MILLI) { + return TimeType::MILLISECOND; + } else if (timestamp_type->unit() == arrow::TimeUnit::type::MICRO) { + return TimeType::MICROSECOND; + } + return TimeType::NANOSECOND; + } + + static inline Result> GetTypeFromPrecision( + int32_t precision, bool with_timezone) { + std::string timezone = with_timezone ? GetLocalTimezoneName() : ""; + if (precision == Timestamp::MIN_PRECISION) { + return arrow::timestamp(arrow::TimeUnit::type::SECOND, timezone); + } else if (precision == Timestamp::MILLIS_PRECISION) { + return arrow::timestamp(arrow::TimeUnit::type::MILLI, timezone); + } else if (precision == Timestamp::DEFAULT_PRECISION) { + return arrow::timestamp(arrow::TimeUnit::type::MICRO, timezone); + } else if (precision == Timestamp::MAX_PRECISION) { + return arrow::timestamp(arrow::TimeUnit::type::NANO, timezone); + } + return Status::Invalid("only support precision 0/3/6/9 in timestamp type"); + } + + static std::string GetLocalTimezoneName() { + // find local tz in env + const char* timezone = std::getenv("TZ"); + if (timezone != nullptr && *timezone != '\0') { + return std::string(timezone); + } + // find local tz in file + auto* tz = arrow_vendored::date::current_zone(); + return tz ? 
tz->name() : "UTC"; + } + + static std::string GetArrowTimeUnitStr(arrow::TimeUnit::type unit) { + switch (unit) { + case arrow::TimeUnit::SECOND: + return "SECOND"; + case arrow::TimeUnit::MILLI: + return "MILLISECOND"; + case arrow::TimeUnit::MICRO: + return "MICROSECOND"; + case arrow::TimeUnit::NANO: + return "NANOSECOND"; + default: + break; + } + return "UNKNOWN"; + } + + // there may be a precision loss for nano + static Result ToUTCTimestamp(const Timestamp& timestamp) { + int64_t micro_second = timestamp.ToMicrosecond(); + auto local_ts_scalar = + std::make_shared(micro_second, arrow::TimeUnit::MICRO); + arrow::compute::AssumeTimezoneOptions options(DateTimeUtils::GetLocalTimezoneName()); + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW( + arrow::Datum target_scalar, + arrow::compute::AssumeTimezone(arrow::Datum(local_ts_scalar), options)); + auto utc_ts_scalar = + std::dynamic_pointer_cast(target_scalar.scalar()); + auto [milli, nano] = DateTimeUtils::TimestampConverter( + *(static_cast(utc_ts_scalar->data())), + DateTimeUtils::TimeType::MICROSECOND, DateTimeUtils::TimeType::MILLISECOND, + DateTimeUtils::TimeType::NANOSECOND); + return Timestamp(milli, nano); + } +}; +} // namespace paimon diff --git a/src/paimon/common/utils/date_time_utils_test.cpp b/src/paimon/common/utils/date_time_utils_test.cpp new file mode 100644 index 0000000..e012172 --- /dev/null +++ b/src/paimon/common/utils/date_time_utils_test.cpp @@ -0,0 +1,337 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "paimon/common/utils/date_time_utils.h"
+
+#include
+
+#include "gtest/gtest.h"
+#include "paimon/testing/utils/testharness.h"
+#include "paimon/testing/utils/timezone_guard.h"
+
+namespace paimon::test {
+
+// Exercises TimestampConverter: scaling into an equal-or-finer first unit (second component 0),
+// splitting into {first, remainder} for a coarser first unit, a negative input and a large
+// SECOND input.
+TEST(DateTimeUtilsTest, TestTimestampConverter) {
+    {
+        auto ret = DateTimeUtils::TimestampConverter(
+            10L, DateTimeUtils::SECOND, DateTimeUtils::MILLISECOND, DateTimeUtils::NANOSECOND);
+        ASSERT_EQ(ret, std::make_pair(10000L, 0L));
+    }
+    {
+        auto ret = DateTimeUtils::TimestampConverter(
+            10L, DateTimeUtils::SECOND, DateTimeUtils::NANOSECOND, DateTimeUtils::NANOSECOND);
+        ASSERT_EQ(ret, std::make_pair(10000000000L, 0L));
+    }
+    {
+        auto ret = DateTimeUtils::TimestampConverter(
+            10L, DateTimeUtils::SECOND, DateTimeUtils::NANOSECOND, DateTimeUtils::SECOND);
+        ASSERT_EQ(ret, std::make_pair(10000000000L, 0L));
+    }
+
+    {
+        auto ret = DateTimeUtils::TimestampConverter(
+            2567L, DateTimeUtils::MILLISECOND, DateTimeUtils::SECOND, DateTimeUtils::NANOSECOND);
+        ASSERT_EQ(ret, std::make_pair(2L, 567000000L));
+    }
+    {
+        auto ret = DateTimeUtils::TimestampConverter(2567L, DateTimeUtils::MILLISECOND,
+                                                     DateTimeUtils::SECOND, DateTimeUtils::SECOND);
+        ASSERT_EQ(ret, std::make_pair(2L, 0L));
+    }
+    {
+        auto ret = DateTimeUtils::TimestampConverter(
+            2567L, DateTimeUtils::MILLISECOND, DateTimeUtils::MICROSECOND, DateTimeUtils::SECOND);
+        ASSERT_EQ(ret, std::make_pair(2567000L, 0L));
+    }
+    {
+        auto ret = DateTimeUtils::TimestampConverter(2567L, DateTimeUtils::MILLISECOND,
+                                                     DateTimeUtils::MICROSECOND,
+                                                     DateTimeUtils::NANOSECOND);
+        ASSERT_EQ(ret, std::make_pair(2567000L, 0L));
+    }
+
+    {
+        auto ret = DateTimeUtils::TimestampConverter(12345678L, DateTimeUtils::NANOSECOND,
+                                                     DateTimeUtils::MILLISECOND,
+                                                     DateTimeUtils::NANOSECOND);
+        ASSERT_EQ(ret, std::make_pair(12L, 345678L));
+    }
+    {
+        auto ret = DateTimeUtils::TimestampConverter(12345678L, DateTimeUtils::NANOSECOND,
+                                                     DateTimeUtils::MILLISECOND,
+                                                     DateTimeUtils::MICROSECOND);
+        ASSERT_EQ(ret, std::make_pair(12L, 345L));
+    }
+    {
+        auto ret =
+            DateTimeUtils::TimestampConverter(12345678L, DateTimeUtils::NANOSECOND,
+                                              DateTimeUtils::MILLISECOND, DateTimeUtils::SECOND);
+        ASSERT_EQ(ret, std::make_pair(12L, 0L));
+    }
+    {
+        // negative input: the first component is floored and the remainder stays non-negative
+        auto ret = DateTimeUtils::TimestampConverter(
+            -2240521239998998999L, DateTimeUtils::NANOSECOND, DateTimeUtils::MILLISECOND,
+            DateTimeUtils::NANOSECOND);
+        ASSERT_EQ(ret, std::make_pair(-2240521239999L, 1001L));
+    }
+    {
+        // 9999-12-31
+        auto ret = DateTimeUtils::TimestampConverter(253402252995L, DateTimeUtils::SECOND,
+                                                     DateTimeUtils::MILLISECOND,
+                                                     DateTimeUtils::NANOSECOND);
+        ASSERT_EQ(ret, std::make_pair(253402252995000L, 0L));
+    }
+}
+
+// Checks TimestampToInteger for every dst_type with positive, negative and large-magnitude
+// timestamps, including the cases where detail below dst_type is dropped.
+TEST(DateTimeUtilsTest, TestTimestampToInteger) {
+    {
+        ASSERT_EQ(DateTimeUtils::TimestampToInteger(Timestamp(1758173447000l, 0),
+                                                    /*dst_type=*/DateTimeUtils::TimeType::SECOND),
+                  1758173447l);
+        ASSERT_EQ(
+            DateTimeUtils::TimestampToInteger(Timestamp(1758173447001l, 0),
+                                              /*dst_type=*/DateTimeUtils::TimeType::MILLISECOND),
+            1758173447001l);
+        ASSERT_EQ(
+            DateTimeUtils::TimestampToInteger(Timestamp(1758173447001l, 1000),
+                                              /*dst_type=*/DateTimeUtils::TimeType::MICROSECOND),
+            1758173447001001l);
+        ASSERT_EQ(
+            DateTimeUtils::TimestampToInteger(Timestamp(1758173447001l, 1001),
+                                              /*dst_type=*/DateTimeUtils::TimeType::NANOSECOND),
+            1758173447001001001l);
+    }
+    {
+        // negative millisecond values
+        ASSERT_EQ(DateTimeUtils::TimestampToInteger(Timestamp(-2493033748000l, 0),
+                                                    /*dst_type=*/DateTimeUtils::TimeType::SECOND),
+                  -2493033748l);
+        ASSERT_EQ(
+            DateTimeUtils::TimestampToInteger(Timestamp(-2493033748001l, 0),
+                                              /*dst_type=*/DateTimeUtils::TimeType::MILLISECOND),
+            -2493033748001l);
+        ASSERT_EQ(
+            DateTimeUtils::TimestampToInteger(Timestamp(-2493033748001l, 1000),
+                                              /*dst_type=*/DateTimeUtils::TimeType::MICROSECOND),
+            -2493033748000999l);
+        ASSERT_EQ(
+            DateTimeUtils::TimestampToInteger(Timestamp(-2493033748001l, 1001),
+                                              /*dst_type=*/DateTimeUtils::TimeType::NANOSECOND),
+            -2493033748000998999l);
+    }
+
+    {
+        // 9999-12-31, cannot convert to nano second, which is overflow for int64
+        ASSERT_EQ(DateTimeUtils::TimestampToInteger(Timestamp(253402252995000l, 0),
+                                                    /*dst_type=*/DateTimeUtils::TimeType::SECOND),
+                  253402252995l);
+        ASSERT_EQ(
+            DateTimeUtils::TimestampToInteger(Timestamp(253402252995001l, 0),
+                                              /*dst_type=*/DateTimeUtils::TimeType::MILLISECOND),
+            253402252995001l);
+        ASSERT_EQ(
+            DateTimeUtils::TimestampToInteger(Timestamp(253402252995001l, 1000),
+                                              /*dst_type=*/DateTimeUtils::TimeType::MICROSECOND),
+            253402252995001001l);
+    }
+    {
+        // 0000-01-01, cannot convert to nano second, which is overflow for int64
+        ASSERT_EQ(DateTimeUtils::TimestampToInteger(Timestamp(-62167219200000l, 0),
+                                                    /*dst_type=*/DateTimeUtils::TimeType::SECOND),
+                  -62167219200l);
+        ASSERT_EQ(
+            DateTimeUtils::TimestampToInteger(Timestamp(-62167219200000l, 0),
+                                              /*dst_type=*/DateTimeUtils::TimeType::MILLISECOND),
+            -62167219200000l);
+        ASSERT_EQ(
+            DateTimeUtils::TimestampToInteger(Timestamp(-62167219200000l, 1000),
+                                              /*dst_type=*/DateTimeUtils::TimeType::MICROSECOND),
+            -62167219199999999l);
+    }
+
+    {
+        // test precision loss
+        ASSERT_EQ(
+            DateTimeUtils::TimestampToInteger(Timestamp(1758173447001l, 1001),
+                                              /*dst_type=*/DateTimeUtils::TimeType::MICROSECOND),
+            1758173447001001l);
+        ASSERT_EQ(
+            DateTimeUtils::TimestampToInteger(Timestamp(1758173447001l, 1001),
+                                              /*dst_type=*/DateTimeUtils::TimeType::MILLISECOND),
+            1758173447001l);
+        ASSERT_EQ(DateTimeUtils::TimestampToInteger(Timestamp(1758173447001l, 1001),
+                                                    /*dst_type=*/DateTimeUtils::TimeType::SECOND),
+                  1758173447l);
+    }
+}
+
+// Arrow units SECOND/MILLI/MICRO/NANO are expected to map to precision 0/3/6/9.
+TEST(DateTimeUtilsTest, TestGetPrecisionFromType) {
+    auto ts_sec_type = arrow::timestamp(arrow::TimeUnit::type::SECOND);
+    auto ts_type = arrow::internal::checked_pointer_cast(ts_sec_type);
+    ASSERT_EQ(DateTimeUtils::GetPrecisionFromType(ts_type), 0);
+
+    auto ts_milli_type = arrow::timestamp(arrow::TimeUnit::type::MILLI);
+    ts_type = arrow::internal::checked_pointer_cast(ts_milli_type);
+    ASSERT_EQ(DateTimeUtils::GetPrecisionFromType(ts_type), 3);
+
+    auto ts_micro_type = arrow::timestamp(arrow::TimeUnit::type::MICRO);
+    ts_type = arrow::internal::checked_pointer_cast(ts_micro_type);
+    ASSERT_EQ(DateTimeUtils::GetPrecisionFromType(ts_type), 6);
+
+    auto ts_nano_type = arrow::timestamp(arrow::TimeUnit::type::NANO);
+    ts_type = arrow::internal::checked_pointer_cast(ts_nano_type);
+    ASSERT_EQ(DateTimeUtils::GetPrecisionFromType(ts_type), 9);
+}
+
+// Each arrow unit is expected to map to the TimeType of the same granularity.
+TEST(DateTimeUtilsTest, TestGetTimeTypeFromArrowType) {
+    auto ts_sec_type = arrow::timestamp(arrow::TimeUnit::type::SECOND);
+    auto ts_type = arrow::internal::checked_pointer_cast(ts_sec_type);
+    ASSERT_EQ(DateTimeUtils::GetTimeTypeFromArrowType(ts_type), DateTimeUtils::TimeType::SECOND);
+
+    auto ts_milli_type = arrow::timestamp(arrow::TimeUnit::type::MILLI);
+    ts_type = arrow::internal::checked_pointer_cast(ts_milli_type);
+    ASSERT_EQ(DateTimeUtils::GetTimeTypeFromArrowType(ts_type),
+              DateTimeUtils::TimeType::MILLISECOND);
+
+    auto ts_micro_type = arrow::timestamp(arrow::TimeUnit::type::MICRO);
+    ts_type = arrow::internal::checked_pointer_cast(ts_micro_type);
+    ASSERT_EQ(DateTimeUtils::GetTimeTypeFromArrowType(ts_type),
+              DateTimeUtils::TimeType::MICROSECOND);
+
+    auto ts_nano_type = arrow::timestamp(arrow::TimeUnit::type::NANO);
+    ts_type = arrow::internal::checked_pointer_cast(ts_nano_type);
+    ASSERT_EQ(DateTimeUtils::GetTimeTypeFromArrowType(ts_type),
+              DateTimeUtils::TimeType::NANOSECOND);
+}
+
+// Precisions 0/3/6/9 are checked with and without a timezone; precision 4 must be rejected.
+TEST(DateTimeUtilsTest, TestGetTypeFromPrecision) {
+    auto timezone = DateTimeUtils::GetLocalTimezoneName();
+    {
+        ASSERT_OK_AND_ASSIGN(std::shared_ptr ts_type,
+                             DateTimeUtils::GetTypeFromPrecision(0, /*with_timezone=*/false));
+        ASSERT_TRUE(ts_type->Equals(arrow::timestamp(arrow::TimeUnit::type::SECOND)));
+    }
+    {
+        ASSERT_OK_AND_ASSIGN(std::shared_ptr ts_type,
+                             DateTimeUtils::GetTypeFromPrecision(0, /*with_timezone=*/true));
+        ASSERT_TRUE(ts_type->Equals(arrow::timestamp(arrow::TimeUnit::type::SECOND, timezone)));
+    }
+    {
+        ASSERT_OK_AND_ASSIGN(std::shared_ptr ts_type,
+                             DateTimeUtils::GetTypeFromPrecision(3, /*with_timezone=*/false));
+        ASSERT_TRUE(ts_type->Equals(arrow::timestamp(arrow::TimeUnit::type::MILLI)));
+    }
+    {
+        ASSERT_OK_AND_ASSIGN(std::shared_ptr ts_type,
+                             DateTimeUtils::GetTypeFromPrecision(3, /*with_timezone=*/true));
+        ASSERT_TRUE(ts_type->Equals(arrow::timestamp(arrow::TimeUnit::type::MILLI, timezone)));
+    }
+    {
+        ASSERT_OK_AND_ASSIGN(std::shared_ptr ts_type,
+                             DateTimeUtils::GetTypeFromPrecision(6, /*with_timezone=*/false));
+        ASSERT_TRUE(ts_type->Equals(arrow::timestamp(arrow::TimeUnit::type::MICRO)));
+    }
+    {
+        ASSERT_OK_AND_ASSIGN(std::shared_ptr ts_type,
+                             DateTimeUtils::GetTypeFromPrecision(6, /*with_timezone=*/true));
+        ASSERT_TRUE(ts_type->Equals(arrow::timestamp(arrow::TimeUnit::type::MICRO, timezone)));
+    }
+    {
+        ASSERT_OK_AND_ASSIGN(std::shared_ptr ts_type,
+                             DateTimeUtils::GetTypeFromPrecision(9, /*with_timezone=*/false));
+        ASSERT_TRUE(ts_type->Equals(arrow::timestamp(arrow::TimeUnit::type::NANO)));
+    }
+    {
+        ASSERT_OK_AND_ASSIGN(std::shared_ptr ts_type,
+                             DateTimeUtils::GetTypeFromPrecision(9, /*with_timezone=*/true));
+        ASSERT_TRUE(ts_type->Equals(arrow::timestamp(arrow::TimeUnit::type::NANO, timezone)));
+    }
+    {
+        ASSERT_NOK_WITH_MSG(DateTimeUtils::GetTypeFromPrecision(4, /*with_timezone=*/true),
+                            "only support precision 0/3/6/9 in timestamp type");
+    }
+}
+
+// While the guard is alive the reported name must be "US/Hawaii"; once it is destroyed the
+// name captured before must be reported again.
+// NOTE(review): TimezoneGuard is declared in timezone_guard.h; presumably it overrides TZ for
+// its lifetime - confirm there.
+TEST(DateTimeUtilsTest, TestGetLocalTimezoneName) {
+    std::string timezone = DateTimeUtils::GetLocalTimezoneName();
+    {
+        TimezoneGuard guard("US/Hawaii");
+        ASSERT_EQ(DateTimeUtils::GetLocalTimezoneName(), "US/Hawaii");
+    }
+    ASSERT_EQ(DateTimeUtils::GetLocalTimezoneName(), timezone);
+}
+
+// Under Asia/Shanghai the local reading must be at least 28800000000 us (8 hours) ahead of the
+// UTC reading taken just before it.
+TEST(DateTimeUtilsTest, TestGetCurrentLocalTimeUs) {
+    TimezoneGuard guard("Asia/Shanghai");
+    uint64_t utc_ts = DateTimeUtils::GetCurrentUTCTimeUs();
+    uint64_t local_ts = DateTimeUtils::GetCurrentLocalTimeUs().value();
+    ASSERT_GT(local_ts, utc_ts);
+    ASSERT_GE(local_ts - utc_ts, 28800000000l);
+}
+
+// A fixed epoch-millisecond value must shift by 28800000 ms under Asia/Shanghai and stay
+// unchanged under UTC.
+TEST(DateTimeUtilsTest, TestToLocalTimestamp) {
+    {
+        TimezoneGuard guard("Asia/Shanghai");
+        ASSERT_OK_AND_ASSIGN(Timestamp timestamp, DateTimeUtils::ToLocalTimestamp(
+                                                      Timestamp::FromEpochMillis(1700000000123L)));
+        ASSERT_EQ(timestamp, Timestamp::FromEpochMillis(1700028800123L));
+    }
+    {
+        TimezoneGuard guard("UTC");
+        ASSERT_OK_AND_ASSIGN(Timestamp timestamp, DateTimeUtils::ToLocalTimestamp(
+                                                      Timestamp::FromEpochMillis(1700000000123L)));
+        ASSERT_EQ(timestamp, Timestamp::FromEpochMillis(1700000000123L));
+    }
+}
+
+// The local hour under Asia/Shanghai must be 8 hours ahead of the one under UTC (mod 24).
+// NOTE(review): the two readings are taken at different instants; if the hour rolls over
+// between them the difference would be 9 - confirm this rare flakiness is acceptable.
+TEST(DateTimeUtilsTest, TestGetCurrentLocalHour) {
+    int32_t shanghai_hour = 0;
+    int32_t utc_hour = 0;
+    {
+        TimezoneGuard guard("Asia/Shanghai");
+        ASSERT_OK_AND_ASSIGN(shanghai_hour, DateTimeUtils::GetCurrentLocalHour());
+    }
+    {
+        TimezoneGuard guard("UTC");
+        ASSERT_OK_AND_ASSIGN(utc_hour, DateTimeUtils::GetCurrentLocalHour());
+    }
+    ASSERT_EQ((shanghai_hour - utc_hour + 24) % 24, 8);
+}
+
+// Local midnight of the epoch under Asia/Shanghai must map to -28800000 ms in UTC; a
+// 500 ns fraction is expected to be lost because the conversion runs at microsecond level.
+TEST(DateTimeUtilsTest, TestToUTCTimestamp) {
+    TimezoneGuard guard("Asia/Shanghai");
+    {
+        Timestamp ts(0, 0);
+        ASSERT_OK_AND_ASSIGN(Timestamp utc_ts, DateTimeUtils::ToUTCTimestamp(ts));
+        ASSERT_EQ(utc_ts, Timestamp(-28800000l, 0));
+    }
+    {
+        // test precision loss for nano
+        Timestamp ts(0, 500);
+        ASSERT_OK_AND_ASSIGN(Timestamp utc_ts, DateTimeUtils::ToUTCTimestamp(ts));
+        ASSERT_EQ(utc_ts, Timestamp(-28800000l, 0));
+    }
+}
+// Each of the four arrow time units must be rendered with its upper-case name.
+TEST(DateTimeUtilsTest, TestGetArrowTimeUnitStr) {
+    ASSERT_EQ(DateTimeUtils::GetArrowTimeUnitStr(arrow::TimeUnit::SECOND), "SECOND");
+    ASSERT_EQ(DateTimeUtils::GetArrowTimeUnitStr(arrow::TimeUnit::MILLI), "MILLISECOND");
+    ASSERT_EQ(DateTimeUtils::GetArrowTimeUnitStr(arrow::TimeUnit::MICRO), "MICROSECOND");
+    ASSERT_EQ(DateTimeUtils::GetArrowTimeUnitStr(arrow::TimeUnit::NANO), "NANOSECOND");
+}
+
+}  // namespace paimon::test
diff --git a/src/paimon/common/utils/jsonizable.h b/src/paimon/common/utils/jsonizable.h
new file mode 100644
index 0000000..9e2a395
--- /dev/null
+++ b/src/paimon/common/utils/jsonizable.h
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include
+#include
+
+#include "paimon/common/utils/rapidjson_util.h"
+#include "paimon/result.h"
+#include "paimon/status.h"
+#include "rapidjson/allocators.h"
+#include "rapidjson/document.h"
+#include "rapidjson/rapidjson.h"
+
+namespace paimon {
+
+/// Convenience macro for classes deriving from Jsonizable: declares RapidJsonUtil and
+/// Jsonizable as friends and defines a defaulted default constructor for `Type`.
+/// The friendship lets those helpers reach the constructor when it sits in a non-public
+/// section of `Type`.
+#define JSONIZABLE_FRIEND_AND_DEFAULT_CTOR(Type) \
+    friend class RapidJsonUtil;                  \
+    friend class Jsonizable;                     \
+    Type() = default;
+
+/// Base class for types that can be converted to and from a rapidjson value.
+///
+/// Subclasses implement ToJson() and FromJson(); ToJsonString() and FromJsonString() wrap
+/// them through RapidJsonUtil and report failures as a Status instead of an exception.
+/// `Derived` is the concrete subclass and must be default-constructible, because
+/// FromJsonString() creates an instance before filling it.
+template
+class Jsonizable {
+ public:
+    Jsonizable() = default;
+    virtual ~Jsonizable() = default;
+
+    /// Builds the JSON value for this object. Memory for the value is taken from `allocator`.
+    /// Declared noexcept(false): implementations may throw.
+    virtual rapidjson::Value ToJson(rapidjson::Document::AllocatorType* allocator) const
+        noexcept(false) = 0;
+    /// Fills this object from the JSON value `obj`.
+    /// Declared noexcept(false): implementations may throw.
+    virtual void FromJson(const rapidjson::Value& obj) noexcept(false) = 0;
+
+    /// Serializes this object to a JSON string via RapidJsonUtil::ToJsonString.
+    /// Returns the string, or the non-OK status reported by RapidJsonUtil.
+    Result ToJsonString() const {
+        std::string json_str;
+        PAIMON_RETURN_NOT_OK(RapidJsonUtil::ToJsonString(*this, &json_str));
+        return json_str;
+    }
+    /// Default-constructs a `Derived` and fills it from `json_str` via
+    /// RapidJsonUtil::FromJsonString. Returns the object, or the non-OK status reported by
+    /// RapidJsonUtil.
+    static Result FromJsonString(const std::string& json_str) {
+        Derived obj;
+        PAIMON_RETURN_NOT_OK(RapidJsonUtil::FromJsonString(json_str, &obj));
+        return obj;
+    }
+};
+
+}  // namespace paimon
diff --git a/src/paimon/common/utils/jsonizable_test.cpp b/src/paimon/common/utils/jsonizable_test.cpp
new file mode 100644
index 0000000..b5a5158
--- /dev/null
+++ b/src/paimon/common/utils/jsonizable_test.cpp
@@ -0,0 +1,194 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "paimon/common/utils/jsonizable.h" + +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "paimon/testing/utils/testharness.h" +#include "rapidjson/allocators.h" +#include "rapidjson/document.h" +#include "rapidjson/rapidjson.h" + +namespace paimon::test { + +TEST(JsonizableTest, TestNestedClass) { + class ClassA : public Jsonizable { + public: + bool operator==(const ClassA& other) const { + return vec_ == other.vec_ && string_ == other.string_ && map_ == other.map_; + } + rapidjson::Value ToJson(rapidjson::Document::AllocatorType* allocator) const + noexcept(false) override { + rapidjson::Value value(rapidjson::kObjectType); + value.AddMember("vec", RapidJsonUtil::SerializeValue(vec_, allocator).Move(), + *allocator); + value.AddMember("string", RapidJsonUtil::SerializeValue(string_, allocator).Move(), + *allocator); + value.AddMember("map_a", RapidJsonUtil::SerializeValue(map_, allocator).Move(), + *allocator); + return value; + } + void FromJson(const rapidjson::Value& value) noexcept(false) override { + vec_ = RapidJsonUtil::DeserializeKeyValue>(value, "vec", vec_); + string_ = RapidJsonUtil::DeserializeKeyValue(value, "string", string_); + map_ = RapidJsonUtil::DeserializeKeyValue>( + value, "map_a", map_); + } + + private: + JSONIZABLE_FRIEND_AND_DEFAULT_CTOR(ClassA); + + std::vector vec_; + std::string string_; + std::map map_; + }; + + class ClassB : public Jsonizable { + public: + bool operator==(const ClassB& other) const { + return a_ == other.a_ && a_vec_ == other.a_vec_ && f_ == other.f_ && map_ == other.map_; + } + rapidjson::Value ToJson(rapidjson::Document::AllocatorType* allocator) const + noexcept(false) override { + rapidjson::Value obj(rapidjson::kObjectType); + obj.AddMember("ClassA", RapidJsonUtil::SerializeValue(a_, allocator).Move(), + *allocator); + obj.AddMember("ClassA_vec", 
RapidJsonUtil::SerializeValue(a_vec_, allocator).Move(), + *allocator); + obj.AddMember("float", RapidJsonUtil::SerializeValue(f_, allocator).Move(), *allocator); + obj.AddMember("map_b", RapidJsonUtil::SerializeValue(map_, allocator).Move(), + *allocator); + return obj; + } + void FromJson(const rapidjson::Value& obj) noexcept(false) override { + a_ = RapidJsonUtil::DeserializeKeyValue(obj, "ClassA", a_); + a_vec_ = + RapidJsonUtil::DeserializeKeyValue>(obj, "ClassA_vec", a_vec_); + f_ = RapidJsonUtil::DeserializeKeyValue(obj, "float", f_); + map_ = RapidJsonUtil::DeserializeKeyValue>>( + obj, "map_b", map_); + } + + private: + JSONIZABLE_FRIEND_AND_DEFAULT_CTOR(ClassB); + + ClassA a_; + std::vector a_vec_; + float f_; + std::map> map_; + }; + + ClassA obj_a1, obj_a2; + obj_a1.vec_ = {11.0, 12.0, 13.0, 14.0}; + obj_a1.string_ = "string_value_1"; + obj_a1.map_ = {{"10", "a1"}, {"11", "b1"}, {"12", "c1"}}; + + obj_a2.vec_ = {21.0, 22.0, 23.0, 24.0}; + obj_a2.string_ = "string_value_2"; + obj_a2.map_ = {{"20", "a2"}, {"21", "b2"}, {"22", "c2"}}; + + ClassB obj_b; + obj_b.a_.vec_ = {1.0, 2.0, 3.0, 4.0}; + obj_b.a_.string_ = "string_value"; + obj_b.a_.map_ = {{"0", "a"}, {"1", "b"}, {"2", "c"}}; + + obj_b.a_vec_.push_back(obj_a1); + obj_b.a_vec_.push_back(obj_a2); + obj_b.f_ = 10.5; + obj_b.map_ = {{"aa", {0, 1}}, {"bb", {1, 2}}, {"cc", {2, 3}}}; + + ASSERT_OK_AND_ASSIGN(std::string json_str, obj_b.ToJsonString()); + ASSERT_OK_AND_ASSIGN(ClassB obj_b_2, ClassB::FromJsonString(json_str)); + ASSERT_EQ(obj_b, obj_b_2); + + // test invalid json_str + auto invalid_json_str = json_str.substr(0, json_str.length() / 2); + ASSERT_NOK_WITH_MSG(ClassB::FromJsonString(invalid_json_str), "deserialize failed"); +} + +TEST(JsonizableTest, TestUpgradeClass) { + class ClassA : public Jsonizable { + public: + bool operator==(const ClassA& other) const { + return vec_ == other.vec_ && string_ == other.string_; + } + + rapidjson::Value ToJson(rapidjson::Document::AllocatorType* allocator) 
const + noexcept(false) override { + rapidjson::Value value(rapidjson::kObjectType); + value.AddMember("vec", RapidJsonUtil::SerializeValue(vec_, allocator).Move(), + *allocator); + value.AddMember("string", RapidJsonUtil::SerializeValue(string_, allocator).Move(), + *allocator); + return value; + } + void FromJson(const rapidjson::Value& value) noexcept(false) override { + vec_ = RapidJsonUtil::DeserializeKeyValue>(value, "vec", vec_); + string_ = RapidJsonUtil::DeserializeKeyValue(value, "string", string_); + } + + private: + JSONIZABLE_FRIEND_AND_DEFAULT_CTOR(ClassA); + + std::vector vec_; + std::string string_; + }; + + // modify vec_ from vector to vector + class NewClassA : public Jsonizable { + public: + rapidjson::Value ToJson(rapidjson::Document::AllocatorType* allocator) const + noexcept(false) override { + rapidjson::Value value(rapidjson::kObjectType); + value.AddMember("vec", RapidJsonUtil::SerializeValue(vec_, allocator).Move(), + *allocator); + value.AddMember("string", RapidJsonUtil::SerializeValue(string_, allocator).Move(), + *allocator); + return value; + } + void FromJson(const rapidjson::Value& value) noexcept(false) override { + vec_ = RapidJsonUtil::DeserializeKeyValue>(value, "vec", vec_); + string_ = RapidJsonUtil::DeserializeKeyValue(value, "string", string_); + } + + private: + JSONIZABLE_FRIEND_AND_DEFAULT_CTOR(NewClassA); + + std::vector vec_; + std::string string_; + }; + + ClassA obj_a; + obj_a.vec_ = {1, 2, 3}; + obj_a.string_ = "abcd"; + + ASSERT_OK_AND_ASSIGN(std::string json_str, obj_a.ToJsonString()); + ASSERT_OK_AND_ASSIGN(ClassA obj_a_2, ClassA::FromJsonString(json_str)); + ASSERT_EQ(obj_a, obj_a_2); + + // test serialize with ClassA and deserialize with NewClassA + ASSERT_NOK_WITH_MSG(NewClassA::FromJsonString(json_str), "value must be string"); +} + +} // namespace paimon::test diff --git a/src/paimon/common/utils/options_utils.h b/src/paimon/common/utils/options_utils.h new file mode 100644 index 0000000..90b30b5 --- 
/dev/null +++ b/src/paimon/common/utils/options_utils.h @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fmt/format.h" +#include "paimon/common/utils/string_utils.h" +#include "paimon/result.h" +#include "paimon/status.h" + +namespace paimon { + +class OptionsUtils { + public: + template + using is_supported_type = + std::disjunction, std::is_same>; + + OptionsUtils() = delete; + ~OptionsUtils() = delete; + + template + static Result GetValueFromMap(const std::map& key_value_map, + const std::string& key, const T& default_value) { + auto value = GetValueFromMap(key_value_map, key); + if (value.ok()) { + return value.value(); + } else if (value.status().IsNotExist()) { + return default_value; + } + return value.status(); + } + + template + static Result GetValueFromMap(const std::map& key_value_map, + const std::string& key) { + static_assert(is_supported_type::value, "T must be trivially copyable or string"); + auto iter = key_value_map.find(key); + if (iter == key_value_map.end()) { + return Status::NotExist(fmt::format("key {} does not exist in map", key)); + 
} + const auto& value_str = iter->second; + std::optional value = StringUtils::StringToValue(value_str); + if (value == std::nullopt) { + return Status::Invalid(fmt::format("convert key {}, value {} to {} failed", key, + value_str, GetTypeName())); + } + return value.value(); + } + + /// Fetch options with specific prefix and remove prefix for key. + static std::map FetchOptionsWithPrefix( + const std::string& prefix, const std::map& options) { + std::map options_with_prefix; + int64_t prefix_len = prefix.size(); + for (const auto& [key, value] : options) { + if (StringUtils::StartsWith(key, prefix)) { + options_with_prefix[key.substr(prefix_len)] = value; + } + } + return options_with_prefix; + } + + template + static std::string GetTypeName() { + int32_t status; + char* demangled = abi::__cxa_demangle(typeid(T).name(), nullptr, nullptr, &status); + if (status == 0) { + std::string result(demangled); + free(demangled); + return result; + } + assert(demangled == nullptr); + return typeid(T).name(); + } +}; +} // namespace paimon diff --git a/src/paimon/common/utils/options_utils_test.cpp b/src/paimon/common/utils/options_utils_test.cpp new file mode 100644 index 0000000..7a09a98 --- /dev/null +++ b/src/paimon/common/utils/options_utils_test.cpp @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "paimon/common/utils/options_utils.h" + +#include "gtest/gtest.h" +#include "paimon/testing/utils/testharness.h" + +namespace paimon::test { +TEST(OptionsUtilsTest, TestGetValueFromMap) { + std::map key_value_map; + key_value_map["key_int"] = "10"; + key_value_map["key_bool"] = "true"; + key_value_map["key_int16"] = "100"; + key_value_map["key_double"] = "4.5E10"; + // invalid + key_value_map["key_bool2"] = "true1"; + key_value_map["key_int8"] = "500"; + key_value_map["key_int64"] = "ab"; + + ASSERT_OK_AND_ASSIGN(auto int32_value, + OptionsUtils::GetValueFromMap(key_value_map, "key_int")); + ASSERT_EQ(10, int32_value); + ASSERT_OK_AND_ASSIGN(auto bool_value, + OptionsUtils::GetValueFromMap(key_value_map, "key_bool")); + ASSERT_TRUE(bool_value); + ASSERT_OK_AND_ASSIGN(auto int16_value, + OptionsUtils::GetValueFromMap(key_value_map, "key_int16")); + ASSERT_EQ(100, int16_value); + ASSERT_OK_AND_ASSIGN(auto double_value, + OptionsUtils::GetValueFromMap(key_value_map, "key_double")); + ASSERT_NEAR(4.5E10, double_value, 0.00001); + ASSERT_NOK_WITH_MSG(OptionsUtils::GetValueFromMap(key_value_map, "key_bool2"), + "convert key key_bool2, value true1 to bool failed"); + ASSERT_NOK_WITH_MSG(OptionsUtils::GetValueFromMap(key_value_map, "key_int8"), + "convert key key_int8, value 500 to signed char failed"); + ASSERT_NOK_WITH_MSG(OptionsUtils::GetValueFromMap(key_value_map, "key_int64"), + "convert key key_int64, value ab to long failed"); + ASSERT_NOK_WITH_MSG(OptionsUtils::GetValueFromMap(key_value_map, "key_int64", 10), + "convert key key_int64, value ab to long failed"); + + ASSERT_OK_AND_ASSIGN( + auto nonexist, OptionsUtils::GetValueFromMap(key_value_map, "key_nonexist", 233)); + ASSERT_EQ(233, nonexist); + ASSERT_OK_AND_ASSIGN(auto empty, + OptionsUtils::GetValueFromMap(key_value_map, "", 999)); + ASSERT_EQ(999, empty); +} + +TEST(OptionsUtilsTest, 
TestFetchOptionsWithPrefix) { + std::map options = {{"key1", "value1"}, {"test.key2", "value2"}}; + auto new_options = OptionsUtils::FetchOptionsWithPrefix("test.", options); + std::map expected = {{"key2", "value2"}}; + ASSERT_EQ(expected, new_options); +} +} // namespace paimon::test diff --git a/src/paimon/common/utils/path_util.cpp b/src/paimon/common/utils/path_util.cpp new file mode 100644 index 0000000..a977ab8 --- /dev/null +++ b/src/paimon/common/utils/path_util.cpp @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include "paimon/common/utils/path_util.h" + +#include +#include +#include + +#include "fmt/format.h" +#include "paimon/common/utils/string_utils.h" +#include "paimon/common/utils/uuid.h" +#include "paimon/status.h" + +namespace paimon { +std::string Path::ToString() const { + std::string ret; + if (!scheme.empty()) { + ret = scheme + ":"; + } + if (!authority.empty()) { + ret += "//"; + ret += authority; + } + if (!path.empty()) { + ret += path; + } + return ret; +} + +std::string PathUtil::JoinPath(const std::string& path, const std::string& name) noexcept { + if (path.empty()) { + return name; + } + if (name.empty()) { + return path; + } + int32_t slash_cnt = (*(path.rbegin()) == '/') + (*(name.begin()) == '/'); + if (!slash_cnt) { + return path + "/" + name; + } else if (slash_cnt == 2) { + return path + name.substr(1); + } + return path + name; +} + +std::string PathUtil::NormalizeInnerPath(const std::string& path) noexcept { + if (path.empty()) { + return path; + } + std::string ret; + ret.reserve(path.size()); + char last_char = path[0]; + ret.append(1, last_char); + for (size_t i = 1; i < path.size(); ++i) { + if (last_char == '/' && path[i] == '/') { + continue; + } + last_char = path[i]; + ret.append(1, last_char); + } + TrimLastDelim(&ret); + return ret; +} + +Result PathUtil::NormalizePath(const std::string& path_str) noexcept { + PAIMON_ASSIGN_OR_RAISE(Path path, ToPath(path_str)); + return path.ToString(); +} + +Result PathUtil::ToPath(const std::string& path) noexcept { + // TODO(yonghao.fyh): support Windows Driver + if (path.empty()) { + return Status::Invalid("path is an empty string."); + } + std::string scheme; + std::string authority; + int32_t start = 0; + + // parse scheme + auto colon = path.find(':'); + auto slash = path.find('/'); + if ((colon != std::string::npos) && (slash == std::string::npos || colon < slash)) { + // has a scheme + scheme.append(path, 0, colon); + start = colon + 1; + } + + // parse authority + if 
(StringUtils::StartsWith(path, "//", start) && (path.length() - start > 2)) { + // has authority + int32_t next_slash = path.find('/', start + 2); + int32_t auth_end = next_slash > 0 ? next_slash : path.length(); + authority = path.substr(start + 2, auth_end - start - 2); + start = auth_end; + } + + // parse path in uri + std::string inner_path = NormalizeInnerPath(path.substr(start)); + return Path(scheme, authority, inner_path); +} + +std::string PathUtil::GetParentDirPath(const std::string& path) noexcept { + std::string::const_reverse_iterator it; + for (it = path.rbegin(); it != path.rend() && *it == '/'; it++) { + } + for (; it != path.rend() && *it != '/'; it++) { + } + for (; it != path.rend() && *it == '/'; it++) { + } + return path.substr(0, path.rend() - it); +} + +std::string PathUtil::GetName(const std::string& path) noexcept { + std::string dir_path = path; + TrimLastDelim(&dir_path); + std::string::const_reverse_iterator it; + for (it = dir_path.rbegin(); it != dir_path.rend() && *it != '/'; it++) { + } + return dir_path.substr(dir_path.rend() - it); +} + +void PathUtil::TrimLastDelim(std::string* dir_path) noexcept { + if (dir_path == nullptr || dir_path->empty()) { + return; + } + if (dir_path->length() > 1 && *(dir_path->rbegin()) == '/') { + dir_path->erase(dir_path->size() - 1, 1); + } +} + +Result PathUtil::CreateTempPath(const std::string& path) noexcept { + std::string uuid; + if (!UUID::Generate(&uuid)) { + return Status::Invalid("generate uuid failed"); + } + return JoinPath(GetParentDirPath(path), fmt::format(".{}.{}.tmp", GetName(path), uuid)); +} + +} // namespace paimon diff --git a/src/paimon/common/utils/path_util.h b/src/paimon/common/utils/path_util.h new file mode 100644 index 0000000..b893e76 --- /dev/null +++ b/src/paimon/common/utils/path_util.h @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +#include "paimon/result.h" +#include "paimon/visibility.h" + +namespace paimon { +struct PAIMON_EXPORT Path { + Path(const std::string& _scheme, const std::string& _authority, const std::string& _path) + : scheme(_scheme), authority(_authority), path(_path) {} + + std::string ToString() const; + + std::string scheme; + std::string authority; + std::string path; +}; + +class PAIMON_EXPORT PathUtil { + public: + PathUtil() = delete; + ~PathUtil() = delete; + + static std::string JoinPath(const std::string& path, const std::string& name) noexcept; + // TODO(jinli.zjw): should pass `Path.path` and normalize; otherwise if path is + // "oss://bucket1/", GetParentDirPath will return "oss:" + static std::string GetParentDirPath(const std::string& path) noexcept; + static std::string GetName(const std::string& path) noexcept; + static void TrimLastDelim(std::string* dir_path) noexcept; + static Result CreateTempPath(const std::string& path) noexcept; + static Result ToPath(const std::string& path) noexcept; + static Result NormalizePath(const std::string& path) noexcept; + + private: + static std::string NormalizeInnerPath(const std::string& path) noexcept; +}; + +} // namespace paimon diff --git a/src/paimon/common/utils/path_util_test.cpp 
b/src/paimon/common/utils/path_util_test.cpp new file mode 100644 index 0000000..8e7ed4c --- /dev/null +++ b/src/paimon/common/utils/path_util_test.cpp @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "paimon/common/utils/path_util.h" + +#include "gtest/gtest.h" +#include "paimon/common/utils/string_utils.h" +#include "paimon/status.h" +#include "paimon/testing/utils/testharness.h" + +namespace paimon::test { + +TEST(PathUtilsTest, TestJoinPath) { + ASSERT_EQ("/tmp/test_path/test", PathUtil::JoinPath("/tmp/test_path/test", "")); +} + +TEST(PathUtilsTest, TestGetParentDirPath) { + ASSERT_EQ("/tmp/test_path", PathUtil::GetParentDirPath("/tmp/test_path/test")); + ASSERT_EQ("/tmp/test_path", PathUtil::GetParentDirPath("/tmp/test_path/test/")); +} + +TEST(PathUtilsTest, TestNormalizePathWithEmptyString) { + std::string test_path = ""; + ASSERT_NOK_WITH_MSG(PathUtil::NormalizePath(test_path), "path is an empty string."); +} + +TEST(PathUtilsTest, TestNormalizePathWithNoScheme) { + std::string test_path = "//tmp////index"; + ASSERT_OK_AND_ASSIGN(std::string normalize_path, PathUtil::NormalizePath(test_path)); + std::string expected_path = "//tmp/index"; + ASSERT_EQ(normalize_path, 
expected_path); +} + +TEST(PathUtilsTest, TestNormalizePath) { + { + // test with no authority + std::string test_path = "hdfs:///tmp/test_path/test_subdir"; + ASSERT_OK_AND_ASSIGN(std::string normalize_path, PathUtil::NormalizePath(test_path)); + std::string expected_path = "hdfs:/tmp/test_path/test_subdir"; + ASSERT_EQ(normalize_path, expected_path); + } + { + // test with authority + std::string test_path = "hdfs://tmp/test_path//test_subdir/"; + ASSERT_OK_AND_ASSIGN(std::string normalize_path, PathUtil::NormalizePath(test_path)); + std::string expected_path = "hdfs://tmp/test_path/test_subdir"; + ASSERT_EQ(normalize_path, expected_path); + } + { + // test with no authority + std::string test_path = "hdfs:///tmp/test_path//test_subdir/"; + ASSERT_OK_AND_ASSIGN(std::string normalize_path, PathUtil::NormalizePath(test_path)); + std::string expected_path = "hdfs:/tmp/test_path/test_subdir"; + ASSERT_EQ(normalize_path, expected_path); + } +} + +TEST(PathUtilsTest, TestTrimLastDelim) { + { + std::string path = "hdfs://auth/test_path/test_subdir"; + PathUtil::TrimLastDelim(&path); + ASSERT_EQ(path, "hdfs://auth/test_path/test_subdir"); + } + { + std::string path = "hdfs://auth/test_path/test_subdir/"; + PathUtil::TrimLastDelim(&path); + ASSERT_EQ(path, "hdfs://auth/test_path/test_subdir"); + } + { + std::string path = "/"; + PathUtil::TrimLastDelim(&path); + ASSERT_EQ(path, "/"); + } + { + std::string path = ""; + PathUtil::TrimLastDelim(&path); + ASSERT_EQ(path, ""); + } +} + +TEST(PathUtilsTest, TestToPath) { + { + std::string test_path = ""; + ASSERT_NOK_WITH_MSG(PathUtil::ToPath(test_path), "path is an empty string."); + } + { + std::string test_path = "FILE:///tmp"; + ASSERT_OK_AND_ASSIGN(Path path, PathUtil::ToPath(test_path)); + ASSERT_EQ(path.scheme, "FILE"); + ASSERT_EQ(path.authority, ""); + ASSERT_EQ(path.path, "/tmp"); + ASSERT_EQ(path.ToString(), "FILE:/tmp"); + } + { + std::string test_path = "dfs://tmp/index"; + ASSERT_OK_AND_ASSIGN(Path path, 
PathUtil::ToPath(test_path)); + ASSERT_EQ(path.scheme, "dfs"); + ASSERT_EQ(path.authority, "tmp"); + ASSERT_EQ(path.path, "/index"); + ASSERT_EQ(path.ToString(), "dfs://tmp/index"); + } + { + std::string test_path = "http://example.com:8080/api"; + ASSERT_OK_AND_ASSIGN(Path path, PathUtil::ToPath(test_path)); + ASSERT_EQ(path.scheme, "http"); + ASSERT_EQ(path.authority, "example.com:8080"); + ASSERT_EQ(path.path, "/api"); + ASSERT_EQ(path.ToString(), "http://example.com:8080/api"); + } + { + std::string test_path = "/tmp/index"; + ASSERT_OK_AND_ASSIGN(Path path, PathUtil::ToPath(test_path)); + ASSERT_EQ(path.scheme, ""); + ASSERT_EQ(path.authority, ""); + ASSERT_EQ(path.path, "/tmp/index"); + ASSERT_EQ(path.ToString(), "/tmp/index"); + } +} + +TEST(PathUtilsTest, TestGetName) { + ASSERT_EQ("test", PathUtil::GetName("hdfs://tmp/test_path/test/")); + ASSERT_EQ("test", PathUtil::GetName("hdfs://tmp/test_path/test")); + ASSERT_EQ("test", PathUtil::GetName("test")); +} + +TEST(PathUtilsTest, TestCreateTempPath) { + // tmp path: hdfs://tmp/test_path/.test..tmp; + ASSERT_OK_AND_ASSIGN(std::string tmp_path, + PathUtil::CreateTempPath("hdfs://tmp/test_path/test")); + ASSERT_EQ("hdfs://tmp/test_path", PathUtil::GetParentDirPath(tmp_path)); + auto tmp_name = PathUtil::GetName(tmp_path); + ASSERT_TRUE(StringUtils::StartsWith(tmp_name, ".test.")); + ASSERT_TRUE(StringUtils::EndsWith(tmp_name, ".tmp")); +} + +} // namespace paimon::test diff --git a/src/paimon/common/utils/rapidjson_util.h b/src/paimon/common/utils/rapidjson_util.h new file mode 100644 index 0000000..d413f25 --- /dev/null +++ b/src/paimon/common/utils/rapidjson_util.h @@ -0,0 +1,441 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "paimon/common/utils/string_utils.h" +#include "paimon/status.h" +#include "paimon/traits.h" +#include "rapidjson/allocators.h" +#include "rapidjson/document.h" +#include "rapidjson/encodings.h" +#include "rapidjson/prettywriter.h" +#include "rapidjson/rapidjson.h" +#include "rapidjson/stringbuffer.h" +#include "rapidjson/writer.h" + +namespace paimon { + +class RapidJsonUtil { + public: + RapidJsonUtil() = delete; + ~RapidJsonUtil() = delete; + + // if T is custom type, T must have ToJson() + template + static inline Status ToJsonString(const T& obj, std::string* json_str) { + rapidjson::Document doc; + rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); + rapidjson::Value value; + try { + if constexpr (is_pointer::value) { + value = obj->ToJson(&allocator); + } else if constexpr (std::is_same_v>) { + *json_str = MapToJsonString(obj); + return Status::OK(); + } else { + value = obj.ToJson(&allocator); + } + } catch (const std::invalid_argument& e) { + return Status::Invalid("json serialize failed:", e.what()); + } catch (...) 
{ + return Status::Invalid("json serialize failed, unknown error"); + } + if (!ToJson(value, json_str)) { + return Status::Invalid("serialize failed"); + } + return Status::OK(); + } + + // if T is custom type, T must have FromJson() + template + static inline Status FromJsonString(const std::string& json_str, T* obj) { + if (!obj) { + return Status::Invalid("deserialize failed: obj is nullptr"); + } + if constexpr (std::is_same_v>) { + PAIMON_ASSIGN_OR_RAISE(*obj, MapFromJsonString(json_str)); + } else { + rapidjson::Document doc; + if (!FromJson(json_str, &doc)) { + return Status::Invalid("deserialize failed: ", json_str); + } + try { + obj->FromJson(doc); + } catch (const std::invalid_argument& e) { + return Status::Invalid("deserialize failed, possibly type incompatible: ", + e.what()); + } catch (...) { + return Status::Invalid("deserialize failed, reason unknown: ", json_str); + } + } + return Status::OK(); + } + + // if T is std::nullopt, will use rapid_json null_value + template + static rapidjson::Value SerializeValue(const T& obj, + rapidjson::Document::AllocatorType* allocator); + + // condition1 : has key & value -> return value + // condition2 : no key or null value -> return default value + template + static T DeserializeKeyValue(const rapidjson::Value& value, const std::string& key, + const T& default_value); + + // condition1 : has key & value -> return value + // condition2 : no key or null value, T is optional -> return std::nullopt + // condition3 : no key or null value, T is not optional -> throw exception + template + static T DeserializeKeyValue(const rapidjson::Value& value, const std::string& key); + + template + static T DeserializeValue(const rapidjson::Value& value); + + private: + static inline bool ToJson(const rapidjson::Value& value, std::string* json_str) { + assert(json_str); + rapidjson::StringBuffer buffer; + rapidjson::PrettyWriter writer(buffer); + if (!value.Accept(writer)) { + return false; + } + *json_str = 
buffer.GetString(); + return true; + } + + static inline bool FromJson(const std::string& json_str, rapidjson::Document* doc) { + doc->Parse(json_str.c_str()); + if (doc->HasParseError()) { + return false; + } + return true; + } + + template + static rapidjson::Value SerializeMap(const T& map, + rapidjson::Document::AllocatorType* allocator); + + template + static rapidjson::Value SerializeVector(const T& vec, + rapidjson::Document::AllocatorType* allocator); + + template + static T DeserializeVector(const rapidjson::Value& value); + + template + static T DeserializeMap(const rapidjson::Value& value); + + template + static T GetValue(const rapidjson::Value& value); + + static std::string MapToJsonString(const std::map& map) { + rapidjson::Document d; + d.SetObject(); + rapidjson::Document::AllocatorType& allocator = d.GetAllocator(); + + for (const auto& kv : map) { + d.AddMember(rapidjson::Value(kv.first.c_str(), allocator), + rapidjson::Value(kv.second.c_str(), allocator), allocator); + } + + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + d.Accept(writer); + + return buffer.GetString(); + } + static Result> MapFromJsonString( + const std::string& json_str) { + rapidjson::Document doc; + doc.Parse(json_str.c_str()); + if (doc.HasParseError() || !doc.IsObject()) { + return Status::Invalid("deserialize failed: parse error or not JSON object: ", + json_str); + } + + std::map result; + for (auto it = doc.MemberBegin(); it != doc.MemberEnd(); ++it) { + if (!it->name.IsString() || !it->value.IsString()) { + return Status::Invalid( + "deserialize failed: non-string key or value in JSON object: ", json_str); + } + result[it->name.GetString()] = it->value.GetString(); + } + return result; + } +}; + +template +inline rapidjson::Value RapidJsonUtil::SerializeValue( + const T& obj, rapidjson::Document::AllocatorType* allocator) { + if constexpr (is_optional::value) { + if (obj == std::nullopt) { + rapidjson::Value null_value; + null_value.SetNull(); + 
return null_value; + } else { + return SerializeValue(obj.value(), allocator); + } + } else { + if constexpr (std::is_same_v) { + return rapidjson::Value(obj.c_str(), *allocator); + } else if constexpr (std::is_arithmetic_v) { + return rapidjson::Value(obj); + } else if constexpr (is_map::value) { + return SerializeMap(obj, allocator); + } else if constexpr (is_vector::value) { + return SerializeVector(obj, allocator); + } else { + // custom type + return obj.ToJson(allocator); + } + } +} + +template +inline rapidjson::Value RapidJsonUtil::SerializeMap(const T& map, + rapidjson::Document::AllocatorType* allocator) { + rapidjson::Value value(rapidjson::kObjectType); + using K = typename T::key_type; + for (const auto& kv : map) { + rapidjson::Value key; + if constexpr (std::is_same_v) { + key = SerializeValue(kv.first, allocator); + } else if constexpr (std::is_same_v) { + throw std::invalid_argument("map key cannot be bool"); + } else { + std::string key_str = std::to_string(kv.first); + key = SerializeValue(key_str, allocator); + } + rapidjson::Value val = SerializeValue(kv.second, allocator); + value.AddMember(key, val, *allocator); + } + return value; +} + +template +inline rapidjson::Value RapidJsonUtil::SerializeVector( + const T& vec, rapidjson::Document::AllocatorType* allocator) { + rapidjson::Value value(rapidjson::kArrayType); + using V = typename T::value_type; + for (const V& item : vec) { + value.PushBack(SerializeValue(item, allocator), *allocator); + } + return value; +} + +template +inline T RapidJsonUtil::DeserializeKeyValue(const rapidjson::Value& value, const std::string& key) { + if (!value.IsObject()) { + throw std::invalid_argument("value must be an object"); + } + if constexpr (is_optional::value) { + if (!value.HasMember(key.c_str()) || value[key].IsNull()) { + return std::nullopt; + } else { + return DeserializeValue(value[key]); + } + } else { + if (!value.HasMember(key.c_str()) || value[key].IsNull()) { + throw std::invalid_argument("key 
must exist"); + } + return DeserializeValue(value[key]); + } +} + +template +inline T RapidJsonUtil::DeserializeKeyValue(const rapidjson::Value& value, const std::string& key, + const T& default_value) { + if (!value.IsObject()) { + throw std::invalid_argument("value must be an object"); + } + if (!value.HasMember(key.c_str()) || value[key].IsNull()) { + return default_value; + } + if constexpr (is_optional::value) { + return DeserializeValue(value[key]); + } else { + return DeserializeValue(value[key]); + } +} + +template +inline T RapidJsonUtil::DeserializeValue(const rapidjson::Value& value) { + if constexpr (is_vector::value) { + return DeserializeVector(value); + } else if constexpr (is_map::value) { + return DeserializeMap(value); + } else { + // arithmetic or string or custom type + return GetValue(value); + } +} + +template +inline T RapidJsonUtil::DeserializeVector(const rapidjson::Value& value) { + if (!value.IsArray()) { + throw std::invalid_argument("value must be an array"); + } + T obj; + obj.reserve(value.Size()); + using V = typename T::value_type; + for (const auto& item : value.GetArray()) { + obj.push_back(DeserializeValue(item)); + } + return obj; +} + +template +inline T RapidJsonUtil::DeserializeMap(const rapidjson::Value& value) { + if (!value.IsObject()) { + throw std::invalid_argument("value must be an object"); + } + using K = typename T::key_type; + using V = typename T::mapped_type; + T obj; + for (auto it = value.MemberBegin(); it != value.MemberEnd(); ++it) { + K key; + if constexpr (std::is_same_v) { + key = DeserializeValue(it->name); + } else { + auto key_str = DeserializeValue(it->name); + auto optional_key = StringUtils::StringToValue(key_str); + if (!optional_key) { + throw std::invalid_argument("key cannot be parse from string"); + } + key = optional_key.value(); + } + obj.emplace(key, DeserializeValue(it->value)); + } + return obj; +} + +template <> +inline bool RapidJsonUtil::GetValue(const rapidjson::Value& value) { + if 
(!value.IsBool()) { + throw std::invalid_argument("value must be bool"); + } + return value.GetBool(); +} + +template <> +inline int8_t RapidJsonUtil::GetValue(const rapidjson::Value& value) { + if (!value.IsInt()) { + throw std::invalid_argument("value must be int"); + } + return static_cast(value.GetInt()); +} + +template <> +inline uint8_t RapidJsonUtil::GetValue(const rapidjson::Value& value) { + if (!value.IsUint()) { + throw std::invalid_argument("value must be uint"); + } + return static_cast(value.GetUint()); +} + +template <> +inline int16_t RapidJsonUtil::GetValue(const rapidjson::Value& value) { + if (!value.IsInt()) { + throw std::invalid_argument("value must be int"); + } + return static_cast(value.GetInt()); +} + +template <> +inline uint16_t RapidJsonUtil::GetValue(const rapidjson::Value& value) { + if (!value.IsUint()) { + throw std::invalid_argument("value must be uint"); + } + return static_cast(value.GetUint()); +} + +template <> +inline int32_t RapidJsonUtil::GetValue(const rapidjson::Value& value) { + if (!value.IsInt()) { + throw std::invalid_argument("value must be int"); + } + return value.GetInt(); +} + +template <> +inline uint32_t RapidJsonUtil::GetValue(const rapidjson::Value& value) { + if (!value.IsUint()) { + throw std::invalid_argument("value must be uint"); + } + return value.GetUint(); +} + +template <> +inline int64_t RapidJsonUtil::GetValue(const rapidjson::Value& value) { + if (!value.IsInt64()) { + throw std::invalid_argument("value must be int64"); + } + return value.GetInt64(); +} + +template <> +inline uint64_t RapidJsonUtil::GetValue(const rapidjson::Value& value) { + if (!value.IsUint64()) { + throw std::invalid_argument("value must be uint64"); + } + return value.GetUint64(); +} + +template <> +inline double RapidJsonUtil::GetValue(const rapidjson::Value& value) { + if (!value.IsDouble()) { + throw std::invalid_argument("value must be double"); + } + return value.GetDouble(); +} + +template <> +inline float 
RapidJsonUtil::GetValue(const rapidjson::Value& value) { + if (!value.IsDouble()) { + throw std::invalid_argument("value must be double"); + } + return static_cast(value.GetDouble()); +} + +template <> +inline std::string RapidJsonUtil::GetValue(const rapidjson::Value& value) { + if (!value.IsString()) { + throw std::invalid_argument("value must be string"); + } + return std::string(value.GetString(), value.GetStringLength()); +} + +template +inline T RapidJsonUtil::GetValue(const rapidjson::Value& value) { + // custom type + T obj; + obj.FromJson(value); + return obj; +} + +} // namespace paimon diff --git a/src/paimon/common/utils/rapidjson_util_test.cpp b/src/paimon/common/utils/rapidjson_util_test.cpp new file mode 100644 index 0000000..c5011d9 --- /dev/null +++ b/src/paimon/common/utils/rapidjson_util_test.cpp @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include "paimon/common/utils/rapidjson_util.h" + +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "paimon/testing/utils/testharness.h" +#include "rapidjson/allocators.h" +#include "rapidjson/document.h" +#include "rapidjson/rapidjson.h" + +namespace paimon::test { + +TEST(RapidJsonUtilTest, TestSerializeAndDeserialize) { + // serialize + rapidjson::Document doc; + doc.SetObject(); + rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); + // string + std::string str_value = "John"; + doc.AddMember("name", RapidJsonUtil::SerializeValue(str_value, &allocator).Move(), allocator); + // int + int32_t int_value = 30; + doc.AddMember("age", RapidJsonUtil::SerializeValue(int_value, &allocator).Move(), allocator); + // vector + std::vector vector_value = {7, 12}; + doc.AddMember("vector_value", RapidJsonUtil::SerializeValue(vector_value, &allocator).Move(), + allocator); + // map + std::map map_value = {{"a", 0.2}, {"b", 1.2}}; + doc.AddMember("map_value", RapidJsonUtil::SerializeValue(map_value, &allocator).Move(), + allocator); + + // vector of vector + std::vector> vector_of_vector = {{7, 12}, {27, 45}}; + doc.AddMember("vector_of_vector", + RapidJsonUtil::SerializeValue(vector_of_vector, &allocator).Move(), allocator); + + // vector of map + std::vector> vector_of_map = {{{{"a", 0.2}, {"b", 1.2}}}, + {{"c", 2.2}, {"d", 3.2}}}; + doc.AddMember("vector_of_map", RapidJsonUtil::SerializeValue(vector_of_map, &allocator).Move(), + allocator); + + // map of vector + std::map> map_of_vector = {{"aa", {7, 12}}, {"bb", {27, 45}}}; + doc.AddMember("map_of_vector", RapidJsonUtil::SerializeValue(map_of_vector, &allocator).Move(), + allocator); + + std::optional null_value; + doc.AddMember("null_value", RapidJsonUtil::SerializeValue(null_value, &allocator).Move(), + allocator); + + std::optional optional_value("abcd"); + doc.AddMember("optional_value", + RapidJsonUtil::SerializeValue(optional_value, &allocator).Move(), allocator); + 
+ // map with int key (not string key, will convert in util) + std::map map_with_int_key = {{100, 1000}, {200, 2000}}; + doc.AddMember("map_with_int_key", + RapidJsonUtil::SerializeValue(map_with_int_key, &allocator).Move(), allocator); + + std::string jsonStr; + ASSERT_TRUE(RapidJsonUtil::ToJson(doc, &jsonStr)); + + // deserialize + rapidjson::Document doc2; + ASSERT_TRUE(RapidJsonUtil::FromJson(jsonStr, &doc2)); + + ASSERT_EQ(str_value, RapidJsonUtil::DeserializeKeyValue(doc2, "name", "")); + ASSERT_EQ(int_value, RapidJsonUtil::DeserializeKeyValue(doc2, "age", -1)); + ASSERT_EQ(vector_value, + RapidJsonUtil::DeserializeKeyValue>(doc2, "vector_value", {})); + + auto de_map_value = + RapidJsonUtil::DeserializeKeyValue>(doc2, "map_value", {}); + ASSERT_EQ(map_value, de_map_value); + + auto de_vector_of_vector = RapidJsonUtil::DeserializeKeyValue>>( + doc2, "vector_of_vector", {}); + ASSERT_EQ(vector_of_vector, de_vector_of_vector); + + auto de_vector_of_map = + RapidJsonUtil::DeserializeKeyValue>>( + doc2, "vector_of_map", {}); + ASSERT_EQ(vector_of_map, de_vector_of_map); + + auto de_map_of_vector = + RapidJsonUtil::DeserializeKeyValue>>( + doc2, "map_of_vector", {}); + ASSERT_EQ(map_of_vector, de_map_of_vector); + + auto de_null_value = + RapidJsonUtil::DeserializeKeyValue>(doc2, "null_value"); + ASSERT_EQ(null_value, de_null_value); + + auto de_null_value_with_default = RapidJsonUtil::DeserializeKeyValue>( + doc2, "null_value", /*default_value=*/std::optional(2333)); + ASSERT_EQ(2333, de_null_value_with_default.value()); + + auto de_optional_value = + RapidJsonUtil::DeserializeKeyValue>(doc2, "optional_value"); + ASSERT_EQ(optional_value, de_optional_value); + + auto de_map_with_int_key = RapidJsonUtil::DeserializeKeyValue>( + doc2, "map_with_int_key", {}); + ASSERT_EQ(map_with_int_key, de_map_with_int_key); + + // test non exist key, will use default value + double non_exist_value = 0.0; + non_exist_value = RapidJsonUtil::DeserializeKeyValue(doc2, 
"non_exist_key", 2.333); + ASSERT_EQ(2.333, non_exist_value); +} + +TEST(RapidJsonUtilTest, TestMapJsonString) { + std::map m1 = {{"key1", "value1"}, {"key2", "value2"}}; + std::string result; + ASSERT_OK(RapidJsonUtil::ToJsonString(m1, &result)); + ASSERT_EQ(result, "{\"key1\":\"value1\",\"key2\":\"value2\"}"); + + std::map m2; + ASSERT_OK(RapidJsonUtil::FromJsonString(result, &m2)); + ASSERT_EQ(m1, m2); +} + +} // namespace paimon::test diff --git a/src/paimon/common/utils/string_utils.cpp b/src/paimon/common/utils/string_utils.cpp new file mode 100644 index 0000000..1cc9045 --- /dev/null +++ b/src/paimon/common/utils/string_utils.cpp @@ -0,0 +1,220 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include "paimon/common/utils/string_utils.h" + +#include +#include +#include +#include +#include +#include + +#include "fmt/format.h" +#include "paimon/status.h" + +namespace paimon { +std::string StringUtils::Replace(const std::string& text, const std::string& search_string, + const std::string& replacement, int32_t max) { + std::string str = text; + size_t pos = str.find(search_string); + int32_t count = 0; + while (pos != std::string::npos && (count < max || max == -1)) { + str.replace(pos, search_string.size(), replacement); + pos = str.find(search_string, pos + replacement.size()); + count++; + } + return str; +} + +std::string StringUtils::ReplaceLast(const std::string& text, const std::string& old_str, + const std::string& new_str) { + std::string str = text; + size_t pos = str.rfind(old_str); + if (pos != std::string::npos) { + str.replace(pos, old_str.size(), new_str); + } + return str; +} + +bool StringUtils::StartsWith(const std::string& str, const std::string& prefix, size_t start_pos) { + return (str.size() >= prefix.size()) && (str.compare(start_pos, prefix.size(), prefix) == 0); +} +bool StringUtils::EndsWith(const std::string& str, const std::string& suffix) { + size_t s1 = str.size(); + size_t s2 = suffix.size(); + return (s1 >= s2) && (str.compare(s1 - s2, s2, suffix) == 0); +} +bool StringUtils::IsNullOrWhitespaceOnly(const std::string& str) { + if (str.empty()) { + return true; + } + for (char c : str) { + if (!std::isspace(static_cast(c))) { + return false; + } + } + return true; +} + +void StringUtils::Trim(std::string* str) { + str->erase(str->find_last_not_of(' ') + 1); + str->erase(0, str->find_first_not_of(' ')); +} + +std::string StringUtils::ToLowerCase(const std::string& str) { + std::string result; + result.reserve(str.length()); + std::transform(str.begin(), str.end(), std::back_inserter(result), + [](unsigned char c) { return std::tolower(c); }); + return result; +} + +std::string StringUtils::ToUpperCase(const std::string& 
str) { + std::string result; + result.reserve(str.length()); + std::transform(str.begin(), str.end(), std::back_inserter(result), + [](unsigned char c) { return std::toupper(c); }); + return result; +} + +std::vector StringUtils::Split(const std::string& text, const std::string& sep_str, + bool ignore_empty) { + std::vector vec; + if (sep_str.empty()) { + // invalid case, do not split. + vec.emplace_back(text); + return vec; + } + size_t n = 0, old = 0; + while (n != std::string::npos) { + n = text.find(sep_str, n); + if (n != std::string::npos) { + if (!ignore_empty || n != old) { + vec.emplace_back(text.substr(old, n - old)); + } + n += sep_str.length(); + old = n; + } + } + + if (!ignore_empty || old < text.length()) { + vec.emplace_back(text.substr(old, text.length() - old)); + } + return vec; +} + +std::vector> StringUtils::Split(const std::string& text, + const std::string& delim1, + const std::string& delim2) { + std::vector> result; + std::vector split_parts = Split(text, delim1); + result.reserve(split_parts.size()); + for (auto& part : split_parts) { + result.emplace_back(Split(part, delim2)); + } + return result; +} + +Result StringUtils::StringToDate(const std::string& str) { + auto int_value = StringToValue(str); + if (int_value) { + return int_value.value(); + } + std::tm timeinfo = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, nullptr}; + std::istringstream ss(str); + ss >> std::get_time(&timeinfo, "%Y-%m-%d"); + if (ss.fail()) { + return Status::Invalid(fmt::format("failed to convert string '{}' to date", str)); + } + std::time_t time = timegm(&timeinfo); + if (time == -1) { + return Status::Invalid(fmt::format("failed to convert string '{}' to date", str)); + } + static const int64_t SECONDS_PER_DAY = 86400l; // = 24 * 60 * 60 + return time / SECONDS_PER_DAY; +} + +/// Parses a timestamp string into unix milliseconds. +/// Supported formats: "yyyy-MM-dd", "yyyy-MM-dd HH:mm:ss", "yyyy-MM-dd HH:mm:ss.SSS". 
+/// Uses the default local time zone, consistent with Java Paimon behavior. +Result StringUtils::StringToTimestampMillis(const std::string& str) { + std::tm timeinfo{}; + timeinfo.tm_isdst = -1; + + // Try "yyyy-MM-dd HH:mm:ss" first (also matches "yyyy-MM-dd HH:mm:ss.SSS") + std::istringstream ss(str); + ss >> std::get_time(&timeinfo, "%Y-%m-%d %H:%M:%S"); + int32_t millis_part = 0; + + if (!ss.fail()) { + // Check for optional fractional seconds ".SSS" + if (ss.peek() == '.') { + ss.get(); + std::string frac; + while (frac.size() < 3 && ss.peek() != std::char_traits::eof() && + std::isdigit(static_cast(ss.peek()))) { + frac += static_cast(ss.get()); + } + if (frac.empty()) { + return Status::Invalid( + fmt::format("failed to convert string '{}' to timestamp, " + "expected digits after '.'", + str)); + } + // Pad to 3 digits: "1" -> 100, "12" -> 120, "123" -> 123 + while (frac.size() < 3) { + frac += '0'; + } + auto parsed = StringToValue(frac); + if (parsed) { + millis_part = parsed.value(); + } + } + } else { + // Fall back to "yyyy-MM-dd" (date only, time defaults to 00:00:00) + ss.clear(); + ss.str(str); + timeinfo = std::tm{}; + timeinfo.tm_isdst = -1; + ss >> std::get_time(&timeinfo, "%Y-%m-%d"); + if (ss.fail()) { + return Status::Invalid( + fmt::format("failed to convert string '{}' to timestamp, " + "supported formats: yyyy-MM-dd, yyyy-MM-dd HH:mm:ss, " + "yyyy-MM-dd HH:mm:ss.SSS", + str)); + } + } + + if (ss.peek() != std::char_traits::eof()) { + return Status::Invalid( + fmt::format("failed to convert string '{}' to timestamp, " + "unexpected trailing characters", + str)); + } + + std::time_t time = mktime(&timeinfo); + if (time == -1) { + return Status::Invalid(fmt::format("failed to convert string '{}' to timestamp", str)); + } + return static_cast(time) * 1000 + millis_part; +} + +} // namespace paimon diff --git a/src/paimon/common/utils/string_utils.h b/src/paimon/common/utils/string_utils.h new file mode 100644 index 0000000..3c0906e --- 
/dev/null +++ b/src/paimon/common/utils/string_utils.h @@ -0,0 +1,209 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fmt/core.h" +#include "fmt/format.h" +#include "fmt/ranges.h" +#include "paimon/common/utils/date_time_utils.h" +#include "paimon/data/timestamp.h" +#include "paimon/result.h" +#include "paimon/traits.h" +#include "paimon/visibility.h" + +namespace paimon { + +/// Utils for string. +class PAIMON_EXPORT StringUtils { + public: + /// Replaces all occurrences of a string within another string. + /// + /// A `null` reference passed to this method is a no-op. + /// + ///
+    /// StringUtils::Replace(null, *, *)        = null
+    /// StringUtils::Replace("", *, *)          = ""
+    /// StringUtils::Replace("any", null, *)    = "any"
+    /// StringUtils::Replace("any", *, null)    = "any"
+    /// StringUtils::Replace("any", "", *)      = "any"
+    /// StringUtils::Replace("aba", "a", null)  = "aba"
+    /// StringUtils::Replace("aba", "a", "")    = "b"
+    /// StringUtils::Replace("aba", "a", "z")   = "zbz"
+    /// 
+ /// + /// @see #replace(string text, string search_string, string replacement, int max) + /// @param text text to search and replace in, may be null + /// @param search_string the String to search for, may be null + /// @param replacement the String to replace it with, may be null + /// @return the text with any replacements processed, `null` if null string input + static std::string Replace(const std::string& text, const std::string& search_string, + const std::string& replacement) { + return Replace(text, search_string, replacement, -1); + } + + /// Replaces a String with another String inside a larger String, for the first `max` values of + /// the search String. + /// + /// A `null` reference passed to this method is a no-op. + /// + ///
+    /// StringUtils::Replace(null, *, *, *)         = null
+    /// StringUtils::Replace("", *, *, *)           = ""
+    /// StringUtils::Replace("any", null, *, *)     = "any"
+    /// StringUtils::Replace("any", *, null, *)     = "any"
+    /// StringUtils::Replace("any", "", *, *)       = "any"
+    /// StringUtils::Replace("any", *, *, 0)        = "any"
+    /// StringUtils::Replace("abaa", "a", null, -1) = "abaa"
+    /// StringUtils::Replace("abaa", "a", "", -1)   = "b"
+    /// StringUtils::Replace("abaa", "a", "z", 0)   = "abaa"
+    /// StringUtils::Replace("abaa", "a", "z", 1)   = "zbaa"
+    /// StringUtils::Replace("abaa", "a", "z", 2)   = "zbza"
+    /// StringUtils::Replace("abaa", "a", "z", -1)  = "zbzz"
+    /// 
+ /// + /// @param text text to search and replace in, may be null + /// @param search_string the String to search for, may be null + /// @param replacement the String to replace it with, may be null + /// @param max maximum number of values to replace, or `-1` if no maximum + /// @return the text with any replacements processed, `null` if null string input + static std::string Replace(const std::string& text, const std::string& search_string, + const std::string& replacement, int32_t max); + + static std::string ReplaceLast(const std::string& text, const std::string& old_str, + const std::string& new_str); + + static bool StartsWith(const std::string& str, const std::string& prefix, size_t start_pos = 0); + + static bool EndsWith(const std::string& str, const std::string& suffix); + + static bool IsNullOrWhitespaceOnly(const std::string& str); + + static void Trim(std::string* str); + + static std::string ToLowerCase(const std::string& str); + static std::string ToUpperCase(const std::string& str); + + template + static std::string VectorToString(const std::vector& vec) { + std::vector strs; + strs.reserve(vec.size()); + for (const auto& value : vec) { + if constexpr (is_optional::value) { + if (value == std::nullopt) { + strs.emplace_back("null"); + } else { + strs.emplace_back(value.value().ToString()); + } + } else if constexpr (is_pointer::value) { + strs.emplace_back(value->ToString()); + } else { + strs.emplace_back(value.ToString()); + } + } + return fmt::format("[{}]", fmt::join(strs, ", ")); + } + + static std::vector Split(const std::string& text, const std::string& sep_str, + bool ignore_empty = true); + + static std::vector> Split(const std::string& text, + const std::string& delim1, + const std::string& delim2); + + static Result StringToDate(const std::string& str); + + static Result StringToTimestampMillis(const std::string& str); + + template + static std::optional StringToValue(const std::string& str); +}; + +template +std::optional 
StringUtils::StringToValue(const std::string& str) { + static_assert(std::is_trivially_copyable_v, "T must be trivially copyable"); + if (str.length() == 0) { + return std::nullopt; + } + + if constexpr (std::is_same_v || std::is_same_v || + std::is_same_v || std::is_same_v || + std::is_same_v || std::is_same_v || + std::is_same_v || std::is_same_v) { + T value{}; + int32_t base = 10; + auto str_data = str.data(); + auto str_size = str.size(); + if constexpr (std::is_unsigned_v) { + if (str_data[0] == '-') { + return std::nullopt; + } + } + auto result = std::from_chars(str_data, str_data + str_size, value, base); + if (result.ec != std::errc() || result.ptr != str_data + str_size) { + return std::nullopt; + } else { + return value; + } + } else if constexpr (std::is_same_v || std::is_same_v) { + T value; + std::istringstream iss(str); + iss >> value; + if (iss && iss.eof()) { + return value; + } + return std::nullopt; + } else if constexpr (std::is_same_v) { + static const std::set TRUE_STRINGS = {"t", "true", "y", "yes", "1"}; + static const std::set FALSE_STRINGS = {"f", "false", "n", "no", "0"}; + std::string lower_case = ToLowerCase(str); + if (TRUE_STRINGS.find(lower_case) != TRUE_STRINGS.end()) { + return true; + } else if (FALSE_STRINGS.find(lower_case) != FALSE_STRINGS.end()) { + return false; + } else { + return std::nullopt; + } + } else { + assert(false); + return std::nullopt; + } +} + +template <> +inline std::optional StringUtils::StringToValue(const std::string& str) { + return str; +} + +} // namespace paimon diff --git a/src/paimon/common/utils/string_utils_test.cpp b/src/paimon/common/utils/string_utils_test.cpp new file mode 100644 index 0000000..f1ec0da --- /dev/null +++ b/src/paimon/common/utils/string_utils_test.cpp @@ -0,0 +1,487 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "paimon/common/utils/string_utils.h" + +#include +#include + +#include "gtest/gtest.h" +#include "paimon/status.h" +#include "paimon/testing/utils/testharness.h" +#include "paimon/testing/utils/timezone_guard.h" + +namespace paimon::test { +class StringUtilsTest : public ::testing::Test { + public: + void SetUp() override {} + void TearDown() override {} + + private: + template + void CheckBoundary(const std::string& max_value_str, const std::string& min_value_str); + template + void CheckOverFlowAndUnderFlow(const std::string& over_flow, const std::string& under_flow); +}; + +template +void StringUtilsTest::CheckBoundary(const std::string& max_value_str, + const std::string& min_value_str) { + ASSERT_EQ(std::numeric_limits::min(), StringUtils::StringToValue(min_value_str).value()); + ASSERT_EQ(std::numeric_limits::max(), StringUtils::StringToValue(max_value_str).value()); +} + +template <> +void StringUtilsTest::CheckBoundary(const std::string& max_value_str, + const std::string& min_value_str) { + ASSERT_NEAR(-std::numeric_limits::max(), + StringUtils::StringToValue(min_value_str).value(), 0.00001e+308); + ASSERT_NEAR(std::numeric_limits::max(), + StringUtils::StringToValue(max_value_str).value(), 0.00001e+308); +} + +template <> +void 
StringUtilsTest::CheckBoundary(const std::string& max_value_str, + const std::string& min_value_str) { + ASSERT_NEAR(-std::numeric_limits::max(), + StringUtils::StringToValue(min_value_str).value(), 0.00001e+38); + ASSERT_NEAR(std::numeric_limits::max(), + StringUtils::StringToValue(max_value_str).value(), 0.00001e+38); +} + +template +void StringUtilsTest::CheckOverFlowAndUnderFlow(const std::string& over_flow, + const std::string& under_flow) { + ASSERT_EQ(StringUtils::StringToValue(over_flow), std::nullopt); + ASSERT_EQ(StringUtils::StringToValue(under_flow), std::nullopt); +} + +TEST_F(StringUtilsTest, TestReplaceAll) { + { + std::string origin = "how is is you"; + std::string expect = "how are are you"; + std::string result = StringUtils::Replace(origin, "is", "are"); + ASSERT_EQ(expect, result); + } + { + std::string origin = "aabac"; + std::string expect = "aaaabaac"; + std::string result = StringUtils::Replace(origin, "a", "aa"); + ASSERT_EQ(expect, result); + } + { + std::string origin = "aaaabaac"; + std::string expect = "aabac"; + std::string result = StringUtils::Replace(origin, "aa", "a"); + ASSERT_EQ(expect, result); + } + { + std::string origin = "aaaabaac"; + std::string expect = "aaaabaac"; + std::string result = StringUtils::Replace(origin, "abc", "a"); + ASSERT_EQ(expect, result); + } + { + std::string origin = "aaaaaaaa"; + std::string expect = "bbbb"; + std::string result = StringUtils::Replace(origin, "aa", "b"); + ASSERT_EQ(expect, result); + } + { + std::string origin = "aaaaaaaaa"; + std::string expect = "bbbba"; + std::string result = StringUtils::Replace(origin, "aa", "b"); + ASSERT_EQ(expect, result); + } + { + std::string origin = "/home/admin/ops"; + std::string expect = R"(\/home\/admin\/ops)"; + std::string result = StringUtils::Replace(origin, "/", "\\/"); + ASSERT_EQ(expect, result); + } +} + +TEST_F(StringUtilsTest, TestReplaceLast) { + { + std::string origin = "a/b/c//"; + std::string expect = "a/b/c/_"; + std::string actual = 
StringUtils::ReplaceLast(origin, "/", "_"); + ASSERT_EQ(expect, actual); + } + { + std::string origin = "a/b/c//"; + std::string expect = "a/b/c//"; + std::string actual = StringUtils::ReplaceLast(origin, "_", "/"); + ASSERT_EQ(expect, actual); + } + + { + std::string origin = "how is is you"; + std::string expect = "how is are you"; + std::string actual = StringUtils::ReplaceLast(origin, "is", "are"); + ASSERT_EQ(expect, actual); + } +} + +TEST_F(StringUtilsTest, TestReplaceWithMaxCount) { + { + std::string origin = "how is is you"; + std::string expect = "how are is you"; + std::string result = StringUtils::Replace(origin, "is", "are", 1); + ASSERT_EQ(expect, result); + } + { + std::string origin = "aabac"; + std::string expect = "aaaabac"; + std::string result = StringUtils::Replace(origin, "a", "aa", 2); + ASSERT_EQ(expect, result); + } + { + std::string origin = "aaaabaac"; + std::string expect = "aaaabaac"; + std::string result = StringUtils::Replace(origin, "aa", "a", 0); + ASSERT_EQ(expect, result); + } + { + std::string origin = "aaaaaaaa"; + std::string expect = "bbbb"; + std::string result = StringUtils::Replace(origin, "aa", "b", 100); + ASSERT_EQ(expect, result); + } + { + std::string origin = "aaaaaaaaa"; + std::string expect = "bbbaaa"; + std::string result = StringUtils::Replace(origin, "aa", "b", 3); + ASSERT_EQ(expect, result); + } + { + std::string origin = "/home/admin/ops"; + std::string expect = "\\/home\\/admin/ops"; + std::string result = StringUtils::Replace(origin, "/", "\\/", 2); + ASSERT_EQ(expect, result); + } +} + +TEST_F(StringUtilsTest, TestIsNullOrWhitespaceOnly) { + { + std::string str = ""; + auto ret = StringUtils::IsNullOrWhitespaceOnly(str); + ASSERT_TRUE(ret); + } + { + std::string str = "a a a a"; + auto ret = StringUtils::IsNullOrWhitespaceOnly(str); + ASSERT_FALSE(ret); + } + { + std::string str = " "; + auto ret = StringUtils::IsNullOrWhitespaceOnly(str); + ASSERT_TRUE(ret); + } + { + std::string str = "\n"; + auto ret = 
StringUtils::IsNullOrWhitespaceOnly(str); + ASSERT_TRUE(ret); + } + { + std::string str = "\t"; + auto ret = StringUtils::IsNullOrWhitespaceOnly(str); + ASSERT_TRUE(ret); + } +} + +TEST_F(StringUtilsTest, TestToLowerCase) { + { + std::string str = "HDGF"; + ASSERT_EQ("hdgf", StringUtils::ToLowerCase(str)); + } + { + std::string str = "ab CD ffg +8"; + ASSERT_EQ("ab cd ffg +8", StringUtils::ToLowerCase(str)); + } + { + std::string str = ""; + ASSERT_EQ("", StringUtils::ToLowerCase(str)); + } +} + +TEST_F(StringUtilsTest, TestToUpperCase) { + { + std::string str = "hdgf"; + ASSERT_EQ("HDGF", StringUtils::ToUpperCase(str)); + } + { + std::string str = "AB cd ffg +8"; + ASSERT_EQ("AB CD FFG +8", StringUtils::ToUpperCase(str)); + } + { + std::string str = ""; + ASSERT_EQ("", StringUtils::ToUpperCase(str)); + } +} + +TEST_F(StringUtilsTest, TestStartsWith) { + { + std::string str = "abcde"; + ASSERT_TRUE(StringUtils::StartsWith(str, "ab")); + } + { + std::string str = "abcde"; + ASSERT_FALSE(StringUtils::StartsWith(str, "ba")); + } + { + std::string str = "abcde"; + ASSERT_TRUE(StringUtils::StartsWith(str, "bc", /*start_pos=*/1)); + } + { + std::string str = "abcde"; + ASSERT_FALSE(StringUtils::StartsWith(str, "bc", /*start_pos=*/3)); + } + { + std::string str = ""; + ASSERT_FALSE(StringUtils::StartsWith(str, "bc")); + } + { + std::string str = ""; + ASSERT_TRUE(StringUtils::StartsWith(str, "")); + } +} +TEST_F(StringUtilsTest, TestEndsWith) { + { + std::string str = "abcde"; + ASSERT_TRUE(StringUtils::EndsWith(str, "de")); + } + { + std::string str = "abcde"; + ASSERT_FALSE(StringUtils::EndsWith(str, "ba")); + } + { + std::string str = ""; + ASSERT_FALSE(StringUtils::EndsWith(str, "bc")); + } + { + std::string str = ""; + ASSERT_TRUE(StringUtils::EndsWith(str, "")); + } +} + +TEST_F(StringUtilsTest, TestSplit) { + { + std::vector expect = {"aabbcc"}; + std::vector result = StringUtils::Split("aabbcc", ""); + ASSERT_EQ(expect, result); + } + { + std::vector expect = 
{"aa", "bb", "cc"}; + std::vector result = StringUtils::Split("aa,bb,cc", ","); + ASSERT_EQ(expect, result); + } + { + std::vector expect = {"aa", "bb", "cc"}; + std::vector result = + StringUtils::Split("aa,bb,,cc", ",", /*ignore_empty=*/true); + ASSERT_EQ(expect, result); + } + { + std::vector expect = {"aa", "bb", "", "cc"}; + std::vector result = + StringUtils::Split("aa,bb,,cc", ",", /*ignore_empty=*/false); + ASSERT_EQ(expect, result); + } + { + std::vector> expect = { + {"key1", "value1"}, {"key2", "value2"}, {"key3", "value3"}}; + std::vector> result = StringUtils::Split( + "key1=value1/key2=value2/key3=value3", std::string("/"), std::string("=")); + ASSERT_EQ(expect, result); + } + { + std::vector> expect = {{"key1"}, {"key2"}, {"key3", "value3"}}; + std::vector> result = + StringUtils::Split("key1/key2=/key3=value3", std::string("/"), std::string("=")); + ASSERT_EQ(expect, result); + } + { + std::vector> expect = { + {"key1"}, {"key2", " "}, {"key3", "value3"}}; + std::vector> result = + StringUtils::Split("key1/key2= /key3=value3", std::string("/"), std::string("=")); + ASSERT_EQ(expect, result); + } + { + std::vector> expect = {{"key1", "value1"}, {"key3", "value3"}}; + std::vector> result = + StringUtils::Split("key1=value1//key3=value3", std::string("/"), std::string("=")); + ASSERT_EQ(expect, result); + } + { + std::vector> expect = {}; + std::vector> result = + StringUtils::Split("", std::string("/"), std::string("=")); + ASSERT_EQ(expect, result); + } +} + +TEST_F(StringUtilsTest, TestStringToValueSimple) { + ASSERT_EQ(static_cast(233), StringUtils::StringToValue("233").value()); + ASSERT_EQ(static_cast(10), StringUtils::StringToValue("10").value()); + ASSERT_EQ(std::nullopt, StringUtils::StringToValue("1024")); + ASSERT_EQ(static_cast(34785895352), + StringUtils::StringToValue("34785895352").value()); + ASSERT_EQ(std::nullopt, StringUtils::StringToValue("abc")); + ASSERT_EQ(std::nullopt, StringUtils::StringToValue("")); + + ASSERT_EQ(true, 
StringUtils::StringToValue("1").value()); + ASSERT_EQ(true, StringUtils::StringToValue("true").value()); + ASSERT_EQ(true, StringUtils::StringToValue("TRUE").value()); + ASSERT_EQ(false, StringUtils::StringToValue("0").value()); + ASSERT_EQ(false, StringUtils::StringToValue("false").value()); + ASSERT_EQ(false, StringUtils::StringToValue("FALSE").value()); + ASSERT_EQ(std::nullopt, StringUtils::StringToValue("123")); +} + +TEST_F(StringUtilsTest, TestStringToValueWithBoundaryValue) { + { + // normal case + CheckBoundary("127", "-128"); + CheckBoundary("32767", "-32768"); + CheckBoundary("2147483647", "-2147483648"); + CheckBoundary("4294967295", "0"); + CheckBoundary("9223372036854775807", "-9223372036854775808"); + CheckBoundary("18446744073709551615", "0"); + CheckBoundary("3.4028235e+38", "-3.4028235e+38"); + CheckBoundary("1.7976931348623157e+308", "-1.7976931348623157e+308"); + } + { + // overflow or underflow + CheckOverFlowAndUnderFlow("128", "-129"); + CheckOverFlowAndUnderFlow("32768", "-32769"); + CheckOverFlowAndUnderFlow("2147483648", "-2147483649"); + CheckOverFlowAndUnderFlow("4294967296", "-1"); + CheckOverFlowAndUnderFlow("9223372036854775808", "-9223372036854775809"); + CheckOverFlowAndUnderFlow("18446744073709551616", "-1"); + + CheckOverFlowAndUnderFlow("3.4028235e+39", "-3.4028235e+39"); + CheckOverFlowAndUnderFlow("1.7976931348623157e+309", "-1.7976931348623157e+309"); + } +} + +TEST_F(StringUtilsTest, TestStringToDate) { + { + ASSERT_OK_AND_ASSIGN(auto date, StringUtils::StringToDate("2147483647")); + ASSERT_EQ(date, 2147483647); + } + { + ASSERT_OK_AND_ASSIGN(auto date, StringUtils::StringToDate("-2147483648")); + ASSERT_EQ(date, -2147483648); + } + { + ASSERT_OK_AND_ASSIGN(auto date, StringUtils::StringToDate("1970-01-01")); + ASSERT_EQ(date, 0); + } + { + ASSERT_OK_AND_ASSIGN(auto date, StringUtils::StringToDate("0000-01-01")); + ASSERT_EQ(date, -719528); + } + { + ASSERT_OK_AND_ASSIGN(auto date, StringUtils::StringToDate("9999-12-31")); + 
ASSERT_EQ(date, 2932896); + } + // invalid str + ASSERT_NOK(StringUtils::StringToDate("9223372036854775807")); + ASSERT_NOK(StringUtils::StringToDate("11970-01-02")); + ASSERT_NOK(StringUtils::StringToDate("-1970-01-02")); + ASSERT_NOK(StringUtils::StringToDate("")); + ASSERT_NOK(StringUtils::StringToDate("1970-XX-02")); +} + +TEST_F(StringUtilsTest, TestStringToTimestampMillis) { + TimezoneGuard tz_guard("Asia/Shanghai"); + // "yyyy-MM-dd HH:mm:ss" format + { + ASSERT_OK_AND_ASSIGN(int64_t millis, + StringUtils::StringToTimestampMillis("1970-01-01 00:00:00")); + ASSERT_EQ(millis, -28800000); + } + // "yyyy-MM-dd HH:mm:ss.SSS" format + { + ASSERT_OK_AND_ASSIGN(int64_t millis1, + StringUtils::StringToTimestampMillis("2023-06-01 00:00:00.000")); + ASSERT_OK_AND_ASSIGN(int64_t millis2, + StringUtils::StringToTimestampMillis("2023-06-01 00:00:00.123")); + ASSERT_EQ(millis2 - millis1, 123); + } + // "yyyy-MM-dd" format (date only, time defaults to 00:00:00) + { + ASSERT_OK_AND_ASSIGN(int64_t millis1, StringUtils::StringToTimestampMillis("2023-06-01")); + ASSERT_OK_AND_ASSIGN(int64_t millis2, + StringUtils::StringToTimestampMillis("2023-06-01 00:00:00")); + ASSERT_EQ(millis1, millis2); + } + // Fractional second padding: "1" -> 100ms, "12" -> 120ms + { + ASSERT_OK_AND_ASSIGN(int64_t millis_base, + StringUtils::StringToTimestampMillis("2023-06-01 12:00:00.000")); + ASSERT_OK_AND_ASSIGN(int64_t millis_1, + StringUtils::StringToTimestampMillis("2023-06-01 12:00:00.1")); + ASSERT_EQ(millis_1 - millis_base, 100); + ASSERT_OK_AND_ASSIGN(int64_t millis_12, + StringUtils::StringToTimestampMillis("2023-06-01 12:00:00.12")); + ASSERT_EQ(millis_12 - millis_base, 120); + } + // Invalid strings + ASSERT_NOK(StringUtils::StringToTimestampMillis("")); + ASSERT_NOK(StringUtils::StringToTimestampMillis("not-a-date")); + ASSERT_NOK(StringUtils::StringToTimestampMillis("2023-XX-01 00:00:00")); + // Trailing garbage + ASSERT_NOK(StringUtils::StringToTimestampMillis("2023-06-01 
00:00:00abc")); + ASSERT_NOK(StringUtils::StringToTimestampMillis("2023-06-01 00:00:00.12xyz")); + ASSERT_NOK(StringUtils::StringToTimestampMillis("2023-06-01 00:00:00 ")); + ASSERT_NOK(StringUtils::StringToTimestampMillis("2023-06-01 00:00:00.12 ")); + // Trailing dot with no digits + ASSERT_NOK(StringUtils::StringToTimestampMillis("2023-06-01 00:00:00.")); +} + +TEST_F(StringUtilsTest, TestVectorToString) { + class A { + public: + explicit A(int32_t value) : value_(value) {} + std::string ToString() const { + return std::to_string(value_); + } + + private: + int32_t value_; + }; + + { + std::vector vec = {A(10), A(20), A(30)}; + ASSERT_EQ(StringUtils::VectorToString(vec), "[10, 20, 30]"); + } + { + std::vector> vec = {A(10), A(20), A(30), std::nullopt}; + ASSERT_EQ(StringUtils::VectorToString(vec), "[10, 20, 30, null]"); + } + { + std::vector> vec = {std::make_shared(10), std::make_shared(20), + std::make_shared(30)}; + ASSERT_EQ(StringUtils::VectorToString(vec), "[10, 20, 30]"); + } +} +} // namespace paimon::test diff --git a/src/paimon/testing/utils/timezone_guard.h b/src/paimon/testing/utils/timezone_guard.h new file mode 100644 index 0000000..2b88532 --- /dev/null +++ b/src/paimon/testing/utils/timezone_guard.h @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
/* See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

#pragma once

#include <cstdlib>
#include <ctime>
#include <string>

namespace paimon::test {

/// Scoped override of the `TZ` environment variable, intended for tests.
///
/// The constructor records the current state of `TZ` (both whether it is set and its value),
/// installs the requested zone and calls `tzset()`. The destructor puts the recorded state back
/// and calls `tzset()` again. Guards may be nested; each one restores what it observed.
///
/// No locking is performed around the environment access, so a guard must not be used while
/// other threads of the same process read or modify the environment.
class TimezoneGuard {
 public:
    /// Switches `TZ` to `tz` for the lifetime of this object.
    ///
    /// @param tz Value to install in `TZ`, e.g. "Asia/Shanghai".
    explicit TimezoneGuard(const std::string& tz) {
        if (const char* original = std::getenv("TZ")) {
            // Track presence separately from the value: `TZ=""` is a legal setting and must
            // not be mistaken for "TZ was not set" when restoring.
            had_original_tz_ = true;
            original_tz_ = original;
        }

        setenv("TZ", tz.c_str(), /*replace=*/1);
        tzset();
    }

    /// Restores `TZ` to the state seen by the constructor (set to its old value, or unset).
    ~TimezoneGuard() {
        if (had_original_tz_) {
            setenv("TZ", original_tz_.c_str(), /*replace=*/1);
        } else {
            unsetenv("TZ");
        }
        tzset();
    }

    // A copy would restore the environment a second time when destroyed, so copying is
    // disabled (this also suppresses the implicit move operations).
    TimezoneGuard(const TimezoneGuard&) = delete;
    TimezoneGuard& operator=(const TimezoneGuard&) = delete;

 private:
    /// True when `TZ` existed in the environment at construction time.
    bool had_original_tz_ = false;
    /// Value of `TZ` at construction time; meaningful only if `had_original_tz_` is true.
    std::string original_tz_;
};

}  // namespace paimon::test