diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index 8f4702dacb6b3..2642a6ec1a2f4 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -146,8 +146,7 @@ jobs: ARROW_WITH_SNAPPY: ON ARROW_WITH_ZLIB: ON ARROW_WITH_ZSTD: ON - # System Abseil installed by Homebrew uses C++ 17 - CMAKE_CXX_STANDARD: 17 + GTest_SOURCE: BUNDLED steps: - name: Checkout Arrow uses: actions/checkout@v3 diff --git a/LICENSE.txt b/LICENSE.txt index 6532b8790c332..86cfaf546ca2e 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -653,34 +653,6 @@ SOFTWARE. -------------------------------------------------------------------------------- -The file cpp/src/arrow/vendored/string_view.hpp has the following license - -Boost Software License - Version 1.0 - August 17th, 2003 - -Permission is hereby granted, free of charge, to any person or organization -obtaining a copy of the software and accompanying documentation covered by -this license (the "Software") to use, reproduce, display, distribute, -execute, and transmit the Software, and to prepare derivative works of the -Software, and to permit third-parties to whom the Software is furnished to -do so, all subject to the following: - -The copyright notices in the Software and this entire statement, including -the above license grant, this restriction and the following disclaimer, -must be included in all copies of the Software, in whole or in part, and -all derivative works of the Software, unless such copies or derivative -works are solely in the form of machine-executable object code generated by -a source language processor. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. 
IN NO EVENT -SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE -FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. - --------------------------------------------------------------------------------- - The files in cpp/src/arrow/vendored/xxhash/ have the following license (BSD 2-Clause License) diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 3404af794ded8..3554fdf1158b6 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -5121,7 +5121,7 @@ GArrowFunctionOptions * garrow_function_options_new_raw( const arrow::compute::FunctionOptions *arrow_options) { - arrow::util::string_view arrow_type_name(arrow_options->type_name()); + std::string_view arrow_type_name(arrow_options->type_name()); if (arrow_type_name == "CastOptions") { auto arrow_cast_options = static_cast(arrow_options); diff --git a/c_glib/arrow-glib/input-stream.cpp b/c_glib/arrow-glib/input-stream.cpp index e1e46c7df1065..844c83d629b8f 100644 --- a/c_glib/arrow-glib/input-stream.cpp +++ b/c_glib/arrow-glib/input-stream.cpp @@ -20,7 +20,6 @@ #include #include #include -#include #include #include @@ -34,6 +33,7 @@ #include #include +#include G_BEGIN_DECLS @@ -855,7 +855,7 @@ namespace garrow { } } - arrow::Result Peek(int64_t nbytes) override { + arrow::Result Peek(int64_t nbytes) override { if (!G_IS_BUFFERED_INPUT_STREAM(input_stream_)) { std::string message("[gio-input-stream][peek] " "not peekable input stream: <"); @@ -882,8 +882,7 @@ namespace garrow { if (data_size > static_cast(nbytes)) { data_size = nbytes; } - return arrow::util::string_view(static_cast(data), - data_size); + return std::string_view(static_cast(data), data_size); } arrow::Status Seek(int64_t position) override { diff --git a/c_glib/arrow-glib/scalar.cpp b/c_glib/arrow-glib/scalar.cpp index f8699f34eea9c..ddd35d9ea600e 
100644 --- a/c_glib/arrow-glib/scalar.cpp +++ b/c_glib/arrow-glib/scalar.cpp @@ -250,9 +250,8 @@ garrow_scalar_parse(GArrowDataType *data_type, GError **error) { const auto arrow_data_type = garrow_data_type_get_raw(data_type); - auto arrow_data = - arrow::util::string_view(reinterpret_cast(data), - size); + auto arrow_data = std::string_view(reinterpret_cast(data), + size); auto arrow_scalar_result = arrow::Scalar::Parse(arrow_data_type, arrow_data); if (garrow::check(error, arrow_scalar_result, "[scalar][parse]")) { auto arrow_scalar = *arrow_scalar_result; diff --git a/ci/docker/debian-11-cpp.dockerfile b/ci/docker/debian-11-cpp.dockerfile index 5051ae7f00391..b205f14f6da5f 100644 --- a/ci/docker/debian-11-cpp.dockerfile +++ b/ci/docker/debian-11-cpp.dockerfile @@ -106,6 +106,7 @@ ENV absl_SOURCE=BUNDLED \ CC=gcc \ CXX=g++ \ google_cloud_cpp_storage_SOURCE=BUNDLED \ + GTest_SOURCE=BUNDLED \ ORC_SOURCE=BUNDLED \ PATH=/usr/lib/ccache/:$PATH \ Protobuf_SOURCE=BUNDLED \ diff --git a/ci/scripts/java_jni_macos_build.sh b/ci/scripts/java_jni_macos_build.sh index 8923b8510423a..66100e5d0a0cf 100755 --- a/ci/scripts/java_jni_macos_build.sh +++ b/ci/scripts/java_jni_macos_build.sh @@ -74,6 +74,7 @@ cmake \ -DCMAKE_INSTALL_LIBDIR=lib \ -DCMAKE_INSTALL_PREFIX=${install_dir} \ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \ + -DGTest_SOURCE=BUNDLED \ -DPARQUET_BUILD_EXAMPLES=OFF \ -DPARQUET_BUILD_EXECUTABLES=OFF \ -DPARQUET_REQUIRE_ENCRYPTION=OFF \ diff --git a/ci/scripts/java_jni_manylinux_build.sh b/ci/scripts/java_jni_manylinux_build.sh index 7f6e89cb7a316..7cbed90dd2e21 100755 --- a/ci/scripts/java_jni_manylinux_build.sh +++ b/ci/scripts/java_jni_manylinux_build.sh @@ -84,6 +84,7 @@ cmake \ -DCMAKE_INSTALL_LIBDIR=lib \ -DCMAKE_INSTALL_PREFIX=${ARROW_HOME} \ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \ + -DGTest_SOURCE=BUNDLED \ -DORC_SOURCE=BUNDLED \ -DORC_PROTOBUF_EXECUTABLE=${VCPKG_ROOT}/installed/${VCPKG_TARGET_TRIPLET}/tools/protobuf/protoc \ -DPARQUET_BUILD_EXAMPLES=OFF \ 
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 0ab66c5bf07fc..235369caf2f9c 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -1974,9 +1974,8 @@ macro(build_gtest) set(dummy ">") set(GTEST_CMAKE_ARGS - ${EP_COMMON_TOOLCHAIN} + ${EP_COMMON_CMAKE_ARGS} -DBUILD_SHARED_LIBS=ON - -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_CXX_FLAGS=${GTEST_CMAKE_CXX_FLAGS} -DCMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}=${GTEST_CMAKE_CXX_FLAGS} -DCMAKE_INSTALL_LIBDIR=lib diff --git a/cpp/examples/arrow/join_example.cc b/cpp/examples/arrow/join_example.cc index 7bea588e3ad7e..c29f5e5dbbdc7 100644 --- a/cpp/examples/arrow/join_example.cc +++ b/cpp/examples/arrow/join_example.cc @@ -63,7 +63,7 @@ arrow::Result> CreateDataSetFromCSVData std::shared_ptr input; std::string csv_data = is_left ? kLeftRelationCsvData : kRightRelationCsvData; std::cout << csv_data << std::endl; - arrow::util::string_view sv = csv_data; + std::string_view sv = csv_data; input = std::make_shared(sv); auto read_options = arrow::csv::ReadOptions::Defaults(); auto parse_options = arrow::csv::ParseOptions::Defaults(); diff --git a/cpp/examples/arrow/rapidjson_row_converter.cc b/cpp/examples/arrow/rapidjson_row_converter.cc index defa6de4610b9..3907e72121c6d 100644 --- a/cpp/examples/arrow/rapidjson_row_converter.cc +++ b/cpp/examples/arrow/rapidjson_row_converter.cc @@ -97,7 +97,7 @@ class RowBatchBuilder { for (int64_t i = 0; i < array.length(); ++i) { if (!array.IsNull(i)) { rapidjson::Value str_key(field_->name(), rows_[i].GetAllocator()); - arrow::util::string_view value_view = array.Value(i); + std::string_view value_view = array.Value(i); rapidjson::Value value; value.SetString(value_view.data(), static_cast(value_view.size()), diff --git a/cpp/examples/parquet/parquet_stream_api/stream_reader_writer.cc b/cpp/examples/parquet/parquet_stream_api/stream_reader_writer.cc index 
64ab7af496205..1f7246b78160c 100644 --- a/cpp/examples/parquet/parquet_stream_api/stream_reader_writer.cc +++ b/cpp/examples/parquet/parquet_stream_api/stream_reader_writer.cc @@ -135,10 +135,10 @@ struct TestData { if (i % 2 == 0) return {}; return "Str #" + std::to_string(i); } - static arrow::util::string_view GetStringView(const int i) { + static std::string_view GetStringView(const int i) { static std::string string; string = "StringView #" + std::to_string(i); - return arrow::util::string_view(string); + return std::string_view(string); } static const char* GetCharPtr(const int i) { static std::string string; @@ -190,7 +190,7 @@ void WriteParquetFile() { os.SetMaxRowGroupSize(1000); for (auto i = 0; i < TestData::num_rows; ++i) { - // Output string using 3 different types: std::string, arrow::util::string_view and + // Output string using 3 different types: std::string, std::string_view and // const char *. switch (i % 3) { case 0: diff --git a/cpp/gdb_arrow.py b/cpp/gdb_arrow.py index 5d7e1719afee3..6c3af1680bdae 100644 --- a/cpp/gdb_arrow.py +++ b/cpp/gdb_arrow.py @@ -456,7 +456,7 @@ def value(self): class StdString: """ - A `std::string` (or possibly `string_view`) value. + A `std::string` (or possibly `std::string_view`) value. """ def __init__(self, val): @@ -2163,23 +2163,6 @@ def to_string(self): return f"arrow::Result<{data_type}>({inner})" -class StringViewPrinter: - """ - Pretty-printer for arrow::util::string_view. - """ - - def __init__(self, name, val): - self.val = val - - def to_string(self): - size = int(self.val['size_']) - if size == 0: - return f"arrow::util::string_view of size 0" - else: - data = bytes_literal(self.val['data_'], size) - return f"arrow::util::string_view of size {size}, {data}" - - class FieldPrinter: """ Pretty-printer for arrow::Field. 
@@ -2397,8 +2380,6 @@ def to_string(self): "arrow::SimpleTable": TablePrinter, "arrow::Status": StatusPrinter, "arrow::Table": TablePrinter, - "arrow::util::string_view": StringViewPrinter, - "nonstd::sv_lite::basic_string_view": StringViewPrinter, } diff --git a/cpp/src/arrow/adapters/orc/util.cc b/cpp/src/arrow/adapters/orc/util.cc index dbdb110fb46dc..170aaa1815550 100644 --- a/cpp/src/arrow/adapters/orc/util.cc +++ b/cpp/src/arrow/adapters/orc/util.cc @@ -19,6 +19,7 @@ #include #include +#include #include #include "arrow/array/builder_base.h" @@ -30,7 +31,6 @@ #include "arrow/util/checked_cast.h" #include "arrow/util/decimal.h" #include "arrow/util/range.h" -#include "arrow/util/string_view.h" #include "arrow/visit_data_inline.h" #include "orc/Exceptions.hh" @@ -462,7 +462,7 @@ struct Appender { running_arrow_offset++; return Status::OK(); } - Status VisitValue(util::string_view v) { + Status VisitValue(std::string_view v) { batch->notNull[running_orc_offset] = true; COffsetType data_length = 0; batch->data[running_orc_offset] = reinterpret_cast( @@ -486,7 +486,7 @@ struct Appender { running_arrow_offset++; return Status::OK(); } - Status VisitValue(util::string_view v) { + Status VisitValue(std::string_view v) { batch->notNull[running_orc_offset] = true; const Decimal128 dec_value(array.GetValue(running_arrow_offset)); batch->values[running_orc_offset] = static_cast(dec_value.low_bits()); @@ -507,7 +507,7 @@ struct Appender { running_arrow_offset++; return Status::OK(); } - Status VisitValue(util::string_view v) { + Status VisitValue(std::string_view v) { batch->notNull[running_orc_offset] = true; const Decimal128 dec_value(array.GetValue(running_arrow_offset)); batch->values[running_orc_offset] = @@ -557,7 +557,7 @@ struct FixedSizeBinaryAppender { running_arrow_offset++; return Status::OK(); } - Status VisitValue(util::string_view v) { + Status VisitValue(std::string_view v) { batch->notNull[running_orc_offset] = true; batch->data[running_orc_offset] = 
reinterpret_cast( const_cast(array.GetValue(running_arrow_offset))); diff --git a/cpp/src/arrow/array/array_binary.h b/cpp/src/arrow/array/array_binary.h index cc04d792002b4..7e58a96ff841a 100644 --- a/cpp/src/arrow/array/array_binary.h +++ b/cpp/src/arrow/array/array_binary.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "arrow/array/array_base.h" @@ -32,7 +33,6 @@ #include "arrow/type.h" #include "arrow/util/checked_cast.h" #include "arrow/util/macros.h" -#include "arrow/util/string_view.h" // IWYU pragma: export #include "arrow/util/visibility.h" namespace arrow { @@ -67,15 +67,15 @@ class BaseBinaryArray : public FlatArray { /// /// \param i the value index /// \return the view over the selected value - util::string_view GetView(int64_t i) const { + std::string_view GetView(int64_t i) const { // Account for base offset i += data_->offset; const offset_type pos = raw_value_offsets_[i]; - return util::string_view(reinterpret_cast(raw_data_ + pos), - raw_value_offsets_[i + 1] - pos); + return std::string_view(reinterpret_cast(raw_data_ + pos), + raw_value_offsets_[i + 1] - pos); } - std::optional operator[](int64_t i) const { + std::optional operator[](int64_t i) const { return *IteratorType(*this, i); } @@ -84,7 +84,7 @@ class BaseBinaryArray : public FlatArray { /// /// \param i the value index /// \return the view over the selected value - util::string_view Value(int64_t i) const { return GetView(i); } + std::string_view Value(int64_t i) const { return GetView(i); } /// \brief Get binary value as a std::string /// @@ -236,11 +236,11 @@ class ARROW_EXPORT FixedSizeBinaryArray : public PrimitiveArray { const uint8_t* GetValue(int64_t i) const; const uint8_t* Value(int64_t i) const { return GetValue(i); } - util::string_view GetView(int64_t i) const { - return util::string_view(reinterpret_cast(GetValue(i)), byte_width()); + std::string_view GetView(int64_t i) const { + return std::string_view(reinterpret_cast(GetValue(i)), 
byte_width()); } - std::optional operator[](int64_t i) const { + std::optional operator[](int64_t i) const { return *IteratorType(*this, i); } diff --git a/cpp/src/arrow/array/array_binary_test.cc b/cpp/src/arrow/array/array_binary_test.cc index b7225eb8b7d7e..3bc9bb91a022a 100644 --- a/cpp/src/arrow/array/array_binary_test.cc +++ b/cpp/src/arrow/array/array_binary_test.cc @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -37,7 +38,6 @@ #include "arrow/util/bit_util.h" #include "arrow/util/bitmap_builders.h" #include "arrow/util/checked_cast.h" -#include "arrow/util/string_view.h" #include "arrow/visit_data_inline.h" namespace arrow { @@ -63,7 +63,7 @@ void CheckStringArray(const ArrayType& array, const std::vector& st auto view = array.GetView(i); ASSERT_EQ(value_pos, array.value_offset(i)); ASSERT_EQ(strings[j].size(), view.size()); - ASSERT_EQ(util::string_view(strings[j]), view); + ASSERT_EQ(std::string_view(strings[j]), view); value_pos += static_cast(view.size()); } else { ASSERT_TRUE(array.IsNull(i)); @@ -256,7 +256,7 @@ class TestStringArray : public ::testing::Test { } Status ValidateFull(int64_t length, std::vector offsets, - util::string_view data, int64_t offset = 0) { + std::string_view data, int64_t offset = 0) { ArrayType arr(length, Buffer::Wrap(offsets), std::make_shared(data), /*null_bitmap=*/nullptr, /*null_count=*/0, offset); return arr.ValidateFull(); @@ -373,7 +373,7 @@ class TestUTF8Array : public ::testing::Test { using ArrayType = typename TypeTraits::ArrayType; Status ValidateUTF8(int64_t length, std::vector offsets, - util::string_view data, int64_t offset = 0) { + std::string_view data, int64_t offset = 0) { ArrayType arr(length, Buffer::Wrap(offsets), std::make_shared(data), /*null_bitmap=*/nullptr, /*null_count=*/0, offset); return arr.ValidateUTF8(); @@ -867,12 +867,12 @@ struct BinaryAppender { return Status::OK(); } - Status VisitValue(util::string_view v) { + Status VisitValue(std::string_view v) { 
data.push_back(v); return Status::OK(); } - std::vector data; + std::vector data; }; template diff --git a/cpp/src/arrow/array/array_dict_test.cc b/cpp/src/arrow/array/array_dict_test.cc index 9193e1d21ac9f..bfa732f165f85 100644 --- a/cpp/src/arrow/array/array_dict_test.cc +++ b/cpp/src/arrow/array/array_dict_test.cc @@ -711,7 +711,7 @@ TEST(TestFixedSizeBinaryDictionaryBuilder, ArrayInit) { // Build the dictionary Array auto value_type = fixed_size_binary(4); auto dict_array = ArrayFromJSON(value_type, R"(["abcd", "wxyz"])"); - util::string_view test = "abcd", test2 = "wxyz"; + std::string_view test = "abcd", test2 = "wxyz"; DictionaryBuilder builder(dict_array); ASSERT_OK(builder.Append(test)); ASSERT_OK(builder.Append(test2)); @@ -735,7 +735,7 @@ TEST(TestFixedSizeBinaryDictionaryBuilder, MakeBuilder) { std::unique_ptr boxed_builder; ASSERT_OK(MakeBuilder(default_memory_pool(), dict_type, &boxed_builder)); auto& builder = checked_cast&>(*boxed_builder); - util::string_view test = "abcd", test2 = "wxyz"; + std::string_view test = "abcd", test2 = "wxyz"; ASSERT_OK(builder.Append(test)); ASSERT_OK(builder.Append(test2)); ASSERT_OK(builder.Append(test)); @@ -1317,12 +1317,12 @@ TEST(TestDictionary, ListOfDictionary) { ASSERT_OK(list_builder->Append()); std::vector expected; - for (char a : util::string_view("abc")) { - for (char d : util::string_view("def")) { - for (char g : util::string_view("ghi")) { - for (char j : util::string_view("jkl")) { - for (char m : util::string_view("mno")) { - for (char p : util::string_view("pqr")) { + for (char a : std::string_view("abc")) { + for (char d : std::string_view("def")) { + for (char g : std::string_view("ghi")) { + for (char j : std::string_view("jkl")) { + for (char m : std::string_view("mno")) { + for (char p : std::string_view("pqr")) { if ((static_cast(a) + d + g + j + m + p) % 16 == 0) { ASSERT_OK(list_builder->Append()); } diff --git a/cpp/src/arrow/array/array_list_test.cc b/cpp/src/arrow/array/array_list_test.cc 
index 373b71b85f381..f8c24b71e06e9 100644 --- a/cpp/src/arrow/array/array_list_test.cc +++ b/cpp/src/arrow/array/array_list_test.cc @@ -647,11 +647,11 @@ TEST_F(TestMapArray, Equality) { std::shared_ptr array, equal_array, unequal_array; std::vector equal_offsets = {0, 1, 2, 5, 6, 7, 8, 10}; - std::vector equal_keys = {"a", "a", "a", "b", "c", - "a", "a", "a", "a", "b"}; + std::vector equal_keys = {"a", "a", "a", "b", "c", + "a", "a", "a", "a", "b"}; std::vector equal_values = {1, 2, 3, 4, 5, 2, 2, 2, 5, 6}; std::vector unequal_offsets = {0, 1, 4, 7}; - std::vector unequal_keys = {"a", "a", "b", "c", "a", "b", "c"}; + std::vector unequal_keys = {"a", "a", "b", "c", "a", "b", "c"}; std::vector unequal_values = {1, 2, 2, 2, 3, 4, 5}; // setup two equal arrays diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc index 9256d4ad0b7f2..c00e54ecb803b 100644 --- a/cpp/src/arrow/array/array_test.cc +++ b/cpp/src/arrow/array/array_test.cc @@ -2254,12 +2254,12 @@ struct FWBinaryAppender { return Status::OK(); } - Status VisitValue(util::string_view v) { + Status VisitValue(std::string_view v) { data.push_back(v); return Status::OK(); } - std::vector data; + std::vector data; }; TEST_F(TestFWBinaryArray, ArraySpanVisitor) { diff --git a/cpp/src/arrow/array/builder_base.cc b/cpp/src/arrow/array/builder_base.cc index ff37cee5ba1ee..e9d5fb44ac1ef 100644 --- a/cpp/src/arrow/array/builder_base.cc +++ b/cpp/src/arrow/array/builder_base.cc @@ -144,7 +144,7 @@ struct AppendScalarImpl { raw++) { auto scalar = checked_cast::ScalarType*>(raw->get()); if (scalar->is_valid) { - builder->UnsafeAppend(util::string_view{*scalar->value}); + builder->UnsafeAppend(std::string_view{*scalar->value}); } else { builder->UnsafeAppendNull(); } diff --git a/cpp/src/arrow/array/builder_binary.cc b/cpp/src/arrow/array/builder_binary.cc index fd1be17981698..88c17454034e3 100644 --- a/cpp/src/arrow/array/builder_binary.cc +++ b/cpp/src/arrow/array/builder_binary.cc @@ -123,10 
+123,10 @@ const uint8_t* FixedSizeBinaryBuilder::GetValue(int64_t i) const { return data_ptr + i * byte_width_; } -util::string_view FixedSizeBinaryBuilder::GetView(int64_t i) const { +std::string_view FixedSizeBinaryBuilder::GetView(int64_t i) const { const uint8_t* data_ptr = byte_builder_.data(); - return util::string_view(reinterpret_cast(data_ptr + i * byte_width_), - byte_width_); + return std::string_view(reinterpret_cast(data_ptr + i * byte_width_), + byte_width_); } // ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/array/builder_binary.h b/cpp/src/arrow/array/builder_binary.h index 25cec5c1e2576..274baeca7484f 100644 --- a/cpp/src/arrow/array/builder_binary.h +++ b/cpp/src/arrow/array/builder_binary.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "arrow/array/array_base.h" @@ -36,7 +37,6 @@ #include "arrow/status.h" #include "arrow/type.h" #include "arrow/util/macros.h" -#include "arrow/util/string_view.h" // IWYU pragma: export #include "arrow/util/visibility.h" namespace arrow { @@ -77,7 +77,7 @@ class BaseBinaryBuilder : public ArrayBuilder { return Append(reinterpret_cast(value), length); } - Status Append(util::string_view value) { + Status Append(std::string_view value) { return Append(value.data(), static_cast(value.size())); } @@ -93,7 +93,7 @@ class BaseBinaryBuilder : public ArrayBuilder { return Status::OK(); } - Status ExtendCurrent(util::string_view value) { + Status ExtendCurrent(std::string_view value) { return ExtendCurrent(reinterpret_cast(value.data()), static_cast(value.size())); } @@ -150,7 +150,7 @@ class BaseBinaryBuilder : public ArrayBuilder { UnsafeAppend(value.c_str(), static_cast(value.size())); } - void UnsafeAppend(util::string_view value) { + void UnsafeAppend(std::string_view value) { UnsafeAppend(value.data(), static_cast(value.size())); } @@ -159,7 +159,7 @@ class BaseBinaryBuilder : public ArrayBuilder { value_data_builder_.UnsafeAppend(value, 
length); } - void UnsafeExtendCurrent(util::string_view value) { + void UnsafeExtendCurrent(std::string_view value) { UnsafeExtendCurrent(reinterpret_cast(value.data()), static_cast(value.size())); } @@ -370,10 +370,10 @@ class BaseBinaryBuilder : public ArrayBuilder { /// Temporary access to a value. /// /// This view becomes invalid on the next modifying operation. - util::string_view GetView(int64_t i) const { + std::string_view GetView(int64_t i) const { offset_type value_length; const uint8_t* value_data = GetValue(i, &value_length); - return util::string_view(reinterpret_cast(value_data), value_length); + return std::string_view(reinterpret_cast(value_data), value_length); } // Cannot make this a static attribute because of linking issues @@ -476,7 +476,7 @@ class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder { return Append(reinterpret_cast(value)); } - Status Append(const util::string_view& view) { + Status Append(const std::string_view& view) { ARROW_RETURN_NOT_OK(Reserve(1)); UnsafeAppend(view); return Status::OK(); @@ -490,7 +490,7 @@ class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder { Status Append(const Buffer& s) { ARROW_RETURN_NOT_OK(Reserve(1)); - UnsafeAppend(util::string_view(s)); + UnsafeAppend(std::string_view(s)); return Status::OK(); } @@ -500,7 +500,7 @@ class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder { Status Append(const std::array& value) { ARROW_RETURN_NOT_OK(Reserve(1)); UnsafeAppend( - util::string_view(reinterpret_cast(value.data()), value.size())); + std::string_view(reinterpret_cast(value.data()), value.size())); return Status::OK(); } @@ -534,14 +534,14 @@ class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder { UnsafeAppend(reinterpret_cast(value)); } - void UnsafeAppend(util::string_view value) { + void UnsafeAppend(std::string_view value) { #ifndef NDEBUG CheckValueSize(static_cast(value.size())); #endif UnsafeAppend(reinterpret_cast(value.data())); } - void UnsafeAppend(const 
Buffer& s) { UnsafeAppend(util::string_view(s)); } + void UnsafeAppend(const Buffer& s) { UnsafeAppend(std::string_view(s)); } void UnsafeAppend(const std::shared_ptr& s) { UnsafeAppend(*s); } @@ -590,7 +590,7 @@ class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder { /// Temporary access to a value. /// /// This view becomes invalid on the next modifying operation. - util::string_view GetView(int64_t i) const; + std::string_view GetView(int64_t i) const; static constexpr int64_t memory_limit() { return std::numeric_limits::max() - 1; @@ -658,7 +658,7 @@ class ARROW_EXPORT ChunkedBinaryBuilder { return builder_->Append(value, length); } - Status Append(const util::string_view& value) { + Status Append(const std::string_view& value) { return Append(reinterpret_cast(value.data()), static_cast(value.size())); } diff --git a/cpp/src/arrow/array/builder_decimal.cc b/cpp/src/arrow/array/builder_decimal.cc index bd7615a730939..96d6b60932be4 100644 --- a/cpp/src/arrow/array/builder_decimal.cc +++ b/cpp/src/arrow/array/builder_decimal.cc @@ -52,7 +52,7 @@ void Decimal128Builder::UnsafeAppend(Decimal128 value) { UnsafeAppendToBitmap(true); } -void Decimal128Builder::UnsafeAppend(util::string_view value) { +void Decimal128Builder::UnsafeAppend(std::string_view value) { FixedSizeBinaryBuilder::UnsafeAppend(value); } @@ -87,7 +87,7 @@ void Decimal256Builder::UnsafeAppend(const Decimal256& value) { UnsafeAppendToBitmap(true); } -void Decimal256Builder::UnsafeAppend(util::string_view value) { +void Decimal256Builder::UnsafeAppend(std::string_view value) { FixedSizeBinaryBuilder::UnsafeAppend(value); } diff --git a/cpp/src/arrow/array/builder_decimal.h b/cpp/src/arrow/array/builder_decimal.h index 3464203dd4732..2c8953fdec055 100644 --- a/cpp/src/arrow/array/builder_decimal.h +++ b/cpp/src/arrow/array/builder_decimal.h @@ -47,7 +47,7 @@ class ARROW_EXPORT Decimal128Builder : public FixedSizeBinaryBuilder { Status Append(Decimal128 val); void UnsafeAppend(Decimal128 val); 
- void UnsafeAppend(util::string_view val); + void UnsafeAppend(std::string_view val); Status FinishInternal(std::shared_ptr* out) override; @@ -77,7 +77,7 @@ class ARROW_EXPORT Decimal256Builder : public FixedSizeBinaryBuilder { Status Append(const Decimal256& val); void UnsafeAppend(const Decimal256& val); - void UnsafeAppend(util::string_view val); + void UnsafeAppend(std::string_view val); Status FinishInternal(std::shared_ptr* out) override; diff --git a/cpp/src/arrow/array/builder_dict.cc b/cpp/src/arrow/array/builder_dict.cc index d51dd4c041ad3..061fb600412fd 100644 --- a/cpp/src/arrow/array/builder_dict.cc +++ b/cpp/src/arrow/array/builder_dict.cc @@ -188,12 +188,12 @@ GET_OR_INSERT(MonthIntervalType); #undef GET_OR_INSERT -Status DictionaryMemoTable::GetOrInsert(const BinaryType*, util::string_view value, +Status DictionaryMemoTable::GetOrInsert(const BinaryType*, std::string_view value, int32_t* out) { return impl_->GetOrInsert(value, out); } -Status DictionaryMemoTable::GetOrInsert(const LargeBinaryType*, util::string_view value, +Status DictionaryMemoTable::GetOrInsert(const LargeBinaryType*, std::string_view value, int32_t* out) { return impl_->GetOrInsert(value, out); } diff --git a/cpp/src/arrow/array/builder_dict.h b/cpp/src/arrow/array/builder_dict.h index b720f73d7d21e..7bb134ec3875f 100644 --- a/cpp/src/arrow/array/builder_dict.h +++ b/cpp/src/arrow/array/builder_dict.h @@ -54,7 +54,7 @@ struct DictionaryValue { template struct DictionaryValue> { - using type = util::string_view; + using type = std::string_view; using PhysicalType = typename std::conditional::value, BinaryType, LargeBinaryType>::type; @@ -62,7 +62,7 @@ struct DictionaryValue> { template struct DictionaryValue> { - using type = util::string_view; + using type = std::string_view; using PhysicalType = BinaryType; }; @@ -112,8 +112,8 @@ class ARROW_EXPORT DictionaryMemoTable { Status GetOrInsert(const FloatType*, float value, int32_t* out); Status GetOrInsert(const DoubleType*, 
double value, int32_t* out); - Status GetOrInsert(const BinaryType*, util::string_view value, int32_t* out); - Status GetOrInsert(const LargeBinaryType*, util::string_view value, int32_t* out); + Status GetOrInsert(const BinaryType*, std::string_view value, int32_t* out); + Status GetOrInsert(const LargeBinaryType*, std::string_view value, int32_t* out); class DictionaryMemoTableImpl; std::unique_ptr impl_; @@ -257,13 +257,13 @@ class DictionaryBuilderBase : public ArrayBuilder { /// \brief Append a fixed-width string (only for FixedSizeBinaryType) template enable_if_fixed_size_binary Append(const uint8_t* value) { - return Append(util::string_view(reinterpret_cast(value), byte_width_)); + return Append(std::string_view(reinterpret_cast(value), byte_width_)); } /// \brief Append a fixed-width string (only for FixedSizeBinaryType) template enable_if_fixed_size_binary Append(const char* value) { - return Append(util::string_view(value, byte_width_)); + return Append(std::string_view(value, byte_width_)); } /// \brief Append a string (only for binary types) @@ -275,13 +275,13 @@ class DictionaryBuilderBase : public ArrayBuilder { /// \brief Append a string (only for binary types) template enable_if_binary_like Append(const char* value, int32_t length) { - return Append(util::string_view(value, length)); + return Append(std::string_view(value, length)); } /// \brief Append a string (only for string types) template enable_if_string_like Append(const char* value, int32_t length) { - return Append(util::string_view(value, length)); + return Append(std::string_view(value, length)); } /// \brief Append a decimal (only for Decimal128Type) diff --git a/cpp/src/arrow/array/dict_internal.h b/cpp/src/arrow/array/dict_internal.h index a8b69133cfed2..5245c8d0ff313 100644 --- a/cpp/src/arrow/array/dict_internal.h +++ b/cpp/src/arrow/array/dict_internal.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -34,7 +35,6 @@ #include 
"arrow/util/checked_cast.h" #include "arrow/util/hashing.h" #include "arrow/util/logging.h" -#include "arrow/util/string_view.h" namespace arrow { namespace internal { diff --git a/cpp/src/arrow/array/diff.cc b/cpp/src/arrow/array/diff.cc index 16f4f9c7638a8..10802939a7302 100644 --- a/cpp/src/arrow/array/diff.cc +++ b/cpp/src/arrow/array/diff.cc @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -43,7 +44,6 @@ #include "arrow/util/logging.h" #include "arrow/util/range.h" #include "arrow/util/string.h" -#include "arrow/util/string_view.h" #include "arrow/vendored/datetime.h" #include "arrow/visit_type_inline.h" diff --git a/cpp/src/arrow/array/validate.cc b/cpp/src/arrow/array/validate.cc index 05155d64b6acb..56470ac74b0c7 100644 --- a/cpp/src/arrow/array/validate.cc +++ b/cpp/src/arrow/array/validate.cc @@ -54,7 +54,7 @@ struct UTF8DataValidator { int64_t i = 0; return VisitArraySpanInline( data, - [&](util::string_view v) { + [&](std::string_view v) { if (ARROW_PREDICT_FALSE(!util::ValidateUTF8(v))) { return Status::Invalid("Invalid UTF8 sequence at string index ", i); } @@ -675,7 +675,7 @@ struct ValidateArrayImpl { const int32_t precision = type.precision(); return VisitArraySpanInline( data, - [&](util::string_view bytes) { + [&](std::string_view bytes) { DCHECK_EQ(bytes.size(), DecimalType::kByteWidth); CType value(reinterpret_cast(bytes.data())); if (!value.FitsInPrecision(precision)) { diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h index 8be10d282b060..584a33fbdeb35 100644 --- a/cpp/src/arrow/buffer.h +++ b/cpp/src/arrow/buffer.h @@ -21,14 +21,15 @@ #include #include #include +#include #include #include #include "arrow/device.h" #include "arrow/status.h" #include "arrow/type_fwd.h" +#include "arrow/util/bytes_view.h" #include "arrow/util/macros.h" -#include "arrow/util/string_view.h" #include "arrow/util/visibility.h" namespace arrow { @@ -77,7 +78,7 @@ class ARROW_EXPORT Buffer { /// /// \note The memory 
viewed by data must not be deallocated in the lifetime of the /// Buffer; temporary rvalue strings must be stored in an lvalue somewhere - explicit Buffer(util::string_view data) + explicit Buffer(std::string_view data) : Buffer(reinterpret_cast(data.data()), static_cast(data.size())) {} @@ -159,10 +160,10 @@ class ARROW_EXPORT Buffer { /// \note Can throw std::bad_alloc if buffer is large std::string ToString() const; - /// \brief View buffer contents as a util::string_view - /// \return util::string_view - explicit operator util::string_view() const { - return util::string_view(reinterpret_cast(data_), size_); + /// \brief View buffer contents as a std::string_view + /// \return std::string_view + explicit operator std::string_view() const { + return std::string_view(reinterpret_cast(data_), size_); } /// \brief View buffer contents as a util::bytes_view diff --git a/cpp/src/arrow/buffer_test.cc b/cpp/src/arrow/buffer_test.cc index 724db80eba732..fd159dd979793 100644 --- a/cpp/src/arrow/buffer_test.cc +++ b/cpp/src/arrow/buffer_test.cc @@ -204,8 +204,8 @@ Result> MyMemoryManager::ViewBufferTo( } // Like AssertBufferEqual, but doesn't call Buffer::data() -void AssertMyBufferEqual(const Buffer& buffer, util::string_view expected) { - ASSERT_EQ(util::string_view(buffer), expected); +void AssertMyBufferEqual(const Buffer& buffer, std::string_view expected) { + ASSERT_EQ(std::string_view(buffer), expected); } void AssertIsCPUBuffer(const Buffer& buf) { diff --git a/cpp/src/arrow/builder_benchmark.cc b/cpp/src/arrow/builder_benchmark.cc index 97745d4692e10..cf3e7f32d5eda 100644 --- a/cpp/src/arrow/builder_benchmark.cc +++ b/cpp/src/arrow/builder_benchmark.cc @@ -21,6 +21,7 @@ #include #include #include +#include #include #include "benchmark/benchmark.h" @@ -30,7 +31,6 @@ #include "arrow/testing/gtest_util.h" #include "arrow/util/bit_util.h" #include "arrow/util/decimal.h" -#include "arrow/util/string_view.h" namespace arrow { @@ -55,7 +55,7 @@ constexpr int64_t 
kBytesProcessPerRound = kNumberOfElements * sizeof(ValueType); constexpr int64_t kBytesProcessed = kRounds * kBytesProcessPerRound; static const char* kBinaryString = "12345678"; -static arrow::util::string_view kBinaryView(kBinaryString); +static std::string_view kBinaryView(kBinaryString); static void BuildIntArrayNoNulls(benchmark::State& state) { // NOLINT non-const reference for (auto _ : state) { diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc index de531dbc6078d..2a7374fe6f153 100644 --- a/cpp/src/arrow/c/bridge.cc +++ b/cpp/src/arrow/c/bridge.cc @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -40,7 +41,6 @@ #include "arrow/util/logging.h" #include "arrow/util/macros.h" #include "arrow/util/small_vector.h" -#include "arrow/util/string_view.h" #include "arrow/util/value_parsing.h" #include "arrow/visit_type_inline.h" @@ -666,7 +666,7 @@ namespace { static constexpr int64_t kMaxImportRecursionLevel = 64; -Status InvalidFormatString(util::string_view v) { +Status InvalidFormatString(std::string_view v) { return Status::Invalid("Invalid or unsupported format string: '", v, "'"); } @@ -674,13 +674,13 @@ class FormatStringParser { public: FormatStringParser() {} - explicit FormatStringParser(util::string_view v) : view_(v), index_(0) {} + explicit FormatStringParser(std::string_view v) : view_(v), index_(0) {} bool AtEnd() const { return index_ >= view_.length(); } char Next() { return view_[index_++]; } - util::string_view Rest() { return view_.substr(index_); } + std::string_view Rest() { return view_.substr(index_); } Status CheckNext(char c) { if (AtEnd() || Next() != c) { @@ -704,7 +704,7 @@ class FormatStringParser { } template - Result ParseInt(util::string_view v) { + Result ParseInt(std::string_view v) { using ArrowIntType = typename CTypeTraits::ArrowType; IntType value; if (!internal::ParseValue(v.data(), v.size(), &value)) { @@ -729,13 +729,13 @@ class FormatStringParser { } } - SmallVector 
Split(util::string_view v, char delim = ',') { - SmallVector parts; + SmallVector Split(std::string_view v, char delim = ',') { + SmallVector parts; size_t start = 0, end; while (true) { end = v.find_first_of(delim, start); parts.push_back(v.substr(start, end - start)); - if (end == util::string_view::npos) { + if (end == std::string_view::npos) { break; } start = end + 1; @@ -744,7 +744,7 @@ class FormatStringParser { } template - Result> ParseInts(util::string_view v) { + Result> ParseInts(std::string_view v) { auto parts = Split(v); std::vector result; result.reserve(parts.size()); @@ -758,7 +758,7 @@ class FormatStringParser { Status Invalid() { return InvalidFormatString(view_); } protected: - util::string_view view_; + std::string_view view_; size_t index_; }; diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc index bb722c52b67e9..a54da82e10c07 100644 --- a/cpp/src/arrow/c/bridge_test.cc +++ b/cpp/src/arrow/c/bridge_test.cc @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -40,7 +41,6 @@ #include "arrow/util/key_value_metadata.h" #include "arrow/util/logging.h" #include "arrow/util/macros.h" -#include "arrow/util/string_view.h" namespace arrow { diff --git a/cpp/src/arrow/compute/exec/asof_join_node.cc b/cpp/src/arrow/compute/exec/asof_join_node.cc index 35e7b1c6cc6cc..09ef7d722d9ac 100644 --- a/cpp/src/arrow/compute/exec/asof_join_node.cc +++ b/cpp/src/arrow/compute/exec/asof_join_node.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -37,7 +38,6 @@ #include "arrow/util/checked_cast.h" #include "arrow/util/future.h" #include "arrow/util/make_unique.h" -#include "arrow/util/string_view.h" namespace arrow { namespace compute { @@ -1015,15 +1015,15 @@ class AsofJoinNode : public ExecNode { static inline Result FindColIndex(const Schema& schema, const FieldRef& field_ref, - util::string_view key_kind) { + std::string_view key_kind) { auto match_res = 
field_ref.FindOne(schema); if (!match_res.ok()) { return Status::Invalid("Bad join key on table : ", match_res.status().message()); } ARROW_ASSIGN_OR_RAISE(auto match, match_res); if (match.indices().size() != 1) { - return Status::Invalid("AsOfJoinNode does not support a nested ", - to_string(key_kind), "-key ", field_ref.ToString()); + return Status::Invalid("AsOfJoinNode does not support a nested ", key_kind, "-key ", + field_ref.ToString()); } return match.indices()[0]; } diff --git a/cpp/src/arrow/compute/exec/asof_join_node_test.cc b/cpp/src/arrow/compute/exec/asof_join_node_test.cc index 2e4bb06176a00..c8dbd27d7b678 100644 --- a/cpp/src/arrow/compute/exec/asof_join_node_test.cc +++ b/cpp/src/arrow/compute/exec/asof_join_node_test.cc @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "arrow/api.h" @@ -34,7 +35,6 @@ #include "arrow/testing/random.h" #include "arrow/util/checked_cast.h" #include "arrow/util/make_unique.h" -#include "arrow/util/string_view.h" #include "arrow/util/thread_pool.h" #define TRACED_TEST(t_class, t_name, t_body) \ @@ -69,7 +69,7 @@ bool is_temporal_primitive(Type::type type_id) { Result MakeBatchesFromNumString( const std::shared_ptr& schema, - const std::vector& json_strings, int multiplicity = 1) { + const std::vector& json_strings, int multiplicity = 1) { FieldVector num_fields; for (auto field : schema->fields()) { num_fields.push_back( @@ -413,12 +413,12 @@ struct BasicTestTypes { }; struct BasicTest { - BasicTest(const std::vector& l_data, - const std::vector& r0_data, - const std::vector& r1_data, - const std::vector& exp_nokey_data, - const std::vector& exp_emptykey_data, - const std::vector& exp_data, int64_t tolerance) + BasicTest(const std::vector& l_data, + const std::vector& r0_data, + const std::vector& r1_data, + const std::vector& exp_nokey_data, + const std::vector& exp_emptykey_data, + const std::vector& exp_data, int64_t tolerance) : l_data(std::move(l_data)), r0_data(std::move(r0_data)), 
r1_data(std::move(r1_data)), @@ -622,12 +622,12 @@ struct BasicTest { exp_emptykey_batches, exp_batches); } - std::vector l_data; - std::vector r0_data; - std::vector r1_data; - std::vector exp_nokey_data; - std::vector exp_emptykey_data; - std::vector exp_data; + std::vector l_data; + std::vector r0_data; + std::vector r1_data; + std::vector exp_nokey_data; + std::vector exp_emptykey_data; + std::vector exp_data; int64_t tolerance; }; diff --git a/cpp/src/arrow/compute/exec/expression.cc b/cpp/src/arrow/compute/exec/expression.cc index d23838303f7fd..ff59977b6719d 100644 --- a/cpp/src/arrow/compute/exec/expression.cc +++ b/cpp/src/arrow/compute/exec/expression.cc @@ -40,6 +40,7 @@ namespace arrow { using internal::checked_cast; using internal::checked_pointer_cast; +using internal::EndsWith; namespace compute { @@ -117,8 +118,7 @@ std::string PrintDatum(const Datum& datum) { case Type::STRING: case Type::LARGE_STRING: return '"' + - Escape(util::string_view(*datum.scalar_as().value)) + - '"'; + Escape(std::string_view(*datum.scalar_as().value)) + '"'; case Type::BINARY: case Type::FIXED_SIZE_BINARY: @@ -163,8 +163,8 @@ std::string Expression::ToString() const { return binary(Comparison::GetOp(*cmp)); } - constexpr util::string_view kleene = "_kleene"; - if (util::string_view{call->function_name}.ends_with(kleene)) { + constexpr std::string_view kleene = "_kleene"; + if (EndsWith(call->function_name, kleene)) { auto op = call->function_name.substr(0, call->function_name.size() - kleene.size()); return binary(std::move(op)); } diff --git a/cpp/src/arrow/compute/exec/hash_join_dict.cc b/cpp/src/arrow/compute/exec/hash_join_dict.cc index 560b0ea8d4d89..4ce89446d3c77 100644 --- a/cpp/src/arrow/compute/exec/hash_join_dict.cc +++ b/cpp/src/arrow/compute/exec/hash_join_dict.cc @@ -127,7 +127,7 @@ static Result> ConvertImp( } else { const auto& scalar = input.scalar_as(); if (scalar.is_valid) { - const util::string_view data = scalar.view(); + const std::string_view data = 
scalar.view(); DCHECK_EQ(data.size(), sizeof(FROM)); const FROM from = *reinterpret_cast(data.data()); const TO to_value = static_cast(from); diff --git a/cpp/src/arrow/compute/exec/hash_join_node_test.cc b/cpp/src/arrow/compute/exec/hash_join_node_test.cc index b45af65445083..de3592ab086aa 100644 --- a/cpp/src/arrow/compute/exec/hash_join_node_test.cc +++ b/cpp/src/arrow/compute/exec/hash_join_node_test.cc @@ -42,7 +42,7 @@ namespace compute { BatchesWithSchema GenerateBatchesFromString( const std::shared_ptr& schema, - const std::vector& json_strings, int multiplicity = 1) { + const std::vector& json_strings, int multiplicity = 1) { BatchesWithSchema out_batches{{}, schema}; std::vector types; diff --git a/cpp/src/arrow/compute/exec/subtree_test.cc b/cpp/src/arrow/compute/exec/subtree_test.cc index 9e6e86dbd4fcf..908af3be7ef0e 100644 --- a/cpp/src/arrow/compute/exec/subtree_test.cc +++ b/cpp/src/arrow/compute/exec/subtree_test.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -26,9 +27,12 @@ #include "arrow/compute/exec/forest_internal.h" #include "arrow/compute/exec/subtree_internal.h" #include "arrow/testing/gtest_util.h" -#include "arrow/util/string_view.h" +#include "arrow/util/string.h" namespace arrow { + +using internal::StartsWith; + namespace compute { using testing::ContainerEq; @@ -94,18 +98,18 @@ struct TestPathTree { using PT = TestPathTree; -util::string_view RemoveTrailingSlash(util::string_view key) { +std::string_view RemoveTrailingSlash(std::string_view key) { while (!key.empty() && key.back() == '/') { key.remove_suffix(1); } return key; } -bool IsAncestorOf(util::string_view ancestor, util::string_view descendant) { +bool IsAncestorOf(std::string_view ancestor, std::string_view descendant) { // See filesystem/path_util.h ancestor = RemoveTrailingSlash(ancestor); if (ancestor == "") return true; descendant = RemoveTrailingSlash(descendant); - if (!descendant.starts_with(ancestor)) return false; + if 
(!StartsWith(descendant, ancestor)) return false; descendant.remove_prefix(ancestor.size()); if (descendant.empty()) return true; return descendant.front() == '/'; diff --git a/cpp/src/arrow/compute/exec/test_util.cc b/cpp/src/arrow/compute/exec/test_util.cc index 2abe6e9e0290f..efb91a708ab1a 100644 --- a/cpp/src/arrow/compute/exec/test_util.cc +++ b/cpp/src/arrow/compute/exec/test_util.cc @@ -142,8 +142,7 @@ ExecNode* MakeDummyNode(ExecPlan* plan, std::string label, std::vector& types, - util::string_view json) { +ExecBatch ExecBatchFromJSON(const std::vector& types, std::string_view json) { auto fields = ::arrow::internal::MapVector( [](const TypeHolder& th) { return field("", th.GetSharedPtr()); }, types); @@ -153,7 +152,7 @@ ExecBatch ExecBatchFromJSON(const std::vector& types, } ExecBatch ExecBatchFromJSON(const std::vector& types, - const std::vector& shapes, util::string_view json) { + const std::vector& shapes, std::string_view json) { DCHECK_EQ(types.size(), shapes.size()); ExecBatch batch = ExecBatchFromJSON(types, json); @@ -235,9 +234,9 @@ BatchesWithSchema MakeRandomBatches(const std::shared_ptr& schema, return out; } -BatchesWithSchema MakeBatchesFromString( - const std::shared_ptr& schema, - const std::vector& json_strings, int multiplicity) { +BatchesWithSchema MakeBatchesFromString(const std::shared_ptr& schema, + const std::vector& json_strings, + int multiplicity) { BatchesWithSchema out_batches{{}, schema}; std::vector types; diff --git a/cpp/src/arrow/compute/exec/test_util.h b/cpp/src/arrow/compute/exec/test_util.h index 5b6e8226b7e84..ae7eac61e95ab 100644 --- a/cpp/src/arrow/compute/exec/test_util.h +++ b/cpp/src/arrow/compute/exec/test_util.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "arrow/compute/exec.h" @@ -31,7 +32,6 @@ #include "arrow/testing/visibility.h" #include "arrow/util/async_generator.h" #include "arrow/util/pcg_random.h" -#include "arrow/util/string_view.h" namespace arrow { namespace compute { @@ 
-45,7 +45,7 @@ ExecNode* MakeDummyNode(ExecPlan* plan, std::string label, std::vector& types, util::string_view json); +ExecBatch ExecBatchFromJSON(const std::vector& types, std::string_view json); /// \brief Shape qualifier for value types. In certain instances /// (e.g. "map_lookup" kernel), an argument may only be a scalar, where in @@ -54,7 +54,7 @@ enum class ArgShape { ANY, ARRAY, SCALAR }; ARROW_TESTING_EXPORT ExecBatch ExecBatchFromJSON(const std::vector& types, - const std::vector& shapes, util::string_view json); + const std::vector& shapes, std::string_view json); struct BatchesWithSchema { std::vector batches; @@ -109,9 +109,9 @@ BatchesWithSchema MakeRandomBatches(const std::shared_ptr& schema, int num_batches = 10, int batch_size = 4); ARROW_TESTING_EXPORT -BatchesWithSchema MakeBatchesFromString( - const std::shared_ptr& schema, - const std::vector& json_strings, int multiplicity = 1); +BatchesWithSchema MakeBatchesFromString(const std::shared_ptr& schema, + const std::vector& json_strings, + int multiplicity = 1); ARROW_TESTING_EXPORT Result> SortTableOnAllFields(const std::shared_ptr& tab); diff --git a/cpp/src/arrow/compute/exec/tpch_node_test.cc b/cpp/src/arrow/compute/exec/tpch_node_test.cc index 133dbfdf43c4a..dbc5b341d6007 100644 --- a/cpp/src/arrow/compute/exec/tpch_node_test.cc +++ b/cpp/src/arrow/compute/exec/tpch_node_test.cc @@ -17,6 +17,11 @@ #include +#include +#include +#include +#include + #include "arrow/compute/exec/options.h" #include "arrow/compute/exec/test_util.h" #include "arrow/compute/exec/tpch_node.h" @@ -29,14 +34,13 @@ #include "arrow/util/checked_cast.h" #include "arrow/util/make_unique.h" #include "arrow/util/pcg_random.h" +#include "arrow/util/string.h" #include "arrow/util/thread_pool.h" -#include -#include -#include -#include - namespace arrow { + +using internal::StartsWith; + namespace compute { namespace internal { @@ -94,10 +98,10 @@ void VerifyUniqueKey(std::unordered_set* seen, const Datum& d, int32_t } } -void 
VerifyStringAndNumber_Single(const util::string_view& row, - const util::string_view& prefix, const int64_t i, +void VerifyStringAndNumber_Single(const std::string_view& row, + const std::string_view& prefix, const int64_t i, const int32_t* nums, bool verify_padding) { - ASSERT_TRUE(row.starts_with(prefix)) << row << ", prefix=" << prefix << ", i=" << i; + ASSERT_TRUE(StartsWith(row, prefix)) << row << ", prefix=" << prefix << ", i=" << i; const char* num_str = row.data() + prefix.size(); const char* num_str_end = row.data() + row.size(); int64_t num = 0; @@ -124,7 +128,7 @@ void VerifyStringAndNumber_Single(const util::string_view& row, // corresponding row in numbers. Some TPC-H data is padded to 9 zeros, which this function // can optionally verify as well. This string function verifies fixed width columns. void VerifyStringAndNumber_FixedWidth(const Datum& strings, const Datum& numbers, - int byte_width, const util::string_view& prefix, + int byte_width, const std::string_view& prefix, bool verify_padding = true) { int64_t length = strings.length(); const char* str = reinterpret_cast(strings.array()->buffers[1]->data()); @@ -137,14 +141,14 @@ void VerifyStringAndNumber_FixedWidth(const Datum& strings, const Datum& numbers for (int64_t i = 0; i < length; i++) { const char* row = str + i * byte_width; - util::string_view view(row, byte_width); + std::string_view view(row, byte_width); VerifyStringAndNumber_Single(view, prefix, i, nums, verify_padding); } } // Same as above but for variable length columns void VerifyStringAndNumber_Varlen(const Datum& strings, const Datum& numbers, - const util::string_view& prefix, + const std::string_view& prefix, bool verify_padding = true) { int64_t length = strings.length(); const int32_t* offsets = @@ -160,7 +164,7 @@ void VerifyStringAndNumber_Varlen(const Datum& strings, const Datum& numbers, for (int64_t i = 0; i < length; i++) { int32_t start = offsets[i]; int32_t str_len = offsets[i + 1] - offsets[i]; - 
util::string_view view(str + start, str_len); + std::string_view view(str + start, str_len); VerifyStringAndNumber_Single(view, prefix, i, nums, verify_padding); } } @@ -253,7 +257,7 @@ void VerifyCorrectNumberOfWords_Varlen(const Datum& d, int num_words) { int32_t start = offsets[i]; int32_t end = offsets[i + 1]; int32_t str_len = end - start; - util::string_view view(str + start, str_len); + std::string_view view(str + start, str_len); bool is_only_alphas_or_spaces = true; for (const char& c : view) { bool is_space = c == ' '; @@ -300,14 +304,14 @@ void VerifyOneOf(const Datum& d, const std::unordered_set& possibilities) // Verifies that each fixed-width row is one of the possibilities void VerifyOneOf(const Datum& d, int32_t byte_width, - const std::unordered_set& possibilities) { + const std::unordered_set& possibilities) { int64_t length = d.length(); const char* col = reinterpret_cast(d.array()->buffers[1]->data()); for (int64_t i = 0; i < length; i++) { const char* row = col + i * byte_width; int32_t row_len = 0; while (row[row_len] && row_len < byte_width) row_len++; - util::string_view view(row, row_len); + std::string_view view(row, row_len); ASSERT_TRUE(possibilities.find(view) != possibilities.end()) << view << " is not a valid string."; } @@ -331,10 +335,10 @@ void CountModifiedComments(const Datum& d, int* good_count, int* bad_count) { for (int64_t i = 0; i < length; i++) { const char* row = str + offsets[i]; int32_t row_length = offsets[i + 1] - offsets[i]; - util::string_view view(row, row_length); - bool customer = view.find("Customer") != util::string_view::npos; - bool recommends = view.find("Recommends") != util::string_view::npos; - bool complaints = view.find("Complaints") != util::string_view::npos; + std::string_view view(row, row_length); + bool customer = view.find("Customer") != std::string_view::npos; + bool recommends = view.find("Recommends") != std::string_view::npos; + bool complaints = view.find("Complaints") != 
std::string_view::npos; if (customer) { ASSERT_TRUE(recommends ^ complaints); if (recommends) *good_count += 1; diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic.cc b/cpp/src/arrow/compute/kernels/aggregate_basic.cc index 400ccbdf9f656..ce8b7e867ec06 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_basic.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_basic.cc @@ -233,11 +233,11 @@ void AddCountDistinctKernels(ScalarAggregateFunction* func) { AddCountDistinctKernel(day_time_interval(), func); AddCountDistinctKernel(month_day_nano_interval(), func); // Binary & String - AddCountDistinctKernel(match::BinaryLike(), func); - AddCountDistinctKernel(match::LargeBinaryLike(), - func); + AddCountDistinctKernel(match::BinaryLike(), func); + AddCountDistinctKernel(match::LargeBinaryLike(), + func); // Fixed binary & Decimal - AddCountDistinctKernel( + AddCountDistinctKernel( match::FixedSizeBinaryLike(), func); } diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h b/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h index bd2fe53460832..aa89f8dc3b492 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h +++ b/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h @@ -360,7 +360,7 @@ struct MinMaxState> { return *this; } - void MergeOne(util::string_view value) { + void MergeOne(std::string_view value) { MergeOne(T(reinterpret_cast(value.data()))); } @@ -398,14 +398,14 @@ struct MinMaxStatemin = std::string(value); this->max = std::string(value); } else { - if (value < util::string_view(this->min)) { + if (value < std::string_view(this->min)) { this->min = std::string(value); - } else if (value > util::string_view(this->max)) { + } else if (value > std::string_view(this->max)) { this->max = std::string(value); } } diff --git a/cpp/src/arrow/compute/kernels/aggregate_test.cc b/cpp/src/arrow/compute/kernels/aggregate_test.cc index 8f400b2d249a2..c7ae70e21083c 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_test.cc +++ 
b/cpp/src/arrow/compute/kernels/aggregate_test.cc @@ -942,12 +942,12 @@ class TestCountDistinctKernel : public ::testing::Test { CheckScalar("count_distinct", {input}, Expected(expected_all), &all); } - void Check(const std::shared_ptr& type, util::string_view json, + void Check(const std::shared_ptr& type, std::string_view json, int64_t expected_all, bool has_nulls = true) { Check(ArrayFromJSON(type, json), expected_all, has_nulls); } - void Check(const std::shared_ptr& type, util::string_view json) { + void Check(const std::shared_ptr& type, std::string_view json) { auto input = ScalarFromJSON(type, json); auto zero = ResultWith(Expected(0)); auto one = ResultWith(Expected(1)); diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h index a20b4ce147609..b0001832174c7 100644 --- a/cpp/src/arrow/compute/kernels/codegen_internal.h +++ b/cpp/src/arrow/compute/kernels/codegen_internal.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -47,7 +48,6 @@ #include "arrow/util/logging.h" #include "arrow/util/macros.h" #include "arrow/util/make_unique.h" -#include "arrow/util/string_view.h" #include "arrow/visit_data_inline.h" namespace arrow { @@ -136,7 +136,7 @@ struct GetViewType> { template struct GetViewType::value || is_fixed_size_binary_type::value>> { - using T = util::string_view; + using T = std::string_view; using PhysicalType = T; static T LogicalValue(PhysicalType value) { return value; } @@ -145,7 +145,7 @@ struct GetViewType::value || template <> struct GetViewType { using T = Decimal128; - using PhysicalType = util::string_view; + using PhysicalType = std::string_view; static T LogicalValue(PhysicalType value) { return Decimal128(reinterpret_cast(value.data())); @@ -157,7 +157,7 @@ struct GetViewType { template <> struct GetViewType { using T = Decimal256; - using PhysicalType = util::string_view; + using PhysicalType = std::string_view; static T LogicalValue(PhysicalType 
value) { return Decimal256(reinterpret_cast(value.data())); @@ -271,9 +271,9 @@ struct ArrayIterator> { data(reinterpret_cast(arr.buffers[2].data)), position(0) {} - util::string_view operator()() { + std::string_view operator()() { offset_type next_offset = offsets[++position]; - auto result = util::string_view(data + cur_offset, next_offset - cur_offset); + auto result = std::string_view(data + cur_offset, next_offset - cur_offset); cur_offset = next_offset; return result; } @@ -292,8 +292,8 @@ struct ArrayIterator { width(arr.type->byte_width()), position(arr.offset) {} - util::string_view operator()() { - auto result = util::string_view(data + position * width, width); + std::string_view operator()() { + auto result = std::string_view(data + position * width, width); position++; return result; } @@ -331,7 +331,7 @@ template struct UnboxScalar> { using T = typename Type::c_type; static T Unbox(const Scalar& val) { - util::string_view view = + std::string_view view = checked_cast(val).view(); DCHECK_EQ(view.size(), sizeof(T)); return *reinterpret_cast(view.data()); @@ -340,9 +340,9 @@ struct UnboxScalar> { template struct UnboxScalar> { - using T = util::string_view; + using T = std::string_view; static T Unbox(const Scalar& val) { - if (!val.is_valid) return util::string_view(); + if (!val.is_valid) return std::string_view(); return checked_cast(val).view(); } }; @@ -401,7 +401,7 @@ struct BoxScalar { }; // A VisitArraySpanInline variant that calls its visitor function with logical -// values, such as Decimal128 rather than util::string_view. +// values, such as Decimal128 rather than std::string_view. 
template static typename ::arrow::internal::call_traits::enable_if_return::type diff --git a/cpp/src/arrow/compute/kernels/common.h b/cpp/src/arrow/compute/kernels/common.h index 21244320f3864..bf90d11451251 100644 --- a/cpp/src/arrow/compute/kernels/common.h +++ b/cpp/src/arrow/compute/kernels/common.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -42,7 +43,6 @@ #include "arrow/util/checked_cast.h" #include "arrow/util/logging.h" #include "arrow/util/macros.h" -#include "arrow/util/string_view.h" // IWYU pragma: end_exports diff --git a/cpp/src/arrow/compute/kernels/copy_data_internal.h b/cpp/src/arrow/compute/kernels/copy_data_internal.h index 2e13563980ca4..a4083e7e0650f 100644 --- a/cpp/src/arrow/compute/kernels/copy_data_internal.h +++ b/cpp/src/arrow/compute/kernels/copy_data_internal.h @@ -58,7 +58,7 @@ struct CopyDataUtils { if (!scalar.is_valid) { std::memset(begin, 0x00, width * length); } else { - const util::string_view buffer = scalar.view(); + const std::string_view buffer = scalar.view(); DCHECK_GE(buffer.size(), static_cast(width)); for (int i = 0; i < length; i++) { std::memcpy(begin, buffer.data(), width); diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate.cc b/cpp/src/arrow/compute/kernels/hash_aggregate.cc index 068fcab95e40e..f947cc732f77a 100644 --- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc +++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc @@ -1373,7 +1373,7 @@ struct GroupedMinMaxImpl( batch, - [&](uint32_t g, util::string_view val) { + [&](uint32_t g, std::string_view val) { if (!mins_[g] || val < *mins_[g]) { mins_[g].emplace(val.data(), val.size(), allocator_); } @@ -2092,7 +2092,7 @@ struct GroupedOneImpl::value || Status Consume(const ExecSpan& batch) override { return VisitGroupedValues( batch, - [&](uint32_t g, util::string_view val) -> Status { + [&](uint32_t g, std::string_view val) -> Status { if (!bit_util::GetBit(has_one_.data(), g)) { ones_[g].emplace(val.data(), 
val.size(), allocator_); bit_util::SetBit(has_one_.mutable_data(), g); @@ -2419,7 +2419,7 @@ struct GroupedListImpl::value || num_args_ += num_values; return VisitGroupedValues( batch, - [&](uint32_t group, util::string_view val) -> Status { + [&](uint32_t group, std::string_view val) -> Status { values_.emplace_back(StringType(val.data(), val.size(), allocator_)); return Status::OK(); }, diff --git a/cpp/src/arrow/compute/kernels/row_encoder.cc b/cpp/src/arrow/compute/kernels/row_encoder.cc index 3ab6fc8c337b1..a38fa1db2059a 100644 --- a/cpp/src/arrow/compute/kernels/row_encoder.cc +++ b/cpp/src/arrow/compute/kernels/row_encoder.cc @@ -145,7 +145,7 @@ Status FixedWidthKeyEncoder::Encode(const ExecValue& data, int64_t batch_length, viewed.type = view_ty.get(); VisitArraySpanInline( viewed, - [&](util::string_view bytes) { + [&](std::string_view bytes) { auto& encoded_ptr = *encoded_bytes++; *encoded_ptr++ = kValidByte; memcpy(encoded_ptr, bytes.data(), byte_width_); @@ -160,7 +160,7 @@ Status FixedWidthKeyEncoder::Encode(const ExecValue& data, int64_t batch_length, } else { const auto& scalar = data.scalar_as(); if (scalar.is_valid) { - const util::string_view data = scalar.view(); + const std::string_view data = scalar.view(); DCHECK_EQ(data.size(), static_cast(byte_width_)); for (int64_t i = 0; i < batch_length; i++) { auto& encoded_ptr = *encoded_bytes++; diff --git a/cpp/src/arrow/compute/kernels/row_encoder.h b/cpp/src/arrow/compute/kernels/row_encoder.h index 139b1be4197d8..5fe80e0f50653 100644 --- a/cpp/src/arrow/compute/kernels/row_encoder.h +++ b/cpp/src/arrow/compute/kernels/row_encoder.h @@ -121,7 +121,7 @@ struct VarLengthKeyEncoder : KeyEncoder { int64_t i = 0; VisitArraySpanInline( data.array, - [&](util::string_view bytes) { + [&](std::string_view bytes) { lengths[i++] += kExtraByteForNull + sizeof(Offset) + static_cast(bytes.size()); }, @@ -146,7 +146,7 @@ struct VarLengthKeyEncoder : KeyEncoder { if (data.is_array()) { VisitArraySpanInline( 
data.array, - [&](util::string_view bytes) { + [&](std::string_view bytes) { auto& encoded_ptr = *encoded_bytes++; *encoded_ptr++ = kValidByte; util::SafeStore(encoded_ptr, static_cast(bytes.size())); diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc index be8a445c74af3..7b74e8e5d6072 100644 --- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc @@ -452,7 +452,7 @@ template std::string MakeArray(Elements... elements) { std::vector elements_as_strings = {std::to_string(elements)...}; - std::vector elements_as_views(sizeof...(Elements)); + std::vector elements_as_views(sizeof...(Elements)); std::copy(elements_as_strings.begin(), elements_as_strings.end(), elements_as_views.begin()); diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc index 7a77b63e37a73..4e547ef6ccfbf 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc @@ -57,7 +57,7 @@ struct NumericToStringCastFunctor { RETURN_NOT_OK(VisitArraySpanInline( input, [&](value_type v) { - return formatter(v, [&](util::string_view v) { return builder.Append(v); }); + return formatter(v, [&](std::string_view v) { return builder.Append(v); }); }, [&]() { return builder.AppendNull(); })); @@ -84,7 +84,7 @@ struct TemporalToStringCastFunctor { RETURN_NOT_OK(VisitArraySpanInline( input, [&](value_type v) { - return formatter(v, [&](util::string_view v) { return builder.Append(v); }); + return formatter(v, [&](std::string_view v) { return builder.Append(v); }); }, [&]() { return builder.AppendNull(); })); @@ -126,7 +126,7 @@ struct TemporalToStringCastFunctor { RETURN_NOT_OK(VisitArraySpanInline( input, [&](value_type v) { - return formatter(v, [&](util::string_view v) { return builder.Append(v); }); + return formatter(v, [&](std::string_view v) { 
return builder.Append(v); }); }, [&]() { builder.UnsafeAppendNull(); @@ -196,7 +196,7 @@ struct TemporalToStringCastFunctor { struct Utf8Validator { Status VisitNull() { return Status::OK(); } - Status VisitValue(util::string_view str) { + Status VisitValue(std::string_view str) { if (ARROW_PREDICT_FALSE(!ValidateUTF8Inline(str))) { return Status::Invalid("Invalid UTF8 payload"); } diff --git a/cpp/src/arrow/compute/kernels/scalar_compare.cc b/cpp/src/arrow/compute/kernels/scalar_compare.cc index 290a0e5df66a8..bbd579884770c 100644 --- a/cpp/src/arrow/compute/kernels/scalar_compare.cc +++ b/cpp/src/arrow/compute/kernels/scalar_compare.cc @@ -29,7 +29,7 @@ namespace arrow { using internal::checked_cast; using internal::checked_pointer_cast; -using util::string_view; +using std::string_view; namespace compute { namespace internal { diff --git a/cpp/src/arrow/compute/kernels/scalar_compare_test.cc b/cpp/src/arrow/compute/kernels/scalar_compare_test.cc index 2b834ee2eb367..48fa780b03104 100644 --- a/cpp/src/arrow/compute/kernels/scalar_compare_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_compare_test.cc @@ -42,7 +42,7 @@ using internal::BitmapReader; namespace compute { -using util::string_view; +using std::string_view; template static void ValidateCompare(CompareOptions options, const Datum& lhs, const Datum& rhs, @@ -136,7 +136,7 @@ Datum SimpleScalarArrayCompare(CompareOptions options, const Datum& const Datum& rhs) { bool swap = lhs.is_array(); auto array = std::static_pointer_cast((swap ? lhs : rhs).make_array()); - auto value = util::string_view( + auto value = std::string_view( *std::static_pointer_cast((swap ? 
rhs : lhs).scalar())->value); std::vector bitmap(array->length()); diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc index 8c941934a1eae..bb3ac6635e080 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc @@ -723,7 +723,7 @@ struct IfElseFunctor> { // ASA static Status Call(KernelContext* ctx, const ArraySpan& cond, const Scalar& left, const ArraySpan& right, ExecResult* out) { - util::string_view left_data = internal::UnboxScalar::Unbox(left); + std::string_view left_data = internal::UnboxScalar::Unbox(left); auto left_size = static_cast(left_data.size()); const auto* right_offsets = right.GetValues(1); @@ -754,7 +754,7 @@ struct IfElseFunctor> { const auto* left_offsets = left.GetValues(1); const uint8_t* left_data = left.buffers[2].data; - util::string_view right_data = internal::UnboxScalar::Unbox(right); + std::string_view right_data = internal::UnboxScalar::Unbox(right); auto right_size = static_cast(right_data.size()); // allocate data buffer conservatively @@ -779,10 +779,10 @@ struct IfElseFunctor> { // ASS static Status Call(KernelContext* ctx, const ArraySpan& cond, const Scalar& left, const Scalar& right, ExecResult* out) { - util::string_view left_data = internal::UnboxScalar::Unbox(left); + std::string_view left_data = internal::UnboxScalar::Unbox(left); auto left_size = static_cast(left_data.size()); - util::string_view right_data = internal::UnboxScalar::Unbox(right); + std::string_view right_data = internal::UnboxScalar::Unbox(right); auto right_size = static_cast(right_data.size()); // allocate data buffer conservatively @@ -2314,9 +2314,9 @@ struct CoalesceFunctor> { } RETURN_NOT_OK(builder.ReserveData(static_cast(data_reserve))); - util::string_view fill_value(*scalar.value); + std::string_view fill_value(*scalar.value); VisitArraySpanInline( - left, [&](util::string_view s) { builder.UnsafeAppend(s); }, + left, 
[&](std::string_view s) { builder.UnsafeAppend(s); }, [&]() { builder.UnsafeAppend(fill_value); }); ARROW_ASSIGN_OR_RAISE(auto temp_output, builder.Finish()); diff --git a/cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc b/cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc index 86b7a5597a04f..9d8e33b1d04e7 100644 --- a/cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc @@ -818,7 +818,7 @@ TEST_F(TestIndexInKernel, BinaryResizeTable) { char buf[kBufSize] = "test"; ASSERT_GE(snprintf(buf + 4, sizeof(buf) - 4, "%d", index), 0); - input_builder.UnsafeAppend(util::string_view(buf)); + input_builder.UnsafeAppend(std::string_view(buf)); expected_builder.UnsafeAppend(index); } diff --git a/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc b/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc index c362cfa8d99f8..db5eca79d96cd 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc @@ -26,9 +26,14 @@ #include "arrow/array/builder_nested.h" #include "arrow/compute/kernels/scalar_string_internal.h" +#include "arrow/util/string.h" #include "arrow/util/value_parsing.h" namespace arrow { + +using internal::EndsWith; +using internal::StartsWith; + namespace compute { namespace internal { @@ -38,11 +43,11 @@ namespace { // re2 utilities #ifdef ARROW_WITH_RE2 -util::string_view ToStringView(re2::StringPiece piece) { +std::string_view ToStringView(re2::StringPiece piece) { return {piece.data(), piece.length()}; } -re2::StringPiece ToStringPiece(util::string_view view) { +re2::StringPiece ToStringPiece(std::string_view view) { return {view.data(), view.length()}; } @@ -261,7 +266,7 @@ struct StringBinaryTransformExecBase { // Apply transform RETURN_NOT_OK(VisitArraySpanInline( data1, - [&](util::string_view input_string_view) { + [&](std::string_view input_string_view) { auto input_ncodeunits = 
static_cast(input_string_view.length()); auto input_string = reinterpret_cast(input_string_view.data()); ARROW_ASSIGN_OR_RAISE( @@ -844,7 +849,7 @@ void AddAsciiStringCaseConversion(FunctionRegistry* registry) { // Binary string length struct BinaryLength { - template + template static OutValue Call(KernelContext*, Arg0Value val, Status*) { return static_cast(val.size()); } @@ -1238,7 +1243,7 @@ struct PlainSubstringMatcher { } } - int64_t Find(util::string_view current) const { + int64_t Find(std::string_view current) const { // Phase 2: Find the prefix in the data const auto pattern_length = options_.pattern.size(); int64_t pattern_pos = 0; @@ -1257,7 +1262,7 @@ struct PlainSubstringMatcher { return -1; } - bool Match(util::string_view current) const { return Find(current) >= 0; } + bool Match(std::string_view current) const { return Find(current) >= 0; } }; struct PlainStartsWithMatcher { @@ -1273,9 +1278,8 @@ struct PlainStartsWithMatcher { return ::arrow::internal::make_unique(options); } - bool Match(util::string_view current) const { - // string_view::starts_with is C++20 - return current.substr(0, options_.pattern.size()) == options_.pattern; + bool Match(std::string_view current) const { + return StartsWith(current, options_.pattern); } }; @@ -1292,11 +1296,8 @@ struct PlainEndsWithMatcher { return ::arrow::internal::make_unique(options); } - bool Match(util::string_view current) const { - // string_view::ends_with is C++20 - return current.size() >= options_.pattern.size() && - current.substr(current.size() - options_.pattern.size(), - options_.pattern.size()) == options_.pattern; + bool Match(std::string_view current) const { + return EndsWith(current, options_.pattern); } }; @@ -1319,7 +1320,7 @@ struct RegexSubstringMatcher { regex_match_(options_.pattern, MakeRE2Options(is_utf8, options.ignore_case, literal)) {} - bool Match(util::string_view current) const { + bool Match(std::string_view current) const { auto piece = re2::StringPiece(current.data(), 
current.length()); return RE2::PartialMatch(piece, regex_match_); } @@ -1341,7 +1342,7 @@ struct MatchSubstringImpl { for (int64_t i = 0; i < length; ++i) { const char* current_data = reinterpret_cast(data + offsets[i]); int64_t current_length = offsets[i + 1] - offsets[i]; - if (matcher->Match(util::string_view(current_data, current_length))) { + if (matcher->Match(std::string_view(current_data, current_length))) { bitmap_writer.Set(); } bitmap_writer.Next(); @@ -1660,7 +1661,7 @@ struct FindSubstring { explicit FindSubstring(PlainSubstringMatcher matcher) : matcher_(std::move(matcher)) {} template - OutValue Call(KernelContext*, util::string_view val, Status*) const { + OutValue Call(KernelContext*, std::string_view val, Status*) const { return static_cast(matcher_.Find(val)); } }; @@ -1680,7 +1681,7 @@ struct FindSubstringRegex { } template - OutValue Call(KernelContext*, util::string_view val, Status*) const { + OutValue Call(KernelContext*, std::string_view val, Status*) const { re2::StringPiece piece(val.data(), val.length()); re2::StringPiece match; if (RE2::PartialMatch(piece, *regex_match_, &match)) { @@ -1781,7 +1782,7 @@ struct CountSubstring { explicit CountSubstring(PlainSubstringMatcher matcher) : matcher_(std::move(matcher)) {} template - OutValue Call(KernelContext*, util::string_view val, Status*) const { + OutValue Call(KernelContext*, std::string_view val, Status*) const { OutValue count = 0; uint64_t start = 0; const auto pattern_size = std::max(1, matcher_.options_.pattern.size()); @@ -1815,7 +1816,7 @@ struct CountSubstringRegex { } template - OutValue Call(KernelContext*, util::string_view val, Status*) const { + OutValue Call(KernelContext*, std::string_view val, Status*) const { OutValue count = 0; re2::StringPiece input(val.data(), val.size()); auto last_size = input.size(); @@ -1950,7 +1951,7 @@ struct ReplaceSubstring { RETURN_NOT_OK(VisitArraySpanInline( batch[0].array, - [&](util::string_view s) { + [&](std::string_view s) { 
RETURN_NOT_OK(replacer.ReplaceString(s, &value_data_builder)); offset_builder.UnsafeAppend( static_cast(value_data_builder.length())); @@ -1979,9 +1980,13 @@ struct PlainSubstringReplacer { explicit PlainSubstringReplacer(const ReplaceSubstringOptions& options) : options_(options) {} - Status ReplaceString(util::string_view s, TypedBufferBuilder* builder) const { - const char* i = s.begin(); - const char* end = s.end(); + Status ReplaceString(std::string_view s, TypedBufferBuilder* builder) const { + if (s.empty()) { + // Special-case empty input as s.data() may not be a valid pointer + return Status::OK(); + } + const char* i = s.data(); + const char* end = s.data() + s.length(); int64_t max_replacements = options_.max_replacements; while ((i < end) && (max_replacements != 0)) { const char* pos = @@ -2040,11 +2045,15 @@ struct RegexSubstringReplacer { regex_find_("(" + options_.pattern + ")", MakeRE2Options()), regex_replacement_(options_.pattern, MakeRE2Options()) {} - Status ReplaceString(util::string_view s, TypedBufferBuilder* builder) const { + Status ReplaceString(std::string_view s, TypedBufferBuilder* builder) const { + if (s.empty()) { + // Special-case empty input as s.data() may not be a valid pointer + return Status::OK(); + } re2::StringPiece replacement(options_.replacement); if (options_.max_replacements == -1) { - std::string s_copy(s.to_string()); + std::string s_copy(s); RE2::GlobalReplace(&s_copy, regex_replacement_, replacement); return builder->Append(reinterpret_cast(s_copy.data()), s_copy.length()); @@ -2053,8 +2062,8 @@ struct RegexSubstringReplacer { // Since RE2 does not have the concept of max_replacements, we have to do some work // ourselves. 
// We might do this faster similar to RE2::GlobalReplace using Match and Rewrite - const char* i = s.begin(); - const char* end = s.end(); + const char* i = s.data(); + const char* end = s.data() + s.length(); re2::StringPiece piece(s.data(), s.length()); int64_t max_replacements = options_.max_replacements; @@ -2228,7 +2237,7 @@ struct ExtractRegexBase { args_pointers_start = (group_count > 0) ? args_pointers.data() : &null_arg; } - bool Match(util::string_view s) { + bool Match(std::string_view s) { return RE2::PartialMatchN(ToStringPiece(s), *data.regex, args_pointers_start, group_count); } @@ -2266,7 +2275,7 @@ struct ExtractRegex : public ExtractRegexBase { } auto visit_null = [&]() { return struct_builder->AppendNull(); }; - auto visit_value = [&](util::string_view s) { + auto visit_value = [&](std::string_view s) { if (Match(s)) { for (int i = 0; i < group_count; i++) { RETURN_NOT_OK(field_builders[i]->Append(ToStringView(found_values[i]))); @@ -2669,17 +2678,17 @@ struct BinaryJoin { }; struct SeparatorScalarLookup { - const util::string_view separator; + const std::string_view separator; bool IsNull(int64_t i) { return false; } - util::string_view GetView(int64_t i) { return separator; } + std::string_view GetView(int64_t i) { return separator; } }; struct SeparatorArrayLookup { const ArrayType& separators; bool IsNull(int64_t i) { return separators.IsNull(i); } - util::string_view GetView(int64_t i) { return separators.GetView(i); } + std::string_view GetView(int64_t i) { return separators.GetView(i); } }; // Scalar, array -> array @@ -2742,7 +2751,7 @@ struct BinaryJoin { return Status::OK(); } - util::string_view separator(*separator_scalar.value); + std::string_view separator(*separator_scalar.value); const auto& strings = checked_cast(*lists.values()); const auto list_offsets = lists.raw_value_offsets(); @@ -2795,7 +2804,7 @@ struct BinaryJoin { const ArrayType& separators; bool IsNull(int64_t i) { return separators.IsNull(i); } - util::string_view 
GetView(int64_t i) { return separators.GetView(i); } + std::string_view GetView(int64_t i) { return separators.GetView(i); } }; return JoinStrings(lists.length(), strings, ListArrayOffsetLookup{lists}, SeparatorArrayLookup{separators}, &builder, out); @@ -2868,7 +2877,7 @@ struct BinaryJoinElementWise { RETURN_NOT_OK(builder.Reserve(batch.length)); RETURN_NOT_OK(builder.ReserveData(final_size)); - std::vector valid_cols(batch.num_values()); + std::vector valid_cols(batch.num_values()); for (int64_t row = 0; row < batch.length; row++) { int num_valid = 0; // Not counting separator for (int col = 0; col < batch.num_values(); col++) { @@ -2878,7 +2887,7 @@ struct BinaryJoinElementWise { valid_cols[col] = UnboxScalar::Unbox(scalar); if (col < batch.num_values() - 1) num_valid++; } else { - valid_cols[col] = util::string_view(); + valid_cols[col] = std::string_view(); } } else { const ArraySpan& array = batch[col].array; @@ -2887,11 +2896,11 @@ struct BinaryJoinElementWise { const offset_type* offsets = array.GetValues(1); const uint8_t* data = array.GetValues(2, /*absolute_offset=*/0); const int64_t length = offsets[row + 1] - offsets[row]; - valid_cols[col] = util::string_view( + valid_cols[col] = std::string_view( reinterpret_cast(data + offsets[row]), length); if (col < batch.num_values() - 1) num_valid++; } else { - valid_cols[col] = util::string_view(); + valid_cols[col] = std::string_view(); } } } @@ -2914,7 +2923,7 @@ struct BinaryJoinElementWise { const auto separator = valid_cols.back(); bool first = true; for (int col = 0; col < batch.num_values() - 1; col++) { - util::string_view value = valid_cols[col]; + std::string_view value = valid_cols[col]; if (!value.data()) { switch (options.null_handling) { case JoinOptions::EMIT_NULL: diff --git a/cpp/src/arrow/compute/kernels/scalar_string_internal.h b/cpp/src/arrow/compute/kernels/scalar_string_internal.h index 32731414e089b..defd7c3715794 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_internal.h +++ 
b/cpp/src/arrow/compute/kernels/scalar_string_internal.h @@ -224,7 +224,7 @@ struct StringPredicateFunctor { ArraySpan* out_arr = out->array_span(); ::arrow::internal::GenerateBitsUnrolled( out_arr->buffers[1].data, out_arr->offset, input.length, [&]() -> bool { - util::string_view val = input_it(); + std::string_view val = input_it(); return Predicate::Call(ctx, reinterpret_cast(val.data()), val.size(), &st); }); @@ -307,7 +307,7 @@ struct StringSplitExec { using State = OptionsWrapper; // Keep the temporary storage accross individual values, to minimize reallocations - std::vector parts; + std::vector parts; Options options; explicit StringSplitExec(const Options& options) : options(options) {} @@ -351,7 +351,7 @@ struct StringSplitExec { return Status::OK(); } - Status SplitString(const util::string_view& s, SplitFinder* finder, + Status SplitString(const std::string_view& s, SplitFinder* finder, BuilderType* builder) { const uint8_t* begin = reinterpret_cast(s.data()); const uint8_t* end = begin + s.length(); diff --git a/cpp/src/arrow/compute/kernels/scalar_string_utf8.cc b/cpp/src/arrow/compute/kernels/scalar_string_utf8.cc index 4b3191c825dc5..fb197e13a688b 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_utf8.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string_utf8.cc @@ -524,7 +524,7 @@ struct Utf8NormalizeBase { // Try to decompose the given UTF8 string into the codepoints space, // returning the number of codepoints output. 
- Result DecomposeIntoScratch(util::string_view v) { + Result DecomposeIntoScratch(std::string_view v) { auto decompose = [&]() { return utf8proc_decompose(reinterpret_cast(v.data()), v.size(), @@ -544,7 +544,7 @@ struct Utf8NormalizeBase { return res; } - Result Decompose(util::string_view v, BufferBuilder* data_builder) { + Result Decompose(std::string_view v, BufferBuilder* data_builder) { if (::arrow::util::ValidateAscii(v)) { // Fast path: normalization is a no-op RETURN_NOT_OK(data_builder->Append(v.data(), v.size())); @@ -623,7 +623,7 @@ struct Utf8NormalizeExec : public Utf8NormalizeBase { RETURN_NOT_OK(VisitArraySpanInline( array, - [&](util::string_view v) { + [&](std::string_view v) { ARROW_ASSIGN_OR_RAISE(auto n_bytes, exec.Decompose(v, &data_builder)); offset += n_bytes; *out_offsets++ = static_cast(offset); @@ -656,7 +656,7 @@ void AddUtf8StringNormalize(FunctionRegistry* registry) { // String length struct Utf8Length { - template + template static OutValue Call(KernelContext*, Arg0Value val, Status*) { auto str = reinterpret_cast(val.data()); auto strlen = val.size(); diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc index d7c045d84b079..c0dc747e497a5 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc @@ -1265,7 +1265,7 @@ struct Strptime { out_writer.Next(); null_count++; }; - auto visit_value = [&](util::string_view s) { + auto visit_value = [&](std::string_view s) { int64_t result; if ((*self.parser)(s.data(), s.size(), self.unit, &result)) { *out_data++ = result; @@ -1292,7 +1292,7 @@ struct Strptime { *out_data++ = 0; return Status::OK(); }; - auto visit_value = [&](util::string_view s) { + auto visit_value = [&](std::string_view s) { int64_t result; if ((*self.parser)(s.data(), s.size(), self.unit, &result)) { *out_data++ = result; diff --git a/cpp/src/arrow/compute/kernels/vector_hash.cc 
b/cpp/src/arrow/compute/kernels/vector_hash.cc index c8b5173b8d9ac..c294992d27f0e 100644 --- a/cpp/src/arrow/compute/kernels/vector_hash.cc +++ b/cpp/src/arrow/compute/kernels/vector_hash.cc @@ -517,7 +517,7 @@ struct HashKernelTraits> { template struct HashKernelTraits> { - using HashKernel = RegularHashKernel; + using HashKernel = RegularHashKernel; }; template diff --git a/cpp/src/arrow/compute/kernels/vector_selection_test.cc b/cpp/src/arrow/compute/kernels/vector_selection_test.cc index f98af93eef366..a58825abdab17 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_test.cc @@ -37,7 +37,7 @@ namespace arrow { using internal::checked_cast; using internal::checked_pointer_cast; -using util::string_view; +using std::string_view; namespace compute { diff --git a/cpp/src/arrow/csv/chunker.cc b/cpp/src/arrow/csv/chunker.cc index dc863579db03c..bc1b69cb8ae89 100644 --- a/cpp/src/arrow/csv/chunker.cc +++ b/cpp/src/arrow/csv/chunker.cc @@ -20,13 +20,13 @@ #include #include #include +#include #include #include "arrow/csv/lexing_internal.h" #include "arrow/status.h" #include "arrow/util/logging.h" #include "arrow/util/make_unique.h" -#include "arrow/util/string_view.h" namespace arrow { namespace csv { @@ -269,7 +269,7 @@ class LexingBoundaryFinder : public BoundaryFinder { explicit LexingBoundaryFinder(ParseOptions options) : options_(std::move(options)), lexer_(options_) {} - Status FindFirst(util::string_view partial, util::string_view block, + Status FindFirst(std::string_view partial, std::string_view block, int64_t* out_pos) override { lexer_.Reset(); if (lexer_.ShouldUseBulkFilter(block.data(), block.data() + block.size())) { @@ -280,7 +280,7 @@ class LexingBoundaryFinder : public BoundaryFinder { } template - Status FindFirstInternal(util::string_view partial, util::string_view block, + Status FindFirstInternal(std::string_view partial, std::string_view block, int64_t* out_pos) { const char* 
line_end = lexer_.template ReadLine( partial.data(), partial.data() + partial.size()); @@ -298,7 +298,7 @@ class LexingBoundaryFinder : public BoundaryFinder { return Status::OK(); } - Status FindLast(util::string_view block, int64_t* out_pos) override { + Status FindLast(std::string_view block, int64_t* out_pos) override { lexer_.Reset(); if (lexer_.ShouldUseBulkFilter(block.data(), block.data() + block.size())) { return FindLastInternal(block, out_pos); @@ -308,7 +308,7 @@ class LexingBoundaryFinder : public BoundaryFinder { } template - Status FindLastInternal(util::string_view block, int64_t* out_pos) { + Status FindLastInternal(std::string_view block, int64_t* out_pos) { const char* data = block.data(); const char* const data_end = block.data() + block.size(); @@ -331,7 +331,7 @@ class LexingBoundaryFinder : public BoundaryFinder { return Status::OK(); } - Status FindNth(util::string_view partial, util::string_view block, int64_t count, + Status FindNth(std::string_view partial, std::string_view block, int64_t count, int64_t* out_pos, int64_t* num_found) override { lexer_.Reset(); diff --git a/cpp/src/arrow/csv/converter.cc b/cpp/src/arrow/csv/converter.cc index c07eddffd4305..a06686954ed3e 100644 --- a/cpp/src/arrow/csv/converter.cc +++ b/cpp/src/arrow/csv/converter.cc @@ -125,8 +125,8 @@ struct ValueDecoder { if (quoted && !options_.quoted_strings_can_be_null) { return false; } - return null_trie_.Find( - util::string_view(reinterpret_cast(data), size)) >= 0; + return null_trie_.Find(std::string_view(reinterpret_cast(data), size)) >= + 0; } protected: @@ -166,7 +166,7 @@ struct FixedSizeBinaryValueDecoder : public ValueDecoder { template struct BinaryValueDecoder : public ValueDecoder { - using value_type = util::string_view; + using value_type = std::string_view; using ValueDecoder::ValueDecoder; @@ -252,12 +252,12 @@ struct BooleanValueDecoder : public ValueDecoder { Status Decode(const uint8_t* data, uint32_t size, bool quoted, value_type* out) { // XXX 
should quoted values be allowed at all? - if (false_trie_.Find(util::string_view(reinterpret_cast(data), size)) >= + if (false_trie_.Find(std::string_view(reinterpret_cast(data), size)) >= 0) { *out = false; return Status::OK(); } - if (ARROW_PREDICT_TRUE(true_trie_.Find(util::string_view( + if (ARROW_PREDICT_TRUE(true_trie_.Find(std::string_view( reinterpret_cast(data), size)) >= 0)) { *out = true; return Status::OK(); @@ -288,7 +288,7 @@ struct DecimalValueDecoder : public ValueDecoder { TrimWhiteSpace(&data, &size); Decimal128 decimal; int32_t precision, scale; - util::string_view view(reinterpret_cast(data), size); + std::string_view view(reinterpret_cast(data), size); RETURN_NOT_OK(Decimal128::FromString(view, &decimal, &precision, &scale)); if (precision > type_precision_) { return Status::Invalid("Error converting '", view, "' to ", type_->ToString(), diff --git a/cpp/src/arrow/csv/converter_test.cc b/cpp/src/arrow/csv/converter_test.cc index c32b07d2de40c..ea4e171d57e71 100644 --- a/cpp/src/arrow/csv/converter_test.cc +++ b/cpp/src/arrow/csv/converter_test.cc @@ -655,7 +655,7 @@ TEST(TimestampConversion, UserDefinedParsersWithZone) { AssertConversionError(type, {"01/02/1970,1970-01-03T00:00:00+0000\n"}, {0}, options); } -Decimal128 Dec128(util::string_view value) { +Decimal128 Dec128(std::string_view value) { Decimal128 dec; int32_t scale = 0; int32_t precision = 0; diff --git a/cpp/src/arrow/csv/invalid_row.h b/cpp/src/arrow/csv/invalid_row.h index 8a07b568a352e..4360ceaaea6ac 100644 --- a/cpp/src/arrow/csv/invalid_row.h +++ b/cpp/src/arrow/csv/invalid_row.h @@ -18,8 +18,7 @@ #pragma once #include - -#include "arrow/util/string_view.h" +#include namespace arrow { namespace csv { @@ -36,7 +35,7 @@ struct InvalidRow { /// CSV header rows). int64_t number; /// \brief View of the entire row. 
Memory will be freed after callback returns - const util::string_view text; + const std::string_view text; }; /// \brief Result returned by an InvalidRowHandler diff --git a/cpp/src/arrow/csv/parser.cc b/cpp/src/arrow/csv/parser.cc index 8b060df254065..da3472a9d9a35 100644 --- a/cpp/src/arrow/csv/parser.cc +++ b/cpp/src/arrow/csv/parser.cc @@ -212,7 +212,7 @@ class BlockParserImpl { batch_.num_rows_ + batch_.num_skipped_rows(); InvalidRow row{batch_.num_cols_, num_cols, first_row_ < 0 ? -1 : first_row_ + batch_row_including_skipped, - util::string_view(start, end - start)}; + std::string_view(start, end - start)}; if (options_.invalid_row_handler && options_.invalid_row_handler(row) == InvalidRowResult::Skip) { @@ -508,7 +508,7 @@ class BlockParserImpl { } template - Status ParseSpecialized(const std::vector& views, bool is_final, + Status ParseSpecialized(const std::vector& views, bool is_final, uint32_t* out_size) { internal::PreferredBulkFilterType bulk_filter(options_); @@ -604,7 +604,7 @@ class BlockParserImpl { return Status::OK(); } - Status Parse(const std::vector& data, bool is_final, + Status Parse(const std::vector& data, bool is_final, uint32_t* out_size) { if (options_.quoting) { if (options_.escaping) { @@ -651,21 +651,20 @@ BlockParser::BlockParser(MemoryPool* pool, ParseOptions options, int32_t num_col BlockParser::~BlockParser() {} -Status BlockParser::Parse(const std::vector& data, - uint32_t* out_size) { +Status BlockParser::Parse(const std::vector& data, uint32_t* out_size) { return impl_->Parse(data, false /* is_final */, out_size); } -Status BlockParser::ParseFinal(const std::vector& data, +Status BlockParser::ParseFinal(const std::vector& data, uint32_t* out_size) { return impl_->Parse(data, true /* is_final */, out_size); } -Status BlockParser::Parse(util::string_view data, uint32_t* out_size) { +Status BlockParser::Parse(std::string_view data, uint32_t* out_size) { return impl_->Parse({data}, false /* is_final */, out_size); } -Status 
BlockParser::ParseFinal(util::string_view data, uint32_t* out_size) { +Status BlockParser::ParseFinal(std::string_view data, uint32_t* out_size) { return impl_->Parse({data}, true /* is_final */, out_size); } diff --git a/cpp/src/arrow/csv/parser.h b/cpp/src/arrow/csv/parser.h index fb003faaff6bb..e257d315e3050 100644 --- a/cpp/src/arrow/csv/parser.h +++ b/cpp/src/arrow/csv/parser.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include "arrow/buffer.h" @@ -28,7 +29,6 @@ #include "arrow/csv/type_fwd.h" #include "arrow/status.h" #include "arrow/util/macros.h" -#include "arrow/util/string_view.h" #include "arrow/util/visibility.h" namespace arrow { @@ -169,23 +169,23 @@ class ARROW_EXPORT BlockParser { /// /// Parse a block of CSV data, ingesting up to max_num_rows rows. /// The number of bytes actually parsed is returned in out_size. - Status Parse(util::string_view data, uint32_t* out_size); + Status Parse(std::string_view data, uint32_t* out_size); /// \brief Parse sequential blocks of data /// /// Only the last block is allowed to be truncated. - Status Parse(const std::vector& data, uint32_t* out_size); + Status Parse(const std::vector& data, uint32_t* out_size); /// \brief Parse the final block of data /// /// Like Parse(), but called with the final block in a file. /// The last row may lack a trailing line separator. - Status ParseFinal(util::string_view data, uint32_t* out_size); + Status ParseFinal(std::string_view data, uint32_t* out_size); /// \brief Parse the final sequential blocks of data /// /// Only the last block is allowed to be truncated. 
- Status ParseFinal(const std::vector& data, uint32_t* out_size); + Status ParseFinal(const std::vector& data, uint32_t* out_size); /// \brief Return the number of parsed rows int32_t num_rows() const { return parsed_batch().num_rows(); } diff --git a/cpp/src/arrow/csv/parser_benchmark.cc b/cpp/src/arrow/csv/parser_benchmark.cc index 84495fc542eb0..203cfa4ea02b4 100644 --- a/cpp/src/arrow/csv/parser_benchmark.cc +++ b/cpp/src/arrow/csv/parser_benchmark.cc @@ -20,12 +20,12 @@ #include #include #include +#include #include "arrow/csv/chunker.h" #include "arrow/csv/options.h" #include "arrow/csv/parser.h" #include "arrow/testing/gtest_util.h" -#include "arrow/util/string_view.h" namespace arrow { namespace csv { @@ -77,7 +77,7 @@ static std::string BuildCSVData(const Example& example) { static void BenchmarkCSVChunking(benchmark::State& state, // NOLINT non-const reference const std::string& csv, ParseOptions options) { auto chunker = MakeChunker(options); - auto block = std::make_shared(util::string_view(csv)); + auto block = std::make_shared(std::string_view(csv)); while (state.KeepRunning()) { std::shared_ptr whole, partial; @@ -161,7 +161,7 @@ static void BenchmarkCSVParsing(benchmark::State& state, // NOLINT non-const re while (state.KeepRunning()) { uint32_t parsed_size = 0; - ABORT_NOT_OK(parser.Parse(util::string_view(csv), &parsed_size)); + ABORT_NOT_OK(parser.Parse(std::string_view(csv), &parsed_size)); // Include performance of visiting the parsed values, as that might // vary depending on the parser's internal data structures. diff --git a/cpp/src/arrow/csv/parser_test.cc b/cpp/src/arrow/csv/parser_test.cc index 3fb2f11387df0..960a69c59db5d 100644 --- a/cpp/src/arrow/csv/parser_test.cc +++ b/cpp/src/arrow/csv/parser_test.cc @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. 
+#include #include #include #include @@ -120,7 +121,7 @@ void GetLastRow(const BlockParser& parser, std::vector* out, } } -size_t TotalViewLength(const std::vector& views) { +size_t TotalViewLength(const std::vector& views) { size_t total_view_length = 0; for (const auto& view : views) { total_view_length += view.length(); @@ -128,12 +129,19 @@ size_t TotalViewLength(const std::vector& views) { return total_view_length; } +std::vector Views(const std::vector& strings) { + std::vector views(strings.size()); + std::transform(strings.begin(), strings.end(), views.begin(), + [](const std::string& s) { return std::string_view(s); }); + return views; +} + Status Parse(BlockParser& parser, const std::string& str, uint32_t* out_size) { - return parser.Parse(util::string_view(str), out_size); + return parser.Parse(std::string_view(str), out_size); } Status ParseFinal(BlockParser& parser, const std::string& str, uint32_t* out_size) { - return parser.ParseFinal(util::string_view(str), out_size); + return parser.ParseFinal(std::string_view(str), out_size); } void AssertParseOk(BlockParser& parser, const std::string& str) { @@ -142,7 +150,7 @@ void AssertParseOk(BlockParser& parser, const std::string& str) { ASSERT_EQ(parsed_size, str.size()); } -void AssertParseOk(BlockParser& parser, const std::vector& data) { +void AssertParseOk(BlockParser& parser, const std::vector& data) { uint32_t parsed_size = static_cast(-1); ASSERT_OK(parser.Parse(data, &parsed_size)); ASSERT_EQ(parsed_size, TotalViewLength(data)); @@ -154,7 +162,7 @@ void AssertParseFinal(BlockParser& parser, const std::string& str) { ASSERT_EQ(parsed_size, str.size()); } -void AssertParseFinal(BlockParser& parser, const std::vector& data) { +void AssertParseFinal(BlockParser& parser, const std::vector& data) { uint32_t parsed_size = static_cast(-1); ASSERT_OK(parser.ParseFinal(data, &parsed_size)); ASSERT_EQ(parsed_size, TotalViewLength(data)); @@ -167,15 +175,16 @@ void AssertParsePartial(BlockParser& parser, const 
std::string& str, ASSERT_EQ(parsed_size, expected_size); } -void AssertLastRowEq(const BlockParser& parser, const std::vector expected) { +void AssertLastRowEq(const BlockParser& parser, + const std::vector& expected) { std::vector values; GetLastRow(parser, &values); ASSERT_EQ(parser.num_rows(), expected.size()); ASSERT_EQ(values, expected); } -void AssertLastRowEq(const BlockParser& parser, const std::vector expected, - const std::vector expected_quoted) { +void AssertLastRowEq(const BlockParser& parser, const std::vector& expected, + const std::vector& expected_quoted) { std::vector values; std::vector quoted; GetLastRow(parser, &values, "ed); @@ -185,7 +194,7 @@ void AssertLastRowEq(const BlockParser& parser, const std::vector e } void AssertColumnEq(const BlockParser& parser, int32_t col_index, - const std::vector expected) { + const std::vector& expected) { std::vector values; GetColumn(parser, col_index, &values); ASSERT_EQ(parser.num_rows(), expected.size()); @@ -193,8 +202,8 @@ void AssertColumnEq(const BlockParser& parser, int32_t col_index, } void AssertColumnEq(const BlockParser& parser, int32_t col_index, - const std::vector expected, - const std::vector expected_quoted) { + const std::vector& expected, + const std::vector& expected_quoted) { std::vector values; std::vector quoted; GetColumn(parser, col_index, &values, "ed); @@ -204,7 +213,7 @@ void AssertColumnEq(const BlockParser& parser, int32_t col_index, } void AssertColumnsEq(const BlockParser& parser, - const std::vector> expected) { + const std::vector>& expected) { ASSERT_EQ(parser.num_cols(), expected.size()); for (int32_t col_index = 0; col_index < parser.num_cols(); ++col_index) { AssertColumnEq(parser, col_index, expected[col_index]); @@ -212,8 +221,8 @@ void AssertColumnsEq(const BlockParser& parser, } void AssertColumnsEq(const BlockParser& parser, - const std::vector> expected, - const std::vector> quoted) { + const std::vector>& expected, + const std::vector>& quoted) { 
ASSERT_EQ(parser.num_cols(), expected.size()); for (int32_t col_index = 0; col_index < parser.num_cols(); ++col_index) { AssertColumnEq(parser, col_index, expected[col_index], quoted[col_index]); @@ -238,9 +247,9 @@ TEST(BlockParser, Basics) { { auto csv1 = MakeCSVData({"ab,cd,\n", "ef,,gh\n"}); auto csv2 = MakeCSVData({",ij,kl\n"}); - std::vector csvs = {csv1, csv2}; + std::vector csvs = {csv1, csv2}; BlockParser parser(ParseOptions::Defaults()); - AssertParseOk(parser, {{csv1}, {csv2}}); + AssertParseOk(parser, csvs); AssertColumnsEq(parser, {{"ab", "ef", ""}, {"cd", "", "ij"}, {"", "gh", "kl"}}); AssertLastRowEq(parser, {"", "ij", "kl"}, {false, false, false}); } @@ -392,7 +401,8 @@ TEST(BlockParser, Final) { // Two blocks auto csv1 = MakeCSVData({"ab,cd\n"}); auto csv2 = MakeCSVData({"ef,"}); - AssertParseFinal(parser, {{csv1}, {csv2}}); + std::vector csvs = {csv1, csv2}; + AssertParseFinal(parser, csvs); AssertColumnsEq(parser, {{"ab", "ef"}, {"cd", ""}}); } @@ -596,7 +606,7 @@ TEST(BlockParser, MismatchingNumColumnsHandler) { operator InvalidRowHandler() { return [this](const InvalidRow& row) { // Copy the row to a string since the array behind the string_view can go away - rows.emplace_back(row, row.text.to_string()); + rows.emplace_back(row, row.text); return InvalidRowResult::Skip; }; } diff --git a/cpp/src/arrow/csv/reader.cc b/cpp/src/arrow/csv/reader.cc index d770fa734f561..fdc7fcb1380e5 100644 --- a/cpp/src/arrow/csv/reader.cc +++ b/cpp/src/arrow/csv/reader.cc @@ -406,7 +406,7 @@ class BlockParsingOperator { io_context_.pool(), parse_options_, num_csv_cols_, num_rows_seen_, max_num_rows); std::shared_ptr straddling; - std::vector views; + std::vector views; if (block.partial->size() != 0 || block.completion->size() != 0) { if (block.partial->size() == 0) { straddling = block.completion; @@ -417,9 +417,9 @@ class BlockParsingOperator { straddling, ConcatenateBuffers({block.partial, block.completion}, io_context_.pool())); } - views = 
{util::string_view(*straddling), util::string_view(*block.buffer)}; + views = {std::string_view(*straddling), std::string_view(*block.buffer)}; } else { - views = {util::string_view(*block.buffer)}; + views = {std::string_view(*block.buffer)}; } uint32_t parsed_size; if (block.is_final) { @@ -588,7 +588,7 @@ class ReaderMixin { num_rows_seen_, 1); uint32_t parsed_size = 0; RETURN_NOT_OK(parser.Parse( - util::string_view(reinterpret_cast(data), data_end - data), + std::string_view(reinterpret_cast(data), data_end - data), &parsed_size)); if (parser.num_rows() != 1) { return Status::Invalid( @@ -718,7 +718,7 @@ class ReaderMixin { io_context_.pool(), parse_options_, num_csv_cols_, num_rows_seen_, max_num_rows); std::shared_ptr straddling; - std::vector views; + std::vector views; if (partial->size() != 0 || completion->size() != 0) { if (partial->size() == 0) { straddling = completion; @@ -728,9 +728,9 @@ class ReaderMixin { ARROW_ASSIGN_OR_RAISE( straddling, ConcatenateBuffers({partial, completion}, io_context_.pool())); } - views = {util::string_view(*straddling), util::string_view(*block)}; + views = {std::string_view(*straddling), std::string_view(*block)}; } else { - views = {util::string_view(*block)}; + views = {std::string_view(*block)}; } uint32_t parsed_size; if (is_final) { diff --git a/cpp/src/arrow/csv/test_common.cc b/cpp/src/arrow/csv/test_common.cc index 6ba4ff2e3cfeb..648ad18e3c6a0 100644 --- a/cpp/src/arrow/csv/test_common.cc +++ b/cpp/src/arrow/csv/test_common.cc @@ -35,7 +35,7 @@ void MakeCSVParser(std::vector lines, ParseOptions options, int32_t auto csv = MakeCSVData(lines); auto parser = std::make_shared(options, num_cols); uint32_t out_size; - ASSERT_OK(parser->Parse(util::string_view(csv), &out_size)); + ASSERT_OK(parser->Parse(std::string_view(csv), &out_size)); ASSERT_EQ(out_size, csv.size()) << "trailing CSV data not parsed"; *out = parser; } diff --git a/cpp/src/arrow/csv/writer.cc b/cpp/src/arrow/csv/writer.cc index 
95c2e03a10caf..bb8d555a789f5 100644 --- a/cpp/src/arrow/csv/writer.cc +++ b/cpp/src/arrow/csv/writer.cc @@ -99,7 +99,7 @@ RecordBatchIterator RecordBatchSliceIterator(const RecordBatch& batch, } // Counts the number of quotes in s. -int64_t CountQuotes(arrow::util::string_view s) { +int64_t CountQuotes(std::string_view s) { return static_cast(std::count(s.begin(), s.end(), '"')); } @@ -155,7 +155,7 @@ class ColumnPopulator { // Copies the contents of s to out properly escaping any necessary characters. // Returns the position next to last copied character. -char* Escape(arrow::util::string_view s, char* out) { +char* Escape(std::string_view s, char* out) { for (const char c : s) { *out++ = c; if (c == '"') { @@ -189,7 +189,7 @@ class UnquotedColumnPopulator : public ColumnPopulator { int64_t row_number = 0; VisitArraySpanInline( *casted_array_->data(), - [&](arrow::util::string_view s) { + [&](std::string_view s) { row_lengths[row_number] += static_cast(s.length()); row_number++; }, @@ -202,7 +202,7 @@ class UnquotedColumnPopulator : public ColumnPopulator { Status PopulateRows(char* output, int64_t* offsets) const override { // Function applied to valid values cast to string. 
- auto valid_function = [&](arrow::util::string_view s) { + auto valid_function = [&](std::string_view s) { memcpy(output + *offsets, s.data(), s.length()); CopyEndChars(output + *offsets + s.length(), end_chars_.c_str(), end_chars_.size()); *offsets += static_cast(s.length() + end_chars_.size()); @@ -290,7 +290,7 @@ class QuotedColumnPopulator : public ColumnPopulator { int row_number = 0; VisitArraySpanInline( *input.data(), - [&](arrow::util::string_view s) { + [&](std::string_view s) { row_lengths[row_number] += static_cast(s.length()) + kQuoteCount; row_number++; }, @@ -302,7 +302,7 @@ class QuotedColumnPopulator : public ColumnPopulator { int row_number = 0; VisitArraySpanInline( *input.data(), - [&](arrow::util::string_view s) { + [&](std::string_view s) { // Each quote in the value string needs to be escaped. int64_t escaped_count = CountQuotes(s); row_needs_escaping_[row_number] = escaped_count > 0; @@ -322,7 +322,7 @@ class QuotedColumnPopulator : public ColumnPopulator { auto needs_escaping = row_needs_escaping_.begin(); VisitArraySpanInline( *(casted_array_->data()), - [&](arrow::util::string_view s) { + [&](std::string_view s) { // still needs string content length to be added char* row = output + *offsets; *row++ = '"'; diff --git a/cpp/src/arrow/dataset/dataset_writer.cc b/cpp/src/arrow/dataset/dataset_writer.cc index bad363b381817..d8e00054e1c0b 100644 --- a/cpp/src/arrow/dataset/dataset_writer.cc +++ b/cpp/src/arrow/dataset/dataset_writer.cc @@ -38,7 +38,7 @@ namespace internal { namespace { -constexpr util::string_view kIntegerToken = "{i}"; +constexpr std::string_view kIntegerToken = "{i}"; class Throttle { public: @@ -414,16 +414,16 @@ class DatasetWriterDirectoryQueue { uint32_t file_counter_ = 0; }; -Status ValidateBasenameTemplate(util::string_view basename_template) { - if (basename_template.find(fs::internal::kSep) != util::string_view::npos) { +Status ValidateBasenameTemplate(std::string_view basename_template) { + if 
(basename_template.find(fs::internal::kSep) != std::string_view::npos) { return Status::Invalid("basename_template contained '/'"); } size_t token_start = basename_template.find(kIntegerToken); - if (token_start == util::string_view::npos) { + if (token_start == std::string_view::npos) { return Status::Invalid("basename_template did not contain '", kIntegerToken, "'"); } size_t next_token_start = basename_template.find(kIntegerToken, token_start + 1); - if (next_token_start != util::string_view::npos) { + if (next_token_start != std::string_view::npos) { return Status::Invalid("basename_template contained '", kIntegerToken, "' more than once"); } diff --git a/cpp/src/arrow/dataset/dataset_writer_test.cc b/cpp/src/arrow/dataset/dataset_writer_test.cc index edc9bc8bbc13d..6c9c292739399 100644 --- a/cpp/src/arrow/dataset/dataset_writer_test.cc +++ b/cpp/src/arrow/dataset/dataset_writer_test.cc @@ -130,7 +130,7 @@ class DatasetWriterTestFixture : public testing::Test { << "The file " << expected_path << " was not in the list of files visited"; } - std::shared_ptr ReadAsBatch(util::string_view data, int* num_batches) { + std::shared_ptr ReadAsBatch(std::string_view data, int* num_batches) { std::shared_ptr in_stream = std::make_shared(data); EXPECT_OK_AND_ASSIGN(std::shared_ptr reader, diff --git a/cpp/src/arrow/dataset/discovery.cc b/cpp/src/arrow/dataset/discovery.cc index 25fa7ff2b70ce..a38ec00bb916d 100644 --- a/cpp/src/arrow/dataset/discovery.cc +++ b/cpp/src/arrow/dataset/discovery.cc @@ -30,8 +30,12 @@ #include "arrow/dataset/type_fwd.h" #include "arrow/filesystem/path_util.h" #include "arrow/util/logging.h" +#include "arrow/util/string.h" namespace arrow { + +using internal::StartsWith; + namespace dataset { DatasetFactory::DatasetFactory() : root_partition_(compute::literal(true)) {} @@ -158,10 +162,9 @@ bool StartsWithAnyOf(const std::string& path, const std::vector& pr } auto parts = fs::internal::SplitAbstractPath(path); - return std::any_of(parts.cbegin(), 
parts.cend(), [&](util::string_view part) { - return std::any_of(prefixes.cbegin(), prefixes.cend(), [&](util::string_view prefix) { - return util::string_view(part).starts_with(prefix); - }); + return std::any_of(parts.cbegin(), parts.cend(), [&](std::string_view part) { + return std::any_of(prefixes.cbegin(), prefixes.cend(), + [&](std::string_view prefix) { return StartsWith(part, prefix); }); }); } diff --git a/cpp/src/arrow/dataset/file_csv.cc b/cpp/src/arrow/dataset/file_csv.cc index bfc710105ed4a..be963338b4007 100644 --- a/cpp/src/arrow/dataset/file_csv.cc +++ b/cpp/src/arrow/dataset/file_csv.cc @@ -56,13 +56,13 @@ using RecordBatchGenerator = std::function>( Result> GetColumnNames( const csv::ReadOptions& read_options, const csv::ParseOptions& parse_options, - util::string_view first_block, MemoryPool* pool) { + std::string_view first_block, MemoryPool* pool) { // Skip BOM when reading column names (ARROW-14644, ARROW-17382) auto size = first_block.length(); const uint8_t* data = reinterpret_cast(first_block.data()); ARROW_ASSIGN_OR_RAISE(auto data_no_bom, util::SkipUTF8BOM(data, size)); size = size - static_cast(data_no_bom - data); - first_block = util::string_view(reinterpret_cast(data_no_bom), size); + first_block = std::string_view(reinterpret_cast(data_no_bom), size); if (!read_options.column_names.empty()) { std::unordered_set column_names; for (const auto& s : read_options.column_names) { @@ -78,7 +78,7 @@ Result> GetColumnNames( csv::BlockParser parser(pool, parse_options, /*num_cols=*/-1, /*first_row=*/1, max_num_rows); - RETURN_NOT_OK(parser.Parse(util::string_view{first_block}, &parsed_size)); + RETURN_NOT_OK(parser.Parse(std::string_view{first_block}, &parsed_size)); if (parser.num_rows() != max_num_rows) { return Status::Invalid("Could not read first ", max_num_rows, @@ -104,7 +104,7 @@ Result> GetColumnNames( RETURN_NOT_OK( parser.VisitLastRow([&](const uint8_t* data, uint32_t size, bool quoted) -> Status { - util::string_view 
view{reinterpret_cast(data), size}; + std::string_view view{reinterpret_cast(data), size}; if (column_names.emplace(std::string(view)).second) { return Status::OK(); } @@ -116,7 +116,7 @@ Result> GetColumnNames( static inline Result GetConvertOptions( const CsvFileFormat& format, const ScanOptions* scan_options, - const util::string_view first_block) { + const std::string_view first_block) { ARROW_ASSIGN_OR_RAISE( auto csv_scan_options, GetFragmentScanOptions( diff --git a/cpp/src/arrow/dataset/partition.cc b/cpp/src/arrow/dataset/partition.cc index a9744d0aabf04..48594336878a4 100644 --- a/cpp/src/arrow/dataset/partition.cc +++ b/cpp/src/arrow/dataset/partition.cc @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -37,7 +38,6 @@ #include "arrow/util/int_util_overflow.h" #include "arrow/util/logging.h" #include "arrow/util/make_unique.h" -#include "arrow/util/string_view.h" #include "arrow/util/uri.h" #include "arrow/util/utf8.h" @@ -45,7 +45,7 @@ namespace arrow { using internal::checked_cast; using internal::checked_pointer_cast; -using util::string_view; +using std::string_view; using internal::DictionaryMemoTable; @@ -53,7 +53,7 @@ namespace dataset { namespace { /// Apply UriUnescape, then ensure the results are valid UTF-8. 
-Result SafeUriUnescape(util::string_view encoded) { +Result SafeUriUnescape(std::string_view encoded) { auto decoded = ::arrow::internal::UriUnescape(encoded); if (!util::ValidateUTF8(decoded)) { return Status::Invalid("Partition segment was not valid UTF-8 after URL decoding: ", @@ -482,7 +482,7 @@ class KeyValuePartitioningFactory : public PartitioningFactory { } } - Status InsertRepr(int index, util::string_view repr) { + Status InsertRepr(int index, std::string_view repr) { int dummy; return repr_memos_[index]->GetOrInsert(repr, &dummy); } @@ -738,9 +738,9 @@ Result> HivePartitioning::ParseKey( break; } case SegmentEncoding::Uri: { - auto raw_value = util::string_view(segment).substr(name_end + 1); + auto raw_value = std::string_view(segment).substr(name_end + 1); ARROW_ASSIGN_OR_RAISE(value, SafeUriUnescape(raw_value)); - auto raw_key = util::string_view(segment).substr(0, name_end); + auto raw_key = std::string_view(segment).substr(0, name_end); ARROW_ASSIGN_OR_RAISE(name, SafeUriUnescape(raw_key)); break; } diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc index 0768014b8626f..6cafe10f78a5d 100644 --- a/cpp/src/arrow/dataset/scanner_test.cc +++ b/cpp/src/arrow/dataset/scanner_test.cc @@ -44,7 +44,6 @@ #include "arrow/util/vector.h" using testing::ElementsAre; -using testing::IsEmpty; using testing::UnorderedElementsAreArray; namespace arrow { @@ -1265,11 +1264,11 @@ TEST(ScanOptions, TestMaterializedFields) { // empty dataset, project nothing = nothing materialized opts->dataset_schema = schema({}); set_projection_from_names({}); - EXPECT_THAT(opts->MaterializedFields(), IsEmpty()); + ASSERT_EQ(opts->MaterializedFields().size(), 0); // non-empty dataset, project nothing = nothing materialized opts->dataset_schema = schema({i32, i64}); - EXPECT_THAT(opts->MaterializedFields(), IsEmpty()); + ASSERT_EQ(opts->MaterializedFields().size(), 0); // project nothing, filter on i32 = materialize i32 opts->filter = 
equal(field_ref("i32"), literal(10)); diff --git a/cpp/src/arrow/dataset/test_util.h b/cpp/src/arrow/dataset/test_util.h index fb54dc3a91ab8..e17afd25a34fc 100644 --- a/cpp/src/arrow/dataset/test_util.h +++ b/cpp/src/arrow/dataset/test_util.h @@ -508,11 +508,11 @@ class FileFormatFixtureMixin : public ::testing::Test { bool supported = false; - std::shared_ptr buf = std::make_shared(util::string_view("")); + std::shared_ptr buf = std::make_shared(std::string_view("")); ASSERT_OK_AND_ASSIGN(supported, format_->IsSupported(FileSource(buf))); ASSERT_EQ(supported, false); - buf = std::make_shared(util::string_view("corrupted")); + buf = std::make_shared(std::string_view("corrupted")); ASSERT_OK_AND_ASSIGN(supported, format_->IsSupported(FileSource(buf))); ASSERT_EQ(supported, false); @@ -985,7 +985,7 @@ class JSONRecordBatchFileFormat : public FileFormat { ARROW_ASSIGN_OR_RAISE(auto buffer, file->Read(size)); ARROW_ASSIGN_OR_RAISE(auto schema, Inspect(fragment->source())); - RecordBatchVector batches{RecordBatchFromJSON(schema, util::string_view{*buffer})}; + RecordBatchVector batches{RecordBatchFromJSON(schema, std::string_view{*buffer})}; return MakeVectorGenerator(std::move(batches)); } @@ -1479,7 +1479,7 @@ class WriteFileSystemDatasetMixin : public MakeFileSystemDatasetMixin { } auto expected_struct = ArrayFromJSON(struct_(expected_physical_schema_->fields()), - {file_contents->second}); + file_contents->second); AssertArraysEqual(*expected_struct, *actual_struct, /*verbose=*/true); } diff --git a/cpp/src/arrow/engine/simple_extension_type_internal.h b/cpp/src/arrow/engine/simple_extension_type_internal.h index 66d86088a76c6..c3f0226283d5f 100644 --- a/cpp/src/arrow/engine/simple_extension_type_internal.h +++ b/cpp/src/arrow/engine/simple_extension_type_internal.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include "arrow/extension_type.h" @@ -41,7 +42,7 @@ namespace engine { /// Note: The serialization is a very barebones JSON-like format and 
/// probably shouldn't be hand-edited -template GetStorage(const Params&)> class SimpleExtensionType : public ExtensionType { @@ -67,7 +68,7 @@ class SimpleExtensionType : public ExtensionType { return &::arrow::internal::checked_cast(type).params_; } - std::string extension_name() const override { return kExtensionName.to_string(); } + std::string extension_name() const override { return std::string(kExtensionName); } std::string ToString() const override { return "extension<" + this->Serialize() + ">"; } @@ -101,16 +102,15 @@ class SimpleExtensionType : public ExtensionType { } struct DeserializeImpl { - explicit DeserializeImpl(util::string_view repr) { + explicit DeserializeImpl(std::string_view repr) { Init(kExtensionName, repr, kProperties->size()); kProperties->ForEach(*this); } void Fail() { params_ = std::nullopt; } - void Init(util::string_view class_name, util::string_view repr, - size_t num_properties) { - if (!repr.starts_with(class_name)) return Fail(); + void Init(std::string_view class_name, std::string_view repr, size_t num_properties) { + if (!::arrow::internal::StartsWith(repr, class_name)) return Fail(); repr = repr.substr(class_name.size()); if (repr.empty()) return Fail(); @@ -127,7 +127,7 @@ class SimpleExtensionType : public ExtensionType { if (!params_) return; auto first_colon = members_[i].find_first_of(':'); - if (first_colon == util::string_view::npos) return Fail(); + if (first_colon == std::string_view::npos) return Fail(); auto name = members_[i].substr(0, first_colon); if (name != prop.name()) return Fail(); @@ -135,7 +135,7 @@ class SimpleExtensionType : public ExtensionType { auto value_repr = members_[i].substr(first_colon + 1); typename Property::Type value; try { - std::stringstream ss(value_repr.to_string()); + std::stringstream ss{std::string{value_repr}}; ss >> value; if (!ss.eof()) return Fail(); } catch (...) 
{ @@ -145,7 +145,7 @@ class SimpleExtensionType : public ExtensionType { } std::optional params_; - std::vector members_; + std::vector members_; }; Result> Deserialize( std::shared_ptr storage_type, @@ -179,7 +179,7 @@ class SimpleExtensionType : public ExtensionType { } std::string Finish() { - return kExtensionName.to_string() + "{" + + return std::string(kExtensionName) + "{" + ::arrow::internal::JoinStrings(members_, ",") + "}"; } diff --git a/cpp/src/arrow/engine/substrait/expression_internal.cc b/cpp/src/arrow/engine/substrait/expression_internal.cc index ec0578828a60d..6f181ac0218c4 100644 --- a/cpp/src/arrow/engine/substrait/expression_internal.cc +++ b/cpp/src/arrow/engine/substrait/expression_internal.cc @@ -906,10 +906,9 @@ Result> EncodeSubstraitCa substrait::FunctionArgument* arg = scalar_fn->add_arguments(); if (call.HasEnumArg(i)) { auto enum_val = internal::make_unique(); - ARROW_ASSIGN_OR_RAISE(std::optional enum_arg, - call.GetEnumArg(i)); + ARROW_ASSIGN_OR_RAISE(std::optional enum_arg, call.GetEnumArg(i)); if (enum_arg) { - enum_val->set_specified(enum_arg->to_string()); + enum_val->set_specified(std::string(*enum_arg)); } else { enum_val->set_allocated_unspecified(new google::protobuf::Empty()); } diff --git a/cpp/src/arrow/engine/substrait/ext_test.cc b/cpp/src/arrow/engine/substrait/ext_test.cc index 4b37aa8fcdba3..525af7c946471 100644 --- a/cpp/src/arrow/engine/substrait/ext_test.cc +++ b/cpp/src/arrow/engine/substrait/ext_test.cc @@ -68,7 +68,7 @@ bool operator!=(const Id& id1, const Id& id2) { return !(id1 == id2); } struct TypeName { std::shared_ptr type; - util::string_view name; + std::string_view name; }; static const std::vector kTypeNames = { @@ -87,7 +87,7 @@ static const std::vector kFunctionIds = { {kSubstraitArithmeticFunctionsUri, "add"}, }; -static const std::vector kTempFunctionNames = { +static const std::vector kTempFunctionNames = { "temp_func_1", "temp_func_2", }; @@ -156,7 +156,7 @@ TEST_P(ExtensionIdRegistryTest, 
ReregisterFunctions) { for (Id function_id : kFunctionIds) { ASSERT_RAISES(Invalid, registry->CanAddSubstraitCallToArrow(function_id)); ASSERT_RAISES(Invalid, registry->AddSubstraitCallToArrow( - function_id, function_id.name.to_string())); + function_id, std::string(function_id.name))); } } @@ -206,12 +206,12 @@ TEST(ExtensionIdRegistryTest, RegisterTempFunctions) { for (int i = 0; i < rounds; i++) { auto registry = MakeExtensionIdRegistry(); - for (util::string_view name : kTempFunctionNames) { + for (std::string_view name : kTempFunctionNames) { auto id = Id{kArrowExtTypesUri, name}; ASSERT_OK(registry->CanAddSubstraitCallToArrow(id)); - ASSERT_OK(registry->AddSubstraitCallToArrow(id, name.to_string())); + ASSERT_OK(registry->AddSubstraitCallToArrow(id, std::string(name))); ASSERT_RAISES(Invalid, registry->CanAddSubstraitCallToArrow(id)); - ASSERT_RAISES(Invalid, registry->AddSubstraitCallToArrow(id, name.to_string())); + ASSERT_RAISES(Invalid, registry->AddSubstraitCallToArrow(id, std::string(name))); ASSERT_OK(default_registry->CanAddSubstraitCallToArrow(id)); } } @@ -248,8 +248,8 @@ TEST(ExtensionIdRegistryTest, RegisterNestedTypes) { } TEST(ExtensionIdRegistryTest, RegisterNestedFunctions) { - util::string_view name1 = kTempFunctionNames[0]; - util::string_view name2 = kTempFunctionNames[1]; + std::string_view name1 = kTempFunctionNames[0]; + std::string_view name2 = kTempFunctionNames[1]; auto id1 = Id{kArrowExtTypesUri, name1}; auto id2 = Id{kArrowExtTypesUri, name2}; @@ -259,20 +259,20 @@ TEST(ExtensionIdRegistryTest, RegisterNestedFunctions) { auto registry1 = MakeExtensionIdRegistry(); ASSERT_OK(registry1->CanAddSubstraitCallToArrow(id1)); - ASSERT_OK(registry1->AddSubstraitCallToArrow(id1, name1.to_string())); + ASSERT_OK(registry1->AddSubstraitCallToArrow(id1, std::string(name1))); for (int j = 0; j < rounds; j++) { auto registry2 = MakeExtensionIdRegistry(); ASSERT_OK(registry2->CanAddSubstraitCallToArrow(id2)); - 
ASSERT_OK(registry2->AddSubstraitCallToArrow(id2, name2.to_string())); + ASSERT_OK(registry2->AddSubstraitCallToArrow(id2, std::string(name2))); ASSERT_RAISES(Invalid, registry2->CanAddSubstraitCallToArrow(id2)); - ASSERT_RAISES(Invalid, registry2->AddSubstraitCallToArrow(id2, name2.to_string())); + ASSERT_RAISES(Invalid, registry2->AddSubstraitCallToArrow(id2, std::string(name2))); ASSERT_OK(default_registry->CanAddSubstraitCallToArrow(id2)); } ASSERT_RAISES(Invalid, registry1->CanAddSubstraitCallToArrow(id1)); - ASSERT_RAISES(Invalid, registry1->AddSubstraitCallToArrow(id1, name1.to_string())); + ASSERT_RAISES(Invalid, registry1->AddSubstraitCallToArrow(id1, std::string(name1))); ASSERT_OK(default_registry->CanAddSubstraitCallToArrow(id1)); } } diff --git a/cpp/src/arrow/engine/substrait/extension_set.cc b/cpp/src/arrow/engine/substrait/extension_set.cc index f7fcd1e1279d1..2f7c85c9d5ccf 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.cc +++ b/cpp/src/arrow/engine/substrait/extension_set.cc @@ -25,7 +25,6 @@ #include "arrow/util/hash_util.h" #include "arrow/util/hashing.h" #include "arrow/util/make_unique.h" -#include "arrow/util/string_view.h" namespace arrow { namespace engine { @@ -68,9 +67,9 @@ bool IdHashEq::operator()(Id l, Id r) const { return l.uri == r.uri && l.name == class IdStorageImpl : public IdStorage { public: Id Emplace(Id id) override { - util::string_view owned_uri = EmplaceUri(id.uri); + std::string_view owned_uri = EmplaceUri(id.uri); - util::string_view owned_name; + std::string_view owned_name; auto name_itr = names_.find(id.name); if (name_itr == names_.end()) { owned_names_.emplace_back(id.name); @@ -84,7 +83,7 @@ class IdStorageImpl : public IdStorage { } std::optional Find(Id id) const override { - std::optional maybe_owned_uri = FindUri(id.uri); + std::optional maybe_owned_uri = FindUri(id.uri); if (!maybe_owned_uri) { return std::nullopt; } @@ -97,7 +96,7 @@ class IdStorageImpl : public IdStorage { } } - std::optional 
FindUri(util::string_view uri) const override { + std::optional FindUri(std::string_view uri) const override { auto uri_itr = uris_.find(uri); if (uri_itr == uris_.end()) { return std::nullopt; @@ -105,11 +104,11 @@ class IdStorageImpl : public IdStorage { return *uri_itr; } - util::string_view EmplaceUri(util::string_view uri) override { + std::string_view EmplaceUri(std::string_view uri) override { auto uri_itr = uris_.find(uri); if (uri_itr == uris_.end()) { owned_uris_.emplace_back(uri); - util::string_view owned_uri = owned_uris_.back(); + std::string_view owned_uri = owned_uris_.back(); uris_.insert(owned_uri); return owned_uri; } @@ -117,8 +116,8 @@ class IdStorageImpl : public IdStorage { } private: - std::unordered_set uris_; - std::unordered_set names_; + std::unordered_set uris_; + std::unordered_set names_; std::list owned_uris_; std::list owned_names_; }; @@ -127,7 +126,7 @@ std::unique_ptr IdStorage::Make() { return ::arrow::internal::make_unique(); } -Result> SubstraitCall::GetEnumArg(uint32_t index) const { +Result> SubstraitCall::GetEnumArg(uint32_t index) const { if (index >= size_) { return Status::Invalid("Expected Substrait call to have an enum argument at index ", index, " but it did not have enough arguments"); @@ -176,10 +175,10 @@ void SubstraitCall::SetValueArg(uint32_t index, compute::Expression value_arg) { // a map of what Ids we have seen. 
ExtensionSet::ExtensionSet(const ExtensionIdRegistry* registry) : registry_(registry) {} -Status ExtensionSet::CheckHasUri(util::string_view uri) { +Status ExtensionSet::CheckHasUri(std::string_view uri) { auto it = std::find_if(uris_.begin(), uris_.end(), - [&uri](const std::pair& anchor_uri_pair) { + [&uri](const std::pair& anchor_uri_pair) { return anchor_uri_pair.second == uri; }); if (it != uris_.end()) return Status::OK(); @@ -189,10 +188,10 @@ Status ExtensionSet::CheckHasUri(util::string_view uri) { " was referenced by an extension but was not declared in the ExtensionSet."); } -void ExtensionSet::AddUri(std::pair uri) { +void ExtensionSet::AddUri(std::pair uri) { auto it = std::find_if(uris_.begin(), uris_.end(), - [&uri](const std::pair& anchor_uri_pair) { + [&uri](const std::pair& anchor_uri_pair) { return anchor_uri_pair.second == uri.second; }); if (it != uris_.end()) return; @@ -211,14 +210,14 @@ Status ExtensionSet::AddUri(Id id) { // Creates an extension set from the Substrait plan's top-level extensions block Result ExtensionSet::Make( - std::unordered_map uris, + std::unordered_map uris, std::unordered_map type_ids, std::unordered_map function_ids, const ExtensionIdRegistry* registry) { ExtensionSet set(default_extension_id_registry()); set.registry_ = registry; for (auto& uri : uris) { - std::optional maybe_uri_internal = registry->FindUri(uri.second); + std::optional maybe_uri_internal = registry->FindUri(uri.second); if (maybe_uri_internal) { set.uris_[uri.first] = *maybe_uri_internal; } else { @@ -324,9 +323,9 @@ struct ExtensionIdRegistryImpl : ExtensionIdRegistry { virtual ~ExtensionIdRegistryImpl() {} - std::optional FindUri(util::string_view uri) const override { + std::optional FindUri(std::string_view uri) const override { if (parent_) { - std::optional parent_uri = parent_->FindUri(uri); + std::optional parent_uri = parent_->FindUri(uri); if (parent_uri) { return parent_uri; } @@ -620,7 +619,7 @@ struct ExtensionIdRegistryImpl : 
ExtensionIdRegistry { }; template -using EnumParser = std::function(std::optional)>; +using EnumParser = std::function(std::optional)>; template EnumParser GetEnumParser(const std::vector& options) { @@ -628,12 +627,12 @@ EnumParser GetEnumParser(const std::vector& options) { for (std::size_t i = 0; i < options.size(); i++) { parse_map[options[i]] = static_cast(i + 1); } - return [parse_map](std::optional enum_val) -> Result { + return [parse_map](std::optional enum_val) -> Result { if (!enum_val) { // Assumes 0 is always kUnspecified in Enum return static_cast(0); } - auto maybe_parsed = parse_map.find(enum_val->to_string()); + auto maybe_parsed = parse_map.find(std::string(*enum_val)); if (maybe_parsed == parse_map.end()) { return Status::Invalid("The value ", *enum_val, " is not an expected enum value"); } @@ -655,7 +654,7 @@ static EnumParser kOverflowParser = template Result ParseEnumArg(const SubstraitCall& call, uint32_t arg_index, const EnumParser& parser) { - ARROW_ASSIGN_OR_RAISE(std::optional enum_arg, + ARROW_ASSIGN_OR_RAISE(std::optional enum_arg, call.GetEnumArg(arg_index)); return parser(enum_arg); } @@ -808,7 +807,7 @@ struct DefaultExtensionIdRegistry : ExtensionIdRegistryImpl { // ----------- Extension Types ---------------------------- struct TypeName { std::shared_ptr type; - util::string_view name; + std::string_view name; }; // The type (variation) mappings listed below need to be kept in sync @@ -847,14 +846,14 @@ struct DefaultExtensionIdRegistry : ExtensionIdRegistryImpl { } // Basic binary mappings for (const auto& function_name : - std::vector>{ + std::vector>{ {kSubstraitBooleanFunctionsUri, "xor"}, {kSubstraitComparisonFunctionsUri, "equal"}, {kSubstraitComparisonFunctionsUri, "not_equal"}}) { - DCHECK_OK( - AddSubstraitCallToArrow({function_name.first, function_name.second}, - DecodeOptionlessBasicMapping( - function_name.second.to_string(), /*max_args=*/2))); + DCHECK_OK(AddSubstraitCallToArrow( + {function_name.first, 
function_name.second}, + DecodeOptionlessBasicMapping(std::string(function_name.second), + /*max_args=*/2))); } for (const auto& uri : {kSubstraitComparisonFunctionsUri, kSubstraitDatetimeFunctionsUri}) { diff --git a/cpp/src/arrow/engine/substrait/extension_set.h b/cpp/src/arrow/engine/substrait/extension_set.h index 46c83b81d1644..4df8952ff9a89 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.h +++ b/cpp/src/arrow/engine/substrait/extension_set.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -35,7 +36,6 @@ #include "arrow/result.h" #include "arrow/type_fwd.h" #include "arrow/util/macros.h" -#include "arrow/util/string_view.h" namespace arrow { namespace engine { @@ -60,7 +60,7 @@ constexpr const char* kSubstraitAggregateGenericFunctionsUri = "functions_aggregate_generic.yaml"; struct Id { - util::string_view uri, name; + std::string_view uri, name; bool empty() const { return uri.empty() && name.empty(); } std::string ToString() const; }; @@ -86,7 +86,7 @@ class IdStorage { /// \brief Get an equivalent view pointing into this storage for a URI /// /// If no URI is found then the uri will be copied into storage - virtual util::string_view EmplaceUri(util::string_view uri) = 0; + virtual std::string_view EmplaceUri(std::string_view uri) = 0; /// \brief Get an equivalent id pointing into this storage /// /// If no id is found then nullopt will be returned @@ -94,7 +94,7 @@ class IdStorage { /// \brief Get an equivalent view pointing into this storage for a URI /// /// If no URI is found then nullopt will be returned - virtual std::optional FindUri(util::string_view uri) const = 0; + virtual std::optional FindUri(std::string_view uri) const = 0; static std::unique_ptr Make(); }; @@ -119,7 +119,7 @@ class SubstraitCall { bool is_hash() const { return is_hash_; } bool HasEnumArg(uint32_t index) const; - Result> GetEnumArg(uint32_t index) const; + Result> GetEnumArg(uint32_t index) const; void SetEnumArg(uint32_t index, 
std::optional enum_arg); Result GetValueArg(uint32_t index) const; bool HasValueArg(uint32_t index) const; @@ -174,7 +174,7 @@ class ARROW_ENGINE_EXPORT ExtensionIdRegistry { /// \brief Return a uri view owned by this registry /// /// If the URI has never been emplaced it will return nullopt - virtual std::optional FindUri(util::string_view uri) const = 0; + virtual std::optional FindUri(std::string_view uri) const = 0; /// \brief Return a id view owned by this registry /// /// If the id has never been emplaced it will return nullopt @@ -255,7 +255,7 @@ class ARROW_ENGINE_EXPORT ExtensionIdRegistry { Id substrait_function_id) const = 0; }; -constexpr util::string_view kArrowExtTypesUri = +constexpr std::string_view kArrowExtTypesUri = "https://github.com/apache/arrow/blob/master/format/substrait/" "extension_types.yaml"; @@ -309,7 +309,7 @@ class ARROW_ENGINE_EXPORT ExtensionSet { public: struct FunctionRecord { Id id; - util::string_view name; + std::string_view name; }; struct TypeRecord { @@ -336,12 +336,12 @@ class ARROW_ENGINE_EXPORT ExtensionSet { /// An extension set should instead be created using /// arrow::engine::GetExtensionSetFromPlan static Result Make( - std::unordered_map uris, + std::unordered_map uris, std::unordered_map type_ids, std::unordered_map function_ids, const ExtensionIdRegistry* = default_extension_id_registry()); - const std::unordered_map& uris() const { return uris_; } + const std::unordered_map& uris() const { return uris_; } /// \brief Returns a data type given an anchor /// @@ -407,7 +407,7 @@ class ARROW_ENGINE_EXPORT ExtensionSet { std::unique_ptr plan_specific_ids_ = IdStorage::Make(); // Map from anchor values to URI values referenced by this extension set - std::unordered_map uris_; + std::unordered_map uris_; // Map from anchor values to type definitions, used during Substrait->Arrow // and populated from the Substrait extension set std::unordered_map types_; @@ -421,8 +421,8 @@ class ARROW_ENGINE_EXPORT ExtensionSet { // 
and built as the plan is created. std::unordered_map functions_map_; - Status CheckHasUri(util::string_view uri); - void AddUri(std::pair uri); + Status CheckHasUri(std::string_view uri); + void AddUri(std::pair uri); Status AddUri(Id id); }; diff --git a/cpp/src/arrow/engine/substrait/extension_types.cc b/cpp/src/arrow/engine/substrait/extension_types.cc index 2b7211766ee53..6a89e3cf98b14 100644 --- a/cpp/src/arrow/engine/substrait/extension_types.cc +++ b/cpp/src/arrow/engine/substrait/extension_types.cc @@ -17,9 +17,10 @@ #include "arrow/engine/substrait/extension_types.h" +#include + #include "arrow/engine/simple_extension_type_internal.h" #include "arrow/util/hashing.h" -#include "arrow/util/string_view.h" namespace arrow { @@ -29,7 +30,7 @@ using internal::MakeProperties; namespace engine { namespace { -constexpr util::string_view kUuidExtensionName = "uuid"; +constexpr std::string_view kUuidExtensionName = "uuid"; struct UuidExtensionParams {}; std::shared_ptr UuidGetStorage(const UuidExtensionParams&) { return fixed_size_binary(16); @@ -40,7 +41,7 @@ using UuidType = SimpleExtensionType; -constexpr util::string_view kFixedCharExtensionName = "fixed_char"; +constexpr std::string_view kFixedCharExtensionName = "fixed_char"; struct FixedCharExtensionParams { int32_t length; }; @@ -55,7 +56,7 @@ using FixedCharType = decltype(kFixedCharExtensionParamsProperties), &kFixedCharExtensionParamsProperties, FixedCharGetStorage>; -constexpr util::string_view kVarCharExtensionName = "varchar"; +constexpr std::string_view kVarCharExtensionName = "varchar"; struct VarCharExtensionParams { int32_t length; }; @@ -70,7 +71,7 @@ using VarCharType = decltype(kVarCharExtensionParamsProperties), &kVarCharExtensionParamsProperties, VarCharGetStorage>; -constexpr util::string_view kIntervalYearExtensionName = "interval_year"; +constexpr std::string_view kIntervalYearExtensionName = "interval_year"; struct IntervalYearExtensionParams {}; std::shared_ptr IntervalYearGetStorage(const 
IntervalYearExtensionParams&) { return fixed_size_list(int32(), 2); @@ -82,7 +83,7 @@ using IntervalYearType = decltype(kIntervalYearExtensionParamsProperties), &kIntervalYearExtensionParamsProperties, IntervalYearGetStorage>; -constexpr util::string_view kIntervalDayExtensionName = "interval_day"; +constexpr std::string_view kIntervalDayExtensionName = "interval_day"; struct IntervalDayExtensionParams {}; std::shared_ptr IntervalDayGetStorage(const IntervalDayExtensionParams&) { return fixed_size_list(int32(), 2); diff --git a/cpp/src/arrow/engine/substrait/extension_types.h b/cpp/src/arrow/engine/substrait/extension_types.h index c623d081b188e..3b08084c753df 100644 --- a/cpp/src/arrow/engine/substrait/extension_types.h +++ b/cpp/src/arrow/engine/substrait/extension_types.h @@ -20,13 +20,10 @@ #pragma once #include -#include -#include "arrow/buffer.h" #include "arrow/compute/function.h" #include "arrow/engine/substrait/visibility.h" #include "arrow/type_fwd.h" -#include "arrow/util/string_view.h" namespace arrow { namespace engine { diff --git a/cpp/src/arrow/engine/substrait/plan_internal.cc b/cpp/src/arrow/engine/substrait/plan_internal.cc index 1efd4e1a0a935..bd30f043a1b2c 100644 --- a/cpp/src/arrow/engine/substrait/plan_internal.cc +++ b/cpp/src/arrow/engine/substrait/plan_internal.cc @@ -40,7 +40,7 @@ using ::arrow::internal::make_unique; Status AddExtensionSetToPlan(const ExtensionSet& ext_set, substrait::Plan* plan) { plan->clear_extension_uris(); - std::unordered_map map; + std::unordered_map map; auto uris = plan->mutable_extension_uris(); uris->Reserve(static_cast(ext_set.uris().size())); @@ -49,7 +49,7 @@ Status AddExtensionSetToPlan(const ExtensionSet& ext_set, substrait::Plan* plan) if (uri.empty()) continue; auto ext_uri = internal::make_unique(); - ext_uri->set_uri(uri.to_string()); + ext_uri->set_uri(std::string(uri)); ext_uri->set_extension_uri_anchor(anchor); uris->AddAllocated(ext_uri.release()); @@ -70,7 +70,7 @@ Status 
AddExtensionSetToPlan(const ExtensionSet& ext_set, substrait::Plan* plan) auto type = internal::make_unique(); type->set_extension_uri_reference(map[type_record.id.uri]); type->set_type_anchor(anchor); - type->set_name(type_record.id.name.to_string()); + type->set_name(std::string(type_record.id.name)); ext_decl->set_allocated_extension_type(type.release()); extensions->AddAllocated(ext_decl.release()); } @@ -81,7 +81,7 @@ Status AddExtensionSetToPlan(const ExtensionSet& ext_set, substrait::Plan* plan) auto fn = internal::make_unique(); fn->set_extension_uri_reference(map[function_id.uri]); fn->set_function_anchor(anchor); - fn->set_name(function_id.name.to_string()); + fn->set_name(std::string(function_id.name)); auto ext_decl = internal::make_unique(); ext_decl->set_allocated_extension_function(fn.release()); @@ -96,7 +96,7 @@ Result GetExtensionSetFromPlan(const substrait::Plan& plan, if (registry == NULLPTR) { registry = default_extension_id_registry(); } - std::unordered_map uris; + std::unordered_map uris; uris.reserve(plan.extension_uris_size()); for (const auto& uri : plan.extension_uris()) { uris[uri.extension_uri_anchor()] = uri.uri(); @@ -114,14 +114,14 @@ Result GetExtensionSetFromPlan(const substrait::Plan& plan, case substrait::extensions::SimpleExtensionDeclaration::kExtensionType: { const auto& type = ext.extension_type(); - util::string_view uri = uris[type.extension_uri_reference()]; + std::string_view uri = uris[type.extension_uri_reference()]; type_ids[type.type_anchor()] = Id{uri, type.name()}; break; } case substrait::extensions::SimpleExtensionDeclaration::kExtensionFunction: { const auto& fn = ext.extension_function(); - util::string_view uri = uris[fn.extension_uri_reference()]; + std::string_view uri = uris[fn.extension_uri_reference()]; function_ids[fn.function_anchor()] = Id{uri, fn.name()}; break; } diff --git a/cpp/src/arrow/engine/substrait/relation_internal.cc b/cpp/src/arrow/engine/substrait/relation_internal.cc index 
ed07f75f2b98f..c920a1a46d0d0 100644 --- a/cpp/src/arrow/engine/substrait/relation_internal.cc +++ b/cpp/src/arrow/engine/substrait/relation_internal.cc @@ -31,13 +31,15 @@ #include "arrow/filesystem/util_internal.h" #include "arrow/util/checked_cast.h" #include "arrow/util/make_unique.h" +#include "arrow/util/string.h" #include "arrow/util/uri.h" namespace arrow { -using ::arrow::internal::UriFromAbsolutePath; using internal::checked_cast; using internal::make_unique; +using internal::StartsWith; +using internal::UriFromAbsolutePath; namespace engine { @@ -189,7 +191,7 @@ Result FromProto(const substrait::Rel& rel, const ExtensionSet& "unknown substrait::ReadRel::LocalFiles::FileOrFiles::file_format"); } - if (!util::string_view{path}.starts_with("file:///")) { + if (!StartsWith(path, "file:///")) { return Status::NotImplemented("substrait::ReadRel::LocalFiles item (", path, ") with other than local filesystem " "(file:///)"); diff --git a/cpp/src/arrow/engine/substrait/serde.cc b/cpp/src/arrow/engine/substrait/serde.cc index 1e1c61fc3224b..c7792c7c76ef3 100644 --- a/cpp/src/arrow/engine/substrait/serde.cc +++ b/cpp/src/arrow/engine/substrait/serde.cc @@ -31,7 +31,6 @@ #include "arrow/engine/substrait/type_fwd.h" #include "arrow/engine/substrait/type_internal.h" #include "arrow/type.h" -#include "arrow/util/string_view.h" #include #include @@ -315,7 +314,7 @@ static Status CheckMessagesEquivalent(const Buffer& l_buf, const Buffer& r_buf) return Status::Invalid("Messages were not equivalent: ", out); } -Status CheckMessagesEquivalent(util::string_view message_name, const Buffer& l_buf, +Status CheckMessagesEquivalent(std::string_view message_name, const Buffer& l_buf, const Buffer& r_buf) { if (message_name == "Type") { return CheckMessagesEquivalent(l_buf, r_buf); @@ -357,9 +356,9 @@ inline google::protobuf::util::TypeResolver* GetGeneratedTypeResolver() { return type_resolver.get(); } -Result> SubstraitFromJSON(util::string_view type_name, - util::string_view 
json) { - std::string type_url = "/substrait." + type_name.to_string(); +Result> SubstraitFromJSON(std::string_view type_name, + std::string_view json) { + std::string type_url = "/substrait." + std::string(type_name); google::protobuf::io::ArrayInputStream json_stream{json.data(), static_cast(json.size())}; @@ -378,8 +377,8 @@ Result> SubstraitFromJSON(util::string_view type_name, return Buffer::FromString(std::move(out)); } -Result SubstraitToJSON(util::string_view type_name, const Buffer& buf) { - std::string type_url = "/substrait." + type_name.to_string(); +Result SubstraitToJSON(std::string_view type_name, const Buffer& buf) { + std::string type_url = "/substrait." + std::string(type_name); google::protobuf::io::ArrayInputStream buf_stream{buf.data(), static_cast(buf.size())}; diff --git a/cpp/src/arrow/engine/substrait/serde.h b/cpp/src/arrow/engine/substrait/serde.h index cc59adb0d2508..23683dba0c396 100644 --- a/cpp/src/arrow/engine/substrait/serde.h +++ b/cpp/src/arrow/engine/substrait/serde.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include "arrow/compute/type_fwd.h" @@ -33,7 +34,6 @@ #include "arrow/status.h" #include "arrow/type_fwd.h" #include "arrow/util/macros.h" -#include "arrow/util/string_view.h" namespace arrow { namespace engine { @@ -253,7 +253,7 @@ namespace internal { /// \param[in] r_buf buffer containing the second protobuf serialization to compare /// \return success if equivalent, failure if not ARROW_ENGINE_EXPORT -Status CheckMessagesEquivalent(util::string_view message_name, const Buffer& l_buf, +Status CheckMessagesEquivalent(std::string_view message_name, const Buffer& l_buf, const Buffer& r_buf); /// \brief Utility function to convert a JSON serialization of a Substrait message to @@ -263,8 +263,8 @@ Status CheckMessagesEquivalent(util::string_view message_name, const Buffer& l_b /// \param[in] json the JSON string to convert /// \return a buffer filled with the binary protobuf serialization of message 
ARROW_ENGINE_EXPORT -Result> SubstraitFromJSON(util::string_view type_name, - util::string_view json); +Result> SubstraitFromJSON(std::string_view type_name, + std::string_view json); /// \brief Utility function to convert a binary protobuf serialization of a Substrait /// message to JSON @@ -273,7 +273,7 @@ Result> SubstraitFromJSON(util::string_view type_name, /// \param[in] buf the buffer containing the binary protobuf serialization of the message /// \return a JSON string representing the message ARROW_ENGINE_EXPORT -Result SubstraitToJSON(util::string_view type_name, const Buffer& buf); +Result SubstraitToJSON(std::string_view type_name, const Buffer& buf); } // namespace internal } // namespace engine diff --git a/cpp/src/arrow/engine/substrait/serde_test.cc b/cpp/src/arrow/engine/substrait/serde_test.cc index 7601bcf437018..de40a53cbc1f3 100644 --- a/cpp/src/arrow/engine/substrait/serde_test.cc +++ b/cpp/src/arrow/engine/substrait/serde_test.cc @@ -198,7 +198,7 @@ void CheckRoundTripResult(const std::shared_ptr output_schema, } TEST(Substrait, SupportedTypes) { - auto ExpectEq = [](util::string_view json, std::shared_ptr expected_type) { + auto ExpectEq = [](std::string_view json, std::shared_ptr expected_type) { ARROW_SCOPED_TRACE(json); ExtensionSet empty; @@ -396,12 +396,12 @@ TEST(Substrait, NoEquivalentSubstraitType) { } TEST(Substrait, SupportedLiterals) { - auto ExpectEq = [](util::string_view json, Datum expected_value) { + auto ExpectEq = [](std::string_view json, Datum expected_value) { ARROW_SCOPED_TRACE(json); ASSERT_OK_AND_ASSIGN( auto buf, internal::SubstraitFromJSON("Expression", - "{\"literal\":" + json.to_string() + "}")); + "{\"literal\":" + std::string(json) + "}")); ExtensionSet ext_set; ASSERT_OK_AND_ASSIGN(auto expr, DeserializeExpression(*buf, ext_set)); diff --git a/cpp/src/arrow/engine/substrait/util.cc b/cpp/src/arrow/engine/substrait/util.cc index 0df3420c234fe..867e33a7cd0ba 100644 --- a/cpp/src/arrow/engine/substrait/util.cc +++ 
b/cpp/src/arrow/engine/substrait/util.cc @@ -143,7 +143,7 @@ std::shared_ptr MakeExtensionIdRegistry() { } const std::string& default_extension_types_uri() { - static std::string uri = engine::kArrowExtTypesUri.to_string(); + static std::string uri(engine::kArrowExtTypesUri); return uri; } diff --git a/cpp/src/arrow/filesystem/filesystem.cc b/cpp/src/arrow/filesystem/filesystem.cc index 48b4646bea039..c8fa4d1c3776f 100644 --- a/cpp/src/arrow/filesystem/filesystem.cc +++ b/cpp/src/arrow/filesystem/filesystem.cc @@ -258,7 +258,7 @@ Result> FileSystem::OpenAppendStream( namespace { -Status ValidateSubPath(util::string_view s) { +Status ValidateSubPath(std::string_view s) { if (internal::IsLikelyUri(s)) { return Status::Invalid("Expected a filesystem path, got a URI: '", s, "'"); } @@ -639,7 +639,7 @@ Status CopyFiles(const std::shared_ptr& source_fs, } auto destination_path = - internal::ConcatAbstractPath(destination_base_dir, relative->to_string()); + internal::ConcatAbstractPath(destination_base_dir, std::string(*relative)); if (source_info.IsDirectory()) { dirs.push_back(destination_path); diff --git a/cpp/src/arrow/filesystem/gcsfs.cc b/cpp/src/arrow/filesystem/gcsfs.cc index da7b856be4747..e7d1965d51060 100644 --- a/cpp/src/arrow/filesystem/gcsfs.cc +++ b/cpp/src/arrow/filesystem/gcsfs.cc @@ -81,7 +81,7 @@ struct GcsPath { return Status::Invalid("Path cannot start with a separator ('", s, "')"); } if (first_sep == std::string::npos) { - return GcsPath{s, internal::RemoveTrailingSlash(s).to_string(), ""}; + return GcsPath{s, std::string(internal::RemoveTrailingSlash(s)), ""}; } GcsPath path; path.full_path = s; @@ -412,7 +412,7 @@ class GcsFileSystem::Impl { // limitations) using marker objects. That and listing with prefixes creates the // illusion of folders. google::cloud::StatusOr CreateDirMarker(const std::string& bucket, - util::string_view name) { + std::string_view name) { // Make the name canonical. 
const auto canonical = internal::EnsureTrailingSlash(name); google::cloud::StatusOr object = client_.InsertObject( diff --git a/cpp/src/arrow/filesystem/gcsfs_internal.cc b/cpp/src/arrow/filesystem/gcsfs_internal.cc index b8f0ab80b2103..c984fe12f0181 100644 --- a/cpp/src/arrow/filesystem/gcsfs_internal.cc +++ b/cpp/src/arrow/filesystem/gcsfs_internal.cc @@ -295,7 +295,7 @@ Result> FromObjectMetadata( return result; } -std::int64_t Depth(arrow::util::string_view path) { +std::int64_t Depth(std::string_view path) { // The last slash is not counted towards depth because it represents a // directory. bool has_trailing_slash = !path.empty() && path.back() == '/'; diff --git a/cpp/src/arrow/filesystem/gcsfs_internal.h b/cpp/src/arrow/filesystem/gcsfs_internal.h index 101f7f62df63e..c2a0e2921dc25 100644 --- a/cpp/src/arrow/filesystem/gcsfs_internal.h +++ b/cpp/src/arrow/filesystem/gcsfs_internal.h @@ -51,7 +51,7 @@ ARROW_EXPORT Result ToObjectMetadata ARROW_EXPORT Result> FromObjectMetadata( google::cloud::storage::ObjectMetadata const& m); -ARROW_EXPORT std::int64_t Depth(arrow::util::string_view path); +ARROW_EXPORT std::int64_t Depth(std::string_view path); } // namespace internal } // namespace fs diff --git a/cpp/src/arrow/filesystem/gcsfs_test.cc b/cpp/src/arrow/filesystem/gcsfs_test.cc index 50f9a32fa1acd..48d56f7b7bb62 100644 --- a/cpp/src/arrow/filesystem/gcsfs_test.cc +++ b/cpp/src/arrow/filesystem/gcsfs_test.cc @@ -73,7 +73,6 @@ namespace gcs = google::cloud::storage; using ::testing::Eq; using ::testing::HasSubstr; -using ::testing::IsEmpty; using ::testing::Not; using ::testing::NotNull; using ::testing::Pair; @@ -171,7 +170,7 @@ class GcsIntegrationTest : public ::testing::Test { protected: void SetUp() override { ASSERT_THAT(Testbench(), NotNull()); - ASSERT_THAT(Testbench()->error(), IsEmpty()); + ASSERT_TRUE(Testbench()->error().empty()); ASSERT_TRUE(Testbench()->running()); // Initialize a PRNG with a small amount of entropy. 
@@ -280,7 +279,7 @@ class GcsIntegrationTest : public ::testing::Test { std::transform(expected.begin(), expected.end(), expected.begin(), [](FileInfo const& info) { if (!info.IsDirectory()) return info; - return Dir(internal::RemoveTrailingSlash(info.path()).to_string()); + return Dir(std::string(internal::RemoveTrailingSlash(info.path()))); }); return expected; } @@ -767,7 +766,7 @@ TEST_F(GcsIntegrationTest, GetFileInfoSelectorNotFoundTrue) { selector.allow_not_found = true; selector.recursive = true; ASSERT_OK_AND_ASSIGN(auto results, fs->GetFileInfo(selector)); - EXPECT_THAT(results, IsEmpty()); + EXPECT_EQ(results.size(), 0); } TEST_F(GcsIntegrationTest, GetFileInfoSelectorNotFoundFalse) { diff --git a/cpp/src/arrow/filesystem/localfs.cc b/cpp/src/arrow/filesystem/localfs.cc index 585131ecc5eb7..03b4ad3bc7260 100644 --- a/cpp/src/arrow/filesystem/localfs.cc +++ b/cpp/src/arrow/filesystem/localfs.cc @@ -85,7 +85,7 @@ bool DetectAbsolutePath(const std::string& s) { namespace { -Status ValidatePath(util::string_view s) { +Status ValidatePath(std::string_view s) { if (internal::IsLikelyUri(s)) { return Status::Invalid("Expected a local filesystem path, got a URI: '", s, "'"); } diff --git a/cpp/src/arrow/filesystem/localfs_benchmark.cc b/cpp/src/arrow/filesystem/localfs_benchmark.cc index 1eb15ccfe23a3..3c4ded7e537da 100644 --- a/cpp/src/arrow/filesystem/localfs_benchmark.cc +++ b/cpp/src/arrow/filesystem/localfs_benchmark.cc @@ -16,6 +16,7 @@ // under the License. 
#include +#include #include "benchmark/benchmark.h" @@ -29,7 +30,6 @@ #include "arrow/util/async_generator.h" #include "arrow/util/io_util.h" #include "arrow/util/make_unique.h" -#include "arrow/util/string_view.h" namespace arrow { diff --git a/cpp/src/arrow/filesystem/mockfs.cc b/cpp/src/arrow/filesystem/mockfs.cc index d8302bed47149..bb211e23df4e6 100644 --- a/cpp/src/arrow/filesystem/mockfs.cc +++ b/cpp/src/arrow/filesystem/mockfs.cc @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -35,7 +36,6 @@ #include "arrow/util/async_generator.h" #include "arrow/util/future.h" #include "arrow/util/logging.h" -#include "arrow/util/string_view.h" #include "arrow/util/windows_fixup.h" namespace arrow { @@ -44,7 +44,7 @@ namespace internal { namespace { -Status ValidatePath(util::string_view s) { +Status ValidatePath(std::string_view s) { if (internal::IsLikelyUri(s)) { return Status::Invalid("Expected a filesystem path, got a URI: '", s, "'"); } @@ -66,9 +66,9 @@ struct File { int64_t size() const { return data ? 
data->size() : 0; } - explicit operator util::string_view() const { + explicit operator std::string_view() const { if (data) { - return util::string_view(*data); + return std::string_view(*data); } else { return ""; } @@ -372,7 +372,7 @@ class MockFileSystem::Impl { Entry* child = pair.second.get(); if (child->is_file()) { auto& file = child->as_file(); - out->push_back({path + file.name, file.mtime, util::string_view(file)}); + out->push_back({path + file.name, file.mtime, std::string_view(file)}); } else if (child->is_dir()) { DumpFiles(path, child->as_dir(), out); } @@ -752,7 +752,7 @@ std::vector MockFileSystem::AllFiles() { return result; } -Status MockFileSystem::CreateFile(const std::string& path, util::string_view contents, +Status MockFileSystem::CreateFile(const std::string& path, std::string_view contents, bool recursive) { RETURN_NOT_OK(ValidatePath(path)); auto parent = fs::internal::GetAbstractPathParent(path).first; diff --git a/cpp/src/arrow/filesystem/mockfs.h b/cpp/src/arrow/filesystem/mockfs.h index 2427d4a3bf705..fe86e19be4e7f 100644 --- a/cpp/src/arrow/filesystem/mockfs.h +++ b/cpp/src/arrow/filesystem/mockfs.h @@ -20,10 +20,10 @@ #include #include #include +#include #include #include "arrow/filesystem/filesystem.h" -#include "arrow/util/string_view.h" #include "arrow/util/windows_fixup.h" namespace arrow { @@ -44,7 +44,7 @@ struct MockDirInfo { struct MockFileInfo { std::string full_path; TimePoint mtime; - util::string_view data; + std::string_view data; bool operator==(const MockFileInfo& other) const { return mtime == other.mtime && full_path == other.full_path && data == other.data; @@ -102,7 +102,7 @@ class ARROW_EXPORT MockFileSystem : public FileSystem { std::vector AllFiles(); // Create a File with a content from a string. 
- Status CreateFile(const std::string& path, util::string_view content, + Status CreateFile(const std::string& path, std::string_view content, bool recursive = true); // Create a MockFileSystem out of (empty) FileInfo. The content of every diff --git a/cpp/src/arrow/filesystem/path_util.cc b/cpp/src/arrow/filesystem/path_util.cc index 2216a4bb258f8..53cd610377693 100644 --- a/cpp/src/arrow/filesystem/path_util.cc +++ b/cpp/src/arrow/filesystem/path_util.cc @@ -23,10 +23,13 @@ #include "arrow/result.h" #include "arrow/status.h" #include "arrow/util/logging.h" -#include "arrow/util/string_view.h" +#include "arrow/util/string.h" #include "arrow/util/uri.h" namespace arrow { + +using internal::StartsWith; + namespace fs { namespace internal { @@ -34,7 +37,7 @@ namespace internal { std::vector SplitAbstractPath(const std::string& path, char sep) { std::vector parts; - auto v = util::string_view(path); + auto v = std::string_view(path); // Strip trailing separator if (v.length() > 0 && v.back() == sep) { v = v.substr(0, v.length() - 1); @@ -75,13 +78,13 @@ std::pair GetAbstractPathParent(const std::string& s) } std::string GetAbstractPathExtension(const std::string& s) { - util::string_view basename(s); + std::string_view basename(s); auto offset = basename.find_last_of(kSep); if (offset != std::string::npos) { basename = basename.substr(offset); } auto dot = basename.find_last_of('.'); - if (dot == util::string_view::npos) { + if (dot == std::string_view::npos) { // Empty extension return ""; } @@ -108,7 +111,7 @@ std::string ConcatAbstractPath(const std::string& base, const std::string& stem) return EnsureTrailingSlash(base) + std::string(RemoveLeadingSlash(stem)); } -std::string EnsureTrailingSlash(util::string_view v) { +std::string EnsureTrailingSlash(std::string_view v) { if (v.length() > 0 && v.back() != kSep) { // XXX How about "C:" on Windows? We probably don't want to turn it into "C:/"... 
// Unless the local filesystem always uses absolute paths @@ -118,7 +121,7 @@ std::string EnsureTrailingSlash(util::string_view v) { } } -std::string EnsureLeadingSlash(util::string_view v) { +std::string EnsureLeadingSlash(std::string_view v) { if (v.length() == 0 || v.front() != kSep) { // XXX How about "C:" on Windows? We probably don't want to turn it into "/C:"... return kSep + std::string(v); @@ -126,21 +129,21 @@ std::string EnsureLeadingSlash(util::string_view v) { return std::string(v); } } -util::string_view RemoveTrailingSlash(util::string_view key) { +std::string_view RemoveTrailingSlash(std::string_view key) { while (!key.empty() && key.back() == kSep) { key.remove_suffix(1); } return key; } -util::string_view RemoveLeadingSlash(util::string_view key) { +std::string_view RemoveLeadingSlash(std::string_view key) { while (!key.empty() && key.front() == kSep) { key.remove_prefix(1); } return key; } -Status AssertNoTrailingSlash(util::string_view key) { +Status AssertNoTrailingSlash(std::string_view key) { if (key.back() == '/') { return NotAFile(key); } @@ -154,8 +157,8 @@ Result MakeAbstractPathRelative(const std::string& base, base, "'"); } auto b = EnsureLeadingSlash(RemoveTrailingSlash(base)); - auto p = util::string_view(path); - if (p.substr(0, b.size()) != util::string_view(b)) { + auto p = std::string_view(path); + if (p.substr(0, b.size()) != std::string_view(b)) { return Status::Invalid("Path '", path, "' is not relative to '", base, "'"); } p = p.substr(b.size()); @@ -165,7 +168,7 @@ Result MakeAbstractPathRelative(const std::string& base, return std::string(RemoveLeadingSlash(p)); } -bool IsAncestorOf(util::string_view ancestor, util::string_view descendant) { +bool IsAncestorOf(std::string_view ancestor, std::string_view descendant) { ancestor = RemoveTrailingSlash(ancestor); if (ancestor == "") { // everything is a descendant of the root directory @@ -173,7 +176,7 @@ bool IsAncestorOf(util::string_view ancestor, util::string_view descendant) 
{ } descendant = RemoveTrailingSlash(descendant); - if (!descendant.starts_with(ancestor)) { + if (!StartsWith(descendant, ancestor)) { // an ancestor path is a prefix of descendant paths return false; } @@ -186,11 +189,11 @@ bool IsAncestorOf(util::string_view ancestor, util::string_view descendant) { } // "/hello/w" is not an ancestor of "/hello/world" - return descendant.starts_with(std::string{kSep}); + return StartsWith(descendant, std::string{kSep}); } -std::optional RemoveAncestor(util::string_view ancestor, - util::string_view descendant) { +std::optional RemoveAncestor(std::string_view ancestor, + std::string_view descendant) { if (!IsAncestorOf(ancestor, descendant)) { return std::nullopt; } @@ -199,8 +202,8 @@ std::optional RemoveAncestor(util::string_view ancestor, return RemoveLeadingSlash(relative_to_ancestor); } -std::vector AncestorsFromBasePath(util::string_view base_path, - util::string_view descendant) { +std::vector AncestorsFromBasePath(std::string_view base_path, + std::string_view descendant) { std::vector ancestry; if (auto relative = RemoveAncestor(base_path, descendant)) { auto relative_segments = fs::internal::SplitAbstractPath(std::string(*relative)); @@ -245,7 +248,7 @@ std::vector MinimalCreateDirSet(std::vector dirs) { return dirs; } -std::string ToBackslashes(util::string_view v) { +std::string ToBackslashes(std::string_view v) { std::string s(v); for (auto& c : s) { if (c == '/') { @@ -255,7 +258,7 @@ std::string ToBackslashes(util::string_view v) { return s; } -std::string ToSlashes(util::string_view v) { +std::string ToSlashes(std::string_view v) { std::string s(v); #ifdef _WIN32 for (auto& c : s) { @@ -267,7 +270,7 @@ std::string ToSlashes(util::string_view v) { return s; } -bool IsEmptyPath(util::string_view v) { +bool IsEmptyPath(std::string_view v) { for (const auto c : v) { if (c != '/') { return false; @@ -276,7 +279,7 @@ bool IsEmptyPath(util::string_view v) { return true; } -bool IsLikelyUri(util::string_view v) { +bool 
IsLikelyUri(std::string_view v) { if (v.empty() || v[0] == '/') { return false; } diff --git a/cpp/src/arrow/filesystem/path_util.h b/cpp/src/arrow/filesystem/path_util.h index ea8e56df5d43b..fc1d2d82443f3 100644 --- a/cpp/src/arrow/filesystem/path_util.h +++ b/cpp/src/arrow/filesystem/path_util.h @@ -19,11 +19,11 @@ #include #include +#include #include #include #include "arrow/type_fwd.h" -#include "arrow/util/string_view.h" namespace arrow { namespace fs { @@ -61,34 +61,34 @@ Result MakeAbstractPathRelative(const std::string& base, const std::string& path); ARROW_EXPORT -std::string EnsureLeadingSlash(util::string_view s); +std::string EnsureLeadingSlash(std::string_view s); ARROW_EXPORT -util::string_view RemoveLeadingSlash(util::string_view s); +std::string_view RemoveLeadingSlash(std::string_view s); ARROW_EXPORT -std::string EnsureTrailingSlash(util::string_view s); +std::string EnsureTrailingSlash(std::string_view s); ARROW_EXPORT -util::string_view RemoveTrailingSlash(util::string_view s); +std::string_view RemoveTrailingSlash(std::string_view s); ARROW_EXPORT -Status AssertNoTrailingSlash(util::string_view s); +Status AssertNoTrailingSlash(std::string_view s); ARROW_EXPORT -bool IsAncestorOf(util::string_view ancestor, util::string_view descendant); +bool IsAncestorOf(std::string_view ancestor, std::string_view descendant); ARROW_EXPORT -std::optional RemoveAncestor(util::string_view ancestor, - util::string_view descendant); +std::optional RemoveAncestor(std::string_view ancestor, + std::string_view descendant); /// Return a vector of ancestors between a base path and a descendant. 
/// For example, /// /// AncestorsFromBasePath("a/b", "a/b/c/d/e") -> ["a/b/c", "a/b/c/d"] ARROW_EXPORT -std::vector AncestorsFromBasePath(util::string_view base_path, - util::string_view descendant); +std::vector AncestorsFromBasePath(std::string_view base_path, + std::string_view descendant); /// Given a vector of paths of directories which must be created, produce a the minimal /// subset for passing to CreateDir(recursive=true) by removing redundant parent @@ -118,18 +118,18 @@ std::string JoinAbstractPath(const StringRange& range, char sep = kSep) { /// Convert slashes to backslashes, on all platforms. Mostly useful for testing. ARROW_EXPORT -std::string ToBackslashes(util::string_view s); +std::string ToBackslashes(std::string_view s); /// Ensure a local path is abstract, by converting backslashes to regular slashes /// on Windows. Return the path unchanged on other systems. ARROW_EXPORT -std::string ToSlashes(util::string_view s); +std::string ToSlashes(std::string_view s); ARROW_EXPORT -bool IsEmptyPath(util::string_view s); +bool IsEmptyPath(std::string_view s); ARROW_EXPORT -bool IsLikelyUri(util::string_view s); +bool IsLikelyUri(std::string_view s); class ARROW_EXPORT Globber { public: diff --git a/cpp/src/arrow/filesystem/s3_internal.h b/cpp/src/arrow/filesystem/s3_internal.h index c6e6349ba2cf8..00efff166f2ab 100644 --- a/cpp/src/arrow/filesystem/s3_internal.h +++ b/cpp/src/arrow/filesystem/s3_internal.h @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -33,7 +34,6 @@ #include "arrow/status.h" #include "arrow/util/logging.h" #include "arrow/util/print.h" -#include "arrow/util/string_view.h" namespace arrow { namespace fs { @@ -46,7 +46,7 @@ enum class S3Backend { Amazon, Minio, Other }; inline S3Backend DetectS3Backend(const Aws::Http::HeaderValueCollection& headers) { const auto it = headers.find("server"); if (it != headers.end()) { - const auto& value = util::string_view(it->second); + const auto& value = 
std::string_view(it->second); if (value.find("AmazonS3") != std::string::npos) { return S3Backend::Amazon; } @@ -218,7 +218,7 @@ inline Aws::String ToAwsString(const std::string& s) { return Aws::String(s.begin(), s.end()); } -inline util::string_view FromAwsString(const Aws::String& s) { +inline std::string_view FromAwsString(const Aws::String& s) { return {s.data(), s.length()}; } diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc index db79810f5d732..e75f277034af1 100644 --- a/cpp/src/arrow/filesystem/s3fs.cc +++ b/cpp/src/arrow/filesystem/s3fs.cc @@ -954,7 +954,7 @@ std::shared_ptr GetObjectMetadata(const ObjectResult& re auto push = [&](std::string k, const Aws::String& v) { if (!v.empty()) { - md->Append(std::move(k), FromAwsString(v).to_string()); + md->Append(std::move(k), std::string(FromAwsString(v))); } }; auto push_datetime = [&](std::string k, const Aws::Utils::DateTime& v) { @@ -1948,7 +1948,7 @@ class S3FileSystem::Impl : public std::enable_shared_from_this& src, return Status::OK(); } -Status PathNotFound(util::string_view path) { +Status PathNotFound(std::string_view path) { return Status::IOError("Path does not exist '", path, "'") .WithDetail(StatusDetailFromErrno(ENOENT)); } -Status NotADir(util::string_view path) { +Status NotADir(std::string_view path) { return Status::IOError("Not a directory: '", path, "'") .WithDetail(StatusDetailFromErrno(ENOTDIR)); } -Status NotAFile(util::string_view path) { +Status NotAFile(std::string_view path) { return Status::IOError("Not a regular file: '", path, "'"); } -Status InvalidDeleteDirContents(util::string_view path) { +Status InvalidDeleteDirContents(std::string_view path) { return Status::Invalid( "DeleteDirContents called on invalid path '", path, "'. 
", "If you wish to delete the root directory's contents, call DeleteRootDirContents."); diff --git a/cpp/src/arrow/filesystem/util_internal.h b/cpp/src/arrow/filesystem/util_internal.h index 75a2d3a2ef587..cc16dbba10627 100644 --- a/cpp/src/arrow/filesystem/util_internal.h +++ b/cpp/src/arrow/filesystem/util_internal.h @@ -19,11 +19,11 @@ #include #include +#include #include "arrow/filesystem/filesystem.h" #include "arrow/io/interfaces.h" #include "arrow/status.h" -#include "arrow/util/string_view.h" #include "arrow/util/visibility.h" namespace arrow { @@ -39,16 +39,16 @@ Status CopyStream(const std::shared_ptr& src, const io::IOContext& io_context); ARROW_EXPORT -Status PathNotFound(util::string_view path); +Status PathNotFound(std::string_view path); ARROW_EXPORT -Status NotADir(util::string_view path); +Status NotADir(std::string_view path); ARROW_EXPORT -Status NotAFile(util::string_view path); +Status NotAFile(std::string_view path); ARROW_EXPORT -Status InvalidDeleteDirContents(util::string_view path); +Status InvalidDeleteDirContents(std::string_view path); /// \brief Return files matching the glob pattern on the filesystem /// diff --git a/cpp/src/arrow/flight/cookie_internal.cc b/cpp/src/arrow/flight/cookie_internal.cc index 380ea56976d52..37672d2ecd860 100644 --- a/cpp/src/arrow/flight/cookie_internal.cc +++ b/cpp/src/arrow/flight/cookie_internal.cc @@ -63,7 +63,7 @@ size_t CaseInsensitiveHash::operator()(const std::string& key) const { return std::hash{}(upper_string); } -Cookie Cookie::Parse(const arrow::util::string_view& cookie_header_value) { +Cookie Cookie::Parse(const std::string_view& cookie_header_value) { // Parse the cookie string. If the cookie has an expiration, record it. // If the cookie has a max-age, calculate the current time + max_age and set that as // the expiration. 
@@ -252,7 +252,7 @@ void CookieCache::UpdateCachedCookies(const CallHeaders& incoming_headers) { const std::lock_guard guard(mutex_); for (auto it = header_values.first; it != header_values.second; ++it) { - const util::string_view& value = it->second; + const std::string_view& value = it->second; Cookie cookie = Cookie::Parse(value); // Cache cookies regardless of whether or not they are expired. The server may have diff --git a/cpp/src/arrow/flight/cookie_internal.h b/cpp/src/arrow/flight/cookie_internal.h index b87c8052266ad..f2f469b38248b 100644 --- a/cpp/src/arrow/flight/cookie_internal.h +++ b/cpp/src/arrow/flight/cookie_internal.h @@ -23,12 +23,12 @@ #include #include #include +#include #include #include #include "arrow/flight/client_middleware.h" #include "arrow/result.h" -#include "arrow/util/string_view.h" namespace arrow { namespace flight { @@ -54,7 +54,7 @@ class ARROW_FLIGHT_EXPORT Cookie { /// \brief Parse function to parse a cookie header value and return a Cookie object. /// /// \return Cookie object based on cookie header value. - static Cookie Parse(const arrow::util::string_view& cookie_header_value); + static Cookie Parse(const std::string_view& cookie_header_value); /// \brief Parse a cookie header string beginning at the given start_pos and identify /// the name and value of an attribute. 
diff --git a/cpp/src/arrow/flight/flight_internals_test.cc b/cpp/src/arrow/flight/flight_internals_test.cc index f315e42a6a6a9..8a809decd70be 100644 --- a/cpp/src/arrow/flight/flight_internals_test.cc +++ b/cpp/src/arrow/flight/flight_internals_test.cc @@ -274,8 +274,8 @@ class TestCookieMiddleware : public ::testing::Test { void AddAndValidate(const std::string& incoming_cookie) { // Add cookie CallHeaders call_headers; - call_headers.insert(std::make_pair(arrow::util::string_view("set-cookie"), - arrow::util::string_view(incoming_cookie))); + call_headers.insert(std::make_pair(std::string_view("set-cookie"), + std::string_view(incoming_cookie))); middleware_->ReceivedHeaders(call_headers); expected_cookie_cache_.UpdateCachedCookies(call_headers); @@ -423,8 +423,8 @@ class TestCookieParsing : public ::testing::Test { for (auto& cookie : cookies) { // Add cookie CallHeaders call_headers; - call_headers.insert(std::make_pair(arrow::util::string_view("set-cookie"), - arrow::util::string_view(cookie))); + call_headers.insert( + std::make_pair(std::string_view("set-cookie"), std::string_view(cookie))); cookie_cache.UpdateCachedCookies(call_headers); } const std::string actual_cookies = cookie_cache.GetValidCookiesAsString(); diff --git a/cpp/src/arrow/flight/flight_test.cc b/cpp/src/arrow/flight/flight_test.cc index a7c79a6dc5b52..54597e5420368 100644 --- a/cpp/src/arrow/flight/flight_test.cc +++ b/cpp/src/arrow/flight/flight_test.cc @@ -463,7 +463,7 @@ class TracingServerMiddlewareFactory : public ServerMiddlewareFactory { const std::pair& iter_pair = incoming_headers.equal_range("x-tracing-span-id"); if (iter_pair.first != iter_pair.second) { - const util::string_view& value = (*iter_pair.first).second; + const std::string_view& value = (*iter_pair.first).second; *middleware = std::make_shared(std::string(value)); } return Status::OK(); @@ -484,7 +484,7 @@ std::string FindKeyValPrefixInCallHeaders(const CallHeaders& incoming_headers, if (iter == 
incoming_headers.end()) { return ""; } - const std::string val = iter->second.to_string(); + const std::string val(iter->second); if (val.size() > prefix.length()) { if (std::equal(val.begin(), val.begin() + prefix.length(), prefix.begin(), char_compare)) { @@ -773,8 +773,8 @@ class TestPropagatingMiddleware : public ::testing::Test { void CheckHeader(const std::string& header, const std::string& value, const CallHeaders::const_iterator& it) { // Construct a string_view before comparison to satisfy MSVC - util::string_view header_view(header.data(), header.length()); - util::string_view value_view(value.data(), value.length()); + std::string_view header_view(header.data(), header.length()); + std::string_view value_view(value.data(), value.length()); ASSERT_EQ(header_view, (*it).first); ASSERT_EQ(value_view, (*it).second); } diff --git a/cpp/src/arrow/flight/integration_tests/test_integration.cc b/cpp/src/arrow/flight/integration_tests/test_integration.cc index 43c16e0b77a6d..0b7ddc56ecbda 100644 --- a/cpp/src/arrow/flight/integration_tests/test_integration.cc +++ b/cpp/src/arrow/flight/integration_tests/test_integration.cc @@ -150,7 +150,7 @@ class TestServerMiddlewareFactory : public ServerMiddlewareFactory { incoming_headers.equal_range("x-middleware"); std::string received = ""; if (iter_pair.first != iter_pair.second) { - const util::string_view& value = (*iter_pair.first).second; + const std::string_view& value = (*iter_pair.first).second; received = std::string(value); } *middleware = std::make_shared(received); @@ -176,7 +176,7 @@ class TestClientMiddleware : public ClientMiddleware { const std::pair& iter_pair = incoming_headers.equal_range("x-middleware"); if (iter_pair.first != iter_pair.second) { - const util::string_view& value = (*iter_pair.first).second; + const std::string_view& value = (*iter_pair.first).second; *received_header_ = std::string(value); } } diff --git a/cpp/src/arrow/flight/middleware.h b/cpp/src/arrow/flight/middleware.h index 
d11ba11477c11..b050e9cc6ed92 100644 --- a/cpp/src/arrow/flight/middleware.h +++ b/cpp/src/arrow/flight/middleware.h @@ -23,11 +23,11 @@ #include #include #include +#include #include #include "arrow/flight/visibility.h" // IWYU pragma: keep #include "arrow/status.h" -#include "arrow/util/string_view.h" namespace arrow { @@ -36,7 +36,7 @@ namespace flight { /// \brief Headers sent from the client or server. /// /// Header values are ordered. -using CallHeaders = std::multimap; +using CallHeaders = std::multimap; /// \brief A write-only wrapper around headers for an RPC call. class ARROW_FLIGHT_EXPORT AddCallHeaders { diff --git a/cpp/src/arrow/flight/server.cc b/cpp/src/arrow/flight/server.cc index e9736b0615e44..66185cfeba5e5 100644 --- a/cpp/src/arrow/flight/server.cc +++ b/cpp/src/arrow/flight/server.cc @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -39,7 +40,6 @@ #include "arrow/status.h" #include "arrow/util/io_util.h" #include "arrow/util/logging.h" -#include "arrow/util/string_view.h" #include "arrow/util/uri.h" namespace arrow { diff --git a/cpp/src/arrow/flight/sql/example/sqlite_tables_schema_batch_reader.cc b/cpp/src/arrow/flight/sql/example/sqlite_tables_schema_batch_reader.cc index 68bde35c718b7..921dd13182e2d 100644 --- a/cpp/src/arrow/flight/sql/example/sqlite_tables_schema_batch_reader.cc +++ b/cpp/src/arrow/flight/sql/example/sqlite_tables_schema_batch_reader.cc @@ -92,8 +92,7 @@ Status SqliteTablesWithSchemaBatchReader::ReadNext(std::shared_ptr* ARROW_ASSIGN_OR_RAISE(schema_buffer, value); column_fields.clear(); - ARROW_RETURN_NOT_OK( - schema_builder.Append(::arrow::util::string_view(*schema_buffer))); + ARROW_RETURN_NOT_OK(schema_builder.Append(::std::string_view(*schema_buffer))); } std::shared_ptr schema_array; diff --git a/cpp/src/arrow/flight/transport/grpc/grpc_client.cc b/cpp/src/arrow/flight/transport/grpc/grpc_client.cc index e6f5016960739..34c4ae91627cf 100644 --- 
a/cpp/src/arrow/flight/transport/grpc/grpc_client.cc +++ b/cpp/src/arrow/flight/transport/grpc/grpc_client.cc @@ -44,6 +44,7 @@ #include "arrow/util/base64.h" #include "arrow/util/logging.h" #include "arrow/util/make_unique.h" +#include "arrow/util/string.h" #include "arrow/util/uri.h" #include "arrow/flight/client.h" @@ -59,6 +60,8 @@ namespace arrow { +using internal::EndsWith; + namespace flight { namespace transport { namespace grpc { @@ -151,8 +154,8 @@ class GrpcClientInterceptorAdapter : public ::grpc::experimental::Interceptor { received_headers_ = true; CallHeaders headers; for (const auto& entry : metadata) { - headers.insert({util::string_view(entry.first.data(), entry.first.length()), - util::string_view(entry.second.data(), entry.second.length())}); + headers.insert({std::string_view(entry.first.data(), entry.first.length()), + std::string_view(entry.second.data(), entry.second.length())}); } for (const auto& middleware : middleware_) { middleware->ReceivedHeaders(headers); @@ -180,24 +183,24 @@ class GrpcClientInterceptorAdapterFactory std::vector> middleware; FlightMethod flight_method = FlightMethod::Invalid; - util::string_view method(info->method()); - if (method.ends_with("/Handshake")) { + std::string_view method(info->method()); + if (EndsWith(method, "/Handshake")) { flight_method = FlightMethod::Handshake; - } else if (method.ends_with("/ListFlights")) { + } else if (EndsWith(method, "/ListFlights")) { flight_method = FlightMethod::ListFlights; - } else if (method.ends_with("/GetFlightInfo")) { + } else if (EndsWith(method, "/GetFlightInfo")) { flight_method = FlightMethod::GetFlightInfo; - } else if (method.ends_with("/GetSchema")) { + } else if (EndsWith(method, "/GetSchema")) { flight_method = FlightMethod::GetSchema; - } else if (method.ends_with("/DoGet")) { + } else if (EndsWith(method, "/DoGet")) { flight_method = FlightMethod::DoGet; - } else if (method.ends_with("/DoPut")) { + } else if (EndsWith(method, "/DoPut")) { flight_method = 
FlightMethod::DoPut; - } else if (method.ends_with("/DoExchange")) { + } else if (EndsWith(method, "/DoExchange")) { flight_method = FlightMethod::DoExchange; - } else if (method.ends_with("/DoAction")) { + } else if (EndsWith(method, "/DoAction")) { flight_method = FlightMethod::DoAction; - } else if (method.ends_with("/ListActions")) { + } else if (EndsWith(method, "/ListActions")) { flight_method = FlightMethod::ListActions; } else { ARROW_LOG(WARNING) << "Unknown Flight method: " << info->method(); diff --git a/cpp/src/arrow/flight/transport/grpc/grpc_server.cc b/cpp/src/arrow/flight/transport/grpc/grpc_server.cc index 14daaa587654a..a643111e3b2b0 100644 --- a/cpp/src/arrow/flight/transport/grpc/grpc_server.cc +++ b/cpp/src/arrow/flight/transport/grpc/grpc_server.cc @@ -313,8 +313,8 @@ class GrpcServiceHandler final : public FlightService::Service { CallHeaders incoming_headers; for (const auto& entry : context->client_metadata()) { incoming_headers.insert( - {util::string_view(entry.first.data(), entry.first.length()), - util::string_view(entry.second.data(), entry.second.length())}); + {std::string_view(entry.first.data(), entry.first.length()), + std::string_view(entry.second.data(), entry.second.length())}); } GrpcAddServerHeaders outgoing_headers(context); diff --git a/cpp/src/arrow/flight/transport/ucx/ucx_client.cc b/cpp/src/arrow/flight/transport/ucx/ucx_client.cc index 14b5638adabf9..80124123d4ae5 100644 --- a/cpp/src/arrow/flight/transport/ucx/ucx_client.cc +++ b/cpp/src/arrow/flight/transport/ucx/ucx_client.cc @@ -581,7 +581,7 @@ class UcxClientImpl : public arrow::flight::internal::ClientTransport { ARROW_ASSIGN_OR_RAISE(auto incoming_message, driver->ReadNextFrame()); if (incoming_message->type == FrameType::kBuffer) { ARROW_ASSIGN_OR_RAISE( - *info, FlightInfo::Deserialize(util::string_view(*incoming_message->buffer))); + *info, FlightInfo::Deserialize(std::string_view(*incoming_message->buffer))); ARROW_ASSIGN_OR_RAISE(incoming_message, 
driver->ReadNextFrame()); } RETURN_NOT_OK(driver->ExpectFrameType(*incoming_message, FrameType::kHeaders)); diff --git a/cpp/src/arrow/flight/transport/ucx/ucx_internal.cc b/cpp/src/arrow/flight/transport/ucx/ucx_internal.cc index 373333663f8f7..318f6204ac9ee 100644 --- a/cpp/src/arrow/flight/transport/ucx/ucx_internal.cc +++ b/cpp/src/arrow/flight/transport/ucx/ucx_internal.cc @@ -180,7 +180,7 @@ arrow::Result HeadersFrame::Parse(std::unique_ptr buffer) return Status::Invalid("Buffer underflow, expected key ", i + 1, " to have length ", key_length, ", but only ", (end - payload), " bytes remain"); } - const util::string_view key(reinterpret_cast(payload), key_length); + const std::string_view key(reinterpret_cast(payload), key_length); payload += key_length; if (ARROW_PREDICT_FALSE((end - payload) < value_length)) { @@ -188,7 +188,7 @@ arrow::Result HeadersFrame::Parse(std::unique_ptr buffer) " to have length ", value_length, ", but only ", (end - payload), " bytes remain"); } - const util::string_view value(reinterpret_cast(payload), value_length); + const std::string_view value(reinterpret_cast(payload), value_length); payload += value_length; result.headers_.emplace_back(key, value); } @@ -243,7 +243,7 @@ arrow::Result HeadersFrame::Make( return Make(all_headers); } -arrow::Result HeadersFrame::Get(const std::string& key) { +arrow::Result HeadersFrame::Get(const std::string& key) { for (const auto& pair : headers_) { if (pair.first == key) return pair.second; } @@ -252,7 +252,7 @@ arrow::Result HeadersFrame::Get(const std::string& key) { Status HeadersFrame::GetStatus(Status* out) { static const std::string kUnknownMessage = "Server did not send status message header"; - util::string_view code_str, message_str; + std::string_view code_str, message_str; auto status = Get(kHeaderStatus).Value(&code_str); if (!status.ok()) { return Status::KeyError("Server did not send status code header ", kHeaderStatusCode); @@ -273,7 +273,7 @@ Status 
HeadersFrame::GetStatus(Status* out) { } *out = transport_status.ToStatus(); - util::string_view detail_str, bin_str; + std::string_view detail_str, bin_str; std::optional message, detail_message, detail_bin; if (!Get(kHeaderStatusCode).Value(&code_str).ok()) { // No Arrow status sent, go with the transport status @@ -363,7 +363,7 @@ Status PayloadHeaderFrame::ToFlightData(internal::FlightData* data) { return Status::Invalid("Buffer is too small: expected ", offset + size, " bytes but have ", buffer->size()); } - util::string_view desc(reinterpret_cast(buffer->data() + offset), size); + std::string_view desc(reinterpret_cast(buffer->data() + offset), size); data->descriptor.reset(new FlightDescriptor()); ARROW_ASSIGN_OR_RAISE(*data->descriptor, FlightDescriptor::Deserialize(desc)); offset += size; diff --git a/cpp/src/arrow/flight/transport/ucx/ucx_internal.h b/cpp/src/arrow/flight/transport/ucx/ucx_internal.h index f5b81ab414751..d14296db097b4 100644 --- a/cpp/src/arrow/flight/transport/ucx/ucx_internal.h +++ b/cpp/src/arrow/flight/transport/ucx/ucx_internal.h @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -35,7 +36,6 @@ #include "arrow/util/future.h" #include "arrow/util/logging.h" #include "arrow/util/macros.h" -#include "arrow/util/string_view.h" namespace arrow { namespace flight { @@ -191,8 +191,8 @@ struct Frame { std::unique_ptr buffer_) : type(type_), size(size_), counter(counter_), buffer(std::move(buffer_)) {} - util::string_view view() const { - return util::string_view(reinterpret_cast(buffer->data()), size); + std::string_view view() const { + return std::string_view(reinterpret_cast(buffer->data()), size); } /// \brief Parse a UCX active message header. 
This will not @@ -222,7 +222,7 @@ static constexpr uint32_t kUcpAmHandlerId = 0x1024; class HeadersFrame { public: /// \brief Get a header value (or an error if it was not found) - arrow::Result Get(const std::string& key); + arrow::Result Get(const std::string& key); /// \brief Extract the server-sent status. Status GetStatus(Status* out); /// \brief Parse the headers from the buffer. @@ -240,7 +240,7 @@ class HeadersFrame { private: std::unique_ptr buffer_; - std::vector> headers_; + std::vector> headers_; }; /// \brief A representation of a kPayloadHeader frame (i.e. all of the diff --git a/cpp/src/arrow/flight/transport/ucx/ucx_server.cc b/cpp/src/arrow/flight/transport/ucx/ucx_server.cc index d7ddbfab06ec5..398bc4381467c 100644 --- a/cpp/src/arrow/flight/transport/ucx/ucx_server.cc +++ b/cpp/src/arrow/flight/transport/ucx/ucx_server.cc @@ -362,7 +362,7 @@ class UcxServerImpl : public arrow::flight::internal::ServerTransport { SERVER_RETURN_NOT_OK(driver, driver->ExpectFrameType(*frame, FrameType::kBuffer)); FlightDescriptor descriptor; SERVER_RETURN_NOT_OK(driver, - FlightDescriptor::Deserialize(util::string_view(*frame->buffer)) + FlightDescriptor::Deserialize(std::string_view(*frame->buffer)) .Value(&descriptor)); std::unique_ptr info; diff --git a/cpp/src/arrow/flight/types.cc b/cpp/src/arrow/flight/types.cc index a505e6d6e1ecf..2122e57ccc141 100644 --- a/cpp/src/arrow/flight/types.cc +++ b/cpp/src/arrow/flight/types.cc @@ -19,6 +19,7 @@ #include #include +#include #include #include "arrow/buffer.h" @@ -29,7 +30,6 @@ #include "arrow/status.h" #include "arrow/table.h" #include "arrow/util/make_unique.h" -#include "arrow/util/string_view.h" #include "arrow/util/uri.h" namespace arrow { @@ -177,8 +177,7 @@ arrow::Result SchemaResult::SerializeToString() const { return out; } -arrow::Result SchemaResult::Deserialize( - arrow::util::string_view serialized) { +arrow::Result SchemaResult::Deserialize(std::string_view serialized) { pb::SchemaResult 
pb_schema_result; if (serialized.size() > static_cast(std::numeric_limits::max())) { return Status::Invalid("Serialized SchemaResult size should not exceed 2 GiB"); @@ -207,7 +206,7 @@ Status FlightDescriptor::SerializeToString(std::string* out) const { } arrow::Result FlightDescriptor::Deserialize( - arrow::util::string_view serialized) { + std::string_view serialized) { pb::FlightDescriptor pb_descriptor; if (serialized.size() > static_cast(std::numeric_limits::max())) { return Status::Invalid("Serialized FlightDescriptor size should not exceed 2 GiB"); @@ -244,7 +243,7 @@ Status Ticket::SerializeToString(std::string* out) const { return SerializeToString().Value(out); } -arrow::Result Ticket::Deserialize(arrow::util::string_view serialized) { +arrow::Result Ticket::Deserialize(std::string_view serialized) { pb::Ticket pb_ticket; if (serialized.size() > static_cast(std::numeric_limits::max())) { return Status::Invalid("Serialized Ticket size should not exceed 2 GiB"); @@ -308,7 +307,7 @@ Status FlightInfo::SerializeToString(std::string* out) const { } arrow::Result> FlightInfo::Deserialize( - arrow::util::string_view serialized) { + std::string_view serialized) { pb::FlightInfo pb_info; if (serialized.size() > static_cast(std::numeric_limits::max())) { return Status::Invalid("Serialized FlightInfo size should not exceed 2 GiB"); @@ -410,8 +409,7 @@ arrow::Result FlightEndpoint::SerializeToString() const { return out; } -arrow::Result FlightEndpoint::Deserialize( - arrow::util::string_view serialized) { +arrow::Result FlightEndpoint::Deserialize(std::string_view serialized) { pb::FlightEndpoint pb_flight_endpoint; if (serialized.size() > static_cast(std::numeric_limits::max())) { return Status::Invalid("Serialized FlightEndpoint size should not exceed 2 GiB"); @@ -441,7 +439,7 @@ arrow::Result ActionType::SerializeToString() const { return out; } -arrow::Result ActionType::Deserialize(arrow::util::string_view serialized) { +arrow::Result 
ActionType::Deserialize(std::string_view serialized) { pb::ActionType pb_action_type; if (serialized.size() > static_cast(std::numeric_limits::max())) { return Status::Invalid("Serialized ActionType size should not exceed 2 GiB"); @@ -471,7 +469,7 @@ arrow::Result Criteria::SerializeToString() const { return out; } -arrow::Result Criteria::Deserialize(arrow::util::string_view serialized) { +arrow::Result Criteria::Deserialize(std::string_view serialized) { pb::Criteria pb_criteria; if (serialized.size() > static_cast(std::numeric_limits::max())) { return Status::Invalid("Serialized Criteria size should not exceed 2 GiB"); @@ -502,7 +500,7 @@ arrow::Result Action::SerializeToString() const { return out; } -arrow::Result Action::Deserialize(arrow::util::string_view serialized) { +arrow::Result Action::Deserialize(std::string_view serialized) { pb::Action pb_action; if (serialized.size() > static_cast(std::numeric_limits::max())) { return Status::Invalid("Serialized Action size should not exceed 2 GiB"); @@ -532,7 +530,7 @@ arrow::Result Result::SerializeToString() const { return out; } -arrow::Result Result::Deserialize(arrow::util::string_view serialized) { +arrow::Result Result::Deserialize(std::string_view serialized) { pb::Result pb_result; if (serialized.size() > static_cast(std::numeric_limits::max())) { return Status::Invalid("Serialized Result size should not exceed 2 GiB"); @@ -645,7 +643,7 @@ bool BasicAuth::Equals(const BasicAuth& other) const { return (username == other.username) && (password == other.password); } -arrow::Result BasicAuth::Deserialize(arrow::util::string_view serialized) { +arrow::Result BasicAuth::Deserialize(std::string_view serialized) { pb::BasicAuth pb_result; if (serialized.size() > static_cast(std::numeric_limits::max())) { return Status::Invalid("Serialized BasicAuth size should not exceed 2 GiB"); diff --git a/cpp/src/arrow/flight/types.h b/cpp/src/arrow/flight/types.h index ae9867e44a1f5..6957c5992a328 100644 --- 
a/cpp/src/arrow/flight/types.h +++ b/cpp/src/arrow/flight/types.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -30,7 +31,6 @@ #include "arrow/ipc/options.h" #include "arrow/ipc/writer.h" #include "arrow/result.h" -#include "arrow/util/string_view.h" namespace arrow { @@ -153,7 +153,7 @@ struct ARROW_FLIGHT_EXPORT ActionType { arrow::Result SerializeToString() const; /// \brief Deserialize this message from its wire-format representation. - static arrow::Result Deserialize(arrow::util::string_view serialized); + static arrow::Result Deserialize(std::string_view serialized); }; /// \brief Opaque selection criteria for ListFlights RPC @@ -174,7 +174,7 @@ struct ARROW_FLIGHT_EXPORT Criteria { arrow::Result SerializeToString() const; /// \brief Deserialize this message from its wire-format representation. - static arrow::Result Deserialize(arrow::util::string_view serialized); + static arrow::Result Deserialize(std::string_view serialized); }; /// \brief An action to perform with the DoAction RPC @@ -198,7 +198,7 @@ struct ARROW_FLIGHT_EXPORT Action { arrow::Result SerializeToString() const; /// \brief Deserialize this message from its wire-format representation. - static arrow::Result Deserialize(arrow::util::string_view serialized); + static arrow::Result Deserialize(std::string_view serialized); }; /// \brief Opaque result returned after executing an action @@ -218,7 +218,7 @@ struct ARROW_FLIGHT_EXPORT Result { arrow::Result SerializeToString() const; /// \brief Deserialize this message from its wire-format representation. - static arrow::Result Deserialize(arrow::util::string_view serialized); + static arrow::Result Deserialize(std::string_view serialized); }; /// \brief message for simple auth @@ -236,7 +236,7 @@ struct ARROW_FLIGHT_EXPORT BasicAuth { } /// \brief Deserialize this message from its wire-format representation. 
- static arrow::Result Deserialize(arrow::util::string_view serialized); + static arrow::Result Deserialize(std::string_view serialized); /// \brief Serialize this message to its wire-format representation. arrow::Result SerializeToString() const; @@ -284,7 +284,7 @@ struct ARROW_FLIGHT_EXPORT FlightDescriptor { /// /// Useful when interoperating with non-Flight systems (e.g. REST /// services) that may want to return Flight types. - static arrow::Result Deserialize(arrow::util::string_view serialized); + static arrow::Result Deserialize(std::string_view serialized); ARROW_DEPRECATED("Deprecated in 8.0.0. Use Result-returning overload instead.") static Status Deserialize(const std::string& serialized, FlightDescriptor* out); @@ -334,7 +334,7 @@ struct ARROW_FLIGHT_EXPORT Ticket { /// /// Useful when interoperating with non-Flight systems (e.g. REST /// services) that may want to return Flight types. - static arrow::Result Deserialize(arrow::util::string_view serialized); + static arrow::Result Deserialize(std::string_view serialized); ARROW_DEPRECATED("Deprecated in 8.0.0. Use Result-returning overload instead.") static Status Deserialize(const std::string& serialized, Ticket* out); @@ -442,7 +442,7 @@ struct ARROW_FLIGHT_EXPORT FlightEndpoint { arrow::Result SerializeToString() const; /// \brief Deserialize this message from its wire-format representation. - static arrow::Result Deserialize(arrow::util::string_view serialized); + static arrow::Result Deserialize(std::string_view serialized); }; /// \brief Staging data structure for messages about to be put on the wire @@ -492,7 +492,7 @@ struct ARROW_FLIGHT_EXPORT SchemaResult { arrow::Result SerializeToString() const; /// \brief Deserialize this message from its wire-format representation. 
- static arrow::Result Deserialize(arrow::util::string_view serialized); + static arrow::Result Deserialize(std::string_view serialized); private: std::string raw_schema_; @@ -562,7 +562,7 @@ class ARROW_FLIGHT_EXPORT FlightInfo { /// Useful when interoperating with non-Flight systems (e.g. REST /// services) that may want to return Flight types. static arrow::Result> Deserialize( - arrow::util::string_view serialized); + std::string_view serialized); ARROW_DEPRECATED("Deprecated in 8.0.0. Use Result-returning overload instead.") static Status Deserialize(const std::string& serialized, diff --git a/cpp/src/arrow/io/buffered.cc b/cpp/src/arrow/io/buffered.cc index ccfe9a360ab98..e0e37c580269f 100644 --- a/cpp/src/arrow/io/buffered.cc +++ b/cpp/src/arrow/io/buffered.cc @@ -21,6 +21,7 @@ #include #include #include +#include #include #include "arrow/buffer.h" @@ -28,7 +29,6 @@ #include "arrow/memory_pool.h" #include "arrow/status.h" #include "arrow/util/logging.h" -#include "arrow/util/string_view.h" namespace arrow { namespace io { @@ -292,7 +292,7 @@ class BufferedInputStream::Impl : public BufferedBase { return ResizeBuffer(new_buffer_size); } - Result Peek(int64_t nbytes) { + Result Peek(int64_t nbytes) { if (raw_read_bound_ >= 0) { // Do not try to peek more than the total remaining number of bytes. 
nbytes = std::min(nbytes, bytes_buffered_ + (raw_read_bound_ - raw_read_total_)); @@ -324,8 +324,8 @@ class BufferedInputStream::Impl : public BufferedBase { nbytes = bytes_buffered_; } DCHECK(nbytes <= bytes_buffered_); // Enough bytes available - return util::string_view(reinterpret_cast(buffer_data_ + buffer_pos_), - static_cast(nbytes)); + return std::string_view(reinterpret_cast(buffer_data_ + buffer_pos_), + static_cast(nbytes)); } int64_t bytes_buffered() const { return bytes_buffered_; } @@ -458,7 +458,7 @@ std::shared_ptr BufferedInputStream::raw() const { return impl_->ra Result BufferedInputStream::DoTell() const { return impl_->Tell(); } -Result BufferedInputStream::DoPeek(int64_t nbytes) { +Result BufferedInputStream::DoPeek(int64_t nbytes) { return impl_->Peek(nbytes); } diff --git a/cpp/src/arrow/io/buffered.h b/cpp/src/arrow/io/buffered.h index 8116613fa4ee6..01c0a016daba0 100644 --- a/cpp/src/arrow/io/buffered.h +++ b/cpp/src/arrow/io/buffered.h @@ -21,10 +21,10 @@ #include #include +#include #include "arrow/io/concurrency.h" #include "arrow/io/interfaces.h" -#include "arrow/util/string_view.h" #include "arrow/util/visibility.h" namespace arrow { @@ -157,7 +157,7 @@ class ARROW_EXPORT BufferedInputStream /// \brief Return a zero-copy string view referencing buffered data, /// but do not advance the position of the stream. 
Buffers data and /// expands the buffer size if necessary - Result DoPeek(int64_t nbytes) override; + Result DoPeek(int64_t nbytes) override; class ARROW_NO_EXPORT Impl; std::unique_ptr impl_; diff --git a/cpp/src/arrow/io/buffered_test.cc b/cpp/src/arrow/io/buffered_test.cc index f6f6d61f84981..520eaaa93567a 100644 --- a/cpp/src/arrow/io/buffered_test.cc +++ b/cpp/src/arrow/io/buffered_test.cc @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -44,7 +45,6 @@ #include "arrow/status.h" #include "arrow/testing/gtest_util.h" #include "arrow/util/io_util.h" -#include "arrow/util/string_view.h" namespace arrow { namespace io { @@ -503,7 +503,7 @@ class TestBufferedInputStreamBound : public ::testing::Test { TEST_F(TestBufferedInputStreamBound, Basics) { std::shared_ptr buffer; - util::string_view view; + std::string_view view; // source is at offset 10 ASSERT_OK_AND_ASSIGN(view, stream_->Peek(10)); @@ -559,7 +559,7 @@ TEST_F(TestBufferedInputStreamBound, Basics) { TEST_F(TestBufferedInputStreamBound, LargeFirstPeek) { // Test a first peek larger than chunk size std::shared_ptr buffer; - util::string_view view; + std::string_view view; int64_t n = 70; ASSERT_GT(n, chunk_size_); @@ -592,7 +592,7 @@ TEST_F(TestBufferedInputStreamBound, LargeFirstPeek) { TEST_F(TestBufferedInputStreamBound, UnboundedPeek) { CreateExample(/*bounded=*/false); - util::string_view view; + std::string_view view; ASSERT_OK_AND_ASSIGN(view, stream_->Peek(10)); ASSERT_EQ(10, view.size()); ASSERT_EQ(50, stream_->bytes_buffered()); diff --git a/cpp/src/arrow/io/concurrency.h b/cpp/src/arrow/io/concurrency.h index b41ad2c135070..43ceb8debcecb 100644 --- a/cpp/src/arrow/io/concurrency.h +++ b/cpp/src/arrow/io/concurrency.h @@ -116,7 +116,7 @@ class ARROW_EXPORT InputStreamConcurrencyWrapper : public InputStream { return derived()->DoRead(nbytes); } - Result Peek(int64_t nbytes) final { + Result Peek(int64_t nbytes) final { auto guard = lock_.exclusive_guard(); return 
derived()->DoPeek(nbytes); } @@ -132,7 +132,7 @@ class ARROW_EXPORT InputStreamConcurrencyWrapper : public InputStream { And optionally: Status DoAbort() override; - Result DoPeek(int64_t nbytes) override; + Result DoPeek(int64_t nbytes) override; These methods should be protected in the derived class and InputStreamConcurrencyWrapper declared as a friend with @@ -145,7 +145,7 @@ class ARROW_EXPORT InputStreamConcurrencyWrapper : public InputStream { // have derived classes itself. virtual Status DoAbort() { return derived()->DoClose(); } - virtual Result DoPeek(int64_t ARROW_ARG_UNUSED(nbytes)) { + virtual Result DoPeek(int64_t ARROW_ARG_UNUSED(nbytes)) { return Status::NotImplemented("Peek not implemented"); } @@ -186,7 +186,7 @@ class ARROW_EXPORT RandomAccessFileConcurrencyWrapper : public RandomAccessFile return derived()->DoRead(nbytes); } - Result Peek(int64_t nbytes) final { + Result Peek(int64_t nbytes) final { auto guard = lock_.exclusive_guard(); return derived()->DoPeek(nbytes); } @@ -232,7 +232,7 @@ class ARROW_EXPORT RandomAccessFileConcurrencyWrapper : public RandomAccessFile And optionally: Status DoAbort() override; - Result DoPeek(int64_t nbytes) override; + Result DoPeek(int64_t nbytes) override; These methods should be protected in the derived class and RandomAccessFileConcurrencyWrapper declared as a friend with @@ -245,7 +245,7 @@ class ARROW_EXPORT RandomAccessFileConcurrencyWrapper : public RandomAccessFile // have derived classes itself. 
virtual Status DoAbort() { return derived()->DoClose(); } - virtual Result DoPeek(int64_t ARROW_ARG_UNUSED(nbytes)) { + virtual Result DoPeek(int64_t ARROW_ARG_UNUSED(nbytes)) { return Status::NotImplemented("Peek not implemented"); } diff --git a/cpp/src/arrow/io/file_test.cc b/cpp/src/arrow/io/file_test.cc index 8165c9c0b49c4..b5c8797b0b07f 100644 --- a/cpp/src/arrow/io/file_test.cc +++ b/cpp/src/arrow/io/file_test.cc @@ -67,7 +67,7 @@ class FileTestFixture : public ::testing::Test { EnsureFileDeleted(); } - std::string TempFile(arrow::util::string_view path) { + std::string TempFile(std::string_view path) { return temp_dir_->path().Join(std::string(path)).ValueOrDie().ToString(); } @@ -563,7 +563,7 @@ class TestMemoryMappedFile : public ::testing::Test, public MemoryMapFixture { void TearDown() override { MemoryMapFixture::TearDown(); } - std::string TempFile(arrow::util::string_view path) { + std::string TempFile(std::string_view path) { return temp_dir_->path().Join(std::string(path)).ValueOrDie().ToString(); } diff --git a/cpp/src/arrow/io/interfaces.cc b/cpp/src/arrow/io/interfaces.cc index 1dfb0bdf8ad16..238e297a7f4c1 100644 --- a/cpp/src/arrow/io/interfaces.cc +++ b/cpp/src/arrow/io/interfaces.cc @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -38,7 +39,6 @@ #include "arrow/util/io_util.h" #include "arrow/util/iterator.h" #include "arrow/util/logging.h" -#include "arrow/util/string_view.h" #include "arrow/util/thread_pool.h" namespace arrow { @@ -107,7 +107,7 @@ const IOContext& Readable::io_context() const { return g_default_io_context; } Status InputStream::Advance(int64_t nbytes) { return Read(nbytes).status(); } -Result InputStream::Peek(int64_t ARROW_ARG_UNUSED(nbytes)) { +Result InputStream::Peek(int64_t ARROW_ARG_UNUSED(nbytes)) { return Status::NotImplemented("Peek not implemented"); } @@ -178,7 +178,7 @@ Status RandomAccessFile::WillNeed(const std::vector& ranges) { return Status::OK(); } -Status 
Writable::Write(util::string_view data) { +Status Writable::Write(std::string_view data) { return Write(data.data(), static_cast(data.size())); } diff --git a/cpp/src/arrow/io/interfaces.h b/cpp/src/arrow/io/interfaces.h index 70c0dd8520fb6..86e9ad2d5248a 100644 --- a/cpp/src/arrow/io/interfaces.h +++ b/cpp/src/arrow/io/interfaces.h @@ -20,13 +20,13 @@ #include #include #include +#include #include #include "arrow/io/type_fwd.h" #include "arrow/type_fwd.h" #include "arrow/util/cancel.h" #include "arrow/util/macros.h" -#include "arrow/util/string_view.h" #include "arrow/util/type_fwd.h" #include "arrow/util/visibility.h" @@ -175,7 +175,7 @@ class ARROW_EXPORT Writable { /// \brief Flush buffered bytes, if any virtual Status Flush(); - Status Write(util::string_view data); + Status Write(std::string_view data); }; class ARROW_EXPORT Readable { @@ -227,7 +227,7 @@ class ARROW_EXPORT InputStream : virtual public FileInterface, /// May return NotImplemented on streams that don't support it. /// /// \param[in] nbytes the maximum number of bytes to see - virtual Result Peek(int64_t nbytes); + virtual Result Peek(int64_t nbytes); /// \brief Return true if InputStream is capable of zero copy Buffer reads /// diff --git a/cpp/src/arrow/io/memory.cc b/cpp/src/arrow/io/memory.cc index 6495242e63bed..9b2b03133238e 100644 --- a/cpp/src/arrow/io/memory.cc +++ b/cpp/src/arrow/io/memory.cc @@ -274,7 +274,7 @@ BufferReader::BufferReader(const uint8_t* data, int64_t size) BufferReader::BufferReader(const Buffer& buffer) : BufferReader(buffer.data(), buffer.size()) {} -BufferReader::BufferReader(const util::string_view& data) +BufferReader::BufferReader(const std::string_view& data) : BufferReader(reinterpret_cast(data.data()), static_cast(data.size())) {} @@ -290,12 +290,12 @@ Result BufferReader::DoTell() const { return position_; } -Result BufferReader::DoPeek(int64_t nbytes) { +Result BufferReader::DoPeek(int64_t nbytes) { RETURN_NOT_OK(CheckClosed()); const int64_t bytes_available 
= std::min(nbytes, size_ - position_); - return util::string_view(reinterpret_cast(data_) + position_, - static_cast(bytes_available)); + return std::string_view(reinterpret_cast(data_) + position_, + static_cast(bytes_available)); } bool BufferReader::supports_zero_copy() const { return true; } diff --git a/cpp/src/arrow/io/memory.h b/cpp/src/arrow/io/memory.h index 8213439ef7493..5c35a6015befa 100644 --- a/cpp/src/arrow/io/memory.h +++ b/cpp/src/arrow/io/memory.h @@ -21,12 +21,12 @@ #include #include +#include #include #include "arrow/io/concurrency.h" #include "arrow/io/interfaces.h" #include "arrow/type_fwd.h" -#include "arrow/util/string_view.h" #include "arrow/util/visibility.h" namespace arrow { @@ -149,9 +149,9 @@ class ARROW_EXPORT BufferReader explicit BufferReader(const Buffer& buffer); BufferReader(const uint8_t* data, int64_t size); - /// \brief Instantiate from std::string or arrow::util::string_view. Does not + /// \brief Instantiate from std::string or std::string_view. Does not /// own data - explicit BufferReader(const util::string_view& data); + explicit BufferReader(const std::string_view& data); bool closed() const override; @@ -173,7 +173,7 @@ class ARROW_EXPORT BufferReader Result> DoRead(int64_t nbytes); Result DoReadAt(int64_t position, int64_t nbytes, void* out); Result> DoReadAt(int64_t position, int64_t nbytes); - Result DoPeek(int64_t nbytes) override; + Result DoPeek(int64_t nbytes) override; Result DoTell() const; Status DoSeek(int64_t position); diff --git a/cpp/src/arrow/io/memory_test.cc b/cpp/src/arrow/io/memory_test.cc index d361243ad6f53..cdcbe240f858a 100644 --- a/cpp/src/arrow/io/memory_test.cc +++ b/cpp/src/arrow/io/memory_test.cc @@ -162,10 +162,10 @@ TEST(TestFixedSizeBufferWriter, InvalidWrites) { TEST(TestBufferReader, FromStrings) { // ARROW-3291: construct BufferReader from std::string or - // arrow::util::string_view + // std::string_view std::string data = "data123456"; - auto view = util::string_view(data); + auto 
view = std::string_view(data); BufferReader reader1(data); BufferReader reader2(view); @@ -208,7 +208,7 @@ TEST(TestBufferReader, Peek) { BufferReader reader(std::make_shared(data)); - util::string_view view; + std::string_view view; ASSERT_OK_AND_ASSIGN(view, reader.Peek(4)); @@ -378,7 +378,7 @@ template void TestSlowInputStream() { using clock = std::chrono::high_resolution_clock; - auto stream = std::make_shared(util::string_view("abcdefghijkl")); + auto stream = std::make_shared(std::string_view("abcdefghijkl")); const double latency = 0.6; auto slow = std::make_shared(stream, latency); @@ -395,8 +395,8 @@ void TestSlowInputStream() { ARROW_UNUSED(dt); #endif - ASSERT_OK_AND_ASSIGN(util::string_view view, slow->Peek(4)); - ASSERT_EQ(view, util::string_view("ghij")); + ASSERT_OK_AND_ASSIGN(std::string_view view, slow->Peek(4)); + ASSERT_EQ(view, std::string_view("ghij")); ASSERT_OK(slow->Close()); ASSERT_TRUE(slow->closed()); @@ -493,7 +493,7 @@ class TestTransformInputStream : public ::testing::Test { TransformInputStream::TransformFunc transform() const { return T(); } void TestEmptyStream() { - auto wrapped = std::make_shared(util::string_view()); + auto wrapped = std::make_shared(std::string_view()); auto stream = std::make_shared(wrapped, transform()); ASSERT_OK_AND_EQ(0, stream->Tell()); diff --git a/cpp/src/arrow/io/slow.cc b/cpp/src/arrow/io/slow.cc index 1042691fa59c7..7c11a484fc1e9 100644 --- a/cpp/src/arrow/io/slow.cc +++ b/cpp/src/arrow/io/slow.cc @@ -97,7 +97,7 @@ Result> SlowInputStream::Read(int64_t nbytes) { return stream_->Read(nbytes); } -Result SlowInputStream::Peek(int64_t nbytes) { +Result SlowInputStream::Peek(int64_t nbytes) { return stream_->Peek(nbytes); } @@ -140,7 +140,7 @@ Result> SlowRandomAccessFile::ReadAt(int64_t position, return stream_->ReadAt(position, nbytes); } -Result SlowRandomAccessFile::Peek(int64_t nbytes) { +Result SlowRandomAccessFile::Peek(int64_t nbytes) { return stream_->Peek(nbytes); } diff --git 
a/cpp/src/arrow/io/slow.h b/cpp/src/arrow/io/slow.h index 1ed90f0c2e920..fdcc56dfa6af6 100644 --- a/cpp/src/arrow/io/slow.h +++ b/cpp/src/arrow/io/slow.h @@ -85,7 +85,7 @@ class ARROW_EXPORT SlowInputStream : public SlowInputStreamBase { Result Read(int64_t nbytes, void* out) override; Result> Read(int64_t nbytes) override; - Result Peek(int64_t nbytes) override; + Result Peek(int64_t nbytes) override; Result Tell() const override; }; @@ -107,7 +107,7 @@ class ARROW_EXPORT SlowRandomAccessFile : public SlowInputStreamBase> Read(int64_t nbytes) override; Result ReadAt(int64_t position, int64_t nbytes, void* out) override; Result> ReadAt(int64_t position, int64_t nbytes) override; - Result Peek(int64_t nbytes) override; + Result Peek(int64_t nbytes) override; Result GetSize() override; Status Seek(int64_t position) override; diff --git a/cpp/src/arrow/ipc/json_simple.cc b/cpp/src/arrow/ipc/json_simple.cc index 667fd00ae2142..1b93aeb2f2854 100644 --- a/cpp/src/arrow/ipc/json_simple.cc +++ b/cpp/src/arrow/ipc/json_simple.cc @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -36,7 +37,6 @@ #include "arrow/util/checked_cast.h" #include "arrow/util/decimal.h" #include "arrow/util/logging.h" -#include "arrow/util/string_view.h" #include "arrow/util/value_parsing.h" #include "arrow/json/rapidjson_defs.h" @@ -317,7 +317,7 @@ class DecimalConverter final if (json_obj.IsString()) { int32_t precision, scale; DecimalValue d; - auto view = util::string_view(json_obj.GetString(), json_obj.GetStringLength()); + auto view = std::string_view(json_obj.GetString(), json_obj.GetStringLength()); RETURN_NOT_OK(DecimalValue::FromString(view, &d, &precision, &scale)); if (scale != decimal_type_->scale()) { return Status::Invalid("Invalid scale for decimal: expected ", @@ -359,7 +359,7 @@ class TimestampConverter final : public ConcreteConverter { if (json_obj.IsNumber()) { RETURN_NOT_OK(ConvertNumber(json_obj, *this->type_, &value)); } else if (json_obj.IsString()) 
{ - util::string_view view(json_obj.GetString(), json_obj.GetStringLength()); + std::string_view view(json_obj.GetString(), json_obj.GetStringLength()); if (!ParseValue(*timestamp_type_, view.data(), view.size(), &value)) { return Status::Invalid("couldn't parse timestamp from ", view); } @@ -461,7 +461,7 @@ class StringConverter final return this->AppendNull(); } if (json_obj.IsString()) { - auto view = util::string_view(json_obj.GetString(), json_obj.GetStringLength()); + auto view = std::string_view(json_obj.GetString(), json_obj.GetStringLength()); return builder_->Append(view); } else { return JSONTypeError("string", json_obj.GetType()); @@ -492,7 +492,7 @@ class FixedSizeBinaryConverter final return this->AppendNull(); } if (json_obj.IsString()) { - auto view = util::string_view(json_obj.GetString(), json_obj.GetStringLength()); + auto view = std::string_view(json_obj.GetString(), json_obj.GetStringLength()); if (view.length() != static_cast(builder_->byte_width())) { std::stringstream ss; ss << "Invalid string length " << view.length() << " in JSON input for " @@ -906,7 +906,7 @@ Status GetConverter(const std::shared_ptr& type, } // namespace Result> ArrayFromJSON(const std::shared_ptr& type, - util::string_view json_string) { + std::string_view json_string) { std::shared_ptr converter; RETURN_NOT_OK(GetConverter(type, &converter)); @@ -926,12 +926,12 @@ Result> ArrayFromJSON(const std::shared_ptr& ty Result> ArrayFromJSON(const std::shared_ptr& type, const std::string& json_string) { - return ArrayFromJSON(type, util::string_view(json_string)); + return ArrayFromJSON(type, std::string_view(json_string)); } Result> ArrayFromJSON(const std::shared_ptr& type, const char* json_string) { - return ArrayFromJSON(type, util::string_view(json_string)); + return ArrayFromJSON(type, std::string_view(json_string)); } Status ChunkedArrayFromJSON(const std::shared_ptr& type, @@ -948,8 +948,8 @@ Status ChunkedArrayFromJSON(const std::shared_ptr& type, } Status 
DictArrayFromJSON(const std::shared_ptr& type, - util::string_view indices_json, - util::string_view dictionary_json, std::shared_ptr* out) { + std::string_view indices_json, std::string_view dictionary_json, + std::shared_ptr* out) { if (type->id() != Type::DICTIONARY) { return Status::TypeError("DictArrayFromJSON requires dictionary type, got ", *type); } @@ -965,8 +965,8 @@ Status DictArrayFromJSON(const std::shared_ptr& type, .Value(out); } -Status ScalarFromJSON(const std::shared_ptr& type, - util::string_view json_string, std::shared_ptr* out) { +Status ScalarFromJSON(const std::shared_ptr& type, std::string_view json_string, + std::shared_ptr* out) { std::shared_ptr converter; RETURN_NOT_OK(GetConverter(type, &converter)); @@ -985,7 +985,7 @@ Status ScalarFromJSON(const std::shared_ptr& type, } Status DictScalarFromJSON(const std::shared_ptr& type, - util::string_view index_json, util::string_view dictionary_json, + std::string_view index_json, std::string_view dictionary_json, std::shared_ptr* out) { if (type->id() != Type::DICTIONARY) { return Status::TypeError("DictScalarFromJSON requires dictionary type, got ", *type); diff --git a/cpp/src/arrow/ipc/json_simple.h b/cpp/src/arrow/ipc/json_simple.h index 2fb2e83837563..3a730ee6a3f19 100644 --- a/cpp/src/arrow/ipc/json_simple.h +++ b/cpp/src/arrow/ipc/json_simple.h @@ -21,10 +21,10 @@ #include #include +#include #include "arrow/status.h" #include "arrow/type_fwd.h" -#include "arrow/util/string_view.h" #include "arrow/util/visibility.h" namespace arrow { @@ -42,7 +42,7 @@ Result> ArrayFromJSON(const std::shared_ptr&, ARROW_EXPORT Result> ArrayFromJSON(const std::shared_ptr&, - util::string_view json); + std::string_view json); ARROW_EXPORT Result> ArrayFromJSON(const std::shared_ptr&, @@ -54,17 +54,16 @@ Status ChunkedArrayFromJSON(const std::shared_ptr& type, std::shared_ptr* out); ARROW_EXPORT -Status DictArrayFromJSON(const std::shared_ptr&, util::string_view indices_json, - util::string_view 
dictionary_json, std::shared_ptr* out); +Status DictArrayFromJSON(const std::shared_ptr&, std::string_view indices_json, + std::string_view dictionary_json, std::shared_ptr* out); ARROW_EXPORT -Status ScalarFromJSON(const std::shared_ptr&, util::string_view json, +Status ScalarFromJSON(const std::shared_ptr&, std::string_view json, std::shared_ptr* out); ARROW_EXPORT -Status DictScalarFromJSON(const std::shared_ptr&, util::string_view index_json, - util::string_view dictionary_json, - std::shared_ptr* out); +Status DictScalarFromJSON(const std::shared_ptr&, std::string_view index_json, + std::string_view dictionary_json, std::shared_ptr* out); } // namespace json } // namespace internal diff --git a/cpp/src/arrow/ipc/read_write_test.cc b/cpp/src/arrow/ipc/read_write_test.cc index be6fd513e5eb6..b556c8ed34b0f 100644 --- a/cpp/src/arrow/ipc/read_write_test.cc +++ b/cpp/src/arrow/ipc/read_write_test.cc @@ -381,7 +381,7 @@ class IpcTestFixture : public io::MemoryMapFixture, public ExtensionTypesMixin { ASSERT_OK_AND_ASSIGN(temp_dir_, TemporaryDir::Make("ipc-test-")); } - std::string TempFile(util::string_view file) { + std::string TempFile(std::string_view file) { return temp_dir_->path().Join(std::string(file)).ValueOrDie().ToString(); } @@ -891,7 +891,7 @@ class RecursionLimits : public ::testing::Test, public io::MemoryMapFixture { ASSERT_OK_AND_ASSIGN(temp_dir_, TemporaryDir::Make("ipc-recursion-limits-test-")); } - std::string TempFile(util::string_view file) { + std::string TempFile(std::string_view file) { return temp_dir_->path().Join(std::string(file)).ValueOrDie().ToString(); } diff --git a/cpp/src/arrow/json/chunked_builder_test.cc b/cpp/src/arrow/json/chunked_builder_test.cc index 2d89ab9b026a6..d1d6e5e5fc3a7 100644 --- a/cpp/src/arrow/json/chunked_builder_test.cc +++ b/cpp/src/arrow/json/chunked_builder_test.cc @@ -35,7 +35,7 @@ namespace arrow { namespace json { -using util::string_view; +using std::string_view; using internal::checked_cast; using 
internal::GetCpuThreadPool; diff --git a/cpp/src/arrow/json/chunker.cc b/cpp/src/arrow/json/chunker.cc index b4b4d31eb9442..362d8e13f5fec 100644 --- a/cpp/src/arrow/json/chunker.cc +++ b/cpp/src/arrow/json/chunker.cc @@ -18,6 +18,7 @@ #include "arrow/json/chunker.h" #include +#include #include #include @@ -28,12 +29,11 @@ #include "arrow/json/options.h" #include "arrow/util/logging.h" #include "arrow/util/make_unique.h" -#include "arrow/util/string_view.h" namespace arrow { using internal::make_unique; -using util::string_view; +using std::string_view; namespace json { @@ -140,7 +140,7 @@ class ParsingBoundaryFinder : public BoundaryFinder { return Status::OK(); } - Status FindLast(util::string_view block, int64_t* out_pos) override { + Status FindLast(std::string_view block, int64_t* out_pos) override { const size_t block_length = block.size(); size_t consumed_length = 0; while (consumed_length < block_length) { @@ -164,7 +164,7 @@ class ParsingBoundaryFinder : public BoundaryFinder { return Status::OK(); } - Status FindNth(util::string_view partial, util::string_view block, int64_t count, + Status FindNth(std::string_view partial, std::string_view block, int64_t count, int64_t* out_pos, int64_t* num_found) override { return Status::NotImplemented("ParsingBoundaryFinder::FindNth"); } diff --git a/cpp/src/arrow/json/chunker_test.cc b/cpp/src/arrow/json/chunker_test.cc index 1b4ea4d0824f9..ed1328fa60171 100644 --- a/cpp/src/arrow/json/chunker_test.cc +++ b/cpp/src/arrow/json/chunker_test.cc @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -28,16 +29,19 @@ #include "arrow/json/test_common.h" #include "arrow/testing/gtest_util.h" #include "arrow/util/logging.h" -#include "arrow/util/string_view.h" +#include "arrow/util/string.h" namespace arrow { + +using internal::StartsWith; + namespace json { // Use no nested objects and no string literals containing braces in this test. 
// This way the positions of '{' and '}' can be used as simple proxies // for object begin/end. -using util::string_view; +using std::string_view; template static std::shared_ptr join(Lines&& lines, std::string delimiter, @@ -154,10 +158,10 @@ void AssertStraddledChunking(Chunker& chunker, const std::shared_ptr& bu AssertChunking(chunker, first_half, 1); std::shared_ptr first_whole, partial; ASSERT_OK(chunker.Process(first_half, &first_whole, &partial)); - ASSERT_TRUE(string_view(*first_half).starts_with(string_view(*first_whole))); + ASSERT_TRUE(StartsWith(std::string_view(*first_half), std::string_view(*first_whole))); std::shared_ptr completion, rest; ASSERT_OK(chunker.ProcessWithPartial(partial, second_half, &completion, &rest)); - ASSERT_TRUE(string_view(*second_half).starts_with(string_view(*completion))); + ASSERT_TRUE(StartsWith(std::string_view(*second_half), std::string_view(*completion))); std::shared_ptr straddling; ASSERT_OK_AND_ASSIGN(straddling, ConcatenateBuffers({partial, completion})); auto length = ConsumeWholeObject(&straddling); diff --git a/cpp/src/arrow/json/converter.cc b/cpp/src/arrow/json/converter.cc index a2f584c0b7ff2..d677be25ae492 100644 --- a/cpp/src/arrow/json/converter.cc +++ b/cpp/src/arrow/json/converter.cc @@ -18,6 +18,7 @@ #include "arrow/json/converter.h" #include +#include #include #include "arrow/array.h" @@ -30,13 +31,12 @@ #include "arrow/util/checked_cast.h" #include "arrow/util/decimal.h" #include "arrow/util/logging.h" -#include "arrow/util/string_view.h" #include "arrow/util/value_parsing.h" namespace arrow { using internal::checked_cast; -using util::string_view; +using std::string_view; namespace json { diff --git a/cpp/src/arrow/json/object_parser.cc b/cpp/src/arrow/json/object_parser.cc index c857cd537e7a5..ba4a42aec4c6e 100644 --- a/cpp/src/arrow/json/object_parser.cc +++ b/cpp/src/arrow/json/object_parser.cc @@ -28,7 +28,7 @@ namespace rj = arrow::rapidjson; class ObjectParser::Impl { public: - Status 
Parse(arrow::util::string_view json) { + Status Parse(std::string_view json) { document_.Parse(reinterpret_cast(json.data()), static_cast(json.size())); @@ -70,7 +70,7 @@ ObjectParser::ObjectParser() : impl_(new ObjectParser::Impl()) {} ObjectParser::~ObjectParser() = default; -Status ObjectParser::Parse(arrow::util::string_view json) { return impl_->Parse(json); } +Status ObjectParser::Parse(std::string_view json) { return impl_->Parse(json); } Result ObjectParser::GetString(const char* key) const { return impl_->GetString(key); diff --git a/cpp/src/arrow/json/object_parser.h b/cpp/src/arrow/json/object_parser.h index ef93201651ab1..8f23923d1cecc 100644 --- a/cpp/src/arrow/json/object_parser.h +++ b/cpp/src/arrow/json/object_parser.h @@ -18,9 +18,9 @@ #pragma once #include +#include #include "arrow/result.h" -#include "arrow/util/string_view.h" #include "arrow/util/visibility.h" namespace arrow { @@ -34,7 +34,7 @@ class ARROW_EXPORT ObjectParser { ObjectParser(); ~ObjectParser(); - Status Parse(arrow::util::string_view json); + Status Parse(std::string_view json); Result GetString(const char* key) const; Result GetBool(const char* key) const; diff --git a/cpp/src/arrow/json/object_writer.cc b/cpp/src/arrow/json/object_writer.cc index 06d09f81e9440..3277807880ce5 100644 --- a/cpp/src/arrow/json/object_writer.cc +++ b/cpp/src/arrow/json/object_writer.cc @@ -32,7 +32,7 @@ class ObjectWriter::Impl { public: Impl() : root_(rj::kObjectType) {} - void SetString(arrow::util::string_view key, arrow::util::string_view value) { + void SetString(std::string_view key, std::string_view value) { rj::Document::AllocatorType& allocator = document_.GetAllocator(); rj::Value str_key(key.data(), allocator); @@ -41,7 +41,7 @@ class ObjectWriter::Impl { root_.AddMember(str_key, str_value, allocator); } - void SetBool(arrow::util::string_view key, bool value) { + void SetBool(std::string_view key, bool value) { rj::Document::AllocatorType& allocator = document_.GetAllocator(); rj::Value 
str_key(key.data(), allocator); @@ -66,12 +66,11 @@ ObjectWriter::ObjectWriter() : impl_(new ObjectWriter::Impl()) {} ObjectWriter::~ObjectWriter() = default; -void ObjectWriter::SetString(arrow::util::string_view key, - arrow::util::string_view value) { +void ObjectWriter::SetString(std::string_view key, std::string_view value) { impl_->SetString(key, value); } -void ObjectWriter::SetBool(arrow::util::string_view key, bool value) { +void ObjectWriter::SetBool(std::string_view key, bool value) { impl_->SetBool(key, value); } diff --git a/cpp/src/arrow/json/object_writer.h b/cpp/src/arrow/json/object_writer.h index 55ff0ce52bc02..b15b09dbdacfc 100644 --- a/cpp/src/arrow/json/object_writer.h +++ b/cpp/src/arrow/json/object_writer.h @@ -18,8 +18,8 @@ #pragma once #include +#include -#include "arrow/util/string_view.h" #include "arrow/util/visibility.h" namespace arrow { @@ -33,8 +33,8 @@ class ARROW_EXPORT ObjectWriter { ObjectWriter(); ~ObjectWriter(); - void SetString(arrow::util::string_view key, arrow::util::string_view value); - void SetBool(arrow::util::string_view key, bool value); + void SetString(std::string_view key, std::string_view value); + void SetBool(std::string_view key, bool value); std::string Serialize(); diff --git a/cpp/src/arrow/json/parser.cc b/cpp/src/arrow/json/parser.cc index 815fa7dc7b7fd..3774b578a83dc 100644 --- a/cpp/src/arrow/json/parser.cc +++ b/cpp/src/arrow/json/parser.cc @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -36,7 +37,6 @@ #include "arrow/util/checked_cast.h" #include "arrow/util/logging.h" #include "arrow/util/make_unique.h" -#include "arrow/util/string_view.h" #include "arrow/util/trie.h" #include "arrow/visit_type_inline.h" @@ -45,7 +45,7 @@ namespace arrow { using internal::BitsetStack; using internal::checked_cast; using internal::make_unique; -using util::string_view; +using std::string_view; namespace json { @@ -89,7 +89,7 @@ static arrow::internal::Trie MakeFromTagTrie() { Kind::type 
Kind::FromTag(const std::shared_ptr& tag) { static arrow::internal::Trie name_to_kind = MakeFromTagTrie(); DCHECK_NE(tag->FindKey("json_kind"), -1); - util::string_view name = tag->value(tag->FindKey("json_kind")); + std::string_view name = tag->value(tag->FindKey("json_kind")); DCHECK_NE(name_to_kind.Find(name), -1); return static_cast(name_to_kind.Find(name)); } diff --git a/cpp/src/arrow/json/parser_test.cc b/cpp/src/arrow/json/parser_test.cc index 2a44ed8375ece..e1f346bda3b49 100644 --- a/cpp/src/arrow/json/parser_test.cc +++ b/cpp/src/arrow/json/parser_test.cc @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -29,7 +30,6 @@ #include "arrow/status.h" #include "arrow/testing/gtest_util.h" #include "arrow/util/checked_cast.h" -#include "arrow/util/string_view.h" namespace arrow { @@ -37,7 +37,7 @@ using internal::checked_cast; namespace json { -using util::string_view; +using std::string_view; void AssertUnconvertedStructArraysEqual(const StructArray& expected, const StructArray& actual); diff --git a/cpp/src/arrow/json/reader.cc b/cpp/src/arrow/json/reader.cc index 18aed0235ff90..85e527c8bda70 100644 --- a/cpp/src/arrow/json/reader.cc +++ b/cpp/src/arrow/json/reader.cc @@ -17,6 +17,7 @@ #include "arrow/json/reader.h" +#include #include #include @@ -33,13 +34,12 @@ #include "arrow/util/checked_cast.h" #include "arrow/util/iterator.h" #include "arrow/util/logging.h" -#include "arrow/util/string_view.h" #include "arrow/util/task_group.h" #include "arrow/util/thread_pool.h" namespace arrow { -using util::string_view; +using std::string_view; using internal::checked_cast; using internal::GetCpuThreadPool; diff --git a/cpp/src/arrow/json/reader_test.cc b/cpp/src/arrow/json/reader_test.cc index 976343b52118d..4037bf0be66d1 100644 --- a/cpp/src/arrow/json/reader_test.cc +++ b/cpp/src/arrow/json/reader_test.cc @@ -31,7 +31,7 @@ namespace arrow { namespace json { -using util::string_view; +using std::string_view; using internal::checked_cast; @@ -43,7 
+43,7 @@ class ReaderTest : public ::testing::TestWithParam { read_options_, parse_options_)); } - void SetUpReader(util::string_view input) { + void SetUpReader(std::string_view input) { ASSERT_OK(MakeStream(input, &input_)); SetUpReader(); } diff --git a/cpp/src/arrow/json/test_common.h b/cpp/src/arrow/json/test_common.h index 508be0c9102bb..18007a4963845 100644 --- a/cpp/src/arrow/json/test_common.h +++ b/cpp/src/arrow/json/test_common.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -34,7 +35,6 @@ #include "arrow/testing/gtest_util.h" #include "arrow/type.h" #include "arrow/util/checked_cast.h" -#include "arrow/util/string_view.h" #include "arrow/visit_type_inline.h" #include "rapidjson/document.h" @@ -51,7 +51,7 @@ namespace json { namespace rj = arrow::rapidjson; using rj::StringBuffer; -using util::string_view; +using std::string_view; using Writer = rj::Writer; inline static Status OK(bool ok) { return ok ? Status::OK() : Status::Invalid(""); } @@ -216,7 +216,7 @@ static inline std::string PrettyPrint(string_view one_line) { } template -std::string RowsOfOneColumn(util::string_view name, std::initializer_list values, +std::string RowsOfOneColumn(std::string_view name, std::initializer_list values, decltype(std::to_string(*values.begin()))* = nullptr) { std::stringstream ss; for (auto value : values) { @@ -225,7 +225,7 @@ std::string RowsOfOneColumn(util::string_view name, std::initializer_list val return ss.str(); } -inline std::string RowsOfOneColumn(util::string_view name, +inline std::string RowsOfOneColumn(std::string_view name, std::initializer_list values) { std::stringstream ss; for (auto value : values) { diff --git a/cpp/src/arrow/pretty_print.cc b/cpp/src/arrow/pretty_print.cc index ac92287f1bcc4..61d308a145bc3 100644 --- a/cpp/src/arrow/pretty_print.cc +++ b/cpp/src/arrow/pretty_print.cc @@ -26,6 +26,7 @@ #include #include // IWYU pragma: keep #include +#include #include #include @@ -41,7 +42,6 @@ #include 
"arrow/util/int_util_overflow.h" #include "arrow/util/key_value_metadata.h" #include "arrow/util/string.h" -#include "arrow/util/string_view.h" #include "arrow/vendored/datetime.h" #include "arrow/visit_array_inline.h" @@ -57,8 +57,8 @@ class PrettyPrinter { PrettyPrinter(const PrettyPrintOptions& options, std::ostream* sink) : options_(options), indent_(options.indent), sink_(sink) {} - inline void Write(util::string_view data); - inline void WriteIndented(util::string_view data); + inline void Write(std::string_view data); + inline void WriteIndented(std::string_view data); inline void Newline(); inline void Indent(); inline void IndentAfterNewline(); @@ -103,9 +103,9 @@ void PrettyPrinter::CloseArray(const Array& array) { (*sink_) << "]"; } -void PrettyPrinter::Write(util::string_view data) { (*sink_) << data; } +void PrettyPrinter::Write(std::string_view data) { (*sink_) << data; } -void PrettyPrinter::WriteIndented(util::string_view data) { +void PrettyPrinter::WriteIndented(std::string_view data) { Indent(); Write(data); } @@ -173,7 +173,7 @@ class ArrayPrinter : public PrettyPrinter { template Status WritePrimitiveValues(const ArrayType& array, Formatter* formatter) { - auto appender = [&](util::string_view v) { (*sink_) << v; }; + auto appender = [&](std::string_view v) { (*sink_) << v; }; auto format_func = [&](int64_t i) { (*formatter)(array.GetView(i), appender); return Status::OK(); diff --git a/cpp/src/arrow/scalar.cc b/cpp/src/arrow/scalar.cc index 5ed92f0947649..fcf44fe82f419 100644 --- a/cpp/src/arrow/scalar.cc +++ b/cpp/src/arrow/scalar.cc @@ -839,16 +839,16 @@ struct ScalarParseImpl { return std::move(out_); } - ScalarParseImpl(std::shared_ptr type, util::string_view s) + ScalarParseImpl(std::shared_ptr type, std::string_view s) : type_(std::move(type)), s_(s) {} std::shared_ptr type_; - util::string_view s_; + std::string_view s_; std::shared_ptr out_; }; Result> Scalar::Parse(const std::shared_ptr& type, - util::string_view s) { + 
std::string_view s) { return ScalarParseImpl{type, s}.Finish(); } @@ -871,9 +871,8 @@ std::shared_ptr FormatToBuffer(Formatter&& formatter, const ScalarType& if (!from.is_valid) { return Buffer::FromString("null"); } - return formatter(from.value, [&](util::string_view v) { - return Buffer::FromString(std::string(v)); - }); + return formatter( + from.value, [&](std::string_view v) { return Buffer::FromString(std::string(v)); }); } // error fallback @@ -993,8 +992,7 @@ Status CastImpl(const DateScalar& from, TimestampScalar* to) { // string to any template Status CastImpl(const StringScalar& from, ScalarType* to) { - ARROW_ASSIGN_OR_RAISE(auto out, - Scalar::Parse(to->type, util::string_view(*from.value))); + ARROW_ASSIGN_OR_RAISE(auto out, Scalar::Parse(to->type, std::string_view(*from.value))); to->value = std::move(checked_cast(*out).value); return Status::OK(); } diff --git a/cpp/src/arrow/scalar.h b/cpp/src/arrow/scalar.h index 22532041eca01..66e1863133467 100644 --- a/cpp/src/arrow/scalar.h +++ b/cpp/src/arrow/scalar.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -35,7 +36,6 @@ #include "arrow/type_traits.h" #include "arrow/util/compare.h" #include "arrow/util/decimal.h" -#include "arrow/util/string_view.h" #include "arrow/util/visibility.h" #include "arrow/visit_type_inline.h" @@ -95,7 +95,7 @@ struct ARROW_EXPORT Scalar : public std::enable_shared_from_this, Status ValidateFull() const; static Result> Parse(const std::shared_ptr& type, - util::string_view repr); + std::string_view repr); // TODO(bkietz) add compute::CastOptions Result> CastTo(std::shared_ptr to) const; @@ -140,7 +140,7 @@ struct ARROW_EXPORT PrimitiveScalarBase : public Scalar { /// \brief Get a mutable pointer to the value of this scalar. May be null. virtual void* mutable_data() = 0; /// \brief Get an immutable view of the value of this scalar as bytes. 
- virtual util::string_view view() const = 0; + virtual std::string_view view() const = 0; }; template @@ -159,8 +159,8 @@ struct ARROW_EXPORT PrimitiveScalar : public PrimitiveScalarBase { ValueType value{}; void* mutable_data() override { return &value; } - util::string_view view() const override { - return util::string_view(reinterpret_cast(&value), sizeof(ValueType)); + std::string_view view() const override { + return std::string_view(reinterpret_cast(&value), sizeof(ValueType)); }; }; @@ -245,8 +245,8 @@ struct ARROW_EXPORT BaseBinaryScalar : public internal::PrimitiveScalarBase { void* mutable_data() override { return value ? reinterpret_cast(value->mutable_data()) : NULLPTR; } - util::string_view view() const override { - return value ? util::string_view(*value) : util::string_view(); + std::string_view view() const override { + return value ? std::string_view(*value) : std::string_view(); } protected: @@ -415,9 +415,9 @@ struct ARROW_EXPORT DecimalScalar : public internal::PrimitiveScalarBase { return reinterpret_cast(value.mutable_native_endian_bytes()); } - util::string_view view() const override { - return util::string_view(reinterpret_cast(value.native_endian_bytes()), - ValueType::kByteWidth); + std::string_view view() const override { + return std::string_view(reinterpret_cast(value.native_endian_bytes()), + ValueType::kByteWidth); } ValueType value; @@ -561,7 +561,7 @@ struct ARROW_EXPORT DictionaryScalar : public internal::PrimitiveScalarBase { return internal::checked_cast(*value.index) .mutable_data(); } - util::string_view view() const override { + std::string_view view() const override { return internal::checked_cast(*value.index) .view(); } diff --git a/cpp/src/arrow/scalar_test.cc b/cpp/src/arrow/scalar_test.cc index bf001fc6fd90e..42315ca1b62be 100644 --- a/cpp/src/arrow/scalar_test.cc +++ b/cpp/src/arrow/scalar_test.cc @@ -59,7 +59,7 @@ void AssertMakeScalar(const Scalar& expected, MakeScalarArgs&&... 
args) { AssertScalarsEqual(expected, *scalar, /*verbose=*/true); } -void AssertParseScalar(const std::shared_ptr& type, const util::string_view& s, +void AssertParseScalar(const std::shared_ptr& type, const std::string_view& s, const Scalar& expected) { ASSERT_OK_AND_ASSIGN(auto scalar, Scalar::Parse(type, s)); ASSERT_OK(scalar->Validate()); @@ -643,11 +643,11 @@ TEST(TestFixedSizeBinaryScalar, MakeScalar) { AssertMakeScalar(FixedSizeBinaryScalar(buf, type), type, buf); - AssertParseScalar(type, util::string_view(data), FixedSizeBinaryScalar(buf, type)); + AssertParseScalar(type, std::string_view(data), FixedSizeBinaryScalar(buf, type)); // Wrong length ASSERT_RAISES(Invalid, MakeScalar(type, Buffer::FromString(data.substr(3))).status()); - ASSERT_RAISES(Invalid, Scalar::Parse(type, util::string_view(data).substr(3)).status()); + ASSERT_RAISES(Invalid, Scalar::Parse(type, std::string_view(data).substr(3)).status()); } TEST(TestFixedSizeBinaryScalar, ValidateErrors) { @@ -831,7 +831,7 @@ TEST(TestTimestampScalars, MakeScalar) { auto type3 = timestamp(TimeUnit::MICRO); auto type4 = timestamp(TimeUnit::NANO); - util::string_view epoch_plus_1s = "1970-01-01 00:00:01"; + std::string_view epoch_plus_1s = "1970-01-01 00:00:01"; AssertMakeScalar(TimestampScalar(1, type1), type1, int64_t(1)); AssertParseScalar(type1, epoch_plus_1s, TimestampScalar(1000, type1)); @@ -992,7 +992,7 @@ TEST(TestDayTimeIntervalScalars, Basics) { TYPED_TEST(TestNumericScalar, Cast) { auto type = TypeTraits::type_singleton(); - for (util::string_view repr : {"0", "1", "3"}) { + for (std::string_view repr : {"0", "1", "3"}) { std::shared_ptr scalar; ASSERT_OK_AND_ASSIGN(scalar, Scalar::Parse(type, repr)); @@ -1015,7 +1015,7 @@ TYPED_TEST(TestNumericScalar, Cast) { if (is_integer_type::value) { ASSERT_OK_AND_ASSIGN(auto cast_to_string, scalar->CastTo(utf8())); ASSERT_EQ( - util::string_view(*checked_cast(*cast_to_string).value), + std::string_view(*checked_cast(*cast_to_string).value), repr); } } @@ 
-1609,7 +1609,7 @@ class TestExtensionScalar : public ::testing::Test { } protected: - ExtensionScalar MakeUuidScalar(util::string_view value) { + ExtensionScalar MakeUuidScalar(std::string_view value) { return ExtensionScalar(std::make_shared( std::make_shared(value), storage_type_), type_); @@ -1618,10 +1618,9 @@ class TestExtensionScalar : public ::testing::Test { std::shared_ptr type_, storage_type_; const UuidType* uuid_type_{nullptr}; - const util::string_view uuid_string1_{UUID_STRING1}; - const util::string_view uuid_string2_{UUID_STRING2}; - const util::string_view uuid_json_{"[\"" UUID_STRING1 "\", \"" UUID_STRING2 - "\", null]"}; + const std::string_view uuid_string1_{UUID_STRING1}; + const std::string_view uuid_string2_{UUID_STRING2}; + const std::string_view uuid_json_{"[\"" UUID_STRING1 "\", \"" UUID_STRING2 "\", null]"}; }; #undef UUID_STRING1 diff --git a/cpp/src/arrow/stl_iterator_test.cc b/cpp/src/arrow/stl_iterator_test.cc index 652a66cb516f1..3fe57ebc0d468 100644 --- a/cpp/src/arrow/stl_iterator_test.cc +++ b/cpp/src/arrow/stl_iterator_test.cc @@ -128,11 +128,11 @@ TEST(ArrayIterator, RangeFor) { TEST(ArrayIterator, String) { auto array = checked_pointer_cast( ArrayFromJSON(utf8(), R"(["foo", "bar", null, "quux"])")); - std::vector> values; + std::vector> values; for (const auto v : *array) { values.push_back(v); } - std::vector> expected{"foo", "bar", {}, "quux"}; + std::vector> expected{"foo", "bar", {}, "quux"}; ASSERT_EQ(values, expected); } @@ -150,11 +150,11 @@ TEST(ArrayIterator, Boolean) { TEST(ArrayIterator, FixedSizeBinary) { auto array = checked_pointer_cast( ArrayFromJSON(fixed_size_binary(3), R"(["foo", "bar", null, "quu"])")); - std::vector> values; + std::vector> values; for (const auto v : *array) { values.push_back(v); } - std::vector> expected{"foo", "bar", {}, "quu"}; + std::vector> expected{"foo", "bar", {}, "quu"}; ASSERT_EQ(values, expected); } diff --git a/cpp/src/arrow/testing/gtest_util.cc 
b/cpp/src/arrow/testing/gtest_util.cc index 2ba944e41f1c2..84879321ff168 100644 --- a/cpp/src/arrow/testing/gtest_util.cc +++ b/cpp/src/arrow/testing/gtest_util.cc @@ -410,14 +410,14 @@ void AssertDatumsApproxEqual(const Datum& expected, const Datum& actual, bool ve } std::shared_ptr ArrayFromJSON(const std::shared_ptr& type, - util::string_view json) { + std::string_view json) { EXPECT_OK_AND_ASSIGN(auto out, ipc::internal::json::ArrayFromJSON(type, json)); return out; } std::shared_ptr DictArrayFromJSON(const std::shared_ptr& type, - util::string_view indices_json, - util::string_view dictionary_json) { + std::string_view indices_json, + std::string_view dictionary_json) { std::shared_ptr out; ABORT_NOT_OK( ipc::internal::json::DictArrayFromJSON(type, indices_json, dictionary_json, &out)); @@ -432,7 +432,7 @@ std::shared_ptr ChunkedArrayFromJSON(const std::shared_ptr RecordBatchFromJSON(const std::shared_ptr& schema, - util::string_view json) { + std::string_view json) { // Parse as a StructArray auto struct_type = struct_(schema->fields()); std::shared_ptr struct_array = ArrayFromJSON(struct_type, json); @@ -442,15 +442,15 @@ std::shared_ptr RecordBatchFromJSON(const std::shared_ptr& } std::shared_ptr ScalarFromJSON(const std::shared_ptr& type, - util::string_view json) { + std::string_view json) { std::shared_ptr out; ABORT_NOT_OK(ipc::internal::json::ScalarFromJSON(type, json, &out)); return out; } std::shared_ptr DictScalarFromJSON(const std::shared_ptr& type, - util::string_view index_json, - util::string_view dictionary_json) { + std::string_view index_json, + std::string_view dictionary_json) { std::shared_ptr out; ABORT_NOT_OK( ipc::internal::json::DictScalarFromJSON(type, index_json, dictionary_json, &out)); diff --git a/cpp/src/arrow/testing/gtest_util.h b/cpp/src/arrow/testing/gtest_util.h index b6bfcb8e2d38c..e21a2888e8582 100644 --- a/cpp/src/arrow/testing/gtest_util.h +++ b/cpp/src/arrow/testing/gtest_util.h @@ -25,6 +25,7 @@ #include #include 
#include +#include #include #include #include @@ -40,7 +41,6 @@ #include "arrow/type_traits.h" #include "arrow/util/macros.h" #include "arrow/util/string_builder.h" -#include "arrow/util/string_view.h" #include "arrow/util/type_fwd.h" // NOTE: failing must be inline in the macros below, to get correct file / line number @@ -316,16 +316,16 @@ ARROW_TESTING_EXPORT void TestInitialized(const Array& array); ARROW_TESTING_EXPORT std::shared_ptr ArrayFromJSON(const std::shared_ptr&, - util::string_view json); + std::string_view json); ARROW_TESTING_EXPORT std::shared_ptr DictArrayFromJSON(const std::shared_ptr& type, - util::string_view indices_json, - util::string_view dictionary_json); + std::string_view indices_json, + std::string_view dictionary_json); ARROW_TESTING_EXPORT std::shared_ptr RecordBatchFromJSON(const std::shared_ptr&, - util::string_view); + std::string_view); ARROW_TESTING_EXPORT std::shared_ptr ChunkedArrayFromJSON(const std::shared_ptr&, @@ -333,12 +333,12 @@ std::shared_ptr ChunkedArrayFromJSON(const std::shared_ptr ScalarFromJSON(const std::shared_ptr&, - util::string_view json); + std::string_view json); ARROW_TESTING_EXPORT std::shared_ptr DictScalarFromJSON(const std::shared_ptr&, - util::string_view index_json, - util::string_view dictionary_json); + std::string_view index_json, + std::string_view dictionary_json); ARROW_TESTING_EXPORT std::shared_ptr
TableFromJSON(const std::shared_ptr&, @@ -530,15 +530,3 @@ class ARROW_TESTING_EXPORT GatingTask { }; } // namespace arrow - -namespace nonstd { -namespace sv_lite { - -// Without this hint, GTest will print string_views as a container of char -template > -void PrintTo(const basic_string_view& view, std::ostream* os) { - *os << view; -} - -} // namespace sv_lite -} // namespace nonstd diff --git a/cpp/src/arrow/testing/json_internal.cc b/cpp/src/arrow/testing/json_internal.cc index c88e95df016e3..c1d45aa2e0880 100644 --- a/cpp/src/arrow/testing/json_internal.cc +++ b/cpp/src/arrow/testing/json_internal.cc @@ -472,7 +472,7 @@ class ArrayWriter { return Status::OK(); } - void WriteRawNumber(util::string_view v) { + void WriteRawNumber(std::string_view v) { // Avoid RawNumber() as it misleadingly adds quotes // (see https://github.com/Tencent/rapidjson/pull/1155) writer_->RawValue(v.data(), v.size(), rj::kNumberType); @@ -503,7 +503,7 @@ class ArrayWriter { static const std::string null_string = "0"; for (int64_t i = 0; i < arr.length(); ++i) { if (arr.IsValid(i)) { - fmt(arr.Value(i), [&](util::string_view repr) { + fmt(arr.Value(i), [&](std::string_view repr) { writer_->String(repr.data(), static_cast(repr.size())); }); } else { @@ -630,7 +630,7 @@ class ArrayWriter { // Represent 64-bit integers as strings, as JSON numbers cannot represent // them exactly. 
::arrow::internal::StringFormatter::ArrowType> formatter; - auto append = [this](util::string_view v) { + auto append = [this](std::string_view v) { writer_->String(v.data(), static_cast(v.size())); return Status::OK(); }; diff --git a/cpp/src/arrow/testing/matchers.h b/cpp/src/arrow/testing/matchers.h index 4d5bb6957572a..fa2222ee1ab11 100644 --- a/cpp/src/arrow/testing/matchers.h +++ b/cpp/src/arrow/testing/matchers.h @@ -412,7 +412,7 @@ DataEqMatcher DataEq(Data&& dat) { /// Constructs an array with ArrayFromJSON against which arguments are matched inline DataEqMatcher DataEqArray(const std::shared_ptr& type, - util::string_view json) { + std::string_view json) { return DataEq(ArrayFromJSON(type, json)); } @@ -446,7 +446,7 @@ DataEqMatcher DataEqArray(T type, const std::vector>& v /// Constructs a scalar with ScalarFromJSON against which arguments are matched inline DataEqMatcher DataEqScalar(const std::shared_ptr& type, - util::string_view json) { + std::string_view json) { return DataEq(ScalarFromJSON(type, json)); } diff --git a/cpp/src/arrow/testing/random_test.cc b/cpp/src/arrow/testing/random_test.cc index 588c4f226873b..30988ac0d3c35 100644 --- a/cpp/src/arrow/testing/random_test.cc +++ b/cpp/src/arrow/testing/random_test.cc @@ -360,7 +360,7 @@ TEST(TypeSpecificTests, RepeatedStrings) { AssertTypeEqual(field->type(), base_array->type()); auto array = internal::checked_pointer_cast(base_array); ASSERT_OK(array->ValidateFull()); - util::string_view singular_value = array->GetView(0); + std::string_view singular_value = array->GetView(0); for (auto slot : *array) { if (!slot.has_value()) continue; ASSERT_EQ(slot, singular_value); diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc index c91fa234e0af8..25e323731964a 100644 --- a/cpp/src/arrow/type.cc +++ b/cpp/src/arrow/type.cc @@ -1152,35 +1152,35 @@ Result FieldRef::FromDotPath(const std::string& dot_path_arg) { std::vector children; - util::string_view dot_path = dot_path_arg; + std::string_view 
dot_path = dot_path_arg; auto parse_name = [&] { std::string name; for (;;) { auto segment_end = dot_path.find_first_of("\\[."); - if (segment_end == util::string_view::npos) { + if (segment_end == std::string_view::npos) { // dot_path doesn't contain any other special characters; consume all - name.append(dot_path.begin(), dot_path.end()); + name.append(dot_path.data(), dot_path.length()); dot_path = ""; break; } if (dot_path[segment_end] != '\\') { // segment_end points to a subscript for a new FieldRef - name.append(dot_path.begin(), segment_end); + name.append(dot_path.data(), segment_end); dot_path = dot_path.substr(segment_end); break; } if (dot_path.size() == segment_end + 1) { // dot_path ends with backslash; consume it all - name.append(dot_path.begin(), dot_path.end()); + name.append(dot_path.data(), dot_path.length()); dot_path = ""; break; } // append all characters before backslash, then the character which follows it - name.append(dot_path.begin(), segment_end); + name.append(dot_path.data(), segment_end); name.push_back(dot_path[segment_end + 1]); dot_path = dot_path.substr(segment_end + 2); } @@ -1198,7 +1198,7 @@ Result FieldRef::FromDotPath(const std::string& dot_path_arg) { } case '[': { auto subscript_end = dot_path.find_first_not_of("0123456789"); - if (subscript_end == util::string_view::npos || dot_path[subscript_end] != ']') { + if (subscript_end == std::string_view::npos || dot_path[subscript_end] != ']') { return Status::Invalid("Dot path '", dot_path_arg, "' contained an unterminated index"); } diff --git a/cpp/src/arrow/util/base64.h b/cpp/src/arrow/util/base64.h index a46884d17e623..5b80e19d896b7 100644 --- a/cpp/src/arrow/util/base64.h +++ b/cpp/src/arrow/util/base64.h @@ -18,18 +18,18 @@ #pragma once #include +#include -#include "arrow/util/string_view.h" #include "arrow/util/visibility.h" namespace arrow { namespace util { ARROW_EXPORT -std::string base64_encode(string_view s); +std::string base64_encode(std::string_view s); 
ARROW_EXPORT -std::string base64_decode(string_view s); +std::string base64_decode(std::string_view s); } // namespace util } // namespace arrow diff --git a/cpp/src/arrow/util/bitmap.h b/cpp/src/arrow/util/bitmap.h index 51a5fac97fb31..a6df1e561ee72 100644 --- a/cpp/src/arrow/util/bitmap.h +++ b/cpp/src/arrow/util/bitmap.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "arrow/buffer.h" @@ -32,11 +33,11 @@ #include "arrow/util/bitmap_ops.h" #include "arrow/util/bitmap_reader.h" #include "arrow/util/bitmap_writer.h" +#include "arrow/util/bytes_view.h" #include "arrow/util/compare.h" #include "arrow/util/endian.h" #include "arrow/util/functional.h" #include "arrow/util/string_builder.h" -#include "arrow/util/string_view.h" #include "arrow/util/visibility.h" namespace arrow { @@ -49,7 +50,7 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, public util::EqualityComparable { public: template - using View = util::basic_string_view; + using View = std::basic_string_view; Bitmap() = default; diff --git a/cpp/src/arrow/util/bitmap_reader.h b/cpp/src/arrow/util/bitmap_reader.h index 110fb6958da7d..89006ba887b29 100644 --- a/cpp/src/arrow/util/bitmap_reader.h +++ b/cpp/src/arrow/util/bitmap_reader.h @@ -17,6 +17,7 @@ #pragma once +#include #include #include diff --git a/cpp/src/arrow/util/bitset_stack.h b/cpp/src/arrow/util/bitset_stack.h index addded94943c3..9b334b3605eee 100644 --- a/cpp/src/arrow/util/bitset_stack.h +++ b/cpp/src/arrow/util/bitset_stack.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -38,7 +39,6 @@ #include "arrow/util/functional.h" #include "arrow/util/macros.h" #include "arrow/util/string_builder.h" -#include "arrow/util/string_view.h" #include "arrow/util/type_traits.h" #include "arrow/util/visibility.h" diff --git a/cpp/src/arrow/util/string_view.h b/cpp/src/arrow/util/bytes_view.h similarity index 72% rename from cpp/src/arrow/util/string_view.h rename to 
cpp/src/arrow/util/bytes_view.h index 4a51c2ebd9e2e..b1aacc96ed8d8 100644 --- a/cpp/src/arrow/util/string_view.h +++ b/cpp/src/arrow/util/bytes_view.h @@ -17,22 +17,13 @@ #pragma once -#define nssv_CONFIG_SELECT_STRING_VIEW nssv_STRING_VIEW_NONSTD - #include -#include - -#include "arrow/vendored/string_view.hpp" // IWYU pragma: export +#include namespace arrow { namespace util { -using nonstd::string_view; - -template > -using basic_string_view = nonstd::basic_string_view; - -using bytes_view = basic_string_view; +using bytes_view = std::basic_string_view; } // namespace util } // namespace arrow diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc index 7bda91cf10077..b5e5e69aa7edf 100644 --- a/cpp/src/arrow/util/decimal.cc +++ b/cpp/src/arrow/util/decimal.cc @@ -287,7 +287,7 @@ static void AppendLittleEndianArrayToString(const std::array& array const uint32_t* segment = &segments[num_segments - 1]; internal::StringFormatter format; // First segment is formatted as-is. - format(*segment, [&output](util::string_view formatted) { + format(*segment, [&output](std::string_view formatted) { memcpy(output, formatted.data(), formatted.size()); output += formatted.size(); }); @@ -295,7 +295,7 @@ static void AppendLittleEndianArrayToString(const std::array& array --segment; // Right-pad formatted segment such that e.g. 123 is formatted as "000000123". 
output += 9; - format(*segment, [output](util::string_view formatted) { + format(*segment, [output](std::string_view formatted) { memcpy(output - formatted.size(), formatted.data(), formatted.size()); }); } @@ -355,7 +355,7 @@ static void AdjustIntegerStringWithScale(int32_t scale, std::string* str) { str->push_back('+'); } internal::StringFormatter format; - format(adjusted_exponent, [str](util::string_view formatted) { + format(adjusted_exponent, [str](std::string_view formatted) { str->append(formatted.data(), formatted.size()); }); return; @@ -397,7 +397,7 @@ std::string Decimal128::ToString(int32_t scale) const { // Iterates over input and for each group of kInt64DecimalDigits multiple out by // the appropriate power of 10 necessary to add source parsed as uint64 and // then adds the parsed value of source. -static inline void ShiftAndAdd(const util::string_view& input, uint64_t out[], +static inline void ShiftAndAdd(const std::string_view& input, uint64_t out[], size_t out_size) { for (size_t posn = 0; posn < input.size();) { const size_t group_size = std::min(kInt64DecimalDigits, input.size() - posn); @@ -420,8 +420,8 @@ static inline void ShiftAndAdd(const util::string_view& input, uint64_t out[], namespace { struct DecimalComponents { - util::string_view whole_digits; - util::string_view fractional_digits; + std::string_view whole_digits; + std::string_view fractional_digits; int32_t exponent = 0; char sign = 0; bool has_exponent = false; @@ -436,14 +436,14 @@ inline bool IsDigit(char c) { return c >= '0' && c <= '9'; } inline bool StartsExponent(char c) { return c == 'e' || c == 'E'; } inline size_t ParseDigitsRun(const char* s, size_t start, size_t size, - util::string_view* out) { + std::string_view* out) { size_t pos; for (pos = start; pos < size; ++pos) { if (!IsDigit(s[pos])) { break; } } - *out = util::string_view(s + start, pos - start); + *out = std::string_view(s + start, pos - start); return pos; } @@ -508,7 +508,7 @@ inline Status 
ToArrowStatus(DecimalStatus dstatus, int num_bits) { } template -Status DecimalFromString(const char* type_name, const util::string_view& s, Decimal* out, +Status DecimalFromString(const char* type_name, const std::string_view& s, Decimal* out, int32_t* precision, int32_t* scale) { if (s.empty()) { return Status::Invalid("Empty string cannot be converted to ", type_name); @@ -573,33 +573,33 @@ Status DecimalFromString(const char* type_name, const util::string_view& s, Deci } // namespace -Status Decimal128::FromString(const util::string_view& s, Decimal128* out, +Status Decimal128::FromString(const std::string_view& s, Decimal128* out, int32_t* precision, int32_t* scale) { return DecimalFromString("decimal128", s, out, precision, scale); } Status Decimal128::FromString(const std::string& s, Decimal128* out, int32_t* precision, int32_t* scale) { - return FromString(util::string_view(s), out, precision, scale); + return FromString(std::string_view(s), out, precision, scale); } Status Decimal128::FromString(const char* s, Decimal128* out, int32_t* precision, int32_t* scale) { - return FromString(util::string_view(s), out, precision, scale); + return FromString(std::string_view(s), out, precision, scale); } -Result Decimal128::FromString(const util::string_view& s) { +Result Decimal128::FromString(const std::string_view& s) { Decimal128 out; RETURN_NOT_OK(FromString(s, &out, nullptr, nullptr)); return std::move(out); } Result Decimal128::FromString(const std::string& s) { - return FromString(util::string_view(s)); + return FromString(std::string_view(s)); } Result Decimal128::FromString(const char* s) { - return FromString(util::string_view(s)); + return FromString(std::string_view(s)); } // Helper function used by Decimal128::FromBigEndian @@ -706,33 +706,33 @@ std::string Decimal256::ToString(int32_t scale) const { return str; } -Status Decimal256::FromString(const util::string_view& s, Decimal256* out, +Status Decimal256::FromString(const std::string_view& s, 
Decimal256* out, int32_t* precision, int32_t* scale) { return DecimalFromString("decimal256", s, out, precision, scale); } Status Decimal256::FromString(const std::string& s, Decimal256* out, int32_t* precision, int32_t* scale) { - return FromString(util::string_view(s), out, precision, scale); + return FromString(std::string_view(s), out, precision, scale); } Status Decimal256::FromString(const char* s, Decimal256* out, int32_t* precision, int32_t* scale) { - return FromString(util::string_view(s), out, precision, scale); + return FromString(std::string_view(s), out, precision, scale); } -Result Decimal256::FromString(const util::string_view& s) { +Result Decimal256::FromString(const std::string_view& s) { Decimal256 out; RETURN_NOT_OK(FromString(s, &out, nullptr, nullptr)); return std::move(out); } Result Decimal256::FromString(const std::string& s) { - return FromString(util::string_view(s)); + return FromString(std::string_view(s)); } Result Decimal256::FromString(const char* s) { - return FromString(util::string_view(s)); + return FromString(std::string_view(s)); } Result Decimal256::FromBigEndian(const uint8_t* bytes, int32_t length) { diff --git a/cpp/src/arrow/util/decimal.h b/cpp/src/arrow/util/decimal.h index 5b26f1f543170..9a863c51bf60a 100644 --- a/cpp/src/arrow/util/decimal.h +++ b/cpp/src/arrow/util/decimal.h @@ -21,13 +21,13 @@ #include #include #include +#include #include #include "arrow/result.h" #include "arrow/status.h" #include "arrow/type_fwd.h" #include "arrow/util/basic_decimal.h" -#include "arrow/util/string_view.h" namespace arrow { @@ -95,13 +95,13 @@ class ARROW_EXPORT Decimal128 : public BasicDecimal128 { /// \brief Convert a decimal string to a Decimal128 value, optionally including /// precision and scale if they're passed in and not null. 
- static Status FromString(const util::string_view& s, Decimal128* out, - int32_t* precision, int32_t* scale = NULLPTR); + static Status FromString(const std::string_view& s, Decimal128* out, int32_t* precision, + int32_t* scale = NULLPTR); static Status FromString(const std::string& s, Decimal128* out, int32_t* precision, int32_t* scale = NULLPTR); static Status FromString(const char* s, Decimal128* out, int32_t* precision, int32_t* scale = NULLPTR); - static Result FromString(const util::string_view& s); + static Result FromString(const std::string_view& s); static Result FromString(const std::string& s); static Result FromString(const char* s); @@ -211,13 +211,13 @@ class ARROW_EXPORT Decimal256 : public BasicDecimal256 { /// \brief Convert a decimal string to a Decimal256 value, optionally including /// precision and scale if they're passed in and not null. - static Status FromString(const util::string_view& s, Decimal256* out, - int32_t* precision, int32_t* scale = NULLPTR); + static Status FromString(const std::string_view& s, Decimal256* out, int32_t* precision, + int32_t* scale = NULLPTR); static Status FromString(const std::string& s, Decimal256* out, int32_t* precision, int32_t* scale = NULLPTR); static Status FromString(const char* s, Decimal256* out, int32_t* precision, int32_t* scale = NULLPTR); - static Result FromString(const util::string_view& s); + static Result FromString(const std::string_view& s); static Result FromString(const std::string& s); static Result FromString(const char* s); diff --git a/cpp/src/arrow/util/delimiting.cc b/cpp/src/arrow/util/delimiting.cc index fe1b6ea3126bf..4ae3646e3210d 100644 --- a/cpp/src/arrow/util/delimiting.cc +++ b/cpp/src/arrow/util/delimiting.cc @@ -32,14 +32,14 @@ Status StraddlingTooLarge() { class NewlineBoundaryFinder : public BoundaryFinder { public: - Status FindFirst(util::string_view partial, util::string_view block, + Status FindFirst(std::string_view partial, std::string_view block, int64_t* 
out_pos) override { auto pos = block.find_first_of(newline_delimiters); - if (pos == util::string_view::npos) { + if (pos == std::string_view::npos) { *out_pos = kNoDelimiterFound; } else { auto end = block.find_first_not_of(newline_delimiters, pos); - if (end == util::string_view::npos) { + if (end == std::string_view::npos) { end = block.length(); } *out_pos = static_cast(end); @@ -47,13 +47,13 @@ class NewlineBoundaryFinder : public BoundaryFinder { return Status::OK(); } - Status FindLast(util::string_view block, int64_t* out_pos) override { + Status FindLast(std::string_view block, int64_t* out_pos) override { auto pos = block.find_last_of(newline_delimiters); - if (pos == util::string_view::npos) { + if (pos == std::string_view::npos) { *out_pos = kNoDelimiterFound; } else { auto end = block.find_first_not_of(newline_delimiters, pos); - if (end == util::string_view::npos) { + if (end == std::string_view::npos) { end = block.length(); } *out_pos = static_cast(end); @@ -61,15 +61,15 @@ class NewlineBoundaryFinder : public BoundaryFinder { return Status::OK(); } - Status FindNth(util::string_view partial, util::string_view block, int64_t count, + Status FindNth(std::string_view partial, std::string_view block, int64_t count, int64_t* out_pos, int64_t* num_found) override { - DCHECK(partial.find_first_of(newline_delimiters) == util::string_view::npos); + DCHECK(partial.find_first_of(newline_delimiters) == std::string_view::npos); int64_t found = 0; int64_t pos = kNoDelimiterFound; auto cur_pos = block.find_first_of(newline_delimiters); - while (cur_pos != util::string_view::npos) { + while (cur_pos != std::string_view::npos) { if (block[cur_pos] == '\r' && cur_pos + 1 < block.length() && block[cur_pos + 1] == '\n') { cur_pos += 2; @@ -108,7 +108,7 @@ Chunker::Chunker(std::shared_ptr delimiter) Status Chunker::Process(std::shared_ptr block, std::shared_ptr* whole, std::shared_ptr* partial) { int64_t last_pos = -1; - 
RETURN_NOT_OK(boundary_finder_->FindLast(util::string_view(*block), &last_pos)); + RETURN_NOT_OK(boundary_finder_->FindLast(std::string_view(*block), &last_pos)); if (last_pos == BoundaryFinder::kNoDelimiterFound) { // No delimiter found *whole = SliceBuffer(block, 0, 0); @@ -132,8 +132,8 @@ Status Chunker::ProcessWithPartial(std::shared_ptr partial, return Status::OK(); } int64_t first_pos = -1; - RETURN_NOT_OK(boundary_finder_->FindFirst(util::string_view(*partial), - util::string_view(*block), &first_pos)); + RETURN_NOT_OK(boundary_finder_->FindFirst(std::string_view(*partial), + std::string_view(*block), &first_pos)); if (first_pos == BoundaryFinder::kNoDelimiterFound) { // No delimiter in block => the current object is too large for block size return StraddlingTooLarge(); @@ -155,8 +155,8 @@ Status Chunker::ProcessFinal(std::shared_ptr partial, return Status::OK(); } int64_t first_pos = -1; - RETURN_NOT_OK(boundary_finder_->FindFirst(util::string_view(*partial), - util::string_view(*block), &first_pos)); + RETURN_NOT_OK(boundary_finder_->FindFirst(std::string_view(*partial), + std::string_view(*block), &first_pos)); if (first_pos == BoundaryFinder::kNoDelimiterFound) { // No delimiter in block => it's entirely a completion of partial *completion = block; @@ -175,7 +175,7 @@ Status Chunker::ProcessSkip(std::shared_ptr partial, int64_t pos; int64_t num_found; ARROW_RETURN_NOT_OK(boundary_finder_->FindNth( - util::string_view(*partial), util::string_view(*block), *count, &pos, &num_found)); + std::string_view(*partial), std::string_view(*block), *count, &pos, &num_found)); if (pos == BoundaryFinder::kNoDelimiterFound) { return StraddlingTooLarge(); } diff --git a/cpp/src/arrow/util/delimiting.h b/cpp/src/arrow/util/delimiting.h index b4b868340dbdf..161ad0bfddfc5 100644 --- a/cpp/src/arrow/util/delimiting.h +++ b/cpp/src/arrow/util/delimiting.h @@ -19,10 +19,10 @@ #include #include +#include #include "arrow/status.h" #include "arrow/util/macros.h" -#include 
"arrow/util/string_view.h" #include "arrow/util/visibility.h" namespace arrow { @@ -43,7 +43,7 @@ class ARROW_EXPORT BoundaryFinder { /// The returned `out_pos` is relative to `block`'s start and should point /// to the first character after the first delimiter. /// `out_pos` will be -1 if no delimiter is found. - virtual Status FindFirst(util::string_view partial, util::string_view block, + virtual Status FindFirst(std::string_view partial, std::string_view block, int64_t* out_pos) = 0; /// \brief Find the position of the last delimiter inside block @@ -51,7 +51,7 @@ class ARROW_EXPORT BoundaryFinder { /// The returned `out_pos` is relative to `block`'s start and should point /// to the first character after the last delimiter. /// `out_pos` will be -1 if no delimiter is found. - virtual Status FindLast(util::string_view block, int64_t* out_pos) = 0; + virtual Status FindLast(std::string_view block, int64_t* out_pos) = 0; /// \brief Find the position of the Nth delimiter inside the block /// @@ -63,8 +63,8 @@ class ARROW_EXPORT BoundaryFinder { /// `out_pos` will be -1 if no delimiter is found. 
/// /// The returned `num_found` is the number of delimiters actually found - virtual Status FindNth(util::string_view partial, util::string_view block, - int64_t count, int64_t* out_pos, int64_t* num_found) = 0; + virtual Status FindNth(std::string_view partial, std::string_view block, int64_t count, + int64_t* out_pos, int64_t* num_found) = 0; static constexpr int64_t kNoDelimiterFound = -1; diff --git a/cpp/src/arrow/util/formatting.h b/cpp/src/arrow/util/formatting.h index 335aba8c5e319..a69c7131c3779 100644 --- a/cpp/src/arrow/util/formatting.h +++ b/cpp/src/arrow/util/formatting.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -33,7 +34,6 @@ #include "arrow/type_traits.h" #include "arrow/util/double_conversion.h" #include "arrow/util/macros.h" -#include "arrow/util/string_view.h" #include "arrow/util/time.h" #include "arrow/util/visibility.h" #include "arrow/vendored/datetime.h" @@ -60,7 +60,7 @@ template using enable_if_formattable = enable_if_t::value, R>; template -using Return = decltype(std::declval()(util::string_view{})); +using Return = decltype(std::declval()(std::string_view{})); ///////////////////////////////////////////////////////////////////////// // Boolean formatting @@ -76,10 +76,10 @@ class StringFormatter { Return operator()(bool value, Appender&& append) { if (value) { const char string[] = "true"; - return append(util::string_view(string)); + return append(std::string_view(string)); } else { const char string[] = "false"; - return append(util::string_view(string)); + return append(std::string_view(string)); } } }; @@ -135,8 +135,8 @@ void FormatAllDigitsLeftPadded(Int value, size_t pad, char pad_char, char** curs } template -util::string_view ViewDigitBuffer(const std::array& buffer, - char* cursor) { +std::string_view ViewDigitBuffer(const std::array& buffer, + char* cursor) { auto buffer_end = buffer.data() + BUFFER_SIZE; return {cursor, static_cast(buffer_end - cursor)}; } @@ -260,7 +260,7 @@ class 
FloatToStringFormatterMixin : public FloatToStringFormatter { Return operator()(value_type value, Appender&& append) { char buffer[buffer_size]; int size = FormatFloat(value, buffer, buffer_size); - return append(util::string_view(buffer, size)); + return append(std::string_view(buffer, size)); } }; diff --git a/cpp/src/arrow/util/formatting_util_test.cc b/cpp/src/arrow/util/formatting_util_test.cc index a576085999054..eddf76fe84567 100644 --- a/cpp/src/arrow/util/formatting_util_test.cc +++ b/cpp/src/arrow/util/formatting_util_test.cc @@ -33,7 +33,7 @@ using internal::StringFormatter; class StringAppender { public: - Status operator()(util::string_view v) { + Status operator()(std::string_view v) { string_.append(v.data(), v.size()); return Status::OK(); } diff --git a/cpp/src/arrow/util/hashing.h b/cpp/src/arrow/util/hashing.h index ca5a6c766bd71..bb04a364cda88 100644 --- a/cpp/src/arrow/util/hashing.h +++ b/cpp/src/arrow/util/hashing.h @@ -103,11 +103,11 @@ struct ScalarHelper::value> template struct ScalarHelper::value>> + enable_if_t::value>> : public ScalarHelperBase { - // ScalarHelper specialization for util::string_view + // ScalarHelper specialization for std::string_view - static hash_t ComputeHash(const util::string_view& value) { + static hash_t ComputeHash(const std::string_view& value) { return ComputeStringHash(value.data(), static_cast(value.size())); } }; @@ -641,7 +641,7 @@ class BinaryMemoTable : public MemoTable { } } - int32_t Get(const util::string_view& value) const { + int32_t Get(const std::string_view& value) const { return Get(value.data(), static_cast(value.length())); } @@ -669,7 +669,7 @@ class BinaryMemoTable : public MemoTable { } template - Status GetOrInsert(const util::string_view& value, Func1&& on_found, + Status GetOrInsert(const std::string_view& value, Func1&& on_found, Func2&& on_not_found, int32_t* out_memo_index) { return GetOrInsert(value.data(), static_cast(value.length()), std::forward(on_found), 
std::forward(on_not_found), @@ -682,7 +682,7 @@ class BinaryMemoTable : public MemoTable { data, length, [](int32_t i) {}, [](int32_t i) {}, out_memo_index); } - Status GetOrInsert(const util::string_view& value, int32_t* out_memo_index) { + Status GetOrInsert(const std::string_view& value, int32_t* out_memo_index) { return GetOrInsert(value.data(), static_cast(value.length()), out_memo_index); } @@ -817,8 +817,8 @@ class BinaryMemoTable : public MemoTable { } // Visit the stored values in insertion order. - // The visitor function should have the signature `void(util::string_view)` - // or `void(const util::string_view&)`. + // The visitor function should have the signature `void(std::string_view)` + // or `void(const std::string_view&)`. template void VisitValues(int32_t start, VisitFunc&& visit) const { for (int32_t i = start; i < size(); ++i) { @@ -841,8 +841,8 @@ class BinaryMemoTable : public MemoTable { std::pair Lookup(hash_t h, const void* data, builder_offset_type length) const { auto cmp_func = [=](const Payload* payload) { - util::string_view lhs = binary_builder_.GetView(payload->memo_index); - util::string_view rhs(static_cast(data), length); + std::string_view lhs = binary_builder_.GetView(payload->memo_index); + std::string_view rhs(static_cast(data), length); return lhs == rhs; }; return hash_table_.Lookup(h, cmp_func); @@ -850,7 +850,7 @@ class BinaryMemoTable : public MemoTable { public: Status MergeTable(const BinaryMemoTable& other_table) { - other_table.VisitValues(0, [this](const util::string_view& other_value) { + other_table.VisitValues(0, [this](const std::string_view& other_value) { int32_t unused; DCHECK_OK(this->GetOrInsert(other_value, &unused)); }); @@ -918,7 +918,7 @@ struct StringViewHash { // std::hash compatible hasher for use with std::unordered_* // (the std::hash specialization provided by nonstd constructs std::string // temporaries then invokes std::hash against those) - hash_t operator()(const util::string_view& value) const 
{ + hash_t operator()(const std::string_view& value) const { return ComputeStringHash<0>(value.data(), static_cast(value.size())); } }; diff --git a/cpp/src/arrow/util/hashing_test.cc b/cpp/src/arrow/util/hashing_test.cc index 116e305e59e6c..6589f098afd6e 100644 --- a/cpp/src/arrow/util/hashing_test.cc +++ b/cpp/src/arrow/util/hashing_test.cc @@ -440,7 +440,7 @@ TEST(BinaryMemoTable, Basics) { { const int32_t start_offset = 1; std::vector actual; - table.VisitValues(start_offset, [&](const util::string_view& v) { + table.VisitValues(start_offset, [&](const std::string_view& v) { actual.emplace_back(v.data(), v.length()); }); EXPECT_THAT(actual, testing::ElementsAre(B, C, D, E, F, "")); diff --git a/cpp/src/arrow/util/reflection_internal.h b/cpp/src/arrow/util/reflection_internal.h index 0440a2eb563dd..2e994aa4b7005 100644 --- a/cpp/src/arrow/util/reflection_internal.h +++ b/cpp/src/arrow/util/reflection_internal.h @@ -18,11 +18,11 @@ #pragma once #include +#include #include #include #include "arrow/type_traits.h" -#include "arrow/util/string_view.h" namespace arrow { namespace internal { @@ -81,14 +81,14 @@ struct DataMemberProperty { void set(Class* obj, Type value) const { (*obj).*ptr_ = std::move(value); } - constexpr util::string_view name() const { return name_; } + constexpr std::string_view name() const { return name_; } - util::string_view name_; + std::string_view name_; Type Class::*ptr_; }; template -constexpr DataMemberProperty DataMember(util::string_view name, +constexpr DataMemberProperty DataMember(std::string_view name, Type Class::*ptr) { return {name, ptr}; } diff --git a/cpp/src/arrow/util/reflection_test.cc b/cpp/src/arrow/util/reflection_test.cc index 8ca9077ddc679..d2d6379bece85 100644 --- a/cpp/src/arrow/util/reflection_test.cc +++ b/cpp/src/arrow/util/reflection_test.cc @@ -16,6 +16,7 @@ // under the License. 
#include +#include #include @@ -48,7 +49,7 @@ struct EqualsImpl { template struct ToStringImpl { template - ToStringImpl(util::string_view class_name, const Class& obj, const Properties& props) + ToStringImpl(std::string_view class_name, const Class& obj, const Properties& props) : class_name_(class_name), obj_(obj), members_(props.size()) { props.ForEach(*this); } @@ -61,10 +62,10 @@ struct ToStringImpl { } std::string Finish() { - return class_name_.to_string() + "{" + JoinStrings(members_, ",") + "}"; + return std::string(class_name_) + "{" + JoinStrings(members_, ",") + "}"; } - util::string_view class_name_; + std::string_view class_name_; const Class& obj_; std::vector members_; }; @@ -73,7 +74,7 @@ struct ToStringImpl { template struct FromStringImpl { template - FromStringImpl(util::string_view class_name, util::string_view repr, + FromStringImpl(std::string_view class_name, std::string_view repr, const Properties& props) { Init(class_name, repr, props.size()); props.ForEach(*this); @@ -81,8 +82,8 @@ struct FromStringImpl { void Fail() { obj_ = std::nullopt; } - void Init(util::string_view class_name, util::string_view repr, size_t num_properties) { - if (!repr.starts_with(class_name)) return Fail(); + void Init(std::string_view class_name, std::string_view repr, size_t num_properties) { + if (!StartsWith(repr, class_name)) return Fail(); repr = repr.substr(class_name.size()); if (repr.empty()) return Fail(); @@ -99,7 +100,7 @@ struct FromStringImpl { if (!obj_) return; auto first_colon = members_[i].find_first_of(':'); - if (first_colon == util::string_view::npos) return Fail(); + if (first_colon == std::string_view::npos) return Fail(); auto name = members_[i].substr(0, first_colon); if (name != prop.name()) return Fail(); @@ -107,7 +108,7 @@ struct FromStringImpl { auto value_repr = members_[i].substr(first_colon + 1); typename Property::Type value; try { - std::stringstream ss(value_repr.to_string()); + std::stringstream ss{std::string{value_repr}}; ss 
>> value; if (!ss.eof()) return Fail(); } catch (...) { @@ -117,7 +118,7 @@ struct FromStringImpl { } std::optional obj_ = Class{}; - std::vector members_; + std::vector members_; }; // unmodified structure which we wish to reflect on: @@ -146,7 +147,7 @@ std::string ToString(const Person& obj) { void PrintTo(const Person& obj, std::ostream* os) { *os << ToString(obj); } -std::optional PersonFromString(util::string_view repr) { +std::optional PersonFromString(std::string_view repr) { return FromStringImpl("Person", repr, kPersonProperties).obj_; } diff --git a/cpp/src/arrow/util/string.cc b/cpp/src/arrow/util/string.cc index 09df881a9b078..2055b4f47ea22 100644 --- a/cpp/src/arrow/util/string.cc +++ b/cpp/src/arrow/util/string.cc @@ -69,9 +69,9 @@ std::string HexEncode(const char* data, size_t length) { return HexEncode(reinterpret_cast(data), length); } -std::string HexEncode(util::string_view str) { return HexEncode(str.data(), str.size()); } +std::string HexEncode(std::string_view str) { return HexEncode(str.data(), str.size()); } -std::string Escape(util::string_view str) { return Escape(str.data(), str.size()); } +std::string Escape(std::string_view str) { return Escape(str.data(), str.size()); } Status ParseHexValue(const char* data, uint8_t* out) { char c1 = data[0]; @@ -92,9 +92,9 @@ Status ParseHexValue(const char* data, uint8_t* out) { namespace internal { -std::vector SplitString(util::string_view v, char delimiter, - int64_t limit) { - std::vector parts; +std::vector SplitString(std::string_view v, char delimiter, + int64_t limit) { + std::vector parts; size_t start = 0, end; while (true) { if (limit > 0 && static_cast(limit - 1) <= parts.size()) { @@ -113,7 +113,7 @@ std::vector SplitString(util::string_view v, char delimiter, template static std::string JoinStringLikes(const std::vector& strings, - util::string_view delimiter) { + std::string_view delimiter) { if (strings.size() == 0) { return ""; } @@ -125,13 +125,13 @@ static std::string 
JoinStringLikes(const std::vector& strings, return out; } -std::string JoinStrings(const std::vector& strings, - util::string_view delimiter) { +std::string JoinStrings(const std::vector& strings, + std::string_view delimiter) { return JoinStringLikes(strings, delimiter); } std::string JoinStrings(const std::vector& strings, - util::string_view delimiter) { + std::string_view delimiter) { return JoinStringLikes(strings, delimiter); } @@ -152,7 +152,7 @@ std::string TrimString(std::string value) { return value; } -bool AsciiEqualsCaseInsensitive(util::string_view left, util::string_view right) { +bool AsciiEqualsCaseInsensitive(std::string_view left, std::string_view right) { // TODO: ASCII validation if (left.size() != right.size()) { return false; @@ -166,7 +166,7 @@ bool AsciiEqualsCaseInsensitive(util::string_view left, util::string_view right) return true; } -std::string AsciiToLower(util::string_view value) { +std::string AsciiToLower(std::string_view value) { // TODO: ASCII validation std::string result = std::string(value); std::transform(result.begin(), result.end(), result.begin(), @@ -174,7 +174,7 @@ std::string AsciiToLower(util::string_view value) { return result; } -std::string AsciiToUpper(util::string_view value) { +std::string AsciiToUpper(std::string_view value) { // TODO: ASCII validation std::string result = std::string(value); std::transform(result.begin(), result.end(), result.begin(), @@ -182,17 +182,17 @@ std::string AsciiToUpper(util::string_view value) { return result; } -std::optional Replace(util::string_view s, util::string_view token, - util::string_view replacement) { +std::optional Replace(std::string_view s, std::string_view token, + std::string_view replacement) { size_t token_start = s.find(token); if (token_start == std::string::npos) { return std::nullopt; } - return s.substr(0, token_start).to_string() + replacement.to_string() + - s.substr(token_start + token.size()).to_string(); + return std::string(s.substr(0, token_start)) + 
std::string(replacement) + + std::string(s.substr(token_start + token.size())); } -Result ParseBoolean(util::string_view value) { +Result ParseBoolean(std::string_view value) { if (AsciiEqualsCaseInsensitive(value, "true") || value == "1") { return true; } else if (AsciiEqualsCaseInsensitive(value, "false") || value == "0") { diff --git a/cpp/src/arrow/util/string.h b/cpp/src/arrow/util/string.h index fd9a3d1e06366..ec2ccd11ef52f 100644 --- a/cpp/src/arrow/util/string.h +++ b/cpp/src/arrow/util/string.h @@ -19,10 +19,10 @@ #include #include +#include #include #include "arrow/result.h" -#include "arrow/util/string_view.h" #include "arrow/util/visibility.h" namespace arrow { @@ -35,47 +35,59 @@ ARROW_EXPORT std::string Escape(const char* data, size_t length); ARROW_EXPORT std::string HexEncode(const char* data, size_t length); -ARROW_EXPORT std::string HexEncode(util::string_view str); +ARROW_EXPORT std::string HexEncode(std::string_view str); -ARROW_EXPORT std::string Escape(util::string_view str); +ARROW_EXPORT std::string Escape(std::string_view str); ARROW_EXPORT Status ParseHexValue(const char* data, uint8_t* out); namespace internal { +/// Like std::string_view::starts_with in C++20 +inline bool StartsWith(std::string_view s, std::string_view prefix) { + return s.length() >= prefix.length() && + (s.empty() || s.substr(0, prefix.length()) == prefix); +} + +/// Like std::string_view::ends_with in C++20 +inline bool EndsWith(std::string_view s, std::string_view suffix) { + return s.length() >= suffix.length() && + (s.empty() || s.substr(s.length() - suffix.length()) == suffix); +} + /// \brief Split a string with a delimiter ARROW_EXPORT -std::vector SplitString(util::string_view v, char delim, - int64_t limit = 0); +std::vector SplitString(std::string_view v, char delim, + int64_t limit = 0); /// \brief Join strings with a delimiter ARROW_EXPORT -std::string JoinStrings(const std::vector& strings, - util::string_view delimiter); +std::string JoinStrings(const 
std::vector& strings, + std::string_view delimiter); /// \brief Join strings with a delimiter ARROW_EXPORT std::string JoinStrings(const std::vector& strings, - util::string_view delimiter); + std::string_view delimiter); /// \brief Trim whitespace from left and right sides of string ARROW_EXPORT std::string TrimString(std::string value); ARROW_EXPORT -bool AsciiEqualsCaseInsensitive(util::string_view left, util::string_view right); +bool AsciiEqualsCaseInsensitive(std::string_view left, std::string_view right); ARROW_EXPORT -std::string AsciiToLower(util::string_view value); +std::string AsciiToLower(std::string_view value); ARROW_EXPORT -std::string AsciiToUpper(util::string_view value); +std::string AsciiToUpper(std::string_view value); /// \brief Search for the first instance of a token and replace it or return nullopt if /// the token is not found. ARROW_EXPORT -std::optional Replace(util::string_view s, util::string_view token, - util::string_view replacement); +std::optional Replace(std::string_view s, std::string_view token, + std::string_view replacement); /// \brief Get boolean value from string /// @@ -83,6 +95,7 @@ std::optional Replace(util::string_view s, util::string_view token, /// If "0", "false" (case-insensitive), returns false /// Otherwise, returns Status::Invalid ARROW_EXPORT -arrow::Result ParseBoolean(util::string_view value); +arrow::Result ParseBoolean(std::string_view value); + } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/string_test.cc b/cpp/src/arrow/util/string_test.cc index 2aa6fccbd9a0f..a1aac17ab50e4 100644 --- a/cpp/src/arrow/util/string_test.cc +++ b/cpp/src/arrow/util/string_test.cc @@ -166,5 +166,33 @@ TEST(SplitString, LimitZero) { EXPECT_EQ(parts[2], "c"); } +TEST(StartsWith, Basics) { + std::string empty{}; + std::string abc{"abc"}; + std::string abcdef{"abcdef"}; + std::string def{"def"}; + ASSERT_TRUE(StartsWith(empty, empty)); + ASSERT_TRUE(StartsWith(abc, empty)); + 
ASSERT_TRUE(StartsWith(abc, abc)); + ASSERT_TRUE(StartsWith(abcdef, abc)); + ASSERT_FALSE(StartsWith(abc, abcdef)); + ASSERT_FALSE(StartsWith(def, abcdef)); + ASSERT_FALSE(StartsWith(abcdef, def)); +} + +TEST(EndsWith, Basics) { + std::string empty{}; + std::string abc{"abc"}; + std::string abcdef{"abcdef"}; + std::string def{"def"}; + ASSERT_TRUE(EndsWith(empty, empty)); + ASSERT_TRUE(EndsWith(abc, empty)); + ASSERT_TRUE(EndsWith(abc, abc)); + ASSERT_TRUE(EndsWith(abcdef, def)); + ASSERT_FALSE(EndsWith(abcdef, abc)); + ASSERT_FALSE(EndsWith(def, abcdef)); + ASSERT_FALSE(EndsWith(abc, abcdef)); +} + } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/trie.cc b/cpp/src/arrow/util/trie.cc index 7fa7f852eb44d..ec2aed302f5a7 100644 --- a/cpp/src/arrow/util/trie.cc +++ b/cpp/src/arrow/util/trie.cc @@ -91,7 +91,7 @@ Status TrieBuilder::AppendChildNode(Trie::Node* parent, uint8_t ch, Trie::Node&& } Status TrieBuilder::CreateChildNode(Trie::Node* parent, uint8_t ch, - util::string_view substring) { + std::string_view substring) { const auto kMaxSubstringLength = Trie::kMaxSubstringLength; while (substring.length() > kMaxSubstringLength) { @@ -112,7 +112,7 @@ Status TrieBuilder::CreateChildNode(Trie::Node* parent, uint8_t ch, } Status TrieBuilder::CreateChildNode(Trie::Node* parent, char ch, - util::string_view substring) { + std::string_view substring) { return CreateChildNode(parent, static_cast(ch), substring); } @@ -147,7 +147,7 @@ Status TrieBuilder::SplitNode(fast_index_type node_index, fast_index_type split_ return Status::OK(); } -Status TrieBuilder::Append(util::string_view s, bool allow_duplicate) { +Status TrieBuilder::Append(std::string_view s, bool allow_duplicate) { // Find or create node for string fast_index_type node_index = 0; fast_index_type pos = 0; diff --git a/cpp/src/arrow/util/trie.h b/cpp/src/arrow/util/trie.h index b250cca647d63..7815d4d1ecc1d 100644 --- a/cpp/src/arrow/util/trie.h +++ b/cpp/src/arrow/util/trie.h @@ -23,12
+23,12 @@ #include #include #include +#include #include #include #include "arrow/status.h" #include "arrow/util/macros.h" -#include "arrow/util/string_view.h" #include "arrow/util/visibility.h" namespace arrow { @@ -45,10 +45,10 @@ class SmallString { template SmallString(const T& v) { // NOLINT implicit constructor - *this = util::string_view(v); + *this = std::string_view(v); } - SmallString& operator=(const util::string_view s) { + SmallString& operator=(const std::string_view s) { #ifndef NDEBUG CheckSize(s.size()); #endif @@ -58,18 +58,16 @@ class SmallString { } SmallString& operator=(const std::string& s) { - *this = util::string_view(s); + *this = std::string_view(s); return *this; } SmallString& operator=(const char* s) { - *this = util::string_view(s); + *this = std::string_view(s); return *this; } - explicit operator util::string_view() const { - return util::string_view(data_, length_); - } + explicit operator std::string_view() const { return std::string_view(data_, length_); } const char* data() const { return data_; } size_t length() const { return length_; } @@ -82,21 +80,21 @@ class SmallString { } SmallString substr(size_t pos) const { - return SmallString(util::string_view(*this).substr(pos)); + return SmallString(std::string_view(*this).substr(pos)); } SmallString substr(size_t pos, size_t count) const { - return SmallString(util::string_view(*this).substr(pos, count)); + return SmallString(std::string_view(*this).substr(pos, count)); } template bool operator==(T&& other) const { - return util::string_view(*this) == util::string_view(std::forward(other)); + return std::string_view(*this) == std::string_view(std::forward(other)); } template bool operator!=(T&& other) const { - return util::string_view(*this) != util::string_view(std::forward(other)); + return std::string_view(*this) != std::string_view(std::forward(other)); } protected: @@ -108,7 +106,7 @@ class SmallString { template std::ostream& operator<<(std::ostream& os, const SmallString& 
str) { - return os << util::string_view(str); + return os << std::string_view(str); } // A trie class for byte strings, optimized for small sets of short strings. @@ -123,7 +121,7 @@ class ARROW_EXPORT Trie { Trie(Trie&&) = default; Trie& operator=(Trie&&) = default; - int32_t Find(util::string_view s) const { + int32_t Find(std::string_view s) const { const Node* node = &nodes_[0]; fast_index_type pos = 0; if (s.length() > static_cast(kMaxIndex)) { @@ -222,7 +220,7 @@ class ARROW_EXPORT TrieBuilder { public: TrieBuilder(); - Status Append(util::string_view s, bool allow_duplicate = false); + Status Append(std::string_view s, bool allow_duplicate = false); Trie Finish(); protected: @@ -233,8 +231,8 @@ class ARROW_EXPORT TrieBuilder { // Append an already constructed child node to the parent Status AppendChildNode(Trie::Node* parent, uint8_t ch, Trie::Node&& node); // Create a matching child node from this parent - Status CreateChildNode(Trie::Node* parent, uint8_t ch, util::string_view substring); - Status CreateChildNode(Trie::Node* parent, char ch, util::string_view substring); + Status CreateChildNode(Trie::Node* parent, uint8_t ch, std::string_view substring); + Status CreateChildNode(Trie::Node* parent, char ch, std::string_view substring); Trie trie_; diff --git a/cpp/src/arrow/util/trie_benchmark.cc b/cpp/src/arrow/util/trie_benchmark.cc index 868accc374477..b938f87d8d1c9 100644 --- a/cpp/src/arrow/util/trie_benchmark.cc +++ b/cpp/src/arrow/util/trie_benchmark.cc @@ -86,7 +86,7 @@ BENCHMARK(TrieLookupNotFound); #ifdef ARROW_WITH_BENCHMARKS_REFERENCE -static inline bool InlinedNullLookup(util::string_view s) { +static inline bool InlinedNullLookup(std::string_view s) { // An inlined version of trie lookup for a specific set of strings // (see AllNulls()) auto size = s.length(); diff --git a/cpp/src/arrow/util/trie_test.cc b/cpp/src/arrow/util/trie_test.cc index cfe66689da59d..9c6b7678a4674 100644 --- a/cpp/src/arrow/util/trie_test.cc +++ 
b/cpp/src/arrow/util/trie_test.cc @@ -36,7 +36,7 @@ TEST(SmallString, Basics) { { SS s; ASSERT_EQ(s.length(), 0); - ASSERT_EQ(util::string_view(s), util::string_view("")); + ASSERT_EQ(std::string_view(s), std::string_view("")); ASSERT_EQ(s, ""); ASSERT_NE(s, "x"); ASSERT_EQ(sizeof(s), 6); @@ -44,7 +44,7 @@ TEST(SmallString, Basics) { { SS s("abc"); ASSERT_EQ(s.length(), 3); - ASSERT_EQ(util::string_view(s), util::string_view("abc")); + ASSERT_EQ(std::string_view(s), std::string_view("abc")); ASSERT_EQ(std::memcmp(s.data(), "abc", 3), 0); ASSERT_EQ(s, "abc"); ASSERT_NE(s, "ab"); @@ -55,23 +55,23 @@ TEST(SmallString, Assign) { using SS = SmallString<5>; auto s = SS(); - s = util::string_view("abc"); + s = std::string_view("abc"); ASSERT_EQ(s.length(), 3); - ASSERT_EQ(util::string_view(s), util::string_view("abc")); + ASSERT_EQ(std::string_view(s), std::string_view("abc")); ASSERT_EQ(std::memcmp(s.data(), "abc", 3), 0); ASSERT_EQ(s, "abc"); ASSERT_NE(s, "ab"); s = std::string("ghijk"); ASSERT_EQ(s.length(), 5); - ASSERT_EQ(util::string_view(s), util::string_view("ghijk")); + ASSERT_EQ(std::string_view(s), std::string_view("ghijk")); ASSERT_EQ(std::memcmp(s.data(), "ghijk", 5), 0); ASSERT_EQ(s, "ghijk"); ASSERT_NE(s, ""); s = SS("xy"); ASSERT_EQ(s.length(), 2); - ASSERT_EQ(util::string_view(s), util::string_view("xy")); + ASSERT_EQ(std::string_view(s), std::string_view("xy")); ASSERT_EQ(std::memcmp(s.data(), "xy", 2), 0); ASSERT_EQ(s, "xy"); ASSERT_NE(s, "xyz"); diff --git a/cpp/src/arrow/util/uri.cc b/cpp/src/arrow/util/uri.cc index abfc9de8b4988..ced1b18404c64 100644 --- a/cpp/src/arrow/util/uri.cc +++ b/cpp/src/arrow/util/uri.cc @@ -20,9 +20,9 @@ #include #include #include +#include #include -#include "arrow/util/string_view.h" #include "arrow/util/value_parsing.h" #include "arrow/vendored/uriparser/Uri.h" @@ -31,7 +31,7 @@ namespace internal { namespace { -util::string_view TextRangeToView(const UriTextRangeStructA& range) { +std::string_view TextRangeToView(const 
UriTextRangeStructA& range) { if (range.first == nullptr) { return ""; } else { @@ -50,7 +50,7 @@ std::string TextRangeToString(const UriTextRangeStructA& range) { bool IsTextRangeSet(const UriTextRangeStructA& range) { return range.first != nullptr; } #ifdef _WIN32 -bool IsDriveSpec(const util::string_view s) { +bool IsDriveSpec(const std::string_view s) { return (s.length() >= 2 && s[1] == ':' && ((s[0] >= 'A' && s[0] <= 'Z') || (s[0] >= 'a' && s[0] <= 'z'))); } @@ -72,7 +72,7 @@ std::string UriEscape(const std::string& s) { return escaped; } -std::string UriUnescape(const util::string_view s) { +std::string UriUnescape(const std::string_view s) { std::string result(s); if (!result.empty()) { auto end = uriUnescapeInPlaceA(&result[0]); @@ -94,7 +94,7 @@ std::string UriEncodeHost(const std::string& host) { } } -bool IsValidUriScheme(const arrow::util::string_view s) { +bool IsValidUriScheme(const std::string_view s) { auto is_alpha = [](char c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); }; auto is_scheme_char = [&](char c) { return is_alpha(c) || (c >= '0' && c <= '9') || c == '+' || c == '-' || c == '.'; @@ -133,7 +133,7 @@ struct Uri::Impl { std::vector data_; std::string string_rep_; int32_t port_; - std::vector path_segments_; + std::vector path_segments_; bool is_file_uri_; bool is_absolute_path_; }; @@ -162,7 +162,7 @@ int32_t Uri::port() const { return impl_->port_; } std::string Uri::username() const { auto userpass = TextRangeToView(impl_->uri_.userInfo); auto sep_pos = userpass.find_first_of(':'); - if (sep_pos == util::string_view::npos) { + if (sep_pos == std::string_view::npos) { return UriUnescape(userpass); } else { return UriUnescape(userpass.substr(0, sep_pos)); @@ -172,7 +172,7 @@ std::string Uri::username() const { std::string Uri::password() const { auto userpass = TextRangeToView(impl_->uri_.userInfo); auto sep_pos = userpass.find_first_of(':'); - if (sep_pos == util::string_view::npos) { + if (sep_pos == 
std::string_view::npos) { return std::string(); } else { return UriUnescape(userpass.substr(sep_pos + 1)); diff --git a/cpp/src/arrow/util/uri.h b/cpp/src/arrow/util/uri.h index 50d9eccf82f09..10853b4b7778e 100644 --- a/cpp/src/arrow/util/uri.h +++ b/cpp/src/arrow/util/uri.h @@ -20,11 +20,11 @@ #include #include #include +#include #include #include #include "arrow/type_fwd.h" -#include "arrow/util/string_view.h" #include "arrow/util/visibility.h" namespace arrow { @@ -38,7 +38,7 @@ class ARROW_EXPORT Uri { Uri(Uri&&); Uri& operator=(Uri&&); - // XXX Should we use util::string_view instead? These functions are + // XXX Should we use std::string_view instead? These functions are // not performance-critical. /// The URI scheme, such as "http", or the empty string if the URI has no @@ -93,7 +93,7 @@ ARROW_EXPORT std::string UriEscape(const std::string& s); ARROW_EXPORT -std::string UriUnescape(const arrow::util::string_view s); +std::string UriUnescape(const std::string_view s); /// Encode a host for use within a URI, such as "localhost", /// "127.0.0.1", or "[::1]". @@ -102,7 +102,7 @@ std::string UriEncodeHost(const std::string& host); /// Whether the string is a syntactically valid URI scheme according to RFC 3986. 
ARROW_EXPORT -bool IsValidUriScheme(const arrow::util::string_view s); +bool IsValidUriScheme(const std::string_view s); /// Create a file uri from a given absolute path ARROW_EXPORT diff --git a/cpp/src/arrow/util/utf8.cc b/cpp/src/arrow/util/utf8.cc index e589e1763e64e..67f04709621b4 100644 --- a/cpp/src/arrow/util/utf8.cc +++ b/cpp/src/arrow/util/utf8.cc @@ -96,7 +96,7 @@ bool ValidateUTF8(const uint8_t* data, int64_t size) { return ValidateUTF8Inline(data, size); } -bool ValidateUTF8(const util::string_view& str) { return ValidateUTF8Inline(str); } +bool ValidateUTF8(const std::string_view& str) { return ValidateUTF8Inline(str); } static const uint8_t kBOM[] = {0xEF, 0xBB, 0xBF}; diff --git a/cpp/src/arrow/util/utf8.h b/cpp/src/arrow/util/utf8.h index eab207d2a02b2..909113055d103 100644 --- a/cpp/src/arrow/util/utf8.h +++ b/cpp/src/arrow/util/utf8.h @@ -20,10 +20,10 @@ #include #include #include +#include #include "arrow/type_fwd.h" #include "arrow/util/macros.h" -#include "arrow/util/string_view.h" #include "arrow/util/visibility.h" namespace arrow { @@ -41,7 +41,7 @@ ARROW_EXPORT void InitializeUTF8(); ARROW_EXPORT bool ValidateUTF8(const uint8_t* data, int64_t size); -ARROW_EXPORT bool ValidateUTF8(const util::string_view& str); +ARROW_EXPORT bool ValidateUTF8(const std::string_view& str); // Skip UTF8 byte order mark, if any. 
ARROW_EXPORT diff --git a/cpp/src/arrow/util/utf8_internal.h b/cpp/src/arrow/util/utf8_internal.h index 9d2954e9d1cd8..0ce7dd762001f 100644 --- a/cpp/src/arrow/util/utf8_internal.h +++ b/cpp/src/arrow/util/utf8_internal.h @@ -22,6 +22,7 @@ #include #include #include +#include #if defined(ARROW_HAVE_NEON) || defined(ARROW_HAVE_SSE4_2) #include @@ -30,7 +31,6 @@ #include "arrow/type_fwd.h" #include "arrow/util/macros.h" #include "arrow/util/simd.h" -#include "arrow/util/string_view.h" #include "arrow/util/ubsan.h" #include "arrow/util/utf8.h" #include "arrow/util/visibility.h" @@ -201,7 +201,7 @@ static inline bool ValidateUTF8Inline(const uint8_t* data, int64_t size) { return ARROW_PREDICT_TRUE(state == internal::kUTF8ValidateAccept); } -static inline bool ValidateUTF8Inline(const util::string_view& str) { +static inline bool ValidateUTF8Inline(const std::string_view& str) { const uint8_t* data = reinterpret_cast(str.data()); const size_t length = str.size(); @@ -266,7 +266,7 @@ static inline bool ValidateAscii(const uint8_t* data, int64_t len) { #endif } -static inline bool ValidateAscii(const util::string_view& str) { +static inline bool ValidateAscii(const std::string_view& str) { const uint8_t* data = reinterpret_cast(str.data()); const size_t length = str.size(); diff --git a/cpp/src/arrow/util/value_parsing_benchmark.cc b/cpp/src/arrow/util/value_parsing_benchmark.cc index 40d139316e55f..2c4a32b7a1bfa 100644 --- a/cpp/src/arrow/util/value_parsing_benchmark.cc +++ b/cpp/src/arrow/util/value_parsing_benchmark.cc @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -32,7 +33,6 @@ #include "arrow/testing/random.h" #include "arrow/type.h" #include "arrow/util/formatting.h" -#include "arrow/util/string_view.h" #include "arrow/util/value_parsing.h" namespace arrow { @@ -218,7 +218,7 @@ static void TimestampParsingStrptime( } struct DummyAppender { - Status operator()(util::string_view v) { + Status operator()(std::string_view v) { if (pos_ >= 
static_cast(v.size())) { pos_ = 0; } diff --git a/cpp/src/arrow/vendored/base64.cpp b/cpp/src/arrow/vendored/base64.cpp index 0de11955b7d15..6f53c0524e714 100644 --- a/cpp/src/arrow/vendored/base64.cpp +++ b/cpp/src/arrow/vendored/base64.cpp @@ -87,13 +87,13 @@ static std::string base64_encode(unsigned char const* bytes_to_encode, unsigned } -std::string base64_encode(string_view string_to_encode) { +std::string base64_encode(std::string_view string_to_encode) { auto bytes_to_encode = reinterpret_cast(string_to_encode.data()); auto in_len = static_cast(string_to_encode.size()); return base64_encode(bytes_to_encode, in_len); } -std::string base64_decode(string_view encoded_string) { +std::string base64_decode(std::string_view encoded_string) { size_t in_len = encoded_string.size(); int i = 0; int j = 0; diff --git a/cpp/src/arrow/vendored/string_view.hpp b/cpp/src/arrow/vendored/string_view.hpp deleted file mode 100644 index a2d5567854f95..0000000000000 --- a/cpp/src/arrow/vendored/string_view.hpp +++ /dev/null @@ -1,1531 +0,0 @@ -// Vendored from git changeset v1.4.0 - -// Copyright 2017-2020 by Martin Moene -// -// string-view lite, a C++17-like string_view for C++98 and later. -// For more information see https://github.com/martinmoene/string-view-lite -// -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#pragma once - -#ifndef NONSTD_SV_LITE_H_INCLUDED -#define NONSTD_SV_LITE_H_INCLUDED - -#define string_view_lite_MAJOR 1 -#define string_view_lite_MINOR 4 -#define string_view_lite_PATCH 0 - -#define string_view_lite_VERSION nssv_STRINGIFY(string_view_lite_MAJOR) "." nssv_STRINGIFY(string_view_lite_MINOR) "." 
nssv_STRINGIFY(string_view_lite_PATCH) - -#define nssv_STRINGIFY( x ) nssv_STRINGIFY_( x ) -#define nssv_STRINGIFY_( x ) #x - -// string-view lite configuration: - -#define nssv_STRING_VIEW_DEFAULT 0 -#define nssv_STRING_VIEW_NONSTD 1 -#define nssv_STRING_VIEW_STD 2 - -#if !defined( nssv_CONFIG_SELECT_STRING_VIEW ) -# define nssv_CONFIG_SELECT_STRING_VIEW ( nssv_HAVE_STD_STRING_VIEW ? nssv_STRING_VIEW_STD : nssv_STRING_VIEW_NONSTD ) -#endif - -#if defined( nssv_CONFIG_SELECT_STD_STRING_VIEW ) || defined( nssv_CONFIG_SELECT_NONSTD_STRING_VIEW ) -# error nssv_CONFIG_SELECT_STD_STRING_VIEW and nssv_CONFIG_SELECT_NONSTD_STRING_VIEW are deprecated and removed, please use nssv_CONFIG_SELECT_STRING_VIEW=nssv_STRING_VIEW_... -#endif - -#ifndef nssv_CONFIG_STD_SV_OPERATOR -# define nssv_CONFIG_STD_SV_OPERATOR 0 -#endif - -#ifndef nssv_CONFIG_USR_SV_OPERATOR -# define nssv_CONFIG_USR_SV_OPERATOR 1 -#endif - -#ifdef nssv_CONFIG_CONVERSION_STD_STRING -# define nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS nssv_CONFIG_CONVERSION_STD_STRING -# define nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS nssv_CONFIG_CONVERSION_STD_STRING -#endif - -#ifndef nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS -# define nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS 1 -#endif - -#ifndef nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS -# define nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS 1 -#endif - -// Control presence of exception handling (try and auto discover): - -#ifndef nssv_CONFIG_NO_EXCEPTIONS -# if defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND) -# define nssv_CONFIG_NO_EXCEPTIONS 0 -# else -# define nssv_CONFIG_NO_EXCEPTIONS 1 -# endif -#endif - -// C++ language version detection (C++20 is speculative): -// Note: VC14.0/1900 (VS2015) lacks too much from C++14. - -#ifndef nssv_CPLUSPLUS -# if defined(_MSVC_LANG ) && !defined(__clang__) -# define nssv_CPLUSPLUS (_MSC_VER == 1900 ? 
201103L : _MSVC_LANG ) -# else -# define nssv_CPLUSPLUS __cplusplus -# endif -#endif - -#define nssv_CPP98_OR_GREATER ( nssv_CPLUSPLUS >= 199711L ) -#define nssv_CPP11_OR_GREATER ( nssv_CPLUSPLUS >= 201103L ) -#define nssv_CPP11_OR_GREATER_ ( nssv_CPLUSPLUS >= 201103L ) -#define nssv_CPP14_OR_GREATER ( nssv_CPLUSPLUS >= 201402L ) -#define nssv_CPP17_OR_GREATER ( nssv_CPLUSPLUS >= 201703L ) -#define nssv_CPP20_OR_GREATER ( nssv_CPLUSPLUS >= 202000L ) - -// use C++17 std::string_view if available and requested: - -#if nssv_CPP17_OR_GREATER && defined(__has_include ) -# if __has_include( ) -# define nssv_HAVE_STD_STRING_VIEW 1 -# else -# define nssv_HAVE_STD_STRING_VIEW 0 -# endif -#else -# define nssv_HAVE_STD_STRING_VIEW 0 -#endif - -#define nssv_USES_STD_STRING_VIEW ( (nssv_CONFIG_SELECT_STRING_VIEW == nssv_STRING_VIEW_STD) || ((nssv_CONFIG_SELECT_STRING_VIEW == nssv_STRING_VIEW_DEFAULT) && nssv_HAVE_STD_STRING_VIEW) ) - -#define nssv_HAVE_STARTS_WITH ( nssv_CPP20_OR_GREATER || !nssv_USES_STD_STRING_VIEW ) -#define nssv_HAVE_ENDS_WITH nssv_HAVE_STARTS_WITH - -// -// Use C++17 std::string_view: -// - -#if nssv_USES_STD_STRING_VIEW - -#include - -// Extensions for std::string: - -#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS - -namespace nonstd { - -template< class CharT, class Traits, class Allocator = std::allocator > -std::basic_string -to_string( std::basic_string_view v, Allocator const & a = Allocator() ) -{ - return std::basic_string( v.begin(), v.end(), a ); -} - -template< class CharT, class Traits, class Allocator > -std::basic_string_view -to_string_view( std::basic_string const & s ) -{ - return std::basic_string_view( s.data(), s.size() ); -} - -// Literal operators sv and _sv: - -#if nssv_CONFIG_STD_SV_OPERATOR - -using namespace std::literals::string_view_literals; - -#endif - -#if nssv_CONFIG_USR_SV_OPERATOR - -inline namespace literals { -inline namespace string_view_literals { - - -constexpr std::string_view operator "" _sv( const char* str, 
size_t len ) noexcept // (1) -{ - return std::string_view{ str, len }; -} - -constexpr std::u16string_view operator "" _sv( const char16_t* str, size_t len ) noexcept // (2) -{ - return std::u16string_view{ str, len }; -} - -constexpr std::u32string_view operator "" _sv( const char32_t* str, size_t len ) noexcept // (3) -{ - return std::u32string_view{ str, len }; -} - -constexpr std::wstring_view operator "" _sv( const wchar_t* str, size_t len ) noexcept // (4) -{ - return std::wstring_view{ str, len }; -} - -}} // namespace literals::string_view_literals - -#endif // nssv_CONFIG_USR_SV_OPERATOR - -} // namespace nonstd - -#endif // nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS - -namespace nonstd { - -using std::string_view; -using std::wstring_view; -using std::u16string_view; -using std::u32string_view; -using std::basic_string_view; - -// literal "sv" and "_sv", see above - -using std::operator==; -using std::operator!=; -using std::operator<; -using std::operator<=; -using std::operator>; -using std::operator>=; - -using std::operator<<; - -} // namespace nonstd - -#else // nssv_HAVE_STD_STRING_VIEW - -// -// Before C++17: use string_view lite: -// - -// Compiler versions: -// -// MSVC++ 6.0 _MSC_VER == 1200 nssv_COMPILER_MSVC_VERSION == 60 (Visual Studio 6.0) -// MSVC++ 7.0 _MSC_VER == 1300 nssv_COMPILER_MSVC_VERSION == 70 (Visual Studio .NET 2002) -// MSVC++ 7.1 _MSC_VER == 1310 nssv_COMPILER_MSVC_VERSION == 71 (Visual Studio .NET 2003) -// MSVC++ 8.0 _MSC_VER == 1400 nssv_COMPILER_MSVC_VERSION == 80 (Visual Studio 2005) -// MSVC++ 9.0 _MSC_VER == 1500 nssv_COMPILER_MSVC_VERSION == 90 (Visual Studio 2008) -// MSVC++ 10.0 _MSC_VER == 1600 nssv_COMPILER_MSVC_VERSION == 100 (Visual Studio 2010) -// MSVC++ 11.0 _MSC_VER == 1700 nssv_COMPILER_MSVC_VERSION == 110 (Visual Studio 2012) -// MSVC++ 12.0 _MSC_VER == 1800 nssv_COMPILER_MSVC_VERSION == 120 (Visual Studio 2013) -// MSVC++ 14.0 _MSC_VER == 1900 nssv_COMPILER_MSVC_VERSION == 140 (Visual Studio 2015) -// 
MSVC++ 14.1 _MSC_VER >= 1910 nssv_COMPILER_MSVC_VERSION == 141 (Visual Studio 2017) -// MSVC++ 14.2 _MSC_VER >= 1920 nssv_COMPILER_MSVC_VERSION == 142 (Visual Studio 2019) - -#if defined(_MSC_VER ) && !defined(__clang__) -# define nssv_COMPILER_MSVC_VER (_MSC_VER ) -# define nssv_COMPILER_MSVC_VERSION (_MSC_VER / 10 - 10 * ( 5 + (_MSC_VER < 1900 ) ) ) -#else -# define nssv_COMPILER_MSVC_VER 0 -# define nssv_COMPILER_MSVC_VERSION 0 -#endif - -#define nssv_COMPILER_VERSION( major, minor, patch ) ( 10 * ( 10 * (major) + (minor) ) + (patch) ) - -#if defined(__clang__) -# define nssv_COMPILER_CLANG_VERSION nssv_COMPILER_VERSION(__clang_major__, __clang_minor__, __clang_patchlevel__) -#else -# define nssv_COMPILER_CLANG_VERSION 0 -#endif - -#if defined(__GNUC__) && !defined(__clang__) -# define nssv_COMPILER_GNUC_VERSION nssv_COMPILER_VERSION(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) -#else -# define nssv_COMPILER_GNUC_VERSION 0 -#endif - -// half-open range [lo..hi): -#define nssv_BETWEEN( v, lo, hi ) ( (lo) <= (v) && (v) < (hi) ) - -// Presence of language and library features: - -#ifdef _HAS_CPP0X -# define nssv_HAS_CPP0X _HAS_CPP0X -#else -# define nssv_HAS_CPP0X 0 -#endif - -// Unless defined otherwise below, consider VC14 as C++11 for variant-lite: - -#if nssv_COMPILER_MSVC_VER >= 1900 -# undef nssv_CPP11_OR_GREATER -# define nssv_CPP11_OR_GREATER 1 -#endif - -#define nssv_CPP11_90 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1500) -#define nssv_CPP11_100 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1600) -#define nssv_CPP11_110 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1700) -#define nssv_CPP11_120 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1800) -#define nssv_CPP11_140 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1900) -#define nssv_CPP11_141 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1910) - -#define nssv_CPP14_000 (nssv_CPP14_OR_GREATER) -#define nssv_CPP17_000 (nssv_CPP17_OR_GREATER) - -// Presence of 
C++11 language features: - -#define nssv_HAVE_CONSTEXPR_11 nssv_CPP11_140 -#define nssv_HAVE_EXPLICIT_CONVERSION nssv_CPP11_140 -#define nssv_HAVE_INLINE_NAMESPACE nssv_CPP11_140 -#define nssv_HAVE_NOEXCEPT nssv_CPP11_140 -#define nssv_HAVE_NULLPTR nssv_CPP11_100 -#define nssv_HAVE_REF_QUALIFIER nssv_CPP11_140 -#define nssv_HAVE_UNICODE_LITERALS nssv_CPP11_140 -#define nssv_HAVE_USER_DEFINED_LITERALS nssv_CPP11_140 -#define nssv_HAVE_WCHAR16_T nssv_CPP11_100 -#define nssv_HAVE_WCHAR32_T nssv_CPP11_100 - -#if ! ( ( nssv_CPP11_OR_GREATER && nssv_COMPILER_CLANG_VERSION ) || nssv_BETWEEN( nssv_COMPILER_CLANG_VERSION, 300, 400 ) ) -# define nssv_HAVE_STD_DEFINED_LITERALS nssv_CPP11_140 -#else -# define nssv_HAVE_STD_DEFINED_LITERALS 0 -#endif - -// Presence of C++14 language features: - -#define nssv_HAVE_CONSTEXPR_14 nssv_CPP14_000 - -// Presence of C++17 language features: - -#define nssv_HAVE_NODISCARD nssv_CPP17_000 - -// Presence of C++ library features: - -#define nssv_HAVE_STD_HASH nssv_CPP11_120 - -// C++ feature usage: - -#if nssv_HAVE_CONSTEXPR_11 -# define nssv_constexpr constexpr -#else -# define nssv_constexpr /*constexpr*/ -#endif - -#if nssv_HAVE_CONSTEXPR_14 -# define nssv_constexpr14 constexpr -#else -# define nssv_constexpr14 /*constexpr*/ -#endif - -#if nssv_HAVE_EXPLICIT_CONVERSION -# define nssv_explicit explicit -#else -# define nssv_explicit /*explicit*/ -#endif - -#if nssv_HAVE_INLINE_NAMESPACE -# define nssv_inline_ns inline -#else -# define nssv_inline_ns /*inline*/ -#endif - -#if nssv_HAVE_NOEXCEPT -# define nssv_noexcept noexcept -#else -# define nssv_noexcept /*noexcept*/ -#endif - -//#if nssv_HAVE_REF_QUALIFIER -//# define nssv_ref_qual & -//# define nssv_refref_qual && -//#else -//# define nssv_ref_qual /*&*/ -//# define nssv_refref_qual /*&&*/ -//#endif - -#if nssv_HAVE_NULLPTR -# define nssv_nullptr nullptr -#else -# define nssv_nullptr NULL -#endif - -#if nssv_HAVE_NODISCARD -# define nssv_nodiscard [[nodiscard]] -#else -# define 
nssv_nodiscard /*[[nodiscard]]*/ -#endif - -// Additional includes: - -#include -#include -#include -#include -#include -#include // std::char_traits<> - -#if ! nssv_CONFIG_NO_EXCEPTIONS -# include -#endif - -#if nssv_CPP11_OR_GREATER -# include -#endif - -// Clang, GNUC, MSVC warning suppression macros: - -#if defined(__clang__) -# pragma clang diagnostic ignored "-Wreserved-user-defined-literal" -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wuser-defined-literals" -#elif defined(__GNUC__) -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wliteral-suffix" -#endif // __clang__ - -#if nssv_COMPILER_MSVC_VERSION >= 140 -# define nssv_SUPPRESS_MSGSL_WARNING(expr) [[gsl::suppress(expr)]] -# define nssv_SUPPRESS_MSVC_WARNING(code, descr) __pragma(warning(suppress: code) ) -# define nssv_DISABLE_MSVC_WARNINGS(codes) __pragma(warning(push)) __pragma(warning(disable: codes)) -#else -# define nssv_SUPPRESS_MSGSL_WARNING(expr) -# define nssv_SUPPRESS_MSVC_WARNING(code, descr) -# define nssv_DISABLE_MSVC_WARNINGS(codes) -#endif - -#if defined(__clang__) -# define nssv_RESTORE_WARNINGS() _Pragma("clang diagnostic pop") -#elif defined(__GNUC__) -# define nssv_RESTORE_WARNINGS() _Pragma("GCC diagnostic pop") -#elif nssv_COMPILER_MSVC_VERSION >= 140 -# define nssv_RESTORE_WARNINGS() __pragma(warning(pop )) -#else -# define nssv_RESTORE_WARNINGS() -#endif - -// Suppress the following MSVC (GSL) warnings: -// - C4455, non-gsl : 'operator ""sv': literal suffix identifiers that do not -// start with an underscore are reserved -// - C26472, gsl::t.1 : don't use a static_cast for arithmetic conversions; -// use brace initialization, gsl::narrow_cast or gsl::narow -// - C26481: gsl::b.1 : don't use pointer arithmetic. 
Use span instead - -nssv_DISABLE_MSVC_WARNINGS( 4455 26481 26472 ) -//nssv_DISABLE_CLANG_WARNINGS( "-Wuser-defined-literals" ) -//nssv_DISABLE_GNUC_WARNINGS( -Wliteral-suffix ) - -namespace nonstd { namespace sv_lite { - -#if nssv_CPP11_OR_GREATER - -namespace detail { - -#if nssv_CPP14_OR_GREATER - -template< typename CharT > -inline constexpr std::size_t length( CharT * s, std::size_t result = 0 ) -{ - CharT * v = s; - std::size_t r = result; - while ( *v != '\0' ) { - ++v; - ++r; - } - return r; -} - -#else // nssv_CPP14_OR_GREATER - -// Expect tail call optimization to make length() non-recursive: - -template< typename CharT > -inline constexpr std::size_t length( CharT * s, std::size_t result = 0 ) -{ - return *s == '\0' ? result : length( s + 1, result + 1 ); -} - -#endif // nssv_CPP14_OR_GREATER - -} // namespace detail - -#endif // nssv_CPP11_OR_GREATER - -template -< - class CharT, - class Traits = std::char_traits -> -class basic_string_view; - -// -// basic_string_view: -// - -template -< - class CharT, - class Traits /* = std::char_traits */ -> -class basic_string_view -{ -public: - // Member types: - - typedef Traits traits_type; - typedef CharT value_type; - - typedef CharT * pointer; - typedef CharT const * const_pointer; - typedef CharT & reference; - typedef CharT const & const_reference; - - typedef const_pointer iterator; - typedef const_pointer const_iterator; - typedef std::reverse_iterator< const_iterator > reverse_iterator; - typedef std::reverse_iterator< const_iterator > const_reverse_iterator; - - typedef std::size_t size_type; - typedef std::ptrdiff_t difference_type; - - // 24.4.2.1 Construction and assignment: - - nssv_constexpr basic_string_view() nssv_noexcept - : data_( nssv_nullptr ) - , size_( 0 ) - {} - -#if nssv_CPP11_OR_GREATER - nssv_constexpr basic_string_view( basic_string_view const & other ) nssv_noexcept = default; -#else - nssv_constexpr basic_string_view( basic_string_view const & other ) nssv_noexcept - : data_( 
other.data_) - , size_( other.size_) - {} -#endif - - nssv_constexpr basic_string_view( CharT const * s, size_type count ) nssv_noexcept // non-standard noexcept - : data_( s ) - , size_( count ) - {} - - nssv_constexpr basic_string_view( CharT const * s) nssv_noexcept // non-standard noexcept - : data_( s ) -#if nssv_CPP17_OR_GREATER - , size_( Traits::length(s) ) -#elif nssv_CPP11_OR_GREATER - , size_( detail::length(s) ) -#else - , size_( Traits::length(s) ) -#endif - {} - - // Assignment: - -#if nssv_CPP11_OR_GREATER - nssv_constexpr14 basic_string_view & operator=( basic_string_view const & other ) nssv_noexcept = default; -#else - nssv_constexpr14 basic_string_view & operator=( basic_string_view const & other ) nssv_noexcept - { - data_ = other.data_; - size_ = other.size_; - return *this; - } -#endif - - // 24.4.2.2 Iterator support: - - nssv_constexpr const_iterator begin() const nssv_noexcept { return data_; } - nssv_constexpr const_iterator end() const nssv_noexcept { return data_ + size_; } - - nssv_constexpr const_iterator cbegin() const nssv_noexcept { return begin(); } - nssv_constexpr const_iterator cend() const nssv_noexcept { return end(); } - - nssv_constexpr const_reverse_iterator rbegin() const nssv_noexcept { return const_reverse_iterator( end() ); } - nssv_constexpr const_reverse_iterator rend() const nssv_noexcept { return const_reverse_iterator( begin() ); } - - nssv_constexpr const_reverse_iterator crbegin() const nssv_noexcept { return rbegin(); } - nssv_constexpr const_reverse_iterator crend() const nssv_noexcept { return rend(); } - - // 24.4.2.3 Capacity: - - nssv_constexpr size_type size() const nssv_noexcept { return size_; } - nssv_constexpr size_type length() const nssv_noexcept { return size_; } - nssv_constexpr size_type max_size() const nssv_noexcept { return (std::numeric_limits< size_type >::max)(); } - - // since C++20 - nssv_nodiscard nssv_constexpr bool empty() const nssv_noexcept - { - return 0 == size_; - } - - // 24.4.2.4 
Element access: - - nssv_constexpr const_reference operator[]( size_type pos ) const - { - return data_at( pos ); - } - - nssv_constexpr14 const_reference at( size_type pos ) const - { -#if nssv_CONFIG_NO_EXCEPTIONS - assert( pos < size() ); -#else - if ( pos >= size() ) - { - throw std::out_of_range("nonstd::string_view::at()"); - } -#endif - return data_at( pos ); - } - - nssv_constexpr const_reference front() const { return data_at( 0 ); } - nssv_constexpr const_reference back() const { return data_at( size() - 1 ); } - - nssv_constexpr const_pointer data() const nssv_noexcept { return data_; } - - // 24.4.2.5 Modifiers: - - nssv_constexpr14 void remove_prefix( size_type n ) - { - assert( n <= size() ); - data_ += n; - size_ -= n; - } - - nssv_constexpr14 void remove_suffix( size_type n ) - { - assert( n <= size() ); - size_ -= n; - } - - nssv_constexpr14 void swap( basic_string_view & other ) nssv_noexcept - { - using std::swap; - swap( data_, other.data_ ); - swap( size_, other.size_ ); - } - - // 24.4.2.6 String operations: - - size_type copy( CharT * dest, size_type n, size_type pos = 0 ) const - { -#if nssv_CONFIG_NO_EXCEPTIONS - assert( pos <= size() ); -#else - if ( pos > size() ) - { - throw std::out_of_range("nonstd::string_view::copy()"); - } -#endif - const size_type rlen = (std::min)( n, size() - pos ); - - (void) Traits::copy( dest, data() + pos, rlen ); - - return rlen; - } - - nssv_constexpr14 basic_string_view substr( size_type pos = 0, size_type n = npos ) const - { -#if nssv_CONFIG_NO_EXCEPTIONS - assert( pos <= size() ); -#else - if ( pos > size() ) - { - throw std::out_of_range("nonstd::string_view::substr()"); - } -#endif - return basic_string_view( data() + pos, (std::min)( n, size() - pos ) ); - } - - // compare(), 6x: - - nssv_constexpr14 int compare( basic_string_view other ) const nssv_noexcept // (1) - { - if ( const int result = Traits::compare( data(), other.data(), (std::min)( size(), other.size() ) ) ) - { - return result; - } - - 
return size() == other.size() ? 0 : size() < other.size() ? -1 : 1; - } - - nssv_constexpr int compare( size_type pos1, size_type n1, basic_string_view other ) const // (2) - { - return substr( pos1, n1 ).compare( other ); - } - - nssv_constexpr int compare( size_type pos1, size_type n1, basic_string_view other, size_type pos2, size_type n2 ) const // (3) - { - return substr( pos1, n1 ).compare( other.substr( pos2, n2 ) ); - } - - nssv_constexpr int compare( CharT const * s ) const // (4) - { - return compare( basic_string_view( s ) ); - } - - nssv_constexpr int compare( size_type pos1, size_type n1, CharT const * s ) const // (5) - { - return substr( pos1, n1 ).compare( basic_string_view( s ) ); - } - - nssv_constexpr int compare( size_type pos1, size_type n1, CharT const * s, size_type n2 ) const // (6) - { - return substr( pos1, n1 ).compare( basic_string_view( s, n2 ) ); - } - - // 24.4.2.7 Searching: - - // starts_with(), 3x, since C++20: - - nssv_constexpr bool starts_with( basic_string_view v ) const nssv_noexcept // (1) - { - return size() >= v.size() && compare( 0, v.size(), v ) == 0; - } - - nssv_constexpr bool starts_with( CharT c ) const nssv_noexcept // (2) - { - return starts_with( basic_string_view( &c, 1 ) ); - } - - nssv_constexpr bool starts_with( CharT const * s ) const // (3) - { - return starts_with( basic_string_view( s ) ); - } - - // ends_with(), 3x, since C++20: - - nssv_constexpr bool ends_with( basic_string_view v ) const nssv_noexcept // (1) - { - return size() >= v.size() && compare( size() - v.size(), npos, v ) == 0; - } - - nssv_constexpr bool ends_with( CharT c ) const nssv_noexcept // (2) - { - return ends_with( basic_string_view( &c, 1 ) ); - } - - nssv_constexpr bool ends_with( CharT const * s ) const // (3) - { - return ends_with( basic_string_view( s ) ); - } - - // find(), 4x: - - nssv_constexpr14 size_type find( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) - { - return assert( v.size() == 0 || v.data() 
!= nssv_nullptr ) - , pos >= size() - ? npos - : to_pos( std::search( cbegin() + pos, cend(), v.cbegin(), v.cend(), Traits::eq ) ); - } - - nssv_constexpr14 size_type find( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) - { - return find( basic_string_view( &c, 1 ), pos ); - } - - nssv_constexpr14 size_type find( CharT const * s, size_type pos, size_type n ) const // (3) - { - return find( basic_string_view( s, n ), pos ); - } - - nssv_constexpr14 size_type find( CharT const * s, size_type pos = 0 ) const // (4) - { - return find( basic_string_view( s ), pos ); - } - - // rfind(), 4x: - - nssv_constexpr14 size_type rfind( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) - { - if ( size() < v.size() ) - { - return npos; - } - - if ( v.empty() ) - { - return (std::min)( size(), pos ); - } - - const_iterator last = cbegin() + (std::min)( size() - v.size(), pos ) + v.size(); - const_iterator result = std::find_end( cbegin(), last, v.cbegin(), v.cend(), Traits::eq ); - - return result != last ? size_type( result - cbegin() ) : npos; - } - - nssv_constexpr14 size_type rfind( CharT c, size_type pos = npos ) const nssv_noexcept // (2) - { - return rfind( basic_string_view( &c, 1 ), pos ); - } - - nssv_constexpr14 size_type rfind( CharT const * s, size_type pos, size_type n ) const // (3) - { - return rfind( basic_string_view( s, n ), pos ); - } - - nssv_constexpr14 size_type rfind( CharT const * s, size_type pos = npos ) const // (4) - { - return rfind( basic_string_view( s ), pos ); - } - - // find_first_of(), 4x: - - nssv_constexpr size_type find_first_of( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) - { - return pos >= size() - ? 
npos - : to_pos( std::find_first_of( cbegin() + pos, cend(), v.cbegin(), v.cend(), Traits::eq ) ); - } - - nssv_constexpr size_type find_first_of( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) - { - return find_first_of( basic_string_view( &c, 1 ), pos ); - } - - nssv_constexpr size_type find_first_of( CharT const * s, size_type pos, size_type n ) const // (3) - { - return find_first_of( basic_string_view( s, n ), pos ); - } - - nssv_constexpr size_type find_first_of( CharT const * s, size_type pos = 0 ) const // (4) - { - return find_first_of( basic_string_view( s ), pos ); - } - - // find_last_of(), 4x: - - nssv_constexpr size_type find_last_of( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) - { - return empty() - ? npos - : pos >= size() - ? find_last_of( v, size() - 1 ) - : to_pos( std::find_first_of( const_reverse_iterator( cbegin() + pos + 1 ), crend(), v.cbegin(), v.cend(), Traits::eq ) ); - } - - nssv_constexpr size_type find_last_of( CharT c, size_type pos = npos ) const nssv_noexcept // (2) - { - return find_last_of( basic_string_view( &c, 1 ), pos ); - } - - nssv_constexpr size_type find_last_of( CharT const * s, size_type pos, size_type count ) const // (3) - { - return find_last_of( basic_string_view( s, count ), pos ); - } - - nssv_constexpr size_type find_last_of( CharT const * s, size_type pos = npos ) const // (4) - { - return find_last_of( basic_string_view( s ), pos ); - } - - // find_first_not_of(), 4x: - - nssv_constexpr size_type find_first_not_of( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) - { - return pos >= size() - ? 
npos - : to_pos( std::find_if( cbegin() + pos, cend(), not_in_view( v ) ) ); - } - - nssv_constexpr size_type find_first_not_of( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) - { - return find_first_not_of( basic_string_view( &c, 1 ), pos ); - } - - nssv_constexpr size_type find_first_not_of( CharT const * s, size_type pos, size_type count ) const // (3) - { - return find_first_not_of( basic_string_view( s, count ), pos ); - } - - nssv_constexpr size_type find_first_not_of( CharT const * s, size_type pos = 0 ) const // (4) - { - return find_first_not_of( basic_string_view( s ), pos ); - } - - // find_last_not_of(), 4x: - - nssv_constexpr size_type find_last_not_of( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) - { - return empty() - ? npos - : pos >= size() - ? find_last_not_of( v, size() - 1 ) - : to_pos( std::find_if( const_reverse_iterator( cbegin() + pos + 1 ), crend(), not_in_view( v ) ) ); - } - - nssv_constexpr size_type find_last_not_of( CharT c, size_type pos = npos ) const nssv_noexcept // (2) - { - return find_last_not_of( basic_string_view( &c, 1 ), pos ); - } - - nssv_constexpr size_type find_last_not_of( CharT const * s, size_type pos, size_type count ) const // (3) - { - return find_last_not_of( basic_string_view( s, count ), pos ); - } - - nssv_constexpr size_type find_last_not_of( CharT const * s, size_type pos = npos ) const // (4) - { - return find_last_not_of( basic_string_view( s ), pos ); - } - - // Constants: - -#if nssv_CPP17_OR_GREATER - static nssv_constexpr size_type npos = size_type(-1); -#elif nssv_CPP11_OR_GREATER - enum : size_type { npos = size_type(-1) }; -#else - enum { npos = size_type(-1) }; -#endif - -private: - struct not_in_view - { - const basic_string_view v; - - nssv_constexpr explicit not_in_view( basic_string_view v_ ) : v( v_ ) {} - - nssv_constexpr bool operator()( CharT c ) const - { - return npos == v.find_first_of( c ); - } - }; - - nssv_constexpr size_type to_pos( const_iterator it 
) const - { - return it == cend() ? npos : size_type( it - cbegin() ); - } - - nssv_constexpr size_type to_pos( const_reverse_iterator it ) const - { - return it == crend() ? npos : size_type( crend() - it - 1 ); - } - - nssv_constexpr const_reference data_at( size_type pos ) const - { -#if nssv_BETWEEN( nssv_COMPILER_GNUC_VERSION, 1, 500 ) - return data_[pos]; -#else - return assert( pos < size() ), data_[pos]; -#endif - } - -private: - const_pointer data_; - size_type size_; - -public: -#if nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS - - template< class Allocator > - basic_string_view( std::basic_string const & s ) nssv_noexcept - : data_( s.data() ) - , size_( s.size() ) - {} - -#if nssv_HAVE_EXPLICIT_CONVERSION - - template< class Allocator > - explicit operator std::basic_string() const - { - return to_string( Allocator() ); - } - -#endif // nssv_HAVE_EXPLICIT_CONVERSION - -#if nssv_CPP11_OR_GREATER - - template< class Allocator = std::allocator > - std::basic_string - to_string( Allocator const & a = Allocator() ) const - { - return std::basic_string( begin(), end(), a ); - } - -#else - - std::basic_string - to_string() const - { - return std::basic_string( begin(), end() ); - } - - template< class Allocator > - std::basic_string - to_string( Allocator const & a ) const - { - return std::basic_string( begin(), end(), a ); - } - -#endif // nssv_CPP11_OR_GREATER - -#endif // nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS -}; - -// -// Non-member functions: -// - -// 24.4.3 Non-member comparison functions: -// lexicographically compare two string views (function template): - -template< class CharT, class Traits > -nssv_constexpr bool operator== ( - basic_string_view lhs, - basic_string_view rhs ) nssv_noexcept -{ return lhs.compare( rhs ) == 0 ; } - -template< class CharT, class Traits > -nssv_constexpr bool operator!= ( - basic_string_view lhs, - basic_string_view rhs ) nssv_noexcept -{ return lhs.compare( rhs ) != 0 ; } - -template< class CharT, class 
Traits > -nssv_constexpr bool operator< ( - basic_string_view lhs, - basic_string_view rhs ) nssv_noexcept -{ return lhs.compare( rhs ) < 0 ; } - -template< class CharT, class Traits > -nssv_constexpr bool operator<= ( - basic_string_view lhs, - basic_string_view rhs ) nssv_noexcept -{ return lhs.compare( rhs ) <= 0 ; } - -template< class CharT, class Traits > -nssv_constexpr bool operator> ( - basic_string_view lhs, - basic_string_view rhs ) nssv_noexcept -{ return lhs.compare( rhs ) > 0 ; } - -template< class CharT, class Traits > -nssv_constexpr bool operator>= ( - basic_string_view lhs, - basic_string_view rhs ) nssv_noexcept -{ return lhs.compare( rhs ) >= 0 ; } - -// Let S be basic_string_view, and sv be an instance of S. -// Implementations shall provide sufficient additional overloads marked -// constexpr and noexcept so that an object t with an implicit conversion -// to S can be compared according to Table 67. - -#if ! nssv_CPP11_OR_GREATER || nssv_BETWEEN( nssv_COMPILER_MSVC_VERSION, 100, 141 ) - -// accomodate for older compilers: - -// == - -template< class CharT, class Traits> -nssv_constexpr bool operator==( - basic_string_view lhs, - CharT const * rhs ) nssv_noexcept -{ return lhs.compare( rhs ) == 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator==( - CharT const * lhs, - basic_string_view rhs ) nssv_noexcept -{ return rhs.compare( lhs ) == 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator==( - basic_string_view lhs, - std::basic_string rhs ) nssv_noexcept -{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator==( - std::basic_string rhs, - basic_string_view lhs ) nssv_noexcept -{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } - -// != - -template< class CharT, class Traits> -nssv_constexpr bool operator!=( - basic_string_view lhs, - char const * rhs ) nssv_noexcept -{ return lhs.compare( rhs ) != 0; } - 
-template< class CharT, class Traits> -nssv_constexpr bool operator!=( - char const * lhs, - basic_string_view rhs ) nssv_noexcept -{ return rhs.compare( lhs ) != 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator!=( - basic_string_view lhs, - std::basic_string rhs ) nssv_noexcept -{ return lhs.size() != rhs.size() && lhs.compare( rhs ) != 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator!=( - std::basic_string rhs, - basic_string_view lhs ) nssv_noexcept -{ return lhs.size() != rhs.size() || rhs.compare( lhs ) != 0; } - -// < - -template< class CharT, class Traits> -nssv_constexpr bool operator<( - basic_string_view lhs, - char const * rhs ) nssv_noexcept -{ return lhs.compare( rhs ) < 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator<( - char const * lhs, - basic_string_view rhs ) nssv_noexcept -{ return rhs.compare( lhs ) > 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator<( - basic_string_view lhs, - std::basic_string rhs ) nssv_noexcept -{ return lhs.compare( rhs ) < 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator<( - std::basic_string rhs, - basic_string_view lhs ) nssv_noexcept -{ return rhs.compare( lhs ) > 0; } - -// <= - -template< class CharT, class Traits> -nssv_constexpr bool operator<=( - basic_string_view lhs, - char const * rhs ) nssv_noexcept -{ return lhs.compare( rhs ) <= 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator<=( - char const * lhs, - basic_string_view rhs ) nssv_noexcept -{ return rhs.compare( lhs ) >= 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator<=( - basic_string_view lhs, - std::basic_string rhs ) nssv_noexcept -{ return lhs.compare( rhs ) <= 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator<=( - std::basic_string rhs, - basic_string_view lhs ) nssv_noexcept -{ return rhs.compare( lhs ) >= 0; } - -// > - -template< class CharT, 
class Traits> -nssv_constexpr bool operator>( - basic_string_view lhs, - char const * rhs ) nssv_noexcept -{ return lhs.compare( rhs ) > 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator>( - char const * lhs, - basic_string_view rhs ) nssv_noexcept -{ return rhs.compare( lhs ) < 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator>( - basic_string_view lhs, - std::basic_string rhs ) nssv_noexcept -{ return lhs.compare( rhs ) > 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator>( - std::basic_string rhs, - basic_string_view lhs ) nssv_noexcept -{ return rhs.compare( lhs ) < 0; } - -// >= - -template< class CharT, class Traits> -nssv_constexpr bool operator>=( - basic_string_view lhs, - char const * rhs ) nssv_noexcept -{ return lhs.compare( rhs ) >= 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator>=( - char const * lhs, - basic_string_view rhs ) nssv_noexcept -{ return rhs.compare( lhs ) <= 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator>=( - basic_string_view lhs, - std::basic_string rhs ) nssv_noexcept -{ return lhs.compare( rhs ) >= 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator>=( - std::basic_string rhs, - basic_string_view lhs ) nssv_noexcept -{ return rhs.compare( lhs ) <= 0; } - -#else // newer compilers: - -#define nssv_BASIC_STRING_VIEW_I(T,U) typename std::decay< basic_string_view >::type - -#if nssv_BETWEEN( nssv_COMPILER_MSVC_VERSION, 140, 150 ) -# define nssv_MSVC_ORDER(x) , int=x -#else -# define nssv_MSVC_ORDER(x) /*, int=x*/ -#endif - -// == - -template< class CharT, class Traits nssv_MSVC_ORDER(1) > -nssv_constexpr bool operator==( - basic_string_view lhs, - nssv_BASIC_STRING_VIEW_I(CharT, Traits) rhs ) nssv_noexcept -{ return lhs.compare( rhs ) == 0; } - -template< class CharT, class Traits nssv_MSVC_ORDER(2) > -nssv_constexpr bool operator==( - nssv_BASIC_STRING_VIEW_I(CharT, Traits) lhs, - 
basic_string_view rhs ) nssv_noexcept -{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } - -// != - -template< class CharT, class Traits nssv_MSVC_ORDER(1) > -nssv_constexpr bool operator!= ( - basic_string_view < CharT, Traits > lhs, - nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept -{ return lhs.size() != rhs.size() || lhs.compare( rhs ) != 0 ; } - -template< class CharT, class Traits nssv_MSVC_ORDER(2) > -nssv_constexpr bool operator!= ( - nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, - basic_string_view < CharT, Traits > rhs ) nssv_noexcept -{ return lhs.compare( rhs ) != 0 ; } - -// < - -template< class CharT, class Traits nssv_MSVC_ORDER(1) > -nssv_constexpr bool operator< ( - basic_string_view < CharT, Traits > lhs, - nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept -{ return lhs.compare( rhs ) < 0 ; } - -template< class CharT, class Traits nssv_MSVC_ORDER(2) > -nssv_constexpr bool operator< ( - nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, - basic_string_view < CharT, Traits > rhs ) nssv_noexcept -{ return lhs.compare( rhs ) < 0 ; } - -// <= - -template< class CharT, class Traits nssv_MSVC_ORDER(1) > -nssv_constexpr bool operator<= ( - basic_string_view < CharT, Traits > lhs, - nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept -{ return lhs.compare( rhs ) <= 0 ; } - -template< class CharT, class Traits nssv_MSVC_ORDER(2) > -nssv_constexpr bool operator<= ( - nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, - basic_string_view < CharT, Traits > rhs ) nssv_noexcept -{ return lhs.compare( rhs ) <= 0 ; } - -// > - -template< class CharT, class Traits nssv_MSVC_ORDER(1) > -nssv_constexpr bool operator> ( - basic_string_view < CharT, Traits > lhs, - nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept -{ return lhs.compare( rhs ) > 0 ; } - -template< class CharT, class Traits nssv_MSVC_ORDER(2) > -nssv_constexpr bool operator> ( - nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, - basic_string_view < 
CharT, Traits > rhs ) nssv_noexcept -{ return lhs.compare( rhs ) > 0 ; } - -// >= - -template< class CharT, class Traits nssv_MSVC_ORDER(1) > -nssv_constexpr bool operator>= ( - basic_string_view < CharT, Traits > lhs, - nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept -{ return lhs.compare( rhs ) >= 0 ; } - -template< class CharT, class Traits nssv_MSVC_ORDER(2) > -nssv_constexpr bool operator>= ( - nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, - basic_string_view < CharT, Traits > rhs ) nssv_noexcept -{ return lhs.compare( rhs ) >= 0 ; } - -#undef nssv_MSVC_ORDER -#undef nssv_BASIC_STRING_VIEW_I - -#endif // compiler-dependent approach to comparisons - -// 24.4.4 Inserters and extractors: - -namespace detail { - -template< class Stream > -void write_padding( Stream & os, std::streamsize n ) -{ - for ( std::streamsize i = 0; i < n; ++i ) - os.rdbuf()->sputc( os.fill() ); -} - -template< class Stream, class View > -Stream & write_to_stream( Stream & os, View const & sv ) -{ - typename Stream::sentry sentry( os ); - - if ( !os ) - return os; - - const std::streamsize length = static_cast( sv.length() ); - - // Whether, and how, to pad: - const bool pad = ( length < os.width() ); - const bool left_pad = pad && ( os.flags() & std::ios_base::adjustfield ) == std::ios_base::right; - - if ( left_pad ) - write_padding( os, os.width() - length ); - - // Write span characters: - os.rdbuf()->sputn( sv.begin(), length ); - - if ( pad && !left_pad ) - write_padding( os, os.width() - length ); - - // Reset output stream width: - os.width( 0 ); - - return os; -} - -} // namespace detail - -template< class CharT, class Traits > -std::basic_ostream & -operator<<( - std::basic_ostream& os, - basic_string_view sv ) -{ - return detail::write_to_stream( os, sv ); -} - -// Several typedefs for common character types are provided: - -typedef basic_string_view string_view; -typedef basic_string_view wstring_view; -#if nssv_HAVE_WCHAR16_T -typedef basic_string_view 
u16string_view; -typedef basic_string_view u32string_view; -#endif - -}} // namespace nonstd::sv_lite - -// -// 24.4.6 Suffix for basic_string_view literals: -// - -#if nssv_HAVE_USER_DEFINED_LITERALS - -namespace nonstd { -nssv_inline_ns namespace literals { -nssv_inline_ns namespace string_view_literals { - -#if nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS - -nssv_constexpr nonstd::sv_lite::string_view operator "" sv( const char* str, size_t len ) nssv_noexcept // (1) -{ - return nonstd::sv_lite::string_view{ str, len }; -} - -nssv_constexpr nonstd::sv_lite::u16string_view operator "" sv( const char16_t* str, size_t len ) nssv_noexcept // (2) -{ - return nonstd::sv_lite::u16string_view{ str, len }; -} - -nssv_constexpr nonstd::sv_lite::u32string_view operator "" sv( const char32_t* str, size_t len ) nssv_noexcept // (3) -{ - return nonstd::sv_lite::u32string_view{ str, len }; -} - -nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) -{ - return nonstd::sv_lite::wstring_view{ str, len }; -} - -#endif // nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS - -#if nssv_CONFIG_USR_SV_OPERATOR - -nssv_constexpr nonstd::sv_lite::string_view operator "" _sv( const char* str, size_t len ) nssv_noexcept // (1) -{ - return nonstd::sv_lite::string_view{ str, len }; -} - -nssv_constexpr nonstd::sv_lite::u16string_view operator "" _sv( const char16_t* str, size_t len ) nssv_noexcept // (2) -{ - return nonstd::sv_lite::u16string_view{ str, len }; -} - -nssv_constexpr nonstd::sv_lite::u32string_view operator "" _sv( const char32_t* str, size_t len ) nssv_noexcept // (3) -{ - return nonstd::sv_lite::u32string_view{ str, len }; -} - -nssv_constexpr nonstd::sv_lite::wstring_view operator "" _sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) -{ - return nonstd::sv_lite::wstring_view{ str, len }; -} - -#endif // nssv_CONFIG_USR_SV_OPERATOR - -}}} // namespace 
nonstd::literals::string_view_literals - -#endif - -// -// Extensions for std::string: -// - -#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS - -namespace nonstd { -namespace sv_lite { - -// Exclude MSVC 14 (19.00): it yields ambiguous to_string(): - -#if nssv_CPP11_OR_GREATER && nssv_COMPILER_MSVC_VERSION != 140 - -template< class CharT, class Traits, class Allocator = std::allocator > -std::basic_string -to_string( basic_string_view v, Allocator const & a = Allocator() ) -{ - return std::basic_string( v.begin(), v.end(), a ); -} - -#else - -template< class CharT, class Traits > -std::basic_string -to_string( basic_string_view v ) -{ - return std::basic_string( v.begin(), v.end() ); -} - -template< class CharT, class Traits, class Allocator > -std::basic_string -to_string( basic_string_view v, Allocator const & a ) -{ - return std::basic_string( v.begin(), v.end(), a ); -} - -#endif // nssv_CPP11_OR_GREATER - -template< class CharT, class Traits, class Allocator > -basic_string_view -to_string_view( std::basic_string const & s ) -{ - return basic_string_view( s.data(), s.size() ); -} - -}} // namespace nonstd::sv_lite - -#endif // nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS - -// -// make types and algorithms available in namespace nonstd: -// - -namespace nonstd { - -using sv_lite::basic_string_view; -using sv_lite::string_view; -using sv_lite::wstring_view; - -#if nssv_HAVE_WCHAR16_T -using sv_lite::u16string_view; -#endif -#if nssv_HAVE_WCHAR32_T -using sv_lite::u32string_view; -#endif - -// literal "sv" - -using sv_lite::operator==; -using sv_lite::operator!=; -using sv_lite::operator<; -using sv_lite::operator<=; -using sv_lite::operator>; -using sv_lite::operator>=; - -using sv_lite::operator<<; - -#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS -using sv_lite::to_string; -using sv_lite::to_string_view; -#endif - -} // namespace nonstd - -// 24.4.5 Hash support (C++11): - -// Note: The hash value of a string view object is equal to the hash 
value of -// the corresponding string object. - -#if nssv_HAVE_STD_HASH - -#include - -namespace std { - -template<> -struct hash< nonstd::string_view > -{ -public: - std::size_t operator()( nonstd::string_view v ) const nssv_noexcept - { - return std::hash()( std::string( v.data(), v.size() ) ); - } -}; - -template<> -struct hash< nonstd::wstring_view > -{ -public: - std::size_t operator()( nonstd::wstring_view v ) const nssv_noexcept - { - return std::hash()( std::wstring( v.data(), v.size() ) ); - } -}; - -template<> -struct hash< nonstd::u16string_view > -{ -public: - std::size_t operator()( nonstd::u16string_view v ) const nssv_noexcept - { - return std::hash()( std::u16string( v.data(), v.size() ) ); - } -}; - -template<> -struct hash< nonstd::u32string_view > -{ -public: - std::size_t operator()( nonstd::u32string_view v ) const nssv_noexcept - { - return std::hash()( std::u32string( v.data(), v.size() ) ); - } -}; - -} // namespace std - -#endif // nssv_HAVE_STD_HASH - -nssv_RESTORE_WARNINGS() - -#endif // nssv_HAVE_STD_STRING_VIEW -#endif // NONSTD_SV_LITE_H_INCLUDED diff --git a/cpp/src/arrow/visit_data_inline.h b/cpp/src/arrow/visit_data_inline.h index 2919f3d96fe5a..7d37698f14de0 100644 --- a/cpp/src/arrow/visit_data_inline.h +++ b/cpp/src/arrow/visit_data_inline.h @@ -17,6 +17,8 @@ #pragma once +#include + #include "arrow/array.h" #include "arrow/status.h" #include "arrow/type.h" @@ -25,7 +27,6 @@ #include "arrow/util/bit_util.h" #include "arrow/util/checked_cast.h" #include "arrow/util/functional.h" -#include "arrow/util/string_view.h" namespace arrow { namespace internal { @@ -89,7 +90,7 @@ struct ArraySpanInlineVisitor { // Binary, String... 
template struct ArraySpanInlineVisitor> { - using c_type = util::string_view; + using c_type = std::string_view; template static Status VisitStatus(const ArraySpan& arr, ValidFunc&& valid_func, @@ -114,7 +115,7 @@ struct ArraySpanInlineVisitor> { arr.buffers[0].data, arr.offset, arr.length, [&](int64_t i) { ARROW_UNUSED(i); - auto value = util::string_view(data + cur_offset, *offsets - cur_offset); + auto value = std::string_view(data + cur_offset, *offsets - cur_offset); cur_offset = *offsets++; return valid_func(value); }, @@ -146,8 +147,8 @@ struct ArraySpanInlineVisitor> { VisitBitBlocksVoid( arr.buffers[0].data, arr.offset, arr.length, [&](int64_t i) { - auto value = util::string_view(reinterpret_cast(data + offsets[i]), - offsets[i + 1] - offsets[i]); + auto value = std::string_view(reinterpret_cast(data + offsets[i]), + offsets[i + 1] - offsets[i]); valid_func(value); }, std::forward(null_func)); @@ -157,7 +158,7 @@ struct ArraySpanInlineVisitor> { // FixedSizeBinary, Decimal128 template struct ArraySpanInlineVisitor> { - using c_type = util::string_view; + using c_type = std::string_view; template static Status VisitStatus(const ArraySpan& arr, ValidFunc&& valid_func, @@ -168,7 +169,7 @@ struct ArraySpanInlineVisitor> { return VisitBitBlocks( arr.buffers[0].data, arr.offset, arr.length, [&](int64_t i) { - auto value = util::string_view(data, byte_width); + auto value = std::string_view(data, byte_width); data += byte_width; return valid_func(value); }, @@ -187,7 +188,7 @@ struct ArraySpanInlineVisitor> { VisitBitBlocksVoid( arr.buffers[0].data, arr.offset, arr.length, [&](int64_t i) { - valid_func(util::string_view(data, byte_width)); + valid_func(std::string_view(data, byte_width)); data += byte_width; }, [&]() { @@ -222,7 +223,7 @@ VisitArraySpanInline(const ArraySpan& arr, ValidFunc&& valid_func, NullFunc&& nu // The scalar value's type depends on the array data type: // - the type's `c_type`, if any // - for boolean arrays, a `bool` -// - for binary, 
string and fixed-size binary arrays, a `util::string_view` +// - for binary, string and fixed-size binary arrays, a `std::string_view` template struct ArraySpanVisitor { diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc index cf0f4f9b917c3..a2a0216d3145c 100644 --- a/cpp/src/gandiva/gdv_function_stubs.cc +++ b/cpp/src/gandiva/gdv_function_stubs.cc @@ -120,7 +120,7 @@ bool gdv_fn_in_expr_lookup_utf8(int64_t ptr, const char* data, int data_len, } gandiva::InHolder* holder = reinterpret_cast*>(ptr); - return holder->HasValue(arrow::util::string_view(data, data_len)); + return holder->HasValue(std::string_view(data, data_len)); } int32_t gdv_fn_populate_varlen_vector(int64_t context_ptr, int8_t* data_ptr, @@ -205,8 +205,7 @@ const char* gdv_fn_base64_encode_binary(int64_t context, const char* in, int32_t return ""; } // use arrow method to encode base64 string - std::string encoded_str = - arrow::util::base64_encode(arrow::util::string_view(in, in_len)); + std::string encoded_str = arrow::util::base64_encode(std::string_view(in, in_len)); *out_len = static_cast(encoded_str.length()); // allocate memory for response char* ret = reinterpret_cast( @@ -233,8 +232,7 @@ const char* gdv_fn_base64_decode_utf8(int64_t context, const char* in, int32_t i return ""; } // use arrow method to decode base64 string - std::string decoded_str = - arrow::util::base64_decode(arrow::util::string_view(in, in_len)); + std::string decoded_str = arrow::util::base64_decode(std::string_view(in, in_len)); *out_len = static_cast(decoded_str.length()); // allocate memory for response char* ret = reinterpret_cast( diff --git a/cpp/src/gandiva/gdv_string_function_stubs.cc b/cpp/src/gandiva/gdv_string_function_stubs.cc index 0c963f4417f2e..cf04de3a8e15c 100644 --- a/cpp/src/gandiva/gdv_string_function_stubs.cc +++ b/cpp/src/gandiva/gdv_string_function_stubs.cc @@ -21,11 +21,11 @@ #include #include +#include #include #include #include 
"arrow/util/double_conversion.h" -#include "arrow/util/string_view.h" #include "arrow/util/utf8_internal.h" #include "arrow/util/value_parsing.h" @@ -102,7 +102,7 @@ const char* gdv_fn_regexp_extract_utf8_utf8_int32(int64_t ptr, int64_t holder_pt *out_len = 0; \ return ""; \ } \ - arrow::Status status = formatter(value, [&](arrow::util::string_view v) { \ + arrow::Status status = formatter(value, [&](std::string_view v) { \ int64_t size = static_cast(v.size()); \ *out_len = static_cast(len < size ? len : size); \ memcpy(ret, v.data(), *out_len); \ @@ -138,7 +138,7 @@ const char* gdv_fn_regexp_extract_utf8_utf8_int32(int64_t ptr, int64_t holder_pt *out_len = 0; \ return ""; \ } \ - arrow::Status status = formatter(value, [&](arrow::util::string_view v) { \ + arrow::Status status = formatter(value, [&](std::string_view v) { \ int64_t size = static_cast(v.size()); \ *out_len = static_cast(len < size ? len : size); \ memcpy(ret, v.data(), *out_len); \ diff --git a/cpp/src/gandiva/in_holder.h b/cpp/src/gandiva/in_holder.h index d55ab5ec55f27..65262969c5ded 100644 --- a/cpp/src/gandiva/in_holder.h +++ b/cpp/src/gandiva/in_holder.h @@ -72,19 +72,17 @@ class InHolder { } } - bool HasValue(arrow::util::string_view value) const { - return values_lookup_.count(value) == 1; - } + bool HasValue(std::string_view value) const { return values_lookup_.count(value) == 1; } private: struct string_view_hash { public: - std::size_t operator()(arrow::util::string_view v) const { + std::size_t operator()(std::string_view v) const { return arrow::internal::ComputeStringHash<0>(v.data(), v.length()); } }; - std::unordered_set values_lookup_; + std::unordered_set values_lookup_; const std::unordered_set values_; }; diff --git a/cpp/src/parquet/arrow/reader_internal.cc b/cpp/src/parquet/arrow/reader_internal.cc index 64fcc451808a2..e428c206bfc2a 100644 --- a/cpp/src/parquet/arrow/reader_internal.cc +++ b/cpp/src/parquet/arrow/reader_internal.cc @@ -23,6 +23,7 @@ #include #include #include 
+#include #include #include @@ -43,7 +44,6 @@ #include "arrow/util/endian.h" #include "arrow/util/int_util_overflow.h" #include "arrow/util/logging.h" -#include "arrow/util/string_view.h" #include "arrow/util/ubsan.h" #include "parquet/arrow/reader.h" diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc index 716083f8a580e..79c18c9b410a9 100644 --- a/cpp/src/parquet/arrow/schema.cc +++ b/cpp/src/parquet/arrow/schema.cc @@ -30,6 +30,7 @@ #include "arrow/util/checked_cast.h" #include "arrow/util/key_value_metadata.h" #include "arrow/util/logging.h" +#include "arrow/util/string.h" #include "arrow/util/value_parsing.h" #include "parquet/arrow/schema_internal.h" @@ -44,6 +45,7 @@ using arrow::FieldVector; using arrow::KeyValueMetadata; using arrow::Status; using arrow::internal::checked_cast; +using arrow::internal::EndsWith; using ArrowType = arrow::DataType; using ArrowTypeId = arrow::Type; @@ -496,8 +498,8 @@ Status PopulateLeaf(int column_index, const std::shared_ptr& field, // If the name is array or ends in _tuple, this should be a list of struct // even for single child elements. 
bool HasStructListName(const GroupNode& node) { - ::arrow::util::string_view name{node.name()}; - return name == "array" || name.ends_with("_tuple"); + ::std::string_view name{node.name()}; + return name == "array" || EndsWith(name, "_tuple"); } Status GroupToStruct(const GroupNode& node, LevelInfo current_levels, diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc index 5a0184b186038..bcefc68fa03d7 100644 --- a/cpp/src/parquet/encoding.cc +++ b/cpp/src/parquet/encoding.cc @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -42,7 +43,6 @@ #include "arrow/util/int_util_overflow.h" #include "arrow/util/logging.h" #include "arrow/util/rle_encoding.h" -#include "arrow/util/string_view.h" #include "arrow/util/ubsan.h" #include "arrow/visit_data_inline.h" #include "parquet/exception.h" @@ -56,7 +56,7 @@ using arrow::Status; using arrow::VisitNullBitmapInline; using arrow::internal::AddWithOverflow; using arrow::internal::checked_cast; -using arrow::util::string_view; +using std::string_view; template using ArrowPoolVector = std::vector>; @@ -154,7 +154,7 @@ class PlainEncoder : public EncoderImpl, virtual public TypedEncoder { PARQUET_THROW_NOT_OK(::arrow::VisitArraySpanInline( *array.data(), - [&](::arrow::util::string_view view) { + [&](::std::string_view view) { if (ARROW_PREDICT_FALSE(view.size() > kMaxByteArraySize)) { return Status::Invalid("Parquet cannot store strings with size 2GB or more"); } @@ -617,7 +617,7 @@ class DictEncoderImpl : public EncoderImpl, virtual public DictEncoder { void PutBinaryArray(const ArrayType& array) { PARQUET_THROW_NOT_OK(::arrow::VisitArraySpanInline( *array.data(), - [&](::arrow::util::string_view view) { + [&](::std::string_view view) { if (ARROW_PREDICT_FALSE(view.size() > kMaxByteArraySize)) { return Status::Invalid("Parquet cannot store strings with size 2GB or more"); } @@ -658,7 +658,7 @@ void DictEncoderImpl::WriteDict(uint8_t* buffer) { // ByteArray and FLBA already have the 
dictionary encoded in their data heaps template <> void DictEncoderImpl::WriteDict(uint8_t* buffer) { - memo_table_.VisitValues(0, [&buffer](const ::arrow::util::string_view& v) { + memo_table_.VisitValues(0, [&buffer](const ::std::string_view& v) { uint32_t len = static_cast(v.length()); memcpy(buffer, &len, sizeof(len)); buffer += sizeof(len); @@ -669,7 +669,7 @@ void DictEncoderImpl::WriteDict(uint8_t* buffer) { template <> void DictEncoderImpl::WriteDict(uint8_t* buffer) { - memo_table_.VisitValues(0, [&](const ::arrow::util::string_view& v) { + memo_table_.VisitValues(0, [&](const ::std::string_view& v) { DCHECK_EQ(v.length(), static_cast(type_length_)); memcpy(buffer, v.data(), type_length_); buffer += type_length_; diff --git a/cpp/src/parquet/encryption/crypto_factory.cc b/cpp/src/parquet/encryption/crypto_factory.cc index 384516bff47d6..316793c73db3a 100644 --- a/cpp/src/parquet/encryption/crypto_factory.cc +++ b/cpp/src/parquet/encryption/crypto_factory.cc @@ -15,10 +15,11 @@ // specific language governing permissions and limitations // under the License. 
+#include + #include "arrow/result.h" #include "arrow/util/logging.h" #include "arrow/util/string.h" -#include "arrow/util/string_view.h" #include "parquet/encryption/crypto_factory.h" #include "parquet/encryption/encryption_internal.h" @@ -94,7 +95,7 @@ ColumnPathToEncryptionPropertiesMap CryptoFactory::GetColumnEncryptionProperties int dek_length, const std::string& column_keys, FileKeyWrapper* key_wrapper) { ColumnPathToEncryptionPropertiesMap encrypted_columns; - std::vector<::arrow::util::string_view> key_to_columns = + std::vector<::std::string_view> key_to_columns = ::arrow::internal::SplitString(column_keys, ';'); for (size_t i = 0; i < key_to_columns.size(); ++i) { std::string cur_key_to_columns = @@ -103,7 +104,7 @@ ColumnPathToEncryptionPropertiesMap CryptoFactory::GetColumnEncryptionProperties continue; } - std::vector<::arrow::util::string_view> parts = + std::vector<::std::string_view> parts = ::arrow::internal::SplitString(cur_key_to_columns, ':'); if (parts.size() != 2) { std::ostringstream message; @@ -118,7 +119,7 @@ ColumnPathToEncryptionPropertiesMap CryptoFactory::GetColumnEncryptionProperties } std::string column_names_str = ::arrow::internal::TrimString(std::string(parts[1])); - std::vector<::arrow::util::string_view> column_names = + std::vector<::std::string_view> column_names = ::arrow::internal::SplitString(column_names_str, ','); if (0 == column_names.size()) { throw ParquetException("No columns to encrypt defined for key: " + column_key_id); diff --git a/cpp/src/parquet/encryption/key_toolkit_internal.cc b/cpp/src/parquet/encryption/key_toolkit_internal.cc index dc9c070e7a3f1..6e0e4e6c65e1e 100644 --- a/cpp/src/parquet/encryption/key_toolkit_internal.cc +++ b/cpp/src/parquet/encryption/key_toolkit_internal.cc @@ -45,7 +45,7 @@ std::string EncryptKeyLocally(const std::string& key_bytes, const std::string& m static_cast(aad.size()), reinterpret_cast(&encrypted_key[0])); return ::arrow::util::base64_encode( - 
::arrow::util::string_view(encrypted_key.data(), encrypted_key_len)); + ::std::string_view(encrypted_key.data(), encrypted_key_len)); } std::string DecryptKeyLocally(const std::string& encoded_encrypted_key, diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc index 1b2a3df9c4355..fed45fa2d8281 100644 --- a/cpp/src/parquet/metadata.cc +++ b/cpp/src/parquet/metadata.cc @@ -21,13 +21,13 @@ #include #include #include +#include #include #include #include "arrow/io/memory.h" #include "arrow/util/key_value_metadata.h" #include "arrow/util/logging.h" -#include "arrow/util/string_view.h" #include "parquet/encryption/encryption_internal.h" #include "parquet/encryption/internal_file_decryptor.h" #include "parquet/exception.h" @@ -1050,8 +1050,8 @@ class ApplicationVersionParser { private: bool IsSpace(const std::string& string, const size_t& offset) { - auto target = ::arrow::util::string_view(string).substr(offset, 1); - return target.find_first_of(spaces_) != ::arrow::util::string_view::npos; + auto target = ::std::string_view(string).substr(offset, 1); + return target.find_first_of(spaces_) != ::std::string_view::npos; } void RemovePrecedingSpaces(const std::string& string, size_t& start, diff --git a/cpp/src/parquet/reader_test.cc b/cpp/src/parquet/reader_test.cc index 7776d995c023e..a43238d1369c6 100644 --- a/cpp/src/parquet/reader_test.cc +++ b/cpp/src/parquet/reader_test.cc @@ -153,7 +153,7 @@ TEST_F(TestTextDeltaLengthByteArray, TestTextScanner) { ASSERT_FALSE(is_null); std::string expected = expected_prefix + std::to_string(i * i); ASSERT_TRUE(val.len == expected.length()); - ASSERT_EQ(::arrow::util::string_view(reinterpret_cast(val.ptr), val.len), + ASSERT_EQ(::std::string_view(reinterpret_cast(val.ptr), val.len), expected); } ASSERT_FALSE(scanner->HasNext()); @@ -200,9 +200,9 @@ TEST_F(TestTextDeltaLengthByteArray, TestBatchRead) { auto expected = expected_prefix + std::to_string((i + values_read) * (i + values_read)); ASSERT_TRUE(values[i].len 
== expected.length()); - ASSERT_EQ(::arrow::util::string_view(reinterpret_cast(values[i].ptr), - values[i].len), - expected); + ASSERT_EQ( + ::std::string_view(reinterpret_cast(values[i].ptr), values[i].len), + expected); } values_read += curr_batch_read; } diff --git a/cpp/src/parquet/stream_writer.cc b/cpp/src/parquet/stream_writer.cc index 253ebf1bc9186..dc76c2935d477 100644 --- a/cpp/src/parquet/stream_writer.cc +++ b/cpp/src/parquet/stream_writer.cc @@ -136,7 +136,7 @@ StreamWriter& StreamWriter::operator<<(const std::string& v) { return WriteVariableLength(v.data(), v.size()); } -StreamWriter& StreamWriter::operator<<(::arrow::util::string_view v) { +StreamWriter& StreamWriter::operator<<(::std::string_view v) { return WriteVariableLength(v.data(), v.size()); } diff --git a/cpp/src/parquet/stream_writer.h b/cpp/src/parquet/stream_writer.h index 5801011e16676..f95d39fd1d504 100644 --- a/cpp/src/parquet/stream_writer.h +++ b/cpp/src/parquet/stream_writer.h @@ -23,9 +23,9 @@ #include #include #include +#include #include -#include "arrow/util/string_view.h" #include "parquet/column_writer.h" #include "parquet/file_writer.h" @@ -123,7 +123,7 @@ class PARQUET_EXPORT StreamWriter { /// \brief Helper class to write fixed length strings. /// This is useful as the standard string view (such as - /// arrow::util::string_view) is for variable length data. + /// std::string_view) is for variable length data. struct PARQUET_EXPORT FixedStringView { FixedStringView() = default; @@ -149,7 +149,7 @@ class PARQUET_EXPORT StreamWriter { /// \brief Output operators for variable length strings. StreamWriter& operator<<(const char* v); StreamWriter& operator<<(const std::string& v); - StreamWriter& operator<<(::arrow::util::string_view v); + StreamWriter& operator<<(::std::string_view v); /// \brief Output operator for optional fields. 
template diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc index 349fc682aad23..532fd4c3d7b43 100644 --- a/cpp/src/parquet/types.cc +++ b/cpp/src/parquet/types.cc @@ -73,7 +73,7 @@ std::unique_ptr GetCodec(Compression::type codec, int compression_level) return result; } -std::string FormatStatValue(Type::type parquet_type, ::arrow::util::string_view val) { +std::string FormatStatValue(Type::type parquet_type, ::std::string_view val) { std::stringstream result; const char* bytes = val.data(); diff --git a/cpp/src/parquet/types.h b/cpp/src/parquet/types.h index b419bf5dcf901..183a370529167 100644 --- a/cpp/src/parquet/types.h +++ b/cpp/src/parquet/types.h @@ -20,11 +20,11 @@ #include #include #include +#include #include #include #include - -#include "arrow/util/string_view.h" +#include #include "parquet/platform.h" #include "parquet/type_fwd.h" @@ -538,7 +538,7 @@ struct ByteArray { ByteArray() : len(0), ptr(NULLPTR) {} ByteArray(uint32_t len, const uint8_t* ptr) : len(len), ptr(ptr) {} - ByteArray(::arrow::util::string_view view) // NOLINT implicit conversion + ByteArray(::std::string_view view) // NOLINT implicit conversion : ByteArray(static_cast(view.size()), reinterpret_cast(view.data())) {} uint32_t len; @@ -743,7 +743,7 @@ PARQUET_EXPORT std::string ConvertedTypeToString(ConvertedType::type t); PARQUET_EXPORT std::string TypeToString(Type::type t); PARQUET_EXPORT std::string FormatStatValue(Type::type parquet_type, - ::arrow::util::string_view val); + ::std::string_view val); PARQUET_EXPORT int GetTypeByteSize(Type::type t); diff --git a/docs/source/cpp/gdb.rst b/docs/source/cpp/gdb.rst index 609f11a993ab2..ed1810a672082 100644 --- a/docs/source/cpp/gdb.rst +++ b/docs/source/cpp/gdb.rst @@ -165,4 +165,3 @@ Important utility classes are also covered: * :class:`arrow::Status` and :class:`arrow::Result` * :class:`arrow::Buffer` and subclasses * :class:`arrow::Decimal128`, :class:`arrow::Decimal256` -* :class:`arrow::util::string_view`, 
:class:`arrow::util::Variant` diff --git a/python/pyarrow/src/arrow_to_pandas.cc b/python/pyarrow/src/arrow_to_pandas.cc index 437f0f11925ca..ba67eb10553a6 100644 --- a/python/pyarrow/src/arrow_to_pandas.cc +++ b/python/pyarrow/src/arrow_to_pandas.cc @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -43,7 +44,6 @@ #include "arrow/util/logging.h" #include "arrow/util/macros.h" #include "arrow/util/parallel.h" -#include "arrow/util/string_view.h" #include "arrow/visit_type_inline.h" #include "arrow/compute/api.h" @@ -586,7 +586,7 @@ template struct MemoizationTraits> { // For binary, we memoize string_view as a scalar value to avoid having to // unnecessarily copy the memory into the memo table data structure - using Scalar = util::string_view; + using Scalar = std::string_view; }; // Generic Array -> PyObject** converter that handles object deduplication, if @@ -1018,7 +1018,7 @@ struct ObjectWriterVisitor { enable_if_t::value || is_fixed_size_binary_type::value, Status> Visit(const Type& type) { - auto WrapValue = [](const util::string_view& view, PyObject** out) { + auto WrapValue = [](const std::string_view& view, PyObject** out) { *out = WrapBytes::Wrap(view.data(), view.length()); if (*out == nullptr) { PyErr_Clear(); diff --git a/python/pyarrow/src/common.h b/python/pyarrow/src/common.h index 768ff8dce440b..59f15c8a135f6 100644 --- a/python/pyarrow/src/common.h +++ b/python/pyarrow/src/common.h @@ -18,6 +18,7 @@ #pragma once #include +#include #include #include "arrow/buffer.h" diff --git a/python/pyarrow/src/datetime.cc b/python/pyarrow/src/datetime.cc index 9604b529753a9..c4591ab50e007 100644 --- a/python/pyarrow/src/datetime.cc +++ b/python/pyarrow/src/datetime.cc @@ -19,6 +19,7 @@ #include #include #include +#include #include "arrow/array.h" #include "arrow/scalar.h" @@ -40,14 +41,14 @@ namespace { // Same as Regex '([+-])(0[0-9]|1[0-9]|2[0-3]):([0-5][0-9])$'. 
// GCC 4.9 doesn't support regex, so handcode until support for it // is dropped. -bool MatchFixedOffset(const std::string& tz, util::string_view* sign, - util::string_view* hour, util::string_view* minute) { +bool MatchFixedOffset(const std::string& tz, std::string_view* sign, + std::string_view* hour, std::string_view* minute) { if (tz.size() < 5) { return false; } const char* iter = tz.data(); if (*iter == '+' || *iter == '-') { - *sign = util::string_view(iter, 1); + *sign = std::string_view(iter, 1); iter++; if (tz.size() < 6) { return false; @@ -55,7 +56,7 @@ bool MatchFixedOffset(const std::string& tz, util::string_view* sign, } if ((((*iter == '0' || *iter == '1') && *(iter + 1) >= '0' && *(iter + 1) <= '9') || (*iter == '2' && *(iter + 1) >= '0' && *(iter + 1) <= '3'))) { - *hour = util::string_view(iter, 2); + *hour = std::string_view(iter, 2); iter += 2; } else { return false; @@ -66,7 +67,7 @@ bool MatchFixedOffset(const std::string& tz, util::string_view* sign, iter++; if (*iter >= '0' && *iter <= '5' && *(iter + 1) >= '0' && *(iter + 1) <= '9') { - *minute = util::string_view(iter, 2); + *minute = std::string_view(iter, 2); iter += 2; } else { return false; @@ -389,7 +390,7 @@ Result PyTZInfo_utcoffset_hhmm(PyObject* pytzinfo) { // Converted from python. See https://github.com/apache/arrow/pull/7604 // for details. 
Result StringToTzinfo(const std::string& tz) { - util::string_view sign_str, hour_str, minute_str; + std::string_view sign_str, hour_str, minute_str; OwnedRef pytz; OwnedRef zoneinfo; OwnedRef datetime; diff --git a/python/pyarrow/src/gdb.cc b/python/pyarrow/src/gdb.cc index c681dfe9caa83..16530a032d71e 100644 --- a/python/pyarrow/src/gdb.cc +++ b/python/pyarrow/src/gdb.cc @@ -34,7 +34,6 @@ #include "arrow/util/key_value_metadata.h" #include "arrow/util/logging.h" #include "arrow/util/macros.h" -#include "arrow/util/string_view.h" namespace arrow { @@ -81,7 +80,7 @@ class UuidType : public ExtensionType { }; std::shared_ptr SliceArrayFromJSON(const std::shared_ptr& ty, - util::string_view json, int64_t offset = 0, + std::string_view json, int64_t offset = 0, int64_t length = -1) { auto array = *ArrayFromJSON(ty, json); if (length != -1) { @@ -121,12 +120,9 @@ void TestSession() { auto error_detail_result = Result(error_detail_status); // String views - util::string_view string_view_empty{}; - util::string_view string_view_abc{"abc"}; + std::string_view string_view_abc{"abc"}; std::string special_chars = std::string("foo\"bar") + '\x00' + "\r\n\t\x1f"; - util::string_view string_view_special_chars(special_chars); - std::string very_long = "abc" + std::string(5000, 'K') + "xyz"; - util::string_view string_view_very_long(very_long); + std::string_view string_view_special_chars(special_chars); // Buffers Buffer buffer_null{nullptr, 0}; diff --git a/python/pyarrow/tests/test_gdb.py b/python/pyarrow/tests/test_gdb.py index 3056cb4326dc2..d0d241cc56438 100644 --- a/python/pyarrow/tests/test_gdb.py +++ b/python/pyarrow/tests/test_gdb.py @@ -264,20 +264,6 @@ def test_status(gdb_arrow): 'detail=[custom-detail-id] "This is a detail"))') -def test_string_view(gdb_arrow): - check_stack_repr(gdb_arrow, "string_view_empty", - "arrow::util::string_view of size 0") - check_stack_repr(gdb_arrow, "string_view_abc", - 'arrow::util::string_view of size 3, "abc"') - check_stack_repr( - 
gdb_arrow, "string_view_special_chars", - r'arrow::util::string_view of size 12, "foo\"bar\000\r\n\t\037"') - check_stack_repr( - gdb_arrow, "string_view_very_long", - 'arrow::util::string_view of size 5006, ' - '"abc", \'K\' ...') - - def test_buffer_stack(gdb_arrow): check_stack_repr(gdb_arrow, "buffer_null", "arrow::Buffer of size 0, read-only") diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp index 97bb72b3df79d..8e19d13a0efd1 100644 --- a/r/src/altrep.cpp +++ b/r/src/altrep.cpp @@ -779,7 +779,7 @@ struct AltrepVectorString : public AltrepVectorBase> { std::string stripped_string_; const bool strip_out_nuls_; bool nul_was_stripped_; - util::string_view view_; + std::string_view view_; }; // Get a single string, as a CHARSXP SEXP from data2. diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp index dccc29537ede6..d7c51e79359c7 100644 --- a/r/src/array_to_vector.cpp +++ b/r/src/array_to_vector.cpp @@ -374,11 +374,11 @@ struct Converter_String : public Converter { bool Parallel() const { return false; } private: - static SEXP r_string_from_view(arrow::util::string_view view) { + static SEXP r_string_from_view(std::string_view view) { return Rf_mkCharLenCE(view.data(), view.size(), CE_UTF8); } - static SEXP r_string_from_view_strip_nul(arrow::util::string_view view, + static SEXP r_string_from_view_strip_nul(std::string_view view, bool* nul_was_stripped) { const char* old_string = view.data(); @@ -391,7 +391,7 @@ struct Converter_String : public Converter { if (nul_count == 1) { // first nul spotted: allocate stripped string storage - stripped_string = view.to_string(); + stripped_string = std::string(view); stripped_len = i; }