New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
common: add for_each_substr() for cheap string split #18798
Changes from all commits
0b39e47
a942dfd
09448e4
c4e54b7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,26 @@ | |
#include <set> | ||
#include <string> | ||
#include <vector> | ||
#include <boost/utility/string_view.hpp> | ||
|
||
|
||
namespace ceph { | ||
|
||
/// Split a string using the given delimiters, passing each piece as a | ||
/// (non-null-terminated) boost::string_view to the callback. | ||
/// | ||
/// @param s      text to split; taken by value, so the caller's view is not modified | ||
/// @param delims null-terminated string whose characters each act as a delimiter | ||
/// @param f      callable invoked once per piece, in left-to-right order | ||
/// | ||
/// Leading, trailing and repeated delimiters are skipped, so f never receives | ||
/// an empty piece; an empty or all-delimiter input results in no calls at all. | ||
template <typename Func> // where Func(boost::string_view) is a valid call | ||
void for_each_substr(boost::string_view s, const char *delims, Func&& f) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I approve. |
||
{ | ||
// locate the first character that is not a delimiter (npos if there is none) | ||
auto pos = s.find_first_not_of(delims); | ||
while (pos != s.npos) { | ||
s.remove_prefix(pos); // trim delims from the front | ||
// end is npos when the current piece runs to the end of s | ||
auto end = s.find_first_of(delims); | ||
f(s.substr(0, end)); | ||
// find the start of the next piece; npos here terminates the loop | ||
pos = s.find_first_not_of(delims, end); | ||
} | ||
} | ||
|
||
} // namespace ceph | ||
|
||
/** | ||
* Split **str** into a list of strings, using the ";,= \t" delimiters and output the result in **str_list**. | ||
|
@@ -26,6 +46,9 @@ extern void get_str_list(const std::string& str, | |
const char *delims, | ||
std::list<std::string>& str_list); | ||
|
||
std::list<std::string> get_str_list(const std::string& str, | ||
const char *delims = ";,= \t"); | ||
|
||
/** | ||
* Split **str** into a list of strings, using the ";,= \t" delimiters and output the result in **str_vec**. | ||
* | ||
|
@@ -46,6 +69,8 @@ extern void get_str_vec(const std::string& str, | |
const char *delims, | ||
std::vector<std::string>& str_vec); | ||
|
||
std::vector<std::string> get_str_vec(const std::string& str, | ||
const char *delims = ";,= \t"); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How do you feel about delims being a flat set and/or just taking any sequence of characters? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. i'm guessing the vast majority of cases will pass them a string literal, so i'd prefer not to copy them into something like a flat_set. but taking them as a string_view would be an easy option There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. i decided to leave it as |
||
/** | ||
* Split **str** into a set of strings, using the ";,= \t" delimiters and output the result in **str_list**. | ||
* | ||
|
@@ -66,6 +91,9 @@ extern void get_str_set(const std::string& str, | |
const char *delims, | ||
std::set<std::string>& str_list); | ||
|
||
std::set<std::string> get_str_set(const std::string& str, | ||
const char *delims = ";,= \t"); | ||
|
||
/** | ||
* Return a String containing the vector **v** joined with **sep** | ||
* | ||
|
@@ -90,12 +118,4 @@ inline std::string str_join(const std::vector<std::string>& v, const std::string | |
return r; | ||
} | ||
|
||
/// Convenience overload: split str on the ";,= \t" delimiters and return | ||
/// the pieces as a new vector instead of filling an output parameter. | ||
static inline std::vector<std::string> get_str_vec(const std::string& str) | ||
{ | ||
std::vector<std::string> str_vec; | ||
const char *delims = ";,= \t"; | ||
get_str_vec(str, delims, str_vec); | ||
return str_vec; | ||
} | ||
|
||
#endif |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,54 +1,50 @@ | ||
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- | ||
// vim: ts=8 sw=2 smarttab | ||
|
||
#include "include/types.h" | ||
#include "include/str_list.h" | ||
|
||
#include <list> | ||
#include <vector> | ||
#include <string> | ||
|
||
#include "gtest/gtest.h" | ||
|
||
// SplitTest is parameterized for list/vector/set | ||
using Types = ::testing::Types<std::list<std::string>, | ||
std::vector<std::string>, | ||
std::set<std::string>>; | ||
|
||
const char *tests[][10] = { | ||
{ "foo,bar", "foo", "bar", 0 }, | ||
{ "foo", "foo", 0 }, | ||
{ "foo;bar", "foo", "bar", 0 }, | ||
{ "foo bar", "foo", "bar", 0 }, | ||
{ " foo bar", "foo", "bar", 0 }, | ||
{ " foo bar ", "foo", "bar", 0 }, | ||
{ "a,b,c", "a", "b", "c", 0 }, | ||
{ " a\tb\tc\t", "a", "b", "c", 0 }, | ||
{ "a, b, c", "a", "b", "c", 0 }, | ||
{ "a b c", "a", "b", "c", 0 }, | ||
{ "a=b=c", "a", "b", "c", 0 }, | ||
{ 0 }, | ||
// Typed test fixture for the value-returning split functions. | ||
// The three test() overloads are selected by the container type of | ||
// expected, and each one calls the get_str_* function that returns that | ||
// same container type, comparing the result with EXPECT_EQ. | ||
template <typename T> | ||
struct SplitTest : ::testing::Test { | ||
// std::list<std::string> -> get_str_list | ||
void test(const char* input, const char *delim, | ||
const std::list<std::string>& expected) { | ||
EXPECT_EQ(expected, get_str_list(input, delim)); | ||
} | ||
// std::vector<std::string> -> get_str_vec | ||
void test(const char* input, const char *delim, | ||
const std::vector<std::string>& expected) { | ||
EXPECT_EQ(expected, get_str_vec(input, delim)); | ||
} | ||
// std::set<std::string> -> get_str_set | ||
void test(const char* input, const char *delim, | ||
const std::set<std::string>& expected) { | ||
EXPECT_EQ(expected, get_str_set(input, delim)); | ||
} | ||
}; | ||
|
||
// Table-driven check of the two-argument get_str_list(src, out) overload. | ||
// Each row of tests holds the input string first, followed by the expected | ||
// pieces, with a null entry marking the end of the row; a row whose first | ||
// entry is null ends the table. | ||
TEST(StrList, get_str_list) | ||
{ | ||
for (unsigned i=0; tests[i][0]; ++i) { | ||
std::string src = tests[i][0]; | ||
std::list<std::string> expected; | ||
// entries 1..n of the row are the expected pieces | ||
for (unsigned j=1; tests[i][j]; ++j) | ||
expected.push_back(tests[i][j]); | ||
std::list<std::string> actual; | ||
get_str_list(src, actual); | ||
std::cout << "'" << src << "' -> " << actual << std::endl; | ||
ASSERT_EQ(actual, expected); | ||
} | ||
} | ||
TYPED_TEST_CASE(SplitTest, Types); | ||
|
||
TEST(StrList, get_str_vec) | ||
TYPED_TEST(SplitTest, Get) | ||
{ | ||
for (unsigned i=0; tests[i][0]; ++i) { | ||
std::string src = tests[i][0]; | ||
std::vector<std::string> expected; | ||
for (unsigned j=1; tests[i][j]; ++j) | ||
expected.push_back(tests[i][j]); | ||
std::vector<std::string> actual; | ||
get_str_vec (src, actual); | ||
std::cout << "'" << src << "' -> " << actual << std::endl; | ||
ASSERT_EQ(actual, expected); | ||
} | ||
|
||
this->test("", " ", TypeParam{}); | ||
this->test(" ", " ", TypeParam{}); | ||
this->test("foo", " ", TypeParam{"foo"}); | ||
this->test("foo bar", " ", TypeParam{"foo","bar"}); | ||
this->test(" foo bar", " ", TypeParam{"foo","bar"}); | ||
this->test("foo bar ", " ", TypeParam{"foo","bar"}); | ||
this->test("foo bar ", " ", TypeParam{"foo","bar"}); | ||
|
||
// default delimiter | ||
const char *delims = ";,= \t"; | ||
this->test(" ; , = \t ", delims, TypeParam{}); | ||
this->test(" ; foo = \t ", delims, TypeParam{"foo"}); | ||
this->test("a,b,c", delims, TypeParam{"a","b","c"}); | ||
this->test("a\tb\tc\t", delims, TypeParam{"a","b","c"}); | ||
this->test("a, b, c", delims, TypeParam{"a","b","c"}); | ||
this->test("a b c", delims, TypeParam{"a","b","c"}); | ||
this->test("a=b=c", delims, TypeParam{"a","b","c"}); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can we pass token by reference? like:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
string_view is just a pointer/len pair, so it's trivial to copy and can be passed in registers. some advice about this from the cpp core guidelines: