Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

make butil::BasicStringPiece<T> support string split functions-family #1295

Merged
merged 2 commits into from
Nov 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/butil/strings/string_piece.h
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ template <typename STRING_TYPE> class BasicStringPiece {
: ptr_(str.data()), length_(str.size()) {}
BasicStringPiece(const value_type* offset, size_type len)
: ptr_(offset), length_(len) {}
// Constructs a view over the sub-range of |str| that starts at |pos| and
// spans at most |len| characters (up to the end of |str| when |len| is npos
// or otherwise too large).
// |pos| is clamped to str.length(): without the clamp, a |pos| past the end
// makes the unsigned subtraction `str.length() - pos` wrap around, producing
// an out-of-bounds pointer and a bogus length. With the clamp such a request
// yields an empty piece positioned at the end of |str|.
BasicStringPiece(const BasicStringPiece& str, size_type pos, size_type len = npos)
    : ptr_(str.data() + std::min(pos, str.length())),
      length_(std::min(len, str.length() - std::min(pos, str.length()))) {}
BasicStringPiece(const typename STRING_TYPE::const_iterator& begin,
const typename STRING_TYPE::const_iterator& end)
: ptr_((end > begin) ? &(*begin) : NULL),
Expand All @@ -203,6 +205,11 @@ template <typename STRING_TYPE> class BasicStringPiece {
ptr_ = NULL;
length_ = 0;
}
// Re-points this piece at the sub-range of |str| that starts at |pos| and
// spans at most |len| characters (up to the end of |str| when |len| is npos
// or otherwise too large). Returns *this.
// |pos| is clamped to str.length(): without the clamp, a |pos| past the end
// makes the unsigned subtraction `str.length() - pos` wrap around, producing
// an out-of-bounds pointer and a bogus length. With the clamp such a request
// yields an empty piece positioned at the end of |str|.
BasicStringPiece& assign(const BasicStringPiece& str, size_type pos, size_type len = npos) {
    // Compute the start once, before touching any member, so that
    // self-assignment (|str| aliasing *this) reads consistent values.
    const size_type start = std::min(pos, str.length());
    ptr_ = str.data() + start;
    length_ = std::min(len, str.length() - start);
    return *this;
}
void set(const value_type* data, size_type len) {
ptr_ = data;
length_ = len;
Expand Down
103 changes: 89 additions & 14 deletions src/butil/strings/string_split.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,25 +35,26 @@ void SplitStringT(const STR& str,
}
}

bool SplitStringIntoKeyValue(const std::string& line,
char key_value_delimiter,
std::string* key,
std::string* value) {
template <typename STR>
bool SplitStringIntoKeyValueT(const STR& line,
typename STR::value_type key_value_delimiter,
STR* key,
STR* value) {
key->clear();
value->clear();

// Find the delimiter.
size_t end_key_pos = line.find_first_of(key_value_delimiter);
if (end_key_pos == std::string::npos) {
if (end_key_pos == STR::npos) {
DVLOG(1) << "cannot find delimiter in: " << line;
return false; // no delimiter
}
key->assign(line, 0, end_key_pos);

// Find the value string.
std::string remains(line, end_key_pos, line.size() - end_key_pos);
STR remains(line, end_key_pos, line.size() - end_key_pos);
size_t begin_value_pos = remains.find_first_not_of(key_value_delimiter);
if (begin_value_pos == std::string::npos) {
if (begin_value_pos == STR::npos) {
DVLOG(1) << "cannot parse value from line: " << line;
return false; // no value
}
Expand Down Expand Up @@ -134,6 +135,13 @@ void SplitString(const string16& str,
SplitStringT(str, c, true, r);
}

// StringPiece16 overload of SplitString: splits |str| on the delimiter |c|
// and stores the resulting pieces in |*r|.
// Forwards to SplitStringT with `true` as the third argument (the
// SplitStringDontTrim overloads pass `false`).
// NOTE(review): the output elements are non-owning StringPiece16 views,
// presumably into |str|'s buffer, so that buffer must outlive |*r| -- confirm
// against SplitStringT.
void SplitString(const butil::StringPiece16& str,
char16 c,
std::vector<butil::StringPiece16>* r) {
// Debug-only precondition: |c| must satisfy CBU16_IS_SINGLE.
DCHECK(CBU16_IS_SINGLE(c));
SplitStringT(str, c, true, r);
}

void SplitString(const std::string& str,
char c,
std::vector<std::string>* r) {
Expand All @@ -144,13 +152,24 @@ void SplitString(const std::string& str,
SplitStringT(str, c, true, r);
}

bool SplitStringIntoKeyValuePairs(const std::string& line,
// StringPiece overload of SplitString: splits |str| on the delimiter |c| and
// stores the resulting pieces in |*r|.
// Forwards to SplitStringT with `true` as the third argument (the
// SplitStringDontTrim overloads pass `false`).
// NOTE(review): the output elements are non-owning StringPiece views,
// presumably into |str|'s buffer, so that buffer must outlive |*r| -- confirm
// against SplitStringT.
void SplitString(const StringPiece& str,
char c,
std::vector<StringPiece>* r) {
// The non-negativity check is only compiled where plain char is signed.
#if CHAR_MIN < 0
DCHECK(c >= 0);
#endif
// Debug-only upper bound on the delimiter value.
DCHECK(c < 0x7F);
SplitStringT(str, c, true, r);
}

template<typename STR>
bool SplitStringIntoKeyValuePairsT(const STR& line,
char key_value_delimiter,
char key_value_pair_delimiter,
StringPairs* key_value_pairs) {
std::vector<std::pair<STR, STR> >* key_value_pairs) {
key_value_pairs->clear();

std::vector<std::string> pairs;
std::vector<STR> pairs;
SplitString(line, key_value_pair_delimiter, &pairs);

bool success = true;
Expand All @@ -159,37 +178,72 @@ bool SplitStringIntoKeyValuePairs(const std::string& line,
if (pairs[i].empty())
continue;

std::string key;
std::string value;
if (!SplitStringIntoKeyValue(pairs[i], key_value_delimiter, &key, &value)) {
STR key;
STR value;
if (!SplitStringIntoKeyValueT(pairs[i], key_value_delimiter, &key, &value)) {
// Don't return here, to allow for pairs without associated
// value or key; just record that the split failed.
success = false;
}
key_value_pairs->push_back(make_pair(key, value));
key_value_pairs->push_back(std::make_pair(key, value));
}
return success;
}

// std::string overload: splits |line| into key/value pairs and stores them in
// |*key_value_pairs| as std::string copies.
// |key_value_pair_delimiter| separates one pair from the next;
// |key_value_delimiter| separates a key from its value.
// Thin wrapper that forwards to SplitStringIntoKeyValuePairsT and returns its
// result: false if any pair failed to split into key and value (such pairs
// are still appended), true otherwise.
bool SplitStringIntoKeyValuePairs(const std::string& line,
char key_value_delimiter,
char key_value_pair_delimiter,
StringPairs* key_value_pairs) {
return SplitStringIntoKeyValuePairsT(line, key_value_delimiter,
key_value_pair_delimiter, key_value_pairs);
}

// StringPiece overload: splits |line| into key/value pairs and stores them in
// |*key_value_pairs| as StringPiece pairs rather than std::string copies.
// |key_value_pair_delimiter| separates one pair from the next;
// |key_value_delimiter| separates a key from its value.
// Thin wrapper that forwards to SplitStringIntoKeyValuePairsT and returns its
// result: false if any pair failed to split into key and value (such pairs
// are still appended), true otherwise.
// NOTE(review): the stored keys/values are non-owning views, presumably into
// |line|'s buffer, so that buffer must outlive |*key_value_pairs| -- confirm.
bool SplitStringIntoKeyValuePairs(const butil::StringPiece& line,
char key_value_delimiter,
char key_value_pair_delimiter,
StringPiecePairs* key_value_pairs) {
return SplitStringIntoKeyValuePairsT(line, key_value_delimiter,
key_value_pair_delimiter, key_value_pairs);
}

// string16 overload: splits |str| using the substring |s| as the delimiter
// and stores the pieces in |*r|. Thin wrapper over SplitStringUsingSubstrT.
void SplitStringUsingSubstr(const string16& str,
const string16& s,
std::vector<string16>* r) {
SplitStringUsingSubstrT(str, s, r);
}

// StringPiece16 overload: splits |str| using the substring |s| as the
// delimiter and stores the pieces in |*r|. Thin wrapper over
// SplitStringUsingSubstrT.
// NOTE(review): the output elements are non-owning views, presumably into
// |str|'s buffer, so that buffer must outlive |*r| -- confirm.
void SplitStringUsingSubstr(const butil::StringPiece16& str,
const butil::StringPiece16& s,
std::vector<butil::StringPiece16>* r) {
SplitStringUsingSubstrT(str, s, r);
}

// std::string overload: splits |str| using the substring |s| as the delimiter
// and stores the pieces in |*r|. Thin wrapper over SplitStringUsingSubstrT.
void SplitStringUsingSubstr(const std::string& str,
const std::string& s,
std::vector<std::string>* r) {
SplitStringUsingSubstrT(str, s, r);
}

// StringPiece overload: splits |str| using the substring |s| as the delimiter
// and stores the pieces in |*r|. Thin wrapper over SplitStringUsingSubstrT.
// NOTE(review): the output elements are non-owning views, presumably into
// |str|'s buffer, so that buffer must outlive |*r| -- confirm.
void SplitStringUsingSubstr(const butil::StringPiece& str,
const butil::StringPiece& s,
std::vector<butil::StringPiece>* r) {
SplitStringUsingSubstrT(str, s, r);
}

// string16 overload: splits |str| on the delimiter |c| and stores the pieces
// in |*r|. Forwards to SplitStringT with `false` as the third argument, in
// contrast to SplitString, which passes `true`.
void SplitStringDontTrim(const string16& str,
char16 c,
std::vector<string16>* r) {
// Debug-only precondition: |c| must satisfy CBU16_IS_SINGLE.
DCHECK(CBU16_IS_SINGLE(c));
SplitStringT(str, c, false, r);
}

// StringPiece16 overload: splits |str| on the delimiter |c| and stores the
// pieces in |*r|. Forwards to SplitStringT with `false` as the third
// argument, in contrast to SplitString, which passes `true`.
// NOTE(review): the output elements are non-owning views, presumably into
// |str|'s buffer, so that buffer must outlive |*r| -- confirm.
void SplitStringDontTrim(const butil::StringPiece16& str,
char16 c,
std::vector<butil::StringPiece16>* r) {
// Debug-only precondition: |c| must satisfy CBU16_IS_SINGLE.
DCHECK(CBU16_IS_SINGLE(c));
SplitStringT(str, c, false, r);
}

void SplitStringDontTrim(const std::string& str,
char c,
std::vector<std::string>* r) {
Expand All @@ -201,14 +255,35 @@ void SplitStringDontTrim(const std::string& str,
SplitStringT(str, c, false, r);
}

// StringPiece overload: splits |str| on the delimiter |c| and stores the
// pieces in |*r|. Forwards to SplitStringT with `false` as the third
// argument, in contrast to SplitString, which passes `true`.
// NOTE(review): the output elements are non-owning views, presumably into
// |str|'s buffer, so that buffer must outlive |*r| -- confirm.
void SplitStringDontTrim(const butil::StringPiece& str,
char c,
std::vector<butil::StringPiece>* r) {
// Debug-only precondition: the input must pass IsStringUTF8.
DCHECK(IsStringUTF8(str));
// The non-negativity check is only compiled where plain char is signed.
#if CHAR_MIN < 0
DCHECK(c >= 0);
#endif
// Debug-only upper bound on the delimiter value.
DCHECK(c < 0x7F);
SplitStringT(str, c, false, r);
}

// string16 overload: splits |str| along whitespace and stores the pieces in
// |*result|. Thin wrapper over SplitStringAlongWhitespaceT.
void SplitStringAlongWhitespace(const string16& str,
std::vector<string16>* result) {
SplitStringAlongWhitespaceT(str, result);
}

// StringPiece16 overload: splits |str| along whitespace and stores the pieces
// in |*result|. Thin wrapper over SplitStringAlongWhitespaceT.
// NOTE(review): the output elements are non-owning views, presumably into
// |str|'s buffer, so that buffer must outlive |*result| -- confirm.
void SplitStringAlongWhitespace(const butil::StringPiece16& str,
std::vector<butil::StringPiece16>* result) {
SplitStringAlongWhitespaceT(str, result);
}

// std::string overload: splits |str| along whitespace and stores the pieces
// in |*result|. Thin wrapper over SplitStringAlongWhitespaceT.
void SplitStringAlongWhitespace(const std::string& str,
std::vector<std::string>* result) {
SplitStringAlongWhitespaceT(str, result);
}

// StringPiece overload: splits |str| along whitespace and stores the pieces
// in |*result|. Thin wrapper over SplitStringAlongWhitespaceT.
// NOTE(review): the output elements are non-owning views, presumably into
// |str|'s buffer, so that buffer must outlive |*result| -- confirm.
void SplitStringAlongWhitespace(const butil::StringPiece& str,
std::vector<butil::StringPiece>* result) {
SplitStringAlongWhitespaceT(str, result);
}

} // namespace butil
28 changes: 28 additions & 0 deletions src/butil/strings/string_split.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

#include "butil/base_export.h"
#include "butil/strings/string16.h"
#include "butil/strings/string_piece.h"

namespace butil {

Expand All @@ -23,6 +24,9 @@ namespace butil {
BUTIL_EXPORT void SplitString(const string16& str,
char16 c,
std::vector<string16>* r);
BUTIL_EXPORT void SplitString(const butil::StringPiece16& str,
char16 c,
std::vector<butil::StringPiece16>* r);

// |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which
// the trailing byte of a multi-byte character can be in the ASCII range.
Expand All @@ -31,8 +35,12 @@ BUTIL_EXPORT void SplitString(const string16& str,
BUTIL_EXPORT void SplitString(const std::string& str,
char c,
std::vector<std::string>* r);
BUTIL_EXPORT void SplitString(const butil::StringPiece& str,
char c,
std::vector<butil::StringPiece>* r);

typedef std::vector<std::pair<std::string, std::string> > StringPairs;
typedef std::vector<std::pair<butil::StringPiece, butil::StringPiece> > StringPiecePairs;

// Splits |line| into key value pairs according to the given delimiters and
// removes whitespace leading each key and trailing each value. Returns true
Expand All @@ -42,27 +50,43 @@ BUTIL_EXPORT bool SplitStringIntoKeyValuePairs(const std::string& line,
char key_value_delimiter,
char key_value_pair_delimiter,
StringPairs* key_value_pairs);
BUTIL_EXPORT bool SplitStringIntoKeyValuePairs(const butil::StringPiece& line,
char key_value_delimiter,
char key_value_pair_delimiter,
StringPiecePairs* key_value_pairs);

// The same as SplitString, but uses a substring delimiter instead of a char.
BUTIL_EXPORT void SplitStringUsingSubstr(const string16& str,
const string16& s,
std::vector<string16>* r);
BUTIL_EXPORT void SplitStringUsingSubstr(const butil::StringPiece16& str,
const butil::StringPiece16& s,
std::vector<butil::StringPiece16>* r);
BUTIL_EXPORT void SplitStringUsingSubstr(const std::string& str,
const std::string& s,
std::vector<std::string>* r);
BUTIL_EXPORT void SplitStringUsingSubstr(const butil::StringPiece& str,
const butil::StringPiece& s,
std::vector<butil::StringPiece>* r);

// The same as SplitString, but doesn't trim whitespace.
// NOTE: |c| must be in BMP (Basic Multilingual Plane)
BUTIL_EXPORT void SplitStringDontTrim(const string16& str,
char16 c,
std::vector<string16>* r);
BUTIL_EXPORT void SplitStringDontTrim(const butil::StringPiece16& str,
char16 c,
std::vector<butil::StringPiece16>* r);
// |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which
// the trailing byte of a multi-byte character can be in the ASCII range.
// UTF-8, and other single/multi-byte ASCII-compatible encodings are OK.
// Note: |c| must be in the ASCII range.
BUTIL_EXPORT void SplitStringDontTrim(const std::string& str,
char c,
std::vector<std::string>* r);
BUTIL_EXPORT void SplitStringDontTrim(const butil::StringPiece& str,
char c,
std::vector<butil::StringPiece>* r);

// WARNING: this uses whitespace as defined by the HTML5 spec. If you need
// a function similar to this but want to trim all types of whitespace, then
Expand All @@ -74,8 +98,12 @@ BUTIL_EXPORT void SplitStringDontTrim(const std::string& str,
// characters is added to result.
BUTIL_EXPORT void SplitStringAlongWhitespace(const string16& str,
std::vector<string16>* result);
BUTIL_EXPORT void SplitStringAlongWhitespace(const butil::StringPiece16& str,
std::vector<butil::StringPiece16>* result);
BUTIL_EXPORT void SplitStringAlongWhitespace(const std::string& str,
std::vector<std::string>* result);
BUTIL_EXPORT void SplitStringAlongWhitespace(const butil::StringPiece& str,
std::vector<butil::StringPiece>* result);

} // namespace butil

Expand Down
21 changes: 20 additions & 1 deletion src/butil/strings/string_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -246,12 +246,25 @@ TrimPositions TrimWhitespace(const string16& input,
output);
}

// StringPiece16 overload of TrimWhitespace.
// Forwards to TrimStringT, passing kWhitespaceUTF16 (wrapped in a
// StringPiece16) as its second argument -- presumably the set of characters
// to strip -- together with |positions| and |output|, and returns
// TrimStringT's TrimPositions result unchanged.
// NOTE(review): |*output| is a non-owning view, presumably into |input|'s
// buffer -- confirm against TrimStringT.
TrimPositions TrimWhitespace(const butil::StringPiece16& input,
TrimPositions positions,
butil::StringPiece16* output) {
return TrimStringT(input, butil::StringPiece16(kWhitespaceUTF16), positions,
output);
}

// std::string overload of TrimWhitespaceASCII.
// Forwards to TrimStringT, passing kWhitespaceASCII (wrapped in a
// std::string) as its second argument -- presumably the set of characters to
// strip -- together with |positions| and |output|, and returns TrimStringT's
// TrimPositions result unchanged.
TrimPositions TrimWhitespaceASCII(const std::string& input,
TrimPositions positions,
std::string* output) {
return TrimStringT(input, std::string(kWhitespaceASCII), positions, output);
}

// StringPiece overload of TrimWhitespaceASCII.
// Forwards to TrimStringT, passing kWhitespaceASCII (wrapped in a
// StringPiece) as its second argument -- presumably the set of characters to
// strip -- together with |positions| and |output|, and returns TrimStringT's
// TrimPositions result unchanged.
// NOTE(review): |*output| is a non-owning view, presumably into |input|'s
// buffer -- confirm against TrimStringT.
TrimPositions TrimWhitespaceASCII(const butil::StringPiece& input,
TrimPositions positions,
butil::StringPiece* output) {
return TrimStringT(input, butil::StringPiece(kWhitespaceASCII), positions, output);
}

// This function is only for backward-compatibility.
// To be removed when all callers are updated.
TrimPositions TrimWhitespace(const std::string& input,
Expand All @@ -260,6 +273,12 @@ TrimPositions TrimWhitespace(const std::string& input,
return TrimWhitespaceASCII(input, positions, output);
}

// StringPiece overload of TrimWhitespace. Simply calls TrimWhitespaceASCII
// with the same arguments and returns its result.
TrimPositions TrimWhitespace(const butil::StringPiece& input,
TrimPositions positions,
butil::StringPiece* output) {
return TrimWhitespaceASCII(input, positions, output);
}

template<typename STR>
STR CollapseWhitespaceT(const STR& text,
bool trim_sequences_with_line_breaks) {
Expand Down Expand Up @@ -340,7 +359,7 @@ bool IsStringASCII(const string16& str) {
return DoIsStringASCII(str);
}

bool IsStringUTF8(const std::string& str) {
bool IsStringUTF8(const StringPiece& str) {
const char *src = str.data();
int32_t src_len = static_cast<int32_t>(str.length());
int32_t char_index = 0;
Expand Down
11 changes: 10 additions & 1 deletion src/butil/strings/string_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -202,15 +202,24 @@ enum TrimPositions {
BUTIL_EXPORT TrimPositions TrimWhitespace(const string16& input,
TrimPositions positions,
butil::string16* output);
BUTIL_EXPORT TrimPositions TrimWhitespace(const butil::StringPiece16& input,
TrimPositions positions,
butil::StringPiece16* output);
BUTIL_EXPORT TrimPositions TrimWhitespaceASCII(const std::string& input,
TrimPositions positions,
std::string* output);
BUTIL_EXPORT TrimPositions TrimWhitespaceASCII(const butil::StringPiece& input,
TrimPositions positions,
butil::StringPiece* output);

// Deprecated. This function is only for backward compatibility and calls
// TrimWhitespaceASCII().
BUTIL_EXPORT TrimPositions TrimWhitespace(const std::string& input,
TrimPositions positions,
std::string* output);
BUTIL_EXPORT TrimPositions TrimWhitespace(const butil::StringPiece& input,
TrimPositions positions,
butil::StringPiece* output);

// Searches for CR or LF characters. Removes all contiguous whitespace
// strings that contain them. This is useful when trying to deal with text
Expand Down Expand Up @@ -245,7 +254,7 @@ BUTIL_EXPORT bool ContainsOnlyChars(const StringPiece16& input,
// to have the maximum 'discriminating' power from other encodings. If
// there's a use case for just checking the structural validity, we have to
// add a new function for that.
BUTIL_EXPORT bool IsStringUTF8(const std::string& str);
BUTIL_EXPORT bool IsStringUTF8(const StringPiece& str);
BUTIL_EXPORT bool IsStringASCII(const StringPiece& str);
BUTIL_EXPORT bool IsStringASCII(const string16& str);

Expand Down
Loading