Skip to content

Commit

Permalink
Merge pull request #1295 from lrita/l_s
Browse files Browse the repository at this point in the history
make butil::BasicStringPiece<T> support string split functions-family
  • Loading branch information
jamesge committed Nov 30, 2020
2 parents e64eb3c + 32c05be commit 29ab898
Show file tree
Hide file tree
Showing 6 changed files with 218 additions and 16 deletions.
7 changes: 7 additions & 0 deletions src/butil/strings/string_piece.h
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ template <typename STRING_TYPE> class BasicStringPiece {
: ptr_(str.data()), length_(str.size()) {}
// Constructs a piece covering |len| elements starting at |offset|.
// Only the pointer and the length are stored; no characters are copied.
BasicStringPiece(const value_type* offset, size_type len)
: ptr_(offset), length_(len) {}
// Constructs a piece covering the sub-range of |str| that starts at |pos| and
// spans at most |len| elements; the default |len| of npos means "through the
// end of |str|". Only the pointer and length are stored, nothing is copied.
//
// |pos| is clamped to str.length(). Without the clamp, a |pos| past the end
// makes str.length() - pos wrap around (assuming size_type is unsigned, as it
// is for std::string), which yields a pointer beyond |str| and a huge length.
// With the clamp, an out-of-range |pos| produces an empty piece located at
// the end of |str|. Behavior for pos <= str.length() is unchanged.
BasicStringPiece(const BasicStringPiece& str, size_type pos, size_type len = npos)
    : ptr_(str.data() + std::min(pos, str.length())),
      length_(std::min(len, str.length() - std::min(pos, str.length()))) {}
BasicStringPiece(const typename STRING_TYPE::const_iterator& begin,
const typename STRING_TYPE::const_iterator& end)
: ptr_((end > begin) ? &(*begin) : NULL),
Expand All @@ -203,6 +205,11 @@ template <typename STRING_TYPE> class BasicStringPiece {
ptr_ = NULL;
length_ = 0;
}
// Re-points this piece at the sub-range of |str| that starts at |pos| and
// spans at most |len| elements; the default |len| of npos means "through the
// end of |str|". Returns *this so calls can be chained.
//
// |pos| is clamped to str.length(). Without the clamp, a |pos| past the end
// makes str.length() - pos wrap around (assuming size_type is unsigned, as it
// is for std::string), which yields a pointer beyond |str| and a huge length.
// With the clamp, an out-of-range |pos| produces an empty piece located at
// the end of |str|. Behavior for pos <= str.length() is unchanged.
BasicStringPiece& assign(const BasicStringPiece& str, size_type pos, size_type len = npos) {
  // Compute the start before touching any member so that assigning from
  // *this (str aliasing this object) still reads the old pointer and length.
  const size_type start = std::min(pos, str.length());
  const size_type count = std::min(len, str.length() - start);
  ptr_ = str.data() + start;
  length_ = count;
  return *this;
}
void set(const value_type* data, size_type len) {
ptr_ = data;
length_ = len;
Expand Down
103 changes: 89 additions & 14 deletions src/butil/strings/string_split.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,25 +35,26 @@ void SplitStringT(const STR& str,
}
}

bool SplitStringIntoKeyValue(const std::string& line,
char key_value_delimiter,
std::string* key,
std::string* value) {
template <typename STR>
bool SplitStringIntoKeyValueT(const STR& line,
typename STR::value_type key_value_delimiter,
STR* key,
STR* value) {
key->clear();
value->clear();

// Find the delimiter.
size_t end_key_pos = line.find_first_of(key_value_delimiter);
if (end_key_pos == std::string::npos) {
if (end_key_pos == STR::npos) {
DVLOG(1) << "cannot find delimiter in: " << line;
return false; // no delimiter
}
key->assign(line, 0, end_key_pos);

// Find the value string.
std::string remains(line, end_key_pos, line.size() - end_key_pos);
STR remains(line, end_key_pos, line.size() - end_key_pos);
size_t begin_value_pos = remains.find_first_not_of(key_value_delimiter);
if (begin_value_pos == std::string::npos) {
if (begin_value_pos == STR::npos) {
DVLOG(1) << "cannot parse value from line: " << line;
return false; // no value
}
Expand Down Expand Up @@ -134,6 +135,13 @@ void SplitString(const string16& str,
SplitStringT(str, c, true, r);
}

// StringPiece16 overload of SplitString: splits |str| on the delimiter |c|
// and stores the parts in |r| by forwarding to SplitStringT. The third
// argument (true) is the flag that SplitStringDontTrim passes as false.
// NOTE(review): the resulting pieces presumably point into |str|'s buffer,
// so that buffer would have to outlive |r|'s contents — confirm against
// SplitStringT.
void SplitString(const butil::StringPiece16& str,
char16 c,
std::vector<butil::StringPiece16>* r) {
// Sanity-check the delimiter with CBU16_IS_SINGLE (presumably rejects
// surrogate code units — TODO confirm).
DCHECK(CBU16_IS_SINGLE(c));
SplitStringT(str, c, true, r);
}

void SplitString(const std::string& str,
char c,
std::vector<std::string>* r) {
Expand All @@ -144,13 +152,24 @@ void SplitString(const std::string& str,
SplitStringT(str, c, true, r);
}

bool SplitStringIntoKeyValuePairs(const std::string& line,
// StringPiece overload of SplitString: splits |str| on the delimiter |c| and
// stores the parts in |r| by forwarding to SplitStringT. The third argument
// (true) is the flag that SplitStringDontTrim passes as false.
// NOTE(review): the resulting pieces presumably point into |str|'s buffer,
// so that buffer would have to outlive |r|'s contents — confirm against
// SplitStringT.
void SplitString(const StringPiece& str,
char c,
std::vector<StringPiece>* r) {
// The lower-bound check is only compiled where plain char is signed.
#if CHAR_MIN < 0
DCHECK(c >= 0);
#endif
// Together with the check above this restricts |c| to values below 0x7F.
DCHECK(c < 0x7F);
SplitStringT(str, c, true, r);
}

template<typename STR>
bool SplitStringIntoKeyValuePairsT(const STR& line,
char key_value_delimiter,
char key_value_pair_delimiter,
StringPairs* key_value_pairs) {
std::vector<std::pair<STR, STR> >* key_value_pairs) {
key_value_pairs->clear();

std::vector<std::string> pairs;
std::vector<STR> pairs;
SplitString(line, key_value_pair_delimiter, &pairs);

bool success = true;
Expand All @@ -159,37 +178,72 @@ bool SplitStringIntoKeyValuePairs(const std::string& line,
if (pairs[i].empty())
continue;

std::string key;
std::string value;
if (!SplitStringIntoKeyValue(pairs[i], key_value_delimiter, &key, &value)) {
STR key;
STR value;
if (!SplitStringIntoKeyValueT(pairs[i], key_value_delimiter, &key, &value)) {
// Don't return here, to allow for pairs without associated
// value or key; just record that the split failed.
success = false;
}
key_value_pairs->push_back(make_pair(key, value));
key_value_pairs->push_back(std::make_pair(key, value));
}
return success;
}

bool SplitStringIntoKeyValuePairs(const std::string& line,
char key_value_delimiter,
char key_value_pair_delimiter,
StringPairs* key_value_pairs) {
return SplitStringIntoKeyValuePairsT(line, key_value_delimiter,
key_value_pair_delimiter, key_value_pairs);
}

// StringPiece entry point for key/value-pair splitting.
// |line| is cut into pairs at |key_value_pair_delimiter| and each pair is cut
// into key and value at |key_value_delimiter|; the results are written to
// |key_value_pairs| as StringPiece pairs. All of the work is done by the
// shared template SplitStringIntoKeyValuePairsT, instantiated here for
// butil::StringPiece, and its return value is passed through unchanged.
// NOTE(review): the stored pieces presumably reference |line|'s buffer, so
// that buffer would have to outlive |key_value_pairs|' contents — confirm.
bool SplitStringIntoKeyValuePairs(const butil::StringPiece& line,
                                  char key_value_delimiter,
                                  char key_value_pair_delimiter,
                                  StringPiecePairs* key_value_pairs) {
  const bool every_pair_parsed =
      SplitStringIntoKeyValuePairsT<butil::StringPiece>(
          line, key_value_delimiter, key_value_pair_delimiter, key_value_pairs);
  return every_pair_parsed;
}

// string16 overload: splits |str| using the substring |s| as the delimiter
// and stores the parts in |r|. Forwards to SplitStringUsingSubstrT.
void SplitStringUsingSubstr(const string16& str,
const string16& s,
std::vector<string16>* r) {
SplitStringUsingSubstrT(str, s, r);
}

// StringPiece16 overload: splits |str| using the substring |s| as the
// delimiter and stores the parts in |r|. Forwards to SplitStringUsingSubstrT.
// NOTE(review): the resulting pieces presumably point into |str|'s buffer —
// confirm against SplitStringUsingSubstrT.
void SplitStringUsingSubstr(const butil::StringPiece16& str,
const butil::StringPiece16& s,
std::vector<butil::StringPiece16>* r) {
SplitStringUsingSubstrT(str, s, r);
}

// std::string overload: splits |str| using the substring |s| as the
// delimiter and stores the parts in |r|. Forwards to SplitStringUsingSubstrT.
void SplitStringUsingSubstr(const std::string& str,
const std::string& s,
std::vector<std::string>* r) {
SplitStringUsingSubstrT(str, s, r);
}

// StringPiece overload: splits |str| using the substring |s| as the
// delimiter and stores the parts in |r|. Forwards to SplitStringUsingSubstrT.
// NOTE(review): the resulting pieces presumably point into |str|'s buffer —
// confirm against SplitStringUsingSubstrT.
void SplitStringUsingSubstr(const butil::StringPiece& str,
const butil::StringPiece& s,
std::vector<butil::StringPiece>* r) {
SplitStringUsingSubstrT(str, s, r);
}

// string16 overload: splits |str| on the delimiter |c| into |r| by forwarding
// to SplitStringT with its third argument set to false (SplitString passes
// true for the same argument).
void SplitStringDontTrim(const string16& str,
char16 c,
std::vector<string16>* r) {
// Sanity-check the delimiter with CBU16_IS_SINGLE (presumably rejects
// surrogate code units — TODO confirm).
DCHECK(CBU16_IS_SINGLE(c));
SplitStringT(str, c, false, r);
}

// StringPiece16 overload: splits |str| on the delimiter |c| into |r| by
// forwarding to SplitStringT with its third argument set to false
// (SplitString passes true for the same argument).
// NOTE(review): the resulting pieces presumably point into |str|'s buffer —
// confirm against SplitStringT.
void SplitStringDontTrim(const butil::StringPiece16& str,
char16 c,
std::vector<butil::StringPiece16>* r) {
// Sanity-check the delimiter with CBU16_IS_SINGLE (presumably rejects
// surrogate code units — TODO confirm).
DCHECK(CBU16_IS_SINGLE(c));
SplitStringT(str, c, false, r);
}

void SplitStringDontTrim(const std::string& str,
char c,
std::vector<std::string>* r) {
Expand All @@ -201,14 +255,35 @@ void SplitStringDontTrim(const std::string& str,
SplitStringT(str, c, false, r);
}

// StringPiece overload: splits |str| on the delimiter |c| into |r| by
// forwarding to SplitStringT with its third argument set to false
// (SplitString passes true for the same argument).
// NOTE(review): the resulting pieces presumably point into |str|'s buffer —
// confirm against SplitStringT.
void SplitStringDontTrim(const butil::StringPiece& str,
char c,
std::vector<butil::StringPiece>* r) {
// The input is checked with IsStringUTF8 before splitting.
DCHECK(IsStringUTF8(str));
// The lower-bound check is only compiled where plain char is signed.
#if CHAR_MIN < 0
DCHECK(c >= 0);
#endif
// Together with the check above this restricts |c| to values below 0x7F.
DCHECK(c < 0x7F);
SplitStringT(str, c, false, r);
}

// string16 overload: splits |str| along whitespace and stores the parts in
// |result|. Forwards to SplitStringAlongWhitespaceT.
void SplitStringAlongWhitespace(const string16& str,
std::vector<string16>* result) {
SplitStringAlongWhitespaceT(str, result);
}

// StringPiece16 overload: splits |str| along whitespace and stores the parts
// in |result|. Forwards to SplitStringAlongWhitespaceT.
// NOTE(review): the resulting pieces presumably point into |str|'s buffer —
// confirm against SplitStringAlongWhitespaceT.
void SplitStringAlongWhitespace(const butil::StringPiece16& str,
std::vector<butil::StringPiece16>* result) {
SplitStringAlongWhitespaceT(str, result);
}

// std::string overload: splits |str| along whitespace and stores the parts in
// |result|. Forwards to SplitStringAlongWhitespaceT.
void SplitStringAlongWhitespace(const std::string& str,
std::vector<std::string>* result) {
SplitStringAlongWhitespaceT(str, result);
}

// StringPiece overload: splits |str| along whitespace and stores the parts in
// |result|. Forwards to SplitStringAlongWhitespaceT.
// NOTE(review): the resulting pieces presumably point into |str|'s buffer —
// confirm against SplitStringAlongWhitespaceT.
void SplitStringAlongWhitespace(const butil::StringPiece& str,
std::vector<butil::StringPiece>* result) {
SplitStringAlongWhitespaceT(str, result);
}

} // namespace butil
28 changes: 28 additions & 0 deletions src/butil/strings/string_split.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

#include "butil/base_export.h"
#include "butil/strings/string16.h"
#include "butil/strings/string_piece.h"

namespace butil {

Expand All @@ -23,6 +24,9 @@ namespace butil {
BUTIL_EXPORT void SplitString(const string16& str,
char16 c,
std::vector<string16>* r);
// Overload of the above taking a StringPiece16 and producing StringPiece16
// results.
// NOTE(review): the output pieces presumably reference |str|'s storage, so
// that storage would have to outlive the contents of |r| — confirm against
// the implementation.
BUTIL_EXPORT void SplitString(const butil::StringPiece16& str,
char16 c,
std::vector<butil::StringPiece16>* r);

// |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which
// the trailing byte of a multi-byte character can be in the ASCII range.
Expand All @@ -31,8 +35,12 @@ BUTIL_EXPORT void SplitString(const string16& str,
BUTIL_EXPORT void SplitString(const std::string& str,
char c,
std::vector<std::string>* r);
// Overload of the above taking a StringPiece and producing StringPiece
// results.
// NOTE(review): the output pieces presumably reference |str|'s storage, so
// that storage would have to outlive the contents of |r| — confirm against
// the implementation.
BUTIL_EXPORT void SplitString(const butil::StringPiece& str,
char c,
std::vector<butil::StringPiece>* r);

// List of (key, value) pairs holding std::string copies.
typedef std::vector<std::pair<std::string, std::string> > StringPairs;
// List of (key, value) pairs holding butil::StringPiece elements.
typedef std::vector<std::pair<butil::StringPiece, butil::StringPiece> > StringPiecePairs;

// Splits |line| into key value pairs according to the given delimiters and
// removes whitespace leading each key and trailing each value. Returns true
Expand All @@ -42,27 +50,43 @@ BUTIL_EXPORT bool SplitStringIntoKeyValuePairs(const std::string& line,
char key_value_delimiter,
char key_value_pair_delimiter,
StringPairs* key_value_pairs);
// StringPiece overload of SplitStringIntoKeyValuePairs: takes the same
// delimiters and writes the resulting pairs to a StringPiecePairs.
// NOTE(review): the stored pieces presumably reference |line|'s storage, so
// that storage would have to outlive |key_value_pairs|' contents — confirm
// against the implementation.
BUTIL_EXPORT bool SplitStringIntoKeyValuePairs(const butil::StringPiece& line,
char key_value_delimiter,
char key_value_pair_delimiter,
StringPiecePairs* key_value_pairs);

// The same as SplitString, but use a substring delimiter instead of a char.
// string16 input, string16 results.
BUTIL_EXPORT void SplitStringUsingSubstr(const string16& str,
const string16& s,
std::vector<string16>* r);
// StringPiece16 input, StringPiece16 results.
// NOTE(review): output pieces presumably reference |str|'s storage — confirm.
BUTIL_EXPORT void SplitStringUsingSubstr(const butil::StringPiece16& str,
const butil::StringPiece16& s,
std::vector<butil::StringPiece16>* r);
// std::string input, std::string results.
BUTIL_EXPORT void SplitStringUsingSubstr(const std::string& str,
const std::string& s,
std::vector<std::string>* r);
// StringPiece input, StringPiece results.
// NOTE(review): output pieces presumably reference |str|'s storage — confirm.
BUTIL_EXPORT void SplitStringUsingSubstr(const butil::StringPiece& str,
const butil::StringPiece& s,
std::vector<butil::StringPiece>* r);

// The same as SplitString, but don't trim white space.
// NOTE: |c| must be in BMP (Basic Multilingual Plane)
BUTIL_EXPORT void SplitStringDontTrim(const string16& str,
char16 c,
std::vector<string16>* r);
// Overload of the above taking a StringPiece16 and producing StringPiece16
// results.
// NOTE(review): output pieces presumably reference |str|'s storage — confirm.
BUTIL_EXPORT void SplitStringDontTrim(const butil::StringPiece16& str,
char16 c,
std::vector<butil::StringPiece16>* r);
// |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which
// the trailing byte of a multi-byte character can be in the ASCII range.
// UTF-8, and other single/multi-byte ASCII-compatible encodings are OK.
// Note: |c| must be in the ASCII range.
BUTIL_EXPORT void SplitStringDontTrim(const std::string& str,
char c,
std::vector<std::string>* r);
// Overload of the above taking a StringPiece and producing StringPiece
// results.
// NOTE(review): output pieces presumably reference |str|'s storage — confirm.
BUTIL_EXPORT void SplitStringDontTrim(const butil::StringPiece& str,
char c,
std::vector<butil::StringPiece>* r);

// WARNING: this uses whitespace as defined by the HTML5 spec. If you need
// a function similar to this but want to trim all types of whitespace, then
Expand All @@ -74,8 +98,12 @@ BUTIL_EXPORT void SplitStringDontTrim(const std::string& str,
// characters is added to result.
// string16 input, string16 results.
BUTIL_EXPORT void SplitStringAlongWhitespace(const string16& str,
std::vector<string16>* result);
// StringPiece16 input, StringPiece16 results.
// NOTE(review): output pieces presumably reference |str|'s storage — confirm.
BUTIL_EXPORT void SplitStringAlongWhitespace(const butil::StringPiece16& str,
std::vector<butil::StringPiece16>* result);
// std::string input, std::string results.
BUTIL_EXPORT void SplitStringAlongWhitespace(const std::string& str,
std::vector<std::string>* result);
// StringPiece input, StringPiece results.
// NOTE(review): output pieces presumably reference |str|'s storage — confirm.
BUTIL_EXPORT void SplitStringAlongWhitespace(const butil::StringPiece& str,
std::vector<butil::StringPiece>* result);

} // namespace butil

Expand Down
21 changes: 20 additions & 1 deletion src/butil/strings/string_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -246,12 +246,25 @@ TrimPositions TrimWhitespace(const string16& input,
output);
}

// StringPiece16 overload of TrimWhitespace. Forwards |input|, |positions| and
// |output| to TrimStringT, passing kWhitespaceUTF16 wrapped in a
// StringPiece16 as the second argument (presumably the set of characters to
// trim — confirm against TrimStringT), and returns TrimStringT's result.
TrimPositions TrimWhitespace(const butil::StringPiece16& input,
TrimPositions positions,
butil::StringPiece16* output) {
return TrimStringT(input, butil::StringPiece16(kWhitespaceUTF16), positions,
output);
}

// std::string overload of TrimWhitespaceASCII. Forwards |input|, |positions|
// and |output| to TrimStringT, passing a std::string built from
// kWhitespaceASCII as the second argument (presumably the set of characters
// to trim — confirm against TrimStringT), and returns TrimStringT's result.
TrimPositions TrimWhitespaceASCII(const std::string& input,
TrimPositions positions,
std::string* output) {
return TrimStringT(input, std::string(kWhitespaceASCII), positions, output);
}

// StringPiece overload of TrimWhitespaceASCII. Forwards |input|, |positions|
// and |output| to TrimStringT, passing kWhitespaceASCII wrapped in a
// StringPiece as the second argument (presumably the set of characters to
// trim — confirm against TrimStringT), and returns TrimStringT's result.
// NOTE(review): |output| presumably ends up referencing |input|'s storage
// rather than owning a copy — confirm against TrimStringT.
TrimPositions TrimWhitespaceASCII(const butil::StringPiece& input,
TrimPositions positions,
butil::StringPiece* output) {
return TrimStringT(input, butil::StringPiece(kWhitespaceASCII), positions, output);
}

// This function is only for backward-compatibility.
// To be removed when all callers are updated.
TrimPositions TrimWhitespace(const std::string& input,
Expand All @@ -260,6 +273,12 @@ TrimPositions TrimWhitespace(const std::string& input,
return TrimWhitespaceASCII(input, positions, output);
}

// StringPiece overload of TrimWhitespace. Simply calls TrimWhitespaceASCII
// with the same arguments and returns its result.
TrimPositions TrimWhitespace(const butil::StringPiece& input,
TrimPositions positions,
butil::StringPiece* output) {
return TrimWhitespaceASCII(input, positions, output);
}

template<typename STR>
STR CollapseWhitespaceT(const STR& text,
bool trim_sequences_with_line_breaks) {
Expand Down Expand Up @@ -340,7 +359,7 @@ bool IsStringASCII(const string16& str) {
return DoIsStringASCII(str);
}

bool IsStringUTF8(const std::string& str) {
bool IsStringUTF8(const StringPiece& str) {
const char *src = str.data();
int32_t src_len = static_cast<int32_t>(str.length());
int32_t char_index = 0;
Expand Down
11 changes: 10 additions & 1 deletion src/butil/strings/string_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -202,15 +202,24 @@ enum TrimPositions {
// string16 input, string16 output.
BUTIL_EXPORT TrimPositions TrimWhitespace(const string16& input,
TrimPositions positions,
butil::string16* output);
// StringPiece16 input, StringPiece16 output.
// NOTE(review): |output| presumably references |input|'s storage — confirm.
BUTIL_EXPORT TrimPositions TrimWhitespace(const butil::StringPiece16& input,
TrimPositions positions,
butil::StringPiece16* output);
// std::string input, std::string output.
BUTIL_EXPORT TrimPositions TrimWhitespaceASCII(const std::string& input,
TrimPositions positions,
std::string* output);
// StringPiece input, StringPiece output.
// NOTE(review): |output| presumably references |input|'s storage — confirm.
BUTIL_EXPORT TrimPositions TrimWhitespaceASCII(const butil::StringPiece& input,
TrimPositions positions,
butil::StringPiece* output);

// Deprecated. This function is only for backward compatibility and calls
// TrimWhitespaceASCII().
BUTIL_EXPORT TrimPositions TrimWhitespace(const std::string& input,
TrimPositions positions,
std::string* output);
// StringPiece counterpart of the overload above; takes and produces
// butil::StringPiece instead of std::string.
BUTIL_EXPORT TrimPositions TrimWhitespace(const butil::StringPiece& input,
TrimPositions positions,
butil::StringPiece* output);

// Searches for CR or LF characters. Removes all contiguous whitespace
// strings that contain them. This is useful when trying to deal with text
Expand Down Expand Up @@ -245,7 +254,7 @@ BUTIL_EXPORT bool ContainsOnlyChars(const StringPiece16& input,
// to have the maximum 'discriminating' power from other encodings. If
// there's a use case for just checking the structural validity, we have to
// add a new function for that.
BUTIL_EXPORT bool IsStringUTF8(const std::string& str);
BUTIL_EXPORT bool IsStringUTF8(const StringPiece& str);
BUTIL_EXPORT bool IsStringASCII(const StringPiece& str);
BUTIL_EXPORT bool IsStringASCII(const string16& str);

Expand Down
Loading

0 comments on commit 29ab898

Please sign in to comment.