-
Notifications
You must be signed in to change notification settings - Fork 1.2k
/
stringutil.h
284 lines (252 loc) · 9.77 KB
/
stringutil.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
/**
* @file
* @brief String manipulation functions that don't fit elsewhere.
**/
#pragma once
#include "config.h"
#include "libutil.h" // always_true
#ifdef CRAWL_HAVE_STRLCPY
#include <cstring>
#else
size_t strlcpy(char *dst, const char *src, size_t n);
#endif
string lowercase_string(const string &s);
string &lowercase(string &s);
string &uppercase(string &s);
string uppercase_string(string s);
string lowercase_first(string);
string uppercase_first(string);
/**
* Returns 1 + the index of the first suffix that matches the given string,
* 0 if no suffixes match.
*/
int ends_with(const string &s, const char * const suffixes[]);
string wordwrap_line(string &s, int cols, bool tags = false,
bool indent = false);
string strip_filename_unsafe_chars(const string &s);
string vmake_stringf(const char *format, va_list args);
string make_stringf(PRINTF(0, ));
bool strip_suffix(string &s, const string &suffix);
string replace_all(string s, const string &tofind, const string &replacement);
string replace_all_of(string s, const string &tofind, const string &replacement);
string replace_keys(const string &text, const map<string, string>& replacements);
string maybe_capitalise_substring(string s);
string maybe_pick_random_substring(string s);
int count_occurrences(const string &text, const string &searchfor);
string &trim_string(string &str);
string &trim_string_right(string &str);
string trimmed_string(string s);
/**
* Find the enumerator e between begin and end that satisfies pred(e) and
* whose name, as given by namefunc(e), has the earliest occurrence of the
* substring spec.
*
* @tparam Enum An integer-like or C-style enum type no larger than
* size_t. More specifically, Enum must be implicitly
* convertible to size_t, and must be explicitly convertible
* from size_t with static_cast. There should be no gaps
* in enumerator values between begin and end.
*
* @param spec The substring to search for.
* @param begin The beginning of the enumerator range to search in.
* @param end One past the end of the enum range to search in.
* @param pred A function from Enum to bool. Enumerators that do not
* satisfy the predicate are ignored.
* @param namefunc A function from Enum to string or const char * giving
* the name of the enumerator.
* @return The enumerator that satisfies pred and whose name contains the
* spec substring beginning at the earliest position. If no such
* enumerator exists, returns end. If there are multiple strings
* containing the spec as a prefix, returns the shortest such string
* (so exact matches are preferred); otherwise ties are broken in
* an unspecified manner.
*/
template<class Enum, class Pred, class NameFunc>
Enum find_earliest_match(const string &spec, Enum begin, Enum end,
Pred pred, NameFunc namefunc)
{
Enum selected = end;
const size_t speclen = spec.length();
size_t bestpos = string::npos;
size_t bestlen = string::npos;
for (size_t i = begin; i < (size_t) end; ++i)
{
const Enum curr = static_cast<Enum>(i);
if (!pred(curr))
continue;
const string name = lowercase_string(namefunc(curr));
const size_t pos = name.find(spec);
const size_t len = name.length();
if (pos < bestpos || pos == 0 && len < bestlen)
{
// Exit early if we found an exact match.
if (pos == 0 && len == speclen)
return curr;
// npos is never less than bestpos, so the spec was found.
bestpos = pos;
if (pos == 0)
bestlen = len;
selected = curr;
}
}
return selected;
}
/**
* Join together strings computed by a function applied to some elements
* of a range.
*
* @tparam Z An iterator or pointer type.
* @tparam F A callable type that takes whatever Z points to, and
* returns a string or null-terminated char *.
* @tparam G A callable type that takes whatever Z points to, and
* returns some type that is explicitly convertable to bool
*
* @param start An iterator to the beginning of the range of elements to
* consider.
* @param end An iterator to one spot past the end of the range of
* elements to consider.
* @param stringify A function or function-like object that takes an
* element from the range and returns a string or C string. Will be
* called once per selected element.
* @param andc The separator to use before the last selected element.
* @param comma The separator to use between elements other than the last.
* @param filter A function or function-like object to select elements.
* Should accept as a single argument an element from the range, and
* return true if the element should be included in the result string.
* Will be called between N and 2N times, where N is the total number
* of elements in the range.
*
* @return A string containing the stringifications of all the elements
* for which filter returns true, with andc separating the last two
* elements and comma separating the other elements. If the range is
* empty, returns the empty string.
*/
template <typename Z, typename F, typename G>
string comma_separated_fn(Z start, Z end, F stringify,
const string &andc, const string &comma,
G filter)
{
string text;
bool first = true;
for (Z i = start; i != end; ++i)
{
if (!filter(*i))
continue;
if (first)
first = false;
else
{
Z tmp = i;
// Advance until we find an item selected by the filter.
//
// This loop iterates (and calls filter) a linear number of times
// over the entire call to comma_separated_fn. Some cases:
//
// filter is always true: do loop iterates once, is reached N-1
// times: N-1 iterations total.
//
// filter is true half the time: do loop iterates twice on average,
// is reached N/2 - 1 times: N-2 iterations total.
//
// filter is true for sqrt(N) elements: do loop iterates sqrt(N)
// times on average, is reached sqrt(N) - 1 times: N - sqrt(N)
// iterations total.
//
// filter is always false: do loop is never reached: 0 iterations.
do
{
// TODO: really, we could update i here (one fewer time than
// tmp): if the filter returns false, we might as well have
// the outer for loop skip that element, so it doesn't have
// to call the filter again before deciding to "continue;".
++tmp;
}
while (tmp != end && !filter(*tmp));
if (tmp != end)
text += comma;
else
text += andc;
}
text += stringify(*i);
}
return text;
}
template <typename Z, typename F>
string comma_separated_fn(Z start, Z end, F stringify,
const string &andc = " and ",
const string &comma = ", ")
{
return comma_separated_fn(start, end, stringify, andc, comma,
always_true<decltype(*start)>);
}
template <typename Z>
string comma_separated_line(Z start, Z end, const string &andc = " and ",
const string &comma = ", ")
{
return comma_separated_fn(start, end, [] (const string &s) { return s; },
andc, comma);
}
static inline bool starts_with(const string &s, const string &prefix)
{
return s.rfind(prefix, 0) != string::npos;
}
static inline bool ends_with(const string &s, const string &suffix)
{
if (s.length() < suffix.length())
return false;
return s.find(suffix, s.length() - suffix.length()) != string::npos;
}
// Splits string 's' on the separator 'sep'. If trim == true, trims each
// segment. If accept_empties == true, accepts empty segments. If nsplits >= 0,
// splits on the first nsplits occurrences of the separator, and stores the
// remainder of the string as the last segment; negative values of nsplits
// split on all occurrences of the separator.
vector<string> split_string(const string &sep, string s, bool trim = true,
bool accept_empties = false, int nsplits = -1);
// time
string make_time_string(time_t abs_time, bool terse = false);
string make_file_time(time_t when);
// Work around older Cygwin's missing std::to_string, resulting from a lack
// of long double support. Newer versions do provide long double and
// std::to_string.
//
// See https://cygwin.com/ml/cygwin/2015-01/msg00245.html for more info.
#ifdef _GLIBCXX_HAVE_BROKEN_VSWPRINTF
// Inject into std:: because we sometimes use std::to_string to
// disambiguate.
namespace std
{
static inline string to_string(int value)
{
return make_stringf("%d", value);
}
static inline string to_string(long value)
{
return make_stringf("%ld", value);
}
static inline string to_string(long long value)
{
return make_stringf("%lld", value);
}
static inline string to_string(unsigned value)
{
return make_stringf("%u", value);
}
static inline string to_string(unsigned long value)
{
return make_stringf("%lu", value);
}
static inline string to_string(unsigned long long value)
{
return make_stringf("%llu", value);
}
static inline string to_string(float value)
{
return make_stringf("%f", value);
}
static inline string to_string(double value)
{
return make_stringf("%f", value);
}
}
#endif