-
Notifications
You must be signed in to change notification settings - Fork 1.4k
/
string.clj
377 lines (329 loc) · 12.5 KB
/
string.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
; Copyright (c) Rich Hickey. All rights reserved.
; The use and distribution terms for this software are covered by the
; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
; which can be found in the file epl-v10.html at the root of this distribution.
; By using this software in any fashion, you are agreeing to be bound by
; the terms of this license.
; You must not remove this notice, or any other, from this software.
(ns ^{:doc "Clojure String utilities
It is poor form to (:use clojure.string). Instead, use require
with :as to specify a prefix, e.g.
(ns your.namespace.here
(:require [clojure.string :as str]))
Design notes for clojure.string:
1. Strings are objects (as opposed to sequences). As such, the
string being manipulated is the first argument to a function;
passing nil will result in a NullPointerException unless
documented otherwise. If you want sequence-y behavior instead,
use a sequence.
2. Functions are generally not lazy, and call straight to host
methods where those are available and efficient.
3. Functions take advantage of String implementation details to
write high-performing loop/recurs instead of using higher-order
functions. (This is not idiomatic in general-purpose application
code.)
4. When a function is documented to accept a string argument, it
will take any implementation of the correct *interface* on the
host platform. In Java, this is CharSequence, which is more
general than String. In ordinary usage you will almost always
pass concrete strings. If you are doing something unusual,
e.g. passing a mutable implementation of CharSequence, then
thread-safety is your responsibility."
:author "Stuart Sierra, Stuart Halloway, David Liebke"}
clojure.string
(:refer-clojure :exclude (replace reverse))
(:import (java.util.regex Pattern Matcher)
clojure.lang.LazilyPersistentVector))
(set! *warn-on-reflection* true)
(defn ^String reverse
"Returns s with its characters reversed."
{:added "1.2"}
[^CharSequence s]
(.toString (.reverse (StringBuilder. s))))
(defn ^String re-quote-replacement
"Given a replacement string that you wish to be a literal
replacement for a pattern match in replace or replace-first, do the
necessary escaping of special characters in the replacement."
{:added "1.5"}
[^CharSequence replacement]
(Matcher/quoteReplacement (.toString ^CharSequence replacement)))
(defn- replace-by
[^CharSequence s re f]
(let [m (re-matcher re s)]
(if (.find m)
(let [buffer (StringBuffer. (.length s))]
(loop [found true]
(if found
(do (.appendReplacement m buffer (Matcher/quoteReplacement (f (re-groups m))))
(recur (.find m)))
(do (.appendTail m buffer)
(.toString buffer)))))
s)))
(defn ^String replace
"Replaces all instance of match with replacement in s.
match/replacement can be:
string / string
char / char
pattern / (string or function of match).
See also replace-first.
The replacement is literal (i.e. none of its characters are treated
specially) for all cases above except pattern / string.
For pattern / string, $1, $2, etc. in the replacement string are
substituted with the string that matched the corresponding
parenthesized group in the pattern. If you wish your replacement
string r to be used literally, use (re-quote-replacement r) as the
replacement argument. See also documentation for
java.util.regex.Matcher's appendReplacement method.
Example:
(clojure.string/replace \"Almost Pig Latin\" #\"\\b(\\w)(\\w+)\\b\" \"$2$1ay\")
-> \"lmostAay igPay atinLay\""
{:added "1.2"}
[^CharSequence s match replacement]
(let [s (.toString s)]
(cond
(instance? Character match) (.replace s ^Character match ^Character replacement)
(instance? CharSequence match) (.replace s ^CharSequence match ^CharSequence replacement)
(instance? Pattern match) (if (instance? CharSequence replacement)
(.replaceAll (re-matcher ^Pattern match s)
(.toString ^CharSequence replacement))
(replace-by s match replacement))
:else (throw (IllegalArgumentException. (str "Invalid match arg: " match))))))
(defn- replace-first-by
[^CharSequence s ^Pattern re f]
(let [m (re-matcher re s)]
(if (.find m)
(let [buffer (StringBuffer. (.length s))
rep (Matcher/quoteReplacement (f (re-groups m)))]
(.appendReplacement m buffer rep)
(.appendTail m buffer)
(str buffer))
s)))
(defn- replace-first-char
[^CharSequence s ^Character match replace]
(let [s (.toString s)
i (.indexOf s (int match))]
(if (= -1 i)
s
(str (subs s 0 i) replace (subs s (inc i))))))
(defn- replace-first-str
[^CharSequence s ^String match ^String replace]
(let [^String s (.toString s)
i (.indexOf s match)]
(if (= -1 i)
s
(str (subs s 0 i) replace (subs s (+ i (.length match)))))))
(defn ^String replace-first
"Replaces the first instance of match with replacement in s.
match/replacement can be:
char / char
string / string
pattern / (string or function of match).
See also replace.
The replacement is literal (i.e. none of its characters are treated
specially) for all cases above except pattern / string.
For pattern / string, $1, $2, etc. in the replacement string are
substituted with the string that matched the corresponding
parenthesized group in the pattern. If you wish your replacement
string r to be used literally, use (re-quote-replacement r) as the
replacement argument. See also documentation for
java.util.regex.Matcher's appendReplacement method.
Example:
(clojure.string/replace-first \"swap first two words\"
#\"(\\w+)(\\s+)(\\w+)\" \"$3$2$1\")
-> \"first swap two words\""
{:added "1.2"}
[^CharSequence s match replacement]
(let [s (.toString s)]
(cond
(instance? Character match)
(replace-first-char s match replacement)
(instance? CharSequence match)
(replace-first-str s (.toString ^CharSequence match)
(.toString ^CharSequence replacement))
(instance? Pattern match)
(if (instance? CharSequence replacement)
(.replaceFirst (re-matcher ^Pattern match s)
(.toString ^CharSequence replacement))
(replace-first-by s match replacement))
:else (throw (IllegalArgumentException. (str "Invalid match arg: " match))))))
(defn ^String join
"Returns a string of all elements in coll, as returned by (seq coll),
separated by an optional separator."
{:added "1.2"}
([coll]
(apply str coll))
([separator coll]
(loop [sb (StringBuilder. (str (first coll)))
more (next coll)
sep (str separator)]
(if more
(recur (-> sb (.append sep) (.append (str (first more))))
(next more)
sep)
(str sb)))))
(defn ^String capitalize
"Converts first character of the string to upper-case, all other
characters to lower-case."
{:added "1.2"}
[^CharSequence s]
(let [s (.toString s)]
(if (< (count s) 2)
(.toUpperCase s)
(str (.toUpperCase (subs s 0 1))
(.toLowerCase (subs s 1))))))
(defn ^String upper-case
"Converts string to all upper-case."
{:added "1.2"}
[^CharSequence s]
(.. s toString toUpperCase))
(defn ^String lower-case
"Converts string to all lower-case."
{:added "1.2"}
[^CharSequence s]
(.. s toString toLowerCase))
(defn split
"Splits string on a regular expression. Optional argument limit is
the maximum number of parts. Not lazy. Returns vector of the parts.
Trailing empty strings are not returned - pass limit of -1 to return all."
{:added "1.2"}
([^CharSequence s ^Pattern re]
(LazilyPersistentVector/createOwning (.split re s)))
([ ^CharSequence s ^Pattern re limit]
(LazilyPersistentVector/createOwning (.split re s limit))))
(defn split-lines
"Splits s on \\n or \\r\\n. Trailing empty lines are not returned."
{:added "1.2"}
[^CharSequence s]
(split s #"\r?\n"))
(defn ^String trim
"Removes whitespace from both ends of string."
{:added "1.2"}
[^CharSequence s]
(let [len (.length s)]
(loop [rindex len]
(if (zero? rindex)
""
(if (Character/isWhitespace (.charAt s (dec rindex)))
(recur (dec rindex))
;; there is at least one non-whitespace char in the string,
;; so no need to check for lindex reaching len.
(loop [lindex 0]
(if (Character/isWhitespace (.charAt s lindex))
(recur (inc lindex))
(.. s (subSequence lindex rindex) toString))))))))
(defn ^String triml
"Removes whitespace from the left side of string."
{:added "1.2"}
[^CharSequence s]
(let [len (.length s)]
(loop [index 0]
(if (= len index)
""
(if (Character/isWhitespace (.charAt s index))
(recur (unchecked-inc index))
(.. s (subSequence index len) toString))))))
(defn ^String trimr
"Removes whitespace from the right side of string."
{:added "1.2"}
[^CharSequence s]
(loop [index (.length s)]
(if (zero? index)
""
(if (Character/isWhitespace (.charAt s (unchecked-dec index)))
(recur (unchecked-dec index))
(.. s (subSequence 0 index) toString)))))
(defn ^String trim-newline
"Removes all trailing newline \\n or return \\r characters from
string. Similar to Perl's chomp."
{:added "1.2"}
[^CharSequence s]
(loop [index (.length s)]
(if (zero? index)
""
(let [ch (.charAt s (dec index))]
(if (or (= ch \newline) (= ch \return))
(recur (dec index))
(.. s (subSequence 0 index) toString))))))
(defn blank?
"True if s is nil, empty, or contains only whitespace."
{:added "1.2"}
[^CharSequence s]
(if s
(loop [index (int 0)]
(if (= (.length s) index)
true
(if (Character/isWhitespace (.charAt s index))
(recur (inc index))
false)))
true))
(defn ^String escape
"Return a new string, using cmap to escape each character ch
from s as follows:
If (cmap ch) is nil, append ch to the new string.
If (cmap ch) is non-nil, append (str (cmap ch)) instead."
{:added "1.2"}
[^CharSequence s cmap]
(loop [index (int 0)
buffer (StringBuilder. (.length s))]
(if (= (.length s) index)
(.toString buffer)
(let [ch (.charAt s index)]
(if-let [replacement (cmap ch)]
(.append buffer replacement)
(.append buffer ch))
(recur (inc index) buffer)))))
(defn index-of
"Return index of value (string or char) in s, optionally searching
forward from from-index. Return nil if value not found."
{:added "1.8"}
([^CharSequence s value]
(let [result ^long
(if (instance? Character value)
(.indexOf (.toString s) ^int (.charValue ^Character value))
(.indexOf (.toString s) ^String value))]
(if (= result -1)
nil
result)))
([^CharSequence s value ^long from-index]
(let [result ^long
(if (instance? Character value)
(.indexOf (.toString s) ^int (.charValue ^Character value) (unchecked-int from-index))
(.indexOf (.toString s) ^String value (unchecked-int from-index)))]
(if (= result -1)
nil
result))))
(defn last-index-of
"Return last index of value (string or char) in s, optionally
searching backward from from-index. Return nil if value not found."
{:added "1.8"}
([^CharSequence s value]
(let [result ^long
(if (instance? Character value)
(.lastIndexOf (.toString s) ^int (.charValue ^Character value))
(.lastIndexOf (.toString s) ^String value))]
(if (= result -1)
nil
result)))
([^CharSequence s value ^long from-index]
(let [result ^long
(if (instance? Character value)
(.lastIndexOf (.toString s) ^int (.charValue ^Character value) (unchecked-int from-index))
(.lastIndexOf (.toString s) ^String value (unchecked-int from-index)))]
(if (= result -1)
nil
result))))
(defn starts-with?
"True if s starts with substr."
{:added "1.8"}
[^CharSequence s ^String substr]
(.startsWith (.toString s) substr))
(defn ends-with?
"True if s ends with substr."
{:added "1.8"}
[^CharSequence s ^String substr]
(.endsWith (.toString s) substr))
(defn includes?
"True if s includes substr."
{:added "1.8"}
[^CharSequence s ^CharSequence substr]
(.contains (.toString s) substr))