/
string.clj
254 lines (220 loc) · 8.02 KB
/
string.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
; Copyright (c) Rich Hickey. All rights reserved.
; The use and distribution terms for this software are covered by the
; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
; which can be found in the file epl-v10.html at the root of this distribution.
; By using this software in any fashion, you are agreeing to be bound by
; the terms of this license.
; You must not remove this notice, or any other, from this software.
(ns ^{:doc "Clojure String utilities
It is poor form to (:use clojure.string). Instead, use require
with :as to specify a prefix, e.g.
(ns your.namespace.here
(:require [clojure.string :as str]))
Design notes for clojure.string:
1. Strings are objects (as opposed to sequences). As such, the
string being manipulated is the first argument to a function;
passing nil will result in a NullPointerException unless
documented otherwise. If you want sequence-y behavior instead,
use a sequence.
2. Functions are generally not lazy, and call straight to host
methods where those are available and efficient.
3. Functions take advantage of String implementation details to
write high-performing loop/recurs instead of using higher-order
functions. (This is not idiomatic in general-purpose application
code.)
4. When a function is documented to accept a string argument, it
will take any implementation of the correct *interface* on the
host platform. In Java, this is CharSequence, which is more
general than String. In ordinary usage you will almost always
pass concrete strings. If you are doing something unusual,
e.g. passing a mutable implementation of CharSequence, then
thread-safety is your responsibility."
:author "Stuart Sierra, Stuart Halloway, David Liebke"}
clojure.string
(:refer-clojure :exclude (replace reverse))
(:import (java.util.regex Pattern)
clojure.lang.LazilyPersistentVector))
(defn ^String reverse
  "Returns a new string holding the characters of s in reverse order."
  {:added "1.2"}
  [^CharSequence s]
  ;; Copy into a StringBuilder, reverse it in place, then materialize.
  (-> (StringBuilder. s)
      .reverse
      .toString))
(defn- replace-by
  "Replaces every match of the pattern re in s with the string returned
  by calling f on the match (as produced by re-groups).

  The string returned by f is inserted literally: it is passed through
  Matcher/quoteReplacement so that any $ or backslash it contains is not
  interpreted as a group reference or escape by appendReplacement."
  [^CharSequence s re f]
  (let [m (re-matcher re s)
        buffer (StringBuffer. (.length s))]
    (loop []
      (if (.find m)
        (do (.appendReplacement
              m buffer
              (java.util.regex.Matcher/quoteReplacement (f (re-groups m))))
            (recur))
        ;; No further matches: copy the remaining input and finish.
        (do (.appendTail m buffer)
            (.toString buffer))))))
(defn ^String replace
  "Replaces all instances of match with replacement in s.

  match/replacement can be:

  string / string
  char / char
  pattern / (string or function of match).

  Throws IllegalArgumentException when match is none of the above.

  See also replace-first."
  {:added "1.2"}
  [^CharSequence s match replacement]
  (let [text (.toString s)]
    ;; Dispatch on the runtime type of match; Character is tested before
    ;; CharSequence, CharSequence before Pattern.
    (condp instance? match
      Character
      (.replace text ^Character match ^Character replacement)

      CharSequence
      (.replace text ^CharSequence match ^CharSequence replacement)

      Pattern
      (if (instance? CharSequence replacement)
        (-> (re-matcher ^Pattern match text)
            (.replaceAll (.toString ^CharSequence replacement)))
        (replace-by text match replacement))

      (throw (IllegalArgumentException. (str "Invalid match arg: " match))))))
(defn- replace-first-by
  "Replaces the first match of the pattern re in s with the string
  returned by calling f on the match (as produced by re-groups).

  Returns the string form of s unchanged when re does not match anywhere
  (previously this case returned nil). The string returned by f is
  inserted literally: it is passed through Matcher/quoteReplacement so
  that $ and backslash are not interpreted by appendReplacement."
  [^CharSequence s ^Pattern re f]
  (let [m (re-matcher re s)]
    (if (.find m)
      (let [buffer (StringBuffer. (.length s))
            rep (java.util.regex.Matcher/quoteReplacement (f (re-groups m)))]
        (.appendReplacement m buffer rep)
        (.appendTail m buffer)
        (str buffer))
      ;; No match: hand back the input rather than nil.
      (.toString s))))
(defn- replace-first-char
  "Returns the string form of s with the first occurrence of the
  character match swapped for replace; returns the string unchanged when
  match does not occur."
  [^CharSequence s ^Character match replace]
  (let [text (.toString s)
        pos (.indexOf text (int match))]
    (if (neg? pos)
      text
      ;; Stitch together: everything before the hit, the replacement,
      ;; everything after the hit.
      (str (subs text 0 pos)
           replace
           (subs text (inc pos))))))
(defn ^String replace-first
  "Replaces the first instance of match with replacement in s.

  match/replacement can be:

  char / char
  string / string
  pattern / (string or function of match).

  For string / string both arguments are taken literally: the match is
  quoted with Pattern/quote and the replacement with
  Matcher/quoteReplacement, so $ and backslash carry no special meaning.
  For pattern / string the replacement is handed to Matcher.replaceFirst
  as-is.

  Throws IllegalArgumentException when match is none of the above.

  See also replace."
  {:added "1.2"}
  [^CharSequence s match replacement]
  (let [s (.toString s)]
    (cond
      (instance? Character match)
      (replace-first-char s match replacement)

      (instance? CharSequence match)
      (.replaceFirst s
                     (Pattern/quote (.toString ^CharSequence match))
                     ;; Quote the replacement too; otherwise a literal
                     ;; replacement containing $ or \ is misinterpreted.
                     (java.util.regex.Matcher/quoteReplacement
                       (.toString ^CharSequence replacement)))

      (instance? Pattern match)
      (if (instance? CharSequence replacement)
        (.replaceFirst (re-matcher ^Pattern match s)
                       (.toString ^CharSequence replacement))
        (replace-first-by s match replacement))

      :else (throw (IllegalArgumentException. (str "Invalid match arg: " match))))))
(defn ^String join
  "Concatenates the string forms of the elements of coll, in the order
  given by (seq coll). With a separator, its string form is placed
  between each pair of adjacent elements."
  {:added "1.2"}
  ([coll]
   (apply str coll))
  ([separator coll]
   (let [sb (StringBuilder. (str (first coll)))
         remaining (next coll)
         sep (str separator)]
     ;; The first element is already in the builder; every later element
     ;; is preceded by the separator.
     (doseq [item remaining]
       (.append sb sep)
       (.append sb (str item)))
     (.toString sb))))
(defn ^String capitalize
  "Upper-cases the first character of the string and lower-cases every
  character after it."
  {:added "1.2"}
  [^CharSequence s]
  (let [text (.toString s)]
    (if (>= (count text) 2)
      (let [head (subs text 0 1)
            tail (subs text 1)]
        (str (.toUpperCase head) (.toLowerCase tail)))
      ;; Zero or one character: upper-casing the whole thing is enough.
      (.toUpperCase text))))
(defn ^String upper-case
  "Returns the string form of s with every character upper-cased."
  {:added "1.2"}
  [^CharSequence s]
  (-> s .toString .toUpperCase))
(defn ^String lower-case
  "Returns the string form of s with every character lower-cased."
  {:added "1.2"}
  [^CharSequence s]
  (-> s .toString .toLowerCase))
(defn split
  "Splits the string s around matches of the regular expression re and
  returns the pieces as a vector. Not lazy. The optional limit argument
  is handed to Pattern.split to cap the number of pieces."
  {:added "1.2"}
  ([^CharSequence s ^Pattern re]
   (let [pieces (.split re s)]
     (LazilyPersistentVector/createOwning pieces)))
  ([^CharSequence s ^Pattern re limit]
   (let [pieces (.split re s limit)]
     (LazilyPersistentVector/createOwning pieces))))
(defn split-lines
  "Breaks s into its lines, treating \\n and \\r\\n as line separators."
  {:added "1.2"}
  [^CharSequence s]
  (let [line-break #"\r?\n"]
    (split s line-break)))
(defn ^String trim
  "Strips whitespace from both ends of the string, delegating to
  String.trim on the string form of s."
  {:added "1.2"}
  [^CharSequence s]
  (-> s .toString .trim))
(defn ^String triml
  "Strips leading whitespace (per Character/isWhitespace) from the
  string. Returns the empty string when s is empty or all whitespace."
  {:added "1.2"}
  [^CharSequence s]
  (let [len (.length s)]
    ;; Walk forward until the first non-whitespace character.
    (loop [i (int 0)]
      (cond
        (= i len) ""
        (Character/isWhitespace (.charAt s i)) (recur (inc i))
        :else (.toString (.subSequence s i len))))))
(defn ^String trimr
  "Strips trailing whitespace (per Character/isWhitespace) from the
  string. Returns the empty string when s is empty or all whitespace."
  {:added "1.2"}
  [^CharSequence s]
  ;; end is the exclusive upper bound of the part still being kept.
  (loop [end (.length s)]
    (cond
      (zero? end) ""
      (Character/isWhitespace (.charAt s (dec end))) (recur (dec end))
      :else (.toString (.subSequence s 0 end)))))
(defn ^String trim-newline
  "Strips every trailing newline \\n and return \\r character from the
  string, much like Perl's chomp. Returns the empty string when s is
  empty or consists only of those characters."
  {:added "1.2"}
  [^CharSequence s]
  ;; end is the exclusive upper bound of the part still being kept.
  (loop [end (.length s)]
    (if (zero? end)
      ""
      (let [last-ch (.charAt s (dec end))]
        (if (and (not= last-ch \newline) (not= last-ch \return))
          (.toString (.subSequence s 0 end))
          (recur (dec end)))))))
(defn blank?
  "Returns true when s is nil, has no characters, or consists solely of
  whitespace (per Character/isWhitespace); false otherwise."
  {:added "1.2"}
  [^CharSequence s]
  (if-not s
    true
    (let [len (.length s)]
      ;; Scan until a non-whitespace character is seen or input runs out.
      (loop [i (int 0)]
        (cond
          (= i len) true
          (Character/isWhitespace (.charAt s i)) (recur (inc i))
          :else false)))))
(defn ^String escape
  "Builds a new string from s by looking each character ch up in cmap:

  If (cmap ch) yields a replacement, the replacement is appended.
  Otherwise ch itself is appended."
  {:added "1.2"}
  [^CharSequence s cmap]
  (let [len (.length s)
        out (StringBuilder. len)]
    (dotimes [i len]
      (let [ch (.charAt s i)
            replacement (cmap ch)]
        (if replacement
          (.append out replacement)
          (.append out ch))))
    (.toString out)))