Skip to content

Commit

Permalink
#359 driven to abstraction: CharSequence, not string
Browse files Browse the repository at this point in the history
Signed-off-by: Stuart Halloway <stu@thinkrelevance.com>
  • Loading branch information
stuarthalloway committed Jun 4, 2010
1 parent 314aa4c commit 4860f9e
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 57 deletions.
2 changes: 1 addition & 1 deletion src/clj/clojure/core.clj
Original file line number Diff line number Diff line change
Expand Up @@ -3925,7 +3925,7 @@
{:added "1.0"}
[v] (instance? clojure.lang.Var v))

(defn subs
(defn ^String subs
"Returns the substring of s beginning at start inclusive, and ending
at end (defaults to length of string), exclusive."
{:added "1.0"}
Expand Down
143 changes: 87 additions & 56 deletions src/clj/clojure/string.clj
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,39 @@
; the terms of this license.
; You must not remove this notice, or any other, from this software.

(ns ^{:doc "String utilities"
:author "Stuart Sierra"}
(ns ^{:doc "Clojure String utilities
clojure.string adheres to the following design principles:
1. Strings are objects (as opposed to sequences). As such, the
string being manipulated is the first argument to a function;
passing nil will result in a NullPointerException. If you
want sequence-y behavior instead, use a sequence.
2. Functions are generally not lazy, and call straight to host
methods where those are available and efficient.
3. When a function is documented to accept a string argument, it
will take any implementation of the correct *interface* on the
host platform. In Java, this is CharSequence, which is more
general than String. In ordinary usage you will almost always
pass concrete strings. If you are doing something unusual,
e.g. passing a mutable implementation of CharSequence, then
thread-safety is your responsibility."
:author "Stuart Sierra, Stuart Halloway, David Liebke"}
clojure.string
(:refer-clojure :exclude (replace reverse))
(:import (java.util.regex Pattern)
clojure.lang.LazilyPersistentVector))

(defn ^String reverse
(defn ^CharSequence reverse
"Returns s with its characters reversed."
{:added "1.2"}
[^String s]
[^CharSequence s]
(.toString (.reverse (StringBuilder. s))))

(defn- replace-by
[^String s re f]
[^CharSequence s re f]
(let [m (re-matcher re s)]
(let [buffer (StringBuffer. (.length s))]
(loop []
Expand All @@ -41,19 +59,18 @@
See also replace-first."
{:added "1.2"}
[^String s match replacement]
(cond
(instance? Character match) (.replace s ^Character match ^Character replacement)
(instance? String match) (.replace s ^String match ^String replacement)
(instance? Pattern match) (if (string? replacement)
(.replaceAll (re-matcher ^Pattern match s) ^String replacement)
(replace-by s match replacement))
:else (throw (IllegalArgumentException. (str "Invalid match arg: " match)))))
[^CharSequence s match replacement]
(let [s (.toString s)]
(cond
(instance? Character match) (.replace s ^Character match ^Character replacement)
(instance? CharSequence match) (.replace s ^CharSequence match ^CharSequence replacement)
(instance? Pattern match) (if (string? replacement)
(.replaceAll (re-matcher ^Pattern match s) ^CharSequence replacement)
(replace-by s match replacement))
:else (throw (IllegalArgumentException. (str "Invalid match arg: " match))))))

(defn- replace-first-by
"Replace first match of re in s with the result of
(f (re-groups the-match))."
[^String s ^Pattern re f]
[^CharSequence s ^Pattern re f]
(let [m (re-matcher re s)]
(let [buffer (StringBuffer.)]
(if (.find m)
Expand All @@ -62,29 +79,42 @@
(.appendTail m buffer)
(str buffer))))))

(defn- replace-first-char
  "Returns the string form of s with the first occurrence of the
  character match swapped for replace. If match does not occur,
  the string form of s is returned as is."
  [^CharSequence s ^Character match replace]
  (let [text (.toString s)
        pos (.indexOf text (int match))]
    ;; String.indexOf yields a negative index when the char is absent
    (if (neg? pos)
      text
      (let [head (subs text 0 pos)
            tail (subs text (inc pos))]
        (str head replace tail)))))

(defn replace-first
"Replaces the first instance of match with replacement in s.
match/replacement can be:
string / string
char / char
string / string
pattern / (string or function of match).
See also replace."
{:added "1.2"}
[^String s match replacement]
(cond
(instance? String match)
(.replaceFirst s (Pattern/quote ^String match) ^String replacement)
(instance? Pattern match)
(if (string? replacement)
(.replaceFirst (re-matcher ^Pattern match s) ^String replacement)
(replace-first-by s match replacement))
:else (throw (IllegalArgumentException. (str "Invalid match arg: " match)))))


(defn ^String join
[^CharSequence s match replacement]
(let [s (.toString s)]
(cond
(instance? Character match)
(replace-first-char s match replacement)
(instance? CharSequence match)
(.replaceFirst s (Pattern/quote (.toString ^CharSequence match))
(.toString ^CharSequence replacement))
(instance? Pattern match)
(if (instance? CharSequence replacement)
(.replaceFirst (re-matcher ^Pattern match s)
(.toString ^CharSequence replacement))
(replace-first-by s match replacement))
:else (throw (IllegalArgumentException. (str "Invalid match arg: " match))))))


(defn ^CharSequence join
"Returns a string of all elements in coll, separated by
an optional separator. Like Perl's join."
{:added "1.2"}
Expand All @@ -100,76 +130,77 @@
sep)
(str sb)))))

(defn ^String capitalize
(defn ^CharSequence capitalize
"Converts first character of the string to upper-case, all other
characters to lower-case."
{:added "1.2"}
[^String s]
(if (< (count s) 2)
(.toUpperCase s)
(str (.toUpperCase ^String (subs s 0 1))
(.toLowerCase ^String (subs s 1)))))

(defn ^String upper-case
[^CharSequence s]
(let [s (.toString s)]
(if (< (count s) 2)
(.toUpperCase s)
(str (.toUpperCase (subs s 0 1))
(.toLowerCase (subs s 1))))))

(defn ^CharSequence upper-case
"Converts string to all upper-case."
{:added "1.2"}
[^String s]
(.toUpperCase s))
[^CharSequence s]
(.. s toString toUpperCase))

(defn ^String lower-case
(defn ^CharSequence lower-case
"Converts string to all lower-case."
{:added "1.2"}
[^String s]
(.toLowerCase s))
[^CharSequence s]
(.. s toString toLowerCase))

(defn split
"Splits string on a regular expression. Optional argument limit is
the maximum number of splits. Not lazy. Returns vector of the splits."
{:added "1.2"}
([^String s ^Pattern re]
([^CharSequence s ^Pattern re]
(LazilyPersistentVector/createOwning (.split re s)))
([ ^String s ^Pattern re limit]
([ ^CharSequence s ^Pattern re limit]
(LazilyPersistentVector/createOwning (.split re s limit))))

(defn ^String trim
(defn ^CharSequence trim
"Removes whitespace from both ends of string."
{:added "1.2"}
[^String s]
(.trim s))
[^CharSequence s]
(.. s toString trim))

(defn ^String triml
(defn ^CharSequence triml
"Removes whitespace from the left side of string."
{:added "1.2"}
[^String s]
[^CharSequence s]
(loop [index (int 0)]
(if (= (.length s) index)
""
(if (Character/isWhitespace (.charAt s index))
(recur (inc index))
(.substring s index)))))
(.. s (subSequence index (.length s)) toString)))))

(defn ^String trimr
(defn ^CharSequence trimr
"Removes whitespace from the right side of string."
{:added "1.2"}
[^String s]
[^CharSequence s]
(loop [index (.length s)]
(if (zero? index)
""
(if (Character/isWhitespace (.charAt s (dec index)))
(recur (dec index))
(.substring s 0 index)))))
(.. s (subSequence 0 index) toString)))))

(defn ^String trim-newline
(defn ^CharSequence trim-newline
"Removes all trailing newline \\n or return \\r characters from
string. Similar to Perl's chomp."
{:added "1.2"}
[^String s]
[^CharSequence s]
(loop [index (.length s)]
(if (zero? index)
""
(let [ch (.charAt s (dec index))]
(if (or (= ch \newline) (= ch \return))
(recur (dec index))
(.substring s 0 index))))))
(.. s (subSequence 0 index) toString))))))


19 changes: 19 additions & 0 deletions test/clojure/test_clojure/string.clj
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
(deftest t-replace-first
(is (= "barbarfoo" (s/replace-first "foobarfoo" "foo" "bar")))
(is (= "barbarfoo" (s/replace-first "foobarfoo" #"foo" "bar")))
(is (= "z.ology" (s/replace-first "zoology" \o \.)))
(is (= "FOObarfoo" (s/replace-first "foobarfoo" #"foo" s/upper-case))))

(deftest t-join
Expand Down Expand Up @@ -73,3 +74,21 @@
s/triml [nil]
s/trimr [nil]
s/trim-newline [nil]))

;; Checks that the string functions accept a CharSequence that is not a
;; String. Each table row is: expected result, function under test, and a
;; vector of arguments. The first argument of every row is wrapped in a
;; StringBuffer before the function is applied; the remaining arguments
;; are passed through unchanged.
;; NOTE(review): `s/` is presumably the alias for clojure.string declared
;; in this test namespace's ns form, which is not visible here -- confirm.
(deftest char-sequence-handling
(are [result f args] (let [[^CharSequence s & more] args]
(= result (apply f (StringBuffer. s) more)))
"paz" s/reverse ["zap"]
"foo:bar" s/replace ["foo-bar" \- \:]
"ABC" s/replace ["abc" #"\w" s/upper-case]
"baz::quux" s/replace-first ["baz--quux" #"--" "::"]
;; match and replacement may themselves be StringBuffers
"baz::quux" s/replace-first ["baz--quux" (StringBuffer. "--") (StringBuffer. "::")]
"zim-zam" s/replace-first ["zim zam" #" " (StringBuffer. "-")]
"Pow" s/capitalize ["POW"]
"BOOM" s/upper-case ["boom"]
"whimper" s/lower-case ["whimPER"]
["foo" "bar"] s/split ["foo-bar" #"-"]
"calvino" s/trim [" calvino "]
"calvino " s/triml [" calvino "]
" calvino" s/trimr [" calvino "]
"the end" s/trim-newline ["the end\r\n\r\r\n"]))

0 comments on commit 4860f9e

Please sign in to comment.