Skip to content

Commit

Permalink
#359 driven to abstraction: CharSequence, not string
Browse files Browse the repository at this point in the history
Signed-off-by: Stuart Halloway <stu@thinkrelevance.com>
  • Loading branch information
stuarthalloway committed Jun 4, 2010
1 parent 314aa4c commit 4860f9e
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 57 deletions.
2 changes: 1 addition & 1 deletion src/clj/clojure/core.clj
Expand Up @@ -3925,7 +3925,7 @@
{:added "1.0"} {:added "1.0"}
[v] (instance? clojure.lang.Var v)) [v] (instance? clojure.lang.Var v))


(defn subs (defn ^String subs
"Returns the substring of s beginning at start inclusive, and ending "Returns the substring of s beginning at start inclusive, and ending
at end (defaults to length of string), exclusive." at end (defaults to length of string), exclusive."
{:added "1.0"} {:added "1.0"}
Expand Down
143 changes: 87 additions & 56 deletions src/clj/clojure/string.clj
Expand Up @@ -6,21 +6,39 @@
; the terms of this license. ; the terms of this license.
; You must not remove this notice, or any other, from this software. ; You must not remove this notice, or any other, from this software.


(ns ^{:doc "String utilities" (ns ^{:doc "Clojure String utilities
:author "Stuart Sierra"}
clojure.string adheres to the following design principles:
1. Strings are objects (as opposed to sequences). As such, the
string being manipulated is the first argument to a function;
passing nil will result in a NullPointerException. If you
want sequence-y behavior instead, use a sequence.
2. Functions are generally not lazy, and call straight to host
methods where those are available and efficient.
3. When a function is documented to accept a string argument, it
will take any implementation of the correct *interface* on the
host platform. In Java, this is CharSequence, which is more
general than String. In ordinary usage you will almost always
pass concrete strings. If you are doing something unusual,
e.g. passing a mutable implementation of CharSequence, then
thread-safety is your responsibility."
:author "Stuart Sierra, Stuart Halloway, David Liebke"}
clojure.string clojure.string
(:refer-clojure :exclude (replace reverse)) (:refer-clojure :exclude (replace reverse))
(:import (java.util.regex Pattern) (:import (java.util.regex Pattern)
clojure.lang.LazilyPersistentVector)) clojure.lang.LazilyPersistentVector))


(defn ^String reverse (defn ^CharSequence reverse
"Returns s with its characters reversed." "Returns s with its characters reversed."
{:added "1.2"} {:added "1.2"}
[^String s] [^CharSequence s]
(.toString (.reverse (StringBuilder. s)))) (.toString (.reverse (StringBuilder. s))))


(defn- replace-by (defn- replace-by
[^String s re f] [^CharSequence s re f]
(let [m (re-matcher re s)] (let [m (re-matcher re s)]
(let [buffer (StringBuffer. (.length s))] (let [buffer (StringBuffer. (.length s))]
(loop [] (loop []
Expand All @@ -41,19 +59,18 @@
See also replace-first." See also replace-first."
{:added "1.2"} {:added "1.2"}
[^String s match replacement] [^CharSequence s match replacement]
(cond (let [s (.toString s)]
(instance? Character match) (.replace s ^Character match ^Character replacement) (cond
(instance? String match) (.replace s ^String match ^String replacement) (instance? Character match) (.replace s ^Character match ^Character replacement)
(instance? Pattern match) (if (string? replacement) (instance? CharSequence match) (.replace s ^CharSequence match ^CharSequence replacement)
(.replaceAll (re-matcher ^Pattern match s) ^String replacement) (instance? Pattern match) (if (string? replacement)
(replace-by s match replacement)) (.replaceAll (re-matcher ^Pattern match s) ^CharSequence replacement)
:else (throw (IllegalArgumentException. (str "Invalid match arg: " match))))) (replace-by s match replacement))
:else (throw (IllegalArgumentException. (str "Invalid match arg: " match))))))


(defn- replace-first-by (defn- replace-first-by
"Replace first match of re in s with the result of [^CharSequence s ^Pattern re f]
(f (re-groups the-match))."
[^String s ^Pattern re f]
(let [m (re-matcher re s)] (let [m (re-matcher re s)]
(let [buffer (StringBuffer.)] (let [buffer (StringBuffer.)]
(if (.find m) (if (.find m)
Expand All @@ -62,29 +79,42 @@
(.appendTail m buffer) (.appendTail m buffer)
(str buffer)))))) (str buffer))))))


(defn- replace-first-char
[^CharSequence s ^Character match replace]
(let [s (.toString s)
i (.indexOf s (int match))]
(if (= -1 i)
s
(str (subs s 0 i) replace (subs s (inc i))))))

(defn replace-first (defn replace-first
"Replaces the first instance of match with replacement in s. "Replaces the first instance of match with replacement in s.
match/replacement can be: match/replacement can be:
string / string
char / char char / char
string / string
pattern / (string or function of match). pattern / (string or function of match).
See also replace." See also replace."
{:added "1.2"} {:added "1.2"}
[^String s match replacement] [^CharSequence s match replacement]
(cond (let [s (.toString s)]
(instance? String match) (cond
(.replaceFirst s (Pattern/quote ^String match) ^String replacement) (instance? Character match)
(instance? Pattern match) (replace-first-char s match replacement)
(if (string? replacement) (instance? CharSequence match)
(.replaceFirst (re-matcher ^Pattern match s) ^String replacement) (.replaceFirst s (Pattern/quote (.toString ^CharSequence match))
(replace-first-by s match replacement)) (.toString ^CharSequence replacement))
:else (throw (IllegalArgumentException. (str "Invalid match arg: " match))))) (instance? Pattern match)

(if (instance? CharSequence replacement)

(.replaceFirst (re-matcher ^Pattern match s)
(defn ^String join (.toString ^CharSequence replacement))
(replace-first-by s match replacement))
:else (throw (IllegalArgumentException. (str "Invalid match arg: " match))))))


(defn ^CharSequence join
"Returns a string of all elements in coll, separated by "Returns a string of all elements in coll, separated by
an optional separator. Like Perl's join." an optional separator. Like Perl's join."
{:added "1.2"} {:added "1.2"}
Expand All @@ -100,76 +130,77 @@
sep) sep)
(str sb))))) (str sb)))))


(defn ^String capitalize (defn ^CharSequence capitalize
"Converts first character of the string to upper-case, all other "Converts first character of the string to upper-case, all other
characters to lower-case." characters to lower-case."
{:added "1.2"} {:added "1.2"}
[^String s] [^CharSequence s]
(if (< (count s) 2) (let [s (.toString s)]
(.toUpperCase s) (if (< (count s) 2)
(str (.toUpperCase ^String (subs s 0 1)) (.toUpperCase s)
(.toLowerCase ^String (subs s 1))))) (str (.toUpperCase (subs s 0 1))

(.toLowerCase (subs s 1))))))
(defn ^String upper-case
(defn ^CharSequence upper-case
"Converts string to all upper-case." "Converts string to all upper-case."
{:added "1.2"} {:added "1.2"}
[^String s] [^CharSequence s]
(.toUpperCase s)) (.. s toString toUpperCase))


(defn ^String lower-case (defn ^CharSequence lower-case
"Converts string to all lower-case." "Converts string to all lower-case."
{:added "1.2"} {:added "1.2"}
[^String s] [^CharSequence s]
(.toLowerCase s)) (.. s toString toLowerCase))


(defn split (defn split
"Splits string on a regular expression. Optional argument limit is "Splits string on a regular expression. Optional argument limit is
the maximum number of splits. Not lazy. Returns vector of the splits." the maximum number of splits. Not lazy. Returns vector of the splits."
{:added "1.2"} {:added "1.2"}
([^String s ^Pattern re] ([^CharSequence s ^Pattern re]
(LazilyPersistentVector/createOwning (.split re s))) (LazilyPersistentVector/createOwning (.split re s)))
([ ^String s ^Pattern re limit] ([ ^CharSequence s ^Pattern re limit]
(LazilyPersistentVector/createOwning (.split re s limit)))) (LazilyPersistentVector/createOwning (.split re s limit))))


(defn ^String trim (defn ^CharSequence trim
"Removes whitespace from both ends of string." "Removes whitespace from both ends of string."
{:added "1.2"} {:added "1.2"}
[^String s] [^CharSequence s]
(.trim s)) (.. s toString trim))


(defn ^String triml (defn ^CharSequence triml
"Removes whitespace from the left side of string." "Removes whitespace from the left side of string."
{:added "1.2"} {:added "1.2"}
[^String s] [^CharSequence s]
(loop [index (int 0)] (loop [index (int 0)]
(if (= (.length s) index) (if (= (.length s) index)
"" ""
(if (Character/isWhitespace (.charAt s index)) (if (Character/isWhitespace (.charAt s index))
(recur (inc index)) (recur (inc index))
(.substring s index))))) (.. s (subSequence index (.length s)) toString)))))


(defn ^String trimr (defn ^CharSequence trimr
"Removes whitespace from the right side of string." "Removes whitespace from the right side of string."
{:added "1.2"} {:added "1.2"}
[^String s] [^CharSequence s]
(loop [index (.length s)] (loop [index (.length s)]
(if (zero? index) (if (zero? index)
"" ""
(if (Character/isWhitespace (.charAt s (dec index))) (if (Character/isWhitespace (.charAt s (dec index)))
(recur (dec index)) (recur (dec index))
(.substring s 0 index))))) (.. s (subSequence 0 index) toString)))))


(defn ^String trim-newline (defn ^CharSequence trim-newline
"Removes all trailing newline \\n or return \\r characters from "Removes all trailing newline \\n or return \\r characters from
string. Similar to Perl's chomp." string. Similar to Perl's chomp."
{:added "1.2"} {:added "1.2"}
[^String s] [^CharSequence s]
(loop [index (.length s)] (loop [index (.length s)]
(if (zero? index) (if (zero? index)
"" ""
(let [ch (.charAt s (dec index))] (let [ch (.charAt s (dec index))]
(if (or (= ch \newline) (= ch \return)) (if (or (= ch \newline) (= ch \return))
(recur (dec index)) (recur (dec index))
(.substring s 0 index)))))) (.. s (subSequence 0 index) toString))))))




19 changes: 19 additions & 0 deletions test/clojure/test_clojure/string.clj
Expand Up @@ -18,6 +18,7 @@
(deftest t-replace-first (deftest t-replace-first
(is (= "barbarfoo" (s/replace-first "foobarfoo" "foo" "bar"))) (is (= "barbarfoo" (s/replace-first "foobarfoo" "foo" "bar")))
(is (= "barbarfoo" (s/replace-first "foobarfoo" #"foo" "bar"))) (is (= "barbarfoo" (s/replace-first "foobarfoo" #"foo" "bar")))
(is (= "z.ology" (s/replace-first "zoology" \o \.)))
(is (= "FOObarfoo" (s/replace-first "foobarfoo" #"foo" s/upper-case)))) (is (= "FOObarfoo" (s/replace-first "foobarfoo" #"foo" s/upper-case))))


(deftest t-join (deftest t-join
Expand Down Expand Up @@ -73,3 +74,21 @@
s/triml [nil] s/triml [nil]
s/trimr [nil] s/trimr [nil]
s/trim-newline [nil])) s/trim-newline [nil]))

(deftest char-sequence-handling
(are [result f args] (let [[^CharSequence s & more] args]
(= result (apply f (StringBuffer. s) more)))
"paz" s/reverse ["zap"]
"foo:bar" s/replace ["foo-bar" \- \:]
"ABC" s/replace ["abc" #"\w" s/upper-case]
"baz::quux" s/replace-first ["baz--quux" #"--" "::"]
"baz::quux" s/replace-first ["baz--quux" (StringBuffer. "--") (StringBuffer. "::")]
"zim-zam" s/replace-first ["zim zam" #" " (StringBuffer. "-")]
"Pow" s/capitalize ["POW"]
"BOOM" s/upper-case ["boom"]
"whimper" s/lower-case ["whimPER"]
["foo" "bar"] s/split ["foo-bar" #"-"]
"calvino" s/trim [" calvino "]
"calvino " s/triml [" calvino "]
" calvino" s/trimr [" calvino "]
"the end" s/trim-newline ["the end\r\n\r\r\n"]))

0 comments on commit 4860f9e

Please sign in to comment.