Skip to content
Browse files

Merge branch 'bencode-improvements2' of https://github.com/kotarak/to…

  • Loading branch information...
2 parents 3b60ef7 + 1de83a8 commit 275f79fcee80bf96dc098616ba9346f3a2e83a7f @cemerick cemerick committed Aug 20, 2012
View
2 pom.xml
@@ -83,4 +83,4 @@
</plugin>
</plugins>
</build>
-</project>
+</project>
View
206 src/main/clojure/clojure/tools/nrepl/bencode.clj
@@ -60,6 +60,76 @@
;; sized message buffer. The trailing comma serves as a hint to detect
;; incorrect netstrings.
;;
+;; ## Low-level reading
+;;
+;; We will need some low-level reading helpers to read the bytes from
+;; the input stream. These are `read-byte` as well as `read-bytes`. They
+;; are split out, because doing such a simple task as reading a byte is
+;; mild catastrophe in Java. So it would add some clutter to the algorithm
+;; `read-netstring`.
+;;
+;; On the other hand they might be also useful elsewhere.
+;;
+;; To remove some magic numbers from the code below.
+
+(def #^{:const true} i 105)
+(def #^{:const true} l 108)
+(def #^{:const true} d 100)
+(def #^{:const true} comma 44)
+(def #^{:const true} minus 45)
+
+;; These two are only used boxed. So we keep them extra here.
+
+(def e 101)
+(def colon 58)
+
+(defn #^{:private true} read-byte
+ #^long [#^InputStream input]
+ (let [c (.read input)]
+ (when (neg? c)
+ (throw
+ (Exception. "Invalid netstring. Unexpected end of input.")))
+ ;; Here we have a quirk for example. `.read` returns -1 on end of
+ ;; input. However the Java `Byte` has only a range from -128 to 127.
+ ;; How does the fit together?
+ ;;
+ ;; The whole thing is shifted. `.read` actually returns an int
+ ;; between zero and 255. Everything below the value 128 stands
+ ;; for itself. But larger values are actually negative byte values.
+ ;;
+ ;; So we have to do some translation here. `Byte/byteValue` would
+ ;; do that for us, but we want to avoid boxing here.
+ (if (< 127 c) (- c 256) c)))
+
+(defn #^{:private true :tag "[B"} read-bytes
+ #^Object [#^InputStream input n]
+ (let [content (byte-array n)]
+ (loop [offset (int 0)
+ len (int n)]
+ (let [result (.read input content offset len)]
+ (when (neg? result)
+ (throw
+ (Exception.
+ "Invalid netstring. Less data available than expected.")))
+ (when (not= result len)
+ (recur (+ offset result) (- len result)))))
+ content))
+
+;; `read-long` is used for reading integers from the stream as well
+;; as the byte count prefixes of byte strings. The delimiter is \:
+;; for byte count prefixes and \e for integers.
+
+(defn #^{:private true} read-long
+ #^long [#^InputStream input delim]
+ (loop [n (long 0)]
+ ;; We read repeatedly a byte from the input…
+ (let [b (read-byte input)]
+ ;; …and stop at the delimiter.
+ (cond
+ (= b minus) (- (read-long input delim))
+ (= b delim) n
+ :else (recur (+ (* n (long 10)) (- (long b) (long 48))))))))
+
;; ## Reading a netstring
;;
;; Let's dive straight into reading a netstring from an `InputStream`.
@@ -80,39 +150,12 @@
;;
;; With this in mind we define the inner helper function first.
-(declare read-byte
- read-bytes
- #^"[B" string>payload
+(declare #^"[B" string>payload
#^String string<payload)
-(def i (Byte/valueOf (byte 105)))
-(def l (Byte/valueOf (byte 108)))
-(def d (Byte/valueOf (byte 100)))
-(def e (Byte/valueOf (byte 101)))
-(def colon (Byte/valueOf (byte 58)))
-(def comma (Byte/valueOf (byte 44)))
-
(defn #^{:private true} read-netstring*
[input]
- (let [reader #(read-byte input)
- ;; We read repeatedly a byte from the input…
- prefix (->> reader
- repeatedly
- ;; …and stop at the colon following the prefix of
- ;; the byte count.
- (take-while (complement #{colon}))
- (into-array Byte/TYPE))
- ;; The byte count is now obtained by interpreting the bytes
- ;; of the prefix as a number encoded in decimal format in
- ;; an UTF-8 string.
- ;;
- ;; *Note:* We **always** encode strings into unicode by virtue
- ;; of UTF-8 when sending things over the wire. In this case
- ;; it wouldn't make a difference, because the digits are the
- ;; same in UTF-8 and ASCII, but for the general case it is
- ;; important to keep it in mind.
- cnt (-> prefix string<payload Integer/valueOf)]
- (read-bytes input cnt)))
+ (read-bytes input (read-long input colon)))
;; And the public facing API: `read-netstring`.
@@ -125,46 +168,6 @@
(throw (Exception. "Invalid netstring. ',' expected.")))
content))
-;; The astute reader might have noticed that there are several helpers
-;; which are mentioned, but not defined, yet. These are `read-byte`
-;; as well as `read-bytes`. They are split out, because doing such
-;; a simple task as reading a byte is mild catastrophe in Java. So
-;; it would add some clutter to the algorithm `read-netstring`.
-;;
-;; On the other hand they might be also useful elsewhere.
-
-(defn #^{:private true :tag Byte} read-byte
- [#^InputStream input]
- (let [c (.read input)]
- (when (neg? c)
- (throw
- (Exception. "Invalid netstring. Unexpected end of input.")))
- ;; Here we have a quirk for example. `.read` returns -1 on end of
- ;; input. However the Java `Byte` has only a range from -128 to 127.
- ;; How does the fit together?
- ;;
- ;; The whole thing is shifted. `.read` actually returns an int
- ;; between zero and 255. Everything below the value 128 stands
- ;; for itself. But larger values are actually negative byte values.
- ;;
- ;; So we have to do some translation here. Luckily `.byteValue`
- ;; does that for us.
- (Byte/valueOf (.byteValue c))))
-
-(defn #^{:private true :tag "[B"} read-bytes
- [#^InputStream input n]
- (let [content (byte-array n)]
- (loop [offset 0
- len n]
- (let [result (.read input content offset len)]
- (when (neg? result)
- (throw
- (Exception.
- "Invalid netstring. Less data available than expected.")))
- (when (not= result len)
- (recur (+ offset result) (- len result)))))
- content))
-
;; Similarly the `string>payload` and `string<payload` functions
;; are defined as follows to simplify the conversion between strings
;; and byte arrays in various parts of the code.
@@ -236,14 +239,14 @@
(defn #^{:private true} read-token
[#^PushbackInputStream input]
(let [ch (read-byte input)]
- (condp = ch
- i :integer
- l :list
- d :map
- e nil
- (do
- (.unread input (int ch))
- (string<payload (read-netstring* input))))))
+ (cond
+ (= (long e) ch) nil
+ (= i ch) :integer
+ (= l ch) :list
+ (= d ch) :map
+ :else (do
+ (.unread input (int ch))
+ (read-netstring* input)))))
;; To read the bencode encoded data we walk a long the sequence of tokens
;; and act according to the found tags.
@@ -266,12 +269,7 @@
(defn #^{:private true} read-integer
[input]
- (->> #(read-byte input)
- repeatedly
- (take-while (complement #{e}))
- (into-array Byte/TYPE)
- string<payload
- Integer/valueOf))
+ (read-long input e))
;; *Note:* integers are an ugly special case, which cannot be
;; handled with `read-token` or `read-netstring*`.
@@ -284,11 +282,15 @@
[input]
(vec (token-seq input)))
-;; Maps are sequences of key/value pairs.
+;; Maps are sequences of key/value pairs. The keys are always
+;; decoded into strings. The values are kept as is.
(defn #^{:private true} read-map
[input]
- (apply hash-map (token-seq input)))
+ (->> (token-seq input)
+ (partition 2)
+ (map (fn [[k v]] [(string<payload k) v]))
+ (into {})))
;; The final missing piece is `token-seq`. This a just a simple
;; sequence which reads tokens until the next `\e`.
@@ -321,9 +323,8 @@
(symbol? thing) :named
(keyword? thing) :named
(map? thing) :map
- (or (coll? thing)
- (.isArray (class thing)))
- :list)))
+ (or (nil? thing) (coll? thing) (.isArray (class thing))) :list
+ :else (type thing))))
(defmethod write-bencode :default
[output x]
@@ -417,32 +418,3 @@
(if (zero? x)
(recur (inc i))
x))))))
-
-;; ## Special cases
-;;
-;; Sometimes one really wants to read the byte array coming from the wire.
-;; Without converting it to a UTF-8 string. A use case would be enhanced
-;; REPL interaction transferring not only strings, but also eg. picture
-;; information as opaque byte information.
-;;
-;; To accomodate for theses uses we expose here some special cases.
-;;
-;; `read-bencode-netstring` is really only a public façade for
-;; `read-netstring*`. However, the latter is only an implementation detail
-;; while the former is a promise.
-
-(defn read-bencode-netstring
- "Read a netstring in bencode format. That means without trailing comma.
- Returns the byte array of read bytes."
- [input]
- (read-netstring* input))
-
-;; Of course this is complemented by `write-bencode-netstring` for writing
-;; binary data in bencode netstring format. And similar this is only a
-;; façade.
-
-(defn write-bencode-netstring
- "Write binary content in bencode netstring format. That means without
- trailing comma. Takes a byte array as content."
- [output content]
- (write-netstring* output content))
View
32 src/main/clojure/clojure/tools/nrepl/transport.clj
@@ -10,7 +10,8 @@
PushbackReader)
java.net.Socket
(java.util.concurrent SynchronousQueue LinkedBlockingQueue
- BlockingQueue TimeUnit)))
+ BlockingQueue TimeUnit)
+ clojure.lang.RT))
(defprotocol Transport
"Defines the interface for a wire protocol implementation for use
@@ -42,6 +43,26 @@
write
close))))
+(defmulti #^{:private true} <bytes class)
+
+(defmethod <bytes :default
+ [input]
+ input)
+
+(defmethod <bytes (RT/classForName "[B")
+ [#^"[B" input]
+ (String. input "UTF-8"))
+
+(defmethod <bytes clojure.lang.IPersistentVector
+ [input]
+ (vec (map <bytes input)))
+
+(defmethod <bytes clojure.lang.IPersistentMap
+ [input]
+ (->> input
+ (map (fn [[k v]] [k (<bytes v)]))
+ (into {})))
+
(defn bencode
"Returns a Transport implementation that serializes messages
over the given Socket or InputStream/OutputStream using bencode."
@@ -50,7 +71,10 @@
(let [in (PushbackInputStream. (io/input-stream in))
out (io/output-stream out)]
(fn-transport
- #(be/read-bencode in)
+ #(let [payload (be/read-bencode in)
+ unencoded (<bytes (payload "-unencoded"))
+ to-decode (apply dissoc payload "-unencoded" unencoded)]
+ (merge payload {"-unencoded" unencoded} (<bytes to-decode)))
#(locking out
(doto out
(be/write-bencode %)
@@ -89,7 +113,7 @@
(deliver head (first s))
(rest s)))
@head)]
- (fn-transport read write
+ (fn-transport read write
(when s
(swap! read-seq (partial cons {:session @session-id :op "close"}))
#(.close s))))))
@@ -116,4 +140,4 @@
[]
(let [a (LinkedBlockingQueue.)
b (LinkedBlockingQueue.)]
- [(QueueTransport. a b) (QueueTransport. b a)]))
+ [(QueueTransport. a b) (QueueTransport. b a)]))
View
41 src/test/clojure/clojure/tools/nrepl/bencode_test.clj
@@ -12,7 +12,8 @@
(:import
java.io.ByteArrayInputStream
java.io.ByteArrayOutputStream
- java.io.PushbackInputStream)
+ java.io.PushbackInputStream
+ clojure.lang.RT)
(:use
[clojure.test :only [deftest is are]]
clojure.tools.nrepl.bencode))
@@ -21,20 +22,37 @@
[#^String input]
(.getBytes input "UTF-8"))
-(defn #^{:private true} <bytes
+(defmulti #^{:private true} <bytes class)
+
+(defmethod <bytes :default
+ [input]
+ input)
+
+(defmethod <bytes (RT/classForName "[B")
[#^"[B" input]
(String. input "UTF-8"))
+(defmethod <bytes clojure.lang.IPersistentVector
+ [input]
+ (vec (map <bytes input)))
+
+(defmethod <bytes clojure.lang.IPersistentMap
+ [input]
+ (->> input
+ (map (fn [[k v]] [k (<bytes v)]))
+ (into {})))
+
(defn #^{:private true} >input
[^String input & {:keys [reader]}]
(-> input
(.getBytes "UTF-8")
ByteArrayInputStream.
PushbackInputStream.
- reader))
+ reader
+ <bytes))
(deftest test-netstring-reading
- (are [x y] (= (<bytes (>input x :reader read-netstring)) y)
+ (are [x y] (= (>input x :reader read-netstring) y)
"0:," ""
"13:Hello, World!," "Hello, World!"
"16:Hällö, Würld!," "Hällö, Würld!"
@@ -69,13 +87,6 @@
"l6:cheesei42ed3:ham4:eggsee" ["cheese" 42 {"ham" "eggs"}]
"d6:cheesei42e3:haml4:eggsee" {"cheese" 42 "ham" ["eggs"]}))
-(deftest test-bencode-netstring-reading
- (are [x y] (= (<bytes (>input x :reader read-bencode-netstring)) y)
- "0:" ""
- "13:Hello, World!" "Hello, World!"
- "16:Hällö, Würld!" "Hällö, Würld!"
- "25:Здравей, Свят!" "Здравей, Свят!"))
-
(defn #^{:private true} >output
[thing & {:keys [writer]}]
(let [stream (ByteArrayOutputStream.)]
@@ -134,6 +145,7 @@
(deftest test-list-writing
(are [x y] (= (>output x :writer write-bencode) y)
+ nil "le"
[] "le"
["cheese"] "l6:cheesee"
["cheese" "ham" "eggs"] "l6:cheese3:ham4:eggse"))
@@ -148,13 +160,6 @@
["cheese" 42 {"ham" "eggs"}] "l6:cheesei42ed3:ham4:eggsee"
{"cheese" 42 "ham" ["eggs"]} "d6:cheesei42e3:haml4:eggsee"))
-(deftest test-bencode-netstring-writing
- (are [x y] (= (>output (>bytes x) :writer write-bencode-netstring) y)
- "" "0:"
- "Hello, World!" "13:Hello, World!"
- "Hällö, Würld!" "16:Hällö, Würld!"
- "Здравей, Свят!" "25:Здравей, Свят!"))
-
(deftest test-lexicographic-sorting
(let [source ["ham" "eggs" "hamburg" "hamburger" "cheese"]
expected ["cheese" "eggs" "ham" "hamburg" "hamburger"]

0 comments on commit 275f79f

Please sign in to comment.
Something went wrong with that request. Please try again.