Skip to content

Commit

Permalink
Merge branch 'bencode-improvements2' of https://github.com/kotarak/to…
Browse files Browse the repository at this point in the history
  • Loading branch information
cemerick committed Aug 21, 2012
2 parents 3b60ef7 + 1de83a8 commit 275f79f
Show file tree
Hide file tree
Showing 4 changed files with 141 additions and 140 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Expand Up @@ -83,4 +83,4 @@
</plugin> </plugin>
</plugins> </plugins>
</build> </build>
</project> </project>
206 changes: 89 additions & 117 deletions src/main/clojure/clojure/tools/nrepl/bencode.clj
Expand Up @@ -60,6 +60,76 @@
;; sized message buffer. The trailing comma serves as a hint to detect ;; sized message buffer. The trailing comma serves as a hint to detect
;; incorrect netstrings. ;; incorrect netstrings.
;; ;;
;; ## Low-level reading
;;
;; We will need some low-level reading helpers to read the bytes from
;; the input stream. These are `read-byte` as well as `read-bytes`. They
;; are split out, because doing such a simple task as reading a byte is
;; mild catastrophe in Java. So it would add some clutter to the algorithm
;; `read-netstring`.
;;
;; On the other hand they might be also useful elsewhere.
;;
;; To remove some magic numbers from the code below.

(def #^{:const true} i 105)
(def #^{:const true} l 108)
(def #^{:const true} d 100)
(def #^{:const true} comma 44)
(def #^{:const true} minus 45)

;; These two are only used boxed. So we keep them extra here.

(def e 101)
(def colon 58)

(defn #^{:private true} read-byte
#^long [#^InputStream input]
(let [c (.read input)]
(when (neg? c)
(throw
(Exception. "Invalid netstring. Unexpected end of input.")))
;; Here we have a quirk for example. `.read` returns -1 on end of
;; input. However the Java `Byte` has only a range from -128 to 127.
;; How does the fit together?
;;
;; The whole thing is shifted. `.read` actually returns an int
;; between zero and 255. Everything below the value 128 stands
;; for itself. But larger values are actually negative byte values.
;;
;; So we have to do some translation here. `Byte/byteValue` would
;; do that for us, but we want to avoid boxing here.
(if (< 127 c) (- c 256) c)))

(defn #^{:private true :tag "[B"} read-bytes
#^Object [#^InputStream input n]
(let [content (byte-array n)]
(loop [offset (int 0)
len (int n)]
(let [result (.read input content offset len)]
(when (neg? result)
(throw
(Exception.
"Invalid netstring. Less data available than expected.")))
(when (not= result len)
(recur (+ offset result) (- len result)))))
content))

;; `read-long` is used for reading integers from the stream as well
;; as the byte count prefixes of byte strings. The delimiter is \:
;; for byte count prefixes and \e for integers.

(defn #^{:private true} read-long
#^long [#^InputStream input delim]
(loop [n (long 0)]
;; We read repeatedly a byte from the input…
(let [b (read-byte input)]
;; …and stop at the delimiter.
(cond
(= b minus) (- (read-long input delim))
(= b delim) n
:else (recur (+ (* n (long 10)) (- (long b) (long 48))))))))

;; ## Reading a netstring ;; ## Reading a netstring
;; ;;
;; Let's dive straight into reading a netstring from an `InputStream`. ;; Let's dive straight into reading a netstring from an `InputStream`.
Expand All @@ -80,39 +150,12 @@
;; ;;
;; With this in mind we define the inner helper function first. ;; With this in mind we define the inner helper function first.


(declare read-byte (declare #^"[B" string>payload
read-bytes
#^"[B" string>payload
#^String string<payload) #^String string<payload)


(def i (Byte/valueOf (byte 105)))
(def l (Byte/valueOf (byte 108)))
(def d (Byte/valueOf (byte 100)))
(def e (Byte/valueOf (byte 101)))
(def colon (Byte/valueOf (byte 58)))
(def comma (Byte/valueOf (byte 44)))

(defn #^{:private true} read-netstring* (defn #^{:private true} read-netstring*
[input] [input]
(let [reader #(read-byte input) (read-bytes input (read-long input colon)))
;; We read repeatedly a byte from the input…
prefix (->> reader
repeatedly
;; …and stop at the colon following the prefix of
;; the byte count.
(take-while (complement #{colon}))
(into-array Byte/TYPE))
;; The byte count is now obtained by interpreting the bytes
;; of the prefix as a number encoded in decimal format in
;; an UTF-8 string.
;;
;; *Note:* We **always** encode strings into unicode by virtue
;; of UTF-8 when sending things over the wire. In this case
;; it wouldn't make a difference, because the digits are the
;; same in UTF-8 and ASCII, but for the general case it is
;; important to keep it in mind.
cnt (-> prefix string<payload Integer/valueOf)]
(read-bytes input cnt)))


;; And the public facing API: `read-netstring`. ;; And the public facing API: `read-netstring`.


Expand All @@ -125,46 +168,6 @@
(throw (Exception. "Invalid netstring. ',' expected."))) (throw (Exception. "Invalid netstring. ',' expected.")))
content)) content))


;; The astute reader might have noticed that there are several helpers
;; which are mentioned, but not defined, yet. These are `read-byte`
;; as well as `read-bytes`. They are split out, because doing such
;; a simple task as reading a byte is mild catastrophe in Java. So
;; it would add some clutter to the algorithm `read-netstring`.
;;
;; On the other hand they might be also useful elsewhere.

(defn #^{:private true :tag Byte} read-byte
[#^InputStream input]
(let [c (.read input)]
(when (neg? c)
(throw
(Exception. "Invalid netstring. Unexpected end of input.")))
;; Here we have a quirk for example. `.read` returns -1 on end of
;; input. However the Java `Byte` has only a range from -128 to 127.
;; How does the fit together?
;;
;; The whole thing is shifted. `.read` actually returns an int
;; between zero and 255. Everything below the value 128 stands
;; for itself. But larger values are actually negative byte values.
;;
;; So we have to do some translation here. Luckily `.byteValue`
;; does that for us.
(Byte/valueOf (.byteValue c))))

(defn #^{:private true :tag "[B"} read-bytes
[#^InputStream input n]
(let [content (byte-array n)]
(loop [offset 0
len n]
(let [result (.read input content offset len)]
(when (neg? result)
(throw
(Exception.
"Invalid netstring. Less data available than expected.")))
(when (not= result len)
(recur (+ offset result) (- len result)))))
content))

;; Similarly the `string>payload` and `string<payload` functions ;; Similarly the `string>payload` and `string<payload` functions
;; are defined as follows to simplify the conversion between strings ;; are defined as follows to simplify the conversion between strings
;; and byte arrays in various parts of the code. ;; and byte arrays in various parts of the code.
Expand Down Expand Up @@ -236,14 +239,14 @@
(defn #^{:private true} read-token (defn #^{:private true} read-token
[#^PushbackInputStream input] [#^PushbackInputStream input]
(let [ch (read-byte input)] (let [ch (read-byte input)]
(condp = ch (cond
i :integer (= (long e) ch) nil
l :list (= i ch) :integer
d :map (= l ch) :list
e nil (= d ch) :map
(do :else (do
(.unread input (int ch)) (.unread input (int ch))
(string<payload (read-netstring* input)))))) (read-netstring* input)))))


;; To read the bencode encoded data we walk a long the sequence of tokens ;; To read the bencode encoded data we walk a long the sequence of tokens
;; and act according to the found tags. ;; and act according to the found tags.
Expand All @@ -266,12 +269,7 @@


(defn #^{:private true} read-integer (defn #^{:private true} read-integer
[input] [input]
(->> #(read-byte input) (read-long input e))
repeatedly
(take-while (complement #{e}))
(into-array Byte/TYPE)
string<payload
Integer/valueOf))


;; *Note:* integers are an ugly special case, which cannot be ;; *Note:* integers are an ugly special case, which cannot be
;; handled with `read-token` or `read-netstring*`. ;; handled with `read-token` or `read-netstring*`.
Expand All @@ -284,11 +282,15 @@
[input] [input]
(vec (token-seq input))) (vec (token-seq input)))


;; Maps are sequences of key/value pairs. ;; Maps are sequences of key/value pairs. The keys are always
;; decoded into strings. The values are kept as is.


(defn #^{:private true} read-map (defn #^{:private true} read-map
[input] [input]
(apply hash-map (token-seq input))) (->> (token-seq input)
(partition 2)
(map (fn [[k v]] [(string<payload k) v]))
(into {})))


;; The final missing piece is `token-seq`. This a just a simple ;; The final missing piece is `token-seq`. This a just a simple
;; sequence which reads tokens until the next `\e`. ;; sequence which reads tokens until the next `\e`.
Expand Down Expand Up @@ -321,9 +323,8 @@
(symbol? thing) :named (symbol? thing) :named
(keyword? thing) :named (keyword? thing) :named
(map? thing) :map (map? thing) :map
(or (coll? thing) (or (nil? thing) (coll? thing) (.isArray (class thing))) :list
(.isArray (class thing))) :else (type thing))))
:list)))


(defmethod write-bencode :default (defmethod write-bencode :default
[output x] [output x]
Expand Down Expand Up @@ -417,32 +418,3 @@
(if (zero? x) (if (zero? x)
(recur (inc i)) (recur (inc i))
x)))))) x))))))

;; ## Special cases
;;
;; Sometimes one really wants to read the byte array coming from the wire.
;; Without converting it to a UTF-8 string. A use case would be enhanced
;; REPL interaction transferring not only strings, but also eg. picture
;; information as opaque byte information.
;;
;; To accomodate for theses uses we expose here some special cases.
;;
;; `read-bencode-netstring` is really only a public façade for
;; `read-netstring*`. However, the latter is only an implementation detail
;; while the former is a promise.

(defn read-bencode-netstring
"Read a netstring in bencode format. That means without trailing comma.
Returns the byte array of read bytes."
[input]
(read-netstring* input))

;; Of course this is complemented by `write-bencode-netstring` for writing
;; binary data in bencode netstring format. And similar this is only a
;; façade.

(defn write-bencode-netstring
"Write binary content in bencode netstring format. That means without
trailing comma. Takes a byte array as content."
[output content]
(write-netstring* output content))
32 changes: 28 additions & 4 deletions src/main/clojure/clojure/tools/nrepl/transport.clj
Expand Up @@ -10,7 +10,8 @@
PushbackReader) PushbackReader)
java.net.Socket java.net.Socket
(java.util.concurrent SynchronousQueue LinkedBlockingQueue (java.util.concurrent SynchronousQueue LinkedBlockingQueue
BlockingQueue TimeUnit))) BlockingQueue TimeUnit)
clojure.lang.RT))


(defprotocol Transport (defprotocol Transport
"Defines the interface for a wire protocol implementation for use "Defines the interface for a wire protocol implementation for use
Expand Down Expand Up @@ -42,6 +43,26 @@
write write
close)))) close))))


(defmulti #^{:private true} <bytes class)

(defmethod <bytes :default
[input]
input)

(defmethod <bytes (RT/classForName "[B")
[#^"[B" input]
(String. input "UTF-8"))

(defmethod <bytes clojure.lang.IPersistentVector
[input]
(vec (map <bytes input)))

(defmethod <bytes clojure.lang.IPersistentMap
[input]
(->> input
(map (fn [[k v]] [k (<bytes v)]))
(into {})))

(defn bencode (defn bencode
"Returns a Transport implementation that serializes messages "Returns a Transport implementation that serializes messages
over the given Socket or InputStream/OutputStream using bencode." over the given Socket or InputStream/OutputStream using bencode."
Expand All @@ -50,7 +71,10 @@
(let [in (PushbackInputStream. (io/input-stream in)) (let [in (PushbackInputStream. (io/input-stream in))
out (io/output-stream out)] out (io/output-stream out)]
(fn-transport (fn-transport
#(be/read-bencode in) #(let [payload (be/read-bencode in)
unencoded (<bytes (payload "-unencoded"))
to-decode (apply dissoc payload "-unencoded" unencoded)]
(merge payload {"-unencoded" unencoded} (<bytes to-decode)))
#(locking out #(locking out
(doto out (doto out
(be/write-bencode %) (be/write-bencode %)
Expand Down Expand Up @@ -89,7 +113,7 @@
(deliver head (first s)) (deliver head (first s))
(rest s))) (rest s)))
@head)] @head)]
(fn-transport read write (fn-transport read write
(when s (when s
(swap! read-seq (partial cons {:session @session-id :op "close"})) (swap! read-seq (partial cons {:session @session-id :op "close"}))
#(.close s)))))) #(.close s))))))
Expand All @@ -116,4 +140,4 @@
[] []
(let [a (LinkedBlockingQueue.) (let [a (LinkedBlockingQueue.)
b (LinkedBlockingQueue.)] b (LinkedBlockingQueue.)]
[(QueueTransport. a b) (QueueTransport. b a)])) [(QueueTransport. a b) (QueueTransport. b a)]))

0 comments on commit 275f79f

Please sign in to comment.