-
Notifications
You must be signed in to change notification settings - Fork 33
/
char_sequence.clj
122 lines (110 loc) · 3.69 KB
/
char_sequence.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
(ns byte-streams.char-sequence
(:refer-clojure :exclude [flush])
(:import
[java.util.concurrent.locks
ReentrantLock]
[java.io
ByteArrayOutputStream]
[java.nio
ByteBuffer
CharBuffer]
[java.nio.charset
Charset
CharsetDecoder
CoderResult
CodingErrorAction]))
;; Compile the arithmetic in the rest of this file without overflow checks.
(set! *unchecked-math* true)
(defn coding-error-action
  "Translates a keyword into the matching `java.nio.charset.CodingErrorAction`.

  `action` must be one of `:report`, `:ignore` or `:replace`.
  Throws `IllegalArgumentException` for any other value."
  [action]
  ;; `case` takes the value to dispatch on as its first argument.  Without it
  ;; the first keyword is treated as that value and the trailing constant as
  ;; the default, so every call would yield REPLACE.
  (case action
    :report  CodingErrorAction/REPORT
    :ignore  CodingErrorAction/IGNORE
    :replace CodingErrorAction/REPLACE
    (throw (IllegalArgumentException.
             (str "Unknown coding error action: " (pr-str action)
                  " (expected :report, :ignore or :replace)")))))
(defn parse-result
  "Converts a `CoderResult` into a keyword.

  Returns `:underflow` when the result reports underflow and `:overflow` when
  it reports overflow.  Any other result raises `IllegalArgumentException`."
  [^CoderResult result]
  (or (when (.isUnderflow result) :underflow)
      (when (.isOverflow result) :overflow)
      (throw (IllegalArgumentException. "Malformed byte-stream input to CharsetDecoder"))))
(defn decode
  "Performs one decoding step from `in` into `out` with the end-of-input flag
  set to false, and returns the outcome as interpreted by `parse-result`."
  [^CharsetDecoder decoder ^ByteBuffer in ^CharBuffer out]
  (-> decoder
      (.decode in out false)
      parse-result))
(defn flush
  "Finishes a decoding run.

  First decodes `in` (or an empty buffer when `in` is nil) into `out` with the
  end-of-input flag set to true, then calls the decoder's own `.flush` on
  `out`.  Both outcomes pass through `parse-result`; the value returned is the
  one for the `.flush` call."
  [^CharsetDecoder decoder ^ByteBuffer in ^CharBuffer out]
  (let [^ByteBuffer final-input (if in in (ByteBuffer/allocate 0))]
    ;; Result is discarded, but parse-result still throws on a malformed outcome.
    (parse-result (.decode decoder final-input out true))
    (parse-result (.flush decoder out))))
(defn concat-bytes
  "Returns a freshly allocated buffer holding the remaining bytes of `a`
  followed by the remaining bytes of `b`, flipped so it is ready for reading.
  Both inputs are consumed: the relative `put` advances their positions."
  [^ByteBuffer a ^ByteBuffer b]
  (let [total (+ (.remaining a) (.remaining b))]
    (doto (ByteBuffer/allocate total)
      (.put a)
      (.put b)
      (.flip))))
(defn lazy-char-buffer-sequence
  "Returns a lazy sequence of flipped (ready-to-read) `CharBuffer`s decoded
  from `byte-source`.

  Arguments:
    decoder      - the `CharsetDecoder` doing the work.
    chunk-size   - number passed to `byte-source` on each pull; it also sizes
                   the output buffer.
    extra-bytes  - bytes left undecoded by the previous step, or nil.
    close-fn     - optional no-arg function, called once the source is
                   exhausted.
    byte-source  - function called as `(byte-source chunk-size)`.  A falsey
                   return means no more input; otherwise the value is handed
                   to `decode` as a `ByteBuffer`.

  Each element is produced by one decoding step.  When the source is
  exhausted the decoder is flushed, `close-fn` is invoked, and any characters
  produced by the flush form the last element."
  [^CharsetDecoder decoder
   chunk-size
   ^ByteBuffer extra-bytes
   close-fn
   byte-source]
  (lazy-seq
    (let [num-bytes (+ (long (if extra-bytes
                               (.remaining extra-bytes)
                               0))
                       (long chunk-size))
          ;; NOTE(review): this divides by averageCharsPerByte, which
          ;; over-allocates when that ratio is below 1.  An undersized buffer
          ;; is handled by the overflow path below.  Confirm intent.
          len (long (Math/ceil (/ num-bytes
                                  (.averageCharsPerByte decoder))))
          out (CharBuffer/allocate len)]
      (if (and extra-bytes (= :overflow (decode decoder extra-bytes out)))

        ;; The leftover bytes alone filled `out`.  Emit what was decoded and
        ;; retry with whatever is still left in `extra-bytes`.  The buffer
        ;; must be flipped, or readers see the unwritten tail instead of the
        ;; decoded characters.
        (cons
          (.flip out)
          (lazy-char-buffer-sequence decoder chunk-size extra-bytes close-fn byte-source))

        (if-let [in (byte-source chunk-size)]
          (let [^ByteBuffer in (if (and extra-bytes (.hasRemaining extra-bytes))
                                 (concat-bytes extra-bytes in)
                                 in)]
            ;; Called for its effect on `in` and `out`.  Bytes still left in
            ;; `in` afterwards are carried into the next step.
            (decode decoder in out)
            (cons
              (.flip out)
              (lazy-char-buffer-sequence
                decoder
                chunk-size
                (when (.hasRemaining in) in)
                close-fn
                byte-source)))
          (do
            (flush decoder extra-bytes out)
            (when close-fn (close-fn))
            (.flip out)
            ;; A lazy-seq body must yield a seq.  A bare CharBuffer would be
            ;; seq'd as a CharSequence, i.e. as individual Characters, so wrap
            ;; it.  An empty buffer ends the sequence.
            (when (.hasRemaining out)
              (list out))))))))
(defn decode-byte-source
  "Wraps `byte-source` in a lazily decoded `CharSequence` that is also
  `java.io.Closeable`.

  Arguments:
    byte-source - function handed to `lazy-char-buffer-sequence`, which
                  calls it with the chunk size to pull input.
    close-fn    - optional no-arg function invoked by `close`; it is also
                  passed on to `lazy-char-buffer-sequence`.
    options map:
      :chunk-size         bytes requested per pull (default 1024)
      :encoding           charset name (default \"UTF-8\")
      :on-encoding-error  keyword given to `coding-error-action`
                          (default :replace)

  `charAt`, `length`, `subSequence` and `toString` walk the decoded sequence,
  realizing as much of it as they need.  `charAt` throws
  `IndexOutOfBoundsException` when the index is outside the decoded text."
  [byte-source
   close-fn
   {:keys [chunk-size encoding on-encoding-error]
    :or {chunk-size 1024
         on-encoding-error :replace
         encoding "UTF-8"}}]
  (let [^CodingErrorAction action (coding-error-action on-encoding-error)
        decoder (doto (.newDecoder (Charset/forName encoding))
                  (.onMalformedInput action)
                  (.onUnmappableCharacter action))
        s (lazy-char-buffer-sequence decoder chunk-size nil close-fn byte-source)]
    (reify
      java.io.Closeable
      (close [_] (when close-fn (close-fn)))

      CharSequence
      (charAt [_ idx]
        ;; Walk the buffers, subtracting each buffer's size from the index
        ;; until it falls inside the current one.
        (loop [remaining idx, s s]
          (if (empty? s)
            (throw (IndexOutOfBoundsException. (str idx)))
            (let [^CharBuffer buf (first s)]
              ;; The index is in this buffer only when it is smaller than the
              ;; number of chars the buffer holds.
              (if (< remaining (.remaining buf))
                (.charAt buf remaining)
                (recur (- remaining (.remaining buf)) (rest s)))))))
      (length [_]
        (reduce + (map #(.remaining ^CharBuffer %) s)))
      (subSequence [this start end]
        ;; Materializes the whole text; String does the bounds checking.
        (.subSequence (str this) start end))
      (toString [_]
        (let [buf (StringBuffer.)]
          (doseq [b s]
            (.append buf b))
          (.toString buf))))))