-
Notifications
You must be signed in to change notification settings - Fork 2
/
string.cljc
130 lines (67 loc) · 2.59 KB
/
string.cljc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
;; This Source Code Form is subject to the terms of the Mozilla Public
;; License, v. 2.0. If a copy of the MPL was not distributed with this
;; file, You can obtain one at https://mozilla.org/MPL/2.0/.
(ns helins.binf.string
"Decoding a string directly to a buffer and vice-versa."
{:author "Adam Helinski"}
#?(:clj (:import (java.nio.charset Charset
StandardCharsets))))
;;;;;;;;;; Selecting encodings
(defn decoder

  "Builds a decoder for the named `encoding`. A decoder is what turns the content of a
   buffer back into a string (see [[decode]]).

   These encodings are available on every supported platform:

     \"iso-8859-1\"
     \"utf-8\"
     \"utf-16be\"
     \"utf-16le\"

   Any other name is host-specific: the JVM accepts whatever the `Charset` class accepts
   whereas JS accepts whatever a `TextDecoder` accepts.

   Throws when the requested encoding is not supported."

  [encoding]

  ;; JVM: a decoder is simply a `Charset`. JS: a decoder is a `TextDecoder` instance.
  #?(:clj  (Charset/forName ^String encoding)
     :cljs (new js/TextDecoder encoding)))
#?(:clj (defn encoder

  "Builds an encoder for the named `encoding`. An encoder is what turns a string into
   the content of a buffer.

   Only defined on the JVM, where any encoding can be handed to [[encode]]. In JS,
   strings can only be encoded as UTF-8, which is why the string encoding utilities
   of this library rely exclusively on UTF-8.

   See [[decoder]] for the list of available encodings."

  [encoding]

  ;; On the JVM, an encoder is simply a `Charset`.
  (Charset/forName ^String encoding)))
;;;;;;;;;; Default encoding is UTF-8
(def decoder-utf-8

  "UTF-8 decoder, the one [[decode]] falls back to when no decoder is provided."

  ;; A `TextDecoder` constructed without any argument is used on the JS side.
  #?(:clj  StandardCharsets/UTF_8
     :cljs (new js/TextDecoder)))
#?(:clj (def ^{:tag Charset} encoder-utf-8

  "UTF-8 encoder, the one [[encode]] uses when no encoder is provided (JVM flavor)."

  StandardCharsets/UTF_8))
#?(:cljs (def encoder-utf-8

  "UTF-8 encoder used by [[encode]] (JS flavor, a `TextEncoder` instance)."

  (new js/TextEncoder)))
;;;;;;;;;; Translation between strings and buffers
(defn decode

  "Reads the content of the given `buffer` as a string.

   Without an explicit `decoder`, UTF-8 is used ([[decoder-utf-8]]).

   See [[decoder]] for obtaining a decoder for another encoding."

  ([buffer]

   (decode buffer
           decoder-utf-8))


  ([buffer decoder]

   ;; JVM: `buffer` is treated as a byte array and `decoder` as a `Charset`.
   ;; JS: the work is delegated to the `decode` method of the decoder.
   #?(:clj  (new String
                 ^bytes buffer
                 ^Charset decoder)
      :cljs (.decode decoder
                     buffer))))
(defn encode

  "Encodes the given `string` and returns the resulting buffer.

   UTF-8 is used by default. On the JVM only, a second argument selects an alternative
   encoder (see [[encoder]])."

  (^bytes [string]

   ;; JVM: same as providing the default UTF-8 encoder explicitly.
   ;; JS: what is returned is the `buffer` property of the value produced by the encoder.
   #?(:clj  (encode string
                    encoder-utf-8)
      :cljs (-> encoder-utf-8
                (.encode string)
                .-buffer)))


  #?(:clj (^bytes [string encoder]

           (.getBytes ^String string
                      ^Charset encoder))))