-
Notifications
You must be signed in to change notification settings - Fork 39
/
utils.clj
295 lines (253 loc) · 9.5 KB
/
utils.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
(ns tesser.utils
"Toolbox."
(:refer-clojure :exclude [update])
(:import (java.lang.reflect Array))
(:require [clojure [set :as set]
[string :as str]
[walk :as walk]]
[clojure.core.reducers :as r]))
(defn prepend
"Prepends a single value to the beginning of a sequence. O(1) for sequences
using cons, O(n) for vectors. Returns a singleton vector when coll is nil."
[coll element]
(cond (nil? coll) [element]
(vector? coll) (vec (cons element coll))
true (cons element coll)))
(defn append
"Appends a single value to the end of a sequence. O(1); uses conj for
vectors, concat for other seqs."
[coll element]
(if (vector? coll)
(conj coll element)
(concat coll (list element))))
;; A mutable pair datatype, intended for use during singlethreaded reductions.
(defprotocol Pair
(a [pair] "Returns the first element in the Pair.")
(b [pair] "Returns the second element in the Pair.")
(set-a! [pair a'] "Set the first element in the Pair.")
(set-b! [pair b'] "Set the second element in the Pair.")
(set-both! [pair a' b'] "Set both the first and second element in the pair."))
(deftype UnsafePair [^:unsynchronized-mutable a ^:unsynchronized-mutable b]
Pair
(a [_] a)
(b [_] b)
(set-a! [this a'] (set! a a') this)
(set-b! [this b'] (set! b b') this)
(set-both! [this a' b']
(set! a a')
(set! b b')
this))
(defn unsafe-pair
"Constructs a new unsynchronized mutable pair object, suitable for
single-threaded mutation."
([] (UnsafePair. nil nil))
([a b] (UnsafePair. a b)))
(defn successive-pairs
"A much faster version of (partition 2 1 coll) which generates vectors, not
lazy seqs."
([coll] (successive-pairs (first coll) (next coll)))
([prev coll]
(lazy-seq
(when-let [s (seq coll)]
(let [x (first s)]
(cons [prev x] (successive-pairs x (next coll))))))))
(defn differences
"A seq of the differences between successive elements in a collection. For
example,
(differences [1 2 4 5 2])
; (1 2 1 -3)"
[coll]
(->> coll
successive-pairs
(map (fn [[x x']] (- x' x)))))
(defn cumulative-sums
"A seq of the cumulative sums of all elements in `coll`, starting at `init`
or the first element of `coll` if `init` is not provided. If `differences`
provides differentials, `cumulative-sums` provides integrals.
(cumulative-sums 1 [1 2 1 -3])
; (1 2 4 5 2)"
([coll]
(reductions + coll))
([init coll]
(reductions + init coll)))
(defn map-vals
"Maps over a key-value map, returning a new map by transforming each value
with (f v)."
[f m]
(->> m
(reduce (fn [m [k v]]
(assoc! m k (f v)))
(transient {}))
persistent!))
(defn index-by
"Given an indexing function f and a collection of xs, return a map of (f x)
-> x."
[f xs]
(persistent!
(reduce (fn [m x] (assoc! m (f x) x))
(transient {})
xs)))
(defn path-fn
"Takes a path for get-in and converts it to a function that operates on
associative structures."
[path]
(fn [x] (get-in x path)))
(defn var->sym
"Converts a var to fully qualified symbol."
[^clojure.lang.Var v]
(symbol (name (.name (.ns v))) (name (.sym v))))
(defn complete-triangular-matrix
"Given a map of `[x y]` keys to values, returns a map where *both* `[x y]`
and `[y x]` point to identical values. Useful for pairwise comparisons which
compute triangular matrices but want to return a full matrix."
[m]
(->> m (map (fn [[[x y] value]] [[y x] value])) (into m)))
(defn first-non-nil-reducer
"A reducing function that simply returns the first non-nil element in the
collection."
[_ x]
(when-not (nil? x) (reduced x)))
(defn reduce-first
"clojure.core/first, but for for reducibles."
[reducible]
(reduce (fn [_ x] (reduced x)) nil reducible))
(defmacro scred
"Helper for short-circuiting nested reduction functions which can emit
reduced values. Given the name of a function that could emit a reduced
value, and an expression:
(scred rfn [1 (rfn x y)])
Expands to code that converts the expression to a reduced value whenever
the underlying function emits a reduced value:
(let [acc (rfn x y)]
(if (reduced? acc)
(let [acc @acc] (reduced [1 acc]))
[1 acc]))
scred does not interpret lexical scope, so don't rebind rfn in expr.
Uses prewalk, so the outermost fn is where scred will cut out an expr.
Keep this as simple as possible, haha."
[rfn-name expr]
(let [acc (gensym 'acc)
reduced-expr (promise)
expr (walk/prewalk (fn [form]
; Match (rfn ...)
(if (and (list? form)
(= rfn-name (first form)))
; Snarf the expression for later
(do (assert
(not (realized? reduced-expr)))
(deliver reduced-expr form)
acc)
form))
expr)
reduced-expr @reduced-expr]
(assert reduced-expr)
`(let [~acc ~reduced-expr]
(if (reduced? ~acc)
(let [~acc (deref ~acc)] (reduced ~expr))
~expr))))
(defmacro def-type-predicate
"Takes an instance of an object and defines a function that tests an object
to see if its class is an instance of the exemplar's."
[name exemplar]
`(let [c# (class ~exemplar)]
(defn ~name [x#] (instance? c# x#))))
(def-type-predicate shorts? (short-array 0))
(def-type-predicate ints? (int-array 0))
(def-type-predicate longs? (long-array 0))
(def-type-predicate floats? (float-array 0))
(def-type-predicate doubles? (double-array 0))
(def-type-predicate objects? (object-array 0))
(defmacro reducible-slice
"A reducible slice of an indexed collection. Expands into a reified
CollReduce which uses `(getter coll ... i)` to return the `i`th element.
Defined as a macro so we can do primitive agets, which are waaaay faster for
arrays. Slice will have maximum length n, and starts at index i0."
[getter coll length offset]
`(reify
clojure.core.protocols/CollReduce
(coll-reduce [this# f#]
(clojure.core.protocols/coll-reduce this# f# (f#)))
(coll-reduce [_ f# init#]
(let [length# (long ~length)
offset# (long ~offset)
i-final# (dec (min (count ~coll) (+ offset# length#)))]
(loop [i# offset#
acc# init#]
(let [acc# (f# acc# (~getter ~coll i#))]
(if (or (= i# i-final#)
(reduced? acc#))
acc#
(recur (inc i#) acc#))))))))
; Slices over primitive arrays
(defn reducible-slice-bytes
[^bytes ary chunk-size offset]
(reducible-slice aget ary chunk-size offset))
(defn reducible-slice-shorts
[^shorts ary chunk-size offset]
(reducible-slice aget ary chunk-size offset))
(defn reducible-slice-ints
[^ints ary chunk-size offset]
(reducible-slice aget ary chunk-size offset))
(defn reducible-slice-longs
[^longs ary chunk-size offset]
(reducible-slice aget ary chunk-size offset))
(defn reducible-slice-floats
[^floats ary chunk-size offset]
(reducible-slice aget ary chunk-size offset))
(defn reducible-slice-doubles
[^doubles ary chunk-size offset]
(reducible-slice aget ary chunk-size offset))
(defn reducible-slice-objects
[^objects ary chunk-size offset]
(reducible-slice aget ary chunk-size offset))
(defn chunk-array
"Partitions an array into reducibles of size `chunk-size` (like
chunk), but faster."
([^long chunk-size ary]
(let [slicer (cond
(bytes? ary) reducible-slice-bytes
(shorts? ary) reducible-slice-shorts
(ints? ary) reducible-slice-ints
(longs? ary) reducible-slice-longs
(floats? ary) reducible-slice-floats
(doubles? ary) reducible-slice-doubles
(objects? ary) reducible-slice-objects)]
(->> (range 0 (count ary) chunk-size)
(map (partial slicer ary chunk-size))))))
(defn chunk-vec
"Partitions a vector into reducibles of size n (somewhat like partition-all)
but uses subvec for speed.
(chunk-vec 2 [1]) ; => ([1])
(chunk-vec 2 [1 2 3]) ; => ([1 2] [3])
Useful for supplying vectors to tesser.core/tesser."
([^long n v]
(let [c (count v)]
(->> (range 0 c n)
(map #(subvec v % (min c (+ % n))))))))
(defn reducible-chunk
"Like partition-all, but only emits reducibles. Faster for vectors and
arrays. May return chunks of any reducible type. Useful for supplying colls
to tesser.
(->> [1 2 3 4 5 6 7 8]
(chunk 2)
(map (partial into [])))
; => ([1 2] [3 4] [5 6] [7 8])"
[^long n coll]
(cond
(vector? coll) (chunk-vec n coll)
(.isArray (class coll)) (chunk-array n coll)
true (partition-all n coll)))
(defn maybe-unary
"Not all functions used in `tesser/fold` and `tesser/reduce` have a
single-arity form. This takes a function `f` and returns a fn `g` such that
`(g x)` is `(f x)` unless `(f x)` throws ArityException, in which case `(g
x)` returns just `x`."
[f]
(fn wrapper
([] (f))
([x] (try
(f x)
(catch clojure.lang.ArityException e
x)))
([x y] (f x y))
([x y & more] (apply f x y more))))