/
lexical.cljc
170 lines (144 loc) · 5.96 KB
/
lexical.cljc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
; Copyright (c) Alan Thompson. All rights reserved.
; The use and distribution terms for this software are covered by the Eclipse Public
; License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) which can be found in the
; file epl-v10.html at the root of this distribution. By using this software in any
; fashion, you are agreeing to be bound by the terms of this license.
; You must not remove this notice, or any other, from this software.
(ns tupelo.lexical
"Utils for lexical sorting and searching"
(:refer-clojure :exclude [compare]) ; #todo
#?(:clj (:require
;[tupelo.core :as t :refer [spy spyx spyxx spyx-pretty grab]]
[tupelo.schema :as tsk]
[clojure.data.avl :as avl]
[schema.core :as s]
))
#?(:cljs (:require
; [tupelo.core :as t :refer [spy spyx spyxx spyx-pretty grab] ] ; #todo :include-macros true
[tupelo.schema :as tsk]
[clojure.data.avl :as avl]
[schema.core :as s]
))
)
#?(:cljs (enable-console-print!))
#?(:clj (do
; Schema alias for a "lexical value". It is simply tsk/Vec (defined in
; tupelo.schema, not visible in this file), the same schema that
; compare-lex uses for its arguments.
(def LexicalValType tsk/Vec)
; Concrete class of the collection returned by avl/sorted-set, captured by
; building a small sample instance and taking its class.
(def SortedSetType (class (avl/sorted-set 1 2 3)))
; Concrete class of the collection returned by avl/sorted-map, captured the
; same way from a small sample instance.
(def SortedMapType (class (avl/sorted-map :a 1 :b 2 :c 3)))
;---------------------------------------------------------------------------------------------------
; comparison-class throws exceptions for some types that might be
; useful to include.
(defn comparison-class
  "Returns a string naming the 'comparison class' of `x`. compare-generic
  first orders two values by this string and only compares the values
  themselves when both strings are equal.

  Buckets, checked in this order:
    nil            -> \"\"
    any number     -> \"java.lang.Number\"
    sequential     -> \"clojure.lang.Sequential\"
    record         -> the record's own class name
    set            -> \"clojure.lang.IPersistentSet\"
    map            -> \"clojure.lang.IPersistentMap\"
    Java array     -> \"java.util.Arrays\"
    Comparable     -> the value's own class name

  Throws an ExceptionInfo (with `{:value x}` as ex-data) for any other value."
  [x]
  (cond
    (nil? x) ""

    ; Lump all numbers together since Clojure's compare can
    ; compare them all to each other sensibly.
    (number? x) "java.lang.Number"

    ; sequential? includes lists, conses, vectors, and seqs of
    ; just about any collection, although it is recommended not
    ; to use this to compare seqs of unordered collections like
    ; sets or maps (vectors should be OK). This should be
    ; everything we would want to compare using cmp-seq-lexi.
    ; TBD: Does it leave anything out? Include anything it should not?
    (sequential? x) "clojure.lang.Sequential"

    ; NOTE: record case must preempt the `(map? ...)` case below, since all
    ; records can be viewed as maps.
    (record? x) (.getName (class x))

    (set? x) "clojure.lang.IPersistentSet"
    (map? x) "clojure.lang.IPersistentMap"

    ; All array types share one bucket, whatever their element type.
    (.isArray (class x)) "java.util.Arrays"

    ; Comparable includes Boolean, Character, String, Clojure
    ; refs, and many others.
    (instance? Comparable x) (.getName (class x))

    ; The message names compare-generic, the comparator in this namespace
    ; that relies on this function.
    :else (throw
            (ex-info (format "compare-generic does not implement comparison of values with class %s"
                       (.getName (class x)))
              {:value x}))))
(defn cmp-seq-lexi
  "Lexicographically compares two seqable collections `x` and `y`, using
  `cmpf` (a 2-arg comparator returning a number) on corresponding elements.

  Returns the first non-zero element comparison. If one collection is a
  strict prefix of the other, the shorter one sorts first (-1 / 1). Returns
  0 when both have the same length and all elements compare as equal."
  [cmpf x y]
  (loop [xs (seq x)
         ys (seq y)]
    (cond
      ; Both exhausted at the same time: the sequences are equal.
      (and (nil? xs) (nil? ys)) 0

      ; x ran out first, so x < y.
      (nil? xs) -1

      ; y ran out first, so x > y.
      (nil? ys) 1

      :else
      (let [elem-cmp (cmpf (first xs) (first ys))]
        (if-not (zero? elem-cmp)
          elem-cmp
          (recur (next xs) (next ys)))))))
; The same result can be obtained by calling cmp-seq-lexi on two
; vectors, but cmp-vec-lexi should allocate less memory comparing
; vectors.
(defn cmp-vec-lexi
  "Lexicographically compares two vectors `x` and `y` by index, using `cmpf`
  (a 2-arg comparator returning a number) on corresponding elements.

  Returns the first non-zero element comparison. If all elements in the
  common prefix compare as equal, returns the comparison of the two lengths,
  so the shorter vector sorts first. Elements are fetched by invoking the
  vectors as functions of the index."
  [cmpf x y]
  (let [nx    (count x)
        ny    (count y)
        limit (min nx ny)]
    (loop [idx 0]
      (if (== idx limit)
        ; Common prefix exhausted with no difference: decide on length.
        (clojure.core/compare nx ny)
        (let [elem-cmp (cmpf (x idx) (y idx))]
          (if-not (zero? elem-cmp)
            elem-cmp
            (recur (inc idx))))))))
(defn cmp-array-lexi
  "Lexicographically compares two Java arrays `x` and `y` by index, using
  `cmpf` (a 2-arg comparator returning a number) on corresponding elements.

  Returns the first non-zero element comparison. If all elements in the
  common prefix compare as equal, returns the comparison of the two lengths,
  so the shorter array sorts first."
  [cmpf x y]
  (let [nx    (alength x)
        ny    (alength y)
        limit (min nx ny)]
    (loop [idx 0]
      (if (== idx limit)
        ; Common prefix exhausted with no difference: decide on length.
        (clojure.core/compare nx ny)
        (let [elem-cmp (cmpf (aget x idx) (aget y idx))]
          (if-not (zero? elem-cmp)
            elem-cmp
            (recur (inc idx))))))))
(defn compare-generic
  "Comparator over heterogeneous values. `x` and `y` are first ordered by the
  string returned from comparison-class; only when those strings are equal
  are the values themselves compared:

    sets            - as sequences of their elements, sorted with compare-generic
    records / maps  - as sequences of [key val] entries, sorted by key
    arrays          - element by element via cmp-array-lexi
    two vectors     - element by element via cmp-vec-lexi
    other sequences - element by element via cmp-seq-lexi
    anything else   - clojure.core/compare

  Nested elements are compared by calling compare-generic recursively, so
  elements of different types can still be ordered."
  [x y]
  (let [x-kind         (comparison-class x)
        y-kind         (comparison-class y)
        kind-order     (clojure.core/compare x-kind y-kind)
        x-kind-is?     (fn [kind-name] (= x-kind kind-name))
        ; [key val] entries of a map or record, ordered by key.
        entries-by-key (fn [m] (sort-by key compare-generic (seq m)))]
    (cond
      ; Different comparison classes: the class names alone decide.
      (not (zero? kind-order))
      kind-order

      ; Sets are compared as sequences, with elements in sorted order.
      (x-kind-is? "clojure.lang.IPersistentSet")
      (cmp-seq-lexi compare-generic
        (sort compare-generic x)
        (sort compare-generic y))

      ; Records and maps are both compared as sequences of [key val]
      ; pairs, with pairs in order sorted by key. The record test sits
      ; here, ahead of the array/vector/sequential tests, exactly where
      ; the record-specific branch belongs.
      (or (record? x)
          (x-kind-is? "clojure.lang.IPersistentMap"))
      (cmp-seq-lexi compare-generic
        (entries-by-key x)
        (entries-by-key y))

      (x-kind-is? "java.util.Arrays")
      (cmp-array-lexi compare-generic x y)

      ; Special-case two vectors, since cmp-vec-lexi should allocate less
      ; memory comparing them than cmp-seq-lexi. Both here and for general
      ; sequences, elements must be compared with compare-generic
      ; recursively; plain compare would lose the ability to compare
      ; elements of different types.
      (and (vector? x) (vector? y))
      (cmp-vec-lexi compare-generic x y)

      ; Any other pair of sequential values, e.g. a vector and a list.
      (x-kind-is? "clojure.lang.Sequential")
      (cmp-seq-lexi compare-generic x y)

      :else
      (clojure.core/compare x y))))
; Schema-annotated entry point for lexical comparison.
;   a, b    - the two values to compare, each declared as tsk/Vec
;   returns - declared as s/Int; the value is whatever compare-generic
;             returns for (a, b)
; The body adds no logic of its own: it delegates straight to compare-generic.
(s/defn compare-lex :- s/Int
[a :- tsk/Vec
b :- tsk/Vec ]
(compare-generic a b))
))