-
Notifications
You must be signed in to change notification settings - Fork 13
/
lexical_v1.cljc
106 lines (98 loc) · 3.85 KB
/
lexical_v1.cljc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
; Copyright (c) Alan Thompson. All rights reserved.
; The use and distribution terms for this software are covered by the Eclipse Public
; License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) which can be found in the
; file epl-v10.html at the root of this distribution. By using this software in any
; fashion, you are agreeing to be bound by the terms of this license.
; You must not remove this notice, or any other, from this software.
(ns tupelo.lexical-v1
"Utils for lexical sorting and searching"
(:refer-clojure :exclude [compare]) ; #todo
#?(:clj (:require
[tupelo.core :as t :refer [spy spyx spyxx spyx-pretty grab]]
[tupelo.schema :as tsk]
[clojure.data.avl :as avl]
[schema.core :as s]
)) )
#?(:clj
(do
(def Val tsk/Vec)
(def Set (class (avl/sorted-set 1 2 3)))
(def Map (class (avl/sorted-map :a 1 :b 2 :c 3)))
; #todo generalize to allow `nil` as an ultimate lower bound?
(s/defn compare-lex :- s/Int
"Performs a lexical comparison of 2 sequences, sorting as follows:
[1]
[1 nil]
[1 :a]
[1 :b]
[1 :b 3]
[2]
[3]
[3 :y] "
[a :- tsk/Vec
b :- tsk/Vec]
;(s/validate tsk/Vec a)
;(s/validate tsk/Vec b)
(cond
(= a b) 0
(empty? a) -1
(empty? b) 1
:else (let [a0 (t/xfirst a)
b0 (t/xfirst b)]
(if (= a0 b0)
(do (compare-lex (t/xrest a) (t/xrest b)))
(do (clojure.core/compare a0 b0))))))
(s/defn ->sorted-set :- Set
"Converts a set into a lexically-sorted set"
([] (->sorted-set #{}))
([some-set :- (s/cond-pre tsk/Set tsk/Vec)]
(into (avl/sorted-set-by compare-lex) some-set)))
; #todo add (->sorted-map <map>) => (into (sorted-map) <map>)
; #todo add (->sorted-vec <sequential>) => (vec (sort <vec>))
(s/defn bound-lower :- tsk/Vec
"Given a lexical value as a vector such as [1 :a], returns a lower bound like [1]"
[val :- tsk/Vec]
(when (zero? (count val))
(throw (ex-info "Cannot find lower bound for empty vec" {:val val})))
(t/xbutlast val))
(s/defn prefix-match? :- s/Bool
"Returns true if the sample value equals the pattern when truncated to the same length"
[pattern :- Val
sample :- Val]
(= pattern (t/xtake (count pattern) sample)))
(s/defn split-key-prefix :- {s/Keyword Set}
"Like clojure.data.avl/split-key, but allows prefix matches. Given a lexically sorted set like:
#{[:a 1]
[:a 2]
[:a 3]
[:b 1]
[:b 2]
[:b 3]
[:c 1]
[:c 2]}
splits data by prefix match for patterns like [:b], returning a map of 3 sorted sets like:
{:smaller #{[:a 1]
[:a 2]
[:a 3]}
:matches #{[:b 1]
[:b 2]
[:b 3]}
:larger #{[:c 1]
[:c 2]} ]
"
[match-val :- Val
lex-set :- Set]
(let [[smaller-set found-val larger-set] (avl/split-key match-val lex-set)
result (if (nil? found-val)
(let [[matches-seq larger-seq] (split-with #(prefix-match? match-val %) larger-set)]
{:smaller smaller-set
:matches (->sorted-set matches-seq)
:larger (->sorted-set larger-seq)})
{:smaller smaller-set
:matches (avl/sorted-set found-val)
:larger larger-set})]
;(s/validate Set (grab :smaller result))
;(s/validate Set (grab :matches result))
;(s/validate Set (grab :larger result))
result))
))