/
index.cljc
146 lines (126 loc) · 5.02 KB
/
index.cljc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
; Copyright (c) Alan Thompson. All rights reserved.
; The use and distribution terms for this software are covered by the Eclipse Public License 1.0
; (http://opensource.org/licenses/eclipse-1.0.php) which can be found in the file epl-v10.html at
; the root of this distribution. By using this software in any fashion, you are agreeing to be
; bound by the terms of this license. You must not remove this notice, or any other, from this
; software.
(ns tupelo.data.index
(:refer-clojure :exclude [load ->VecNode])
; #?(:clj (:use tupelo.core)) ; #todo remove for cljs
#?(:clj (:require
[tupelo.core :as t :refer [spy spyx spyxx spyx-pretty grab glue map-entry indexed
forv vals->map fetch-in
]]
[tupelo.lexical :as lex]
[tupelo.schema :as tsk]
[clojure.data.avl :as avl]
[clojure.set :as set]
[schema.core :as s]
))
#?(:cljs (:require
[tupelo.core :as t :refer [spy spyx spyxx spyx-pretty grab]] ; #todo :include-macros true
[tupelo.lexical :as lex]
[tupelo.schema :as tsk]
[clojure.data.avl :as avl]
[clojure.set :as set]
[schema.core :as s]
))
)
; #todo add indexes
; #todo add sets (primative only or HID) => map with same key/value
; #todo copy destruct syntax for search
#?(:cljs (enable-console-print!))
; #todo Tupelo Data Language (TDL)
#?(:clj (do
;---------------------------------------------------------------------------------------------------
; index stuff
(def LexicalValType tsk/Vec)
(def SortedSetType (class (avl/sorted-set 1 2 3)))
(def SortedMapType (class (avl/sorted-map :a 1 :b 2 :c 3)))
(def IndexType SortedSetType )
(def IndexEntryType tsk/Vec )
(s/defn empty-index
"Returns an empty lexically-sorted index"
[] (avl/sorted-set-by lex/compare-lex))
(s/defn ->index :- SortedSetType ; #todo enforce tupele element type?
"Converts a sequence (vec or set) into a lexically-sorted index"
[some-set :- (s/cond-pre tsk/Set tsk/Vec)]
(into (empty-index) some-set))
; #todo add (->sorted-map <map>) => (into (sorted-map) <map>)
; #todo add (->sorted-vec <sequential>) => (vec (sort <vec>))
(s/defn bound-lower :- tsk/Vec
"Given a lexical value as a vector such as [1 :a], returns a lower bound like [1]"
[val :- tsk/Vec]
(when (zero? (count val))
(throw (ex-info "Cannot find lower bound for empty vec" {:val val})))
(t/xbutlast val))
(s/defn prefix-match? :- s/Bool
"Returns true if the sample value equals the pattern when truncated to the same length"
[pattern :- LexicalValType
sample :- LexicalValType]
(= pattern (t/xtake (count pattern) sample)))
(s/defn split-key-prefix :- {s/Keyword SortedSetType}
"Like clojure.data.avl/split-key, but allows prefix matches. Given a lexically sorted set like:
#{[:a 1]
[:a 2]
[:a 3]
[:b 1]
[:b 2]
[:b 3]
[:c 1]
[:c 2]}
splits data by prefix match for patterns like [:b], returning a map of 3 sorted sets like:
{:smaller #{[:a 1]
[:a 2]
[:a 3]}
:matches #{[:b 1]
[:b 2]
[:b 3]}
:larger #{[:c 1]
[:c 2]} ]
"
[match-val :- LexicalValType
lex-set :- SortedSetType]
(let [[smaller-set found-val larger-set] (avl/split-key match-val lex-set)
result (if (nil? found-val)
(let [[matches-seq larger-seq] (split-with #(prefix-match? match-val %) larger-set)]
{:smaller smaller-set
:matches (->index matches-seq)
:larger (->index larger-seq)})
{:smaller smaller-set
:matches (avl/sorted-set found-val)
:larger larger-set})]
;(s/validate SortedSetType (grab :smaller result))
;(s/validate SortedSetType (grab :matches result))
;(s/validate SortedSetType (grab :larger result))
result))
(s/defn prefix-matches :- SortedSetType
"Return the `:matches` values found via `split-key-prefix`."
[match-val :- LexicalValType
lex-set :- SortedSetType]
(t/grab :matches (split-key-prefix match-val lex-set )))
; #todo add-entry & remove-entry instead of conj/disj ???
(s/defn add-entry
"Add an entry to the index, returning the modified index"
[index :- SortedSetType
entry :- tsk/Vec ]
(conj index entry))
(s/defn remove-entry
"Remove an entry to the index, returning the modified index. Throws if entry not found in index."
[index :- SortedSetType
entry :- tsk/Vec ]
(when-not (contains? index entry)
(throw (ex-info "entry not found in index" (vals->map entry))) )
(disj index entry))
;-----------------------------------------------------------------------------
; #todo keep these? tupelo.set/add tupelo.set/remove
;(s/defn set-add-eid :- tsk/Set
; [set-in :- tsk/Set
; eid-in :- EidType]
; (conj set-in eid-in))
;
;(s/defn set-remove-eid :- tsk/Set
; [set-in :- tsk/Set
; eid-in :- EidType]
; (disj set-in eid-in))
))