forked from cayleygraph/cayley
/
unique.go
145 lines (120 loc) · 3.3 KB
/
unique.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
package iterator
import (
"context"
"github.com/cayleygraph/cayley/graph"
)
var _ graph.Iterator = &Unique{}
// Unique iterator removes duplicate values from it's subiterator.
type Unique struct {
uid uint64
tags graph.Tagger
subIt graph.Iterator
result graph.Value
runstats graph.IteratorStats
err error
seen map[interface{}]bool
}
func NewUnique(subIt graph.Iterator) *Unique {
return &Unique{
uid: NextUID(),
subIt: subIt,
seen: make(map[interface{}]bool),
}
}
func (it *Unique) UID() uint64 {
return it.uid
}
// Reset resets the internal iterators and the iterator itself.
func (it *Unique) Reset() {
it.result = nil
it.subIt.Reset()
it.seen = make(map[interface{}]bool)
}
func (it *Unique) Tagger() *graph.Tagger {
return &it.tags
}
func (it *Unique) TagResults(dst map[string]graph.Value) {
it.tags.TagResult(dst, it.Result())
if it.subIt != nil {
it.subIt.TagResults(dst)
}
}
func (it *Unique) Clone() graph.Iterator {
uniq := NewUnique(it.subIt.Clone())
uniq.tags.CopyFrom(it)
return uniq
}
// SubIterators returns a slice of the sub iterators. The first iterator is the
// primary iterator, for which the complement is generated.
func (it *Unique) SubIterators() []graph.Iterator {
return []graph.Iterator{it.subIt}
}
// Next advances the subiterator, continuing until it returns a value which it
// has not previously seen.
func (it *Unique) Next(ctx context.Context) bool {
graph.NextLogIn(it)
it.runstats.Next += 1
for it.subIt.Next(ctx) {
curr := it.subIt.Result()
key := graph.ToKey(curr)
if ok := it.seen[key]; !ok {
it.result = curr
it.seen[key] = true
return graph.NextLogOut(it, true)
}
}
it.err = it.subIt.Err()
return graph.NextLogOut(it, false)
}
func (it *Unique) Err() error {
return it.err
}
func (it *Unique) Result() graph.Value {
return it.result
}
// Contains checks whether the passed value is part of the primary iterator,
// which is irrelevant for uniqueness.
func (it *Unique) Contains(ctx context.Context, val graph.Value) bool {
graph.ContainsLogIn(it, val)
it.runstats.Contains += 1
return graph.ContainsLogOut(it, val, it.subIt.Contains(ctx, val))
}
// NextPath for unique always returns false. If we were to return multiple
// paths, we'd no longer be a unique result, so we have to choose only the first
// path that got us here. Unique is serious on this point.
func (it *Unique) NextPath(ctx context.Context) bool {
return false
}
// Close closes the primary iterators.
func (it *Unique) Close() error {
it.seen = nil
return it.subIt.Close()
}
func (it *Unique) Type() graph.Type { return graph.Unique }
func (it *Unique) Optimize() (graph.Iterator, bool) {
newIt, optimized := it.subIt.Optimize()
if optimized {
it.subIt = newIt
}
return it, false
}
const uniquenessFactor = 2
func (it *Unique) Stats() graph.IteratorStats {
subStats := it.subIt.Stats()
return graph.IteratorStats{
NextCost: subStats.NextCost * uniquenessFactor,
ContainsCost: subStats.ContainsCost,
Size: subStats.Size / uniquenessFactor,
ExactSize: false,
Next: it.runstats.Next,
Contains: it.runstats.Contains,
ContainsNext: it.runstats.ContainsNext,
}
}
func (it *Unique) Size() (int64, bool) {
st := it.Stats()
return st.Size, st.ExactSize
}
func (it *Unique) String() string {
return "Unique"
}