forked from RoaringBitmap/roaring
-
Notifications
You must be signed in to change notification settings - Fork 0
/
rlecommon.go
199 lines (172 loc) · 4.31 KB
/
rlecommon.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
package roaring
import (
"fmt"
"sort"
)
// common to rle32.go and rle16.go
// rleVerbose controls whether p() prints show up.
// The testing package sets this based on
// testing.Verbose().
var rleVerbose bool
// p is a shorthand for fmt.Printf with beginning and
// trailing newlines. p() makes it easy
// to add diagnostic print statements.
func p(format string, args ...interface{}) {
if rleVerbose {
fmt.Printf("\n"+format+"\n", args...)
}
}
// MaxUint32 is the largest uint32 value.
const MaxUint32 = 4294967295
// MaxUint16 is the largest 16 bit unsigned int.
// This is the largest value an interval16 can store.
const MaxUint16 = 65535
// searchOptions allows us to accelerate runContainer32.search with
// prior knowledge of (mostly lower) bounds. This is used by Union
// and Intersect.
type searchOptions struct {
// start here instead of at 0
startIndex int64
// upper bound instead of len(rc.iv);
// endxIndex == 0 means ignore the bound and use
// endxIndex == n ==len(rc.iv) which is also
// naturally the default for search()
// when opt = nil.
endxIndex int64
}
// And finds the intersection of rc and b.
func (rc *runContainer32) And(b *Bitmap) *Bitmap {
out := NewBitmap()
for _, p := range rc.iv {
for i := p.start; i <= p.last; i++ {
if b.Contains(i) {
out.Add(i)
}
}
}
return out
}
// Xor returns the exclusive-or of rc and b.
func (rc *runContainer32) Xor(b *Bitmap) *Bitmap {
out := b.Clone()
for _, p := range rc.iv {
for v := p.start; v <= p.last; v++ {
if out.Contains(v) {
out.RemoveRange(uint64(v), uint64(v+1))
} else {
out.Add(v)
}
}
}
return out
}
// Or returns the union of rc and b.
func (rc *runContainer32) Or(b *Bitmap) *Bitmap {
out := b.Clone()
for _, p := range rc.iv {
for v := p.start; v <= p.last; v++ {
out.Add(v)
}
}
return out
}
func showHash(name string, h map[int]bool) {
hv := []int{}
for k := range h {
hv = append(hv, k)
}
sort.Sort(sort.IntSlice(hv))
stringH := ""
for i := range hv {
stringH += fmt.Sprintf("%v, ", hv[i])
}
fmt.Printf("%s is (len %v): %s", name, len(h), stringH)
}
// trial is used in the randomized testing of runContainers
type trial struct {
n int
percentFill float64
ntrial int
// only in the union test
// only subtract test
percentDelete float64
// only in 067 randomized operations
// we do this + 1 passes
numRandomOpsPass int
// allow sampling range control
// only recent tests respect this.
srang *interval16
}
// And finds the intersection of rc and b.
func (rc *runContainer16) And(b *Bitmap) *Bitmap {
out := NewBitmap()
for _, p := range rc.iv {
for i := p.start; i <= p.last; i++ {
if b.Contains(uint32(i)) {
out.Add(uint32(i))
}
}
}
return out
}
// Xor returns the exclusive-or of rc and b.
func (rc *runContainer16) Xor(b *Bitmap) *Bitmap {
out := b.Clone()
for _, p := range rc.iv {
for v := p.start; v <= p.last; v++ {
w := uint32(v)
if out.Contains(w) {
out.RemoveRange(uint64(w), uint64(w+1))
} else {
out.Add(w)
}
}
}
return out
}
// Or returns the union of rc and b.
func (rc *runContainer16) Or(b *Bitmap) *Bitmap {
out := b.Clone()
for _, p := range rc.iv {
for v := p.start; v <= p.last; v++ {
out.Add(uint32(v))
}
}
return out
}
func (rc *runContainer32) and(container) container {
panic("TODO. not yet implemented")
}
// serializedSizeInBytes returns the number of bytes of memory
// required by this runContainer16. This is for the
// Roaring format, as specified https://github.com/RoaringBitmap/RoaringFormatSpec/
func (rc *runContainer16) serializedSizeInBytes() int {
// number of runs in one unint16, then each run
// needs two more uint16
return 2 + len(rc.iv)*4
}
// serializedSizeInBytes returns the number of bytes of memory
// required by this runContainer32.
func (rc *runContainer32) serializedSizeInBytes() int {
return 4 + len(rc.iv)*8
}
func (rc *runContainer32) equals32(srb *runContainer32) bool {
//p("both rc32")
// Check if the containers are the same object.
if rc == srb {
//p("same object")
return true
}
if len(srb.iv) != len(rc.iv) {
//p("iv len differ")
return false
}
for i, v := range rc.iv {
if v != srb.iv[i] {
//p("differ at iv i=%v, srb.iv[i]=%v, rc.iv[i]=%v", i, srb.iv[i], rc.iv[i])
return false
}
}
//p("all intervals same, returning true")
return true
}