forked from canonical/snapd
-
Notifications
You must be signed in to change notification settings - Fork 0
/
grouping.go
286 lines (264 loc) · 8.3 KB
/
grouping.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
// -*- Mode: Go; indent-tabs-mode: t -*-
/*
* Copyright (C) 2020 Canonical Ltd
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 3 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package internal
import (
"bytes"
"encoding/base64"
"encoding/binary"
"fmt"
"sort"
)
// Groupings maintain labels to identify membership to one or more groups.
// Labels are implemented as subsets of integers from 0
// up to an excluded maximum, where the integers represent the groups.
// Assumptions:
// - most labels are for one group or very few
// - a few labels are sparse with more groups in them
// - very few comprise the universe of all groups
type Groupings struct {
n uint
maxGroup uint16
bitsetThreshold uint16
}
// NewGroupings creates a new Groupings supporting labels for membership
// to up n groups. n must be a positive multiple of 16 and <=65536.
func NewGroupings(n int) (*Groupings, error) {
if n <= 0 || n > 65536 {
return nil, fmt.Errorf("n=%d groups is outside of valid range (0, 65536]", n)
}
if n%16 != 0 {
return nil, fmt.Errorf("n=%d groups is not a multiple of 16", n)
}
return &Groupings{n: uint(n), bitsetThreshold: uint16(n / 16)}, nil
}
// N returns up to how many groups are supported.
// That is the value that was passed to NewGroupings.
func (gr *Groupings) N() int {
return int(gr.n)
}
// WithinRange checks whether group is within the admissible range for
// labeling otherwise it returns an error.
func (gr *Groupings) WithinRange(group uint16) error {
if uint(group) >= gr.n {
return fmt.Errorf("group exceeds admissible maximum: %d >= %d", group, gr.n)
}
return nil
}
type Grouping struct {
size uint16
elems []uint16
}
func (g Grouping) Copy() Grouping {
elems2 := make([]uint16, len(g.elems), cap(g.elems))
copy(elems2[:], g.elems[:])
g.elems = elems2
return g
}
// search locates group among the sorted Grouping elements, it returns:
// * true if found
// * false if not found
// * the index at which group should be inserted to keep the
// elements sorted if not found and the bit-set representation is not in use
func (gr *Groupings) search(g *Grouping, group uint16) (found bool, j uint16) {
if g.size > gr.bitsetThreshold {
return bitsetContains(g, group), 0
}
j = uint16(sort.Search(int(g.size), func(i int) bool { return g.elems[i] >= group }))
if j < g.size && g.elems[j] == group {
return true, 0
}
return false, j
}
func bitsetContains(g *Grouping, group uint16) bool {
return (g.elems[group/16] & (1 << (group % 16))) != 0
}
// AddTo adds the given group to the grouping.
func (gr *Groupings) AddTo(g *Grouping, group uint16) error {
if err := gr.WithinRange(group); err != nil {
return err
}
if group > gr.maxGroup {
gr.maxGroup = group
}
if g.size == 0 {
g.size = 1
g.elems = []uint16{group}
return nil
}
found, j := gr.search(g, group)
if found {
return nil
}
newsize := g.size + 1
if newsize > gr.bitsetThreshold {
// switching to a bit-set representation after the size point
// where the space cost is the same, the representation uses
// bitsetThreshold-many 16-bits words stored in elems.
// We don't always use the bit-set representation because
// * we expect small groupings and iteration to be common,
// iteration is more costly over the bit-set representation
// * serialization matches more or less what we do in memory,
// so again is more efficient for small groupings in the
// extensive representation.
if g.size == gr.bitsetThreshold {
prevelems := g.elems
g.elems = make([]uint16, gr.bitsetThreshold)
for _, e := range prevelems {
bitsetAdd(g, e)
}
}
g.size = newsize
bitsetAdd(g, group)
return nil
}
var newelems []uint16
if int(g.size) == cap(g.elems) {
newelems = make([]uint16, newsize, cap(g.elems)*2)
copy(newelems, g.elems[:j])
} else {
newelems = g.elems[:newsize]
}
if j < g.size {
copy(newelems[j+1:], g.elems[j:])
}
// inserting new group at j index keeping the elements sorted
newelems[j] = group
g.size = newsize
g.elems = newelems
return nil
}
func bitsetAdd(g *Grouping, group uint16) {
g.elems[group/16] |= 1 << (group % 16)
}
// Contains returns whether the given group is a member of the grouping.
func (gr *Groupings) Contains(g *Grouping, group uint16) bool {
found, _ := gr.search(g, group)
return found
}
// Serialize produces a string encoding the given integers.
func Serialize(elems []uint16) string {
b := bytes.NewBuffer(make([]byte, 0, len(elems)*2))
binary.Write(b, binary.LittleEndian, elems)
return base64.RawURLEncoding.EncodeToString(b.Bytes())
}
// Serialize produces a string representing the grouping label.
func (gr *Groupings) Serialize(g *Grouping) string {
// groupings are serialized as:
// * the actual element groups if there are up to
// bitsetThreshold elements: elems[0], elems[1], ...
// * otherwise the number of elements, followed by the bitset
// representation comprised of bitsetThreshold-many 16-bits words
// (stored using elems as well)
if g.size > gr.bitsetThreshold {
return gr.bitsetSerialize(g)
}
return Serialize(g.elems)
}
func (gr *Groupings) bitsetSerialize(g *Grouping) string {
b := bytes.NewBuffer(make([]byte, 0, (gr.bitsetThreshold+1)*2))
binary.Write(b, binary.LittleEndian, g.size)
binary.Write(b, binary.LittleEndian, g.elems)
return base64.RawURLEncoding.EncodeToString(b.Bytes())
}
const errSerializedLabelFmt = "invalid serialized grouping label: %v"
// Deserialize reconstructs a grouping out of the serialized label.
func (gr *Groupings) Deserialize(label string) (*Grouping, error) {
b, err := base64.RawURLEncoding.DecodeString(label)
if err != nil {
return nil, fmt.Errorf(errSerializedLabelFmt, err)
}
if len(b)%2 != 0 {
return nil, fmt.Errorf(errSerializedLabelFmt, "not divisible into 16-bits words")
}
m := len(b) / 2
var g Grouping
if m == int(gr.bitsetThreshold+1) {
// deserialize number of elements + bitset representation
// comprising bitsetThreshold-many 16-bits words
return gr.bitsetDeserialize(&g, b)
}
if m > int(gr.bitsetThreshold) {
return nil, fmt.Errorf(errSerializedLabelFmt, "too large")
}
g.size = uint16(m)
esz := uint16(1)
for esz < g.size {
esz *= 2
}
g.elems = make([]uint16, g.size, esz)
binary.Read(bytes.NewBuffer(b), binary.LittleEndian, g.elems)
for i, e := range g.elems {
if e > gr.maxGroup {
return nil, fmt.Errorf(errSerializedLabelFmt, "element larger than maximum group")
}
if i > 0 && g.elems[i-1] >= e {
return nil, fmt.Errorf(errSerializedLabelFmt, "not sorted")
}
}
return &g, nil
}
func (gr *Groupings) bitsetDeserialize(g *Grouping, b []byte) (*Grouping, error) {
buf := bytes.NewBuffer(b)
binary.Read(buf, binary.LittleEndian, &g.size)
if g.size > gr.maxGroup+1 {
return nil, fmt.Errorf(errSerializedLabelFmt, "bitset size cannot be possibly larger than maximum group plus 1")
}
if g.size <= gr.bitsetThreshold {
// should not have used a bitset repr for so few elements
return nil, fmt.Errorf(errSerializedLabelFmt, "bitset for too few elements")
}
g.elems = make([]uint16, gr.bitsetThreshold)
binary.Read(buf, binary.LittleEndian, g.elems)
return g, nil
}
// Iter iterates over the groups in the grouping and calls f with each of
// them. If f returns an error Iter immediately returns with it.
func (gr *Groupings) Iter(g *Grouping, f func(group uint16) error) error {
if g.size > gr.bitsetThreshold {
return gr.bitsetIter(g, f)
}
for _, e := range g.elems {
if err := f(e); err != nil {
return err
}
}
return nil
}
func (gr *Groupings) bitsetIter(g *Grouping, f func(group uint16) error) error {
c := g.size
for i := uint16(0); i <= gr.maxGroup/16; i++ {
w := g.elems[i]
if w == 0 {
continue
}
for j := uint16(0); w != 0; j++ {
if w&1 != 0 {
if err := f(i*16 + j); err != nil {
return err
}
c--
if c == 0 {
// found all elements
return nil
}
}
w >>= 1
}
}
return nil
}