-
Notifications
You must be signed in to change notification settings - Fork 3.6k
/
compressed.go
173 lines (142 loc) · 3.46 KB
/
compressed.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
package hll
import "encoding/binary"
// Original author of this file is github.com/clarkduvall/hyperloglog
type iterable interface {
decode(i int, last uint32) (uint32, int)
Len() int
Iter() *iterator
}
type iterator struct {
i int
last uint32
v iterable
}
func (iter *iterator) Next() uint32 {
n, i := iter.v.decode(iter.i, iter.last)
iter.last = n
iter.i = i
return n
}
func (iter *iterator) Peek() uint32 {
n, _ := iter.v.decode(iter.i, iter.last)
return n
}
func (iter iterator) HasNext() bool {
return iter.i < iter.v.Len()
}
type compressedList struct {
count uint32
last uint32
b variableLengthList
}
func (v *compressedList) Clone() *compressedList {
if v == nil {
return nil
}
newV := &compressedList{
count: v.count,
last: v.last,
}
newV.b = make(variableLengthList, len(v.b))
copy(newV.b, v.b)
return newV
}
func (v *compressedList) MarshalBinary() (data []byte, err error) {
// Marshal the variableLengthList
bdata, err := v.b.MarshalBinary()
if err != nil {
return nil, err
}
// At least 4 bytes for the two fixed sized values plus the size of bdata.
data = make([]byte, 0, 4+4+len(bdata))
// Marshal the count and last values.
data = append(data, []byte{
// Number of items in the list.
byte(v.count >> 24),
byte(v.count >> 16),
byte(v.count >> 8),
byte(v.count),
// The last item in the list.
byte(v.last >> 24),
byte(v.last >> 16),
byte(v.last >> 8),
byte(v.last),
}...)
// Append the list
return append(data, bdata...), nil
}
func (v *compressedList) UnmarshalBinary(data []byte) error {
// Set the count.
v.count, data = binary.BigEndian.Uint32(data[:4]), data[4:]
// Set the last value.
v.last, data = binary.BigEndian.Uint32(data[:4]), data[4:]
// Set the list.
sz, data := binary.BigEndian.Uint32(data[:4]), data[4:]
v.b = make([]uint8, sz)
for i := uint32(0); i < sz; i++ {
v.b[i] = uint8(data[i])
}
return nil
}
func newCompressedList(size int) *compressedList {
v := &compressedList{}
v.b = make(variableLengthList, 0, size)
return v
}
func (v *compressedList) Len() int {
return len(v.b)
}
func (v *compressedList) decode(i int, last uint32) (uint32, int) {
n, i := v.b.decode(i, last)
return n + last, i
}
func (v *compressedList) Append(x uint32) {
v.count++
v.b = v.b.Append(x - v.last)
v.last = x
}
func (v *compressedList) Iter() *iterator {
return &iterator{0, 0, v}
}
type variableLengthList []uint8
func (v variableLengthList) MarshalBinary() (data []byte, err error) {
// 4 bytes for the size of the list, and a byte for each element in the
// list.
data = make([]byte, 0, 4+v.Len())
// Length of the list. We only need 32 bits because the size of the set
// couldn't exceed that on 32 bit architectures.
sz := v.Len()
data = append(data, []byte{
byte(sz >> 24),
byte(sz >> 16),
byte(sz >> 8),
byte(sz),
}...)
// Marshal each element in the list.
for i := 0; i < sz; i++ {
data = append(data, byte(v[i]))
}
return data, nil
}
func (v variableLengthList) Len() int {
return len(v)
}
func (v *variableLengthList) Iter() *iterator {
return &iterator{0, 0, v}
}
func (v variableLengthList) decode(i int, last uint32) (uint32, int) {
var x uint32
j := i
for ; v[j]&0x80 != 0; j++ {
x |= uint32(v[j]&0x7f) << (uint(j-i) * 7)
}
x |= uint32(v[j]) << (uint(j-i) * 7)
return x, j + 1
}
func (v variableLengthList) Append(x uint32) variableLengthList {
for x&0xffffff80 != 0 {
v = append(v, uint8((x&0x7f)|0x80))
x >>= 7
}
return append(v, uint8(x&0x7f))
}