forked from lytics/hll
-
Notifications
You must be signed in to change notification settings - Fork 0
/
sparse.go
134 lines (113 loc) · 2.89 KB
/
sparse.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
package hll
import (
"bytes"
"encoding/base64"
"encoding/binary"
"encoding/json"
"github.com/golang/snappy"
)
type sparse struct {
buf []byte
lastVal, numElements uint64
}
func newSparse(estimatedCap uint64) *sparse {
return &sparse{
buf: make([]byte, 0, estimatedCap),
lastVal: 0,
numElements: 0,
}
}
func (s *sparse) Copy() *sparse {
if s == nil {
return nil
}
buf := make([]byte, len(s.buf))
copy(buf, s.buf)
return &sparse{
buf: buf,
lastVal: s.lastVal,
numElements: s.numElements,
}
}
func (s *sparse) Add(x uint64) {
delta := x - s.lastVal
// This slice is not strictly necessary, but it saves a lot of complexity. For now, simplicity
// trumps performance in this case.
deltaBuf := make([]byte, binary.MaxVarintLen64)
n := binary.PutUvarint(deltaBuf, delta)
s.buf = append(s.buf, deltaBuf[0:n]...)
s.lastVal = x
s.numElements++
}
func (s *sparse) SizeInBits() uint64 {
return uint64(len(s.buf) * 8)
}
func (s *sparse) SizeInBytes() uint64 {
return uint64(len(s.buf))
}
// Returns a function that can be called repeatedly to yield values from the list.
func (s *sparse) GetIterator() u64It {
rdr := bytes.NewBuffer(s.buf)
var lastDecoded uint64 = 0
return func() (uint64, bool) {
delta, err := binary.ReadUvarint(rdr)
if err != nil {
return 0, false
}
returnVal := lastDecoded + delta
lastDecoded = returnVal
return returnVal, true
}
}
func (s *sparse) GetNumElements() uint64 {
return s.numElements
}
type jsonableSparse struct {
B []byte
L, N uint64
}
func (s *sparse) MarshalJSON() ([]byte, error) {
compressed, err := snappyB64(s.buf)
if err != nil {
return nil, err
}
j := &jsonableSparse{compressed, s.lastVal, s.numElements}
return json.Marshal(j)
}
func (s *sparse) UnmarshalJSON(buf []byte) error {
j := jsonableSparse{}
if err := json.Unmarshal(buf, &j); err != nil {
return err
}
uncompressed, err := unsnappyB64(j.B)
if err != nil {
return err
}
s.buf, s.lastVal, s.numElements = uncompressed, j.L, j.N
return nil
}
// Compress the input using snapp and encode the result using URL-safe base64.
func snappyB64(in []byte) ([]byte, error) {
compressed := snappy.Encode(nil, in)
outBuf := make([]byte, base64.URLEncoding.EncodedLen(len(compressed)))
base64.URLEncoding.Encode(outBuf, compressed)
return outBuf, nil
}
// The inverse of snappyB64.
func unsnappyB64(in []byte) ([]byte, error) {
unBase64ed := make([]byte, base64.URLEncoding.DecodedLen(len(in)))
n, err := base64.URLEncoding.Decode(unBase64ed, in)
if err != nil {
return nil, err
}
uncompressed, err := snappy.Decode(nil, unBase64ed[:n])
if err != nil {
return nil, err
}
// The snappy library returns nil when the output length is zero. Fix it now.
// I filed this bug upstream: https://code.google.com/p/snappy-go/issues/detail?id=6
if uncompressed == nil {
uncompressed = []byte{}
}
return uncompressed, nil
}