forked from ledgerwatch/erigon-lib
/
golomb_rice.go
executable file
·183 lines (158 loc) · 5.75 KB
/
golomb_rice.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
/*
Copyright 2021 Erigon contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package recsplit
import (
"encoding/binary"
"io"
"math/bits"
"unsafe"
"github.com/gateway-fm/cdk-erigon-lib/common/bitutil"
)
// Optimal Golomb-Rice parameters for leaves
var bijMemo = []uint32{0, 0, 0, 1, 3, 4, 5, 7, 8, 10, 11, 12, 14, 15, 16, 18, 19, 21, 22, 23, 25, 26, 28, 29, 30}
// GolombRice can build up the golomb-rice encoding of the sequeuce of numbers, as well as read the numbers back from it.
type GolombRice struct {
data []uint64 // Present in the builder and in the reader
bitCount int // Speficic to the builder - number of bits added to the encoding so far
}
// appendUnaryAll adds the unary encoding of specified sequence of numbers to the end of the
// current encoding
func (g *GolombRice) appendUnaryAll(unary []uint64) {
bitInc := 0
for _, u := range unary {
// Each number u uses u+1 bits for its unary representation
bitInc += int(u) + 1
}
targetSize := (g.bitCount + bitInc + 63) / 64
for len(g.data) < targetSize {
g.data = append(g.data, 0)
}
for _, u := range unary {
g.bitCount += int(u)
appendPtr := g.bitCount / 64
g.data[appendPtr] |= uint64(1) << (g.bitCount & 63)
g.bitCount++
}
}
// appendFixed encodes the next value using specified Golomb parameter. Since we are using Golomb-Rice encoding,
// all Golomb parameters are powers of two. Therefore we input log2 of golomb parameter, rather than golomn paramter itself,
// for convinience
func (g *GolombRice) appendFixed(v uint64, log2golomb int) {
if log2golomb == 0 {
return
}
lowerBits := v & ((uint64(1) << log2golomb) - 1) // Extract the part of the number that will be encoded using truncated binary encoding
usedBits := g.bitCount & 63 // How many bits of the last element of b.data is used by previous value
targetSize := (g.bitCount + log2golomb + 63) / 64
//fmt.Printf("g.bitCount = %d, log2golomb = %d, targetSize = %d\n", g.bitCount, log2golomb, targetSize)
for len(g.data) < targetSize {
g.data = append(g.data, 0)
}
appendPtr := g.bitCount / 64 // The index in b.data corresponding to the last element used by previous value, or if previous values fits perfectly, the index of the next free element
curWord := g.data[appendPtr]
curWord |= lowerBits << usedBits // curWord now contains the new value potentially combined with the part of the previous value
if usedBits+log2golomb > 64 {
// New value overflows to the next element
g.data[appendPtr] = curWord
appendPtr++
curWord = lowerBits >> (64 - usedBits) // curWord now contains the part of the new value that overflows
}
g.data[appendPtr] = curWord
g.bitCount += log2golomb
}
// Bits returns currrent number of bits in the compact encoding of the hash function representation
func (g *GolombRice) Bits() int {
return g.bitCount
}
func (g *GolombRiceReader) ReadReset(bitPos, unaryOffset int) {
g.currFixedOffset = bitPos
unaryPos := bitPos + unaryOffset
g.currPtrUnary = unaryPos / 64
g.currWindowUnary = g.data[g.currPtrUnary] >> (unaryPos & 63)
g.currPtrUnary++
g.validLowerBitsUnary = 64 - (unaryPos & 63)
}
func (g *GolombRiceReader) SkipSubtree(nodes, fixedLen int) {
if nodes <= 0 {
panic("nodes <= 0")
}
missing := nodes
var cnt int
for cnt = bits.OnesCount64(g.currWindowUnary); cnt < missing; cnt = bits.OnesCount64(g.currWindowUnary) {
g.currWindowUnary = g.data[g.currPtrUnary]
g.currPtrUnary++
missing -= cnt
g.validLowerBitsUnary = 64
}
cnt = bitutil.Select64(g.currWindowUnary, missing-1)
g.currWindowUnary >>= cnt
g.currWindowUnary >>= 1
g.validLowerBitsUnary -= cnt + 1
g.currFixedOffset += fixedLen
}
func (g *GolombRiceReader) ReadNext(log2golomb int) uint64 {
var result uint64
if g.currWindowUnary == 0 {
result += uint64(g.validLowerBitsUnary)
g.currWindowUnary = g.data[g.currPtrUnary]
g.currPtrUnary++
g.validLowerBitsUnary = 64
for g.currWindowUnary == 0 {
result += 64
g.currWindowUnary = g.data[g.currPtrUnary]
g.currPtrUnary++
}
}
pos := bits.TrailingZeros64(g.currWindowUnary)
g.currWindowUnary >>= pos
g.currWindowUnary >>= 1
g.validLowerBitsUnary -= pos + 1
result += uint64(pos)
result <<= log2golomb
idx64 := g.currFixedOffset >> 6
var fixed uint64
shift := g.currFixedOffset & 63
fixed = g.data[idx64] >> shift
if shift+log2golomb > 64 {
fixed |= g.data[idx64+1] << (64 - shift)
}
result |= fixed & ((uint64(1) << log2golomb) - 1)
g.currFixedOffset += log2golomb
return result
}
// Data returns the binary representation of the Golomb-Rice code that is built
func (g *GolombRice) Data() []uint64 {
return g.data
}
const maxDataSize = 0xFFFFFFFFFFFF
// Write outputs the state of golomb rice encoding into a writer, which can be recovered later by Read
func (g *GolombRice) Write(w io.Writer) error {
var numBuf [8]byte
binary.BigEndian.PutUint64(numBuf[:], uint64(len(g.data)))
if _, e := w.Write(numBuf[:]); e != nil {
return e
}
p := (*[maxDataSize]byte)(unsafe.Pointer(&g.data[0]))
b := (*p)[:]
if _, e := w.Write(b[:len(g.data)*8]); e != nil {
return e
}
return nil
}
type GolombRiceReader struct {
data []uint64 // Present in the builder and in the reader
currFixedOffset int // Specific to the reader
currWindowUnary uint64
currPtrUnary int
validLowerBitsUnary int
}