-
Notifications
You must be signed in to change notification settings - Fork 3.5k
/
boolean_decoder.go
102 lines (90 loc) · 3.53 KB
/
boolean_decoder.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package encoding
import (
"github.com/apache/arrow/go/v8/arrow/bitutil"
shared_utils "github.com/apache/arrow/go/v8/internal/utils"
"github.com/apache/arrow/go/v8/parquet"
"github.com/apache/arrow/go/v8/parquet/internal/utils"
"golang.org/x/xerrors"
)
// PlainBooleanDecoder decodes boolean values stored with the Plain encoding,
// where each value occupies a single bit. There is no dictionary decoding
// for bools.
type PlainBooleanDecoder struct {
// decoder is the embedded base decoder (declared elsewhere in this package);
// the data and nvals fields used by Decode are reached through it.
decoder
// bitOffset is the position of the next unread bit within the current
// byte of data; 0 means the decoder is at a byte boundary.
bitOffset int
}
// Type reports the parquet physical type handled by this decoder, which is
// always parquet.Types.Boolean.
func (PlainBooleanDecoder) Type() parquet.Type { return parquet.Types.Boolean }
// Decode fills out with bools decoded from the data at the current point
// or until we reach the end of the data.
//
// Values are read one bit at a time, least-significant bit first, starting
// bitOffset bits into the current byte. The decoder keeps its position
// (current byte and bit offset) between calls, so consecutive calls continue
// exactly where the previous one stopped, even in the middle of a byte.
//
// Returns the number of values decoded, which is the smaller of len(out) and
// the number of values remaining in the decoder.
func (dec *PlainBooleanDecoder) Decode(out []bool) (int, error) {
	toRead := shared_utils.MinInt(len(out), dec.nvals)
	if toRead <= 0 {
		// nothing requested or nothing left; leave the decoder state untouched.
		return 0, nil
	}

	// drainByte copies bits from the current byte (dec.data[0]) into out
	// starting at index i, stopping at the end of the byte or once toRead
	// values have been produced. When every bit of the byte has been consumed
	// it moves dec.data forward and resets bitOffset so the offset always
	// refers to dec.data[0]. It returns the next index of out to fill.
	drainByte := func(i int) int {
		for ; dec.bitOffset < 8 && i < toRead; i, dec.bitOffset = i+1, dec.bitOffset+1 {
			out[i] = (dec.data[0] & byte(1<<dec.bitOffset)) != 0
		}
		if dec.bitOffset == 8 {
			dec.bitOffset = 0
			dec.data = dec.data[1:]
		}
		return i
	}

	// if we aren't at a byte boundary, then get bools until we hit
	// a byte boundary with the bit offset (or run out of values to read).
	i := 0
	if dec.bitOffset != 0 {
		i = drainByte(i)
	}

	// determine the number of full bytes worth of bits we can decode given
	// the number of values still wanted. If the step above stopped mid-byte
	// then i == toRead and batch is 0, so we only get here byte-aligned.
	if batch := (toRead - i) / 8 * 8; batch > 0 {
		// grab the aligned bytes using the SIMD optimized conversion
		// to improve performance.
		alignedBytes := bitutil.BytesForBits(int64(batch))
		utils.BytesToBools(dec.data[:alignedBytes], out[i:i+batch])

		dec.data = dec.data[alignedBytes:]
		i += batch
	}

	// grab any trailing bits (at most 7) now that we've got our aligned
	// bytes; this is a no-op when every requested value is already decoded.
	drainByte(i)

	dec.nvals -= toRead
	return toRead, nil
}
// DecodeSpaced behaves like Decode, but leaves gaps in out for null entries
// according to the validBits bitmap (read starting at validBitsOffset).
//
// When nullCount is positive, len(out)-nullCount values are decoded into the
// front of out and then handed to spacedExpand, whose result is returned as
// the count. With no nulls it is equivalent to calling Decode(out).
func (dec *PlainBooleanDecoder) DecodeSpaced(out []bool, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
	// without nulls there is nothing to spread out; a plain decode suffices.
	if nullCount <= 0 {
		return dec.Decode(out)
	}

	// only the non-null slots carry encoded values.
	want := len(out) - nullCount
	got, err := dec.Decode(out[:want])
	switch {
	case err != nil:
		return 0, err
	case got != want:
		return got, xerrors.New("parquet: boolean decoder: number of values / definition levels read did not match")
	}

	// spacedExpand is defined elsewhere in this package; its return value is
	// passed through unchanged.
	return spacedExpand(out, nullCount, validBits, validBitsOffset), nil
}