-
Notifications
You must be signed in to change notification settings - Fork 2
/
verify_pointsvisitor.go
173 lines (148 loc) · 5.7 KB
/
verify_pointsvisitor.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
package bkd
import (
"bytes"
"context"
"errors"
"fmt"
"github.com/bits-and-blooms/bitset"
"github.com/geange/lucene-go/core/types"
)
// Compile-time assertion that *VerifyPointsVisitor satisfies types.IntersectVisitor.
var _ types.IntersectVisitor = &VerifyPointsVisitor{}
// VerifyPointsVisitor
// Walks the entire N-dimensional points space, verifying that all points fall within the last cell's boundaries.
// The "last cell" is the min/max pair most recently handed to Compare; VisitLeaf checks each point against it.
// Marked lucene.internal, i.e. not intended as a public API.
type VerifyPointsVisitor struct {
// pointCountSeen is incremented once per VisitLeaf call.
pointCountSeen int64
// lastDocID is the docID of the previous point; starts at -1 and is only updated when numDataDims == 1.
lastDocID int
// maxDoc is the value given to the constructor; it sizes docsSeen.
maxDoc int
// docsSeen has a bit set for every docID passed to VisitLeaf.
docsSeen *bitset.BitSet
// lastMinPackedValue holds the lower bound of the cell most recently passed to Compare (index dimensions only).
lastMinPackedValue []byte
// lastMaxPackedValue holds the upper bound of the cell most recently passed to Compare (index dimensions only).
lastMaxPackedValue []byte
// lastPackedValue holds the previous point's value; only maintained when numDataDims == 1.
lastPackedValue []byte
// globalMinPackedValue is the minimum packed value reported by the PointValues given to the constructor.
globalMinPackedValue []byte
// globalMaxPackedValue is the maximum packed value reported by the PointValues given to the constructor.
globalMaxPackedValue []byte
// packedBytesCount is numDataDims * bytesPerDim.
packedBytesCount int
// packedIndexBytesCount is numIndexDims * bytesPerDim.
packedIndexBytesCount int
// numDataDims is the number of data dimensions reported by the PointValues.
numDataDims int
// numIndexDims is the number of index dimensions reported by the PointValues.
numIndexDims int
// bytesPerDim is the width in bytes of a single dimension's value.
bytesPerDim int
// fieldName is the name supplied to the constructor; presumably the field being verified (not read by the methods shown here).
fieldName string
}
// NewVerifyPointsVisitor creates a visitor for the given field.
//
// The dimension counts, per-dimension byte width and global min/max packed
// values are read from values; the first error returned by any of those
// lookups is returned unchanged together with a nil visitor. maxDoc sizes the
// bitset that tracks which documents have been seen.
func NewVerifyPointsVisitor(fieldName string, maxDoc int, values types.PointValues) (*VerifyPointsVisitor, error) {
	dataDims, err := values.GetNumDimensions()
	if err != nil {
		return nil, err
	}

	indexDims, err := values.GetNumIndexDimensions()
	if err != nil {
		return nil, err
	}

	dimWidth, err := values.GetBytesPerDimension()
	if err != nil {
		return nil, err
	}

	globalMin, err := values.GetMinPackedValue()
	if err != nil {
		return nil, err
	}

	globalMax, err := values.GetMaxPackedValue()
	if err != nil {
		return nil, err
	}

	// Byte widths of a full data value and of its index-dimension prefix.
	dataWidth := dataDims * dimWidth
	indexWidth := indexDims * dimWidth

	visitor := new(VerifyPointsVisitor)
	visitor.fieldName = fieldName
	visitor.maxDoc = maxDoc
	visitor.numDataDims = dataDims
	visitor.numIndexDims = indexDims
	visitor.bytesPerDim = dimWidth
	visitor.packedBytesCount = dataWidth
	visitor.packedIndexBytesCount = indexWidth
	visitor.globalMinPackedValue = globalMin
	visitor.globalMaxPackedValue = globalMax

	// No point has been visited yet.
	visitor.pointCountSeen = 0
	visitor.lastDocID = -1

	// Scratch state updated while walking the tree.
	visitor.docsSeen = bitset.New(uint(maxDoc))
	visitor.lastMinPackedValue = make([]byte, indexWidth)
	visitor.lastMaxPackedValue = make([]byte, indexWidth)
	visitor.lastPackedValue = make([]byte, dataWidth)

	return visitor, nil
}
// Visit always fails: it returns a "not available" error for every call and
// ignores both ctx and docID. Points are verified through VisitLeaf, which
// receives the packed value as well.
func (v *VerifyPointsVisitor) Visit(ctx context.Context, docID int) error {
	unsupported := errors.New("not available")
	return unsupported
}
// VisitLeaf checks one indexed point (docID plus its packed value) against the
// bounds of the cell most recently passed to Compare.
//
// It counts the point, marks docID as seen, and then verifies that:
//   - in every index dimension the value lies within
//     [lastMinPackedValue, lastMaxPackedValue];
//   - when there is exactly one data dimension, values arrive in
//     non-decreasing order, and points with equal values arrive in
//     non-decreasing docID order.
//
// A negative docID or a packedValue shorter than packedBytesCount is reported
// as an error. The returned error describes the first violated invariant;
// nil means the point passed every check. ctx is not used.
func (v *VerifyPointsVisitor) VisitLeaf(ctx context.Context, docID int, packedValue []byte) error {
	// Reject malformed input up front so the uint conversion and the slicing
	// below cannot panic.
	if len(packedValue) < v.packedBytesCount {
		return fmt.Errorf("packed value has length=%d but should be %d for docId=%d; field=%q",
			len(packedValue), v.packedBytesCount, docID, v.fieldName)
	}
	if docID < 0 {
		return fmt.Errorf("docId=%d is negative; field=%q", docID, v.fieldName)
	}

	v.pointCountSeen++
	v.docsSeen.Set(uint(docID))

	for dim := 0; dim < v.numIndexDims; dim++ {
		fromIndex := v.bytesPerDim * dim
		toIndex := fromIndex + v.bytesPerDim
		value := packedValue[fromIndex:toIndex]

		// Compare to last cell:
		if bytes.Compare(value, v.lastMinPackedValue[fromIndex:toIndex]) < 0 {
			// This doc's point, in this dimension, is lower than the minimum value of the last cell checked:
			return fmt.Errorf("docId=%d, in this dimension, is lower than the minimum value of the last cell checked", docID)
		}

		if bytes.Compare(value, v.lastMaxPackedValue[fromIndex:toIndex]) > 0 {
			// This doc's point, in this dimension, is greater than the maximum value of the last cell checked:
			return fmt.Errorf("docId=%d, in this dimension, is greater than the maximum value of the last cell checked", docID)
		}
	}

	// In the 1D data case, PointValues must make a single in-order sweep through all values, and tie-break by
	// increasing docID:
	// for data dimension > 1, leaves are sorted by the dimension with the lowest cardinality to improve block compression
	if v.numDataDims == 1 {
		cmp := bytes.Compare(v.lastPackedValue[:v.bytesPerDim], packedValue[:v.bytesPerDim])
		switch {
		case cmp > 0:
			// The previous value is strictly larger: the sweep went backwards.
			return fmt.Errorf("last packed value bigger: docId=%d is out of order vs the previous value; field=%q",
				docID, v.fieldName)
		case cmp == 0 && docID < v.lastDocID:
			// Equal values are legal (several docs may share a value, and the
			// very first value may equal the zero-initialised lastPackedValue),
			// but ties must be broken by increasing docID.
			return fmt.Errorf("packed value is the same, but docId=%d is out of order vs previous docId=%d; field=%q",
				docID, v.lastDocID, v.fieldName)
		}

		copy(v.lastPackedValue, packedValue[:v.bytesPerDim])
		v.lastDocID = docID
	}

	return nil
}
// Compare records [minPackedValue, maxPackedValue] as the "last cell" that
// subsequent VisitLeaf calls are checked against, validates the cell, and
// returns types.CELL_CROSSES_QUERY.
//
// For every index dimension it panics with "CheckIndexException" when
//   - the cell's min is greater than its max, or
//   - the cell's min or max lies outside
//     [globalMinPackedValue, globalMaxPackedValue].
//
// The interface offers no error return, so a panic is the only way this
// method signals a violation. The cell bounds are stored before validation,
// so they are updated even when a check panics.
//
// NOTE(review): arraycopy and compareUnsigned are defined elsewhere in the
// package; they presumably mirror Java's System.arraycopy(src, srcPos, dst,
// dstPos, length) and Arrays.compareUnsigned over [from, to) — confirm.
func (v *VerifyPointsVisitor) Compare(minPackedValue, maxPackedValue []byte) types.Relation {
	arraycopy(minPackedValue, 0, v.lastMinPackedValue, 0, v.packedIndexBytesCount)
	arraycopy(maxPackedValue, 0, v.lastMaxPackedValue, 0, v.packedIndexBytesCount)

	for dim := 0; dim < v.numIndexDims; dim++ {
		fromIndex := v.bytesPerDim * dim
		toIndex := fromIndex + v.bytesPerDim

		// The cell itself must be well-formed: min <= max.
		if compareUnsigned(minPackedValue, fromIndex, toIndex, maxPackedValue, fromIndex, toIndex) > 0 {
			panic("CheckIndexException")
		}

		// Make sure this cell is not outside of the global min/max:
		if compareUnsigned(minPackedValue, fromIndex, toIndex, v.globalMinPackedValue, fromIndex, toIndex) < 0 {
			panic("CheckIndexException")
		}

		if compareUnsigned(maxPackedValue, fromIndex, toIndex, v.globalMinPackedValue, fromIndex, toIndex) < 0 {
			panic("CheckIndexException")
		}

		if compareUnsigned(minPackedValue, fromIndex, toIndex, v.globalMaxPackedValue, fromIndex, toIndex) > 0 {
			panic("CheckIndexException")
		}

		if compareUnsigned(maxPackedValue, fromIndex, toIndex, v.globalMaxPackedValue, fromIndex, toIndex) > 0 {
			panic("CheckIndexException")
		}
	}

	// We always pretend the query shape is so complex that it crosses every cell, so
	// that packedValue is passed for every document
	return types.CELL_CROSSES_QUERY
}
// Grow is a no-op: the body is empty, so count is ignored and no state changes.
// NOTE(review): presumably present only to satisfy types.IntersectVisitor — confirm.
func (v *VerifyPointsVisitor) Grow(count int) {
// Intentionally empty.
}