-
Notifications
You must be signed in to change notification settings - Fork 0
/
MonotonicBlockPackedWriter.java
107 lines (96 loc) · 3.9 KB
/
MonotonicBlockPackedWriter.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.packed;
import static org.apache.lucene.util.packed.MonotonicBlockPackedReader.expected;
import java.io.IOException;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.BitUtil;
/**
* A writer for large monotonically 单调的 increasing sequences of positive longs.
* <p>
* The sequence is divided into fixed-size blocks and for each block, values
* are modeled after a linear function f: x → A × x + B. The block
* encodes deltas from the expected values computed from this function using as
* few bits as possible.
* <p>
* Format:
* <ul>
* <li><BLock><sup>BlockCount</sup>
* <li>BlockCount: ⌈ ValueCount / BlockSize ⌉
* <li>Block: <Header, (Ints)>
* <li>Header: <B, A, BitsPerValue>
* <li>B: the B from f: x → A × x + B using a
* {@link BitUtil#zigZagEncode(long) zig-zag encoded}
* {@link DataOutput#writeVLong(long) vLong}
* <li>A: the A from f: x → A × x + B encoded using
* {@link Float#floatToIntBits(float)} on
* {@link DataOutput#writeInt(int) 4 bytes}
* <li>BitsPerValue: a {@link DataOutput#writeVInt(int) variable-length int}
* <li>Ints: if BitsPerValue is <tt>0</tt>, then there is nothing to read and
* all values perfectly match the result of the function. Otherwise, these
* are the {@link PackedInts packed} deltas from the expected value
* (computed from the function) using exactly BitsPerValue bits per value.
* </ul>
* @see MonotonicBlockPackedReader 单调的
* @lucene.internal
*/
public final class MonotonicBlockPackedWriter extends AbstractBlockPackedWriter {
/**
* Sole constructor.
* @param blockSize the number of values of a single block, must be a power of 2
*/
public MonotonicBlockPackedWriter(DataOutput out, int blockSize) {
super(out, blockSize);
}
@Override
public void add(long l) throws IOException {
assert l >= 0;
super.add(l);
}
protected void flush() throws IOException {
assert off > 0;
// 第一个元素到最后一个元素的斜率, 拟合直线l1
final float avg = off == 1 ? 0f : (float) (values[off - 1] - values[0]) / (off - 1);
long min = values[0];
// adjust min so that all deltas will be positive
// 调整,生成拟合直线l2, 使所有的delta都是正值
for (int i = 1; i < off; ++i) {
final long actual = values[i];
final long expected = expected(min, avg, i);
if (expected > actual) {
min -= (expected - actual);
}
}
long maxDelta = 0;
for (int i = 0; i < off; ++i) {
values[i] = values[i] - expected(min, avg, i);
maxDelta = Math.max(maxDelta, values[i]);
}
// min是最小值,values[i]是斜线上方的真实数据点到拟合直线l2上点的差值
out.writeZLong(min);
out.writeInt(Float.floatToIntBits(avg));
if (maxDelta == 0) {
out.writeVInt(0);
} else {
// 根据最大差值,来确定差值使用几个bit位表示
final int bitsRequired = PackedInts.bitsRequired(maxDelta);
out.writeVInt(bitsRequired);
writeValues(bitsRequired);
}
off = 0;
}
}