/* NOLINT(build/header_guard) */
/* Copyright 2018 Google Inc. All Rights Reserved.
   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

/* template parameters: FN, JUMP, NUMBUCKETS, MASK, CHUNKLEN */
/* NUMBUCKETS / (MASK + 1) = probability of storing and using hash code. */
/* JUMP = skip bytes for speedup */

/* Rolling hash for long distance long string matches. Stores one position
   per bucket, bucket key is computed over a long region. */
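/* Illustrative instantiation (an assumption for exposition; the actual
   values live in brotli's hash.h and may differ between versions):

     #define HASHER() HROLLING
     #define JUMP 1
     #define CHUNKLEN 32
     #define NUMBUCKETS 16777216
     #define MASK ((NUMBUCKETS * 64) - 1)
     #include "hash_rolling_inc.h"

   With these values, NUMBUCKETS / (MASK + 1) = 1/64, so roughly one in 64
   hash codes is stored and used. */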
#define HashRolling HASHER()
static const uint32_t FN(kRollingHashMul32) = 69069;
static const uint32_t FN(kInvalidPos) = 0xffffffff;
/* This hasher uses a longer forward length, but returning a higher value here
   would hurt the main hasher's compression when the two are combined in a
   composite hasher. This hasher checks the forward length itself instead. */
static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
/* Computes a code from a single byte. A lookup table of 256 values could be
   used, but simply adding 1 works about as well. */
static uint32_t FN(HashByte)(uint8_t byte) {
  return (uint32_t)byte + 1u;
}
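/* Illustrative note: the "+ 1" guarantees that a zero byte still perturbs
   the state. With a plain cast, long runs of 0x00 would contribute nothing
   to the rolling hash, e.g.:

     FN(HashByte)(0x00) == 1    (instead of 0)
     FN(HashByte)(0xff) == 256
*/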
static uint32_t FN(HashRollingFunctionInitial)(uint32_t state, uint8_t add,
                                               uint32_t factor) {
  return (uint32_t)(factor * state + FN(HashByte)(add));
}

static uint32_t FN(HashRollingFunction)(uint32_t state, uint8_t add,
                                        uint8_t rem, uint32_t factor,
                                        uint32_t factor_remove) {
  return (uint32_t)(factor * state +
                    FN(HashByte)(add) - factor_remove * FN(HashByte)(rem));
}
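/* Sliding-window sketch (illustrative; arithmetic is mod 2**32 throughout):
   after k calls to FN(HashRollingFunctionInitial) the state is the
   polynomial

     state = h(b[0])*f^(k-1) + h(b[1])*f^(k-2) + ... + h(b[k-1])

   with f = factor and h = FN(HashByte). Taking factor_remove = f^k, one
   call to FN(HashRollingFunction) with add = b[k] and rem = b[0] gives

     f*state + h(b[k]) - f^k*h(b[0])
       = h(b[1])*f^(k-1) + ... + h(b[k-1])*f + h(b[k]),

   i.e. the same polynomial over the window shifted forward by one sample. */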
typedef struct HashRolling {
  uint32_t state;          /* Current rolling hash state. */
  uint32_t* table;         /* One stored position per hash bucket. */
  size_t next_ix;          /* Next position whose byte must be rolled in. */
  uint32_t chunk_len;      /* Length of the hashed region. */
  uint32_t factor;         /* Rolling hash multiplier. */
  uint32_t factor_remove;  /* Weight of the oldest sample in the window,
                              used to remove it from the state. */
} HashRolling;
static BROTLI_INLINE HashRolling* FN(Self)(HasherHandle handle) {
  return (HashRolling*)&(GetHasherCommon(handle)[1]);
}
static void FN(Initialize)(
    HasherHandle handle, const BrotliEncoderParams* params) {
  HashRolling* self = FN(Self)(handle);
  size_t i;
  self->state = 0;
  self->next_ix = 0;
  self->factor = FN(kRollingHashMul32);
  /* Compute the factor of the oldest byte to remove:
     factor**(CHUNKLEN / JUMP) modulo 2**32 (the multiplications rely on
     32-bit unsigned overflow). */
  self->factor_remove = 1;
  for (i = 0; i < CHUNKLEN; i += JUMP) {
    self->factor_remove *= self->factor;
  }
  self->table = (uint32_t*)((HasherHandle)self + sizeof(HashRolling));
  for (i = 0; i < NUMBUCKETS; i++) {
    self->table[i] = FN(kInvalidPos);
  }
  BROTLI_UNUSED(params);
}
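/* Worked example (assumed CHUNKLEN = 32, JUMP = 4): the loop above runs
   CHUNKLEN / JUMP = 8 times, so factor_remove == factor**8 (mod 2**32).
   That is exactly the weight the oldest sample carries in the window
   polynomial, so FN(HashRollingFunction) can cancel it with a single
   multiply-and-subtract. */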
static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
                        size_t input_size, const uint8_t* data) {
  HashRolling* self = FN(Self)(handle);
  size_t i;
  /* Input is too small; this hasher cannot be used. */
  if (input_size < CHUNKLEN) return;
  self->state = 0;
  for (i = 0; i < CHUNKLEN; i += JUMP) {
    self->state = FN(HashRollingFunctionInitial)(
        self->state, data[i], self->factor);
  }
  BROTLI_UNUSED(one_shot);
}
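/* Example (assumed CHUNKLEN = 32, JUMP = 4): the loop above folds
   data[0], data[4], ..., data[28] into the state, i.e. the rolling hash
   samples every JUMP-th byte of the CHUNKLEN-byte window rather than every
   byte, trading some discrimination for speed. */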
static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
    size_t input_size) {
  BROTLI_UNUSED(params);
  BROTLI_UNUSED(one_shot);
  BROTLI_UNUSED(input_size);
  return sizeof(HashRolling) + NUMBUCKETS * sizeof(uint32_t);
}
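/* Illustrative sizing (assumed NUMBUCKETS = 1 << 24): the single allocation
   holds the HashRolling struct followed by a 16777216 * 4 = 64 MiB position
   table; FN(Initialize) derives the table pointer from that same block. */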
static BROTLI_INLINE void FN(Store)(HasherHandle BROTLI_RESTRICT handle,
    const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
  BROTLI_UNUSED(handle);
  BROTLI_UNUSED(data);
  BROTLI_UNUSED(mask);
  BROTLI_UNUSED(ix);
}
static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
    const uint8_t* data, const size_t mask, const size_t ix_start,
    const size_t ix_end) {
  BROTLI_UNUSED(handle);
  BROTLI_UNUSED(data);
  BROTLI_UNUSED(mask);
  BROTLI_UNUSED(ix_start);
  BROTLI_UNUSED(ix_end);
}
static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
    size_t ring_buffer_mask) {
  /* In this case we must re-initialize the hasher from scratch from the
     current position. */
  HashRolling* self = FN(Self)(handle);
  size_t position_masked;
  size_t available = num_bytes;
  if ((position & (JUMP - 1)) != 0) {
    size_t diff = JUMP - (position & (JUMP - 1));
    available = (diff > available) ? 0 : (available - diff);
    position += diff;
  }
  position_masked = position & ring_buffer_mask;
  /* Wrapping around the ring buffer is not handled. */
  if (available > ring_buffer_mask - position_masked) {
    available = ring_buffer_mask - position_masked;
  }
  FN(Prepare)(handle, BROTLI_FALSE, available,
              ringbuffer + (position & ring_buffer_mask));
  self->next_ix = position;
}
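/* Worked example (assumed JUMP = 4): if position == 10, then
   position & (JUMP - 1) == 2, so diff == 2 and re-initialization starts at
   the next JUMP-aligned position, 12, with `available` shrunk by 2. This
   keeps the recomputed state consistent with the
   (cur_ix & (JUMP - 1)) == 0 stepping in FN(FindLongestMatch). */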
static BROTLI_INLINE void FN(PrepareDistanceCache)(
    HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
  BROTLI_UNUSED(handle);
  BROTLI_UNUSED(distance_cache);
}
static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
    const BrotliEncoderDictionary* dictionary,
    const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
    const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
    const size_t max_length, const size_t max_backward, const size_t gap,
    const size_t max_distance, HasherSearchResult* BROTLI_RESTRICT out) {
  HashRolling* self = FN(Self)(handle);
  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
  size_t pos;
  if ((cur_ix & (JUMP - 1)) != 0) return;
  /* Not enough lookahead. */
  if (max_length < CHUNKLEN) return;
  for (pos = self->next_ix; pos <= cur_ix; pos += JUMP) {
    uint32_t code = self->state & MASK;
    uint8_t rem = data[pos & ring_buffer_mask];
    uint8_t add = data[(pos + CHUNKLEN) & ring_buffer_mask];
    size_t found_ix = FN(kInvalidPos);
    self->state = FN(HashRollingFunction)(
        self->state, add, rem, self->factor, self->factor_remove);
    if (code < NUMBUCKETS) {
      found_ix = self->table[code];
      self->table[code] = (uint32_t)pos;
      if (pos == cur_ix && found_ix != FN(kInvalidPos)) {
        /* The cast to 32-bit makes backward distances up to 4GB work even
           if cur_ix is above 4GB, despite using 32-bit values in the
           table. */
        size_t backward = (uint32_t)(cur_ix - found_ix);
        if (backward <= max_backward) {
          const size_t found_ix_masked = found_ix & ring_buffer_mask;
          const size_t len = FindMatchLengthWithLimit(&data[found_ix_masked],
                                                      &data[cur_ix_masked],
                                                      max_length);
          if (len >= 4 && len > out->len) {
            score_t score = BackwardReferenceScore(len, backward);
            if (score > out->score) {
              out->len = len;
              out->distance = backward;
              out->score = score;
              out->len_code_delta = 0;
            }
          }
        }
      }
    }
  }
  self->next_ix = cur_ix + JUMP;
  /* NOTE: this hasher does not search the dictionary. It is used as a
     backup hasher; the main hasher already searches the dictionary. */
  BROTLI_UNUSED(dictionary);
  BROTLI_UNUSED(distance_cache);
  BROTLI_UNUSED(gap);
  BROTLI_UNUSED(max_distance);
}
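/* Illustrative walk-through: at each JUMP-aligned position the current
   state is reduced to a code with MASK; only codes below NUMBUCKETS (about
   1 in 64 under the assumed parameters above) hit the table. A stored
   position becomes a candidate match only when pos == cur_ix, its backward
   distance fits within max_backward, and the match is at least 4 bytes long
   and improves on out->score. */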
#undef HashRolling