Skip to content
Newer
Older
100644 375 lines (330 sloc) 9.07 KB
ff25e3b @tudor Add Bob Jenkins's SpookyHash to folly.
tudor authored
1 /*
2 * Copyright 2012 Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 // Spooky Hash
18 // A 128-bit noncryptographic hash, for checksums and table lookup
19 // By Bob Jenkins. Public domain.
20 // Oct 31 2010: published framework, disclaimer ShortHash isn't right
21 // Nov 7 2010: disabled ShortHash
22 // Oct 31 2011: replace End, ShortMix, ShortEnd, enable ShortHash again
23 // April 10 2012: buffer overflow on platforms without unaligned reads
24 // July 12 2012: was passing out variables in final to in/out in short
25 // July 30 2012: I reintroduced the buffer overflow
26
27 #include "folly/SpookyHash.h"
28
29 #include <cstring>
30
31 #define ALLOW_UNALIGNED_READS 1
32
33 namespace folly {
34 namespace hash {
35
36 //
37 // short hash ... it could be used on any message,
38 // but it's used by Spooky just for short messages.
39 //
40 void SpookyHash::Short(
41 const void *message,
42 size_t length,
43 uint64_t *hash1,
44 uint64_t *hash2)
45 {
46 uint64_t buf[2*sc_numVars];
47 union
48 {
49 const uint8_t *p8;
50 uint32_t *p32;
51 uint64_t *p64;
52 size_t i;
53 } u;
54
55 u.p8 = (const uint8_t *)message;
56
57 if (!ALLOW_UNALIGNED_READS && (u.i & 0x7))
58 {
59 memcpy(buf, message, length);
60 u.p64 = buf;
61 }
62
63 size_t remainder = length%32;
64 uint64_t a=*hash1;
65 uint64_t b=*hash2;
66 uint64_t c=sc_const;
67 uint64_t d=sc_const;
68
69 if (length > 15)
70 {
71 const uint64_t *end = u.p64 + (length/32)*4;
72
73 // handle all complete sets of 32 bytes
74 for (; u.p64 < end; u.p64 += 4)
75 {
76 c += u.p64[0];
77 d += u.p64[1];
78 ShortMix(a,b,c,d);
79 a += u.p64[2];
80 b += u.p64[3];
81 }
82
83 //Handle the case of 16+ remaining bytes.
84 if (remainder >= 16)
85 {
86 c += u.p64[0];
87 d += u.p64[1];
88 ShortMix(a,b,c,d);
89 u.p64 += 2;
90 remainder -= 16;
91 }
92 }
93
94 // Handle the last 0..15 bytes, and its length
95 d = ((uint64_t)length) << 56;
96 switch (remainder)
97 {
98 case 15:
99 d += ((uint64_t)u.p8[14]) << 48;
100 case 14:
101 d += ((uint64_t)u.p8[13]) << 40;
102 case 13:
103 d += ((uint64_t)u.p8[12]) << 32;
104 case 12:
105 d += u.p32[2];
106 c += u.p64[0];
107 break;
108 case 11:
109 d += ((uint64_t)u.p8[10]) << 16;
110 case 10:
111 d += ((uint64_t)u.p8[9]) << 8;
112 case 9:
113 d += (uint64_t)u.p8[8];
114 case 8:
115 c += u.p64[0];
116 break;
117 case 7:
118 c += ((uint64_t)u.p8[6]) << 48;
119 case 6:
120 c += ((uint64_t)u.p8[5]) << 40;
121 case 5:
122 c += ((uint64_t)u.p8[4]) << 32;
123 case 4:
124 c += u.p32[0];
125 break;
126 case 3:
127 c += ((uint64_t)u.p8[2]) << 16;
128 case 2:
129 c += ((uint64_t)u.p8[1]) << 8;
130 case 1:
131 c += (uint64_t)u.p8[0];
132 break;
133 case 0:
134 c += sc_const;
135 d += sc_const;
136 }
137 ShortEnd(a,b,c,d);
138 *hash1 = a;
139 *hash2 = b;
140 }
141
142
143
144
145 // do the whole hash in one call
146 void SpookyHash::Hash128(
147 const void *message,
148 size_t length,
149 uint64_t *hash1,
150 uint64_t *hash2)
151 {
152 if (length < sc_bufSize)
153 {
154 Short(message, length, hash1, hash2);
155 return;
156 }
157
158 uint64_t h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11;
159 uint64_t buf[sc_numVars];
160 uint64_t *end;
161 union
162 {
163 const uint8_t *p8;
164 uint64_t *p64;
165 size_t i;
166 } u;
167 size_t remainder;
168
169 h0=h3=h6=h9 = *hash1;
170 h1=h4=h7=h10 = *hash2;
171 h2=h5=h8=h11 = sc_const;
172
173 u.p8 = (const uint8_t *)message;
174 end = u.p64 + (length/sc_blockSize)*sc_numVars;
175
176 // handle all whole sc_blockSize blocks of bytes
177 if (ALLOW_UNALIGNED_READS || ((u.i & 0x7) == 0))
178 {
179 while (u.p64 < end)
180 {
181 Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
182 u.p64 += sc_numVars;
183 }
184 }
185 else
186 {
187 while (u.p64 < end)
188 {
189 memcpy(buf, u.p64, sc_blockSize);
190 Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
191 u.p64 += sc_numVars;
192 }
193 }
194
195 // handle the last partial block of sc_blockSize bytes
196 remainder = (length - ((const uint8_t *)end-(const uint8_t *)message));
197 memcpy(buf, end, remainder);
198 memset(((uint8_t *)buf)+remainder, 0, sc_blockSize-remainder);
199 ((uint8_t *)buf)[sc_blockSize-1] = remainder;
200 Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
201
202 // do some final mixing
203 End(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
204 *hash1 = h0;
205 *hash2 = h1;
206 }
207
208
209
210 // init spooky state
211 void SpookyHash::Init(uint64_t seed1, uint64_t seed2)
212 {
213 m_length = 0;
214 m_remainder = 0;
215 m_state[0] = seed1;
216 m_state[1] = seed2;
217 }
218
219
220 // add a message fragment to the state
221 void SpookyHash::Update(const void *message, size_t length)
222 {
223 uint64_t h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11;
224 size_t newLength = length + m_remainder;
225 uint8_t remainder;
226 union
227 {
228 const uint8_t *p8;
229 uint64_t *p64;
230 size_t i;
231 } u;
232 const uint64_t *end;
233
234 // Is this message fragment too short? If it is, stuff it away.
235 if (newLength < sc_bufSize)
236 {
237 memcpy(&((uint8_t *)m_data)[m_remainder], message, length);
238 m_length = length + m_length;
239 m_remainder = (uint8_t)newLength;
240 return;
241 }
242
243 // init the variables
244 if (m_length < sc_bufSize)
245 {
246 h0=h3=h6=h9 = m_state[0];
247 h1=h4=h7=h10 = m_state[1];
248 h2=h5=h8=h11 = sc_const;
249 }
250 else
251 {
252 h0 = m_state[0];
253 h1 = m_state[1];
254 h2 = m_state[2];
255 h3 = m_state[3];
256 h4 = m_state[4];
257 h5 = m_state[5];
258 h6 = m_state[6];
259 h7 = m_state[7];
260 h8 = m_state[8];
261 h9 = m_state[9];
262 h10 = m_state[10];
263 h11 = m_state[11];
264 }
265 m_length = length + m_length;
266
267 // if we've got anything stuffed away, use it now
268 if (m_remainder)
269 {
270 uint8_t prefix = sc_bufSize-m_remainder;
271 memcpy(&(((uint8_t *)m_data)[m_remainder]), message, prefix);
272 u.p64 = m_data;
273 Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
274 Mix(&u.p64[sc_numVars], h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
275 u.p8 = ((const uint8_t *)message) + prefix;
276 length -= prefix;
277 }
278 else
279 {
280 u.p8 = (const uint8_t *)message;
281 }
282
283 // handle all whole blocks of sc_blockSize bytes
284 end = u.p64 + (length/sc_blockSize)*sc_numVars;
285 remainder = (uint8_t)(length-((const uint8_t *)end-u.p8));
286 if (ALLOW_UNALIGNED_READS || (u.i & 0x7) == 0)
287 {
288 while (u.p64 < end)
289 {
290 Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
291 u.p64 += sc_numVars;
292 }
293 }
294 else
295 {
296 while (u.p64 < end)
297 {
298 memcpy(m_data, u.p8, sc_blockSize);
299 Mix(m_data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
300 u.p64 += sc_numVars;
301 }
302 }
303
304 // stuff away the last few bytes
305 m_remainder = remainder;
306 memcpy(m_data, end, remainder);
307
308 // stuff away the variables
309 m_state[0] = h0;
310 m_state[1] = h1;
311 m_state[2] = h2;
312 m_state[3] = h3;
313 m_state[4] = h4;
314 m_state[5] = h5;
315 m_state[6] = h6;
316 m_state[7] = h7;
317 m_state[8] = h8;
318 m_state[9] = h9;
319 m_state[10] = h10;
320 m_state[11] = h11;
321 }
322
323
324 // report the hash for the concatenation of all message fragments so far
325 void SpookyHash::Final(uint64_t *hash1, uint64_t *hash2)
326 {
327 // init the variables
328 if (m_length < sc_bufSize)
329 {
330 *hash1 = m_state[0];
331 *hash2 = m_state[1];
332 Short( m_data, m_length, hash1, hash2);
333 return;
334 }
335
336 const uint64_t *data = (const uint64_t *)m_data;
337 uint8_t remainder = m_remainder;
338
339 uint64_t h0 = m_state[0];
340 uint64_t h1 = m_state[1];
341 uint64_t h2 = m_state[2];
342 uint64_t h3 = m_state[3];
343 uint64_t h4 = m_state[4];
344 uint64_t h5 = m_state[5];
345 uint64_t h6 = m_state[6];
346 uint64_t h7 = m_state[7];
347 uint64_t h8 = m_state[8];
348 uint64_t h9 = m_state[9];
349 uint64_t h10 = m_state[10];
350 uint64_t h11 = m_state[11];
351
352 if (remainder >= sc_blockSize)
353 {
354 // m_data can contain two blocks; handle any whole first block
355 Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
356 data += sc_numVars;
357 remainder -= sc_blockSize;
358 }
359
360 // mix in the last partial block, and the length mod sc_blockSize
361 memset(&((uint8_t *)data)[remainder], 0, (sc_blockSize-remainder));
362
363 ((uint8_t *)data)[sc_blockSize-1] = remainder;
364 Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
365
366 // do some final mixing
367 End(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
368
369 *hash1 = h0;
370 *hash2 = h1;
371 }
372
373 } // namespace hash
374 } // namespace folly
Something went wrong with that request. Please try again.