-
Notifications
You must be signed in to change notification settings - Fork 1
/
utils.c
101 lines (83 loc) · 2.38 KB
/
utils.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#include <ctype.h>
#include "utils.h"
/* -------------------------- hash functions -------------------------------- */
/* Thomas Wang's 32 bit Mix Function */
uint32_t generate_integer_hash(uint32_t key)
{
key += ~(key << 15);
key ^= (key >> 10);
key += (key << 3);
key ^= (key >> 6);
key += ~(key << 11);
key ^= (key >> 16);
return key;
}
/* Identity hash function for integer keys */
uint32_t generate_identity_hash(uint32_t key)
{
return key;
}
static uint32_t hash_function_seed = 5381;
void set_hash_function_seed(uint32_t seed)
{
hash_function_seed = seed;
}
uint32_t get_hash_function_seed(void)
{
return hash_function_seed;
}
/* MurmurHash2, by Austin Appleby
* Note - This code makes a few assumptions about how your machine behaves -
* 1. We can read a 4-byte value from any address without crashing
* 2. sizeof(int) == 4
*
* And it has a few limitations -
*
* 1. It will not work incrementally.
* 2. It will not produce the same results on little-endian and big-endian
* machines.
*
* 算法的具体信息可以参考 http://code.google.com/p/smhasher/
*/
uint32_t generate_hash(const void *key, int32_t len)
{
/* 'm' and 'r' are mixing constants generated offline.
They're not really 'magic', they just happen to work well. */
uint32_t seed = hash_function_seed;
const uint32_t m = 0x5bd1e995;
const int r = 24;
/* Initialize the hash to a 'random' value */
uint32_t h = seed ^ len;
/* Mix 4 bytes at a time into the hash */
const unsigned char *data = (const unsigned char *)key;
while(len >= 4) {
uint32_t k = *(uint32_t*)data;
k *= m;
k ^= k >> r;
k *= m;
h *= m;
h ^= k;
data += 4;
len -= 4;
}
/* Handle the last few bytes of the input array */
switch(len) {
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0]; h *= m;
};
/* Do a few final mixes of the hash to ensure the last few
* bytes are well-incorporated. */
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return (uint32_t)h;
}
/* And a case insensitive hash function (based on djb hash) */
uint32_t generate_case_hash(const unsigned char *buf, int len)
{
uint32_t hash = (uint32_t)hash_function_seed;
while (len--)
hash = ((hash << 5) + hash) + (tolower(*buf++)); /* hash * 33 + c */
return hash;
}