-
Notifications
You must be signed in to change notification settings - Fork 3.5k
/
hash_funcs.go
90 lines (81 loc) · 3.08 KB
/
hash_funcs.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package hashing
import (
"math/bits"
"unsafe"
"github.com/zeebo/xxh3"
)
func hashInt(val uint64, alg uint64) uint64 {
// Two of xxhash's prime multipliers (which are chosen for their
// bit dispersion properties)
var multipliers = [2]uint64{11400714785074694791, 14029467366897019727}
// Multiplying by the prime number mixes the low bits into the high bits,
// then byte-swapping (which is a single CPU instruction) allows the
// combined high and low bits to participate in the initial hash table index.
return bits.ReverseBytes64(multipliers[alg] * val)
}
func hashFloat32(val float32, alg uint64) uint64 {
// grab the raw byte pattern of the
bt := *(*[4]byte)(unsafe.Pointer(&val))
x := uint64(*(*uint32)(unsafe.Pointer(&bt[0])))
hx := hashInt(x, alg)
hy := hashInt(x, alg^1)
return 4 ^ hx ^ hy
}
func hashFloat64(val float64, alg uint64) uint64 {
bt := *(*[8]byte)(unsafe.Pointer(&val))
hx := hashInt(uint64(*(*uint32)(unsafe.Pointer(&bt[4]))), alg)
hy := hashInt(uint64(*(*uint32)(unsafe.Pointer(&bt[0]))), alg^1)
return 8 ^ hx ^ hy
}
// prime constants used for slightly increasing the hash quality further
var exprimes = [2]uint64{1609587929392839161, 9650029242287828579}
// for smaller amounts of bytes this is faster than even calling into
// xxh3 to do the Hash, so we specialize in order to get the benefits
// of that performance.
func Hash(b []byte, alg uint64) uint64 {
n := uint32(len(b))
if n <= 16 {
switch {
case n > 8:
// 8 < length <= 16
// apply same principle as above, but as two 64-bit ints
x := *(*uint64)(unsafe.Pointer(&b[n-8]))
y := *(*uint64)(unsafe.Pointer(&b[0]))
hx := hashInt(x, alg)
hy := hashInt(y, alg^1)
return uint64(n) ^ hx ^ hy
case n >= 4:
// 4 < length <= 8
// we can read the bytes as two overlapping 32-bit ints, apply different
// hash functions to each in parallel
// then xor the results
x := *(*uint32)(unsafe.Pointer(&b[n-4]))
y := *(*uint32)(unsafe.Pointer(&b[0]))
hx := hashInt(uint64(x), alg)
hy := hashInt(uint64(y), alg^1)
return uint64(n) ^ hx ^ hy
case n > 0:
x := uint32((n << 24) ^ (uint32(b[0]) << 16) ^ (uint32(b[n/2]) << 8) ^ uint32(b[n-1]))
return hashInt(uint64(x), alg)
case n == 0:
return 1
}
}
// increase differentiation enough to improve hash quality
return xxh3.Hash(b) + exprimes[alg]
}