-
Notifications
You must be signed in to change notification settings - Fork 0
/
gear.go
52 lines (43 loc) · 1.17 KB
/
gear.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
package main
import "math/rand"
const (
	// splitMask selects the 20 most-significant bits of the 32-bit rolling
	// hash (0xFFFFF000). FindSplit reports a split when all selected bits are
	// zero; for a uniformly distributed hash that is one position in 2^20
	// (~1M), which gives an average block size of roughly 1 MiB.
	//
	// The high bits are used rather than the low ones because the hash is
	// shifted left by one bit per input byte: the low bits are influenced only
	// by the most recent bytes, while the top bits mix in the widest window.
	splitMask uint32 = ((1 << 20) - 1) << (32 - 20)

	// initialHash is the starting value of the rolling hash. It is an
	// arbitrary "nothing up my sleeve" constant taken from the leading digits
	// of the square root of 2020 (~44.9444; note 0x2c == 44).
	initialHash = 0x2cf1c4dc

	// Minimum and maximum chunk sizes, to avoid degenerate chunks (very small
	// or huge).
	minChunkSize = 1 << 16 // 64 KiB
	maxChunkSize = 1 << 24 // 16 MiB
)
// byteVal is the gear lookup table: one 32-bit value per possible input byte.
// It is filled in once by init.
var byteVal [256]uint32
// init fills byteVal with values drawn from a generator seeded with a fixed
// constant, so the table does not vary from run to run.
func init() {
	rng := rand.New(rand.NewSource(42))
	for i := range byteVal {
		byteVal[i] = rng.Uint32()
	}
}
// GearRollingHash holds the state of a Gear rolling hash used to locate
// content-defined split points. For the algorithm, see
// https://en.wikipedia.org/wiki/Rolling_hash#Gear_fingerprint_and_content-based_chunking_algorithm_FastCDC
type GearRollingHash struct {
	h       uint32 // current rolling hash value
	written int    // number of bytes fed through FindSplit so far
}
// NewGearHash returns a GearRollingHash whose hash starts at initialHash and
// whose byte counter is zero.
func NewGearHash() GearRollingHash {
	return GearRollingHash{
		h:       initialHash,
		written: 0,
	}
}
// FindSplit feeds data into the rolling hash one byte at a time and reports
// where the current chunk should end.
//
// It returns the index just past the split byte (i.e. how many leading bytes
// of data belong to the current chunk), or -1 if all of data was consumed
// without reaching a split. A split is reported as soon as the running byte
// count reaches maxChunkSize, or, once at least minChunkSize bytes have been
// seen, when the hash bits selected by splitMask are all zero.
//
// The hash and byte count carry over between calls, so a chunk may span
// several calls. Neither is reset when a split is returned.
func (g *GearRollingHash) FindSplit(data []byte) int {
	for pos, b := range data {
		// Shift out the oldest contribution and mix in the new byte.
		g.h = g.h<<1 + byteVal[b]
		g.written++

		switch {
		case g.written >= maxChunkSize:
			// Hard cap: force a split regardless of the hash.
			return pos + 1
		case g.written >= minChunkSize && g.h&splitMask == 0:
			return pos + 1
		}
	}
	return -1
}