-
Notifications
You must be signed in to change notification settings - Fork 0
/
bloom.go
71 lines (61 loc) · 1.52 KB
/
bloom.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
package bloom
import (
"fmt"
"hash"
"hash/fnv"
"sync"
"github.com/binaek/gocoll/trie"
)
type BloomConfig func(*BloomFilter)
func WithHashers(hashers []hash.Hash) BloomConfig {
return func(bf *BloomFilter) {
bf.hashers = hashers
}
}
// BloomFilter represents a Bloom Filter data structure.
type BloomFilter struct {
trie trie.Tree[struct{}]
hashers []hash.Hash
lock sync.RWMutex
}
// NewBloomFilter creates and returns a new BloomFilter instance.
func NewBloomFilter(config ...BloomConfig) *BloomFilter {
bf := &BloomFilter{
trie: trie.NewConcurrentTree[struct{}](),
hashers: []hash.Hash{fnv.New64(), fnv.New64a()},
}
for _, c := range config {
c(bf)
}
return bf
}
// Add inserts a token into the Bloom Filter.
func (bf *BloomFilter) Add(token string) {
hashes := bf.getHashes(token)
for _, hash := range hashes {
bf.trie.InsertB(hash, struct{}{} /* add an empty struct */)
}
}
// Test checks if a token is possibly in the Bloom Filter.
func (bf *BloomFilter) Test(token string) bool {
hashes := bf.getHashes(token)
for _, hash := range hashes {
if _, found := bf.trie.Find(string(hash)); !found {
return false
}
}
return true
}
func (bf *BloomFilter) getHashes(token string) [][]byte {
bf.lock.Lock()
defer bf.lock.Unlock()
hashes := make([][]byte, len(bf.hashers))
for i, hasher := range bf.hashers {
hasher.Reset()
hasher.Write([]byte(token))
// add a salt to reduce the chance of hash collision
hasher.Write([]byte(fmt.Sprint(i)))
hashes[i] = hasher.Sum(nil)
}
return hashes
}