/
filters.go
60 lines (49 loc) · 1.63 KB
/
filters.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
// Package bloom creates and serves bloom filters for stemmed canonical names
// and for names of viruses. The filters persist for the whole life of the
// program and are used to quickly find exact matches to the database data.
package bloom
import (
"sync"
baseBloomfilter "github.com/devopsfaith/bloomfilter/bloomfilter"
"github.com/rs/zerolog/log"
)
// File names used for the on-disk cache of bloom filters.
const (
	// canonicalStemFile is the cache file name for the canonical stems filter.
	canonicalStemFile = "canonical_stems.bf"
	// sizesFile is a CSV file name; presumably it keeps the sizes (entry
	// counts) of the canonical filters. TODO confirm against the code that
	// writes it.
	sizesFile = "canonical_sizes.csv"
	// virusFile is the cache file name for the viruses filter.
	virusFile = "viruses.bf"
)
// bloomFilters contains the bloom filter data used for matching.
type bloomFilters struct {
	// canonicalStem is a filter for matching stemmed canonical names.
	canonicalStem *baseBloomfilter.Bloomfilter
	// canonicalSize is the number of entries in the 'simple' canonical filter.
	// It is used as an option during canonical filter creation.
	canonicalSize uint
	// mux is a mutex for thread-safe operations. The struct holds the
	// sync.Mutex by value, so a bloomFilters value must be shared by pointer
	// and not copied once the mutex is in use.
	mux sync.Mutex
}
// getFilters makes sure em.filters holds the bloom filters used for
// name-string matching. It has no return value; the result is observed
// through em.filters.
//
// The filters directory is read from the configuration first. If em.filters
// is already set, nothing else is done. Otherwise the filters are loaded from
// cached files and, if em.filters is still nil after that attempt, they are
// created from the database. Creating filters from cache is significantly
// faster.
//
// A non-nil error from either step is logged at fatal level (zerolog exits
// the process after writing a fatal event).
func (em *exactMatcher) getFilters() {
	// The directory is resolved before the nil check, as callers may rely on
	// FiltersDir being invoked on every call.
	dir := em.cfg.FiltersDir()
	if em.filters != nil {
		return
	}

	if err := em.filtersFromCache(dir); err != nil {
		log.Fatal().Err(err).Msgf("Cannot create filters at %s from cache", dir)
	}

	// Fall back to the database only if the cache did not produce filters.
	if em.filters == nil {
		if err := em.filtersFromDB(dir); err != nil {
			log.Fatal().Err(err).
				Msgf("Cannot create filters at %s from database", dir)
		}
	}
}