/
db_filters.go
78 lines (69 loc) · 1.62 KB
/
db_filters.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
package bloom
import (
"database/sql"
"fmt"
"github.com/devopsfaith/bloomfilter"
baseBloomfilter "github.com/devopsfaith/bloomfilter/bloomfilter"
"github.com/gnames/gnmatcher/io/dbase"
"github.com/rs/zerolog/log"
)
// filtersFromDB builds the bloom filters from the database and passes them,
// together with path, to saveFilters.
//
// It opens a database handle with dbase.NewDB(em.cfg), creates a filter from
// the "canonical_stems" table, stores the filter and its size in em.filters,
// and then calls saveFilters(path, em.filters). The database handle is closed
// on every return path.
//
// It returns the error from createFilter when the filter cannot be built;
// otherwise it returns the result of closing the database handle.
func (em *exactMatcher) filtersFromDB(path string) error {
	db := dbase.NewDB(em.cfg)

	log.Info().Msg("Importing lookup data for stemmed canonicals")
	cFilter, cSize, err := createFilter(db, "canonical_stems")
	if err != nil {
		// Release the handle on failure as well. The createFilter error is
		// the one worth reporting, so a secondary Close error is dropped.
		_ = db.Close()
		return err
	}

	em.filters = &bloomFilters{
		canonicalStem: cFilter,
		canonicalSize: cSize,
	}

	// NOTE(review): saveFilters is defined outside this file; if it returns
	// an error, that error is not inspected here — confirm and handle.
	saveFilters(path, em.filters)
	return db.Close()
}
// createFilter builds a bloom filter for the given table.
//
// The row count reported by getFilterSize is used as the filter size and is
// handed to newFilter, whose results are returned unchanged. If the count
// cannot be obtained, a nil filter, a zero size and the error are returned.
func createFilter(
	db *sql.DB,
	table string,
) (*baseBloomfilter.Bloomfilter, uint, error) {
	rowCount, err := getFilterSize(db, table)
	if err == nil {
		return newFilter(db, table, rowCount)
	}
	return nil, 0, err
}
// getFilterSize returns the number of rows in table, as reported by a
// "SELECT COUNT(*)" query run against db.
//
// The table name is interpolated directly into the SQL text. On a query or
// scan error it returns 0 and that error.
func getFilterSize(db *sql.DB, table string) (uint, error) {
	var count uint

	query := fmt.Sprintf("SELECT COUNT(*) FROM %s", table)
	err := db.QueryRow(query).Scan(&count)
	if err != nil {
		return 0, err
	}
	return count, nil
}
// newFilter creates a bloom filter and adds every id found in table to it.
//
// The filter is configured with N = filterSize, P = 0.00001 and the
// HASHER_OPTIMAL hash (N and P are presumably the expected item count and the
// target false-positive rate — confirm against bloomfilter.Config). Each value
// of the "id" column is scanned as a string and added to the filter as bytes.
// The table name is interpolated directly into the SQL text.
//
// It returns the filter, filterSize unchanged, and an error if the query, a
// row scan, or the row iteration fails. On error the returned filter may be
// only partially filled and should not be used.
func newFilter(
	db *sql.DB,
	table string,
	filterSize uint,
) (*baseBloomfilter.Bloomfilter, uint, error) {
	cfg := bloomfilter.Config{
		N:        filterSize,
		P:        0.00001,
		HashName: bloomfilter.HASHER_OPTIMAL,
	}
	bf := baseBloomfilter.New(cfg)

	q := fmt.Sprintf("SELECT id FROM %s", table)
	rows, err := db.Query(q)
	if err != nil {
		return bf, filterSize, err
	}
	// Release the result set on every path, including an early return caused
	// by a failed Scan.
	defer rows.Close()

	var uuid string
	for rows.Next() {
		if err := rows.Scan(&uuid); err != nil {
			return bf, filterSize, err
		}
		bf.Add([]byte(uuid))
	}
	// rows.Next also returns false when iteration stops on an error; without
	// this check a truncated filter would be reported as a success.
	if err := rows.Err(); err != nil {
		return bf, filterSize, err
	}
	return bf, filterSize, nil
}