/
dict.go
74 lines (64 loc) · 1.74 KB
/
dict.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
// Package dict provides lookup data for gnparser.
package dict
import (
"bufio"
"embed"
"fmt"
"github.com/rs/zerolog/log"
)
// data is the embedded file system holding the contents of the "data"
// directory; dictionary text files are opened from it as "data/<name>".
//
//go:embed data
var data embed.FS
// Dict contains loaded dictionaries
var Dict *Dictionary = LoadDictionary()
// Dictionary groups the lookup tables used to detect information about
// scientific names.
type Dictionary struct {
	// Bacteria is keyed by bacterial genus. The value is true when
	// homonyms of the genus are known to exist under other nomenclatural
	// codes.
	Bacteria map[string]bool

	// AuthorICN is the set of family names of ICN authors of genus names.
	// It is used to recognize ICN name-strings, so that a word in
	// parentheses following the genus is parsed as an author rather than
	// as a subgenus.
	AuthorICN map[string]struct{}
}
// LoadDictionary builds a new Dictionary, filling the bacterial genera
// table first and the ICN author set second from their text files.
func LoadDictionary() *Dictionary {
	return &Dictionary{
		Bacteria:  readBacterialData(),
		AuthorICN: readAuthorICNData(),
	}
}
// readBacterialData returns the bacterial genera table. The plain genera
// file is scanned first with the value false; the homonyms file is scanned
// afterwards with the value true, so a genus present in both ends up true.
func readBacterialData() map[string]bool {
	// Order matters: later entries overwrite earlier ones for the same key.
	sources := []struct {
		file      string
		isHomonym bool
	}{
		{"bacteria_genera.txt", false},
		{"bacteria_genera_homonyms.txt", true},
	}

	genera := map[string]bool{}
	for _, src := range sources {
		scanBacterialFile(src.file, src.isHomonym, genera)
	}
	return genera
}
// readAuthorICNData returns the set of ICN author names read from
// genera_auth_icn.txt.
func readAuthorICNData() map[string]struct{} {
	authors := map[string]struct{}{}
	scanAuthorICNFIle("genera_auth_icn.txt", authors)
	return authors
}
// scanAuthorICNFIle reads the embedded file data/<path> line by line and
// adds every line as a key of m.
//
// Failing to open or to read the file is fatal: the error is logged and the
// process exits, because a missing or truncated dictionary would silently
// change parsing results.
func scanAuthorICNFIle(path string, m map[string]struct{}) {
	path = fmt.Sprintf("data/%s", path)
	f, err := data.Open(path)
	if err != nil {
		// A zerolog event is only written (and Fatal only exits) once
		// Msg/Msgf/Send is called; without it execution would continue
		// with a nil file.
		log.Fatal().Err(err).Msgf("cannot open dictionary file %s", path)
	}
	defer f.Close()

	sc := bufio.NewScanner(f)
	for sc.Scan() {
		m[sc.Text()] = struct{}{}
	}
	// Scan returns false both at EOF and on error; tell the two apart.
	if err := sc.Err(); err != nil {
		log.Fatal().Err(err).Msgf("cannot read dictionary file %s", path)
	}
}
// scanBacterialFile reads the embedded file data/<path> line by line and
// stores every line as a key of m with the value isHomonym, overwriting any
// value already present for that key.
//
// Failing to open or to read the file is fatal: the error is logged and the
// process exits, because a missing or truncated dictionary would silently
// change parsing results.
func scanBacterialFile(path string, isHomonym bool, m map[string]bool) {
	path = fmt.Sprintf("data/%s", path)
	f, err := data.Open(path)
	if err != nil {
		// A zerolog event is only written (and Fatal only exits) once
		// Msg/Msgf/Send is called; without it execution would continue
		// with a nil file.
		log.Fatal().Err(err).Msgf("cannot open dictionary file %s", path)
	}
	defer f.Close()

	sc := bufio.NewScanner(f)
	for sc.Scan() {
		m[sc.Text()] = isHomonym
	}
	// Scan returns false both at EOF and on error; tell the two apart.
	if err := sc.Err(); err != nil {
		log.Fatal().Err(err).Msgf("cannot read dictionary file %s", path)
	}
}