/
heuristic.go
109 lines (89 loc) · 2.3 KB
/
heuristic.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
package heuristic
import (
"fmt"
"github.com/gnames/gnfinder/dict"
"github.com/gnames/gnfinder/token"
)
// TagTokens is used by both the heuristic and the Bayes approaches. For every
// capitalized token it takes the window of tokens that starts at that token
// and ends at token.UpperIndex, lets token.SetIndices set up the token's
// indices (the indices determine whether the token is a potential uninomial,
// binomial or trinomial), and then runs the heuristic checks that record a
// decision on the first token of the window. Tokens are modified in place.
func TagTokens(ts []token.Token, d *dict.Dictionary) {
	total := len(ts)
	for idx := range ts {
		// Only a capitalized token can start a name candidate.
		if ts[idx].Features.Capitalized {
			candidate := ts[idx:token.UpperIndex(idx, total)]
			token.SetIndices(candidate, d)
			exploreNameCandidate(candidate, d)
		}
	}
}
// exploreNameCandidate inspects a window of tokens whose first element is the
// candidate's leading token and stores the resulting decision on that token.
// It reports true when a decision was made (uninomial, binomial variant or
// trinomial) and false when the candidate was rejected.
func exploreNameCandidate(ts []token.Token, d *dict.Dictionary) bool {
	head := &ts[0]
	noSpecies := head.Indices.Species == 0

	switch {
	case head.Features.UninomialDict == dict.WhiteUninomial,
		noSpecies && head.Features.UninomialDict == dict.WhiteGenus:
		// A white-listed uninomial, or a white-listed genus with no
		// species index, is accepted as a uninomial.
		head.Decision = token.Uninomial
		return true
	case noSpecies, head.UninomialDict == dict.BlackUninomial:
		// No species index to pair with, or a black-listed first word.
		return false
	case !checkAsGenusSpecies(ts, d):
		return false
	}

	// Only binomial-like decisions are checked for an infraspecific part.
	if head.Decision.In(token.Binomial, token.PossibleBinomial,
		token.BayesBinomial) {
		checkInfraspecies(ts, d)
	}
	return true
}
// checkAsSpecies reports whether t looks like a specific epithet: it must not
// be capitalized and its species dictionary value must be either white or
// grey. The dictionary argument is not consulted here.
func checkAsSpecies(t *token.Token, d *dict.Dictionary) bool {
	if t.Capitalized {
		return false
	}
	switch t.SpeciesDict {
	case dict.WhiteSpecies, dict.GreySpecies:
		return true
	default:
		return false
	}
}
// checkAsGenusSpecies evaluates the first token of ts as a genus together with
// the token at its Species index as a specific epithet. It stores the decision
// on the first token and reports whether any decision was made:
//
//   - epithet rejected, genus white-listed     -> Uninomial
//   - epithet accepted, genus white-listed     -> Binomial
//   - epithet accepted, pair in GreyGeneraSp   -> Binomial
//   - epithet white-listed and not capitalized -> PossibleBinomial
//   - anything else                            -> no decision, false
func checkAsGenusSpecies(ts []token.Token, d *dict.Dictionary) bool {
	genus := &ts[0]
	epithet := &ts[genus.Indices.Species]
	whiteGenus := genus.UninomialDict == dict.WhiteGenus

	if !checkAsSpecies(epithet, d) {
		// The second word is not a plausible epithet; the first word
		// may still stand on its own as a genus.
		if whiteGenus {
			genus.Decision = token.Uninomial
		}
		return whiteGenus
	}

	switch {
	case whiteGenus, checkGreyGeneraSp(genus, epithet, d):
		genus.Decision = token.Binomial
	case epithet.Features.SpeciesDict == dict.WhiteSpecies && !epithet.Capitalized:
		genus.Decision = token.PossibleBinomial
	default:
		return false
	}
	return true
}
// checkGreyGeneraSp reports whether the cleaned genus and species values,
// joined by a single space, are present as a key in d.GreyGeneraSp.
func checkGreyGeneraSp(g *token.Token, s *token.Token,
	d *dict.Dictionary) bool {
	key := fmt.Sprintf("%s %s", g.Cleaned, s.Cleaned)
	_, found := d.GreyGeneraSp[key]
	return found
}
// checkInfraspecies upgrades the decision on the first token of ts to
// Trinomial when that token has a non-zero Infraspecies index and the token
// at that index passes checkAsSpecies. Otherwise ts is left unchanged.
func checkInfraspecies(ts []token.Token, d *dict.Dictionary) {
	head := &ts[0]
	if idx := head.Indices.Infraspecies; idx != 0 && checkAsSpecies(&ts[idx], d) {
		head.Decision = token.Trinomial
	}
}