-
-
Notifications
You must be signed in to change notification settings - Fork 53
/
pos.go
54 lines (46 loc) · 1.05 KB
/
pos.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
package filter
import (
"github.com/ikawaha/kagome/v2/tokenizer"
)
// POS represents a part-of-speech: a vector of feature strings
// as produced by tokenizer.Token.POS().
type POS = []string
// POSFilter represents a part-of-speech filter.
// Construct it with NewPOSFilter; the zero value has a nil filter
// and is not usable.
type POSFilter struct {
	// filter is the underlying feature-vector matcher that backs
	// Match, Drop, and Keep.
	filter *FeaturesFilter
}
// NewPOSFilter returns a part-of-speech filter that matches any of
// the given POS feature vectors.
func NewPOSFilter(p ...POS) *POSFilter {
	f := NewFeaturesFilter(p...)
	return &POSFilter{filter: f}
}
// Match reports whether the filter matches the given POS.
func (f POSFilter) Match(p POS) bool {
	matched := f.filter.Match(p)
	return matched
}
// Drop removes, in place, every token whose POS matches the filter.
func (f POSFilter) Drop(tokens *[]tokenizer.Token) {
	const removeMatches = true
	f.apply(tokens, removeMatches)
}
// Keep retains, in place, only the tokens whose POS matches the filter.
func (f POSFilter) Keep(tokens *[]tokenizer.Token) {
	const removeMatches = false
	f.apply(tokens, removeMatches)
}
// apply filters tokens in place. When drop is true, tokens whose POS
// matches the filter are removed; when drop is false, only matching
// tokens are kept. A nil tokens pointer is a no-op. The slice's
// backing array is reused; the slice is re-sliced to the surviving
// length.
func (f POSFilter) apply(tokens *[]tokenizer.Token, drop bool) {
	if tokens == nil {
		return
	}
	// Compact survivors into the front of the same backing array.
	kept := (*tokens)[:0]
	for _, tok := range *tokens {
		if f.Match(tok.POS()) != drop {
			kept = append(kept, tok)
		}
	}
	*tokens = kept
}