/
filter.go
52 lines (40 loc) · 1.57 KB
/
filter.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
// Package stemmer offers Snowball stemmers for several languages, each exposed as a jargon.Filter.
package stemmer
import (
"github.com/clipperhouse/jargon"
"github.com/clipperhouse/jargon/filters/mapper"
"github.com/kljensen/snowball/english"
"github.com/kljensen/snowball/french"
"github.com/kljensen/snowball/norwegian"
"github.com/kljensen/snowball/russian"
"github.com/kljensen/snowball/spanish"
"github.com/kljensen/snowball/swedish"
)
// English is a Snowball stemmer for English, implemented as a jargon.Filter
var English jargon.Filter = newStemmer(english.Stem)
// French is a Snowball stemmer for French, implemented as a jargon.Filter
var French = newStemmer(french.Stem)
// Norwegian is a Snowball stemmer for Norwegian, implemented as a jargon.Filter
var Norwegian = newStemmer(norwegian.Stem)
// Russian is a Snowball stemmer for Russian, implemented as a jargon.Filter
var Russian = newStemmer(russian.Stem)
// Spanish is a Snowball stemmer for Spanish, implemented as a jargon.Filter
var Spanish = newStemmer(spanish.Stem)
// Swedish is a Snowball stemmer for Swedish, implemented as a jargon.Filter
var Swedish = newStemmer(swedish.Stem)
// newStemmer adapts a stemming function to a jargon.Filter.
//
// stem is called with the token's text and the flag true (in the snowball
// packages that second argument appears to be stemStopWords; confirm against
// the library). Punctuation and whitespace tokens pass through untouched, as
// do tokens whose stem equals their original text. Any other token is replaced
// by a new token holding the stemmed text.
func newStemmer(stem func(string, bool) string) jargon.Filter {
	stemToken := func(token *jargon.Token) *jargon.Token {
		// Stemming only applies to words; leave everything else alone.
		if token.IsPunct() || token.IsSpace() {
			return token
		}

		original := token.String()
		if result := stem(original, true); result != original {
			// The true flag presumably marks the new token as a lemma; verify
			// against jargon.NewToken.
			return jargon.NewToken(result, true)
		}

		// Stemming changed nothing, so hand back the original token as-is.
		return token
	}

	return mapper.NewFilter(stemToken)
}