
Commit

Cosme (#202)
ikawaha committed Oct 6, 2020
1 parent 4d07fe7 commit 95ea551
Showing 8 changed files with 48 additions and 10 deletions.
32 changes: 32 additions & 0 deletions .golangci.yml
@@ -0,0 +1,32 @@
run:

issues:
  exclude-use-default: false
  exclude-rules:
    - path: _test\.go
      linters:
        - gocyclo

linters:
  enable:
    - gocyclo
    - staticcheck
    - govet
    - golint
  disable:
    - maligned
    - exhaustive
    - noctx
    - asciicheck
    - sqlclosecheck
    - rowserrcheck
    - gosec
    - scopelint
  presets:
    - bugs
    - unused
    - format

linters-settings:
  gocyclo:
    min-complexity: 15
3 changes: 2 additions & 1 deletion cmd/kagome/server/demo.go
@@ -15,11 +15,12 @@ import (
"github.com/ikawaha/kagome/v2/tokenizer"
)

// TokenizeDemoHandler represents the tokenizer demo server struct
// TokenizeDemoHandler represents the tokenizer demo server struct.
type TokenizeDemoHandler struct {
	tokenizer *tokenizer.Tokenizer
}

// ServeHTTP serves a tokenize demo server.
func (h *TokenizeDemoHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	type record struct {
		Surface string
4 changes: 3 additions & 1 deletion filter/feature.go
@@ -7,7 +7,9 @@ import (
)

type (
	Feature = string
	// Feature represents a feature.
	Feature = string
	// Features represents a vector of features.
	Features = []string
)

4 changes: 3 additions & 1 deletion filter/pos.go
@@ -5,14 +5,16 @@ import (
)

type (
	// POS represents a part-of-speech that is a vector of features.
	POS = []string
)

// POSFilter represents a part-of-speech filter.
type POSFilter struct {
	filter *FeaturesFilter
}

// NewPOSFilter returns a part of speech filter.
// NewPOSFilter returns a part-of-speech filter.
func NewPOSFilter(stops ...POS) *POSFilter {
	return &POSFilter{
		filter: NewFeaturesFilter(stops...),
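For context on the `NewPOSFilter` constructor documented above, here is a minimal usage sketch; it is not part of this commit. The `Keep` method, the `OmitBosEos` option, and the `github.com/ikawaha/kagome-dict/ipa` dictionary module are assumptions based on the kagome v2 API rather than anything shown in this diff.

```go
package main

import (
	"fmt"

	"github.com/ikawaha/kagome-dict/ipa"
	"github.com/ikawaha/kagome/v2/filter"
	"github.com/ikawaha/kagome/v2/tokenizer"
)

func main() {
	t, err := tokenizer.New(ipa.Dict(), tokenizer.OmitBosEos())
	if err != nil {
		panic(err)
	}
	tokens := t.Tokenize("寿司が食べたい。") // "I want to eat sushi."

	// Keep only nouns (名詞); Keep is assumed to filter the slice in place,
	// removing every token whose part-of-speech does not match.
	nouns := filter.NewPOSFilter(filter.POS{"名詞"})
	nouns.Keep(&tokens)

	for _, v := range tokens {
		fmt.Println(v.Surface)
	}
}
```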
2 changes: 1 addition & 1 deletion filter/sentence_splitter.go
@@ -47,8 +47,8 @@ func (s SentenceSplitter) isFollower(r rune) bool {
	return false
}

// nolint: gocyclo
// ScanSentences is a split function for a Scanner that returns each sentence of text.
// nolint: gocyclo
func (s SentenceSplitter) ScanSentences(data []byte, atEOF bool) (advance int, token []byte, err error) {
	if atEOF && len(data) == 0 {
		return 0, nil, nil
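The signature in this hunk matches `bufio.SplitFunc`, so `ScanSentences` can be handed straight to a `bufio.Scanner`. A minimal sketch, not part of this commit; a zero-value `SentenceSplitter` is used for brevity, so in practice its delimiter configuration (not shown in this diff) would need to be set before the input is actually split into sentences.

```go
package main

import (
	"bufio"
	"fmt"
	"strings"

	"github.com/ikawaha/kagome/v2/filter"
)

func main() {
	// ScanSentences satisfies bufio.SplitFunc, so the Scanner yields one
	// sentence per Scan call instead of one line.
	var s filter.SentenceSplitter // NOTE: zero value; real use configures its delimiter settings
	sc := bufio.NewScanner(strings.NewReader("吾輩は猫である。名前はまだない。"))
	sc.Split(s.ScanSentences)
	for sc.Scan() {
		fmt.Println(sc.Text())
	}
	if err := sc.Err(); err != nil {
		panic(err)
	}
}
```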
1 change: 1 addition & 0 deletions filter/word.go
@@ -4,6 +4,7 @@ import (
"github.com/ikawaha/kagome/v2/tokenizer"
)

// WordFilter represents a word filter.
type WordFilter struct {
	words map[string]struct{}
}
1 change: 1 addition & 0 deletions tokenizer/token.go
@@ -200,6 +200,7 @@ func (t Token) String() string {
	return fmt.Sprintf("%q (%d: %d, %d) %v [%d]", t.Surface, t.Position, t.Start, t.End, t.Class, t.ID)
}

// Equal returns true if tokens are equal.
func (t Token) Equal(v Token) bool {
	return t.ID == v.ID &&
		t.Class == v.Class &&
11 changes: 5 additions & 6 deletions tokenizer/tokenizer.go
@@ -11,6 +11,11 @@ import (
)

// TokenizeMode represents a mode of tokenize.
//
// Kagome has segmentation mode for search such as Kuromoji.
// Normal: Regular segmentation
// Search: Use a heuristic to do additional segmentation useful for search
// Extended: Similar to search mode, but also unigram unknown words
type TokenizeMode int

func (m TokenizeMode) String() string {
@@ -26,12 +31,6 @@ func (m TokenizeMode) String() string {
}

const (
	// Segmentation mode for search
	// Kagome has segmentation mode for search such as Kuromoji.
	// Normal: Regular segmentation
	// Search: Use a heuristic to do additional segmentation useful for search
	// Extended: Similar to search mode, but also unigram unknown words
	//
	// Normal is the normal tokenize mode.
	Normal TokenizeMode = iota + 1
	// Search is the tokenize mode for search.
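The mode documentation relocated in this hunk corresponds to the `Normal`, `Search`, and `Extended` constants. A minimal sketch of switching modes, not part of this commit; the `Analyze` call, the `OmitBosEos` option, and the `github.com/ikawaha/kagome-dict/ipa` dictionary module follow the kagome v2 README and are assumptions relative to this diff.

```go
package main

import (
	"fmt"
	"strings"

	"github.com/ikawaha/kagome-dict/ipa"
	"github.com/ikawaha/kagome/v2/tokenizer"
)

func main() {
	t, err := tokenizer.New(ipa.Dict(), tokenizer.OmitBosEos())
	if err != nil {
		panic(err)
	}
	// Analyze takes the TokenizeMode documented above; Search and Extended
	// segment long compounds more aggressively than Normal.
	for _, mode := range []tokenizer.TokenizeMode{tokenizer.Normal, tokenizer.Search, tokenizer.Extended} {
		tokens := t.Analyze("関西国際空港", mode)
		surfaces := make([]string, 0, len(tokens))
		for _, v := range tokens {
			surfaces = append(surfaces, v.Surface)
		}
		fmt.Printf("%v:\t%s\n", mode, strings.Join(surfaces, " / "))
	}
}
```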
