
Commit

Cosme (#202)
ikawaha committed Oct 6, 2020
1 parent 4d07fe7 commit 95ea551
Showing 8 changed files with 48 additions and 10 deletions.
32 changes: 32 additions & 0 deletions .golangci.yml
@@ -0,0 +1,32 @@
run:

issues:
  exclude-use-default: false
  exclude-rules:
    - path: _test\.go
      linters:
        - gocyclo

linters:
  enable:
    - gocyclo
    - staticcheck
    - govet
    - golint
  disable:
    - maligned
    - exhaustive
    - noctx
    - asciicheck
    - sqlclosecheck
    - rowserrcheck
    - gosec
    - scopelint
  presets:
    - bugs
    - unused
    - format

linters-settings:
  gocyclo:
    min-complexity: 15
3 changes: 2 additions & 1 deletion cmd/kagome/server/demo.go
@@ -15,11 +15,12 @@ import (
"github.com/ikawaha/kagome/v2/tokenizer"
)

// TokenizeDemoHandler represents the tokenizer demo server struct
// TokenizeDemoHandler represents the tokenizer demo server struct.
type TokenizeDemoHandler struct {
	tokenizer *tokenizer.Tokenizer
}

// ServeHTTP serves a tokenize demo server.
func (h *TokenizeDemoHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	type record struct {
		Surface string
4 changes: 3 additions & 1 deletion filter/feature.go
@@ -7,7 +7,9 @@ import (
)

type (
	Feature = string
	// Feature represents a feature.
	Feature = string
	// Features represents a vector of features.
	Features = []string
)

4 changes: 3 additions & 1 deletion filter/pos.go
@@ -5,14 +5,16 @@ import (
)

type (
	// POS represents a part-of-speech that is a vector of features.
	POS = []string
)

// POSFilter represents a part-of-speech filter.
type POSFilter struct {
	filter *FeaturesFilter
}

// NewPOSFilter returns a part of speech filter.
// NewPOSFilter returns a part-of-speech filter.
func NewPOSFilter(stops ...POS) *POSFilter {
	return &POSFilter{
		filter: NewFeaturesFilter(stops...),
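For context on the `NewPOSFilter` constructor documented above, here is a minimal usage sketch; it is not part of this commit. The `Keep` method, the `OmitBosEos` option, and the `github.com/ikawaha/kagome-dict/ipa` dictionary module are assumptions based on the kagome v2 API rather than anything shown in this diff.

```go
package main

import (
	"fmt"

	"github.com/ikawaha/kagome-dict/ipa"
	"github.com/ikawaha/kagome/v2/filter"
	"github.com/ikawaha/kagome/v2/tokenizer"
)

func main() {
	t, err := tokenizer.New(ipa.Dict(), tokenizer.OmitBosEos())
	if err != nil {
		panic(err)
	}
	tokens := t.Tokenize("寿司が食べたい。") // "I want to eat sushi."

	// Keep only nouns (名詞); Keep is assumed to filter the slice in place,
	// removing every token whose part-of-speech does not match.
	nouns := filter.NewPOSFilter(filter.POS{"名詞"})
	nouns.Keep(&tokens)

	for _, v := range tokens {
		fmt.Println(v.Surface)
	}
}
```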
2 changes: 1 addition & 1 deletion filter/sentence_splitter.go
@@ -47,8 +47,8 @@ func (s SentenceSplitter) isFollower(r rune) bool {
	return false
}

// nolint: gocyclo
// ScanSentences is a split function for a Scanner that returns each sentence of text.
// nolint: gocyclo
func (s SentenceSplitter) ScanSentences(data []byte, atEOF bool) (advance int, token []byte, err error) {
	if atEOF && len(data) == 0 {
		return 0, nil, nil
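The signature in this hunk matches `bufio.SplitFunc`, so `ScanSentences` can be handed straight to a `bufio.Scanner`. A minimal sketch, not part of this commit; a zero-value `SentenceSplitter` is used for brevity, so in practice its delimiter configuration (not shown in this diff) would need to be set before the input is actually split into sentences.

```go
package main

import (
	"bufio"
	"fmt"
	"strings"

	"github.com/ikawaha/kagome/v2/filter"
)

func main() {
	// ScanSentences satisfies bufio.SplitFunc, so the Scanner yields one
	// sentence per Scan call instead of one line.
	var s filter.SentenceSplitter // NOTE: zero value; real use configures its delimiter settings
	sc := bufio.NewScanner(strings.NewReader("吾輩は猫である。名前はまだない。"))
	sc.Split(s.ScanSentences)
	for sc.Scan() {
		fmt.Println(sc.Text())
	}
	if err := sc.Err(); err != nil {
		panic(err)
	}
}
```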
1 change: 1 addition & 0 deletions filter/word.go
@@ -4,6 +4,7 @@ import (
"github.com/ikawaha/kagome/v2/tokenizer"
)

// WordFilter represents a word filter.
type WordFilter struct {
	words map[string]struct{}
}
1 change: 1 addition & 0 deletions tokenizer/token.go
@@ -200,6 +200,7 @@ func (t Token) String() string {
	return fmt.Sprintf("%q (%d: %d, %d) %v [%d]", t.Surface, t.Position, t.Start, t.End, t.Class, t.ID)
}

// Equal returns true if tokens are equal.
func (t Token) Equal(v Token) bool {
	return t.ID == v.ID &&
		t.Class == v.Class &&
11 changes: 5 additions & 6 deletions tokenizer/tokenizer.go
@@ -11,6 +11,11 @@ import (
)

// TokenizeMode represents a mode of tokenize.
//
// Kagome has segmentation mode for search such as Kuromoji.
// Normal: Regular segmentation
// Search: Use a heuristic to do additional segmentation useful for search
// Extended: Similar to search mode, but also unigram unknown words
type TokenizeMode int

func (m TokenizeMode) String() string {
@@ -26,12 +31,6 @@ func (m TokenizeMode) String() string {
}

const (
	// Segmentation mode for search
	// Kagome has segmentation mode for search such as Kuromoji.
	// Normal: Regular segmentation
	// Search: Use a heuristic to do additional segmentation useful for search
	// Extended: Similar to search mode, but also unigram unknown words
	//
	// Normal is the normal tokenize mode.
	Normal TokenizeMode = iota + 1
	// Search is the tokenize mode for search.
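The mode documentation relocated in this hunk corresponds to the `Normal`, `Search`, and `Extended` constants. A minimal sketch of switching modes, not part of this commit; the `Analyze` call, the `OmitBosEos` option, and the `github.com/ikawaha/kagome-dict/ipa` dictionary module follow the kagome v2 README and are assumptions relative to this diff.

```go
package main

import (
	"fmt"
	"strings"

	"github.com/ikawaha/kagome-dict/ipa"
	"github.com/ikawaha/kagome/v2/tokenizer"
)

func main() {
	t, err := tokenizer.New(ipa.Dict(), tokenizer.OmitBosEos())
	if err != nil {
		panic(err)
	}
	// Analyze takes the TokenizeMode documented above; Search and Extended
	// segment long compounds more aggressively than Normal.
	for _, mode := range []tokenizer.TokenizeMode{tokenizer.Normal, tokenizer.Search, tokenizer.Extended} {
		tokens := t.Analyze("関西国際空港", mode)
		surfaces := make([]string, 0, len(tokens))
		for _, v := range tokens {
			surfaces = append(surfaces, v.Surface)
		}
		fmt.Printf("%v:\t%s\n", mode, strings.Join(surfaces, " / "))
	}
}
```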
