-
Notifications
You must be signed in to change notification settings - Fork 0
/
spell.go
160 lines (140 loc) · 3.71 KB
/
spell.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
// Copyright (c) 2018, The GoKi Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
Package spell provides functions for spell check and correction
*/
package spell
import (
"bufio"
"errors"
"log"
"os"
"regexp"
"strings"
"github.com/sajari/fuzzy"
)
// EditData carries the outcome of a spelling-correction edit.
// In this file it is produced by CorrectText and is the return type of EditFunc.
type EditData struct {
// NewText is the replacement text to insert in place of the misspelled word.
NewText string `desc:"spelling correction text after special edits if needed"`
}
///////////////////////////////////////////////////////////////////////////////
// spell check returning suggestions using github.com/sajari/fuzzy
// EditFunc is the signature of a function that applies a spelling correction
// while editing text. It receives caller-supplied data, then the selected
// correction (new) followed by the word being replaced (old) -- note the
// order -- and returns the resulting EditData.
// NOTE(review): no caller of EditFunc is visible in this file; presumably it
// is invoked by the text-editing layer -- confirm against the callers.
type EditFunc func(data interface{}, new string, old string) EditData
// Package-level spelling state shared by the functions in this file.
var (
	// inited is set to true once Load succeeds or Train completes.
	inited bool

	// model is the fuzzy spelling model; it is nil until Load or Train assigns it.
	model *fuzzy.Model

	// Ignore holds the words added through IgnoreWord and searched by DoIgnore.
	Ignore []string
)
// Initialized reports whether a spelling model is available, i.e. whether
// the package's initialization flag has been set by a successful Load or by
// Train.
func Initialized() bool {
	return inited
}
// Load reads a previously saved spelling model (stored in json format) from
// path and installs it as the package's active model.
//
// On success the model is replaced and the package is marked initialized.
// On failure the error from fuzzy.Load is returned and the currently active
// model, if any, is left untouched, so a bad path cannot wipe out a model
// that was already loaded or trained.
func Load(path string) (err error) {
	// Load into a local first: assigning straight into the global would
	// replace a working model with whatever fuzzy.Load returns on error.
	loaded, err := fuzzy.Load(path)
	if err != nil {
		return err
	}
	model = loaded
	inited = true
	return nil
}
// Save writes the active spelling model, data and parameters included, to
// filename. When no model exists there is nothing to write and Save returns
// nil without touching the file system.
func Save(filename string) error {
	if model != nil {
		return model.Save(filename)
	}
	return nil
}
func Train(file os.File, new bool) (err error) {
var out []string
reader := bufio.NewReader(&file)
scanner := bufio.NewScanner(reader)
scanner.Split(bufio.ScanLines)
// Count the words.
count := 0
for scanner.Scan() {
exp, _ := regexp.Compile("[a-zA-Z]+")
words := exp.FindAll([]byte(scanner.Text()), -1)
for _, word := range words {
if len(word) > 1 {
out = append(out, strings.ToLower(string(word)))
count++
}
}
}
if err = scanner.Err(); err != nil {
log.Println(os.Stderr, "reading input: ", err)
return err
}
if new {
model = fuzzy.NewModel()
}
model.Train(out)
inited = true
return err
}
// CheckWord looks up a single word in the spelling model and returns the
// model's suggestions for it.
//
// Surrounding punctuation (`'*.,?[]():;) is trimmed and the word is
// lowercased before the lookup. known is true only when the first suggestion
// equals the cleaned-up word. An error is returned when no model has been
// loaded or trained.
//
// Programs should call gi.CheckWord - all program calls should be done
// through that single API.
func CheckWord(w string) (suggests []string, known bool, err error) {
	if model == nil {
		return nil, false, errors.New("Model not initialized")
	}

	word := strings.ToLower(strings.Trim(w, "`'*.,?[]():;"))
	suggests = model.SpellCheckSuggestions(word, 10)
	if suggests == nil {
		return nil, false, nil
	}

	known = len(suggests) > 0 && suggests[0] == word
	return suggests, known, nil
}
// LearnWord adds a single word, lowercased, to the corpus of the active
// spelling model.
//
// If no model has been loaded or trained yet the call is a no-op: there is
// no corpus to add to, and calling into a nil model would panic. This matches
// the nil-model guards in Save, CheckWord and Complete.
func LearnWord(word string) {
	if model == nil {
		return
	}
	model.TrainWord(strings.ToLower(word))
}
// Complete returns the model's candidate completions for the prefix s.
// It fails with an error when no model has been loaded or trained;
// otherwise the result and error of the model's Autocomplete are passed
// through unchanged.
func Complete(s string) (result []string, err error) {
	if model == nil {
		return nil, errors.New("Model is nil")
	}
	return model.Autocomplete(s)
}
// CorrectText builds the replacement text for the unknown word old, given
// the correction new chosen from the list of suggestions.
//
// To keep as much of the original casing as possible, the leading characters
// of old that match new case-insensitively (i.e. whose lowercase form equals
// the corresponding character of new) are copied from old; everything after
// the first mismatch is taken from new. For example, "Teh" corrected to
// "the" yields "The".
//
// The comparison is done rune by rune rather than byte by byte, so
// multi-byte characters are never split or mixed, and characters whose
// lowercase form has a different encoded length cannot cause an
// out-of-range index.
func CorrectText(old string, new string) (ed EditData) {
	oldRunes := []rune(old)
	lowerRunes := []rune(strings.ToLower(old))
	newRunes := []rune(new)

	// Only positions present in all three sequences can be compared.
	limit := len(oldRunes)
	if len(lowerRunes) < limit {
		limit = len(lowerRunes)
	}
	if len(newRunes) < limit {
		limit = len(newRunes)
	}

	// keep counts the leading runes of old that agree with new, ignoring case.
	keep := 0
	for keep < limit && lowerRunes[keep] == newRunes[keep] {
		keep++
	}

	ed.NewText = string(oldRunes[:keep]) + string(newRunes[keep:])
	return ed
}
// IgnoreWord appends word to the package-level Ignore list, exactly as
// given: no case folding is applied and duplicates are not filtered out.
func IgnoreWord(word string) {
	Ignore = append(Ignore, word)
}
// DoIgnore reports whether word appears in the Ignore list.
// The match is an exact, case-sensitive string comparison.
func DoIgnore(word string) bool {
	for i := 0; i < len(Ignore); i++ {
		if Ignore[i] == word {
			return true
		}
	}
	return false
}