/
split_words.go
244 lines (227 loc) · 5.25 KB
/
split_words.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
// Insight 0+0 [ 洞悉 0+0 ]
// InDimensions Construct Source [ 忆黛蒙逝·建造源 ] -> idcsource@gmail.com
// Stephen Fire Meditation Qin [ 火志溟 ] -> firemeditation@gmail.com
// This source code is governed by GNU LGPL v3 license
// Package base provides helper functions shared by the Insight 0+0 packages.
package base
import (
"fmt"
"regexp"
"strings"
)
// CommandSplit breaks a command line into its fields.
//
// Leading and trailing white space is removed first. Outside double quotes a
// single ASCII space ends the current field. A double-quoted section may
// contain spaces; inside it \" stands for a literal quote, and the next
// unescaped quote closes the section and always ends the current field, so
// `""` produces an empty field.
//
// When trim is true, spaces only separate fields. When trim is false, every
// space outside quotes additionally contributes one empty string to the
// result, so the caller can see where the spaces were.
//
// An error is returned when a quoted section is never closed; split then
// holds only the fields that were completed before the end of the input.
func CommandSplit(command string, trim bool) (split []string, err error) {
	runes := []rune(strings.TrimSpace(command))

	// Always hand back a non-nil slice, even for an empty command.
	split = make([]string, 0)

	var field []rune // characters of the field being assembled
	inQuote := false

	for i, r := range runes {
		switch {
		case inQuote && r == '"' && runes[i-1] == '\\':
			// Escaped quote: the backslash was stored as an ordinary
			// character on the previous step, so overwrite it. i >= 1 here
			// because an opening quote must already have been consumed.
			field[len(field)-1] = '"'
		case inQuote && r == '"':
			// Closing quote ends the field, even when it is empty.
			split = append(split, string(field))
			field = field[:0]
			inQuote = false
		case inQuote:
			field = append(field, r)
		case r == '"':
			inQuote = true
		case r == ' ':
			if len(field) != 0 {
				split = append(split, string(field))
				field = field[:0]
			}
			if !trim {
				// Record the space itself as an empty entry.
				split = append(split, "")
			}
		default:
			field = append(field, r)
		}
	}

	if inQuote {
		err = fmt.Errorf("Command syntax error.")
		return split, err
	}
	if len(field) != 0 {
		split = append(split, string(field))
	}
	return split, nil
}

// CommandSplit used to be the only user of the file-level "regexp" import
// (it compiled patterns it never consulted). This keeps that import
// referenced; delete this line together with the import.
var _ *regexp.Regexp
// 将提供的字符串进行拆分词语处理
func SplitWords(str string) (normal [][]string) {
strn := []rune(str)
var tmpstring string
tempslice := make([]string, 0)
for _, one := range strn {
// 碰到段落就新建一个切片
if subsection(one) == true {
if len(tmpstring) > 0 {
tmpstring = optDot(tmpstring)
tempslice = append(tempslice, tmpstring)
tmpstring = ""
}
if len(tempslice) > 0 {
normal = append(normal, tempslice)
tempslice = make([]string, 0)
}
continue
}
// 如果碰到的是单字节的字,并且不是空格,就加入临时字符串
if len([]byte(string(one))) == 1 && string(one) != " " {
tmpstring += string(one)
continue
}
// 如果碰到空格,如果临时字符串里有东西,则加入临时切片
if string(one) == " " || string(one) == "\n" || string(one) == "\r" || string(one) == "\t" {
if len(tmpstring) > 0 {
tmpstring = optDot(tmpstring)
tempslice = append(tempslice, tmpstring)
tmpstring = ""
}
continue
}
// 普通的字符,就直接加入临时切片
tempslice = append(tempslice, string(one))
}
if len(tmpstring) > 0 {
tempslice = append(tempslice, tmpstring)
}
if len(tempslice) > 0 {
normal = append(normal, tempslice)
}
return
}
// optDot strips the punctuation marks '.', ',' and '!' from both ends of str
// and returns the remainder.
//
// The three marks are removed as one set, so any mixture of them at either
// end (for example "wow.!") is stripped completely regardless of order.
// Marks in the interior of str are left alone.
func optDot(str string) string {
	return strings.Trim(str, ".,!")
}
// subsection reports whether str is one of the characters that end a
// segment: sentence and clause punctuation, quotation marks, brackets, the
// middle dot, the ellipsis, the space, and the line-break and tab characters.
//
// NOTE(review): ' ', ',' and '!' are part of this set, so SplitWords, which
// consults subsection first, never gets to treat them as in-word characters.
// Full-width variants may have been intended here - confirm.
func subsection(str rune) bool {
	switch str {
	case '。', ' ', '·', ',', '!', ';', '?', ':', '、',
		'“', '”', '"', '\'',
		'<', '>', '《', '》', '(', ')',
		'…', '}', '{',
		'\n', '\r', '\t':
		return true
	}
	return false
}
// strpos returns the position of the first occurrence of substr in str,
// counted in characters (runes) rather than bytes. It returns -1 when substr
// does not occur in str.
func strpos(str, substr string) int {
	byteIdx := strings.Index(str, substr)
	if byteIdx < 0 {
		return byteIdx
	}
	// The number of runes in front of the match is its character position.
	return len([]rune(str[:byteIdx]))
}
// EncodeSalt obfuscates str by shifting each of its bytes along the alphabet
// stream.
//
// For every byte of str, its index in stream is advanced by the value of the
// next byte of GetSha1Sum(salt) (cycling through that string), wrapped modulo
// len(stream), and the stream byte found at the resulting index is emitted.
// The result therefore has the same length as str.
//
// The work is done byte by byte, so stream is expected to be made of distinct
// single-byte characters. A byte of str that does not occur in stream is
// treated as index -1 and cannot be recovered afterwards. An empty stream
// yields "".
func EncodeSalt(str, stream, salt string) string {
	// GetSha1Sum is defined elsewhere in the package; presumably it returns a
	// non-empty digest string - verify.
	key := GetSha1Sum(salt)

	size := len(stream)
	if size == 0 {
		// No alphabet to map into; the modulo below would divide by zero.
		return ""
	}

	out := make([]byte, 0, len(str))
	k := 0 // position inside key, wraps around
	for i := 0; i < len(str); i++ {
		if k == len(key) {
			k = 0
		}
		shifted := strings.IndexByte(stream, str[i]) + int(key[k])
		// Wrap into [0, size) even if shifted is negative.
		out = append(out, stream[((shifted%size)+size)%size])
		k++
	}
	return string(out)
}
// DecodeSalt reverses the byte shift applied with the same stream and salt.
//
// For every byte of str, its index in stream is moved back by the value of
// the next byte of GetSha1Sum(salt) (cycling through that string), wrapped
// into the range [0, len(stream)), and the stream byte found at the resulting
// index is emitted. The result therefore has the same length as str.
//
// The work is done byte by byte, so stream is expected to be made of distinct
// single-byte characters. A byte of str that does not occur in stream is
// treated as index -1. An empty stream yields "".
func DecodeSalt(str, stream, salt string) string {
	// GetSha1Sum is defined elsewhere in the package; presumably it returns a
	// non-empty digest string - verify.
	key := GetSha1Sum(salt)

	size := len(stream)
	if size == 0 {
		// No alphabet to map into; wrapping a negative index by repeatedly
		// adding zero would never terminate.
		return ""
	}

	out := make([]byte, 0, len(str))
	k := 0 // position inside key, wraps around
	for i := 0; i < len(str); i++ {
		if k == len(key) {
			k = 0
		}
		shifted := strings.IndexByte(stream, str[i]) - int(key[k])
		// Wrap the (possibly negative) index into [0, size).
		out = append(out, stream[((shifted%size)+size)%size])
		k++
	}
	return string(out)
}