-
Notifications
You must be signed in to change notification settings - Fork 178
/
strings.go
159 lines (137 loc) · 4.59 KB
/
strings.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
// Copyright (c) 2012-2014 Jeremy Latt
// Copyright (c) 2014-2015 Edmund Huber
// Copyright (c) 2016-2017 Daniel Oaks <daniel@danieloaks.net>
// released under the MIT license
package irc
import (
"strings"
"github.com/oragono/confusables"
"golang.org/x/text/cases"
"golang.org/x/text/language"
"golang.org/x/text/secure/precis"
"golang.org/x/text/width"
)
// casemappingName is the identifier of the casemapping implemented in this
// file. RFC 8265 is the PRECIS specification for usernames and passwords; it
// defines the UsernameCaseMapped profile that Casefold applies.
// NOTE(review): presumably this is what gets advertised to clients as the
// server's casemapping --- confirm against the callers.
const (
casemappingName = "rfc8265"
)
// iterateFolding applies profile.CompareKey to oldStr repeatedly until the
// output stops changing, and returns the stable result.
//
// Each pass of PRECIS casefolding is a composition of idempotent operations,
// but is not idempotent itself. The spec therefore says to apply the rules up
// to four times and reject the string if it has not converged by then. Go's
// PRECIS implementation has a "repeat" option providing this, but it is not
// exposed publicly, so the loop is done by hand here.
//
// Returns:
//   - the stabilized string and a nil error if some pass leaves its input
//     unchanged;
//   - "" and the error from CompareKey if any pass fails;
//   - "" and errCouldNotStabilize if the fourth pass still changes the string.
func iterateFolding(profile *precis.Profile, oldStr string) (str string, err error) {
	// follow the stabilizing rules laid out here:
	// https://tools.ietf.org/html/draft-ietf-precis-7564bis-10.html#section-7
	for i := 0; i < 4; i++ {
		str, err = profile.CompareKey(oldStr)
		if err != nil {
			return "", err
		}
		if str == oldStr {
			// this pass was a no-op, so the string is stable
			return str, nil
		}
		oldStr = str
	}
	// Every one of the four passes changed the string. Comparing oldStr to
	// str here would be useless (they were just made equal above), so
	// running off the end of the loop is itself the failure signal.
	return "", errCouldNotStabilize
}
// Casefold runs the PRECIS UsernameCaseMapped profile over str (via
// iterateFolding) and returns the folded form. It performs no checks specific
// to nicknames or channel names; see CasefoldName and CasefoldChannel for those.
func Casefold(str string) (string, error) {
	folded, err := iterateFolding(precis.UsernameCaseMapped, str)
	return folded, err
}
// CasefoldChannel returns the casefolded form of a channel name.
//
// The name must start with one or more '#' characters; that prefix is copied
// through untouched and only the remainder is passed to Casefold. It returns
// errStringIsEmpty for an empty name, errInvalidCharacter if there is no '#'
// prefix or the folded remainder contains a forbidden character, and any
// error reported by Casefold.
func CasefoldChannel(name string) (string, error) {
	if name == "" {
		return "", errStringIsEmpty
	}

	// split the name into its run of leading #'s and the part to be folded
	body := strings.TrimLeft(name, "#")
	prefixLen := len(name) - len(body)
	if prefixLen == 0 {
		// a channel name has to begin with at least one #
		return "", errInvalidCharacter
	}

	folded, err := Casefold(body)
	if err != nil {
		return "", err
	}

	// characters that may not appear in the folded part:
	//   space  cannot be used in a name
	//   ,      is used as a separator
	//   * ?    are used in mask matching
	if strings.ContainsAny(folded, " ,*?") {
		return "", errInvalidCharacter
	}

	return name[:prefixLen] + folded, nil
}
// CasefoldName returns the casefolded form of a nick/user name.
//
// The name is passed through Casefold and the result is then screened for
// characters that are not permitted in a name. It returns errStringIsEmpty if
// the folded name is empty, errInvalidCharacter if it contains a forbidden
// character, and any error reported by Casefold.
func CasefoldName(name string) (string, error) {
	// Forbidden at any position:
	//   space  cannot be used in a name
	//   ,      is used as a separator
	//   * ?    are used in mask matching
	//   .      denotes a server name
	//   !      separates nickname from username
	//   @      separates username from hostname
	//   :      means trailing
	const forbiddenAnywhere = " ,*?.!@:"
	// Forbidden only as the first character:
	//   #      is a channel prefix
	//   ~&@%+  are channel membership prefixes
	//   -      disallowed by choice
	const forbiddenFirst = "#~&@%+-"

	folded, err := Casefold(name)
	if err != nil {
		return "", err
	}
	if folded == "" {
		return "", errStringIsEmpty
	}

	if strings.ContainsAny(folded, forbiddenAnywhere) {
		return "", errInvalidCharacter
	}
	// forbiddenFirst is pure ASCII, so a byte-level lookup of the first byte
	// is enough: a multi-byte leading rune can never match.
	if strings.IndexByte(forbiddenFirst, folded[0]) >= 0 {
		return "", errInvalidCharacter
	}

	return folded, nil
}
// isIdent reports whether name is a valid ident, using a mix of Insp and
// Chary's ident restrictions: it must be non-empty, start with an ASCII
// letter or digit, and otherwise consist only of ASCII letters, digits and
// the punctuation characters [ \ ] ^ _ { | } - . `
func isIdent(name string) bool {
	if name == "" {
		return false
	}

	for pos, c := range []byte(name) {
		switch {
		case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z', '0' <= c && c <= '9':
			// alphanumerics are accepted at every position
			continue
		case pos == 0:
			// anything else is rejected as the leading character
			return false
		}

		switch c {
		case '-', '.', '_', '`', '^', '|', '\\', '[', ']', '{', '}':
			// permitted punctuation
		default:
			return false
		}
	}

	return true
}
// Skeleton produces a canonicalized identifier intended to catch homoglyphic
// / confusable identifiers, using a tweaked version of the TR39 skeleton
// algorithm.
//
// The skeleton algorithm is applied first and lowercasing only afterwards,
// because casefolding first would lose some information about visual
// confusability. A consequence is that the skeleton is not a function of the
// casefolded identifier: it must always be computed from the original
// (unfolded) identifier and stored/tracked separately from the casefolded one.
//
// The returned error is always nil.
func Skeleton(name string) (string, error) {
	// XXX the confusables table covers some, but not all, of the
	// fullwidth->standard mappings for latin characters, so do an explicit
	// width-folding pass first, same as PRECIS does.
	narrowed := width.Fold.String(name)

	skeleton := confusables.SkeletonTweaked(narrowed)

	// Internationalized lowercasing, which is much more lenient than Casefold.
	// Skeletons are expected to mix scripts (which may violate the bidi rule),
	// and it does not matter if they contain runes that PRECIS disallows:
	// every identifier must pass PRECIS independently, and this step only
	// canonicalizes the skeleton further.
	lowerer := cases.Lower(language.Und)
	return lowerer.String(skeleton), nil
}