forked from upspin/upspin
/
user.go
283 lines (266 loc) · 9.13 KB
/
user.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
// Copyright 2016 The Upspin Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package user provides tools for parsing and validating user names.
package user // import "upspin.io/user"
import (
"strings"
"golang.org/x/text/secure/precis"
"upspin.io/errors"
"upspin.io/upspin"
)
// Parse splits an upspin.UserName into user and domain and returns the pair.
// It also returns the "+" suffix part of the user name, if it has one. For example,
// given the user name
// ann+backup@example.com
// it would return the strings
// "ann+backup" "backup" "example.com"
//
// Parse validates the name as an e-mail address and lower-cases the domain
// so it is canonical.
//
// The rules are:
//
// <name> := <user name>@<domain name>
//
// <domain name> :=
//
// - each . separated token < 64 characters
// - character set for tokens [a-z0-9\-]
// - final token at least two characters
// - whole name < 254 characters
// - characters are case insensitive
// - final period is OK, but we remove it
//
// We ignore the rules of punycode, which is defined in https://tools.ietf.org/html/rfc3490 .
//
// <user name> :=
//
// Names are validated and canonicalized by the UsernameCasePreserved profile
// of the RFC 7613, "Preparation, Enforcement, and Comparison of Internationalized Strings",
// also known as PRECIS.
//
// Further restrictions are added here. The only ASCII punctuation characters
// that are legal are "!#.$%&'*+-/=?^_{|}~", and a name that is only ASCII punctuation
// is rejected.
//
// As a special case for use in Access and Group files, the name "*" is allowed.
//
// Case is significant and spaces are not allowed.
//
// The username suffix is tightly constrained: It uses the same character
// set as domains, but of course the spacing of periods is irrelevant.
//
// Facebook and Google constrain usernames to [a-zA-Z0-9+-.],
// ignoring the period and, in Google only, ignoring everything
// from a plus sign onwards. We accept a superset of this but do not
// follow the "ignore" rules.
//
func Parse(userName upspin.UserName) (user, suffix, domain string, err error) {
	const op = errors.Op("user.Parse")

	// Cheap whole-name checks first: overall length, then exactly one '@'.
	full := string(userName)
	switch {
	case len(full) >= 254:
		return "", "", "", errors.E(op, errors.Invalid, userName, "name too long")
	case strings.Count(full, "@") != 1:
		return "", "", "", errors.E(op, errors.Invalid, userName, errors.Str("user name must contain one @ symbol"))
	}

	// Split around the single '@' into the local part and the host part.
	sep := strings.IndexByte(full, '@')
	local, host := full[:sep], full[sep+1:]

	// The wildcard "*" is accepted verbatim and has no suffix; every other
	// local part is validated and canonicalized by parseUser.
	if local != "*" {
		if local, suffix, err = parseUser(op, userName, local); err != nil {
			return "", "", "", err
		}
	}

	// The domain is always validated, even for the wildcard user.
	if host, err = parseDomain(op, userName, host); err != nil {
		return "", "", "", err
	}
	return local, suffix, host, nil
}
// ParseUser parses the component of a user name before the '@', that is, the
// user component of an email address. The rules are defined in the
// documentation for Parse except that "*" is not a valid user and the user name
// itself must be less than 255 bytes long.
func ParseUser(user string) (userName, suffix string, err error) {
return parseUser(errors.Op("user.ParseUser"), upspin.UserName(user), user)
}
// parseUser is the implementation of ParseUser, also called by Parse.
// It takes the full UserName as well as the user component, to aid in error reporting.
//
// On success it returns the canonicalized user component (including any
// +suffix) followed by the suffix alone, which is empty when the name has no
// '+'. On failure both strings are empty and the error is of kind
// errors.Invalid, tagged with op and userName.
func parseUser(op errors.Op, userName upspin.UserName, user string) (string, string, error) {
	if len(user) >= 255 {
		return errParseUser(op, userName, "user name too long")
	}
	if user == "" {
		return errParseUser(op, userName, "missing user name")
	}
	// Check first because PRECIS dislikes + at end of string.
	if strings.IndexByte(user, '+') == len(user)-1 {
		return errParseUser(op, userName, "empty +suffix in user name")
	}

	// Validate and canonicalize the user name - and maybe suffix, but
	// the suffix is checked more thoroughly below. We include the suffix
	// here because PRECIS will prevent things like "+" or "ann+" or
	// "+ann" as the full name. That is, we do PRECIS validation on
	// the full user+suffix.
	canon, err := canonicalize(user)
	if err != nil {
		// Report the name the caller supplied, as every other error path does.
		return "", "", errors.E(op, errors.Invalid, userName, err)
	}

	// Locate the '+' in the canonical string, not in the raw input:
	// canonicalization may change the byte layout of the name, so an
	// offset taken from the input could select the wrong suffix or fall
	// outside the canonical string altogether.
	plus := strings.IndexByte(canon, '+')
	if plus < 0 {
		return canon, "", nil
	}
	if plus == 0 {
		return errParseUser(op, userName, "user name cannot start with +suffix")
	}
	if plus == len(canon)-1 {
		return errParseUser(op, userName, "empty +suffix in user name")
	}

	// Valid +suffix?
	suffix := canon[plus+1:]
	// Any further '+' is an error, including one immediately after the
	// first ("ann++x"), which would otherwise slip through because '+'
	// is accepted by okDomainChar.
	if strings.IndexByte(suffix, '+') >= 0 {
		return errParseUser(op, userName, "multiple +suffixes in user name")
	}
	for _, c := range suffix {
		if !okDomainChar(c) {
			return errParseUser(op, userName, "bad symbol in +suffix")
		}
	}
	return canon, suffix, nil
}
// ParseDomain parses the component of a user name after the '@', that is, the
// domain component of an email address. The rules are defined in the
// documentation for Parse except the domain name itself must be less than 255
// bytes long.
func ParseDomain(domain string) (string, error) {
return parseDomain(errors.Op("user.ParseDomain"), upspin.UserName(domain), domain)
}
// parseDomain is the implementation of ParseDomain, also called by Parse.
// It takes the full UserName as well as the domain component, to aid in error reporting.
//
// On success it returns the domain with any single trailing period removed
// and upper-case ASCII letters lower-cased. On failure it returns an empty
// string and an error of kind errors.Invalid tagged with op and userName.
func parseDomain(op errors.Op, userName upspin.UserName, domain string) (string, error) {
	if len(domain) >= 255 {
		return errParseDomain(op, userName, "domain name too long")
	}
	// Final period in domain is legal but is dropped.
	domain = strings.TrimSuffix(domain, ".")
	if domain == "" {
		return errParseDomain(op, userName, "missing domain name")
	}
	if !strings.Contains(domain, ".") {
		return errParseDomain(op, userName, "domain name must contain a period")
	}

	// Valid domain name?
	// period is the index of the most recent '.'; starting at -1 makes a
	// leading '.' look like an empty element and fail the check below.
	period := -1
	isUpper := false
	for i, c := range domain {
		if !okDomainChar(c) {
			return errParseDomain(op, userName, "bad symbol in domain name")
		}
		if c == '.' {
			// The element that just ended spans (period, i): it must be
			// non-empty and shorter than 64 bytes.
			if i-1 == period || i-1 >= period+64 {
				return errParseDomain(op, userName, "invalid domain name element")
			}
			period = i
		}
		if 'A' <= c && c <= 'Z' {
			isUpper = true
		}
	}
	// Last domain element must be at least two bytes (".co")
	if period+2 >= len(domain) {
		return errParseDomain(op, userName, "invalid domain name")
	}
	// The loop only measures elements that are followed by a period, so
	// the final element needs its own upper bound: shorter than 64 bytes,
	// like every other element.
	if len(domain)-1 >= period+64 {
		return errParseDomain(op, userName, "invalid domain name element")
	}
	// Lower-case the domain name if necessary.
	if isUpper {
		domain = strings.ToLower(domain)
	}
	return domain, nil
}
// errParseUser builds the failure result for parseUser: two empty strings and
// an errors.Invalid error carrying op, the offending userName and msg.
func errParseUser(op errors.Op, userName upspin.UserName, msg string) (u, s string, err error) {
	err = errors.E(op, errors.Invalid, userName, msg)
	return "", "", err
}
// errParseDomain builds the failure result for parseDomain: an empty string
// and an errors.Invalid error carrying op, the offending userName and msg.
func errParseDomain(op errors.Op, userName upspin.UserName, msg string) (d string, err error) {
	err = errors.E(op, errors.Invalid, userName, msg)
	return "", err
}
// canonicalize validates the user component of a name and returns its
// canonical form. It rejects any name containing a character reported by
// illegalASCIIPunctuation and any name made up solely of characters reported
// by legalASCIIPunctuation (which includes the empty string). Names consisting
// only of ASCII letters and digits are returned unchanged; all others are
// handed to the PRECIS UsernameCasePreserved profile.
func canonicalize(user string) (string, error) {
	// PRECIS allows any ASCII character, but we are more restrictive.
	// The ASCII scan is cheap and usually lets us skip PRECIS entirely.
	var (
		sawNonPunct bool // at least one rune is not legal ASCII punctuation
		needsPRECIS bool // at least one rune is not a plain ASCII letter or digit
	)
	for _, r := range user {
		if illegalASCIIPunctuation(r) {
			return "", errors.Errorf("illegal character %q", r)
		}
		sawNonPunct = sawNonPunct || !legalASCIIPunctuation(r)
		needsPRECIS = needsPRECIS || !simpleUserNameChar(r)
	}
	switch {
	case !sawNonPunct:
		return "", errors.Errorf("user name contains only punctuation")
	case needsPRECIS:
		return precis.UsernameCasePreserved.String(user)
	}
	return user, nil
}
// simpleUserNameChar reports whether r is an ASCII letter or digit.
// canonicalize uses it to spot names that need no PRECIS processing.
// Punctuation is deliberately not treated as simple: identifiers allow
// punctuation only in certain places, so that decision is left to PRECIS.
func simpleUserNameChar(r rune) bool {
	lower := 'a' <= r && r <= 'z'
	upper := 'A' <= r && r <= 'Z'
	digit := '0' <= r && r <= '9'
	return lower || upper || digit
}
// illegalASCIIPunctuation reports whether r is one of the ASCII characters
// that PRECIS would accept but that we refuse inside the user part of a name:
// space and @ " ( ) , : ; < > [ \ ] `.
// '@' is on the list because only the user part is examined here, never the
// domain.
func illegalASCIIPunctuation(r rune) bool {
	const banned = " @\"(),:;<>[\\]`"
	return strings.IndexRune(banned, r) >= 0
}
// legalASCIIPunctuation reports whether r is one of the ASCII punctuation
// characters we permit in the user part of a name:
// ! # . $ % & ' * + - / = ? ^ _ { | } ~.
func legalASCIIPunctuation(r rune) bool {
	const permitted = "!#.$%&'*+-/=?^_{|}~"
	return strings.IndexRune(permitted, r) >= 0
}
// okDomainChar reports whether r may appear in a domain name or in a +suffix:
// an ASCII letter of either case, an ASCII digit, or one of '+', '-', '.'.
// See the comments for Parse for the naming rules; note that '+' is accepted
// here in addition to the characters listed there for domain tokens.
func okDomainChar(r rune) bool {
	const punct = "+-."
	letter := ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z')
	digit := '0' <= r && r <= '9'
	return letter || digit || strings.IndexRune(punct, r) >= 0
}
// Clean parses userName with Parse and returns it in the canonical form
// described by the comments for Parse. If the name is invalid it returns an
// empty name and the error from Parse. When the input is already canonical it
// is returned as is, without building a new string.
func Clean(userName upspin.UserName) (upspin.UserName, error) {
	local, _, host, err := Parse(userName)
	if err != nil {
		return "", err
	}
	// Parse succeeded, so the name holds exactly one '@'. Compare the
	// pieces on either side of it with their canonical forms and rebuild
	// only when something actually changed.
	raw := string(userName)
	at := strings.IndexByte(raw, '@')
	unchanged := raw[:at] == local && raw[at+1:] == host
	if !unchanged {
		return upspin.UserName(local + "@" + host), nil
	}
	return userName, nil
}