forked from kubernetes/kubernetes
-
Notifications
You must be signed in to change notification settings - Fork 0
/
punycode.go
268 lines (236 loc) · 5.45 KB
/
punycode.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
// Package idn implements encoding from and to punycode as speficied by RFC 3492.
package idn
import (
"bytes"
"github.com/miekg/dns"
"strings"
"unicode"
)
// Implementation idea from RFC itself and from from IDNA::Punycode created by
// Tatsuhiko Miyagawa <miyagawa@bulknews.net> and released under Perl Artistic
// License in 2002.
const (
_MIN rune = 1
_MAX rune = 26
_SKEW rune = 38
_BASE rune = 36
_BIAS rune = 72
_N rune = 128
_DAMP rune = 700
_DELIMITER = '-'
_PREFIX = "xn--"
)
// ToPunycode converts unicode domain names to DNS-appropriate punycode names.
// This function would return incorrect result for strings for non-canonical
// unicode strings.
func ToPunycode(s string) string {
tokens := dns.SplitDomainName(s)
switch {
case s == "":
return ""
case tokens == nil: // s == .
return "."
case s[len(s)-1] == '.':
tokens = append(tokens, "")
}
for i := range tokens {
tokens[i] = string(encode([]byte(tokens[i])))
}
return strings.Join(tokens, ".")
}
// FromPunycode returns unicode domain name from provided punycode string.
func FromPunycode(s string) string {
tokens := dns.SplitDomainName(s)
switch {
case s == "":
return ""
case tokens == nil: // s == .
return "."
case s[len(s)-1] == '.':
tokens = append(tokens, "")
}
for i := range tokens {
tokens[i] = string(decode([]byte(tokens[i])))
}
return strings.Join(tokens, ".")
}
// digitval converts single byte into meaningful value that's used to calculate decoded unicode character.
const errdigit = 0xffff
func digitval(code rune) rune {
switch {
case code >= 'A' && code <= 'Z':
return code - 'A'
case code >= 'a' && code <= 'z':
return code - 'a'
case code >= '0' && code <= '9':
return code - '0' + 26
}
return errdigit
}
// lettercode finds BASE36 byte (a-z0-9) based on calculated number.
func lettercode(digit rune) rune {
switch {
case digit >= 0 && digit <= 25:
return digit + 'a'
case digit >= 26 && digit <= 36:
return digit - 26 + '0'
}
panic("dns: not reached")
}
// adapt calculates next bias to be used for next iteration delta.
func adapt(delta rune, numpoints int, firsttime bool) rune {
if firsttime {
delta /= _DAMP
} else {
delta /= 2
}
var k rune
for delta = delta + delta/rune(numpoints); delta > (_BASE-_MIN)*_MAX/2; k += _BASE {
delta /= _BASE - _MIN
}
return k + ((_BASE-_MIN+1)*delta)/(delta+_SKEW)
}
// next finds minimal rune (one with lowest codepoint value) that should be equal or above boundary.
func next(b []rune, boundary rune) rune {
if len(b) == 0 {
panic("dns: invalid set of runes to determine next one")
}
m := b[0]
for _, x := range b[1:] {
if x >= boundary && (m < boundary || x < m) {
m = x
}
}
return m
}
// preprune converts unicode rune to lower case. At this time it's not
// supporting all things described in RFCs
func preprune(r rune) rune {
if unicode.IsUpper(r) {
r = unicode.ToLower(r)
}
return r
}
// tfunc is a function that helps calculate each character weight
func tfunc(k, bias rune) rune {
switch {
case k <= bias:
return _MIN
case k >= bias+_MAX:
return _MAX
}
return k - bias
}
// encode transforms Unicode input bytes (that represent DNS label) into punycode bytestream
func encode(input []byte) []byte {
n, bias := _N, _BIAS
b := bytes.Runes(input)
for i := range b {
b[i] = preprune(b[i])
}
basic := make([]byte, 0, len(b))
for _, ltr := range b {
if ltr <= 0x7f {
basic = append(basic, byte(ltr))
}
}
basiclen := len(basic)
fulllen := len(b)
if basiclen == fulllen {
return basic
}
var out bytes.Buffer
out.WriteString(_PREFIX)
if basiclen > 0 {
out.Write(basic)
out.WriteByte(_DELIMITER)
}
var (
ltr, nextltr rune
delta, q rune // delta calculation (see rfc)
t, k, cp rune // weight and codepoint calculation
)
s := &bytes.Buffer{}
for h := basiclen; h < fulllen; n, delta = n+1, delta+1 {
nextltr = next(b, n)
s.Truncate(0)
s.WriteRune(nextltr)
delta, n = delta+(nextltr-n)*rune(h+1), nextltr
for _, ltr = range b {
if ltr < n {
delta++
}
if ltr == n {
q = delta
for k = _BASE; ; k += _BASE {
t = tfunc(k, bias)
if q < t {
break
}
cp = t + ((q - t) % (_BASE - t))
out.WriteRune(lettercode(cp))
q = (q - t) / (_BASE - t)
}
out.WriteRune(lettercode(q))
bias = adapt(delta, h+1, h == basiclen)
h, delta = h+1, 0
}
}
}
return out.Bytes()
}
// decode transforms punycode input bytes (that represent DNS label) into Unicode bytestream
func decode(b []byte) []byte {
src := b // b would move and we need to keep it
n, bias := _N, _BIAS
if !bytes.HasPrefix(b, []byte(_PREFIX)) {
return b
}
out := make([]rune, 0, len(b))
b = b[len(_PREFIX):]
for pos, x := range b {
if x == _DELIMITER {
out = append(out, bytes.Runes(b[:pos])...)
b = b[pos+1:] // trim source string
break
}
}
if len(b) == 0 {
return src
}
var (
i, oldi, w rune
ch byte
t, digit rune
ln int
)
for i = 0; len(b) > 0; i++ {
oldi, w = i, 1
for k := _BASE; len(b) > 0; k += _BASE {
ch, b = b[0], b[1:]
digit = digitval(rune(ch))
if digit == errdigit {
return src
}
i += digit * w
t = tfunc(k, bias)
if digit < t {
break
}
w *= _BASE - t
}
ln = len(out) + 1
bias = adapt(i-oldi, ln, oldi == 0)
n += i / rune(ln)
i = i % rune(ln)
// insert
out = append(out, 0)
copy(out[i+1:], out[i:])
out[i] = n
}
var ret bytes.Buffer
for _, r := range out {
ret.WriteRune(r)
}
return ret.Bytes()
}