/
xud.go
274 lines (253 loc) · 7.08 KB
/
xud.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
// Package xud provides X User Defined character encodings.
// It does not encode or decode text; it only provides information about the encodings.
//
// This includes the early American Standards Association (ASA) ASCII character encodings.
// There are three ASA encodings (X3.4-1963, X3.4-1965 and X3.4-1967) and one missing ISO 8859-11 encoding.
// These encodings are not compatible with each other.
//
// However, the X3.4-1967 character codes are compatible with the ANSI X3.4-1977 and ANSI X3.4-1986 encodings,
// which are in turn compatible with many of the IBM Code Page and ISO 8859-X encodings, as well as Unicode.
package xud
import (
"errors"
"fmt"
"io"
"strings"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
)
// Names, numeric identifiers and aliases for the ISO 8859-11 and the legacy
// ASA/ANSI X3.4 ASCII character encodings.
//
// These are the strings that CodePage matches against, after lowercasing its
// argument, and the strings returned by Name, Numeric and Alias.
const (
Name11 = "iso-8859-11" // name of ISO 8859-11
Name63 = "ascii-63" // name of ASA X3.4 1963
Name65 = "ascii-65" // name of ASA X3.4 1965
Name67 = "ascii-67" // name of ANSI X3.4 1967/77/86
Numr11 = "11" // numeric value for ISO 8859-11
Numr63 = "1963" // numeric value for ASA X3.4 1963
Numr65 = "1965" // numeric value for ASA X3.4 1965
Numr67 = "1967" // numeric value for ANSI X3.4 1967/77/86
Alias11 = "iso885911" // alias for ISO 8859-11
Alias67 = "ansi" // alias for ANSI X3.4 1967/77/86
)
// ErrName is the error returned by CodePage when it is given an empty name.
var ErrName = errors.New("there is no encoding name")
// Encoding wraps an existing encoding.Encoding and attaches a formal name to it.
// Because the underlying encoding is embedded, an Encoding can be used wherever
// an encoding.Encoding is expected.
type Encoding struct {
encoding.Encoding // Encoding is the underlying encoding.
Name string // Name is the formal name of the character encoding; it is returned by String.
}
// The exported XUserDefined values are the encodings handled by this package.
// Each one points at an unexported Encoding value that pairs an existing
// charmap with the formal name of the user-defined encoding.
var (
// XUserDefinedISO11 is the ISO-8859-11 encoding, backed by charmap.Windows874.
XUserDefinedISO11 encoding.Encoding = &xThaiISO11
// XUserDefined1963 is the ASA X3.4 1963 encoding, backed by charmap.Windows1252.
XUserDefined1963 encoding.Encoding = &x34_1963
// XUserDefined1965 is the ASA X3.4 1965 encoding, backed by charmap.Windows1252.
XUserDefined1965 encoding.Encoding = &x34_1965
// XUserDefined1967 is the ANSI X3.4 1967/77/86 encoding, backed by charmap.Windows1252.
XUserDefined1967 encoding.Encoding = &x34_1967
// xThaiISO11 is the value behind XUserDefinedISO11.
xThaiISO11 = Encoding{
Encoding: charmap.Windows874,
Name: "ISO-8859-11",
}
// x34_1963 is the value behind XUserDefined1963.
x34_1963 = Encoding{
Encoding: charmap.Windows1252,
Name: "ASA X3.4 1963",
}
// x34_1965 is the value behind XUserDefined1965.
x34_1965 = Encoding{
Encoding: charmap.Windows1252,
Name: "ASA X3.4 1965",
}
// x34_1967 is the value behind XUserDefined1967.
x34_1967 = Encoding{
Encoding: charmap.Windows1252,
Name: "ANSI X3.4 1967/77/86",
}
)
// String returns the formal name of the encoding, which is the value of the Name field.
func (e Encoding) String() string {
return e.Name
}
// CodePage looks up one of the X User Defined encodings by its name, numeric
// value or alias. The match is case-insensitive.
//
// An empty s returns ErrName. A non-empty s that matches none of the known
// values returns a nil encoding together with a nil error, so callers must
// check the returned encoding before using it.
func CodePage(s string) (encoding.Encoding, error) {
	if len(s) == 0 {
		return nil, ErrName
	}
	// found stays a nil interface value when nothing matches.
	var found encoding.Encoding
	switch key := strings.ToLower(s); key {
	case Name11, Numr11, Alias11:
		found = XUserDefinedISO11
	case Name63, Numr63:
		found = XUserDefined1963
	case Name65, Numr65:
		found = XUserDefined1965
	case Name67, Numr67, Alias67:
		found = XUserDefined1967
	}
	return found, nil
}
// Code7bit reports whether e is one of the 7-bit ASCII encodings of this
// package: XUserDefined1963, XUserDefined1965 or XUserDefined1967.
// A 7-bit encoding has at most 128 code points (0 to 127), while the more
// common 8-bit encodings have up to 256.
func Code7bit(e encoding.Encoding) bool {
	return e == XUserDefined1963 ||
		e == XUserDefined1965 ||
		e == XUserDefined1967
}
// Name returns the name constant (Name11, Name63, Name65 or Name67) that
// belongs to the given X User Defined encoding.
// An empty string is returned for any other encoding.
func Name(e encoding.Encoding) string {
	if e == XUserDefinedISO11 {
		return Name11
	}
	if e == XUserDefined1963 {
		return Name63
	}
	if e == XUserDefined1965 {
		return Name65
	}
	if e == XUserDefined1967 {
		return Name67
	}
	return ""
}
// Numeric returns the numeric constant (Numr11, Numr63, Numr65 or Numr67)
// that belongs to the given X User Defined encoding.
// An empty string is returned for any other encoding.
func Numeric(e encoding.Encoding) string {
	if e == XUserDefinedISO11 {
		return Numr11
	}
	if e == XUserDefined1963 {
		return Numr63
	}
	if e == XUserDefined1965 {
		return Numr65
	}
	if e == XUserDefined1967 {
		return Numr67
	}
	return ""
}
// Alias returns the alias constant of the given X User Defined encoding.
// Only XUserDefinedISO11 (Alias11) and XUserDefined1967 (Alias67) have an
// alias; an empty string is returned for every other encoding.
func Alias(e encoding.Encoding) string {
	if e == XUserDefinedISO11 {
		return Alias11
	}
	if e == XUserDefined1967 {
		return Alias67
	}
	return ""
}
// Footnote writes a footnote for the legacy ASA ASCII character encodings to w.
// A blank line followed by the note is written for XUserDefined1963 and
// XUserDefined1965; nothing is written for any other encoding.
// A nil w is replaced with io.Discard. Write errors are not reported.
func Footnote(w io.Writer, e encoding.Encoding) {
	const (
		note1963 = "* ASA X3.4 1963 has a number of historic control codes in" +
			"\n rows 0 and 1 that are not printable in Unicode."
		note1965 = "* ASA X3.4 1965 cell 1-A is SUB, but it is not printable in Unicode."
	)
	var note string
	switch e {
	case XUserDefined1963:
		note = note1963
	case XUserDefined1965:
		note = note1965
	default:
		// No footnote exists for this encoding.
		return
	}
	if w == nil {
		w = io.Discard
	}
	fmt.Fprintln(w)
	fmt.Fprintln(w, note)
}
// Char returns a rune for the decimal character code of an X User Defined encoding.
// The lookup is delegated to CharISO885911, CharX3463, CharX3465 or CharX3467
// according to e, and the result of that function is returned unchanged.
// For any other encoding, -1 is returned.
func Char(e encoding.Encoding, code int) rune {
	var lookup func(int) rune
	switch e {
	case XUserDefinedISO11:
		lookup = CharISO885911
	case XUserDefined1963:
		lookup = CharX3463
	case XUserDefined1965:
		lookup = CharX3465
	case XUserDefined1967:
		lookup = CharX3467
	default:
		return -1
	}
	return lookup(code)
}
// CharISO885911 returns a rune for the ISO-8859-11 character code.
// Codes 128 to 159 inclusive are treated as undefined and return a space.
// Every other code, including negative values, returns -1.
func CharISO885911(code int) rune {
	const first, last = 128, 159
	if code < first || code > last {
		return -1
	}
	return ' '
}
// CharX3463 returns a rune for the legacy ASA X3.4 1963 character code.
//
// The checks are applied in this order:
//   - codes of 128 and above, and code 125, return a space;
//   - codes that have an entry in mapX3493 return that replacement rune;
//   - the remaining codes up to 31 (including negative values) and the
//     codes 96 to 123 return a space;
//   - every other code is returned unchanged as a rune.
func CharX3463(code int) rune {
	const (
		space     = ' '
		lastLow   = 31  // highest code of the first two rows
		blankFrom = 96  // start of the range that yields a space
		blankTo   = 123 // end of the range that yields a space
		blankAt   = 125 // single code that yields a space
		limit     = 128 // first code outside of the 7-bit range
	)
	switch {
	case code >= limit, code == blankAt:
		return space
	}
	// The replacement table takes priority over the blank ranges below.
	if r := mapX3493(code); r > 0 {
		return r
	}
	switch {
	case code <= lastLow:
		return space
	case code >= blankFrom && code <= blankTo:
		return space
	}
	return rune(code)
}
// mapX3493 returns the Unicode rune used to display code i of the legacy
// ASA X3.4 1963 encoding, or 0 when the code has no dedicated replacement.
//
// A switch is used rather than a map literal so that no map is allocated and
// populated on every call.
//
// NOTE(review): the name reads 3493 although the only visible caller is
// CharX3463; presumably a typo for 3463, kept so existing references still compile.
func mapX3493(i int) rune {
	switch i {
	case 0:
		return '␀'
	case 4:
		return '␄'
	case 7:
		return '␇'
	case 9:
		return '␉'
	case 10:
		return '␊'
	case 11:
		return '␋'
	case 12:
		return '␌'
	case 13:
		return '␍'
	case 14:
		return '␎'
	case 15:
		return '␏'
	case 17:
		return '␑'
	case 18:
		return '␒'
	case 19:
		return '␓'
	case 20:
		return '␔'
	case 94:
		return '↑'
	case 95:
		return '←'
	case 124:
		return '␆'
	case 126:
		return '␛'
	case 127:
		return '␡'
	}
	// No replacement exists for this code.
	return 0
}
// CharX3465 returns a rune for the legacy ASA X3.4 1965 character code.
//
// Codes of 128 and above, and code 26 (SUB), return a space.
// Codes 64, 92, 96, 124 and 126 return the replacement characters
// '`', '~', '@', '¬' and '|' respectively.
// Every other code, including negative values, returns -1.
func CharX3465(code int) rune {
	const (
		posSub   = 26
		posGrave = 64
		posTilde = 92
		posAt    = 96
		posNot   = 124
		posBar   = 126
		limit    = 128 // first code outside of the 7-bit range
	)
	switch {
	case code >= limit, code == posSub:
		return ' '
	case code == posGrave:
		return '`'
	case code == posTilde:
		return '~'
	case code == posAt:
		return '@'
	case code == posNot:
		return '¬'
	case code == posBar:
		return '|'
	}
	return -1
}
// CharX3467 returns a rune for the legacy ANSI X3.4 1967 character code.
// Codes of 128 and above are outside of the 7-bit range and return a space.
// Every other code, including negative values, returns -1.
func CharX3467(code int) rune {
	const limit = 128
	if code < limit {
		return -1
	}
	return ' '
}