/
string.go
137 lines (118 loc) · 2.73 KB
/
string.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
// Package unistring contains an implementation of a hybrid ASCII/UTF-16 string.
// For ASCII strings the underlying representation is equivalent to a normal Go string.
// For Unicode strings the underlying representation is UTF-16 as []uint16 with the 0th element set to 0xFEFF.
// unistring.String allows representing malformed UTF-16 values (e.g. stand-alone parts of surrogate pairs)
// which cannot be represented in UTF-8.
// At the same time it is possible to use unistring.String values as property keys just as efficiently as simple
// strings (the leading 0xFEFF ensures there is no clash with an ASCII string), and it is possible to convert it
// to valueString without extra allocations.
package unistring
import (
"reflect"
"unicode/utf16"
"unicode/utf8"
"unsafe"
)
const (
// BOM is the marker code unit (0xFEFF) stored as the 0th element of the
// UTF-16 representation. Its presence is what distinguishes a UTF-16
// backed String from a plain ASCII one.
BOM = 0xFEFF
)
// String is a hybrid ASCII/UTF-16 string. An ASCII value holds the same bytes
// as a normal Go string; a non-ASCII value holds the raw memory of a []uint16
// whose 0th element is BOM, followed by the UTF-16 code units.
type String string
// Scan looks for non-ASCII content in s. When s is pure ASCII (or empty) it
// returns nil. Otherwise it returns a freshly allocated slice holding BOM
// followed by the UTF-16 encoding of s, ready to be passed to FromUtf16.
func Scan(s string) []uint16 {
	// Find the first byte that is not plain ASCII.
	firstNonASCII := -1
	for i := 0; i < len(s); i++ {
		if s[i] >= utf8.RuneSelf {
			firstNonASCII = i
			break
		}
	}
	if firstNonASCII < 0 {
		return nil
	}

	// Every byte before firstNonASCII is exactly one code unit; the rest is
	// counted rune by rune, with runes above the BMP needing a surrogate pair.
	units := firstNonASCII
	for _, r := range s[firstNonASCII:] {
		if r > 0xFFFF {
			units += 2
		} else {
			units++
		}
	}

	// One extra slot at the front for the BOM marker.
	out := make([]uint16, units+1)
	out[0] = BOM
	pos := 1
	for _, r := range s {
		if r > 0xFFFF {
			hi, lo := utf16.EncodeRune(r)
			out[pos] = uint16(hi)
			out[pos+1] = uint16(lo)
			pos += 2
			continue
		}
		out[pos] = uint16(r)
		pos++
	}
	return out
}
// NewFromString converts a Go string into a String. Pure ASCII input is used
// as is; anything else is converted to the BOM-prefixed UTF-16 form produced
// by Scan and wrapped with FromUtf16.
func NewFromString(s string) String {
	units := Scan(s)
	if units == nil {
		// Scan found nothing outside ASCII, so the bytes can be kept unchanged.
		return String(s)
	}
	return FromUtf16(units)
}
// NewFromRunes builds a String from a slice of runes.
//
// If every rune is ASCII the result is the plain string form. Otherwise the
// runes are encoded as UTF-16 code units behind a leading BOM and wrapped with
// FromUtf16. Runes in the surrogate range are stored unchanged, which allows
// stand-alone surrogates to be represented. Negative rune values are invalid;
// they are treated as non-ASCII and stored as U+FFFD, so they neither leak
// UTF-8 bytes into the ASCII form nor get truncated into an arbitrary code
// unit.
func NewFromRunes(s []rune) String {
	ascii := true
	size := 0
	for _, c := range s {
		if c < 0 || c >= utf8.RuneSelf {
			ascii = false
			if c > 0xFFFF {
				// Needs a surrogate pair: one extra code unit.
				size++
			}
		}
		size++
	}
	if ascii {
		return String(s)
	}

	// One extra slot at the front for the BOM marker.
	b := make([]uint16, size+1)
	b[0] = BOM
	i := 1
	for _, c := range s {
		if c < 0 {
			// Invalid rune: substitute the replacement character rather
			// than truncating the value to 16 bits.
			c = utf8.RuneError
		}
		if c <= 0xFFFF {
			b[i] = uint16(c)
			i++
			continue
		}
		first, second := utf16.EncodeRune(c)
		b[i] = uint16(first)
		b[i+1] = uint16(second)
		i += 2
	}
	return FromUtf16(b)
}
// FromUtf16 wraps a UTF-16 buffer as a String without copying it.
//
// The returned String points at the memory of b (len(b)*2 bytes), so b must
// not be modified afterwards. The buffer is expected to start with BOM; this
// is not verified here. A nil or empty b yields the empty String instead of
// panicking on the &b[0] access.
func FromUtf16(b []uint16) String {
	if len(b) == 0 {
		return ""
	}
	var str string
	// Point the string header of str at the slice's backing array.
	hdr := (*reflect.StringHeader)(unsafe.Pointer(&str))
	hdr.Data = uintptr(unsafe.Pointer(&b[0]))
	hdr.Len = len(b) * 2
	return String(str)
}
// String returns the value as a regular Go string. The UTF-16 form is decoded
// with utf16.Decode after dropping the leading BOM; the ASCII form is returned
// unchanged.
func (s String) String() string {
	units := s.AsUtf16()
	if units == nil {
		return string(s)
	}
	// units[0] is the BOM marker, not part of the text.
	return string(utf16.Decode(units[1:]))
}
// AsUtf16 returns the UTF-16 code units of s, including the leading BOM, when
// s is in the UTF-16 form, and nil otherwise.
//
// No copy is made: the string's bytes are reinterpreted in place as []uint16,
// so the returned slice shares memory with s and must not be written to.
func (s String) AsUtf16() []uint16 {
	n := len(s)
	// The UTF-16 form has an even byte length and holds at least the BOM plus
	// one code unit (4 bytes).
	if n&1 != 0 || n < 4 {
		return nil
	}

	str := string(s)
	var units []uint16
	src := (*reflect.StringHeader)(unsafe.Pointer(&str))
	dst := (*reflect.SliceHeader)(unsafe.Pointer(&units))
	dst.Data = src.Data
	dst.Len = n / 2
	dst.Cap = n / 2

	// Only a buffer that begins with the BOM marker is a UTF-16 String.
	if units[0] != BOM {
		return nil
	}
	return units
}