-
Notifications
You must be signed in to change notification settings - Fork 6
/
mem.go
337 lines (278 loc) · 11.9 KB
/
mem.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
/*
Copyright 2020 The Go4 AUTHORS
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package mem provides the mem.RO type that allows you to cheaply pass &
// access either a read-only []byte or a string.
package mem // import "go4.org/mem"
import (
"hash/maphash"
"strconv"
"strings"
"sync"
"unicode/utf8"
"unsafe"
)
// RO is a read-only view of some bytes of memory. It may be backed
// by a string or []byte. Notably, unlike a string, the memory is not
// guaranteed to be immutable. While the length is fixed, the
// underlying bytes might change if interleaved with code that's
// modifying the underlying memory.
//
// RO is a value type that's the same size as a Go string. Its various
// methods should inline & compile to the equivalent operations
// working on a string or []byte directly.
//
// Unlike a Go string, RO is not 'comparable' (it can't be a map key
// or support ==). Use its Equal method to compare. This is done so an
// RO backed by a later-mutating []byte doesn't break invariants in
// Go's map implementation.
type RO struct {
_ [0]func() // not comparable; don't want to be a map key or support ==
// m holds the viewed bytes. It is typed as unsafeString because it
// may point into a []byte rather than into a real, immutable string.
m unsafeString
}
// str exposes r's bytes as an ordinary Go string without copying.
//
// The result does not obey the language/runtime's expectations of a
// real string, because its contents can change underfoot. Only pass it
// to standard library funcs known to not let the string escape.
func (r RO) str() string {
	return string(r.m)
}
// Len reports the number of bytes in r, i.e. len(r).
func (r RO) Len() int {
	n := len(r.m)
	return n
}
// At returns the byte at index i, i.e. r[i]. Like indexing a string,
// it panics if i is out of range.
func (r RO) At(i int) byte {
	b := r.m[i]
	return b
}
// Slice returns the sub-view r[from:to]. No bytes are copied; the
// result refers to the same memory as r. Like slicing a string, it
// panics if the bounds are out of range.
func (r RO) Slice(from, to int) RO {
	// r is a copy (value receiver), so narrowing it only affects the
	// value handed back to the caller.
	r.m = r.m[from:to]
	return r
}
// SliceFrom returns the sub-view r[from:], sharing memory with r.
// Like slicing a string, it panics if from is out of range.
func (r RO) SliceFrom(from int) RO {
	// r is a copy (value receiver); only the returned view is narrowed.
	r.m = r.m[from:]
	return r
}
// SliceTo returns the sub-view r[:to], sharing memory with r.
// Like slicing a string, it panics if to is out of range.
func (r RO) SliceTo(to int) RO {
	// r is a copy (value receiver); only the returned view is narrowed.
	r.m = r.m[:to]
	return r
}
// Copy copies bytes from r into dest, stopping at the end of whichever
// is shorter. It returns the number of bytes copied, which is
// min(r.Len(), len(dest)).
func (r RO) Copy(dest []byte) int {
	n := copy(dest, r.m)
	return n
}
// Equal reports whether r and r2 have the same length and hold the
// same bytes. Use it in place of ==, which RO does not support.
func (r RO) Equal(r2 RO) bool {
	return r2.m == r.m
}
// EqualString reports whether r and the string s have the same length
// and hold the same bytes.
func (r RO) EqualString(s string) bool {
	return s == r.str()
}
// EqualBytes reports whether r and the byte slice b have the same
// length and hold the same bytes.
func (r RO) EqualBytes(b []byte) bool {
	// string(b) appears directly in the comparison, a form the
	// compiler can evaluate without allocating a copy of b.
	return string(b) == r.str()
}
// Less reports whether r < r2, using the same byte-wise ordering as
// Go string comparison.
func (r RO) Less(r2 RO) bool {
	return r2.str() > r.str()
}
// builderPool holds reusable *strings.Builder values. StringCopy takes
// a Builder from here instead of allocating a new one on every call.
var builderPool = sync.Pool{
	New: func() interface{} {
		var sb strings.Builder
		return &sb
	},
}
// StringCopy returns r's contents in a newly allocated string. Unlike
// the view r, the result is a real Go string that does not share
// memory with r.
func (r RO) StringCopy() string {
	sb := builderPool.Get().(*strings.Builder)
	defer func() {
		// Reset before returning the Builder to the pool so it no
		// longer references the bytes of the string handed out below.
		sb.Reset()
		builderPool.Put(sb)
	}()
	sb.WriteString(r.str())
	return sb.String()
}
// seed is the maphash seed shared by every RO.MapHash call. It is
// created once when the package is initialized, so hashes are only
// comparable with each other within a single process.
var seed = maphash.MakeSeed()
// MapHash returns a hash of r's contents, computed with hash/maphash
// and the package-level seed. The hash is stable for the lifetime of a
// process.
func (r RO) MapHash() uint64 {
	var mh maphash.Hash
	mh.SetSeed(seed)
	mh.WriteString(r.str())
	sum := mh.Sum64()
	return sum
}
// ParseInt parses m as a signed integer in the given base and bit
// size. It has the same semantics as strconv.ParseInt, which it calls.
func ParseInt(m RO, base, bitSize int) (int64, error) {
	v, err := strconv.ParseInt(m.str(), base, bitSize)
	return v, err
}
// ParseUint parses m as an unsigned integer in the given base and bit
// size. It has the same semantics as strconv.ParseUint, which it calls.
func ParseUint(m RO, base, bitSize int) (uint64, error) {
	v, err := strconv.ParseUint(m.str(), base, bitSize)
	return v, err
}
// ParseFloat parses m as a floating-point number of the given bit
// size. It has the same semantics as strconv.ParseFloat, which it calls.
func ParseFloat(m RO, bitSize int) (float64, error) {
	v, err := strconv.ParseFloat(m.str(), bitSize)
	return v, err
}
// Append appends the bytes of m to dest and returns the resulting
// slice, which may have been reallocated.
func Append(dest []byte, m RO) []byte {
	dest = append(dest, m.m...)
	return dest
}
// Contains reports whether substr occurs anywhere within m.
func Contains(m, substr RO) bool {
	// Same test strings.Contains performs: substr is present exactly
	// when its first index is non-negative.
	return strings.Index(m.str(), substr.str()) >= 0
}
// EqualFold reports whether m and m2, interpreted as UTF-8 strings,
// are equal under Unicode case-folding, which is a more general form
// of case-insensitivity.
func EqualFold(m, m2 RO) bool {
	a, b := m.str(), m2.str()
	return strings.EqualFold(a, b)
}
// HasPrefix reports whether m begins with prefix.
func HasPrefix(m, prefix RO) bool {
	s, p := m.str(), prefix.str()
	return strings.HasPrefix(s, p)
}
// HasSuffix reports whether m ends with suffix.
func HasSuffix(m, suffix RO) bool {
	s, suf := m.str(), suffix.str()
	return strings.HasSuffix(s, suf)
}
// Index returns the byte index of the first instance of substr in m,
// or -1 if substr is not present in m.
func Index(m, substr RO) int {
	i := strings.Index(m.str(), substr.str())
	return i
}
// IndexByte returns the index of the first instance of the byte c in
// m, or -1 if c is not present in m.
func IndexByte(m RO, c byte) int {
	i := strings.IndexByte(m.str(), c)
	return i
}
// LastIndexByte returns the index of the last instance of the byte c
// in m, or -1 if c is not present in m.
func LastIndexByte(m RO, c byte) int {
	i := strings.LastIndexByte(m.str(), c)
	return i
}
// LastIndex returns the byte index of the last instance of substr in
// m, or -1 if substr is not present in m.
func LastIndex(m, substr RO) int {
	i := strings.LastIndex(m.str(), substr.str())
	return i
}
// TrimSpace returns a slice of m with all leading and trailing white
// space removed, as defined by Unicode.
func TrimSpace(m RO) RO {
	trimmed := strings.TrimSpace(m.str())
	return S(trimmed)
}
// TrimSuffix returns m with the given trailing suffix removed.
// If m doesn't end with suffix, m is returned unchanged.
func TrimSuffix(m, suffix RO) RO {
	trimmed := strings.TrimSuffix(m.str(), suffix.str())
	return S(trimmed)
}
// TrimPrefix returns m with the given leading prefix removed.
// If m doesn't start with prefix, m is returned unchanged.
func TrimPrefix(m, prefix RO) RO {
	trimmed := strings.TrimPrefix(m.str(), prefix.str())
	return S(trimmed)
}
// TrimRightCutset returns a slice of m with every trailing Unicode
// code point that appears in cutset removed.
//
// To remove a suffix, use TrimSuffix instead.
func TrimRightCutset(m, cutset RO) RO {
	trimmed := strings.TrimRight(m.str(), cutset.str())
	return S(trimmed)
}
// TrimLeftCutset returns a slice of m with every leading Unicode code
// point that appears in cutset removed.
//
// To remove a prefix, use TrimPrefix instead.
func TrimLeftCutset(m, cutset RO) RO {
	trimmed := strings.TrimLeft(m.str(), cutset.str())
	return S(trimmed)
}
// TrimCutset returns a slice of m with every leading and trailing
// Unicode code point that appears in cutset removed.
func TrimCutset(m, cutset RO) RO {
	trimmed := strings.Trim(m.str(), cutset.str())
	return S(trimmed)
}
// TrimFunc returns a slice of m with all leading and trailing Unicode
// code points c for which f(c) is true removed.
func TrimFunc(m RO, f func(rune) bool) RO {
	trimmed := strings.TrimFunc(m.str(), f)
	return S(trimmed)
}
// TrimRightFunc returns a slice of m with all trailing Unicode code
// points c for which f(c) is true removed.
func TrimRightFunc(m RO, f func(rune) bool) RO {
	trimmed := strings.TrimRightFunc(m.str(), f)
	return S(trimmed)
}
// TrimLeftFunc returns a slice of m with all leading Unicode code
// points c for which f(c) is true removed.
func TrimLeftFunc(m RO, f func(rune) bool) RO {
	trimmed := strings.TrimLeftFunc(m.str(), f)
	return S(trimmed)
}
// Documentation for UTF-8 related functions copied from Go's unicode/utf8 package. (BSD license)
// DecodeRune unpacks the first UTF-8 encoding in m and returns the
// rune and its width in bytes. If m is empty it returns
// (utf8.RuneError, 0). Otherwise, if the encoding is invalid, it
// returns (utf8.RuneError, 1). Both are impossible results for
// correct, non-empty UTF-8.
//
// An encoding is invalid if it is incorrect UTF-8, encodes a rune that
// is out of range, or is not the shortest possible UTF-8 encoding for
// the value. No other validation is performed.
func DecodeRune(m RO) (r rune, size int) {
	r, size = utf8.DecodeRuneInString(m.str())
	return r, size
}
// DecodeLastRune unpacks the last UTF-8 encoding in m and returns the
// rune and its width in bytes. If m is empty it returns
// (utf8.RuneError, 0). Otherwise, if the encoding is invalid, it
// returns (utf8.RuneError, 1). Both are impossible results for
// correct, non-empty UTF-8.
//
// An encoding is invalid if it is incorrect UTF-8, encodes a rune that
// is out of range, or is not the shortest possible UTF-8 encoding for
// the value. No other validation is performed.
func DecodeLastRune(m RO) (r rune, size int) {
	r, size = utf8.DecodeLastRuneInString(m.str())
	return r, size
}
// FullRune reports whether the bytes in m begin with a full UTF-8
// encoding of a rune. An invalid encoding is considered a full rune
// since it will convert as a width-1 error rune.
func FullRune(m RO) bool {
	full := utf8.FullRuneInString(m.str())
	return full
}
// RuneCount returns the number of UTF-8 encoded runes in m. Erroneous
// and short encodings are treated as single runes of width 1 byte.
func RuneCount(m RO) int {
	count := utf8.RuneCountInString(m.str())
	return count
}
// ValidUTF8 reports whether m consists entirely of valid UTF-8
// encoded runes.
func ValidUTF8(m RO) bool {
	valid := utf8.ValidString(m.str())
	return valid
}
// NewReader returns a new Reader that reads from m. The Reader wraps
// m's bytes; it does not copy them.
func NewReader(m RO) *Reader {
	sr := strings.NewReader(m.str())
	return &Reader{sr: sr}
}
// Cut works like strings.Cut, but takes and returns ROs. It splits m
// around the first instance of sep, returning the parts before and
// after it. If sep does not occur in m, Cut returns m, an empty RO,
// and false.
func Cut(m, sep RO) (before, after RO, found bool) {
	i := Index(m, sep)
	if i < 0 {
		return m, S(""), false
	}
	return m.SliceTo(i), m.SliceFrom(i + sep.Len()), true
}
// CutPrefix works like strings.CutPrefix, but takes and returns ROs.
// If m starts with prefix, it returns the remainder of m and true;
// otherwise it returns m unchanged and false.
func CutPrefix(m, prefix RO) (after RO, found bool) {
	if HasPrefix(m, prefix) {
		return m.SliceFrom(prefix.Len()), true
	}
	return m, false
}
// CutSuffix works like strings.CutSuffix, but takes and returns ROs.
// If m ends with suffix, it returns the part of m before the suffix
// and true; otherwise it returns m unchanged and false.
func CutSuffix(m, suffix RO) (before RO, found bool) {
	if HasSuffix(m, suffix) {
		end := m.Len() - suffix.Len()
		return m.SliceTo(end), true
	}
	return m, false
}
// Reader is like a bytes.Reader or strings.Reader.
type Reader struct {
sr *strings.Reader
}
func (r *Reader) Len() int { return r.sr.Len() }
func (r *Reader) Size() int64 { return r.sr.Size() }
func (r *Reader) Read(b []byte) (int, error) { return r.sr.Read(b) }
func (r *Reader) ReadAt(b []byte, off int64) (int, error) { return r.sr.ReadAt(b, off) }
func (r *Reader) ReadByte() (byte, error) { return r.sr.ReadByte() }
func (r *Reader) ReadRune() (ch rune, size int, err error) { return r.sr.ReadRune() }
func (r *Reader) Seek(offset int64, whence int) (int64, error) { return r.sr.Seek(offset, whence) }
// TODO: add Reader.WriteTo, but don't use strings.Reader.WriteTo because it uses io.WriteString, leaking our unsafe string
// unsafeString is a string that's not really a Go string.
// It might be pointing into a []byte (see B), in which case its bytes
// can change after it is created. Don't let it escape to callers.
// We contain the unsafety to this package.
type unsafeString string
// stringHeader is a safer version of reflect.StringHeader: its data
// field is a *byte rather than a uintptr.
// See https://github.com/golang/go/issues/40701.
//
// B reinterprets a *stringHeader as a *unsafeString, so the fields
// must stay in this order.
// NOTE(review): this relies on the runtime representing a string as
// {data pointer, length} — confirm for the Go versions being targeted.
type stringHeader struct {
P *byte
Len int
}
// S returns a read-only view of the string s.
//
// The compiler should compile this call to nothing. Think of it as a
// free type conversion. The returned RO view is the same size as a
// string.
func S(s string) RO {
	var view RO
	view.m = unsafeString(s)
	return view
}
// B returns a read-only view of the byte slice b. The view shares b's
// memory, so later writes to b are visible through the returned RO.
// An empty or nil b yields an empty RO.
//
// The compiler should compile this call to nothing. Think of it as a
// free type conversion. The returned value is actually smaller than a
// []byte though (16 bytes instead of 24 bytes on 64-bit
// architectures).
func B(b []byte) RO {
	if len(b) == 0 {
		// &b[0] below would panic on an empty slice.
		return RO{m: ""}
	}
	// Describe b's bytes with a string-shaped header, then reinterpret
	// that header as an unsafeString without copying the bytes.
	hdr := stringHeader{P: &b[0], Len: len(b)}
	return RO{m: *(*unsafeString)(unsafe.Pointer(&hdr))}
}