This repository has been archived by the owner on Aug 2, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 348
/
BufferExtensions.cs
442 lines (383 loc) · 17.9 KB
/
BufferExtensions.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Numerics;
using System.Runtime.InteropServices;
using System.Buffers.Operations;
namespace System.Buffers
{
public static class MemoryListExtensions
{
// span creation helpers:
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long IndexOf(this ReadOnlySequenceSegment<byte> list, ReadOnlySpan<byte> value)
{
var first = list.Memory.Span;
var index = first.IndexOf(value);
if (index != -1) return index;
var rest = list.Next;
if (rest == null) return -1;
return IndexOfStraddling(first, list.Next, value);
}
public static int CopyTo(this ReadOnlySequenceSegment<byte> list, Span<byte> destination)
{
var current = list.Memory.Span;
int copied = 0;
while (destination.Length > 0)
{
if (current.Length >= destination.Length)
{
current.Slice(0, destination.Length).CopyTo(destination);
copied += destination.Length;
return copied;
}
else
{
current.CopyTo(destination);
copied += current.Length;
destination = destination.Slice(current.Length);
}
}
return copied;
}
public static SequencePosition? PositionOf(this ReadOnlySequenceSegment<byte> list, byte value)
{
while (list != null)
{
var current = list.Memory.Span;
var index = current.IndexOf(value);
if (index != -1) return new SequencePosition(list, index);
list = list.Next;
}
return null;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static (T segment, int index) Get<T>(this SequencePosition position)
{
var segment = position.GetObject() == null ? default : (T)position.GetObject();
return (segment, position.GetInteger());
}
// TODO (pri 3): I am pretty sure this whole routine can be written much better
// searches values that potentially straddle between first and rest
internal static long IndexOfStraddling(this ReadOnlySpan<byte> first, ReadOnlySequenceSegment<byte> rest, ReadOnlySpan<byte> value)
{
Debug.Assert(first.IndexOf(value) == -1);
if (rest == null) return -1;
// we only need to search the end of the first buffer. More precisely, only up to value.Length - 1 bytes in the first buffer
// The other bytes in first, were already search and presumably did not match
int bytesToSkipFromFirst = 0;
if (first.Length > value.Length - 1)
{
bytesToSkipFromFirst = first.Length - value.Length - 1;
}
// now that we know how many bytes we need to skip, create slice of first buffer with bytes that need to be searched.
ReadOnlySpan<byte> bytesToSearchAgain;
if (bytesToSkipFromFirst > 0)
{
bytesToSearchAgain = first.Slice(bytesToSkipFromFirst);
}
else
{
bytesToSearchAgain = first;
}
long index;
// now combine the bytes from the end of the first buffer with bytes in the rest, and serarch the combined buffer
// this check is a small optimization: if the first byte from the value does not exist in the bytesToSearchAgain, there is no reason to combine
if (bytesToSearchAgain.IndexOf(value[0]) != -1)
{
var combinedBufferLength = value.Length << 1;
var combined = combinedBufferLength < 128 ?
stackalloc byte[combinedBufferLength] :
// TODO (pri 3): I think this could be eliminated by chunking values
new byte[combinedBufferLength];
bytesToSearchAgain.CopyTo(combined);
int combinedLength = bytesToSearchAgain.Length + rest.CopyTo(combined.Slice(bytesToSearchAgain.Length));
combined = combined.Slice(0, combinedLength);
if (combined.Length < value.Length) return -1;
index = combined.IndexOf(value);
if (index != -1)
{
return index + bytesToSkipFromFirst;
}
}
// try to find the bytes in _rest
index = rest.IndexOf(value);
if (index != -1) return first.Length + index;
return -1;
}
}
public static class BufferExtensions
{
const int stackLength = 32;
public static void Pipe(this IBufferOperation transformation, ReadOnlySequence<byte> source, IBufferWriter<byte> destination)
{
int afterMergeSlice = 0;
// Assign 'remainder' to something formally stack-referring.
// The default classification is "returnable, not referring to stack", we want the opposite in this case.
ReadOnlySpan<byte> remainder = stackalloc byte[0];
Span<byte> stackSpan = stackalloc byte[stackLength];
SequencePosition poisition = default;
while (source.TryGet(ref poisition, out var sourceBuffer))
{
Span<byte> outputSpan = destination.GetSpan();
ReadOnlySpan<byte> sourceSpan = sourceBuffer.Span;
if (!remainder.IsEmpty)
{
int leftOverBytes = remainder.Length;
remainder.CopyTo(stackSpan);
int amountToCopy = Math.Min(sourceSpan.Length, stackSpan.Length - leftOverBytes);
sourceSpan.Slice(0, amountToCopy).CopyTo(stackSpan.Slice(leftOverBytes));
int amountOfData = leftOverBytes + amountToCopy;
Span<byte> spanToTransform = stackSpan.Slice(0, amountOfData);
TryTransformWithRemainder:
OperationStatus status = transformation.Execute(spanToTransform, outputSpan, out int bytesConsumed, out int bytesWritten);
if (status != OperationStatus.Done)
{
destination.Advance(bytesWritten);
spanToTransform = spanToTransform.Slice(bytesConsumed);
if (status == OperationStatus.DestinationTooSmall)
{
outputSpan = destination.GetSpan();
if (outputSpan.Length - bytesWritten < 3)
{
return; // no more output space, user decides what to do.
}
goto TryTransformWithRemainder;
}
else
{
if (status == OperationStatus.InvalidData)
{
continue; // source buffer contains invalid bytes, user decides what to do for fallback
}
// at this point, status = TransformationStatus.NeedMoreSourceData
// left over bytes in stack span
remainder = spanToTransform;
}
continue;
}
else // success
{
afterMergeSlice = bytesConsumed - remainder.Length;
remainder = Span<byte>.Empty;
destination.Advance(bytesWritten);
outputSpan = destination.GetSpan();
}
}
TryTransform:
OperationStatus result = transformation.Execute(sourceSpan.Slice(afterMergeSlice), outputSpan, out int consumed, out int written);
afterMergeSlice = 0;
destination.Advance(written);
sourceSpan = sourceSpan.Slice(consumed);
if (result == OperationStatus.Done) continue;
// Not successful
if (result == OperationStatus.DestinationTooSmall)
{
destination.GetMemory(); // output buffer is too small
outputSpan = destination.GetSpan();
if (outputSpan.Length - written < 3)
{
return; // no more output space, user decides what to do.
}
goto TryTransform;
}
else
{
if (result == OperationStatus.InvalidData)
{
continue; // source buffer contains invalid bytes, user decides what to do for fallback
}
// at this point, result = TransformationStatus.NeedMoreSourceData
// left over bytes in source span
remainder = sourceSpan;
}
}
return;
}
public static bool SequenceEqual<T>(this Memory<T> first, Memory<T> second) where T : struct, IEquatable<T>
{
return first.Span.SequenceEqual(second.Span);
}
public static bool SequenceEqual<T>(this ReadOnlyMemory<T> first, ReadOnlyMemory<T> second) where T : struct, IEquatable<T>
{
return first.Span.SequenceEqual(second.Span);
}
public static int SequenceCompareTo(this Span<byte> left, ReadOnlySpan<byte> right)
{
return SequenceCompareTo((ReadOnlySpan<byte>)left, right);
}
public static int SequenceCompareTo(this ReadOnlySpan<byte> left, ReadOnlySpan<byte> right)
{
var minLength = left.Length;
if (minLength > right.Length) minLength = right.Length;
for (int i = 0; i < minLength; i++)
{
var result = left[i].CompareTo(right[i]);
if (result != 0) return result;
}
return left.Length.CompareTo(right.Length);
}
public static bool TryIndicesOf(this Span<byte> buffer, byte value, Span<int> indices, out int numberOfIndices)
{
var length = buffer.Length;
if (length == 0 || indices.Length == 0)
{
numberOfIndices = 0;
return false;
}
return TryIndicesOf(ref MemoryMarshal.GetReference(buffer), value, length, indices, out numberOfIndices);
}
public static bool TryIndicesOf(this ReadOnlySpan<byte> buffer, byte value, Span<int> indices, out int numberOfIndices)
{
var length = buffer.Length;
if (length == 0 || indices.Length == 0)
{
numberOfIndices = 0;
return false;
}
return TryIndicesOf(ref MemoryMarshal.GetReference(buffer), value, length, indices, out numberOfIndices);
}
private unsafe static bool TryIndicesOf(ref byte searchSpace, byte value, int length, Span<int> indices, out int numberOfIndices)
{
var result = false;
numberOfIndices = 0;
fixed (byte* pSearchSpace = &searchSpace)
{
var searchStart = pSearchSpace;
var offset = 0;
while (true)
{
if (Vector.IsHardwareAccelerated)
{
// Check Vector lengths
if (length - Vector<byte>.Count >= offset)
{
Vector<byte> values = GetVector(value);
do
{
var vFlaggedMatches = Vector.Equals(Unsafe.Read<Vector<byte>>(searchStart + offset), values);
if (!vFlaggedMatches.Equals(Vector<byte>.Zero))
{
// Found match, reuse Vector values to keep register pressure low
values = vFlaggedMatches;
break;
}
offset += Vector<byte>.Count;
} while (length - Vector<byte>.Count >= offset);
// Found match? Perform secondary search outside out of loop, so above loop body is small
if (length - Vector<byte>.Count >= offset)
{
// Find offset of first match
offset += LocateFirstFoundByte(values);
// goto rather than inline return to keep function smaller
goto exitFixed;
}
}
}
ulong flaggedMatches = 0;
// Check ulong length
while (length - sizeof(ulong) >= offset)
{
flaggedMatches = SetLowBitsForByteMatch(*(ulong*)(searchStart + offset), value);
if (flaggedMatches != 0)
{
// Found match
break;
}
offset += sizeof(ulong);
}
// Found match? Perform secondary search outside out of loop, so above loop body is small
if (length - sizeof(ulong) >= offset)
{
// Find offset of first match
offset += LocateFirstFoundByte(flaggedMatches);
// goto rather than inline return to keep function smaller
goto exitFixed;
}
// Haven't found match, scan through remaining
for (; offset < length; offset++)
{
if (*(searchStart + offset) == value)
{
// goto rather than inline return to keep loop body small
goto exitFixed;
}
}
// No Matches
result = true;
break;
exitFixed:;
indices[numberOfIndices++] = offset++;
if (numberOfIndices >= indices.Length)
break;
}
}
return result && numberOfIndices < indices.Length;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int LocateFirstFoundByte(Vector<byte> match)
{
var vector64 = Vector.AsVectorUInt64(match);
ulong candidate = 0;
var i = 0;
// Pattern unrolled by jit https://github.com/dotnet/coreclr/pull/8001
for (; i < Vector<ulong>.Count; i++)
{
candidate = vector64[i];
if (candidate != 0)
{
break;
}
}
// Single LEA instruction with jitted const (using function result)
return i * 8 + LocateFirstFoundByte(candidate);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int LocateFirstFoundByte(ulong match)
{
unchecked
{
// Flag least significant power of two bit
var powerOfTwoFlag = match ^ (match - 1);
// Shift all powers of two into the high byte and extract
return (int)((powerOfTwoFlag * xorPowerOfTwoToHighByte) >> 57);
}
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static ulong SetLowBitsForByteMatch(ulong potentialMatch, byte search)
{
unchecked
{
var flaggedValue = potentialMatch ^ (byteBroadcastToUlong * search);
return (
(flaggedValue - byteBroadcastToUlong) &
~(flaggedValue) &
filterByteHighBitsInUlong
) >> 7;
}
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector<byte> GetVector(byte vectorByte)
{
#if !NETCOREAPP
// Vector<byte> .ctor doesn't become an intrinsic due to detection issue
// However this does cause it to become an intrinsic (with additional multiply and reg->reg copy)
// https://github.com/dotnet/coreclr/issues/7459#issuecomment-253965670
return Vector.AsVectorByte(new Vector<uint>(vectorByte * 0x01010101u));
#else
return new Vector<byte>(vectorByte);
#endif
}
private const ulong xorPowerOfTwoToHighByte = (0x07ul |
0x06ul << 8 |
0x05ul << 16 |
0x04ul << 24 |
0x03ul << 32 |
0x02ul << 40 |
0x01ul << 48) + 1;
private const ulong byteBroadcastToUlong = ~0UL / Byte.MaxValue;
private const ulong filterByteHighBitsInUlong = (byteBroadcastToUlong >> 1) | (byteBroadcastToUlong << (sizeof(ulong) * 8 - 1));
}
}