Skip to content

Commit e4c0272

Browse files
committed
Do it in parallel per core
This makes the HT and non-HT results basically the same. X3/X4 do not speed things up; rather, they slow things down.
1 parent b1b394a commit e4c0272

File tree

2 files changed

+148
-70
lines changed

2 files changed

+148
-70
lines changed

1brc/App.cs

Lines changed: 146 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ public unsafe void ProcessChunkMmapSingleSharedPos(FixedDictionary<Utf8Span, Sum
240240
#endif
241241

242242
if (Vector256.IsHardwareAccelerated)
243-
ProcessSpan2(resultAcc, new Utf8Span(ptr0 + start, (uint)(end - start)));
243+
ProcessSpanX2(resultAcc, new Utf8Span(ptr0 + start, (uint)(end - start)));
244244
else
245245
ProcessSpan(resultAcc, new Utf8Span(ptr0 + start, (uint)(end - start)));
246246

@@ -265,7 +265,6 @@ public unsafe void ProcessChunkMmapViewPerChunkRandom(FixedDictionary<Utf8Span,
265265
var length0 = length / 2 + delta;
266266
using (var accessor = _mmf.CreateViewAccessor(start, length0 + 1024, MemoryMappedFileAccess.Read))
267267
{
268-
269268
byte* ptr = default;
270269
accessor.SafeMemoryMappedViewHandle.AcquirePointer(ref ptr);
271270
ptr += accessor.PointerOffset;
@@ -368,90 +367,169 @@ public static unsafe void ProcessSpan(FixedDictionary<Utf8Span, Summary> result,
368367
while (remaining.Length > 0)
369368
{
370369
nuint idx = remaining.IndexOfSemicolon();
371-
nint value = remaining.ParseInt(idx, out var idx1);
372-
result.GetValueRefOrAddDefault(new Utf8Span(remaining.Pointer, idx)).Apply(value);
373-
remaining = remaining.SliceUnsafe(idx1);
370+
nint value = remaining.ParseInt(idx, out var nextStart);
371+
result.Update(new Utf8Span(remaining.Pointer, idx), value);
372+
remaining = remaining.SliceUnsafe(nextStart);
373+
}
374+
}
375+
376+
[MethodImpl(MethodImplOptions.NoInlining)]
377+
public static void ProcessSpanX2(FixedDictionary<Utf8Span, Summary> result, Utf8Span chunk)
378+
{
379+
nuint middle = chunk.Length / 2;
380+
middle += (uint)chunk.SliceUnsafe(middle).Span.IndexOf((byte)'\n') + 1;
381+
var chunk0 = chunk.SliceUnsafe(0, (uint)middle);
382+
var chunk1 = chunk.SliceUnsafe((uint)middle);
383+
ProcessSpan(result, chunk0, chunk1);
384+
}
385+
386+
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
387+
public static unsafe void ProcessSpan(FixedDictionary<Utf8Span, Summary> result, Utf8Span chunk0, Utf8Span chunk1)
388+
{
389+
while (true)
390+
{
391+
if (chunk0.Length <= 0)
392+
break;
393+
394+
if (chunk1.Length <= 0)
395+
break;
396+
397+
nuint idx0 = chunk0.IndexOfSemicolon();
398+
nuint idx1 = chunk1.IndexOfSemicolon();
399+
400+
nint value0 = chunk0.ParseInt(idx0, out var nextStart0);
401+
nint value1 = chunk1.ParseInt(idx1, out var nextStart1);
402+
403+
result.Update(new Utf8Span(chunk0.Pointer, idx0), value0);
404+
result.Update(new Utf8Span(chunk1.Pointer, idx1), value1);
405+
406+
chunk0 = chunk0.SliceUnsafe(nextStart0);
407+
chunk1 = chunk1.SliceUnsafe(nextStart1);
374408
}
409+
410+
ProcessSpan(result, chunk0);
411+
ProcessSpan(result, chunk1);
375412
}
376413

377414
[MethodImpl(MethodImplOptions.NoInlining)]
378-
public static unsafe void ProcessSpan2(FixedDictionary<Utf8Span, Summary> result, Utf8Span remaining)
415+
public static void ProcessSpanX3(FixedDictionary<Utf8Span, Summary> result, Utf8Span chunk)
379416
{
380-
Debug.Assert(Vector256.IsHardwareAccelerated);
417+
nuint third = chunk.Length / 3;
418+
var oneThird = third + (uint)chunk.SliceUnsafe(third).Span.IndexOf((byte)'\n') + 1;
419+
var twoThirds = third * 2 + (uint)chunk.SliceUnsafe(third * 2).Span.IndexOf((byte)'\n') + 1;
420+
421+
var chunk0 = chunk.SliceUnsafe(0, (uint)oneThird);
422+
var chunk1 = chunk.SliceUnsafe((uint)oneThird, twoThirds - oneThird);
423+
var chunk2 = chunk.SliceUnsafe(twoThirds);
381424

382-
const nuint vectorSize = 32;
425+
Debug.Assert(chunk0.Length > 0);
426+
Debug.Assert(chunk1.Length > 0);
427+
Debug.Assert(chunk2.Length > 0);
428+
Debug.Assert(chunk0.Length + chunk1.Length + chunk2.Length == chunk.Length);
383429

430+
ProcessSpan(result, chunk0, chunk1, chunk2);
431+
}
432+
433+
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
434+
public static unsafe void ProcessSpan(FixedDictionary<Utf8Span, Summary> result, Utf8Span chunk0, Utf8Span chunk1, Utf8Span chunk2)
435+
{
384436
while (true)
385437
{
386-
if (remaining.Length <= 0)
438+
if (chunk0.Length <= 0)
387439
break;
388440

389-
nuint idx;
390-
nuint idx1;
391-
nint value;
392-
var matches = Vector256.Equals(Unsafe.ReadUnaligned<Vector256<byte>>(remaining.Pointer), Vector256.Create((byte)';'));
393-
var mask = matches.ExtractMostSignificantBits();
441+
if (chunk1.Length <= 0)
442+
break;
394443

395-
if (mask != 0)
396-
{
397-
idx = (nuint)BitOperations.TrailingZeroCount(mask);
398-
value = ParseInt(remaining.Pointer, idx, out idx1);
399-
if (result.TryUpdate(new Utf8Span(remaining.Pointer, idx), value))
400-
goto DONE;
401-
}
402-
else // 32-63
403-
{
404-
matches = Vector256.Equals(Unsafe.ReadUnaligned<Vector256<byte>>(remaining.Pointer + vectorSize), Vector256.Create((byte)';'));
405-
mask = matches.ExtractMostSignificantBits();
444+
if (chunk2.Length <= 0)
445+
break;
406446

407-
if (mask != 0) // 64-95
408-
{
409-
idx = vectorSize + (uint)BitOperations.TrailingZeroCount(mask);
410-
value = ParseInt(remaining.Pointer, idx, out idx1);
411-
}
412-
else
413-
{
414-
matches = Vector256.Equals(Unsafe.ReadUnaligned<Vector256<byte>>(remaining.Pointer + 2 * vectorSize), Vector256.Create((byte)';'));
415-
mask = matches.ExtractMostSignificantBits();
416-
417-
if (mask != 0) // 96-127
418-
{
419-
idx = 2 * vectorSize + (uint)BitOperations.TrailingZeroCount(mask);
420-
value = ParseInt(remaining.Pointer, idx, out idx1);
421-
}
422-
else
423-
{
424-
matches = Vector256.Equals(Unsafe.ReadUnaligned<Vector256<byte>>(remaining.Pointer + 3 * vectorSize), Vector256.Create((byte)';'));
425-
mask = matches.ExtractMostSignificantBits();
426-
idx = 3 * vectorSize + (uint)BitOperations.TrailingZeroCount(mask);
427-
value = ParseInt(remaining.Pointer, idx, out idx1);
428-
}
429-
}
430-
}
447+
nuint idx0 = chunk0.IndexOfSemicolon();
448+
nuint idx1 = chunk1.IndexOfSemicolon();
449+
nuint idx2 = chunk2.IndexOfSemicolon();
431450

432-
result.GetValueRefOrAddDefault(new Utf8Span(remaining.Pointer, idx)).Apply(value);
433-
434-
DONE:
435-
remaining = remaining.SliceUnsafe(idx1);
451+
nint value0 = chunk0.ParseInt(idx0, out var nextStart0);
452+
nint value1 = chunk1.ParseInt(idx1, out var nextStart1);
453+
nint value2 = chunk2.ParseInt(idx2, out var nextStart2);
454+
455+
result.Update(new Utf8Span(chunk0.Pointer, idx0), value0);
456+
result.Update(new Utf8Span(chunk1.Pointer, idx1), value1);
457+
result.Update(new Utf8Span(chunk2.Pointer, idx2), value2);
458+
459+
chunk0 = chunk0.SliceUnsafe(nextStart0);
460+
chunk1 = chunk1.SliceUnsafe(nextStart1);
461+
chunk2 = chunk2.SliceUnsafe(nextStart2);
436462
}
437463

438-
[MethodImpl(MethodImplOptions.AggressiveInlining)]
439-
static nint ParseInt(byte* ptr, nuint start, out nuint lfIndex)
464+
ProcessSpan(result, chunk0);
465+
ProcessSpan(result, chunk1);
466+
ProcessSpan(result, chunk2);
467+
}
468+
469+
[MethodImpl(MethodImplOptions.NoInlining)]
470+
public static void ProcessSpanX4(FixedDictionary<Utf8Span, Summary> result, Utf8Span chunk)
471+
{
472+
nuint q = chunk.Length / 4;
473+
var one4 = q + (uint)chunk.SliceUnsafe(q).Span.IndexOf((byte)'\n') + 1;
474+
var two4 = q * 2 + (uint)chunk.SliceUnsafe(q * 2).Span.IndexOf((byte)'\n') + 1;
475+
var three4 = q * 3 + (uint)chunk.SliceUnsafe(q * 3).Span.IndexOf((byte)'\n') + 1;
476+
477+
var chunk0 = chunk.SliceUnsafe(0, (uint)one4);
478+
var chunk1 = chunk.SliceUnsafe((uint)one4, two4 - one4);
479+
var chunk2 = chunk.SliceUnsafe(two4, three4 - two4);
480+
var chunk3 = chunk.SliceUnsafe(three4);
481+
482+
Debug.Assert(chunk0.Length > 0);
483+
Debug.Assert(chunk1.Length > 0);
484+
Debug.Assert(chunk2.Length > 0);
485+
Debug.Assert(chunk3.Length > 0);
486+
Debug.Assert(chunk0.Length + chunk1.Length + chunk2.Length + chunk3.Length == chunk.Length);
487+
488+
ProcessSpan(result, chunk0, chunk1, chunk2, chunk3);
489+
}
490+
491+
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
492+
public static unsafe void ProcessSpan(FixedDictionary<Utf8Span, Summary> result, Utf8Span chunk0, Utf8Span chunk1, Utf8Span chunk2, Utf8Span chunk3)
493+
{
494+
while (true)
440495
{
441-
const long DOT_BITS = 0x10101000;
442-
const long MAGIC_MULTIPLIER = (100 * 0x1000000 + 10 * 0x10000 + 1);
443-
444-
long word = *(long*)(ptr + start + 1);
445-
long inverted = ~word;
446-
int dot = BitOperations.TrailingZeroCount(inverted & DOT_BITS);
447-
long signed = (inverted << 59) >> 63;
448-
long mask = ~(signed & 0xFF);
449-
long digits = ((word & mask) << (28 - dot)) & 0x0F000F0F00L;
450-
long abs = ((digits * MAGIC_MULTIPLIER) >>> 32) & 0x3FF;
451-
var value = ((abs ^ signed) - signed);
452-
lfIndex = start + (uint)(dot >> 3) + 4u;
453-
return (nint)value;
496+
if (chunk0.Length <= 0)
497+
break;
498+
499+
if (chunk1.Length <= 0)
500+
break;
501+
502+
if (chunk2.Length <= 0)
503+
break;
504+
505+
if (chunk3.Length <= 0)
506+
break;
507+
508+
nuint idx0 = chunk0.IndexOfSemicolon();
509+
nuint idx1 = chunk1.IndexOfSemicolon();
510+
nuint idx2 = chunk2.IndexOfSemicolon();
511+
nuint idx3 = chunk3.IndexOfSemicolon();
512+
513+
nint value0 = chunk0.ParseInt(idx0, out var nextStart0);
514+
nint value1 = chunk1.ParseInt(idx1, out var nextStart1);
515+
nint value2 = chunk2.ParseInt(idx2, out var nextStart2);
516+
nint value3 = chunk3.ParseInt(idx3, out var nextStart3);
517+
518+
result.Update(new Utf8Span(chunk0.Pointer, idx0), value0);
519+
result.Update(new Utf8Span(chunk1.Pointer, idx1), value1);
520+
result.Update(new Utf8Span(chunk2.Pointer, idx2), value2);
521+
result.Update(new Utf8Span(chunk3.Pointer, idx3), value3);
522+
523+
chunk0 = chunk0.SliceUnsafe(nextStart0);
524+
chunk1 = chunk1.SliceUnsafe(nextStart1);
525+
chunk2 = chunk2.SliceUnsafe(nextStart2);
526+
chunk3 = chunk3.SliceUnsafe(nextStart3);
454527
}
528+
529+
ProcessSpan(result, chunk0);
530+
ProcessSpan(result, chunk1);
531+
ProcessSpan(result, chunk2);
532+
ProcessSpan(result, chunk3);
455533
}
456534

457535
public FixedDictionary<Utf8Span, Summary> Process()

1brc/Program.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ internal class Program
88
{
99
private static void Main(string[] args)
1010
{
11-
var path = args.Length > 0 ? args[0] : "D:/tmp/measurements_1B.txt";
11+
var path = args.Length > 0 ? args[0] : "D:/tmp/measurements_1B_10K.txt";
1212

1313
Console.OutputEncoding = Encoding.UTF8;
1414

@@ -20,7 +20,7 @@ private static void Main(string[] args)
2020

2121
private static void DoWork(string path)
2222
{
23-
using (var app = new App(path))
23+
using (var app = new App(path, processMode:ProcessMode.MmapSingleSharedPos))
2424
{
2525
var sw = Stopwatch.StartNew();
2626
app.PrintResult();

0 commit comments

Comments
 (0)