@@ -240,7 +240,7 @@ public unsafe void ProcessChunkMmapSingleSharedPos(FixedDictionary<Utf8Span, Sum
240240#endif
241241
242242 if ( Vector256 . IsHardwareAccelerated )
243- ProcessSpan2 ( resultAcc , new Utf8Span ( ptr0 + start , ( uint ) ( end - start ) ) ) ;
243+ ProcessSpanX2 ( resultAcc , new Utf8Span ( ptr0 + start , ( uint ) ( end - start ) ) ) ;
244244 else
245245 ProcessSpan( resultAcc , new Utf8Span ( ptr0 + start , ( uint ) ( end - start ) ) ) ;
246246
@@ -265,7 +265,6 @@ public unsafe void ProcessChunkMmapViewPerChunkRandom(FixedDictionary<Utf8Span,
265265 var length0 = length / 2 + delta ;
266266 using ( var accessor = _mmf. CreateViewAccessor ( start , length0 + 1024 , MemoryMappedFileAccess . Read ) )
267267 {
268-
269268 byte * ptr = default ;
270269 accessor. SafeMemoryMappedViewHandle. AcquirePointer( ref ptr ) ;
271270 ptr += accessor . PointerOffset ;
@@ -368,90 +367,169 @@ public static unsafe void ProcessSpan(FixedDictionary<Utf8Span, Summary> result,
368367 while ( remaining . Length > 0 )
369368 {
370369 nuint idx = remaining. IndexOfSemicolon ( ) ;
371- nint value = remaining. ParseInt ( idx , out var idx1 ) ;
372- result. GetValueRefOrAddDefault ( new Utf8Span ( remaining . Pointer , idx ) ) . Apply ( value ) ;
373- remaining = remaining. SliceUnsafe ( idx1 ) ;
370+ nint value = remaining. ParseInt ( idx , out var nextStart ) ;
371+ result. Update ( new Utf8Span ( remaining . Pointer , idx ) , value ) ;
372+ remaining = remaining. SliceUnsafe ( nextStart ) ;
373+ }
374+ }
375+
376+ [ MethodImpl ( MethodImplOptions . NoInlining ) ]
377+ public static void ProcessSpanX2( FixedDictionary < Utf8Span , Summary > result , Utf8Span chunk )
378+ {
379+ nuint middle = chunk. Length / 2 ;
380+ middle += ( uint ) chunk . SliceUnsafe ( middle ) . Span . IndexOf ( ( byte ) '\n ' ) + 1 ;
381+ var chunk0 = chunk. SliceUnsafe ( 0 , ( uint ) middle ) ;
382+ var chunk1 = chunk. SliceUnsafe ( ( uint ) middle ) ;
383+ ProcessSpan( result , chunk0 , chunk1 ) ;
384+ }
385+
386+ [ MethodImpl ( MethodImplOptions . AggressiveOptimization ) ]
387+ public static unsafe void ProcessSpan( FixedDictionary < Utf8Span , Summary > result , Utf8Span chunk0 , Utf8Span chunk1 )
388+ {
389+ while ( true )
390+ {
391+ if ( chunk0 . Length <= 0 )
392+ break;
393+
394+ if ( chunk1 . Length <= 0 )
395+ break;
396+
397+ nuint idx0 = chunk0. IndexOfSemicolon ( ) ;
398+ nuint idx1 = chunk1. IndexOfSemicolon ( ) ;
399+
400+ nint value0 = chunk0. ParseInt ( idx0 , out var nextStart0 ) ;
401+ nint value1 = chunk1. ParseInt ( idx1 , out var nextStart1 ) ;
402+
403+ result. Update ( new Utf8Span ( chunk0 . Pointer , idx0 ) , value0 ) ;
404+ result. Update ( new Utf8Span ( chunk1 . Pointer , idx1 ) , value1 ) ;
405+
406+ chunk0 = chunk0. SliceUnsafe ( nextStart0 ) ;
407+ chunk1 = chunk1. SliceUnsafe ( nextStart1 ) ;
374408 }
409+
410+ ProcessSpan( result , chunk0 ) ;
411+ ProcessSpan( result , chunk1 ) ;
375412 }
376413
377414 [ MethodImpl ( MethodImplOptions . NoInlining ) ]
378- public static unsafe void ProcessSpan2 ( FixedDictionary < Utf8Span , Summary > result , Utf8Span remaining )
415+ public static void ProcessSpanX3 ( FixedDictionary < Utf8Span , Summary > result , Utf8Span chunk )
379416 {
380- Debug. Assert ( Vector256 . IsHardwareAccelerated ) ;
417+ nuint third = chunk. Length / 3 ;
418+ var oneThird = third + ( uint ) chunk . SliceUnsafe ( third ) . Span . IndexOf ( ( byte ) '\n ' ) + 1 ;
419+ var twoThirds = third * 2 + ( uint ) chunk . SliceUnsafe ( third * 2 ) . Span . IndexOf ( ( byte ) '\n ' ) + 1 ;
420+
421+ var chunk0 = chunk. SliceUnsafe( 0 , ( uint ) oneThird ) ;
422+ var chunk1 = chunk. SliceUnsafe( ( uint ) oneThird, twoThirds - oneThird ) ;
423+ var chunk2 = chunk . SliceUnsafe ( twoThirds ) ;
381424
382- const nuint vectorSize = 32 ;
425+ Debug. Assert ( chunk0 . Length > 0 ) ;
426+ Debug. Assert ( chunk1 . Length > 0 ) ;
427+ Debug. Assert ( chunk2 . Length > 0 ) ;
428+ Debug. Assert ( chunk0 . Length + chunk1 . Length + chunk2 . Length == chunk . Length ) ;
383429
430+ ProcessSpan( result , chunk0 , chunk1 , chunk2 ) ;
431+ }
432+
433+ [ MethodImpl ( MethodImplOptions . AggressiveOptimization ) ]
434+ public static unsafe void ProcessSpan( FixedDictionary < Utf8Span , Summary > result , Utf8Span chunk0 , Utf8Span chunk1 , Utf8Span chunk2 )
435+ {
384436 while ( true )
385437 {
386- if ( remaining . Length <= 0 )
438+ if ( chunk0 . Length <= 0 )
387439 break;
388440
389- nuint idx;
390- nuint idx1;
391- nint value;
392- var matches = Vector256. Equals ( Unsafe . ReadUnaligned < Vector256 < byte > > ( remaining . Pointer ) , Vector256 . Create ( ( byte ) ';' ) ) ;
393- var mask = matches. ExtractMostSignificantBits ( ) ;
441+ if ( chunk1 . Length <= 0 )
442+ break;
394443
395- if ( mask != 0 )
396- {
397- idx = ( nuint ) BitOperations . TrailingZeroCount ( mask ) ;
398- value = ParseInt( remaining . Pointer , idx , out idx1 ) ;
399- if ( result . TryUpdate ( new Utf8Span ( remaining . Pointer , idx ) , value ) )
400- goto DONE;
401- }
402- else // 32-63
403- {
404- matches = Vector256. Equals ( Unsafe . ReadUnaligned < Vector256 < byte > > ( remaining . Pointer + vectorSize ) , Vector256 . Create ( ( byte ) ';' ) ) ;
405- mask = matches. ExtractMostSignificantBits ( ) ;
444+ if ( chunk2 . Length <= 0 )
445+ break;
406446
407- if ( mask != 0 ) // 64-95
408- {
409- idx = vectorSize + ( uint ) BitOperations . TrailingZeroCount ( mask ) ;
410- value = ParseInt ( remaining . Pointer , idx , out idx1 ) ;
411- }
412- else
413- {
414- matches = Vector256 . Equals ( Unsafe . ReadUnaligned < Vector256 < byte > > ( remaining . Pointer + 2 * vectorSize ) , Vector256 . Create ( ( byte ) ';' ) ) ;
415- mask = matches. ExtractMostSignificantBits ( ) ;
416-
417- if ( mask != 0 ) // 96-127
418- {
419- idx = 2 * vectorSize + ( uint ) BitOperations . TrailingZeroCount ( mask ) ;
420- value = ParseInt ( remaining . Pointer , idx , out idx1 ) ;
421- }
422- else
423- {
424- matches = Vector256. Equals ( Unsafe . ReadUnaligned < Vector256 < byte > > ( remaining . Pointer + 3 * vectorSize ) , Vector256 . Create ( ( byte ) ';' ) ) ;
425- mask = matches. ExtractMostSignificantBits ( ) ;
426- idx = 3 * vectorSize + ( uint ) BitOperations . TrailingZeroCount ( mask ) ;
427- value = ParseInt ( remaining . Pointer , idx , out idx1 ) ;
428- }
429- }
430- }
447+ nuint idx0 = chunk0. IndexOfSemicolon ( ) ;
448+ nuint idx1 = chunk1. IndexOfSemicolon ( ) ;
449+ nuint idx2 = chunk2. IndexOfSemicolon ( ) ;
431450
432- result . GetValueRefOrAddDefault ( new Utf8Span ( remaining . Pointer , idx ) ) . Apply ( value ) ;
433-
434- DONE :
435- remaining = remaining . SliceUnsafe ( idx1 ) ;
451+ nint value0 = chunk0. ParseInt ( idx0 , out var nextStart0 ) ;
452+ nint value1 = chunk1. ParseInt ( idx1 , out var nextStart1 ) ;
453+ nint value2 = chunk2. ParseInt ( idx2 , out var nextStart2 ) ;
454+
455+ result. Update ( new Utf8Span ( chunk0 . Pointer , idx0 ) , value0 ) ;
456+ result. Update ( new Utf8Span ( chunk1 . Pointer , idx1 ) , value1 ) ;
457+ result. Update ( new Utf8Span ( chunk2 . Pointer , idx2 ) , value2 ) ;
458+
459+ chunk0 = chunk0. SliceUnsafe ( nextStart0 ) ;
460+ chunk1 = chunk1. SliceUnsafe ( nextStart1 ) ;
461+ chunk2 = chunk2. SliceUnsafe ( nextStart2 ) ;
436462 }
437463
438- [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
439- static nint ParseInt ( byte * ptr , nuint start , out nuint lfIndex )
464+ ProcessSpan( result , chunk0 ) ;
465+ ProcessSpan( result , chunk1 ) ;
466+ ProcessSpan( result , chunk2 ) ;
467+ }
468+
469+ [ MethodImpl ( MethodImplOptions . NoInlining ) ]
470+ public static void ProcessSpanX4( FixedDictionary < Utf8Span , Summary > result , Utf8Span chunk )
471+ {
472+ nuint q = chunk. Length / 4 ;
473+ var one4 = q + ( uint ) chunk . SliceUnsafe ( q ) . Span . IndexOf ( ( byte ) '\n ' ) + 1 ;
474+ var two4 = q * 2 + ( uint ) chunk . SliceUnsafe ( q * 2 ) . Span . IndexOf ( ( byte ) '\n ' ) + 1 ;
475+ var three4 = q * 3 + ( uint ) chunk . SliceUnsafe ( q * 3 ) . Span . IndexOf ( ( byte ) '\n ' ) + 1 ;
476+
477+ var chunk0 = chunk. SliceUnsafe( 0 , ( uint ) one4 ) ;
478+ var chunk1 = chunk. SliceUnsafe( ( uint ) one4, two4 - one4 ) ;
479+ var chunk2 = chunk . SliceUnsafe ( two4 , three4 - two4 ) ;
480+ var chunk3 = chunk. SliceUnsafe( three4 ) ;
481+
482+ Debug . Assert ( chunk0 . Length > 0 ) ;
483+ Debug . Assert ( chunk1 . Length > 0 ) ;
484+ Debug . Assert ( chunk2 . Length > 0 ) ;
485+ Debug. Assert ( chunk3 . Length > 0 ) ;
486+ Debug. Assert ( chunk0 . Length + chunk1 . Length + chunk2 . Length + chunk3 . Length == chunk . Length ) ;
487+
488+ ProcessSpan( result , chunk0 , chunk1 , chunk2 , chunk3 ) ;
489+ }
490+
491+ [ MethodImpl ( MethodImplOptions . AggressiveOptimization ) ]
492+ public static unsafe void ProcessSpan( FixedDictionary < Utf8Span , Summary > result , Utf8Span chunk0 , Utf8Span chunk1 , Utf8Span chunk2 , Utf8Span chunk3 )
493+ {
494+ while ( true )
440495 {
441- const long DOT_BITS = 0x10101000 ;
442- const long MAGIC_MULTIPLIER = ( 100 * 0x1000000 + 10 * 0x10000 + 1 ) ;
443-
444- long word = * ( long * ) ( ptr + start + 1 ) ;
445- long inverted = ~ word ;
446- int dot = BitOperations. TrailingZeroCount( inverted & DOT_BITS ) ;
447- long signed = ( inverted < < 59 ) >> 63 ;
448- long mask = ~ ( signed & 0xFF ) ;
449- long digits = ( ( word & mask ) << ( 28 - dot ) ) & 0x0F000F0F00L ;
450- long abs = ( ( digits * MAGIC_MULTIPLIER ) >>> 32 ) & 0x3FF ;
451- var value = ( ( abs ^ signed ) - signed ) ;
452- lfIndex = start + ( uint ) ( dot >> 3 ) + 4u ;
453- return ( nint ) value ;
496+ if ( chunk0 . Length <= 0 )
497+ break;
498+
499+ if ( chunk1 . Length <= 0 )
500+ break;
501+
502+ if ( chunk2 . Length <= 0 )
503+ break;
504+
505+ if ( chunk3 . Length <= 0 )
506+ break;
507+
508+ nuint idx0 = chunk0. IndexOfSemicolon ( ) ;
509+ nuint idx1 = chunk1. IndexOfSemicolon ( ) ;
510+ nuint idx2 = chunk2. IndexOfSemicolon ( ) ;
511+ nuint idx3 = chunk3. IndexOfSemicolon ( ) ;
512+
513+ nint value0 = chunk0. ParseInt ( idx0 , out var nextStart0 ) ;
514+ nint value1 = chunk1. ParseInt ( idx1 , out var nextStart1 ) ;
515+ nint value2 = chunk2. ParseInt ( idx2 , out var nextStart2 ) ;
516+ nint value3 = chunk3. ParseInt ( idx3 , out var nextStart3 ) ;
517+
518+ result. Update ( new Utf8Span ( chunk0 . Pointer , idx0 ) , value0 ) ;
519+ result. Update ( new Utf8Span ( chunk1 . Pointer , idx1 ) , value1 ) ;
520+ result. Update ( new Utf8Span ( chunk2 . Pointer , idx2 ) , value2 ) ;
521+ result. Update ( new Utf8Span ( chunk3 . Pointer , idx3 ) , value3 ) ;
522+
523+ chunk0 = chunk0. SliceUnsafe ( nextStart0 ) ;
524+ chunk1 = chunk1. SliceUnsafe ( nextStart1 ) ;
525+ chunk2 = chunk2. SliceUnsafe ( nextStart2 ) ;
526+ chunk3 = chunk3. SliceUnsafe ( nextStart3 ) ;
454527 }
528+
529+ ProcessSpan( result , chunk0 ) ;
530+ ProcessSpan( result , chunk1 ) ;
531+ ProcessSpan( result , chunk2 ) ;
532+ ProcessSpan( result , chunk3 ) ;
455533 }
456534
457535 public FixedDictionary< Utf8Span , Summary> Process ( )
0 commit comments