@@ -149,12 +149,13 @@ public override int GetHashCode()
/// <summary>
/// Builds a managed string from the bytes this span refers to.
/// </summary>
/// <returns>
/// The string produced by decoding <c>Length</c> bytes, starting at <c>Pointer</c>, as UTF-8.
/// </returns>
public override string ToString()
{
    // The string(sbyte*, int, int, Encoding) constructor performs the decoding;
    // the start offset is 0 because Pointer already addresses the first byte.
    return new string((sbyte*)Pointer, 0, (int)Length, Encoding.UTF8);
}
150150
151151 [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
152- public nint ParseIntBranchless( nuint start , out nuint lfIndex ) {
152+ public nint ParseInt( nuint start , out nuint lfIndex )
153+ {
153154 // Adapted from artsiomkorzun's solution, which credits merykitty; noahfalk's version credits RagnarGrootKoerkamp instead. The original author of this technique is unclear.
154-
155+
155156 const long DOT_BITS = 0x10101000 ;
156157 const long MAGIC_MULTIPLIER = ( 100 * 0x1000000 + 10 * 0x10000 + 1 ) ;
157-
158+
158159 long word = * ( long * ) ( Pointer + start + 1 ) ;
159160 long inverted = ~ word ;
160161 int dot = BitOperations . TrailingZeroCount ( inverted & DOT_BITS ) ;
@@ -166,83 +167,43 @@ public nint ParseIntBranchless(nuint start, out nuint lfIndex) {
166167 lfIndex = start + ( uint ) ( dot >> 3 ) + 4u ;
167168 return ( nint ) value ;
168169 }
169-
170- [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
171- public nint ParseInt( nuint start , out nuint lfIndex )
172- {
173- var ptr = Pointer + start + 1 ;
174- int sign;
175-
176- if ( * ptr == ( byte ) '-' )
177- {
178- ptr ++ ;
179- sign = - 1 ;
180- lfIndex = start + 6 ;
181- }
182- else
183- {
184- sign = 1 ;
185- lfIndex = start + 5 ;
186- }
187-
188- if ( ptr [ 1 ] != '.' )
189- {
190- lfIndex ++ ;
191- return ( nint ) ( ptr [ 0 ] * 100u + ptr [ 1 ] * 10u + ptr [ 3 ] - '0' * 111u ) * sign ;
192- }
193-
194- return ( nint ) ( ptr [ 0 ] * 10u + ptr [ 2 ] - ( '0' * 11u ) ) * sign ;
195- }
196170
197- /// <summary>
198- /// Spec: Station name: non null UTF-8 string of min length 1 character and max length 100 bytes (i.e. this could be 100 one-byte characters, or 50 two-byte characters, etc.)
199- /// </summary>
200171 [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
201172 internal nuint IndexOfSemicolon( )
202173 {
203174 const nuint vectorSize = 32 ;
204- // nuint start = 0; // it's consistently faster with this useless variable (non constant)
205175
206176 if ( Vector256 . IsHardwareAccelerated )
207177 {
208178 var sepVec = Vector256. Create ( ( byte ) ';' ) ;
209179
210180 var matches = Vector256. Equals ( Unsafe . ReadUnaligned < Vector256 < byte > > ( Pointer ) , sepVec ) ;
211181 var mask = Vector256. ExtractMostSignificantBits ( matches ) ;
212- nuint tzc = ( uint ) BitOperations . TrailingZeroCount ( mask ) ;
213-
214- if ( mask == 0 ) // For non-taken branches prefer placing them in a "leaf" instead of mask != 0, somewhere on GH they explain why, it would be nice to find.
215- tzc = IndexOfSemicolonCont( this ) ;
216-
217- return tzc;
182+ var idx = ( nuint ) BitOperations . TrailingZeroCount ( mask ) ;
218183
219- [ MethodImpl ( MethodImplOptions . NoInlining ) ]
220- static nuint IndexOfSemicolonCont( Utf8Span span )
184+ if ( mask == 0 ) // 32-63
221185 {
222- // A nicer version would be just a recursive call, even not here but above instead of this function.
223- // It's as fast for the default case and very close for 10K. Yet, this manually unrolled continuation is faster for 10K.
224- // return vectorSize + span.SliceUnsafe(vectorSize).IndexOfSemicolon();
225-
226- var sepVec = Vector256. Create ( ( byte ) ';' ) ;
227- var matches = Vector256. Equals ( Unsafe . ReadUnaligned < Vector256 < byte > > ( span . Pointer + vectorSize ) , sepVec ) ;
228- var mask = Vector256. ExtractMostSignificantBits ( matches ) ;
229- var tzc = ( uint ) BitOperations . TrailingZeroCount ( mask ) ;
230- if ( mask != 0 )
231- return vectorSize + tzc ;
232-
233- const nuint vectorSize2 = 2 * vectorSize ;
234- matches = Vector256 . Equals ( Unsafe . ReadUnaligned < Vector256 < byte > > ( span . Pointer + vectorSize2 ) , sepVec ) ;
186+ matches = Vector256. Equals ( Unsafe . ReadUnaligned < Vector256 < byte > > ( Pointer + vectorSize ) , sepVec ) ;
235187 mask = Vector256. ExtractMostSignificantBits ( matches ) ;
236- tzc = ( uint ) BitOperations . TrailingZeroCount ( mask ) ;
237- if ( mask != 0 )
238- return vectorSize2 + tzc ;
239-
240- const nuint vectorSize3 = 3 * vectorSize ;
241- matches = Vector256 . Equals ( Unsafe . ReadUnaligned < Vector256 < byte > > ( span . Pointer + vectorSize3 ) , sepVec ) ;
242- mask = Vector256 . ExtractMostSignificantBits ( matches ) ;
243- tzc = ( uint ) BitOperations . TrailingZeroCount ( mask ) ;
244- return vectorSize3 + tzc ;
188+ idx = vectorSize + ( uint ) BitOperations . TrailingZeroCount ( mask ) ;
189+
190+ if ( mask == 0 ) // 64-95
191+ {
192+ // const nuint vectorSize2 = 2 * vectorSize;
193+ matches = Vector256 . Equals ( Unsafe . ReadUnaligned < Vector256 < byte > > ( Pointer + 2 * vectorSize ) , sepVec ) ;
194+ mask = Vector256. ExtractMostSignificantBits ( matches ) ;
195+ idx = 2 * vectorSize + ( uint ) BitOperations . TrailingZeroCount ( mask ) ;
196+
197+ if ( mask == 0 ) // 96-127
198+ {
199+ matches = Vector256 . Equals ( Unsafe . ReadUnaligned < Vector256 < byte > > ( Pointer + 3 * vectorSize ) , sepVec ) ;
200+ mask = Vector256. ExtractMostSignificantBits ( matches ) ;
201+ idx = 3 * vectorSize + ( uint ) BitOperations . TrailingZeroCount ( mask ) ;
202+ }
203+ }
245204 }
205+
206+ return idx;
246207 }
247208
248209 return IndexOf( 0 , ( byte ) ';' ) ;
0 commit comments