@@ -1089,14 +1089,14 @@ public String Replace(String oldValue, String newValue)
10891089 return ReplaceInternal ( oldValue , newValue ) ;
10901090 }
10911091
1092- public unsafe String [ ] Split ( char separator , StringSplitOptions options = StringSplitOptions . None )
1092+ public String [ ] Split ( char separator , StringSplitOptions options = StringSplitOptions . None )
10931093 {
1094- return SplitInternal ( & separator , 1 , int . MaxValue , options ) ;
1094+ return SplitInternal ( new ReadOnlySpan < char > ( ref separator , 1 ) , int . MaxValue , options ) ;
10951095 }
10961096
1097- public unsafe String [ ] Split ( char separator , int count , StringSplitOptions options = StringSplitOptions . None )
1097+ public String [ ] Split ( char separator , int count , StringSplitOptions options = StringSplitOptions . None )
10981098 {
1099- return SplitInternal ( & separator , 1 , count , options ) ;
1099+ return SplitInternal ( new ReadOnlySpan < char > ( ref separator , 1 ) , count , options ) ;
11001100 }
11011101
11021102 // Creates an array of strings by splitting this string at each
@@ -1139,16 +1139,7 @@ public String[] Split(char[] separator, int count, StringSplitOptions options)
11391139 return SplitInternal ( separator , count , options ) ;
11401140 }
11411141
1142- private unsafe String [ ] SplitInternal ( char [ ] separator , int count , StringSplitOptions options )
1143- {
1144- fixed ( char * pSeparators = separator )
1145- {
1146- int separatorsLength = separator == null ? 0 : separator . Length ;
1147- return SplitInternal ( pSeparators , separatorsLength , count , options ) ;
1148- }
1149- }
1150-
1151- private unsafe String [ ] SplitInternal ( char * separators , int separatorsLength , int count , StringSplitOptions options )
1142+ private String [ ] SplitInternal ( ReadOnlySpan < char > separators , int count , StringSplitOptions options )
11521143 {
11531144 if ( count < 0 )
11541145 throw new ArgumentOutOfRangeException ( nameof ( count ) ,
@@ -1170,7 +1161,7 @@ private unsafe String[] SplitInternal(char* separators, int separatorsLength, in
11701161 }
11711162
11721163 int [ ] sepList = new int [ Length ] ;
1173- int numReplaces = MakeSeparatorList ( separators , separatorsLength , sepList ) ;
1164+ int numReplaces = MakeSeparatorList ( separators , sepList ) ;
11741165
11751166 // Handle the special case of no replaces.
11761167 if ( 0 == numReplaces )
@@ -1377,45 +1368,83 @@ private String[] SplitOmitEmptyEntries(Int32[] sepList, Int32[] lengthList, Int3
13771368 // Args: separator -- A string containing all of the split characters.
13781369 // sepList -- an array of ints for split char indices.
13791370 //--------------------------------------------------------------------
1380- private unsafe int MakeSeparatorList ( char * separators , int separatorsLength , int [ ] sepList )
1371+ private int MakeSeparatorList ( ReadOnlySpan < char > separators , int [ ] sepList )
13811372 {
1382- Debug . Assert ( separatorsLength >= 0 , "separatorsLength >= 0" ) ;
13831373 int foundCount = 0 ;
1374+ char sep0 , sep1 , sep2 ;
13841375
1385- if ( separators == null || separatorsLength == 0 )
1376+ switch ( separators . Length )
13861377 {
1387- fixed ( char * pwzChars = & _firstChar )
1388- {
1389- //If they passed null or an empty string, look for whitespace.
1390- for ( int i = 0 ; i < Length && foundCount < sepList . Length ; i ++ )
1378+ // Special-case no separators to mean any whitespace is a separator.
1379+ case 0 :
1380+ for ( int i = 0 ; i < Length ; i ++ )
13911381 {
1392- if ( Char . IsWhiteSpace ( pwzChars [ i ] ) )
1382+ if ( char . IsWhiteSpace ( this [ i ] ) )
13931383 {
13941384 sepList [ foundCount ++ ] = i ;
13951385 }
13961386 }
1397- }
1398- }
1399- else
1400- {
1401- int sepListCount = sepList . Length ;
1402- //If they passed in a string of chars, actually look for those chars.
1403- fixed ( char * pwzChars = & _firstChar )
1404- {
1405- for ( int i = 0 ; i < Length && foundCount < sepListCount ; i ++ )
1387+ break ;
1388+
1389+ // Special-case the common cases of 1, 2, and 3 separators, with manual comparisons against each separator.
1390+ case 1 :
1391+ sep0 = separators [ 0 ] ;
1392+ for ( int i = 0 ; i < Length ; i ++ )
1393+ {
1394+ if ( this [ i ] == sep0 )
1395+ {
1396+ sepList [ foundCount ++ ] = i ;
1397+ }
1398+ }
1399+ break ;
1400+ case 2 :
1401+ sep0 = separators [ 0 ] ;
1402+ sep1 = separators [ 1 ] ;
1403+ for ( int i = 0 ; i < Length ; i ++ )
14061404 {
1407- char * pSep = separators ;
1408- for ( int j = 0 ; j < separatorsLength ; j ++ , pSep ++ )
1405+ char c = this [ i ] ;
1406+ if ( c == sep0 || c == sep1 )
14091407 {
1410- if ( pwzChars [ i ] == * pSep )
1408+ sepList [ foundCount ++ ] = i ;
1409+ }
1410+ }
1411+ break ;
1412+ case 3 :
1413+ sep0 = separators [ 0 ] ;
1414+ sep1 = separators [ 1 ] ;
1415+ sep2 = separators [ 2 ] ;
1416+ for ( int i = 0 ; i < Length ; i ++ )
1417+ {
1418+ char c = this [ i ] ;
1419+ if ( c == sep0 || c == sep1 || c == sep2 )
1420+ {
1421+ sepList [ foundCount ++ ] = i ;
1422+ }
1423+ }
1424+ break ;
1425+
1426+ // Handle > 3 separators with a probabilistic map, in the same way as IndexOfAny.
1427+ // This optimizes for chars being unlikely to match a separator.
1428+ default :
1429+ unsafe
1430+ {
1431+ ProbabilisticMap map = default ;
1432+ uint * charMap = ( uint * ) & map ;
1433+ InitializeProbabilisticMap ( charMap , separators ) ;
1434+
1435+ for ( int i = 0 ; i < Length ; i ++ )
1436+ {
1437+ char c = this [ i ] ;
1438+ if ( IsCharBitSet ( charMap , ( byte ) c ) && IsCharBitSet ( charMap , ( byte ) ( c >> 8 ) ) &&
1439+ separators . Contains ( c ) )
14111440 {
14121441 sepList [ foundCount ++ ] = i ;
1413- break ;
14141442 }
14151443 }
14161444 }
1417- }
1445+ break ;
14181446 }
1447+
14191448 return foundCount ;
14201449 }
14211450
0 commit comments