@@ -2852,41 +2852,32 @@ class String
2852
2852
end
2853
2853
2854
2854
count = 0
2855
- match_offset = 0
2856
- slice_offset = 0
2857
- last_slice_offset = 0
2855
+ match_offset = slice_offset = 0
2858
2856
2859
2857
while match = separator.match_at_byte_index(self , match_offset)
2860
2858
index = match.byte_begin(0 )
2861
- slice_size = index - slice_offset
2862
2859
match_bytesize = match[0 ].bytesize
2860
+ next_offset = index + match_bytesize
2863
2861
2864
- if slice_offset == 0 && slice_size == 0 && match_bytesize == 0
2865
- # Skip
2866
- elsif slice_offset == bytesize && slice_size == 0
2867
- yield byte_slice(last_slice_offset)
2862
+ if next_offset == slice_offset
2863
+ match_offset = next_offset + char_bytesize_at(next_offset)
2868
2864
else
2865
+ slice_size = index - slice_offset
2866
+
2869
2867
yield byte_slice(slice_offset, slice_size)
2870
- end
2871
- count += 1
2868
+ count += 1
2872
2869
2873
- 1 .upto(match.size) do |i |
2874
- if group = match[i]?
2875
- yield group
2870
+ 1 .upto(match.size) do |i |
2871
+ if group = match[i]?
2872
+ yield group
2873
+ end
2876
2874
end
2877
- end
2878
-
2879
- last_slice_offset = slice_offset
2880
2875
2881
- if match_bytesize == 0
2882
- match_offset = index + 1
2883
- slice_offset = index
2884
- else
2885
- match_offset = index + match_bytesize
2886
- slice_offset = match_offset
2876
+ slice_offset = match_offset = next_offset
2887
2877
end
2878
+
2888
2879
break if limit && count + 1 == limit
2889
- break if slice_offset > bytesize
2880
+ break if match_offset >= bytesize
2890
2881
end
2891
2882
2892
2883
yield byte_slice(slice_offset)
@@ -3208,7 +3199,7 @@ class String
3208
3199
$~ = match
3209
3200
yield match
3210
3201
match_bytesize = match[0 ].bytesize
3211
- break if match_bytesize == 0
3202
+ match_bytesize += 1 if match_bytesize == 0
3212
3203
byte_offset = index + match_bytesize
3213
3204
end
3214
3205
@@ -3574,6 +3565,19 @@ class String
3574
3565
@bytesize == size
3575
3566
end
3576
3567
3568
# Returns how many bytes (1 to 4) the character starting at *byte_index*
# occupies, judged solely from the value of the byte stored there:
#
# * below `0x80` => 1
# * below `0xe0` => 2
# * below `0xf0` => 3
# * anything else => 4
#
# These thresholds follow the UTF-8 leading-byte ranges. Only that one byte
# is examined; the following bytes are never read or validated, so a byte in
# `0x80...0xe0` is reported as 2 even if it is not a real leading byte.
#
# NOTE(review): the byte is fetched through `unsafe_byte_at`, which
# presumably performs no bounds check -- callers are expected to pass an
# index that is safe to read; confirm against that helper.
protected def char_bytesize_at(byte_index)
  lead = unsafe_byte_at(byte_index)

  if lead < 0x80
    1
  elsif lead < 0xe0
    2
  elsif lead < 0xf0
    3
  else
    4
  end
end
3580
+
3577
3581
protected def size_known?
3578
3582
@bytesize == 0 || @length > 0
3579
3583
end
@@ -3584,19 +3588,7 @@ class String
3584
3588
3585
3589
while byte_index < bytesize
3586
3590
yield byte_index, char_index
3587
-
3588
- c = to_unsafe[byte_index]
3589
-
3590
- if c < 0x80
3591
- byte_index += 1
3592
- elsif c < 0xe0
3593
- byte_index += 2
3594
- elsif c < 0xf0
3595
- byte_index += 3
3596
- else
3597
- byte_index += 4
3598
- end
3599
-
3591
+ byte_index += char_bytesize_at(byte_index)
3600
3592
char_index += 1
3601
3593
end
3602
3594
0 commit comments