-
Notifications
You must be signed in to change notification settings - Fork 1.6k
/
string.dart
838 lines (784 loc) · 28.7 KB
/
string.dart
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
part of dart.core;
/**
* A sequence of UTF-16 code units.
*
* Strings are mainly used to represent text. A character may be represented by
* multiple code points, each code point consisting of one or two code
* units. For example the Papua New Guinea flag character requires four code
* units to represent two code points, but should be treated like a single
* character: "🇵🇬". Platforms that do not support the flag character may show
* the letters "PG" instead. If the code points are swapped, it instead becomes
* the Guadeloupe flag "🇬🇵" ("GP").
*
* A string can be either single or multiline. Single line strings are
* written using matching single or double quotes, and multiline strings are
* written using triple quotes. The following are all valid Dart strings:
*
* 'Single quotes';
* "Double quotes";
* 'Double quotes in "single" quotes';
* "Single quotes in 'double' quotes";
*
* '''A
* multiline
* string''';
*
* """
* Another
* multiline
* string""";
*
* Strings are immutable. Although you cannot change a string, you can perform
* an operation on a string and assign the result to a new string:
*
* var string = 'Dart is fun';
* var newString = string.substring(0, 5);
*
* You can use the plus (`+`) operator to concatenate strings:
*
* 'Dart ' + 'is ' + 'fun!'; // 'Dart is fun!'
*
* You can also use adjacent string literals for concatenation:
*
* 'Dart ' 'is ' 'fun!'; // 'Dart is fun!'
*
* You can use `${}` to interpolate the value of Dart expressions
* within strings. The curly braces can be omitted when evaluating identifiers:
*
* string = 'dartlang';
* '$string has ${string.length} letters'; // 'dartlang has 8 letters'
*
* A string is represented by a sequence of Unicode UTF-16 code units
* accessible through the [codeUnitAt] or the [codeUnits] members:
*
* string = 'Dart';
* string.codeUnitAt(0); // 68
* string.codeUnits; // [68, 97, 114, 116]
*
* The string representation of code units is accessible through the index
* operator:
*
* string[0]; // 'D'
*
* The characters of a string are encoded in UTF-16. Decoding UTF-16, which
* combines surrogate pairs, yields Unicode code points. Following a similar
* terminology to Go, we use the name 'rune' for an integer representing a
* Unicode code point. Use the [runes] property to get the runes of a string:
*
* string.runes.toList(); // [68, 97, 114, 116]
*
* For a character outside the Basic Multilingual Plane (plane 0) that is
* composed of a surrogate pair, [runes] combines the pair and returns a
* single integer. For example, the Unicode character for a
* musical G-clef ('𝄞') with rune value 0x1D11E consists of a UTF-16 surrogate
* pair: `0xD834` and `0xDD1E`. Using [codeUnits] returns the surrogate pair,
* and using `runes` returns their combined value:
*
* var clef = '\u{1D11E}';
* clef.codeUnits; // [0xD834, 0xDD1E]
* clef.runes.toList(); // [0x1D11E]
*
* The String class cannot be extended or implemented. Attempting to do so
* yields a compile-time error.
*
* ## Other resources
*
* See [StringBuffer] to efficiently build a string incrementally. See
* [RegExp] to work with regular expressions.
*
* Also see:
*
* * [Strings and regular expressions](https://dart.dev/guides/libraries/library-tour#strings-and-regular-expressions)
*/
@pragma('vm:entry-point')
abstract class String implements Comparable<String>, Pattern {
/**
* Allocates a new String for the specified [charCodes].
*
* The [charCodes] can be UTF-16 code units or runes. If a char-code value is
* 16-bit, it is copied verbatim:
*
* new String.fromCharCodes([68]); // 'D'
*
* If a char-code value is greater than 16-bits, it is decomposed into a
* surrogate pair:
*
* var clef = new String.fromCharCodes([0x1D11E]);
* clef.codeUnitAt(0); // 0xD834
* clef.codeUnitAt(1); // 0xDD1E
*
* If [start] and [end] are provided, only the values of [charCodes]
* at positions from `start` to, but not including, `end`, are used.
* The `start` and `end` values must satisfy
* `0 <= start <= end <= charCodes.length`.
*/
external factory String.fromCharCodes(Iterable<int> charCodes,
[int start = 0, int? end]);
/**
* Allocates a new String for the specified [charCode].
*
* If the [charCode] can be represented by a single UTF-16 code unit, the new
* string contains a single code unit. Otherwise, the [length] is 2 and
* the code units form a surrogate pair. See documentation for
* [fromCharCodes].
*
* Creating a [String] with one half of a surrogate pair is allowed.
*/
external factory String.fromCharCode(int charCode);
/**
* Returns the string value of the environment declaration [name].
*
* Environment declarations are provided by the surrounding system compiling
* or running the Dart program. Declarations map a string key to a string
* value.
*
* If [name] is not declared in the environment, the result is instead
* [defaultValue].
*
* Example of getting a value:
* ```
* const String.fromEnvironment("defaultFloo", defaultValue: "no floo")
* ```
* In order to check whether a declaration is there at all, use
* [bool.hasEnvironment]. Example:
* ```
* const maybeDeclared = bool.hasEnvironment("maybeDeclared")
* ? String.fromEnvironment("maybeDeclared")
* : null;
* ```
*/
// The .fromEnvironment() constructors are special in that we do not want
// users to call them using "new". We prohibit that by giving them bodies
// that throw, even though const constructors are not allowed to have bodies.
// Disable those static errors.
// NOTE(review): the declaration below is `external` and has no body, so the
// explanation above (and the ignores) may be stale - confirm before relying
// on it.
//ignore: const_constructor_with_body
//ignore: const_factory
external const factory String.fromEnvironment(String name,
{String defaultValue = ""});
/**
* Gets the character (as a single-code-unit [String]) at the given [index].
*
* The returned string represents exactly one UTF-16 code unit, which may be
* half of a surrogate pair. A single member of a surrogate pair is an
* invalid UTF-16 string:
*
* var clef = '\u{1D11E}';
* // These represent invalid UTF-16 strings.
* clef[0].codeUnits; // [0xD834]
* clef[1].codeUnits; // [0xDD1E]
*
* This method is equivalent to
* `new String.fromCharCode(this.codeUnitAt(index))`.
*/
String operator [](int index);
/**
* Returns the 16-bit UTF-16 code unit at the given [index].
*/
int codeUnitAt(int index);
/**
* The length of the string.
*
* Returns the number of UTF-16 code units in this string. The number
* of [runes] might be fewer, if the string contains characters outside
* the Basic Multilingual Plane (plane 0):
*
* 'Dart'.length; // 4
* 'Dart'.runes.length; // 4
*
* var clef = '\u{1D11E}';
* clef.length; // 2
* clef.runes.length; // 1
*/
int get length;
/**
* Returns a hash code derived from the code units of the string.
*
* This is compatible with [operator ==]. Strings with the same sequence
* of code units have the same hash code.
*/
int get hashCode;
/**
* Returns true if other is a `String` with the same sequence of code units.
*
* This method compares each individual code unit of the strings.
* It does not check for Unicode equivalence.
* For example, both the following strings represent the string 'Amélie',
* but due to their different encoding, are not equal:
*
* 'Am\xe9lie' == 'Ame\u{301}lie'; // false
*
* The first string encodes 'é' as a single Unicode code unit (also
* a single rune), whereas the second string encodes it as 'e' with the
* combining accent character '◌́'.
*/
bool operator ==(Object other);
/**
* Compares this string to [other].
*
* Returns a negative value if `this` is ordered before `other`,
* a positive value if `this` is ordered after `other`,
* or zero if `this` and `other` are equivalent.
*
* The ordering is the same as the ordering of the code points at the first
* position where the two strings differ.
* If one string is a prefix of the other,
* then the shorter string is ordered before the longer string.
* If the strings have exactly the same content, they are equivalent with
* regard to the ordering.
* Ordering does not check for Unicode equivalence.
* The comparison is case sensitive.
*/
int compareTo(String other);
/**
* Returns true if this string ends with [other]. For example:
*
* 'Dart'.endsWith('t'); // true
*/
bool endsWith(String other);
/**
* Returns true if this string starts with a match of [pattern].
*
* var string = 'Dart';
* string.startsWith('D'); // true
* string.startsWith(new RegExp(r'[A-Z][a-z]')); // true
*
* If [index] is provided, this method checks if the substring starting
* at that index starts with a match of [pattern]:
*
* string.startsWith('art', 1); // true
* string.startsWith(new RegExp(r'\w{3}')); // true
*
* [index] must not be negative or greater than [length].
*
* A [RegExp] containing '^' does not match if the [index] is greater than
* zero. The pattern works on the string as a whole, and does not extract
* a substring starting at [index] first:
*
* string.startsWith(new RegExp(r'^art'), 1); // false
* string.startsWith(new RegExp(r'art'), 1); // true
*/
bool startsWith(Pattern pattern, [int index = 0]);
/**
* Returns the position of the first match of [pattern] in this string,
* starting at [start], inclusive:
*
* var string = 'Dartisans';
* string.indexOf('art'); // 1
* string.indexOf(new RegExp(r'[A-Z][a-z]')); // 0
*
* Returns -1 if no match is found:
*
* string.indexOf(new RegExp(r'dart')); // -1
*
* [start] must be non-negative and not greater than [length].
*/
int indexOf(Pattern pattern, [int start = 0]);
/**
* Returns the starting position of the last match of [pattern] in this
* string, searching backward starting at [start], inclusive:
*
* var string = 'Dartisans';
* string.lastIndexOf('a'); // 6
* string.lastIndexOf(RegExp(r'a(r|n)')); // 6
*
* Returns -1 if [pattern] could not be found in this string.
*
* string.lastIndexOf(RegExp(r'DART')); // -1
*
* If [start] is omitted, search starts from the end of the string.
* If supplied, [start] must be non-negative and not greater than [length].
*/
int lastIndexOf(Pattern pattern, [int? start]);
/**
* Returns true if this string is empty.
*/
bool get isEmpty;
/**
* Returns true if this string is not empty.
*/
bool get isNotEmpty;
/**
* Creates a new string by concatenating this string with [other].
*
* 'dart' + 'lang'; // 'dartlang'
*/
String operator +(String other);
/**
* Returns the substring of this string that extends from [startIndex],
* inclusive, to [endIndex], exclusive.
*
* var string = 'dartlang';
* string.substring(1); // 'artlang'
* string.substring(1, 4); // 'art'
*/
String substring(int startIndex, [int? endIndex]);
/**
* Returns the string without any leading and trailing whitespace.
*
* If the string contains leading or trailing whitespace, a new string with no
* leading and no trailing whitespace is returned:
* ```dart
* '\tDart is fun\n'.trim(); // 'Dart is fun'
* ```
* Otherwise, the original string itself is returned:
* ```dart
* var str1 = 'Dart';
* var str2 = str1.trim();
* identical(str1, str2); // true
* ```
* Whitespace is defined by the Unicode White_Space property (as defined in
* version 6.2 or later) and the BOM character, 0xFEFF.
*
* Here is the list of trimmed characters according to Unicode version 6.3:
* ```
* 0009..000D ; White_Space # Cc <control-0009>..<control-000D>
* 0020 ; White_Space # Zs SPACE
* 0085 ; White_Space # Cc <control-0085>
* 00A0 ; White_Space # Zs NO-BREAK SPACE
* 1680 ; White_Space # Zs OGHAM SPACE MARK
* 2000..200A ; White_Space # Zs EN QUAD..HAIR SPACE
* 2028 ; White_Space # Zl LINE SEPARATOR
* 2029 ; White_Space # Zp PARAGRAPH SEPARATOR
* 202F ; White_Space # Zs NARROW NO-BREAK SPACE
* 205F ; White_Space # Zs MEDIUM MATHEMATICAL SPACE
* 3000 ; White_Space # Zs IDEOGRAPHIC SPACE
*
* FEFF ; BOM ZERO WIDTH NO_BREAK SPACE
* ```
* Some later versions of Unicode do not include U+0085 as a whitespace
* character. Whether it is trimmed depends on the Unicode version
* used by the system.
*/
String trim();
/**
* Returns the string without any leading whitespace.
*
* As [trim], but only removes leading whitespace.
*/
String trimLeft();
/**
* Returns the string without any trailing whitespace.
*
* As [trim], but only removes trailing whitespace.
*/
String trimRight();
/**
* Creates a new string by concatenating this string with itself a number
* of times.
*
* The result of `str * n` is equivalent to
* `str + str + ...`(n times)`... + str`.
*
* Returns an empty string if [times] is zero or negative.
*/
String operator *(int times);
/**
* Pads this string on the left if it is shorter than [width].
*
* Returns a new string that prepends [padding] onto this string
* one time for each position the length is less than [width].
*
* If [width] is already smaller than or equal to `this.length`,
* no padding is added. A negative `width` is treated as zero.
*
* If [padding] has length different from 1, the result will not
* have length `width`. This may be useful for cases where the
* padding is a longer string representing a single character, like
* `"&nbsp;"` or `"\u{10002}"`.
* In that case, the user should make sure that `this.length` is
* the correct measure of the string's length.
*/
String padLeft(int width, [String padding = ' ']);
/**
* Pads this string on the right if it is shorter than [width].
*
* Returns a new string that appends [padding] after this string
* one time for each position the length is less than [width].
*
* If [width] is already smaller than or equal to `this.length`,
* no padding is added. A negative `width` is treated as zero.
*
* If [padding] has length different from 1, the result will not
* have length `width`. This may be useful for cases where the
* padding is a longer string representing a single character, like
* `"&nbsp;"` or `"\u{10002}"`.
* In that case, the user should make sure that `this.length` is
* the correct measure of the string's length.
*/
String padRight(int width, [String padding = ' ']);
/**
* Returns true if this string contains a match of [other]:
*
* var string = 'Dart strings';
* string.contains('D'); // true
* string.contains(new RegExp(r'[A-Z]')); // true
*
* If [startIndex] is provided, this method matches only at or after that
* index:
*
* string.contains('X', 1); // false
* string.contains(new RegExp(r'[A-Z]'), 1); // false
*
* [startIndex] must not be negative or greater than [length].
*/
bool contains(Pattern other, [int startIndex = 0]);
/**
* Returns a new string in which the first occurrence of [from] in this string
* is replaced with [to], starting from [startIndex]:
*
* '0.0001'.replaceFirst(new RegExp(r'0'), ''); // '.0001'
* '0.0001'.replaceFirst(new RegExp(r'0'), '7', 1); // '0.7001'
*/
String replaceFirst(Pattern from, String to, [int startIndex = 0]);
/**
* Replaces the first occurrence of [from] in this string.
*
* Returns a new string, which is this string
* except that the first match of [from], starting from [startIndex],
* is replaced by the result of calling [replace] with the match object.
*
* The optional [startIndex] is by default set to 0. If provided, it must be
* an integer in the range `[0 .. len]`, where `len` is this string's length.
*/
String replaceFirstMapped(Pattern from, String replace(Match match),
[int startIndex = 0]);
/**
* Replaces all substrings that match [from] with [replace].
*
* Returns a new string in which the non-overlapping substrings matching
* [from] (the ones iterated by `from.allMatches(thisString)`) are replaced
* by the literal string [replace].
*
* 'resume'.replaceAll(new RegExp(r'e'), 'é'); // 'résumé'
*
* Notice that the [replace] string is not interpreted. If the replacement
* depends on the match (for example on a [RegExp]'s capture groups), use
* the [replaceAllMapped] method instead.
*/
String replaceAll(Pattern from, String replace);
/**
* Replaces all substrings that match [from] by a string computed from the
* match.
*
* Returns a new string in which the non-overlapping substrings that match
* [from] (the ones iterated by `from.allMatches(thisString)`) are replaced
* by the result of calling [replace] on the corresponding [Match] object.
*
* This can be used to replace matches with new content that depends on the
* match, unlike [replaceAll] where the replacement string is always the same.
*
* The [replace] function is called with the [Match] generated
* by the pattern, and its result is used as replacement.
*
* The function defined below converts each word in a string to simplified
* 'pig latin' using [replaceAllMapped]:
*
* pigLatin(String words) => words.replaceAllMapped(
* new RegExp(r'\b(\w*?)([aeiou]\w*)', caseSensitive: false),
* (Match m) => "${m[2]}${m[1]}${m[1].isEmpty ? 'way' : 'ay'}");
*
* pigLatin('I have a secret now!'); // 'Iway avehay away ecretsay ownay!'
*/
String replaceAllMapped(Pattern from, String Function(Match match) replace);
/**
* Replaces the substring from [start] to [end] with [replacement].
*
* Returns a new string equivalent to:
*
* this.substring(0, start) + replacement + this.substring(end)
*
* The [start] and [end] indices must specify a valid range of this string.
* That is `0 <= start <= end <= this.length`.
* If [end] is `null`, it defaults to [length].
*/
String replaceRange(int start, int? end, String replacement);
/**
* Splits the string at matches of [pattern] and returns a list of substrings.
*
* Finds all the matches of `pattern` in this string,
* and returns the list of the substrings between the matches.
*
* var string = "Hello world!";
* string.split(" "); // ['Hello', 'world!'];
*
* Empty matches at the beginning and end of the strings are ignored,
* and so are empty matches right after another match.
*
* var string = "abba";
* string.split(new RegExp(r"b*")); // ['a', 'a']
* // not ['', 'a', 'a', '']
*
* If this string is empty, the result is an empty list if `pattern` matches
* the empty string, and it is `[""]` if the pattern doesn't match.
*
* var string = '';
* string.split(''); // []
* string.split("a"); // ['']
*
* Splitting with an empty pattern splits the string into single-code unit
* strings.
*
* var string = 'Pub';
* string.split(''); // ['P', 'u', 'b']
*
* string.codeUnits.map((unit) {
* return new String.fromCharCode(unit);
* }).toList(); // ['P', 'u', 'b']
*
* Splitting happens at UTF-16 code unit boundaries,
* and not at rune boundaries:
*
* // String made up of two code units, but one rune.
* string = '\u{1D11E}';
* string.split('').length; // 2 surrogate values
*
* To get a list of strings containing the individual runes of a string,
* you should not use split. You can instead map each rune to a string
* as follows:
*
* string.runes.map((rune) => new String.fromCharCode(rune)).toList();
*/
List<String> split(Pattern pattern);
/**
* Splits the string, converts its parts, and combines them into a new
* string.
*
* [pattern] is used to split the string into parts and separating matches.
*
* Each match is converted to a string by calling [onMatch]. If [onMatch]
* is omitted, the matched string is used.
*
* Each non-matched part is converted by a call to [onNonMatch]. If
* [onNonMatch] is omitted, the non-matching part is used.
*
* Then all the converted parts are combined into the resulting string.
*
* 'Eats shoots leaves'.splitMapJoin((new RegExp(r'shoots')),
* onMatch: (m) => '${m.group(0)}',
* onNonMatch: (n) => '*'); // *shoots*
*/
String splitMapJoin(Pattern pattern,
{String Function(Match)? onMatch, String Function(String)? onNonMatch});
/**
* Returns an unmodifiable list of the UTF-16 code units of this string.
*/
List<int> get codeUnits;
/**
* Returns an [Iterable] of Unicode code-points of this string.
*
* If the string contains surrogate pairs, they are combined and returned
* as one integer by this iterator. Unmatched surrogate halves are treated
* like valid 16-bit code-units.
*/
Runes get runes;
/**
* Converts all characters in this string to lower case.
* If the string is already in all lower case, this method returns `this`.
*
* 'ALPHABET'.toLowerCase(); // 'alphabet'
* 'abc'.toLowerCase(); // 'abc'
*
* This function uses the language independent Unicode mapping and thus only
* works in some languages.
*/
// TODO(floitsch): document better. (See EcmaScript for description).
String toLowerCase();
/**
* Converts all characters in this string to upper case.
* If the string is already in all upper case, this method returns `this`.
*
* 'alphabet'.toUpperCase(); // 'ALPHABET'
* 'ABC'.toUpperCase(); // 'ABC'
*
* This function uses the language independent Unicode mapping and thus only
* works in some languages.
*/
// TODO(floitsch): document better. (See EcmaScript for description).
String toUpperCase();
}
/**
 * The runes (integer Unicode code points) of a [String].
 */
class Runes extends Iterable<int> {
  /** The string whose code points this iterable exposes. */
  final String string;

  /** Creates a rune view over [string]. */
  Runes(this.string);

  /** A fresh [RuneIterator] over [string], positioned at its start. */
  RuneIterator get iterator => RuneIterator(string);

  /**
   * The last rune of [string].
   *
   * Reads the final code unit directly; when it is a trail surrogate
   * preceded by a lead surrogate, the two are combined into one code point.
   * Otherwise the final code unit itself is returned.
   *
   * Throws a [StateError] if [string] has no code units.
   */
  int get last {
    final unitCount = string.length;
    if (unitCount == 0) {
      throw StateError('No elements.');
    }
    final finalUnit = string.codeUnitAt(unitCount - 1);
    // Only a trail surrogate with something before it can end a pair.
    if (_isTrailSurrogate(finalUnit) && unitCount > 1) {
      final precedingUnit = string.codeUnitAt(unitCount - 2);
      if (_isLeadSurrogate(precedingUnit)) {
        return _combineSurrogatePair(precedingUnit, finalUnit);
      }
    }
    return finalUnit;
  }
}
// Whether [code] (a 16-bit unsigned integer) is a UTF-16 lead surrogate,
// i.e. its top six bits match those of 0xD800.
bool _isLeadSurrogate(int code) {
  const surrogateTagMask = 0xFC00;
  const leadSurrogateTag = 0xD800;
  return (code & surrogateTagMask) == leadSurrogateTag;
}
// Whether [code] (a 16-bit unsigned integer) is a UTF-16 trail surrogate,
// i.e. its top six bits match those of 0xDC00.
bool _isTrailSurrogate(int code) {
  const surrogateTagMask = 0xFC00;
  const trailSurrogateTag = 0xDC00;
  return (code & surrogateTagMask) == trailSurrogateTag;
}
// Combines a lead surrogate [start] and a trail surrogate [end] into a single
// code point: the low ten bits of each are joined into a 20-bit value, which
// is then offset by 0x10000.
int _combineSurrogatePair(int start, int end) {
  const payloadMask = 0x3FF;
  final highBits = (start & payloadMask) << 10;
  final lowBits = end & payloadMask;
  // The two halves occupy disjoint bit ranges, so OR-ing them equals adding.
  return 0x10000 + (highBits | lowBits);
}
/**
 * [Iterator] for reading runes (integer Unicode code points) of a Dart string.
 *
 * The iterator can step in both directions. A lead surrogate immediately
 * followed by a trail surrogate is reported as one combined code point; any
 * other code unit is reported as-is.
 */
class RuneIterator implements BidirectionalIterator<int> {
  /** String being iterated. */
  final String string;

  /** Position before the current code point. */
  int _position;

  /** Position after the current code point. */
  int _nextPosition;

  /**
   * Current code point.
   *
   * If the iterator has hit either end, the [_currentCodePoint] is -1
   * and `_position == _nextPosition`.
   */
  int _currentCodePoint = -1;

  /** Create an iterator positioned at the beginning of the string. */
  RuneIterator(this.string)
      : _position = 0,
        _nextPosition = 0;

  /**
   * Create an iterator positioned before the [index]th code unit of the string.
   *
   * When created, there is no [current] value.
   * A [moveNext] will use the rune starting at [index] as the current value,
   * and a [movePrevious] will use the rune ending just before [index] as the
   * current value.
   *
   * The [index] must be in the range `0..string.length` (inclusive) and must
   * not be in the middle of a surrogate pair.
   */
  RuneIterator.at(this.string, int index)
      : _position = index,
        _nextPosition = index {
    RangeError.checkValueInInterval(index, 0, string.length);
    _checkSplitSurrogate(index);
  }

  /** Throw an error if the index is in the middle of a surrogate pair. */
  void _checkSplitSurrogate(int index) {
    // Positions at either end of the string cannot split a pair.
    if (index <= 0 || index >= string.length) return;
    if (!_isLeadSurrogate(string.codeUnitAt(index - 1))) return;
    if (!_isTrailSurrogate(string.codeUnitAt(index))) return;
    throw ArgumentError('Index inside surrogate pair: $index');
  }

  /**
   * The starting position of the current rune in the string.
   *
   * Returns -1 if there is no current rune ([current] is -1).
   */
  int get rawIndex => _position == _nextPosition ? -1 : _position;

  /**
   * Resets the iterator to the rune at the specified index of the string.
   *
   * Setting a negative [rawIndex], or one greater than or equal to
   * `string.length`, is an error. So is setting it in the middle of a surrogate
   * pair.
   *
   * Because the end position `string.length` is rejected, a successful
   * assignment always leaves the iterator with a current rune. Use [reset]
   * to position the iterator at the end of the string instead.
   */
  set rawIndex(int rawIndex) {
    RangeError.checkValidIndex(rawIndex, string, "rawIndex");
    reset(rawIndex);
    // Step onto the rune that starts at the requested position.
    moveNext();
  }

  /**
   * Resets the iterator to the given index into the string.
   *
   * After this the [current] value is unset.
   * You must call [moveNext] to make the rune at the position current,
   * or [movePrevious] for the last rune before the position.
   *
   * The [rawIndex] must be non-negative and no greater than `string.length`.
   * It must also not be the index of the trailing surrogate of a surrogate
   * pair.
   */
  void reset([int rawIndex = 0]) {
    RangeError.checkValueInInterval(rawIndex, 0, string.length, "rawIndex");
    _checkSplitSurrogate(rawIndex);
    _position = rawIndex;
    _nextPosition = rawIndex;
    _currentCodePoint = -1;
  }

  /**
   * The rune (integer Unicode code point) starting at the current position in
   * the string.
   *
   * The value is -1 if there is no current code point.
   */
  int get current => _currentCodePoint;

  /**
   * The number of code units comprising the current rune.
   *
   * Returns zero if there is no current rune ([current] is -1).
   */
  int get currentSize => _nextPosition - _position;

  /**
   * A string containing the current rune.
   *
   * For runes outside the basic multilingual plane, this will be
   * a String of length 2, containing two code units.
   *
   * Returns an empty string if there is no [current] value.
   */
  String get currentAsString {
    switch (_nextPosition - _position) {
      case 0:
        return "";
      case 1:
        return string[_position];
      default:
        return string.substring(_position, _nextPosition);
    }
  }

  /**
   * Advances to the rune starting where the current one ends.
   *
   * Returns false, and leaves [current] as -1, when the end of the string
   * has been reached.
   */
  bool moveNext() {
    final start = _nextPosition;
    _position = start;
    if (start == string.length) {
      _currentCodePoint = -1;
      return false;
    }
    final firstUnit = string.codeUnitAt(start);
    final following = start + 1;
    // Assume a single-unit rune, then widen it if a full pair is present.
    var rune = firstUnit;
    var end = following;
    if (_isLeadSurrogate(firstUnit) && following < string.length) {
      final secondUnit = string.codeUnitAt(following);
      if (_isTrailSurrogate(secondUnit)) {
        rune = _combineSurrogatePair(firstUnit, secondUnit);
        end = following + 1;
      }
    }
    _nextPosition = end;
    _currentCodePoint = rune;
    return true;
  }

  /**
   * Moves back to the rune ending where the current one starts.
   *
   * Returns false, and leaves [current] as -1, when the start of the string
   * has been reached.
   */
  bool movePrevious() {
    final end = _position;
    _nextPosition = end;
    if (end == 0) {
      _currentCodePoint = -1;
      return false;
    }
    final lastIndex = end - 1;
    final lastUnit = string.codeUnitAt(lastIndex);
    // Assume a single-unit rune, then widen it if a full pair is present.
    var rune = lastUnit;
    var start = lastIndex;
    if (_isTrailSurrogate(lastUnit) && lastIndex > 0) {
      final precedingUnit = string.codeUnitAt(lastIndex - 1);
      if (_isLeadSurrogate(precedingUnit)) {
        rune = _combineSurrogatePair(precedingUnit, lastUnit);
        start = lastIndex - 1;
      }
    }
    _position = start;
    _currentCodePoint = rune;
    return true;
  }
}