-
Notifications
You must be signed in to change notification settings - Fork 1.5k
/
string.dart
966 lines (908 loc) · 36.2 KB
/
string.dart
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
part of dart.core;
/// A sequence of UTF-16 code units.
///
/// Strings are mainly used to represent text. A character may be represented by
/// multiple code points, each code point consisting of one or two code
/// units. For example, the Papua New Guinea flag character requires four code
/// units to represent two code points, but should be treated like a single
/// character: "🇵🇬". Platforms that do not support the flag character may show
/// the letters "PG" instead. If the code points are swapped, it instead becomes
/// the Guadeloupe flag "🇬🇵" ("GP").
///
/// A string can be either single or multiline. Single line strings are
/// written using matching single or double quotes, and multiline strings are
/// written using triple quotes. The following are all valid Dart strings:
/// ```dart
/// 'Single quotes';
/// "Double quotes";
/// 'Double quotes in "single" quotes';
/// "Single quotes in 'double' quotes";
///
/// '''A
/// multiline
/// string''';
///
/// """
/// Another
/// multiline
/// string""";
/// ```
/// Strings are immutable. Although you cannot change a string, you can perform
/// an operation on a string which creates a new string:
/// ```dart
/// const string = 'Dart is fun';
/// print(string.substring(0, 4)); // 'Dart'
/// ```
/// You can use the plus (`+`) operator to concatenate strings:
/// ```dart
/// const string = 'Dart ' + 'is ' + 'fun!';
/// print(string); // 'Dart is fun!'
/// ```
/// Adjacent string literals are concatenated automatically:
/// ```dart
/// const string = 'Dart ' 'is ' 'fun!';
/// print(string); // 'Dart is fun!'
/// ```
/// You can use `${}` to interpolate the value of Dart expressions
/// within strings. The curly braces can be omitted when evaluating identifiers:
/// ```dart
/// const string = 'dartlang';
/// print('$string has ${string.length} letters'); // dartlang has 8 letters
/// ```
/// A string is represented by a sequence of Unicode UTF-16 code units
/// accessible through the [codeUnitAt] or the [codeUnits] members:
/// ```dart
/// const string = 'Dart';
/// final firstCodeUnit = string.codeUnitAt(0);
/// print(firstCodeUnit); // 68, aka U+0044, the code point for 'D'.
/// final allCodeUnits = string.codeUnits;
/// print(allCodeUnits); // [68, 97, 114, 116]
/// ```
/// A string representation of the individual code units is accessible through
/// the index operator:
/// ```dart
/// const string = 'Dart';
/// final charAtIndex = string[0];
/// print(charAtIndex); // 'D'
/// ```
/// The characters of a string are encoded in UTF-16. Decoding UTF-16, which
/// combines surrogate pairs, yields Unicode code points. Following a similar
/// terminology to Go, Dart uses the name 'rune' for an integer representing a
/// Unicode code point. Use the [runes] property to get the runes of a string:
/// ```dart
/// const string = 'Dart';
/// final runes = string.runes.toList();
/// print(runes); // [68, 97, 114, 116]
/// ```
/// For a character outside the Basic Multilingual Plane (plane 0) that is
/// composed of a surrogate pair, [runes] combines the pair and returns a
/// single integer. For example, the Unicode character for a
/// musical G-clef ('𝄞') with rune value 0x1D11E consists of a UTF-16 surrogate
/// pair: `0xD834` and `0xDD1E`. Using [codeUnits] returns the surrogate pair,
/// and using `runes` returns their combined value:
/// ```dart
/// const clef = '\u{1D11E}';
/// for (final item in clef.codeUnits) {
/// print(item.toRadixString(16));
/// // d834
/// // dd1e
/// }
/// for (final item in clef.runes) {
/// print(item.toRadixString(16)); // 1d11e
/// }
/// ```
/// The `String` class cannot be extended or implemented. Attempting to do so
/// yields a compile-time error.
///
/// ## Other resources
///
/// * [StringBuffer] to efficiently build a string incrementally.
/// * [RegExp] to work with regular expressions.
/// * [Strings and regular expressions](https://dart.dev/guides/libraries/library-tour#strings-and-regular-expressions)
@pragma('vm:entry-point')
abstract final class String implements Comparable<String>, Pattern {
/// Allocates a new string containing the specified [charCodes].
///
/// The [charCodes] can be both UTF-16 code units and runes.
/// If a char-code value is 16-bit, it is used as a code unit:
/// ```dart
/// final string = String.fromCharCodes([68]);
/// print(string); // D
/// ```
/// If a char-code value is greater than 16-bits, it is decomposed into a
/// surrogate pair:
/// ```dart
/// final clef = String.fromCharCodes([0x1D11E]);
/// clef.codeUnitAt(0); // 0xD834
/// clef.codeUnitAt(1); // 0xDD1E
/// ```
/// If [start] and [end] are provided, only the values of [charCodes]
/// at positions from `start` to, but not including, `end`, are used.
/// The `start` and `end` values must satisfy `0 <= start <= end`.
/// If [start] is omitted, it defaults to zero, the start of [charCodes],
/// and if [end] is omitted, all char-codes after [start] are included.
/// If [charCodes] does not have [end], or even [start], elements,
/// the specified char-codes may be shorter than `end - start`, or even empty.
external factory String.fromCharCodes(Iterable<int> charCodes,
[int start = 0, int? end]);
/// Allocates a new string containing the specified [charCode].
///
/// If the [charCode] can be represented by a single UTF-16 code unit, the new
/// string contains a single code unit. Otherwise, the [length] is 2 and
/// the code units form a surrogate pair. See documentation for
/// [fromCharCodes].
///
/// Creating a [String] with one half of a surrogate pair is allowed.
external factory String.fromCharCode(int charCode);
/// Value for [name] in the compilation configuration environment declaration.
///
/// The compilation configuration environment is provided by the
/// surrounding tools which are compiling or running the Dart program.
/// The environment is a mapping from a set of string keys to their associated
/// string value.
/// The string value, or lack of a value, associated with a [name]
/// must be consistent across all calls to `String.fromEnvironment`,
/// [int.fromEnvironment], [bool.fromEnvironment] and [bool.hasEnvironment]
/// in a single program.
///
/// The result of invoking this constructor is the string associated with
/// the key [name]. If no value is associated with [name],
/// the result is instead the [defaultValue] string, which defaults to
/// the empty string.
///
/// Example of looking up a value:
/// ```dart
/// const String.fromEnvironment("defaultFloo", defaultValue: "no floo")
/// ```
/// In order to check whether a value is there at all, use
/// [bool.hasEnvironment]. Example:
/// ```dart
/// const maybeDeclared = bool.hasEnvironment("maybeDeclared")
/// ? String.fromEnvironment("maybeDeclared")
/// : null;
/// ```
///
/// The string value, or lack of a value, associated with a [name]
/// must be consistent across all calls to `String.fromEnvironment`,
/// [int.fromEnvironment], [bool.fromEnvironment] and [bool.hasEnvironment]
/// in a single program.
///
/// This constructor is only guaranteed to work when invoked as `const`.
/// It may work as a non-constant invocation on some platforms which
/// have access to compiler options at run-time, but most ahead-of-time
/// compiled platforms will not have this information.
///
/// The compilation configuration environment is not the same as the
/// environment variables of a POSIX process. Those can be accessed on
/// native platforms using `Platform.environment` from the `dart:io` library.
external const factory String.fromEnvironment(String name,
{String defaultValue = ""});
/// The character (as a single-code-unit [String]) at the given [index].
///
/// The returned string represents exactly one UTF-16 code unit, which may be
/// half of a surrogate pair. A single member of a surrogate pair is an
/// invalid UTF-16 string:
/// ```dart
/// var clef = '\u{1D11E}';
/// // These represent invalid UTF-16 strings.
/// clef[0].codeUnits; // [0xD834]
/// clef[1].codeUnits; // [0xDD1E]
/// ```
/// This method is equivalent to
/// `String.fromCharCode(this.codeUnitAt(index))`.
String operator [](int index);
/// Returns the 16-bit UTF-16 code unit at the given [index].
int codeUnitAt(int index);
/// The length of the string.
///
/// Returns the number of UTF-16 code units in this string. The number
/// of [runes] might be fewer if the string contains characters outside
/// the Basic Multilingual Plane (plane 0):
/// ```dart
/// 'Dart'.length; // 4
/// 'Dart'.runes.length; // 4
///
/// var clef = '\u{1D11E}';
/// clef.length; // 2
/// clef.runes.length; // 1
/// ```
int get length;
/// A hash code derived from the code units of the string.
///
/// This is compatible with [operator ==]. Strings with the same sequence
/// of code units have the same hash code.
int get hashCode;
/// Whether [other] is a `String` with the same sequence of code units.
///
/// This method compares each individual code unit of the strings.
/// It does not check for Unicode equivalence.
/// For example, both the following strings represent the string 'Amélie',
/// but due to their different encoding, are not equal:
/// ```dart
/// 'Am\xe9lie' == 'Ame\u{301}lie'; // false
/// ```
/// The first string encodes 'é' as a single unicode code unit (also
/// a single rune), whereas the second string encodes it as 'e' with the
/// combining accent character '◌́'.
bool operator ==(Object other);
/// Compares this string to [other].
///
/// Returns a negative value if `this` is ordered before `other`,
/// a positive value if `this` is ordered after `other`,
/// or zero if `this` and `other` are equivalent.
///
/// The ordering is the same as the ordering of the code units at the first
/// position where the two strings differ.
/// If one string is a prefix of the other,
/// then the shorter string is ordered before the longer string.
/// If the strings have exactly the same content, they are equivalent with
/// regard to the ordering.
/// Ordering does not check for Unicode equivalence.
/// The comparison is case sensitive.
/// ```dart
/// var relation = 'Dart'.compareTo('Go');
/// print(relation); // < 0
/// relation = 'Go'.compareTo('Forward');
/// print(relation); // > 0
/// relation = 'Forward'.compareTo('Forward');
/// print(relation); // 0
/// ```
int compareTo(String other);
/// Whether this string ends with [other].
///
/// For example:
/// ```dart
/// const string = 'Dart is open source';
/// print(string.endsWith('urce')); // true
/// ```
bool endsWith(String other);
/// Whether this string starts with a match of [pattern].
///
/// ```dart
/// const string = 'Dart is open source';
/// print(string.startsWith('Dar')); // true
/// print(string.startsWith(RegExp(r'[A-Z][a-z]'))); // true
/// ```
/// If [index] is provided, this method checks if the substring starting
/// at that index starts with a match of [pattern]:
/// ```dart
/// const string = 'Dart';
/// print(string.startsWith('art', 0)); // false
/// print(string.startsWith('art', 1)); // true
/// print(string.startsWith(RegExp(r'\w{3}'), 2)); // false
/// ```
/// [index] must not be negative or greater than [length].
///
/// A [RegExp] containing '^' does not match if the [index] is greater than
/// zero and the regexp is not multi-line.
/// The pattern works on the string as a whole, and does not extract
/// a substring starting at [index] first:
/// ```dart
/// const string = 'Dart';
/// print(string.startsWith(RegExp(r'^art'), 1)); // false
/// print(string.startsWith(RegExp(r'art'), 1)); // true
/// ```
bool startsWith(Pattern pattern, [int index = 0]);
/// Returns the position of the first match of [pattern] in this string,
/// starting at [start], inclusive:
/// ```dart
/// const string = 'Dartisans';
/// print(string.indexOf('art')); // 1
/// print(string.indexOf(RegExp(r'[A-Z][a-z]'))); // 0
/// ```
/// Returns -1 if no match is found:
/// ```dart
/// const string = 'Dartisans';
/// string.indexOf(RegExp(r'dart')); // -1
/// ```
/// The [start] must be non-negative and not greater than [length].
int indexOf(Pattern pattern, [int start = 0]);
/// The starting position of the last match [pattern] in this string.
///
/// Finds a match of pattern by searching backward starting at [start]:
/// ```dart
/// const string = 'Dartisans';
/// print(string.lastIndexOf('a')); // 6
/// print(string.lastIndexOf(RegExp(r'a(r|n)'))); // 6
/// ```
/// Returns -1 if [pattern] could not be found in this string.
/// ```dart
/// const string = 'Dartisans';
/// print(string.lastIndexOf(RegExp(r'DART'))); // -1
/// ```
/// If [start] is omitted, search starts from the end of the string.
/// If supplied, [start] must be non-negative and not greater than [length].
int lastIndexOf(Pattern pattern, [int? start]);
/// Whether this string is empty.
bool get isEmpty;
/// Whether this string is not empty.
bool get isNotEmpty;
/// Creates a new string by concatenating this string with [other].
///
/// Example:
/// ```dart
/// const string = 'dart' + 'lang'; // 'dartlang'
/// ```
String operator +(String other);
/// The substring of this string from [start], inclusive, to [end], exclusive.
///
/// Example:
/// ```dart
/// const string = 'dartlang';
/// var result = string.substring(1); // 'artlang'
/// result = string.substring(1, 4); // 'art'
/// ```
///
/// Both [start] and [end] must be non-negative and no greater than [length];
/// [end], if provided, must be greater than or equal to [start].
String substring(int start, [int? end]);
/// The string without any leading and trailing whitespace.
///
/// If the string contains leading or trailing whitespace, a new string with no
/// leading and no trailing whitespace is returned:
/// ```dart
/// final trimmed = '\tDart is fun\n'.trim();
/// print(trimmed); // 'Dart is fun'
/// ```
/// Otherwise, the original string itself is returned:
/// ```dart
/// const string1 = 'Dart';
/// final string2 = string1.trim(); // 'Dart'
/// print(identical(string1, string2)); // true
/// ```
/// Whitespace is defined by the Unicode White_Space property (as defined in
/// version 6.2 or later) and the BOM character, 0xFEFF.
///
/// Here is the list of trimmed characters according to Unicode version 6.3:
/// ```plaintext
/// 0009..000D ; White_Space # Cc <control-0009>..<control-000D>
/// 0020 ; White_Space # Zs SPACE
/// 0085 ; White_Space # Cc <control-0085>
/// 00A0 ; White_Space # Zs NO-BREAK SPACE
/// 1680 ; White_Space # Zs OGHAM SPACE MARK
/// 2000..200A ; White_Space # Zs EN QUAD..HAIR SPACE
/// 2028 ; White_Space # Zl LINE SEPARATOR
/// 2029 ; White_Space # Zp PARAGRAPH SEPARATOR
/// 202F ; White_Space # Zs NARROW NO-BREAK SPACE
/// 205F ; White_Space # Zs MEDIUM MATHEMATICAL SPACE
/// 3000 ; White_Space # Zs IDEOGRAPHIC SPACE
///
/// FEFF ; BOM ZERO WIDTH NO_BREAK SPACE
/// ```
/// Some later versions of Unicode do not include U+0085 as a whitespace
/// character. Whether it is trimmed depends on the Unicode version
/// used by the system.
String trim();
/// The string without any leading whitespace.
///
/// As [trim], but only removes leading whitespace.
/// ```dart
/// final string = ' Dart '.trimLeft();
/// print(string); // 'Dart '
/// ```
String trimLeft();
/// The string without any trailing whitespace.
///
/// As [trim], but only removes trailing whitespace.
/// ```dart
/// final string = ' Dart '.trimRight();
/// print(string); // ' Dart'
/// ```
String trimRight();
/// Creates a new string by concatenating this string with itself a number
/// of times.
///
/// The result of `str * n` is equivalent to
/// `str + str + ...`(n times)`... + str`.
///
/// ```dart
/// const string = 'Dart';
/// final multiplied = string * 3;
/// print(multiplied); // 'DartDartDart'
/// ```
/// Returns an empty string if [times] is zero or negative.
String operator *(int times);
/// Pads this string on the left if it is shorter than [width].
///
/// Returns a new string that prepends [padding] onto this string
/// one time for each position the length is less than [width].
///
/// ```dart
/// const string = 'D';
/// print(string.padLeft(4)); // ' D'
/// print(string.padLeft(2, 'x')); // 'xD'
/// print(string.padLeft(4, 'y')); // 'yyyD'
/// print(string.padLeft(4, '>>')); // '>>>>>>D'
/// ```
///
/// If [width] is already smaller than or equal to `this.length`,
/// no padding is added. A negative `width` is treated as zero.
///
/// If [padding] has length different from 1, the result will not
/// have length `width`. This may be useful for cases where the
/// padding is a longer string representing a single character, like
/// `" "` or `"\u{10002}`".
/// In that case, the user should make sure that `this.length` is
/// the correct measure of the string's length.
String padLeft(int width, [String padding = ' ']);
/// Pads this string on the right if it is shorter than [width].
///
/// Returns a new string that appends [padding] after this string
/// one time for each position the length is less than [width].
///
/// ```dart
/// const string = 'D';
/// print(string.padRight(4)); // 'D '
/// print(string.padRight(2, 'x')); // 'Dx'
/// print(string.padRight(4, 'y')); // 'Dyyy'
/// print(string.padRight(4, '>>')); // 'D>>>>>>'
/// ```
///
/// If [width] is already smaller than or equal to `this.length`,
/// no padding is added. A negative `width` is treated as zero.
///
/// If [padding] has length different from 1, the result will not
/// have length `width`. This may be useful for cases where the
/// padding is a longer string representing a single character, like
/// `" "` or `"\u{10002}`".
/// In that case, the user should make sure that `this.length` is
/// the correct measure of the string's length.
String padRight(int width, [String padding = ' ']);
/// Whether this string contains a match of [other].
///
/// Example:
/// ```dart
/// const string = 'Dart strings';
/// final containsD = string.contains('D'); // true
/// final containsUpperCase = string.contains(RegExp(r'[A-Z]')); // true
/// ```
/// If [startIndex] is provided, this method matches only at or after that
/// index:
/// ```dart
/// const string = 'Dart strings';
/// final containsD = string.contains(RegExp('D'), 0); // true
/// final caseSensitive = string.contains(RegExp(r'[A-Z]'), 1); // false
/// ```
/// The [startIndex] must not be negative or greater than [length].
bool contains(Pattern other, [int startIndex = 0]);
/// Creates a new string with the first occurrence of [from] replaced by [to].
///
/// Finds the first match of [from] in this string, starting from [startIndex],
/// and creates a new string where that match is replaced with the [to] string.
///
/// Example:
/// ```dart
/// '0.0001'.replaceFirst(RegExp(r'0'), ''); // '.0001'
/// '0.0001'.replaceFirst(RegExp(r'0'), '7', 1); // '0.7001'
/// ```
String replaceFirst(Pattern from, String to, [int startIndex = 0]);
/// Replace the first occurrence of [from] in this string.
///
/// ```dart
/// const string = 'Dart is fun';
/// print(string.replaceFirstMapped(
/// 'fun', (m) => 'open source')); // Dart is open source
///
/// print(string.replaceFirstMapped(
/// RegExp(r'\w(\w*)'), (m) => '<${m[0]}-${m[1]}>')); // <Dart-art> is fun
/// ```
///
/// Returns a new string, which is this string
/// except that the first match of [from], starting from [startIndex],
/// is replaced by the result of calling [replace] with the match object.
///
/// The [startIndex] must be non-negative and no greater than [length].
String replaceFirstMapped(Pattern from, String replace(Match match),
[int startIndex = 0]);
/// Replaces all substrings that match [from] with [replace].
///
/// Creates a new string in which the non-overlapping substrings matching
/// [from] (the ones iterated by `from.allMatches(thisString)`) are replaced
/// by the literal string [replace].
/// ```dart
/// 'resume'.replaceAll(RegExp(r'e'), 'é'); // 'résumé'
/// ```
/// Notice that the [replace] string is not interpreted. If the replacement
/// depends on the match (for example, on a [RegExp]'s capture groups), use
/// the [replaceAllMapped] method instead.
String replaceAll(Pattern from, String replace);
/// Replace all substrings that match [from] by a computed string.
///
/// Creates a new string in which the non-overlapping substrings that match
/// [from] (the ones iterated by `from.allMatches(thisString)`) are replaced
/// by the result of calling [replace] on the corresponding [Match] object.
///
/// This can be used to replace matches with new content that depends on the
/// match, unlike [replaceAll] where the replacement string is always the same.
///
/// The [replace] function is called with the [Match] generated
/// by the pattern, and its result is used as replacement.
///
/// The function defined below converts each word in a string to simplified
/// 'pig latin' using [replaceAllMapped]:
/// ```dart
/// String pigLatin(String words) => words.replaceAllMapped(
/// RegExp(r'\b(\w*?)([aeiou]\w*)', caseSensitive: false),
/// (Match m) => "${m[2]}${m[1]}${m[1]!.isEmpty ? 'way' : 'ay'}");
///
/// final result = pigLatin('I have a secret now!');
/// print(result); // 'Iway avehay away ecretsay ownay!'
/// ```
String replaceAllMapped(Pattern from, String Function(Match match) replace);
/// Replaces the substring from [start] to [end] with [replacement].
///
/// Creates a new string equivalent to:
/// ```dart
/// this.substring(0, start) + replacement + this.substring(end)
/// ```
/// Example:
/// ```dart
/// const string = 'Dart is fun';
/// final result = string.replaceRange(8, null, 'open source');
/// print(result); // Dart is open source
/// ```
/// The [start] and [end] indices must specify a valid range of this string.
/// That is `0 <= start <= end <= this.length`.
/// If [end] is `null`, it defaults to [length].
String replaceRange(int start, int? end, String replacement);
/// Splits the string at matches of [pattern] and returns a list of substrings.
///
/// Finds all the matches of `pattern` in this string,
/// as by using [Pattern.allMatches],
/// and returns the list of the substrings between the matches,
/// before the first match, and after the last match.
/// ```dart
/// const string = 'Hello world!';
/// final splitted = string.split(' ');
/// print(splitted); // [Hello, world!];
/// ```
/// If the pattern doesn't match this string at all,
/// the result is always a list containing only the original string.
///
/// If the [pattern] is a [String], then it's always the case that:
/// ```dart
/// string.split(pattern).join(pattern) == string
/// ```
///
/// If the first match is an empty match at the start of the string,
/// the empty substring before it is not included in the result.
/// If the last match is an empty match at the end of the string,
/// the empty substring after it is not included in the result.
/// If a match is empty, and it immediately follows a previous
/// match (it starts at the position where the previous match ended),
/// then the empty substring between the two matches is not
/// included in the result.
/// ```dart
/// const string = 'abba';
/// final re = RegExp(r'b*');
/// // re.allMatches(string) will find four matches:
/// // * empty match before first "a".
/// // * match of "bb"
/// // * empty match after "bb", before second "a"
/// // * empty match after second "a".
/// print(string.split(re)); // [a, a]
/// ```
///
/// A non-empty match at the start or end of the string, or after another
/// match, is not treated specially, and will introduce empty substrings
/// in the result:
/// ```dart
/// const string = 'abbaa';
/// final splitted = string.split('a'); // ['', 'bb', '', '']
/// ```
///
/// If this string is the empty string, the result is an empty list
/// if `pattern` matches the empty string, since the empty string
/// before and after the first-and-last empty match are not included.
/// (It is still a list containing the original empty string `[""]`
/// if the pattern doesn't match).
/// ```dart
/// const string = '';
/// print(string.split('')); // []
/// print(string.split('a')); // []
/// ```
///
/// Splitting with an empty pattern splits the string into single-code unit
/// strings.
/// ```dart
/// const string = 'Pub';
/// print(string.split('')); // [P, u, b]
///
/// // Same as:
/// var codeUnitStrings = [
/// for (final unit in string.codeUnits) String.fromCharCode(unit)
/// ];
/// print(codeUnitStrings); // [P, u, b]
/// ```
///
/// Splitting happens at UTF-16 code unit boundaries,
/// and not at rune (Unicode code point) boundaries:
/// ```dart
/// // String made up of two code units, but one rune.
/// const string = '\u{1D11E}';
/// final splitted = string.split('');
/// print(splitted); // ['\ud834', '\udd1e'] - 2 unpaired surrogate values
/// ```
/// To get a list of strings containing the individual runes of a string,
/// you should not use split.
/// You can instead get a string for each rune as follows:
/// ```dart
/// const string = '\u{1F642}';
/// for (final rune in string.runes) {
/// print(String.fromCharCode(rune));
/// }
/// ```
List<String> split(Pattern pattern);
/// Splits the string, converts its parts, and combines them into a new
/// string.
///
/// The [pattern] is used to split the string
/// into parts and separating matches.
/// Each match of [Pattern.allMatches] of [pattern] on this string is
/// used as a match, and the substrings between the end of one match
/// (or the start of the string) and the start of the next match (or the
/// end of the string) is treated as a non-matched part.
/// (There is no omission of leading or trailing empty matchs, like
/// in [split], all matches and parts between the are included.)
///
/// Each match is converted to a string by calling [onMatch]. If [onMatch]
/// is omitted, the matched substring is used.
///
/// Each non-matched part is converted to a string by a call to [onNonMatch].
/// If [onNonMatch] is omitted, the non-matching substring itself is used.
///
/// Then all the converted parts are concatenated into the resulting string.
/// ```dart
/// final result = 'Eats shoots leaves'.splitMapJoin(RegExp(r'shoots'),
/// onMatch: (m) => '${m[0]}', // (or no onMatch at all)
/// onNonMatch: (n) => '*');
/// print(result); // *shoots*
/// ```
String splitMapJoin(Pattern pattern,
{String Function(Match)? onMatch, String Function(String)? onNonMatch});
/// An unmodifiable list of the UTF-16 code units of this string.
List<int> get codeUnits;
/// An [Iterable] of Unicode code-points of this string.
///
/// If the string contains surrogate pairs, they are combined and returned
/// as one integer by this iterator. Unmatched surrogate halves are treated
/// like valid 16-bit code-units.
Runes get runes;
/// Converts all characters in this string to lower case.
///
/// If the string is already in all lower case, this method returns `this`.
/// ```dart
/// 'ALPHABET'.toLowerCase(); // 'alphabet'
/// 'abc'.toLowerCase(); // 'abc'
/// ```
/// This function uses the language independent Unicode mapping and thus only
/// works in some languages.
// TODO(floitsch): document better. (See EcmaScript for description).
String toLowerCase();
/// Converts all characters in this string to upper case.
///
/// If the string is already in all upper case, this method returns `this`.
/// ```dart
/// 'alphabet'.toUpperCase(); // 'ALPHABET'
/// 'ABC'.toUpperCase(); // 'ABC'
/// ```
/// This function uses the language independent Unicode mapping and thus only
/// works in some languages.
// TODO(floitsch): document better. (See EcmaScript for description).
String toUpperCase();
}
/// The runes (integer Unicode code points) of a [String].
///
/// The characters of a string are encoded in UTF-16. Decoding UTF-16, which
/// combines surrogate pairs, yields Unicode code points. Following a similar
/// terminology to Go, Dart uses the name 'rune' for an integer representing a
/// Unicode code point. Use the `runes` property to get the runes of a string.
///
/// Example:
/// ```dart
/// const string = 'Dart';
/// final runes = string.runes.toList();
/// print(runes); // [68, 97, 114, 116]
/// ```
///
/// For a character outside the Basic Multilingual Plane (plane 0) that is
/// composed of a surrogate pair, runes combines the pair and returns a
/// single integer.
///
/// For example, the Unicode character for "Man" emoji ('👨', `U+1F468`) is
/// combined from the surrogates `U+d83d` and `U+dc68`.
///
/// Example:
/// ```dart
/// const emojiMan = '👨';
/// print(emojiMan.runes); // (128104)
///
/// // Surrogate pairs:
/// for (final item in emojiMan.codeUnits) {
/// print(item.toRadixString(16));
/// // d83d
/// // dc68
/// }
/// ```
///
/// **See also:**
/// * [Runes and grapheme clusters](
/// https://dart.dev/guides/language/language-tour#runes-and-grapheme-clusters)
/// in
/// [A tour of the Dart language](https://dart.dev/guides/language/language-tour).
final class Runes extends Iterable<int> {
/// The string that this is the runes of.
final String string;
/// Creates a [Runes] iterator for [string].
Runes(this.string);
RuneIterator get iterator => RuneIterator(string);
int get last {
if (string.length == 0) {
throw StateError('No elements.');
}
int length = string.length;
int code = string.codeUnitAt(length - 1);
if (_isTrailSurrogate(code) && string.length > 1) {
int previousCode = string.codeUnitAt(length - 2);
if (_isLeadSurrogate(previousCode)) {
return _combineSurrogatePair(previousCode, code);
}
}
return code;
}
}
// Is then code (a 16-bit unsigned integer) a UTF-16 lead surrogate.
bool _isLeadSurrogate(int code) => (code & 0xFC00) == 0xD800;
// Is then code (a 16-bit unsigned integer) a UTF-16 trail surrogate.
bool _isTrailSurrogate(int code) => (code & 0xFC00) == 0xDC00;
// Combine a lead and a trail surrogate value into a single code point.
int _combineSurrogatePair(int start, int end) {
return 0x10000 + ((start & 0x3FF) << 10) + (end & 0x3FF);
}
/// [Iterator] for reading runes (integer Unicode code points) of a Dart string.
final class RuneIterator implements Iterator<int> {
/// String being iterated.
final String string;
/// Position before the current code point.
int _position;
/// Position after the current code point.
int _nextPosition;
/// Current code point.
///
/// If the iterator has hit either end, the [_currentCodePoint] is -1
/// and `_position == _nextPosition`.
int _currentCodePoint = -1;
/// Create an iterator positioned at the beginning of the string.
RuneIterator(String string)
: this.string = string,
_position = 0,
_nextPosition = 0;
/// Create an iterator positioned before the [index]th code unit of the string.
///
/// When created, there is no [current] value.
/// A [moveNext] will use the rune starting at [index] the current value,
/// and a [movePrevious] will use the rune ending just before [index] as
/// the current value.
///
/// The [index] position must not be in the middle of a surrogate pair.
RuneIterator.at(String string, int index)
: string = string,
_position = index,
_nextPosition = index {
RangeError.checkValueInInterval(index, 0, string.length);
_checkSplitSurrogate(index);
}
/// Throw an error if the index is in the middle of a surrogate pair.
void _checkSplitSurrogate(int index) {
if (index > 0 &&
index < string.length &&
_isLeadSurrogate(string.codeUnitAt(index - 1)) &&
_isTrailSurrogate(string.codeUnitAt(index))) {
throw ArgumentError('Index inside surrogate pair: $index');
}
}
/// The starting position of the current rune in the string.
///
/// Returns -1 if there is no current rune ([current] is -1).
int get rawIndex => (_position != _nextPosition) ? _position : -1;
/// Resets the iterator to the rune at the specified index of the string.
///
/// Setting a negative [rawIndex], or one greater than or equal to
/// `string.length`, is an error. So is setting it in the middle of a surrogate
/// pair.
///
/// Setting the position to the end of the string means that there is no
/// current rune.
void set rawIndex(int rawIndex) {
IndexError.check(rawIndex, string.length,
indexable: string, name: "rawIndex");
reset(rawIndex);
moveNext();
}
/// Resets the iterator to the given index into the string.
///
/// After this the [current] value is unset.
/// You must call [moveNext] make the rune at the position current,
/// or [movePrevious] for the last rune before the position.
///
/// The [rawIndex] must be non-negative and no greater than `string.length`.
/// It must also not be the index of the trailing surrogate of a surrogate
/// pair.
void reset([int rawIndex = 0]) {
RangeError.checkValueInInterval(rawIndex, 0, string.length, "rawIndex");
_checkSplitSurrogate(rawIndex);
_position = _nextPosition = rawIndex;
_currentCodePoint = -1;
}
/// The rune (integer Unicode code point) starting at the current position in
/// the string.
///
/// The value is -1 if there is no current code point.
int get current => _currentCodePoint;
/// The number of code units comprising the current rune.
///
/// Returns zero if there is no current rune ([current] is -1).
int get currentSize => _nextPosition - _position;
/// A string containing the current rune.
///
/// For runes outside the basic multilingual plane, this will be
/// a String of length 2, containing two code units.
///
/// Returns an empty string if there is no [current] value.
String get currentAsString {
if (_position == _nextPosition) return "";
if (_position + 1 == _nextPosition) return string[_position];
return string.substring(_position, _nextPosition);
}
/// Move to the next code point.
///
/// Returns `true` and updates [current] if there is a next code point.
/// Returns `false` otherwise, and then there is no current code point.
bool moveNext() {
_position = _nextPosition;
if (_position == string.length) {
_currentCodePoint = -1;
return false;
}
int codeUnit = string.codeUnitAt(_position);
int nextPosition = _position + 1;
if (_isLeadSurrogate(codeUnit) && nextPosition < string.length) {
int nextCodeUnit = string.codeUnitAt(nextPosition);
if (_isTrailSurrogate(nextCodeUnit)) {
_nextPosition = nextPosition + 1;
_currentCodePoint = _combineSurrogatePair(codeUnit, nextCodeUnit);
return true;
}
}
_nextPosition = nextPosition;
_currentCodePoint = codeUnit;
return true;
}
/// Move back to the previous code point.
///
/// Returns `true` and updates [current] if there is a previous code point.
/// Returns `false` otherwise, and then there is no current code point.
bool movePrevious() {
_nextPosition = _position;
if (_position == 0) {
_currentCodePoint = -1;
return false;
}
int position = _position - 1;
int codeUnit = string.codeUnitAt(position);
if (_isTrailSurrogate(codeUnit) && position > 0) {
int prevCodeUnit = string.codeUnitAt(position - 1);
if (_isLeadSurrogate(prevCodeUnit)) {
_position = position - 1;
_currentCodePoint = _combineSurrogatePair(prevCodeUnit, codeUnit);
return true;
}
}
_position = position;
_currentCodePoint = codeUnit;
return true;
}
}