-
Notifications
You must be signed in to change notification settings - Fork 4k
/
SourceText.cs
1173 lines (1018 loc) · 45.8 KB
/
SourceText.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.ComponentModel;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using Microsoft.CodeAnalysis.PooledObjects;
using Roslyn.Utilities;
namespace Microsoft.CodeAnalysis.Text
{
/// <summary>
/// An abstraction of source text.
/// </summary>
public abstract class SourceText
{
// Size, in chars, of each scratch buffer created for s_charArrayPool.
private const int CharBufferSize = 32 * 1024;
// Size argument passed to the s_charArrayPool constructor.
private const int CharBufferCount = 5;
// Threshold, in chars, at or above which the From overloads hand decoding to LargeText
// instead of materializing a single string.
internal const int LargeObjectHeapLimitInChars = 40 * 1024; // 40K chars (80KB of UTF-16 data)
// Pool of reusable char[CharBufferSize] scratch buffers used when copying text out in chunks.
private static readonly ObjectPool<char[]> s_charArrayPool = new ObjectPool<char[]>(() => new char[CharBufferSize], CharBufferCount);
// Algorithm validated and stored by the constructor; exposed through ChecksumAlgorithm.
private readonly SourceHashAlgorithm _checksumAlgorithm;
// Supplied by the constructor or created on first access of Container.
private SourceTextContainer? _lazyContainer;
// Created on first access of Lines.
private TextLineCollection? _lazyLineInfo;
// Supplied by the constructor or computed on the first call to GetChecksum; default means "not yet known".
private ImmutableArray<byte> _lazyChecksum;
// default: nothing was precomputed; empty: embedding is disallowed; otherwise: the precomputed blob.
private readonly ImmutableArray<byte> _precomputedEmbeddedTextBlob;
// Fallback encoding used by From(Stream, ...) and From(byte[], ...) when the caller passes no encoding.
private static readonly Encoding s_utf8EncodingWithNoBOM = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: false);
/// <summary>
/// Initializes the shared state of a <see cref="SourceText"/>.
/// </summary>
/// <param name="checksum">Precomputed checksum, or <c>default</c> to have it computed on demand by <see cref="GetChecksum"/>.</param>
/// <param name="checksumAlgorithm">Hash algorithm associated with the checksum.</param>
/// <param name="container">Container to report from <see cref="Container"/>, or <c>null</c> to create one lazily.</param>
/// <exception cref="ArgumentException">
/// <paramref name="checksumAlgorithm"/> is not supported, or <paramref name="checksum"/> does not have the
/// hash size of <paramref name="checksumAlgorithm"/>.
/// </exception>
protected SourceText(ImmutableArray<byte> checksum = default, SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1, SourceTextContainer? container = null)
{
    ValidateChecksumAlgorithm(checksumAlgorithm);

    // A supplied checksum must have exactly the size the algorithm produces.
    if (!checksum.IsDefault)
    {
        var expectedSize = CryptographicHashProvider.GetHashSize(checksumAlgorithm);
        if (checksum.Length != expectedSize)
        {
            throw new ArgumentException(CodeAnalysisResources.InvalidHash, nameof(checksum));
        }
    }

    _checksumAlgorithm = checksumAlgorithm;
    _lazyChecksum = checksum;
    _lazyContainer = container;
}
/// <summary>
/// Initializes a <see cref="SourceText"/> whose checksum, and optionally embedded text blob, were computed
/// from the original bytes.
/// </summary>
internal SourceText(ImmutableArray<byte> checksum, SourceHashAlgorithm checksumAlgorithm, ImmutableArray<byte> embeddedTextBlob)
    : this(checksum, checksumAlgorithm, container: null)
{
    // We should never have precomputed the embedded text blob without precomputing the checksum.
    Debug.Assert(embeddedTextBlob.IsDefault || !checksum.IsDefault);

    // A precomputed checksum without a blob means source bytes/stream were given but canBeEmbedded=true
    // was not passed. The blob cannot be computed lazily in that case, so the empty sentinel is stored.
    bool embeddingDisallowed = !checksum.IsDefault && embeddedTextBlob.IsDefault;
    _precomputedEmbeddedTextBlob = embeddingDisallowed
        ? ImmutableArray<byte>.Empty
        : embeddedTextBlob;
}
/// <summary>
/// Throws if <paramref name="checksumAlgorithm"/> is not a supported hash algorithm.
/// </summary>
/// <exception cref="ArgumentException"><paramref name="checksumAlgorithm"/> is not supported.</exception>
internal static void ValidateChecksumAlgorithm(SourceHashAlgorithm checksumAlgorithm)
{
    if (SourceHashAlgorithms.IsSupportedAlgorithm(checksumAlgorithm))
    {
        return;
    }

    throw new ArgumentException(CodeAnalysisResources.UnsupportedHashAlgorithm, nameof(checksumAlgorithm));
}
/// <summary>
/// Creates a <see cref="SourceText"/> that wraps the given string.
/// </summary>
/// <param name="text">Text.</param>
/// <param name="encoding">
/// Encoding of the file that the <paramref name="text"/> was read from or is going to be saved to.
/// <c>null</c> if the encoding is unspecified.
/// If the encoding is not specified the resulting <see cref="SourceText"/> isn't debuggable.
/// If an encoding-less <see cref="SourceText"/> is written to a file a <see cref="Encoding.UTF8"/> shall be used as a default.
/// </param>
/// <param name="checksumAlgorithm">
/// Hash algorithm to use to calculate checksum of the text that's saved to PDB.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="checksumAlgorithm"/> is not supported.</exception>
public static SourceText From(string text, Encoding? encoding = null, SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1)
{
    if (text is null)
    {
        throw new ArgumentNullException(nameof(text));
    }

    var result = new StringText(text, encoding, checksumAlgorithm: checksumAlgorithm);
    return result;
}
/// <summary>
/// Constructs a <see cref="SourceText"/> from the content of a <see cref="TextReader"/>.
/// </summary>
/// <param name="reader">TextReader</param>
/// <param name="length">length of content from <paramref name="reader"/></param>
/// <param name="encoding">
/// Encoding of the file that the <paramref name="reader"/> was read from or is going to be saved to.
/// <c>null</c> if the encoding is unspecified.
/// If the encoding is not specified the resulting <see cref="SourceText"/> isn't debuggable.
/// If an encoding-less <see cref="SourceText"/> is written to a file a <see cref="Encoding.UTF8"/> shall be used as a default.
/// </param>
/// <param name="checksumAlgorithm">
/// Hash algorithm to use to calculate checksum of the text that's saved to PDB.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="checksumAlgorithm"/> is not supported.</exception>
public static SourceText From(
    TextReader reader,
    int length,
    Encoding? encoding = null,
    SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1)
{
    if (reader is null)
    {
        throw new ArgumentNullException(nameof(reader));
    }

    // Short content is read into a single string.
    if (length < LargeObjectHeapLimitInChars)
    {
        string content = reader.ReadToEnd();
        return From(content, encoding, checksumAlgorithm);
    }

    // The resulting string would end up on the large object heap, so use LargeText instead.
    return LargeText.Decode(reader, length, encoding, checksumAlgorithm);
}
// 1.0 BACKCOMPAT OVERLOAD - DO NOT TOUCH
// Forwards to the five-parameter Stream overload with canBeEmbedded: false.
// Marked EditorBrowsableState.Never so editors do not offer it.
[EditorBrowsable(EditorBrowsableState.Never)]
public static SourceText From(Stream stream, Encoding? encoding, SourceHashAlgorithm checksumAlgorithm, bool throwIfBinaryDetected)
=> From(stream, encoding, checksumAlgorithm, throwIfBinaryDetected, canBeEmbedded: false);
/// <summary>
/// Constructs a <see cref="SourceText"/> from stream content.
/// </summary>
/// <param name="stream">
/// Stream to read. A seekable stream is read from its beginning. A non-seekable stream is first copied,
/// from its current position to its end, into an in-memory buffer so that the decoded text, the checksum
/// and the embedded text blob are all derived from the same bytes.
/// </param>
/// <param name="encoding">
/// Data encoding to use if the stream doesn't start with Byte Order Mark specifying the encoding.
/// <see cref="Encoding.UTF8"/> if not specified.
/// </param>
/// <param name="checksumAlgorithm">
/// Hash algorithm to use to calculate checksum of the text that's saved to PDB.
/// </param>
/// <param name="throwIfBinaryDetected">If the decoded text contains at least two consecutive NUL
/// characters, then an <see cref="InvalidDataException"/> is thrown.</param>
/// <param name="canBeEmbedded">True if the text can be passed to <see cref="EmbeddedText.FromSource"/> and be embedded in a PDB.</param>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="stream"/> doesn't support reading.
/// <paramref name="checksumAlgorithm"/> is not supported.
/// </exception>
/// <exception cref="DecoderFallbackException">If the given encoding is set to use a throwing decoder as a fallback</exception>
/// <exception cref="InvalidDataException">Two consecutive NUL characters were detected in the decoded text and <paramref name="throwIfBinaryDetected"/> was true.</exception>
/// <exception cref="IOException">An I/O error occurs.</exception>
/// <remarks>Leaves the stream open.</remarks>
public static SourceText From(
    Stream stream,
    Encoding? encoding = null,
    SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1,
    bool throwIfBinaryDetected = false,
    bool canBeEmbedded = false)
{
    if (stream == null)
    {
        throw new ArgumentNullException(nameof(stream));
    }

    if (!stream.CanRead)
    {
        throw new ArgumentException(CodeAnalysisResources.StreamMustSupportReadAndSeek, nameof(stream));
    }

    ValidateChecksumAlgorithm(checksumAlgorithm);

    if (!stream.CanSeek)
    {
        // Decoding consumes the stream, and CalculateChecksum can only rewind a seekable stream.
        // Hashing a non-seekable stream after decoding would therefore hash whatever is left (nothing)
        // rather than the bytes that were decoded. Buffer the content once and run the normal,
        // seekable path over the copy.
        using (var buffered = new MemoryStream())
        {
            stream.CopyTo(buffered);
            buffered.Position = 0;
            return From(buffered, encoding, checksumAlgorithm, throwIfBinaryDetected, canBeEmbedded: canBeEmbedded);
        }
    }

    encoding = encoding ?? s_utf8EncodingWithNoBOM;

    // If the resulting string would end up on the large object heap, then use LargeText.
    if (encoding.GetMaxCharCountOrThrowIfHuge(stream) >= LargeObjectHeapLimitInChars)
    {
        return LargeText.Decode(stream, encoding, checksumAlgorithm, throwIfBinaryDetected, canBeEmbedded);
    }

    string text = Decode(stream, encoding, out encoding);
    if (throwIfBinaryDetected && IsBinary(text))
    {
        throw new InvalidDataException();
    }

    // We must compute the checksum and embedded text blob now while we still have the original bytes in hand.
    // We cannot re-encode to obtain checksum and blob as the encoding is not guaranteed to round-trip.
    var checksum = CalculateChecksum(stream, checksumAlgorithm);
    var embeddedTextBlob = canBeEmbedded ? EmbeddedText.CreateBlob(stream) : default(ImmutableArray<byte>);
    return new StringText(text, encoding, checksum, checksumAlgorithm, embeddedTextBlob);
}
// 1.0 BACKCOMPAT OVERLOAD - DO NOT TOUCH
// Forwards to the six-parameter byte[] overload with canBeEmbedded: false.
// Marked EditorBrowsableState.Never so editors do not offer it.
[EditorBrowsable(EditorBrowsableState.Never)]
public static SourceText From(byte[] buffer, int length, Encoding? encoding, SourceHashAlgorithm checksumAlgorithm, bool throwIfBinaryDetected)
=> From(buffer, length, encoding, checksumAlgorithm, throwIfBinaryDetected, canBeEmbedded: false);
/// <summary>
/// Constructs a <see cref="SourceText"/> from a byte array.
/// </summary>
/// <param name="buffer">The encoded source buffer.</param>
/// <param name="length">The number of bytes to read from the buffer.</param>
/// <param name="encoding">
/// Data encoding to use if the encoded buffer doesn't start with Byte Order Mark.
/// <see cref="Encoding.UTF8"/> if not specified.
/// </param>
/// <param name="checksumAlgorithm">
/// Hash algorithm to use to calculate checksum of the text that's saved to PDB.
/// </param>
/// <param name="throwIfBinaryDetected">If the decoded text contains at least two consecutive NUL
/// characters, then an <see cref="InvalidDataException"/> is thrown.</param>
/// <param name="canBeEmbedded">True if the text can be passed to <see cref="EmbeddedText.FromSource"/> and be embedded in a PDB.</param>
/// <returns>The decoded text.</returns>
/// <exception cref="ArgumentNullException">The <paramref name="buffer"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">The <paramref name="length"/> is negative or longer than the <paramref name="buffer"/>.</exception>
/// <exception cref="ArgumentException"><paramref name="checksumAlgorithm"/> is not supported.</exception>
/// <exception cref="DecoderFallbackException">If the given encoding is set to use a throwing decoder as a fallback</exception>
/// <exception cref="InvalidDataException">Two consecutive NUL characters were detected in the decoded text and <paramref name="throwIfBinaryDetected"/> was true.</exception>
public static SourceText From(
    byte[] buffer,
    int length,
    Encoding? encoding = null,
    SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1,
    bool throwIfBinaryDetected = false,
    bool canBeEmbedded = false)
{
    if (buffer is null)
    {
        throw new ArgumentNullException(nameof(buffer));
    }

    if (length < 0 || length > buffer.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(length));
    }

    ValidateChecksumAlgorithm(checksumAlgorithm);

    Encoding fallbackEncoding = encoding ?? s_utf8EncodingWithNoBOM;
    string decoded = Decode(buffer, length, fallbackEncoding, out Encoding actualEncoding);
    if (throwIfBinaryDetected && IsBinary(decoded))
    {
        throw new InvalidDataException();
    }

    // The checksum and embedded text blob are computed from the original bytes while they are still
    // at hand; re-encoding the decoded text is not guaranteed to round-trip.
    var checksum = CalculateChecksum(buffer, 0, length, checksumAlgorithm);

    ImmutableArray<byte> embeddedTextBlob;
    if (canBeEmbedded)
    {
        embeddedTextBlob = EmbeddedText.CreateBlob(new ArraySegment<byte>(buffer, 0, length));
    }
    else
    {
        embeddedTextBlob = default(ImmutableArray<byte>);
    }

    return new StringText(decoded, actualEncoding, checksum, checksumAlgorithm, embeddedTextBlob);
}
/// <summary>
/// Decode text from a stream.
/// </summary>
/// <param name="stream">The stream containing encoded text. A seekable stream is rewound to its beginning first.</param>
/// <param name="encoding">The encoding to use if an encoding cannot be determined from the byte order mark.</param>
/// <param name="actualEncoding">The actual encoding used.</param>
/// <returns>The decoded text.</returns>
/// <exception cref="DecoderFallbackException">If the given encoding is set to use a throwing decoder as a fallback</exception>
private static string Decode(Stream stream, Encoding encoding, out Encoding actualEncoding)
{
    RoslynDebug.Assert(stream != null);
    RoslynDebug.Assert(encoding != null);

    const int maxBufferSize = 4096;
    int bufferSize = maxBufferSize;

    if (stream.CanSeek)
    {
        stream.Seek(0, SeekOrigin.Begin);

        // Keep the length as a long: narrowing it to int could wrap (even to 0) for very long streams,
        // which would make the empty-stream shortcut below return an empty string for non-empty content.
        long length = stream.Length;
        if (length == 0)
        {
            actualEncoding = encoding;
            return string.Empty;
        }

        // The result is at most maxBufferSize, so the narrowing conversion is safe here.
        bufferSize = (int)Math.Min(maxBufferSize, length);
    }

    // Note: We are setting the buffer size to 4KB instead of the default 1KB. That's
    // because we can reach this code path for FileStreams and, to avoid FileStream
    // buffer allocations for small files, we may intentionally be using a FileStream
    // with a very small (1 byte) buffer. Using 4KB here matches the default buffer
    // size for FileStream and means we'll still be doing file I/O in 4KB chunks.
    using (var reader = new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks: true, bufferSize: bufferSize, leaveOpen: true))
    {
        string text = reader.ReadToEnd();
        actualEncoding = reader.CurrentEncoding;
        return text;
    }
}
/// <summary>
/// Decode text from a byte array.
/// </summary>
/// <param name="buffer">The byte array containing encoded text.</param>
/// <param name="length">The count of valid bytes in <paramref name="buffer"/>.</param>
/// <param name="encoding">The encoding to use if an encoding cannot be determined from the byte order mark.</param>
/// <param name="actualEncoding">The actual encoding used.</param>
/// <returns>The decoded text.</returns>
/// <exception cref="DecoderFallbackException">If the given encoding is set to use a throwing decoder as a fallback</exception>
private static string Decode(byte[] buffer, int length, Encoding encoding, out Encoding actualEncoding)
{
    RoslynDebug.Assert(buffer != null);
    RoslynDebug.Assert(encoding != null);

    // An encoding identified by a byte order mark wins over the caller-supplied one.
    var detected = TryReadByteOrderMark(buffer, length, out int preambleLength);
    actualEncoding = detected ?? encoding;

    // Decode everything that follows the preamble.
    int byteCount = length - preambleLength;
    return actualEncoding.GetString(buffer, preambleLength, byteCount);
}
/// <summary>
/// Reports whether the text contains two consecutive NUL (U+0000) characters.
/// Genuine text is unlikely to contain that sequence, which makes it a useful
/// heuristic for detecting binary files.
/// </summary>
/// <remarks>
/// internal for unit testing
/// </remarks>
internal static bool IsBinary(ReadOnlySpan<char> text)
{
#if NETCOREAPP
    // On .NET Core, Contains has an optimized vectorized implementation, much faster than a custom loop.
    return text.Contains("\0\0", StringComparison.Ordinal);
#else
    // PERF: Only every other char needs inspecting until a NUL is seen; after a NUL the
    // scan moves forward one char at a time so an adjacent NUL cannot be stepped over.
    int index = 1;
    while (index < text.Length)
    {
        if (text[index] != '\0')
        {
            index += 2;
            continue;
        }

        if (text[index - 1] == '\0')
        {
            return true;
        }

        index++;
    }

    return false;
#endif
}
/// <inheritdoc cref="IsBinary(ReadOnlySpan{char})" />
internal static bool IsBinary(string text)
{
    ReadOnlySpan<char> chars = text.AsSpan();
    return IsBinary(chars);
}
/// <summary>
/// Hash algorithm used to calculate the checksum of the text that's saved to PDB.
/// </summary>
public SourceHashAlgorithm ChecksumAlgorithm
{
    get { return _checksumAlgorithm; }
}
/// <summary>
/// Encoding of the file that the text was read from or is going to be saved to.
/// <c>null</c> if the encoding is unspecified.
/// </summary>
/// <remarks>
/// If the encoding is not specified the source isn't debuggable.
/// If an encoding-less <see cref="SourceText"/> is written to a file a <see cref="Encoding.UTF8"/> shall be used as a default.
/// When no embedded text blob was precomputed, <see cref="CanBeEmbedded"/> is <c>true</c> only if this value is non-null.
/// </remarks>
public abstract Encoding? Encoding { get; }
/// <summary>
/// The length of the text in characters.
/// </summary>
/// <remarks>
/// Spans and start positions passed to members such as <see cref="GetSubText(int)"/> are
/// validated against this value.
/// </remarks>
public abstract int Length { get; }
/// <summary>
/// The size of the storage representation of the text (in characters).
/// This can differ from length when storage buffers are reused to represent fragments/subtext.
/// The default implementation returns <see cref="Length"/>.
/// </summary>
internal virtual int StorageSize => this.Length;
/// <summary>
/// The segments of this text. The default implementation returns an empty array.
/// </summary>
internal virtual ImmutableArray<SourceText> Segments => ImmutableArray<SourceText>.Empty;
/// <summary>
/// The text used as the storage key. The default implementation returns this instance.
/// </summary>
internal virtual SourceText StorageKey => this;
/// <summary>
/// Indicates whether this source text can be embedded in the PDB.
/// </summary>
/// <remarks>
/// If this text was constructed via <see cref="From(byte[], int, Encoding, SourceHashAlgorithm, bool, bool)"/> or
/// <see cref="From(Stream, Encoding, SourceHashAlgorithm, bool, bool)"/>, then the canBeEmbedded arg must have
/// been true.
///
/// Otherwise, <see cref="Encoding" /> must be non-null.
/// </remarks>
public bool CanBeEmbedded
    => _precomputedEmbeddedTextBlob.IsDefault
        // No blob was precomputed from bytes/stream: embedding is only possible when there is
        // an encoding with which to encode the text in the PDB.
        ? Encoding != null
        // A blob was recorded: the empty blob is the sentinel meaning embedding has been disallowed.
        : !_precomputedEmbeddedTextBlob.IsEmpty;
/// <summary>
/// The embedded text blob that was precomputed from the original stream or byte[] when the text
/// was created with the canBeEmbedded argument set to true. Precomputing it from the original
/// input ensures the bytes written to the PDB match the original bytes exactly (and match the
/// checksum of the original bytes).
/// </summary>
internal ImmutableArray<byte> PrecomputedEmbeddedTextBlob
{
    get { return _precomputedEmbeddedTextBlob; }
}
/// <summary>
/// Returns a character at given position.
/// </summary>
/// <param name="position">The position to get the character from.</param>
/// <returns>The character.</returns>
/// <exception cref="ArgumentOutOfRangeException">When position is negative or
/// greater than <see cref="Length"/>.</exception>
/// <remarks>
/// NOTE(review): whether a position equal to <see cref="Length"/> is accepted is decided by each
/// implementation and cannot be determined from this declaration - confirm against the subclasses.
/// </remarks>
public abstract char this[int position] { get; }
/// <summary>
/// Copy a range of characters from this SourceText to a destination array.
/// </summary>
/// <param name="sourceIndex">Position in this text of the first character to copy.</param>
/// <param name="destination">Array that receives the characters.</param>
/// <param name="destinationIndex">Index in <paramref name="destination"/> at which the first character is stored.</param>
/// <param name="count">Number of characters to copy.</param>
public abstract void CopyTo(int sourceIndex, char[] destination, int destinationIndex, int count);
/// <summary>
/// The container of this <see cref="SourceText"/>.
/// </summary>
/// <remarks>
/// If no container was supplied to the constructor, a <see cref="StaticContainer"/> wrapping this text
/// is created on first access and cached.
/// </remarks>
public virtual SourceTextContainer Container
{
get
{
if (_lazyContainer == null)
{
// Publish the new container only if the field is still null, so that a container
// stored by a concurrent caller is kept and returned to everyone.
Interlocked.CompareExchange(ref _lazyContainer, new StaticContainer(this), null);
}
return _lazyContainer;
}
}
/// <summary>
/// Verifies that <paramref name="span"/> does not extend past the end of this text.
/// </summary>
/// <exception cref="ArgumentOutOfRangeException">The end of <paramref name="span"/> is greater than <see cref="Length"/>.</exception>
internal void CheckSubSpan(TextSpan span)
{
    Debug.Assert(0 <= span.Start && span.Start <= span.End);

    if (span.End <= this.Length)
    {
        return;
    }

    throw new ArgumentOutOfRangeException(nameof(span));
}
/// <summary>
/// Gets a <see cref="SourceText"/> that contains the characters in the specified span of this text.
/// </summary>
/// <remarks>
/// An empty span yields a new empty text with this text's encoding and checksum algorithm; a span
/// covering the whole text yields this instance.
/// </remarks>
/// <exception cref="ArgumentOutOfRangeException">The span extends past the end of this text.</exception>
public virtual SourceText GetSubText(TextSpan span)
{
    CheckSubSpan(span);

    int requestedLength = span.Length;
    if (requestedLength == 0)
    {
        return SourceText.From(string.Empty, this.Encoding, this.ChecksumAlgorithm);
    }

    if (requestedLength == this.Length && span.Start == 0)
    {
        return this;
    }

    return new SubText(this, span);
}
/// <summary>
/// Returns a <see cref="SourceText"/> that has the contents of this text including and after the start position.
/// </summary>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="start"/> is negative or greater than <see cref="Length"/>.</exception>
public SourceText GetSubText(int start)
{
    if (start < 0 || start > this.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(start));
    }

    // Starting at the beginning means the whole text is requested.
    return start == 0
        ? this
        : this.GetSubText(new TextSpan(start, this.Length - start));
}
/// <summary>
/// Write the whole of this <see cref="SourceText"/> to a text writer.
/// </summary>
public void Write(TextWriter textWriter, CancellationToken cancellationToken = default(CancellationToken))
    => this.Write(textWriter, new TextSpan(0, this.Length), cancellationToken);
/// <summary>
/// Write a span of text to a text writer.
/// </summary>
/// <param name="writer">Writer that receives the characters.</param>
/// <param name="span">Span of this text to write.</param>
/// <param name="cancellationToken">Checked before each chunk is copied.</param>
/// <exception cref="ArgumentOutOfRangeException">The span extends past the end of this text.</exception>
public virtual void Write(TextWriter writer, TextSpan span, CancellationToken cancellationToken = default(CancellationToken))
{
    CheckSubSpan(span);

    char[] chunk = s_charArrayPool.Allocate();
    try
    {
        int stop = span.End;

        // Copy the span out one pooled buffer at a time.
        for (int cursor = span.Start; cursor < stop;)
        {
            cancellationToken.ThrowIfCancellationRequested();

            int chunkLength = Math.Min(chunk.Length, stop - cursor);
            this.CopyTo(cursor, chunk, 0, chunkLength);
            writer.Write(chunk, 0, chunkLength);

            cursor += chunkLength;
        }
    }
    finally
    {
        s_charArrayPool.Free(chunk);
    }
}
/// <summary>
/// Returns the checksum of this text. If no checksum was supplied at construction it is computed on
/// first use, with <see cref="ChecksumAlgorithm"/>, over a stream of this text, and then cached.
/// </summary>
public ImmutableArray<byte> GetChecksum()
{
    if (!_lazyChecksum.IsDefault)
    {
        return _lazyChecksum;
    }

    using (var textStream = new SourceTextStream(this, useDefaultEncodingIfNull: true))
    {
        var computed = CalculateChecksum(textStream, _checksumAlgorithm);

        // Only the first value to be stored is kept.
        ImmutableInterlocked.InterlockedInitialize(ref _lazyChecksum, computed);
    }

    return _lazyChecksum;
}
/// <summary>
/// Computes the hash of <paramref name="count"/> bytes of <paramref name="buffer"/>, starting at
/// <paramref name="offset"/>, using the algorithm identified by <paramref name="algorithmId"/>.
/// </summary>
internal static ImmutableArray<byte> CalculateChecksum(byte[] buffer, int offset, int count, SourceHashAlgorithm algorithmId)
{
    using (var hasher = CryptographicHashProvider.TryGetAlgorithm(algorithmId))
    {
        RoslynDebug.Assert(hasher != null);

        byte[] hash = hasher.ComputeHash(buffer, offset, count);
        return ImmutableArray.Create(hash);
    }
}
/// <summary>
/// Computes the hash of <paramref name="stream"/> using the algorithm identified by
/// <paramref name="algorithmId"/>. A seekable stream is rewound to its beginning first; a
/// non-seekable stream is hashed from its current position.
/// </summary>
internal static ImmutableArray<byte> CalculateChecksum(Stream stream, SourceHashAlgorithm algorithmId)
{
    using (var hasher = CryptographicHashProvider.TryGetAlgorithm(algorithmId))
    {
        RoslynDebug.Assert(hasher != null);

        bool canRewind = stream.CanSeek;
        if (canRewind)
        {
            stream.Seek(0, SeekOrigin.Begin);
        }

        byte[] hash = hasher.ComputeHash(stream);
        return ImmutableArray.Create(hash);
    }
}
/// <summary>
/// Returns the entire content of this SourceText as a string.
/// </summary>
public override string ToString()
{
    var everything = new TextSpan(0, this.Length);
    return ToString(everything);
}
/// <summary>
/// Gets a string containing the characters in specified span.
/// </summary>
/// <param name="span">Span of this text to convert.</param>
/// <returns>The characters of this text that lie within <paramref name="span"/>.</returns>
/// <exception cref="ArgumentOutOfRangeException">When given span is outside of the text range.</exception>
public virtual string ToString(TextSpan span)
{
    CheckSubSpan(span);

    // default implementation constructs text using CopyTo
    var builder = PooledStringBuilder.GetInstance();
    var buffer = s_charArrayPool.Allocate();
    try
    {
        int position = Math.Max(Math.Min(span.Start, this.Length), 0);
        int length = Math.Min(span.End, this.Length) - position;
        builder.Builder.EnsureCapacity(length);

        // Copy the span out one pooled buffer at a time.
        while (position < this.Length && length > 0)
        {
            int copyLength = Math.Min(buffer.Length, length);
            this.CopyTo(position, buffer, 0, copyLength);
            builder.Builder.Append(buffer, 0, copyLength);
            length -= copyLength;
            position += copyLength;
        }
    }
    finally
    {
        // Return the scratch buffer to the pool even if CopyTo throws, as Write does.
        s_charArrayPool.Free(buffer);
    }

    return builder.ToStringAndFree();
}
#region Changes
/// <summary>
/// Constructs a new SourceText from this text with the specified changes.
/// </summary>
/// <param name="changes">The changes to apply, expressed against this text.</param>
/// <returns>
/// The changed text, or this instance when <paramref name="changes"/> is empty or none of the changes
/// alters anything.
/// </returns>
/// <remarks>
/// The sequence is enumerated more than once. When a change lies entirely before the previously applied
/// change, the changes are sorted by span (dropping those that change nothing) and the operation is retried.
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="changes"/> is null.</exception>
/// <exception cref="ArgumentException">A change extends past the end of this text, or changes overlap.</exception>
public virtual SourceText WithChanges(IEnumerable<TextChange> changes)
{
if (changes == null)
{
throw new ArgumentNullException(nameof(changes));
}
if (!changes.Any())
{
return this;
}
// Pooled builders: the new text's segments in order, and one range per applied change.
var segments = ArrayBuilder<SourceText>.GetInstance();
var changeRanges = ArrayBuilder<TextChangeRange>.GetInstance();
try
{
// End of the last applied change, i.e. the first position of this text not yet emitted.
int position = 0;
foreach (var change in changes)
{
if (change.Span.End > this.Length)
throw new ArgumentException(CodeAnalysisResources.ChangesMustBeWithinBoundsOfSourceText, nameof(changes));
// there can be no overlapping changes
if (change.Span.Start < position)
{
// Handle the case of unordered changes by sorting the input and retrying. This is inefficient, but
// downstream consumers have been known to hit this case in the past and we want to avoid crashes.
// https://github.com/dotnet/roslyn/pull/26339
if (change.Span.End <= changeRanges.Last().Span.Start)
{
changes = (from c in changes
where !c.Span.IsEmpty || c.NewText?.Length > 0
orderby c.Span
select c).ToList();
return WithChanges(changes);
}
throw new ArgumentException(CodeAnalysisResources.ChangesMustNotOverlap, nameof(changes));
}
var newTextLength = change.NewText?.Length ?? 0;
// ignore changes that don't change anything
if (change.Span.Length == 0 && newTextLength == 0)
continue;
// if we've skipped a range of unchanged text, add it as a segment
if (change.Span.Start > position)
{
var subText = this.GetSubText(new TextSpan(position, change.Span.Start - position));
CompositeText.AddSegments(segments, subText);
}
if (newTextLength > 0)
{
var segment = SourceText.From(change.NewText!, this.Encoding, this.ChecksumAlgorithm);
CompositeText.AddSegments(segments, segment);
}
position = change.Span.End;
changeRanges.Add(new TextChangeRange(change.Span, newTextLength));
}
// no changes actually happened?
if (position == 0 && segments.Count == 0)
{
return this;
}
// Append the unchanged text that follows the last change.
if (position < this.Length)
{
var subText = this.GetSubText(new TextSpan(position, this.Length - position));
CompositeText.AddSegments(segments, subText);
}
var newText = CompositeText.ToSourceText(segments, this, adjustSegments: true);
if (newText != this)
{
return new ChangedText(this, newText, changeRanges.ToImmutable());
}
else
{
return this;
}
}
finally
{
// Return both builders to their pools on every exit path.
segments.Free();
changeRanges.Free();
}
}
/// <summary>
/// Constructs a new SourceText from this text with the specified changes.
/// </summary>
/// <remarks>
/// Changes do not have to be in sorted order. However, <see cref="WithChanges(IEnumerable{TextChange})"/> will
/// perform better if they are.
/// </remarks>
/// <exception cref="ArgumentException">If any changes are not in bounds of this <see cref="SourceText"/>.</exception>
/// <exception cref="ArgumentException">If any changes overlap other changes.</exception>
public SourceText WithChanges(params TextChange[] changes)
    => this.WithChanges((IEnumerable<TextChange>)changes);
/// <summary>
/// Returns a new SourceText in which the characters of the specified span are replaced by the new text.
/// </summary>
public SourceText Replace(TextSpan span, string newText)
    => this.WithChanges(new TextChange(span, newText));
/// <summary>
/// Returns a new SourceText in which <paramref name="length"/> characters starting at
/// <paramref name="start"/> are replaced by the new text.
/// </summary>
public SourceText Replace(int start, int length, string newText)
{
    var span = new TextSpan(start, length);
    return this.Replace(span, newText);
}
/// <summary>
/// Gets the set of <see cref="TextChangeRange"/> that describe how the text changed
/// between this text and an older version. This may be multiple detailed changes
/// or a single change encompassing the entire text.
/// </summary>
/// <remarks>
/// This implementation reports no changes when <paramref name="oldText"/> is this instance, and
/// otherwise a single range replacing the whole of the old text with the whole of this text.
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="oldText"/> is null.</exception>
public virtual IReadOnlyList<TextChangeRange> GetChangeRanges(SourceText oldText)
{
    if (oldText == null)
    {
        throw new ArgumentNullException(nameof(oldText));
    }

    if (oldText == this)
    {
        return TextChangeRange.NoChanges;
    }

    var entireOldText = new TextSpan(0, oldText.Length);
    var replaceEverything = new TextChangeRange(entireOldText, this.Length);
    return ImmutableArray.Create(replaceEverything);
}
/// <summary>
/// Gets the set of <see cref="TextChange"/> that describe how the text changed
/// between this text and an older version. This may be multiple detailed changes
/// or a single change encompassing the entire text.
/// </summary>
public virtual IReadOnlyList<TextChange> GetTextChanges(SourceText oldText)
{
    var changeRanges = this.GetChangeRanges(oldText).ToList();
    var result = new List<TextChange>(changeRanges.Count);

    // Running difference between a position in the old text and the same position in this text.
    int offsetIntoNewText = 0;

    foreach (var changeRange in changeRanges)
    {
        // The range's span is expressed against the old text; shift it to locate the replacement here.
        int startInNewText = changeRange.Span.Start + offsetIntoNewText;

        string replacement = changeRange.NewLength > 0
            ? this.ToString(new TextSpan(startInNewText, changeRange.NewLength))
            : string.Empty;

        result.Add(new TextChange(changeRange.Span, replacement));
        offsetIntoNewText += changeRange.NewLength - changeRange.Span.Length;
    }

    return result.ToImmutableArrayOrEmpty();
}
#endregion
#region Lines
/// <summary>
/// The collection of individual text lines.
/// </summary>
/// <remarks>
/// The collection is created by <see cref="GetLinesCore"/> on first access and cached.
/// </remarks>
public TextLineCollection Lines
{
get
{
var info = _lazyLineInfo;
// Cached value if present. Otherwise build a collection and try to publish it: CompareExchange
// returns the previous field value, so a non-null result is a collection published by another
// caller (which is used), and a null result means the freshly built one in `info` was stored.
return info ?? Interlocked.CompareExchange(ref _lazyLineInfo, info = GetLinesCore(), null) ?? info;
}
}
/// <summary>
/// Gets the cached line collection, if it has already been created, without creating it.
/// </summary>
/// <param name="lines">The cached collection, or <c>null</c> when none has been created yet.</param>
/// <returns><c>true</c> when a cached collection exists.</returns>
internal bool TryGetLines([NotNullWhen(returnValue: true)] out TextLineCollection? lines)
{
    lines = _lazyLineInfo;
    if (lines == null)
    {
        return false;
    }

    return true;
}
/// <summary>
/// Called from <see cref="Lines"/> to initialize the <see cref="TextLineCollection"/>. Thereafter,
/// the collection is cached.
/// </summary>
/// <returns>A new <see cref="TextLineCollection"/> representing the individual text lines.</returns>
protected virtual TextLineCollection GetLinesCore()
{
    int[] lineStarts = ParseLineStarts();
    return new LineInfo(this, lineStarts);
}
/// <summary>
/// A <see cref="TextLineCollection"/> backed by an array holding the start position of each line.
/// </summary>
internal sealed class LineInfo : TextLineCollection
{
    private readonly SourceText _text;
    private readonly int[] _lineStarts;

    // Result of the most recent IndexOf call; the next lookup starts scanning from this line.
    private int _lastLineNumber;

    public LineInfo(SourceText text, int[] lineStarts)
    {
        _lineStarts = lineStarts;
        _text = text;
    }

    /// <summary>The number of lines.</summary>
    public override int Count
    {
        get { return _lineStarts.Length; }
    }

    /// <summary>Gets the line at the given index.</summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> is negative or not less than <see cref="Count"/>.</exception>
    public override TextLine this[int index]
    {
        get
        {
            if (index < 0 || index >= _lineStarts.Length)
            {
                throw new ArgumentOutOfRangeException(nameof(index));
            }

            int start = _lineStarts[index];

            // The last line runs to the end of the text; every other line runs to the next line start.
            bool isLastLine = index == _lineStarts.Length - 1;
            int end = isLastLine ? _text.Length : _lineStarts[index + 1];

            return TextLine.FromSpan(_text, TextSpan.FromBounds(start, end));
        }
    }

    /// <summary>Gets the index of the line containing the given position.</summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="position"/> is negative or greater than the text length.</exception>
    public override int IndexOf(int position)
    {
        if (position < 0 || position > _text.Length)
        {
            throw new ArgumentOutOfRangeException(nameof(position));
        }

        // it is common to ask about position on the same line
        // as before or on the next couple lines
        int previous = _lastLineNumber;
        if (position >= _lineStarts[previous])
        {
            int limit = Math.Min(_lineStarts.Length, previous + 4);
            for (int candidate = previous; candidate < limit; candidate++)
            {
                if (position < _lineStarts[candidate])
                {
                    int nearby = candidate - 1;
                    _lastLineNumber = nearby;
                    return nearby;
                }
            }
        }

        // Binary search to find the right line
        // if no lines start exactly at position, round to the left
        // EoF position will map to the last line.
        int found = _lineStarts.BinarySearch(position);
        if (found < 0)
        {
            found = (~found) - 1;
        }

        _lastLineNumber = found;
        return found;
    }

    /// <summary>Gets the line containing the given position.</summary>
    public override TextLine GetLineFromPosition(int position)
        => this[IndexOf(position)];
}
/// <summary>
/// Feeds the text to <paramref name="action"/> one pooled buffer at a time.
/// </summary>
/// <param name="action">
/// Invoked with the position in this text of the first char in the buffer, the buffer, and the number
/// of valid chars in it. After the last chunk it is invoked once more with a count of zero to signal
/// the end. The buffer is reused between calls and must not be retained.
/// </param>
private void EnumerateChars(Action<int, char[], int> action)
{
    var position = 0;
    var buffer = s_charArrayPool.Allocate();
    try
    {
        var length = this.Length;
        while (position < length)
        {
            var contentLength = Math.Min(length - position, buffer.Length);
            this.CopyTo(position, buffer, 0, contentLength);
            action(position, buffer, contentLength);
            position += contentLength;
        }

        // once more with zero length to signal the end
        action(position, buffer, 0);
    }
    finally
    {
        // Return the scratch buffer to the pool even if CopyTo or the callback throws, as Write does.
        s_charArrayPool.Free(buffer);
    }
}
private int[] ParseLineStarts()
{
// Corner case check
if (0 == this.Length)
{
return new[] { 0 };
}
var lineStarts = ArrayBuilder<int>.GetInstance();
lineStarts.Add(0); // there is always the first line
var lastWasCR = false;
// The following loop goes through every character in the text. It is highly
// performance critical, and thus inlines knowledge about common line breaks
// and non-line breaks.
EnumerateChars((int position, char[] buffer, int length) =>
{
var index = 0;
if (lastWasCR)
{
if (length > 0 && buffer[0] == '\n')
{
index++;
}
lineStarts.Add(position + index);
lastWasCR = false;
}
while (index < length)
{
char c = buffer[index];
index++;
// Common case - ASCII & not a line break
// if (c > '\r' && c <= 127)
// if (c >= ('\r'+1) && c <= 127)
const uint bias = '\r' + 1;
if (unchecked(c - bias) <= (127 - bias))
{
continue;
}
// Assumes that the only 2-char line break sequence is CR+LF