diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/JsonConstants.cs b/src/libraries/System.Text.Json/src/System/Text/Json/JsonConstants.cs index 23e35a6e70c694..06f67c379b388a 100644 --- a/src/libraries/System.Text.Json/src/System/Text/Json/JsonConstants.cs +++ b/src/libraries/System.Text.Json/src/System/Text/Json/JsonConstants.cs @@ -54,9 +54,14 @@ internal static partial class JsonConstants public const int RemoveFlagsBitMask = 0x7FFFFFFF; // In the worst case, an ASCII character represented as a single utf-8 byte could expand 6x when escaped. - // For example: '+' becomes '\u0043' + // For example: '+' becomes '\u002B' // Escaping surrogate pairs (represented by 3 or 4 utf-8 bytes) would expand to 12 bytes (which is still <= 6x). // The same factor applies to utf-16 characters. + // This factor also serves as an upper bound for the combined escaping-and-transcoding pipeline. + // A non-ASCII Unicode character is either: + // - escaped into an ASCII sequence (e.g. \uXXXX), so 1 UTF-16 char -> at most 6 UTF-8 bytes, or + // - written directly as UTF-8 (e.g. when using a non-default encoder such as UnsafeRelaxedJsonEscaping), + // expanding at most 3x (MaxExpansionFactorWhileTranscoding), which is <= 6. public const int MaxExpansionFactorWhileEscaping = 6; // In the worst case, a single UTF-16 character could be expanded to 3 UTF-8 bytes. diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteValues.String.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteValues.String.cs index 5ff4064d2b59bd..fcbc5b6f9e0583 100644 --- a/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteValues.String.cs +++ b/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteValues.String.cs @@ -91,11 +91,12 @@ private void WriteStringEscape(ReadOnlySpan<char> value) } else { - WriteStringByOptions(value); + // Each input char may transcode to up to 3 bytes. 
+ WriteStringByOptions(value, value.Length * JsonConstants.MaxExpansionFactorWhileTranscoding); } } - private void WriteStringByOptions(ReadOnlySpan<char> value) + private void WriteStringByOptions(ReadOnlySpan<char> value, int maxRequiredBytes) { if (!_options.SkipValidation) { @@ -104,22 +105,21 @@ private void WriteStringByOptions(ReadOnlySpan<char> value) if (_options.Indented) { - WriteStringIndented(value); + WriteStringIndented(value, maxRequiredBytes); } else { - WriteStringMinimized(value); + WriteStringMinimized(value, maxRequiredBytes); } } // TODO: https://github.com/dotnet/runtime/issues/29293 - private void WriteStringMinimized(ReadOnlySpan<char> escapedValue) + private void WriteStringMinimized(ReadOnlySpan<char> escapedValue, int maxRequiredBytes) { - Debug.Assert(escapedValue.Length < (int.MaxValue / JsonConstants.MaxExpansionFactorWhileTranscoding) - 3); + Debug.Assert(maxRequiredBytes >= 0 && maxRequiredBytes < int.MaxValue - 3); - // All ASCII, 2 quotes => escapedValue.Length + 2 - // Optionally, 1 list separator, and up to 3x growth when transcoding - int maxRequired = (escapedValue.Length * JsonConstants.MaxExpansionFactorWhileTranscoding) + 3; + // 2 quotes + optional 1 list separator, plus precomputed max bytes for the payload. 
+ int maxRequired = maxRequiredBytes + 3; if (_memory.Length - BytesPending < maxRequired) { @@ -140,16 +140,14 @@ private void WriteStringMinimized(ReadOnlySpan<char> escapedValue) } // TODO: https://github.com/dotnet/runtime/issues/29293 - private void WriteStringIndented(ReadOnlySpan<char> escapedValue) + private void WriteStringIndented(ReadOnlySpan<char> escapedValue, int maxRequiredBytes) { int indent = Indentation; Debug.Assert(indent <= _indentLength * _options.MaxDepth); + Debug.Assert(maxRequiredBytes >= 0 && maxRequiredBytes < int.MaxValue - indent - 3 - _newLineLength); - Debug.Assert(escapedValue.Length < (int.MaxValue / JsonConstants.MaxExpansionFactorWhileTranscoding) - indent - 3 - _newLineLength); - - // All ASCII, 2 quotes => indent + escapedValue.Length + 2 - // Optionally, 1 list separator, 1-2 bytes for new line, and up to 3x growth when transcoding - int maxRequired = indent + (escapedValue.Length * JsonConstants.MaxExpansionFactorWhileTranscoding) + 3 + _newLineLength; + // indent + 2 quotes + optional 1 list separator + 1-2 bytes for new line, plus precomputed max bytes for the payload. + int maxRequired = indent + maxRequiredBytes + 3 + _newLineLength; if (_memory.Length - BytesPending < maxRequired) { @@ -195,7 +193,11 @@ private void WriteStringEscapeValue(ReadOnlySpan<char> value, int firstEscapeInd JsonWriterHelper.EscapeString(value, escapedValue, firstEscapeIndexVal, _options.Encoder, out int written); - WriteStringByOptions(escapedValue.Slice(0, written)); + // Each original input char expands to at most MaxExpansionFactorWhileEscaping bytes in the output. + // Escaped sequences are all ASCII (1 byte each), so × 6 ≥ transcoded bytes. 
+ int requiredBytes = value.Length * JsonConstants.MaxExpansionFactorWhileEscaping; + + WriteStringByOptions(escapedValue.Slice(0, written), requiredBytes); if (valueArray != null) { diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/Value.WriteTests.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/Value.WriteTests.cs index 435ad803c3975a..02fd7b283cc27d 100644 --- a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/Value.WriteTests.cs +++ b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/Value.WriteTests.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.Text.Encodings.Web; +using Microsoft.DotNet.XUnitExtensions; using Newtonsoft.Json; using Xunit; @@ -22,6 +23,41 @@ public static void WriteStringWithRelaxedEscaper() Assert.NotEqual(expected, JsonSerializer.Serialize(inputString)); } + // NOTE: WriteExtremelyLargeStrings test is constrained to run on Windows and MacOSX because it causes + // problems on Linux due to the way deferred memory allocation works. On Linux, the allocation can + // succeed even if there is not enough memory but then the test may get killed by the OOM killer at the + // time the memory is accessed which triggers the full memory allocation. 
+ [PlatformSpecific(TestPlatforms.Windows | TestPlatforms.OSX)] + [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.Is64BitProcess))] + [OuterLoop] + [InlineData(120_000_000)] + public static void WriteExtremelyLargeStrings(int strLength) + { + const char InputCharacter = '\u007F'; + const string EscapedCharacter = "\\u007F"; + + try + { + string value = new string(InputCharacter, strLength); + string json = JsonSerializer.Serialize(value, JsonSerializerOptions.Default); + + int expectedJsonLength = 2 + (strLength * EscapedCharacter.Length); + int middleSegmentStart = 1 + ((strLength / 2) * EscapedCharacter.Length); + int lastSegmentStart = 1 + ((strLength - 1) * EscapedCharacter.Length); + + Assert.Equal(expectedJsonLength, json.Length); + Assert.Equal('"', json[0]); + Assert.Equal(EscapedCharacter, json.AsSpan(1, EscapedCharacter.Length).ToString()); + Assert.Equal(EscapedCharacter, json.AsSpan(middleSegmentStart, EscapedCharacter.Length).ToString()); + Assert.Equal(EscapedCharacter, json.AsSpan(lastSegmentStart, EscapedCharacter.Length).ToString()); + Assert.Equal('"', json[^1]); + } + catch (OutOfMemoryException) + { + throw new SkipTestException($"Insufficient memory to run {nameof(WriteExtremelyLargeStrings)} with length {strLength}."); + } + } + [Fact] public static void WritePrimitives() { diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonWriterTests.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonWriterTests.cs index b545496857b71d..b329547b451223 100644 --- a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonWriterTests.cs +++ b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonWriterTests.cs @@ -3590,7 +3590,7 @@ public void WritingTooLargeBase64Bytes(JsonWriterOptions options) } } - // NOTE: WritingTooLargeProperty test is constrained to run on Windows and MacOSX because it causes + // NOTE: WritingHugeBase64Bytes test is constrained to run 
on Windows and MacOSX because it causes // problems on Linux due to the way deferred memory allocation works. On Linux, the allocation can // succeed even if there is not enough memory but then the test may get killed by the OOM killer at the // time the memory is accessed which triggers the full memory allocation. @@ -8271,6 +8271,85 @@ public static void WriteValueWithExtremelyLongValue_ThrowsArgumentException() Assert.Throws<ArgumentException>(() => writer.WriteStringValue(longValue.AsSpan())); } + // NOTE: WriteExtremelyLargeEscapedStringValue_Minimized test is constrained to run on Windows and MacOSX because it causes + // problems on Linux due to the way deferred memory allocation works. On Linux, the allocation can + // succeed even if there is not enough memory but then the test may get killed by the OOM killer at the + // time the memory is accessed which triggers the full memory allocation. + [PlatformSpecific(TestPlatforms.Windows | TestPlatforms.OSX)] + [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.Is64BitProcess))] + [OuterLoop] + public static void WriteExtremelyLargeEscapedStringValue_Minimized() + { + const char InputCharacter = '\u007F'; + const int EscapedCharacterByteLength = 6; + + try + { + char[] value = new char[MaxUnescapedTokenSize]; + value.AsSpan().Fill(InputCharacter); + + int expectedByteLength = 2 + MaxUnescapedTokenSize * EscapedCharacterByteLength; + var output = new ArrayBufferWriter<byte>(expectedByteLength); + using var writer = new Utf8JsonWriter(output); + writer.WriteStringValue(value.AsSpan()); + writer.Flush(); + + ReadOnlySpan<byte> written = output.WrittenSpan; + Assert.Equal(expectedByteLength, written.Length); + Assert.Equal((byte)'"', written[0]); + Assert.Equal((byte)'"', written[^1]); + } + catch (OutOfMemoryException) + { + throw new SkipTestException("Out of memory allocating large objects"); + } + } + + // NOTE: WriteExtremelyLargeEscapedStringValue_Indented test is constrained to run on Windows and MacOSX because it causes 
+ // problems on Linux due to the way deferred memory allocation works. On Linux, the allocation can + // succeed even if there is not enough memory but then the test may get killed by the OOM killer at the + // time the memory is accessed which triggers the full memory allocation. + [PlatformSpecific(TestPlatforms.Windows | TestPlatforms.OSX)] + [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.Is64BitProcess))] + [OuterLoop] + public static void WriteExtremelyLargeEscapedStringValue_Indented() + { + const char InputCharacter = '\u007F'; + const int EscapedCharacterByteLength = 6; + const int IndentSize = 127; + const string NewLine = "\n"; + + try + { + char[] value = new char[MaxUnescapedTokenSize]; + value.AsSpan().Fill(InputCharacter); + + int escapedStrByteLength = 2 + MaxUnescapedTokenSize * EscapedCharacterByteLength; + int expectedByteLength = 1 + NewLine.Length + IndentSize + escapedStrByteLength + NewLine.Length + 1; + + var options = new JsonWriterOptions { Indented = true, IndentSize = IndentSize, NewLine = NewLine }; + var output = new ArrayBufferWriter<byte>(expectedByteLength); + using var writer = new Utf8JsonWriter(output, options); + writer.WriteStartArray(); + writer.WriteStringValue(value.AsSpan()); + writer.WriteEndArray(); + writer.Flush(); + + // Layout: [ \n "escapedStr" \n ] + ReadOnlySpan<byte> written = output.WrittenSpan; + Assert.Equal(expectedByteLength, written.Length); + Assert.Equal((byte)'[', written[0]); + Assert.Equal((byte)']', written[^1]); + int stringStart = 1 + NewLine.Length + IndentSize; + Assert.Equal((byte)'"', written[stringStart]); + Assert.Equal((byte)'"', written[stringStart + escapedStrByteLength - 1]); + } + catch (OutOfMemoryException) + { + throw new SkipTestException("Out of memory allocating large objects"); + } + } + [Fact] public static void WriteRawValueWithInvalidJson_ValidationDisabled() {