/
DefaultHtmlEncoder.cs
211 lines (170 loc) · 9.45 KB
/
DefaultHtmlEncoder.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
using System.Buffers;
using System.Diagnostics;
using System.Numerics;
using System.Text.Unicode;
namespace System.Text.Encodings.Web
{
// HtmlEncoder implementation that delegates all scanning and encoding work to an
// OptimizedInboxTextEncoder, supplying an HTML-specific escaper (EscaperImplementation)
// that knows how to write the escaped form of a single scalar value.
internal sealed class DefaultHtmlEncoder : HtmlEncoder
{
    // Shared instance created from a TextEncoderSettings constructed over UnicodeRanges.BasicLatin.
    internal static readonly DefaultHtmlEncoder BasicLatinSingleton = new DefaultHtmlEncoder(new TextEncoderSettings(UnicodeRanges.BasicLatin));

    // The encoder every override below forwards to.
    private readonly OptimizedInboxTextEncoder _innerEncoder;

    // Builds the inner encoder from the HTML escaper singleton and the allowed-code-points
    // bitmap taken from 'settings'. A null 'settings' is reported through ThrowHelper.
    internal DefaultHtmlEncoder(TextEncoderSettings settings)
    {
        if (settings is null)
        {
            ThrowHelper.ThrowArgumentNullException(ExceptionArgument.settings);
        }

        _innerEncoder = new OptimizedInboxTextEncoder(EscaperImplementation.Singleton, settings.GetAllowedCodePointsBitmap());
    }

    // "&#xFFFF;" [8 chars] is worst case for single char ("&#x10FFFF;" [10 chars] worst case for arbitrary scalar value)
    public override int MaxOutputCharactersPerInputCharacter => 8;

    /*
     * These overrides should be copied to all other subclasses that are backed
     * by the fast inbox escaping mechanism.
     */

#pragma warning disable CS0618 // some of the adapters are intentionally marked [Obsolete]
    private protected override OperationStatus EncodeCore(ReadOnlySpan<char> source, Span<char> destination, out int charsConsumed, out int charsWritten, bool isFinalBlock)
        => _innerEncoder.Encode(source, destination, out charsConsumed, out charsWritten, isFinalBlock);

    private protected override OperationStatus EncodeUtf8Core(ReadOnlySpan<byte> utf8Source, Span<byte> utf8Destination, out int bytesConsumed, out int bytesWritten, bool isFinalBlock)
        => _innerEncoder.EncodeUtf8(utf8Source, utf8Destination, out bytesConsumed, out bytesWritten, isFinalBlock);

    private protected override int FindFirstCharacterToEncode(ReadOnlySpan<char> text)
        => _innerEncoder.GetIndexOfFirstCharToEncode(text);

    public override unsafe int FindFirstCharacterToEncode(char* text, int textLength)
        => _innerEncoder.FindFirstCharacterToEncode(text, textLength);

    public override int FindFirstCharacterToEncodeUtf8(ReadOnlySpan<byte> utf8Text)
        => _innerEncoder.GetIndexOfFirstByteToEncode(utf8Text);

    public override unsafe bool TryEncodeUnicodeScalar(int unicodeScalar, char* buffer, int bufferLength, out int numberOfCharactersWritten)
        => _innerEncoder.TryEncodeUnicodeScalar(unicodeScalar, buffer, bufferLength, out numberOfCharactersWritten);

    // A scalar "will be encoded" exactly when the inner encoder does not allow it.
    public override bool WillEncode(int unicodeScalar)
        => !_innerEncoder.IsScalarValueAllowed(new Rune(unicodeScalar));
#pragma warning restore CS0618

    /*
     * End overrides section.
     */

    // Writes the HTML-escaped form of a single scalar value: a named entity for
    // '<', '>', '&' and '"', and an upper-case hexadecimal numeric character
    // reference ("&#x...;") for everything else.
    private sealed class EscaperImplementation : ScalarEscaperBase
    {
        internal static readonly EscaperImplementation Singleton = new EscaperImplementation();

        private EscaperImplementation() { }

        // Writes the escaped form of 'value' into 'destination' as ASCII bytes.
        // Returns the number of bytes written, or -1 when 'destination' is too small.
        internal override int EncodeUtf8(Rune value, Span<byte> destination)
        {
            if (value.Value == '<')
            {
                if (!SpanUtility.TryWriteBytes(destination, (byte)'&', (byte)'l', (byte)'t', (byte)';')) { goto OutOfSpace; }
                return 4;
            }
            else if (value.Value == '>')
            {
                if (!SpanUtility.TryWriteBytes(destination, (byte)'&', (byte)'g', (byte)'t', (byte)';')) { goto OutOfSpace; }
                return 4;
            }
            else if (value.Value == '&')
            {
                if (!SpanUtility.TryWriteBytes(destination, (byte)'&', (byte)'a', (byte)'m', (byte)'p', (byte)';')) { goto OutOfSpace; }
                return 5;
            }
            else if (value.Value == '\"')
            {
                if (!SpanUtility.TryWriteBytes(destination, (byte)'&', (byte)'q', (byte)'u', (byte)'o', (byte)'t', (byte)';')) { goto OutOfSpace; }
                return 6;
            }
            else
            {
                return TryEncodeScalarAsHex(this, (uint)value.Value, destination);
            }

        OutOfSpace:

            return -1;

#pragma warning disable IDE0060 // 'this' taken explicitly to avoid argument shuffling by caller
            static int TryEncodeScalarAsHex(object @this, uint scalarValue, Span<byte> destination)
#pragma warning restore IDE0060
            {
                UnicodeDebug.AssertIsValidScalar(scalarValue);

                // See comments in the UTF-16 equivalent method later in this file.
                int idxOfSemicolon = (int)((uint)BitOperations.Log2(scalarValue) / 4) + 4;
                Debug.Assert(4 <= idxOfSemicolon && idxOfSemicolon <= 9, "Expected '&#x0;'..'&#x10FFFF;'.");

                // Checking the last index we will touch validates the whole output range at once.
                if (!SpanUtility.IsValidIndex(destination, idxOfSemicolon)) { goto OutOfSpaceInner; }
                destination[idxOfSemicolon] = (byte)';';

                // Write the 3-byte prefix plus a placeholder '0' in one 4-byte store;
                // the placeholder is overwritten by the first hex digit below.
                if (!SpanUtility.TryWriteBytes(destination, (byte)'&', (byte)'#', (byte)'x', (byte)'0'))
                {
                    Debug.Fail("We should've had enough room to write 4 bytes.");
                }

                // Narrow to just the hex-digit region (between "&#x" and ';') and fill it
                // from the least significant digit backwards.
                destination = destination.Slice(3, idxOfSemicolon - 3);
                for (int i = destination.Length - 1; SpanUtility.IsValidIndex(destination, i); i--)
                {
                    char asUpperHex = HexConverter.ToCharUpper((int)scalarValue);
                    destination[i] = (byte)asUpperHex;
                    scalarValue >>= 4; // write a nibble - not a byte - at a time
                }

                // Hex digits + 3 for "&#x" + 1 for ';'.
                return destination.Length + 4;

            OutOfSpaceInner:

                return -1;
            }
        }

        // Writes the escaped form of 'value' into 'destination' as UTF-16 chars.
        // Returns the number of chars written, or -1 when 'destination' is too small.
        internal override int EncodeUtf16(Rune value, Span<char> destination)
        {
            if (value.Value == '<')
            {
                if (!SpanUtility.TryWriteChars(destination, '&', 'l', 't', ';')) { goto OutOfSpace; }
                return 4;
            }
            else if (value.Value == '>')
            {
                if (!SpanUtility.TryWriteChars(destination, '&', 'g', 't', ';')) { goto OutOfSpace; }
                return 4;
            }
            else if (value.Value == '&')
            {
                if (!SpanUtility.TryWriteChars(destination, '&', 'a', 'm', 'p', ';')) { goto OutOfSpace; }
                return 5;
            }
            else if (value.Value == '\"')
            {
                if (!SpanUtility.TryWriteChars(destination, '&', 'q', 'u', 'o', 't', ';')) { goto OutOfSpace; }
                return 6;
            }
            else
            {
                return TryEncodeScalarAsHex(this, (uint)value.Value, destination);
            }

        OutOfSpace:

            return -1;

#pragma warning disable IDE0060 // 'this' taken explicitly to avoid argument shuffling by caller
            static int TryEncodeScalarAsHex(object @this, uint scalarValue, Span<char> destination)
#pragma warning restore IDE0060
            {
                UnicodeDebug.AssertIsValidScalar(scalarValue);

                // For inputs 0x0000..0x10FFFF, log2 will return 0..20.
                // (It counts the number of bits following the highest set bit.)
                //
                // Dividing by 4 (rounding down) and adding 1 gives the number of hex
                // digits (nibbles) needed: the +1 covers both the partial leading nibble
                // and the case where log2 is an exact multiple of 4 (e.g. 0x10, 0x100),
                // where the leading '1' still needs a digit of its own.
                // Finally, we +4 to account for the "&#x" prefix and the ";" suffix,
                // then -1 to get the index of the last legal location we want to write to.
                // >> +1 +4 -1 = +4
                //
                // Examples: 0x0 -> index 4 ("&#x0;"), 0x10FFFF -> index 9 ("&#x10FFFF;").
                int idxOfSemicolon = (int)((uint)BitOperations.Log2(scalarValue) / 4) + 4;
                Debug.Assert(4 <= idxOfSemicolon && idxOfSemicolon <= 9, "Expected '&#x0;'..'&#x10FFFF;'.");

                // Checking the last index we will touch validates the whole output range at once.
                if (!SpanUtility.IsValidIndex(destination, idxOfSemicolon)) { goto OutOfSpaceInner; }
                destination[idxOfSemicolon] = ';';

                // It's more efficient to write 4 chars at a time instead of 1 char.
                // The '0' at the end will be overwritten.
                if (!SpanUtility.TryWriteChars(destination, '&', '#', 'x', '0'))
                {
                    Debug.Fail("We should've had enough room to write 4 chars.");
                }

                // Narrow to just the hex-digit region (between "&#x" and ';') and fill it
                // from the least significant digit backwards.
                destination = destination.Slice(3, idxOfSemicolon - 3);
                for (int i = destination.Length - 1; SpanUtility.IsValidIndex(destination, i); i--)
                {
                    char asUpperHex = HexConverter.ToCharUpper((int)scalarValue);
                    destination[i] = asUpperHex;
                    scalarValue >>= 4; // write a nibble - not a byte - at a time
                }

                // Hex digits + 3 for "&#x" + 1 for ';'.
                return destination.Length + 4;

            OutOfSpaceInner:

                return -1;
            }
        }
    }
}
}