This repository has been archived by the owner on Jan 23, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 5k
/
HttpRuleParser.cs
455 lines (386 loc) · 16.9 KB
/
HttpRuleParser.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System.Diagnostics;
using System.Diagnostics.Contracts;
using System.Globalization;
using System.Text;
namespace System.Net.Http
{
internal static class HttpRuleParser
{
// Lookup table indexed by character code (0-127): 'true' means the character is allowed in an HTTP token.
// Populated once by CreateTokenChars() and read by IsTokenChar().
private static readonly bool[] s_tokenChars = CreateTokenChars();
// Upper bound for the nesting counter checked in GetExpressionLength(); exceeding it yields InvalidFormat.
private const int maxNestedCount = 5;
// Carriage return (character code 13) and line feed (character code 10).
internal const char CR = (char)13;
internal const char LF = (char)10;
// Digit counts of Int64.MaxValue (9223372036854775807) and Int32.MaxValue (2147483647). Not referenced in
// this class; presumably used by callers to bound numeric parsing - verify against callers.
internal const int MaxInt64Digits = 19;
internal const int MaxInt32Digits = 10;
// iso-8859-1, Western European (ISO)
// The 'uap' build resolves the encoding by name; all other builds resolve it by code page number (28591).
#if uap
internal static readonly Encoding DefaultHttpEncoding = Encoding.GetEncoding("iso-8859-1");
#else
internal static readonly Encoding DefaultHttpEncoding = Encoding.GetEncoding(28591);
#endif
// Builds the 128-entry lookup table behind IsTokenChar().
//
// token = 1*<any CHAR except CTLs or separators>
// CTL   = <any US-ASCII control character (octets 0 - 31) and DEL (127)>
//
// Returns an array indexed by character code where 'true' marks a valid token character.
private static bool[] CreateTokenChars()
{
    // Entries start out as "false", which already covers the CTLs (0-31), Space (32) and DEL (127).
    bool[] table = new bool[128];

    // Mark every printable, non-space ASCII character (33..126) as a candidate.
    for (int code = 33; code <= 126; code++)
    {
        table[code] = true;
    }

    // Separators are printable but are not valid token characters, so clear them again.
    foreach (char separator in "()<>@,;:\\\"/[]?={}")
    {
        table[separator] = false;
    }

    return table;
}
// Returns true when 'character' is a valid HTTP token character according to the s_tokenChars table.
// Characters with a code above 127 are outside the table and are never token characters.
internal static bool IsTokenChar(char character)
{
    return (character <= 127) && s_tokenChars[character];
}
// Counts the consecutive token characters in 'input' beginning at 'startIndex'.
// Returns 0 when 'startIndex' is at or past the end of the string, or when the first character is not a
// token character.
[Pure]
internal static int GetTokenLength(string input, int startIndex)
{
    Debug.Assert(input != null);
    Contract.Ensures((Contract.Result<int>() >= 0) && (Contract.Result<int>() <= (input.Length - startIndex)));

    // Advance until the end of the string or the first non-token character, whichever comes first.
    int end = startIndex;
    while ((end < input.Length) && IsTokenChar(input[end]))
    {
        end++;
    }

    return end - startIndex;
}
// Returns true when every character of 'input' is a token character.
// An empty string has no offending character and therefore also yields true.
[Pure]
internal static bool IsToken(string input)
{
    foreach (char character in input)
    {
        if (!IsTokenChar(character))
        {
            return false;
        }
    }

    return true;
}
// Byte-based overload: returns true when every byte of 'input', widened to a char, is a token character.
// An empty span yields true.
[Pure]
internal static bool IsToken(ReadOnlySpan<byte> input)
{
    foreach (byte octet in input)
    {
        if (!IsTokenChar((char)octet))
        {
            return false;
        }
    }

    return true;
}
// Decodes 'input' as ASCII and returns the resulting string.
// The bytes are expected to form a token; this is verified by assertion only (debug builds).
internal static string GetTokenString(ReadOnlySpan<byte> input)
{
    Debug.Assert(IsToken(input));

    string token = Encoding.ASCII.GetString(input);
    return token;
}
// Measures the run of linear whitespace in 'input' beginning at 'startIndex'.
// Linear whitespace consists of SP and HT characters, plus CR LF sequences that are immediately followed
// by SP or HT. Returns the number of characters in the run (0 if there is none or 'startIndex' is at or
// past the end of the string).
internal static int GetWhitespaceLength(string input, int startIndex)
{
    Debug.Assert(input != null);
    Contract.Ensures((Contract.Result<int>() >= 0) && (Contract.Result<int>() <= (input.Length - startIndex)));

    int position = startIndex;
    while (position < input.Length)
    {
        char ch = input[position];

        if ((ch == ' ') || (ch == '\t'))
        {
            position++;
        }
        else if ((ch == '\r')
            && (position + 2 < input.Length)
            && (input[position + 1] == '\n')
            && ((input[position + 2] == ' ') || (input[position + 2] == '\t')))
        {
            // A #13 char only counts as whitespace when followed by #10 and then SP or HT;
            // all three characters are consumed together.
            position += 3;
        }
        else
        {
            // First character that is not part of the whitespace run.
            break;
        }
    }

    return position - startIndex;
}
// Convenience overload: scans the whole of 'value' (i.e. starting at index 0) for an invalid newline.
internal static bool ContainsInvalidNewLine(string value) => ContainsInvalidNewLine(value, 0);
// Scans 'value' from 'startIndex' for a CR LF pair that is not followed by SP or HT.
// Such a newline is not allowed in any header value (known or custom): "value\r\nbadformat: header" is
// invalid, whereas "value\r\n goodformat: header" is fine. A CR LF pair at the very end of the string is
// invalid as well. Returns true when an invalid newline is found.
internal static bool ContainsInvalidNewLine(string value, int startIndex)
{
    for (int index = startIndex; index < value.Length; index++)
    {
        bool isCrLf = (value[index] == '\r')
            && (index + 1 < value.Length)
            && (value[index + 1] == '\n');
        if (!isCrLf)
        {
            continue;
        }

        // Step over the CR LF pair and inspect what follows it.
        index += 2;
        if (index == value.Length)
        {
            return true; // The string terminates with \r\n.
        }

        char following = value[index];
        if ((following != ' ') && (following != '\t'))
        {
            return true; // Newline followed by something other than whitespace.
        }
    }

    return false;
}
// Measures the non-negative number in 'input' beginning at 'startIndex'.
// Accepts ASCII digits '0'-'9' and, when 'allowDecimal' is true, at most one '.' as decimal separator.
// Returns the number of characters consumed; 0 when the text starts with '.' or with a non-digit.
//
// The RFC doesn't allow decimal values starting with a dot (".123" must be written "0.123"), defines no
// negative values, and only permits '.' - not ',' - as decimal separator ("1,23" is not a number).
internal static int GetNumberLength(string input, int startIndex, bool allowDecimal)
{
    Debug.Assert(input != null);
    Debug.Assert((startIndex >= 0) && (startIndex < input.Length));
    Contract.Ensures((Contract.Result<int>() >= 0) && (Contract.Result<int>() <= (input.Length - startIndex)));

    if (input[startIndex] == '.')
    {
        return 0;
    }

    // When decimals are not allowed no dot may be consumed at all; otherwise exactly one may.
    bool dotStillAllowed = allowDecimal;

    int position = startIndex;
    for (; position < input.Length; position++)
    {
        char ch = input[position];

        if ((ch >= '0') && (ch <= '9'))
        {
            continue;
        }

        if ((ch == '.') && dotStillAllowed)
        {
            // Note that a trailing dot, as in "1.", is accepted.
            dotStillAllowed = false;
            continue;
        }

        break;
    }

    return position - startIndex;
}
// Tries to read a host from 'input' beginning at 'startIndex'.
// A host is either a token (only when 'allowToken' is true) or a host name accepted by IsValidHostName().
// The candidate extends up to the first SP, HT, CR or ',' (or the end of the string).
// On success returns the candidate's length and stores it in 'host'; otherwise returns 0 and 'host' is null.
internal static int GetHostLength(string input, int startIndex, bool allowToken, out string host)
{
    Debug.Assert(input != null);
    Debug.Assert(startIndex >= 0);
    Contract.Ensures((Contract.Result<int>() >= 0) && (Contract.Result<int>() <= (input.Length - startIndex)));

    host = null;
    if (startIndex >= input.Length)
    {
        return 0;
    }

    // Find the end of the candidate while tracking whether it consists of token characters only.
    bool onlyTokenChars = true;
    int end = startIndex;
    for (; end < input.Length; end++)
    {
        char ch = input[end];

        if (ch == '/')
        {
            return 0; // Host header must not contain paths.
        }

        if ((ch == ' ') || (ch == '\t') || (ch == '\r') || (ch == ','))
        {
            break; // Delimiter (',' or whitespace) reached.
        }

        if (onlyTokenChars && !IsTokenChar(ch))
        {
            onlyTokenChars = false;
        }
    }

    int length = end - startIndex;
    if (length == 0)
    {
        return 0;
    }

    string candidate = input.Substring(startIndex, length);

    // A candidate that qualifies as an allowed token needs no further validation;
    // anything else must pass the URI host name check.
    bool acceptedAsToken = allowToken && onlyTokenChars;
    if (!acceptedAsToken && !IsValidHostName(candidate))
    {
        return 0;
    }

    host = candidate;
    return length;
}
// Measures a comment - an expression delimited by '(' and ')' that may contain nested comments - in
// 'input' beginning at 'startIndex'. Delegates to GetExpressionLength() with a fresh nesting counter.
internal static HttpParseResult GetCommentLength(string input, int startIndex, out int length)
{
    int depth = 0;
    return GetExpressionLength(input, startIndex, openChar: '(', closeChar: ')', supportsNesting: true,
        nestedCount: ref depth, length: out length);
}
// Measures a quoted string - an expression opened and closed by '"', without nesting - in 'input'
// beginning at 'startIndex'. Delegates to GetExpressionLength() with a fresh nesting counter.
internal static HttpParseResult GetQuotedStringLength(string input, int startIndex, out int length)
{
    int depth = 0;
    return GetExpressionLength(input, startIndex, openChar: '"', closeChar: '"', supportsNesting: false,
        nestedCount: ref depth, length: out length);
}
// quoted-pair = "\" CHAR
// CHAR = <any US-ASCII character (octets 0 - 127)>
//
// Checks whether a quoted-pair starts at 'startIndex'.
//   NotParsed     - the character at 'startIndex' is not '\'; 'length' is 0.
//   InvalidFormat - '\' is the last character, or the character after it is above 127; 'length' is 0.
//   Parsed        - a valid quoted-pair was found; 'length' is 2.
internal static HttpParseResult GetQuotedPairLength(string input, int startIndex, out int length)
{
    Debug.Assert(input != null);
    Debug.Assert((startIndex >= 0) && (startIndex < input.Length));
    Contract.Ensures((Contract.ValueAtReturn(out length) >= 0) &&
        (Contract.ValueAtReturn(out length) <= (input.Length - startIndex)));

    length = 0;

    if (input[startIndex] != '\\')
    {
        return HttpParseResult.NotParsed;
    }

    // A quoted-pair needs a second character after the '\', and that character must be in the range 0-127.
    bool hasQuotedChar = startIndex + 1 < input.Length;
    if (!hasQuotedChar || (input[startIndex + 1] > 127))
    {
        return HttpParseResult.InvalidFormat;
    }

    // Which ASCII character follows the '\' is irrelevant; the pair always spans two characters.
    length = 2;
    return HttpParseResult.Parsed;
}
// TEXT = <any OCTET except CTLs, but including LWS>
// LWS = [CRLF] 1*( SP | HT )
// CTL = <any US-ASCII control character (octets 0 - 31) and DEL (127)>
//
// Since we don't really care about the content of a quoted string or comment, we're more tolerant and
// allow these characters. We only want to find the delimiters ('"' for quoted string and '(', ')' for comment).
//
// Measures the delimited expression that starts at 'startIndex'.
//   input           - the string to parse.
//   startIndex      - index of the expected open-char.
//   openChar        - delimiter that opens the expression.
//   closeChar       - delimiter that closes the expression.
//   supportsNesting - whether an open-char inside the expression starts a nested expression.
//   nestedCount     - current nesting counter, shared by reference across the recursive calls; it is
//                     incremented before and decremented after each nested call.
//   length          - on 'Parsed', the number of characters from the open-char through the matching
//                     close-char (inclusive); otherwise 0.
// Returns NotParsed when the character at 'startIndex' is not the open-char, InvalidFormat when no matching
// close-char is found, a nested expression is invalid or the nesting limit is exceeded, and Parsed otherwise.
//
// 'nestedCount': Comments can be nested, but the counter may not exceed 'maxNestedCount'. Without a limit
// an attacker could send a comment with hundreds of nested comments, resulting in a stack overflow
// exception. In addition having more than 1 nested comment (if any) is unusual.
private static HttpParseResult GetExpressionLength(string input, int startIndex, char openChar,
    char closeChar, bool supportsNesting, ref int nestedCount, out int length)
{
    Debug.Assert(input != null);
    Debug.Assert((startIndex >= 0) && (startIndex < input.Length));
    Contract.Ensures((Contract.Result<HttpParseResult>() != HttpParseResult.Parsed) ||
        (Contract.ValueAtReturn<int>(out length) > 0));

    length = 0;

    if (input[startIndex] != openChar)
    {
        return HttpParseResult.NotParsed;
    }

    int current = startIndex + 1; // Start parsing with the character next to the first open-char.
    while (current < input.Length)
    {
        // Only check whether we have a quoted char, if we have at least 3 characters left to read (i.e.
        // quoted char + closing char). Otherwise the closing char may be considered part of the quoted char.
        int quotedPairLength = 0;
        if ((current + 2 < input.Length) &&
            (GetQuotedPairLength(input, current, out quotedPairLength) == HttpParseResult.Parsed))
        {
            // We ignore invalid quoted-pairs. Invalid quoted-pairs may mean that it looked like a quoted pair,
            // but we actually have a quoted-string: e.g. "\ü" ('\' followed by a char >127 - quoted-pair only
            // allows ASCII chars after '\'; qdtext allows both '\' and >127 chars).
            current = current + quotedPairLength;
            continue;
        }

        // If we support nested expressions and we find an open-char, then parse the nested expressions.
        if (supportsNesting && (input[current] == openChar))
        {
            nestedCount++;
            try
            {
                // Check if we exceeded the number of nested calls.
                if (nestedCount > maxNestedCount)
                {
                    return HttpParseResult.InvalidFormat;
                }

                int nestedLength = 0;
                HttpParseResult nestedResult = GetExpressionLength(input, current, openChar, closeChar,
                    supportsNesting, ref nestedCount, out nestedLength);

                switch (nestedResult)
                {
                    case HttpParseResult.Parsed:
                        // Skip the nested expression and restart the loop. 'current' now points just past
                        // the nested close-char, which may be the end of the input (e.g. "((a)") or the
                        // start of another nested expression or quoted-pair (e.g. "((a)(b))"). Restarting
                        // re-checks the bounds and gives that character the full treatment instead of
                        // indexing past the end or stepping over it. The 'finally' block still runs.
                        current += nestedLength;
                        continue;

                    case HttpParseResult.NotParsed:
                        Debug.Assert(false, "'NotParsed' is unexpected: We started nested expression " +
                            "parsing, because we found the open-char. So either it's a valid nested " +
                            "expression or it has invalid format.");
                        break;

                    case HttpParseResult.InvalidFormat:
                        // If the nested expression is invalid, we can't continue, so we fail with invalid format.
                        return HttpParseResult.InvalidFormat;

                    default:
                        Debug.Assert(false, "Unknown enum result: " + nestedResult);
                        break;
                }
            }
            finally
            {
                nestedCount--;
            }
        }

        if (input[current] == closeChar)
        {
            length = current - startIndex + 1;
            return HttpParseResult.Parsed;
        }

        current++;
    }

    // We didn't find the final quote, therefore we have an invalid expression string.
    return HttpParseResult.InvalidFormat;
}
// Returns true when 'host' can be embedded as the host part of an absolute http URI.
// The probe URI already carries user info ("u@") so that a 'host' which itself includes user info
// does not pass the check.
private static bool IsValidHostName(string host)
{
    string probe = string.Concat("http://u@", host, "/");
    return Uri.TryCreate(probe, UriKind.Absolute, out Uri _);
}
}
}