Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit efdbbe6

Browse files
authored
Utf8Parsing - the remaining 'N' format overloads (#26683)
* Utf8Parsing - the remaining 'N' format overloads Fixes https://github.com/dotnet/corefx/issues/24986 This is a piece of the Utf8Parser that was punted for time last year. There are 8 overloads, one for each of the 8 integer types. Int32 was done in a previous PR - this completes the set. The "N" format prints out integers like this: `"N2" => 12,345.00` This parser mimics the behavior of `int.TryParse(v, NumberStyles.Integer | AllowThousands | AllowDecimalPoint)`. The thing that may look strange is that the parser allows commas anywhere, not just at the 10^3 (thousands-group) digit positions. This mimics the desktop API behavior. Comma placement is culture-dependent and this is an API that is culture-agnostic. The test data was confirmed against a control implementation that calls the classic int.TryParse(). * Added comment * Fix comment typo
1 parent 8ae3df0 commit efdbbe6

File tree

5 files changed

+586
-44
lines changed

5 files changed

+586
-44
lines changed

src/System.Memory/src/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Signed.N.cs

Lines changed: 267 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,186 @@ public static partial class Utf8Parser
88
{
99
// Parses an "N"-format signed 8-bit integer from the start of `text`.
// Accepted shape: an optional leading '-' or '+', then either
//   * a first digit followed by more digits, with Utf8Constants.Comma bytes skipped wherever they occur,
//     optionally followed by Utf8Constants.Period and a run of '0' bytes, or
//   * Utf8Constants.Period followed by at least one '0'.
// Returns true and sets `value` to the signed result and `bytesConsumed` to the index at which scanning stopped.
// Returns false with both outputs set to default when the input is empty, a sign has nothing after it,
// the first byte after the sign is neither a digit nor a period, the magnitude exceeds the sbyte range,
// or the fractional part contains a digit other than '0'.
// NOTE(review): Utf8Constants and ParserHelpers are defined outside this view; presumably Period is '.',
// Comma is ',' and IsDigit tests for ASCII '0'..'9' - confirm.
private static bool TryParseSByteN(ReadOnlySpan<byte> text, out sbyte value, out int bytesConsumed)
1010
{
11-
throw NotImplemented.ActiveIssue("https://github.com/dotnet/corefx/issues/24986");
11+
// Empty input cannot contain a number.
if (text.Length < 1)
12+
goto FalseExit;
13+
14+
// sign is +1 or -1; it is applied to the accumulated magnitude only at Done.
int sign = 1;
15+
int index = 0;
16+
int c = text[index];
17+
if (c == '-')
18+
{
19+
sign = -1;
20+
index++;
21+
// A sign with nothing after it is rejected.
if ((uint)index >= (uint)text.Length)
22+
goto FalseExit;
23+
c = text[index];
24+
}
25+
else if (c == '+')
26+
{
27+
index++;
28+
// A sign with nothing after it is rejected.
if ((uint)index >= (uint)text.Length)
29+
goto FalseExit;
30+
c = text[index];
31+
}
32+
33+
// Magnitude accumulated so far (always non-negative here).
int answer;
34+
35+
// Handle the first digit (or period) as a special case. This ensures some compatible edge-case behavior with the classic parse routines
36+
// (at least one digit must precede any commas, and a string without any digits prior to the decimal point must have at least
37+
// one digit after the decimal point.)
38+
if (c == Utf8Constants.Period)
39+
goto FractionalPartWithoutLeadingDigits;
40+
if (!ParserHelpers.IsDigit(c))
41+
goto FalseExit;
42+
// Convert the digit byte to its numeric value.
answer = c - '0';
43+
44+
// Integer part: consume bytes until the input ends, a period starts the fraction, or a non-digit byte is seen.
for (; ; )
45+
{
46+
index++;
47+
// End of input: everything consumed so far is the number.
if ((uint)index >= (uint)text.Length)
48+
goto Done;
49+
50+
c = text[index];
51+
// Commas are skipped with no check on where they appear.
if (c == Utf8Constants.Comma)
52+
continue;
53+
54+
if (c == Utf8Constants.Period)
55+
goto FractionalDigits;
56+
57+
// Any other non-digit byte ends the number; index stays on that byte, so it is not counted as consumed.
if (!ParserHelpers.IsDigit(c))
58+
goto Done;
59+
60+
answer = answer * 10 + c - '0';
61+
62+
// if sign < 0, (-1 * sign + 1) / 2 = 1
63+
// else, (-1 * sign + 1) / 2 = 0
64+
// So the allowed magnitude is sbyte.MaxValue (127) for non-negative input and sbyte.MaxValue + 1 (128) after a '-'.
if (answer > sbyte.MaxValue + (-1 * sign + 1) / 2)
65+
goto FalseExit; // Overflow
66+
}
67+
68+
FractionalPartWithoutLeadingDigits: // If we got here, we found a decimal point before we found any digits. This is legal as long as there's at least one zero after the decimal point.
69+
answer = 0;
70+
// Step past the period and require the next byte to be '0'.
index++;
71+
if ((uint)index >= (uint)text.Length)
72+
goto FalseExit;
73+
if (text[index] != '0')
74+
goto FalseExit;
75+
76+
FractionalDigits: // "N" format allows a fractional portion despite being an integer format but only if the post-fraction digits are all 0.
77+
// On entry index is on the period (or on the first '0' already verified above); the loop advances past it and past any further '0' bytes.
do
78+
{
79+
index++;
80+
if ((uint)index >= (uint)text.Length)
81+
goto Done;
82+
c = text[index];
83+
}
84+
while (c == '0');
85+
86+
// c is the first byte after the run of zeros and is known not to be '0' here.
if (ParserHelpers.IsDigit(c))
87+
goto FalseExit; // The fractional portion contained a non-zero digit. Treat this as an error, not an early termination.
88+
goto Done;
89+
90+
FalseExit:
91+
bytesConsumed = default;
92+
value = default;
93+
return false;
94+
95+
Done:
96+
bytesConsumed = index;
97+
// Apply the sign; a magnitude of 128 with sign -1 gives -128, which fits in sbyte.
value = (sbyte)(answer * sign);
98+
return true;
1299
}
13100

14101
// Parses an "N"-format signed 16-bit integer from the start of `text`.
// Accepted shape: an optional leading '-' or '+', then either
//   * a first digit followed by more digits, with Utf8Constants.Comma bytes skipped wherever they occur,
//     optionally followed by Utf8Constants.Period and a run of '0' bytes, or
//   * Utf8Constants.Period followed by at least one '0'.
// Returns true and sets `value` to the signed result and `bytesConsumed` to the index at which scanning stopped.
// Returns false with both outputs set to default when the input is empty, a sign has nothing after it,
// the first byte after the sign is neither a digit nor a period, the magnitude exceeds the short range,
// or the fractional part contains a digit other than '0'.
// NOTE(review): Utf8Constants and ParserHelpers are defined outside this view; presumably Period is '.',
// Comma is ',' and IsDigit tests for ASCII '0'..'9' - confirm.
private static bool TryParseInt16N(ReadOnlySpan<byte> text, out short value, out int bytesConsumed)
15102
{
16-
throw NotImplemented.ActiveIssue("https://github.com/dotnet/corefx/issues/24986");
103+
// Empty input cannot contain a number.
if (text.Length < 1)
104+
goto FalseExit;
105+
106+
// sign is +1 or -1; it is applied to the accumulated magnitude only at Done.
int sign = 1;
107+
int index = 0;
108+
int c = text[index];
109+
if (c == '-')
110+
{
111+
sign = -1;
112+
index++;
113+
// A sign with nothing after it is rejected.
if ((uint)index >= (uint)text.Length)
114+
goto FalseExit;
115+
c = text[index];
116+
}
117+
else if (c == '+')
118+
{
119+
index++;
120+
// A sign with nothing after it is rejected.
if ((uint)index >= (uint)text.Length)
121+
goto FalseExit;
122+
c = text[index];
123+
}
124+
125+
// Magnitude accumulated so far (always non-negative here).
int answer;
126+
127+
// Handle the first digit (or period) as a special case. This ensures some compatible edge-case behavior with the classic parse routines
128+
// (at least one digit must precede any commas, and a string without any digits prior to the decimal point must have at least
129+
// one digit after the decimal point.)
130+
if (c == Utf8Constants.Period)
131+
goto FractionalPartWithoutLeadingDigits;
132+
if (!ParserHelpers.IsDigit(c))
133+
goto FalseExit;
134+
// Convert the digit byte to its numeric value.
answer = c - '0';
135+
136+
// Integer part: consume bytes until the input ends, a period starts the fraction, or a non-digit byte is seen.
for (; ; )
137+
{
138+
index++;
139+
// End of input: everything consumed so far is the number.
if ((uint)index >= (uint)text.Length)
140+
goto Done;
141+
142+
c = text[index];
143+
// Commas are skipped with no check on where they appear.
if (c == Utf8Constants.Comma)
144+
continue;
145+
146+
if (c == Utf8Constants.Period)
147+
goto FractionalDigits;
148+
149+
// Any other non-digit byte ends the number; index stays on that byte, so it is not counted as consumed.
if (!ParserHelpers.IsDigit(c))
150+
goto Done;
151+
152+
answer = answer * 10 + c - '0';
153+
154+
// if sign < 0, (-1 * sign + 1) / 2 = 1
155+
// else, (-1 * sign + 1) / 2 = 0
156+
// So the allowed magnitude is short.MaxValue (32767) for non-negative input and short.MaxValue + 1 (32768) after a '-'.
if (answer > short.MaxValue + (-1 * sign + 1) / 2)
157+
goto FalseExit; // Overflow
158+
}
159+
160+
FractionalPartWithoutLeadingDigits: // If we got here, we found a decimal point before we found any digits. This is legal as long as there's at least one zero after the decimal point.
161+
answer = 0;
162+
// Step past the period and require the next byte to be '0'.
index++;
163+
if ((uint)index >= (uint)text.Length)
164+
goto FalseExit;
165+
if (text[index] != '0')
166+
goto FalseExit;
167+
168+
FractionalDigits: // "N" format allows a fractional portion despite being an integer format but only if the post-fraction digits are all 0.
169+
// On entry index is on the period (or on the first '0' already verified above); the loop advances past it and past any further '0' bytes.
do
170+
{
171+
index++;
172+
if ((uint)index >= (uint)text.Length)
173+
goto Done;
174+
c = text[index];
175+
}
176+
while (c == '0');
177+
178+
// c is the first byte after the run of zeros and is known not to be '0' here.
if (ParserHelpers.IsDigit(c))
179+
goto FalseExit; // The fractional portion contained a non-zero digit. Treat this as an error, not an early termination.
180+
goto Done;
181+
182+
FalseExit:
183+
bytesConsumed = default;
184+
value = default;
185+
return false;
186+
187+
Done:
188+
bytesConsumed = index;
189+
// Apply the sign; a magnitude of 32768 with sign -1 gives -32768, which fits in short.
value = (short)(answer * sign);
190+
return true;
17191
}
18192

19193
private static bool TryParseInt32N(ReadOnlySpan<byte> text, out int value, out int bytesConsumed)
@@ -113,7 +287,97 @@ private static bool TryParseInt32N(ReadOnlySpan<byte> text, out int value, out i
113287

114288
// Parses an "N"-format signed 64-bit integer from the start of `text`.
// Accepted shape: an optional leading '-' or '+', then either
//   * a first digit followed by more digits, with Utf8Constants.Comma bytes skipped wherever they occur,
//     optionally followed by Utf8Constants.Period and a run of '0' bytes, or
//   * Utf8Constants.Period followed by at least one '0'.
// Returns true and sets `value` to the signed result and `bytesConsumed` to the index at which scanning stopped.
// Returns false with both outputs set to default when the input is empty, a sign has nothing after it,
// the first byte after the sign is neither a digit nor a period, the magnitude exceeds the long range,
// or the fractional part contains a digit other than '0'.
// NOTE(review): Utf8Constants and ParserHelpers are defined outside this view; presumably Period is '.',
// Comma is ',' and IsDigit tests for ASCII '0'..'9' - confirm.
// NOTE(review): the overflow checks below depend on unchecked (wrapping) long arithmetic - confirm the
// project is not compiled with overflow checking enabled.
private static bool TryParseInt64N(ReadOnlySpan<byte> text, out long value, out int bytesConsumed)
115289
{
116-
throw NotImplemented.ActiveIssue("https://github.com/dotnet/corefx/issues/24986");
290+
// Empty input cannot contain a number.
if (text.Length < 1)
291+
goto FalseExit;
292+
293+
// sign is +1 or -1; it is applied to the accumulated magnitude only at Done.
int sign = 1;
294+
int index = 0;
295+
int c = text[index];
296+
if (c == '-')
297+
{
298+
sign = -1;
299+
index++;
300+
// A sign with nothing after it is rejected.
if ((uint)index >= (uint)text.Length)
301+
goto FalseExit;
302+
c = text[index];
303+
}
304+
else if (c == '+')
305+
{
306+
index++;
307+
// A sign with nothing after it is rejected.
if ((uint)index >= (uint)text.Length)
308+
goto FalseExit;
309+
c = text[index];
310+
}
311+
312+
// Magnitude accumulated so far. It is read through (ulong) casts below because the magnitude of
// long.MinValue (2^63) does not fit in a non-negative long.
long answer;
313+
314+
// Handle the first digit (or period) as a special case. This ensures some compatible edge-case behavior with the classic parse routines
315+
// (at least one digit must precede any commas, and a string without any digits prior to the decimal point must have at least
316+
// one digit after the decimal point.)
317+
if (c == Utf8Constants.Period)
318+
goto FractionalPartWithoutLeadingDigits;
319+
if (!ParserHelpers.IsDigit(c))
320+
goto FalseExit;
321+
// Convert the digit byte to its numeric value.
answer = c - '0';
322+
323+
// Integer part: consume bytes until the input ends, a period starts the fraction, or a non-digit byte is seen.
for (; ; )
324+
{
325+
index++;
326+
// End of input: everything consumed so far is the number.
if ((uint)index >= (uint)text.Length)
327+
goto Done;
328+
329+
c = text[index];
330+
// Commas are skipped with no check on where they appear.
if (c == Utf8Constants.Comma)
331+
continue;
332+
333+
if (c == Utf8Constants.Period)
334+
goto FractionalDigits;
335+
336+
// Any other non-digit byte ends the number; index stays on that byte, so it is not counted as consumed.
if (!ParserHelpers.IsDigit(c))
337+
goto Done;
338+
339+
// Pre-check before multiplying: if the magnitude already exceeds long.MaxValue / 10, appending any digit
// would push it past 2^63, so reject now rather than after the multiplication.
if (((ulong)answer) > long.MaxValue / 10)
340+
goto FalseExit;
341+
342+
// NOTE(review): this can exceed long.MaxValue by a small amount and appears to rely on wraparound;
// the (ulong) comparison below then sees the true magnitude.
answer = answer * 10 + c - '0';
343+
344+
// if sign < 0, (-1 * sign + 1) / 2 = 1
345+
// else, (-1 * sign + 1) / 2 = 0
346+
// So the allowed magnitude is long.MaxValue for non-negative input and long.MaxValue + 1 after a '-'.
// NOTE(review): for sign < 0 the long addition long.MaxValue + 1 overflows before the (ulong) cast;
// this yields 2^63 only under unchecked arithmetic.
if ((ulong)answer > (ulong)(long.MaxValue + (-1 * sign + 1) / 2))
347+
goto FalseExit; // Overflow
348+
}
349+
350+
FractionalPartWithoutLeadingDigits: // If we got here, we found a decimal point before we found any digits. This is legal as long as there's at least one zero after the decimal point.
351+
answer = 0;
352+
// Step past the period and require the next byte to be '0'.
index++;
353+
if ((uint)index >= (uint)text.Length)
354+
goto FalseExit;
355+
if (text[index] != '0')
356+
goto FalseExit;
357+
358+
FractionalDigits: // "N" format allows a fractional portion despite being an integer format but only if the post-fraction digits are all 0.
359+
// On entry index is on the period (or on the first '0' already verified above); the loop advances past it and past any further '0' bytes.
do
360+
{
361+
index++;
362+
if ((uint)index >= (uint)text.Length)
363+
goto Done;
364+
c = text[index];
365+
}
366+
while (c == '0');
367+
368+
// c is the first byte after the run of zeros and is known not to be '0' here.
if (ParserHelpers.IsDigit(c))
369+
goto FalseExit; // The fractional portion contained a non-zero digit. Treat this as an error, not an early termination.
370+
goto Done;
371+
372+
FalseExit:
373+
bytesConsumed = default;
374+
value = default;
375+
return false;
376+
377+
Done:
378+
bytesConsumed = index;
379+
// Apply the sign.
// NOTE(review): when answer holds a wrapped 2^63 (i.e. long.MinValue) and sign is -1, the product
// is long.MinValue only under unchecked arithmetic - confirm.
value = answer * sign;
380+
return true;
117381
}
118382
}
119383
}

0 commit comments

Comments
 (0)