-
Notifications
You must be signed in to change notification settings - Fork 10.4k
/
SwiftDtoa.cpp
2769 lines (2562 loc) · 117 KB
/
SwiftDtoa.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
//===--- SwiftDtoa.cpp ---------------------------------------------*- C++ -*-===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2018-2020 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===---------------------------------------------------------------------===//
//
// Note: This source file is used in different projects where it gets
// compiled variously as ".c" or ".cpp". Please keep the code clean
// portable C so others can share your improvements.
//
/// For binary16, this uses a simple approach that is normally
/// implemented with variable-length arithmetic. However, due to
/// the limited range of binary16, this can be implemented simply
/// with only 32-bit integer arithmetic.
///
/// For other formats, SwiftDtoa uses a modified form of the Grisu2
/// algorithm from Florian Loitsch; "Printing Floating-Point Numbers
/// Quickly and Accurately with Integers", 2010.
/// https://doi.org/10.1145/1806596.1806623
///
/// Some of the Grisu2 modifications were suggested by the "Errol
/// paper": Marc Andrysco, Ranjit Jhala, Sorin Lerner; "Printing
/// Floating-Point Numbers: A Faster, Always Correct Method", 2016.
/// https://doi.org/10.1145/2837614.2837654
/// In particular, the Errol paper explored the impact of higher-precision
/// fixed-width arithmetic on Grisu2 and showed a way to rapidly test
/// the correctness of such algorithms.
///
/// A few further improvements were inspired by the Ryu algorithm
/// from Ulf Anders; "Ryū: fast float-to-string conversion", 2018.
/// https://doi.org/10.1145/3296979.3192369
///
/// In summary, this implementation is:
///
/// * Fast. It uses only fixed-width integer arithmetic and has
/// constant memory requirements. For double-precision values on
/// 64-bit processors, it is competitive with Ryu. For double-precision
/// values on 32-bit processors, and higher-precision values on all
/// processors, it is considerably faster.
///
/// * Always Accurate. Converting the decimal form back to binary
/// will always yield exactly the same value. For the IEEE 754
/// formats, the round-trip will produce exactly the same bit
/// pattern in memory.
///
/// * Always Short. This always selects an accurate result with the
/// minimum number of significant digits.
///
/// * Always Close. Among all accurate, short results, this always
/// chooses the result that is closest to the exact floating-point
/// value. (In case of an exact tie, it rounds the last digit even.)
///
/// * Portable. The code is written in portable C99. The core
/// implementations utilize only fixed-size integer arithmetic.
/// 128-bit integer support is utilized if present but is not
/// necessary. There are thin wrappers that accept platform-native
/// floating point types and delegate to the portable core
/// functions.
///
// ----------------------------------------------------------------------------
#include <inttypes.h>
#include <math.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "swift/Runtime/SwiftDtoa.h"
#if defined(__SIZEOF_INT128__)
// We get a significant speed boost if we can use the __uint128_t
// type that's present in GCC and Clang on 64-bit architectures. In
// particular, we can do 128-bit arithmetic directly and can
// represent 256-bit integers as collections of 64-bit elements.
#define HAVE_UINT128_T 1
#else
// On 32-bit, we use slower code that manipulates 128-bit
// and 256-bit integers as collections of 32-bit elements.
#define HAVE_UINT128_T 0
#endif
//
// Predefine various arithmetic helpers. Most implementations and extensive
// comments are at the bottom of this file.
//
#if SWIFT_DTOA_BINARY32_SUPPORT || SWIFT_DTOA_BINARY64_SUPPORT || SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT
// The power-of-10 tables do not directly store the associated binary
// exponent. That's because the binary exponent is a simple linear
// function of the decimal power (and vice versa), so it's just as
// fast (and uses much less memory) to compute it:
// The binary exponent corresponding to a particular power of 10.
// This matches the power-of-10 tables across the full range of binary128.
#define binaryExponentFor10ToThe(p) ((int)(((((int64_t)(p)) * 55732705) >> 24) + 1))
// A decimal exponent that approximates a particular binary power.
#define decimalExponentFor2ToThe(e) ((int)(((int64_t)e * 20201781) >> 26))
#endif
//
// Helper functions used only by the single-precision binary32 formatter
//
#if SWIFT_DTOA_BINARY32_SUPPORT
static uint64_t multiply64x32RoundingDown(uint64_t lhs, uint32_t rhs) {
static const uint64_t mask32 = UINT32_MAX;
uint64_t t = ((lhs & mask32) * rhs) >> 32;
return t + (lhs >> 32) * rhs;
}
static uint64_t multiply64x32RoundingUp(uint64_t lhs, uint32_t rhs) {
static const uint64_t mask32 = UINT32_MAX;
uint64_t t = (((lhs & mask32) * rhs) + mask32) >> 32;
return t + (lhs >> 32) * rhs;
}
static void intervalContainingPowerOf10_Binary32(int p, uint64_t *lower, uint64_t *upper, int *exponent);
#endif
//
// Helpers used by binary32, binary64, float80, and binary128.
//
#if SWIFT_DTOA_BINARY32_SUPPORT || SWIFT_DTOA_BINARY64_SUPPORT || SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT
#if HAVE_UINT128_T
typedef __uint128_t swift_uint128_t;
#define initialize128WithHighLow64(dest, high64, low64) ((dest) = ((__uint128_t)(high64) << 64) | (low64))
#define shiftLeft128(u128, shift) (*(u128) <<= shift)
#else
typedef struct {
uint32_t low, b, c, high;
} swift_uint128_t;
#define initialize128WithHighLow64(dest, high64, low64) \
((dest).low = (uint32_t)(low64), \
(dest).b = (uint32_t)((low64) >> 32), \
(dest).c = (uint32_t)(high64), \
(dest).high = (uint32_t)((high64) >> 32))
static void shiftLeft128(swift_uint128_t *, int shift);
#endif
inline static int finishFormatting(char *, size_t, char *, char *, int, int);
#endif
//
// Helper functions needed by the binary64 formatter.
//
#if SWIFT_DTOA_BINARY64_SUPPORT
#if HAVE_UINT128_T
#define isLessThan128x128(lhs, rhs) ((lhs) < (rhs))
#define subtract128x128(lhs, rhs) (*(lhs) -= (rhs))
#define multiply128xu32(lhs, rhs) (*(lhs) *= (rhs))
#define initialize128WithHigh64(dest, value) ((dest) = (__uint128_t)(value) << 64)
#define extractHigh64From128(arg) ((uint64_t)((arg) >> 64))
#define is128bitZero(arg) ((arg) == 0)
static int extractIntegerPart128(__uint128_t *fixed128, int integerBits) {
const int fractionBits = 128 - integerBits;
int integerPart = (int)(*fixed128 >> fractionBits);
const swift_uint128_t fixedPointMask = (((__uint128_t)1 << fractionBits) - 1);
*fixed128 &= fixedPointMask;
return integerPart;
}
#define shiftRightRoundingDown128(val, shift) ((val) >> (shift))
#define shiftRightRoundingUp128(val, shift) (((val) + (((uint64_t)1 << (shift)) - 1)) >> (shift))
#else
static int isLessThan128x128(swift_uint128_t lhs, swift_uint128_t rhs);
static void subtract128x128(swift_uint128_t *lhs, swift_uint128_t rhs);
static void multiply128xu32(swift_uint128_t *lhs, uint32_t rhs);
#define initialize128WithHigh64(dest, value) \
((dest).low = (dest).b = 0, \
(dest).c = (uint32_t)(value), \
(dest).high = (uint32_t)((value) >> 32))
#define extractHigh64From128(arg) (((uint64_t)(arg).high << 32)|((arg).c))
#define is128bitZero(dest) \
(((dest).low | (dest).b | (dest).c | (dest).high) == 0)
// Treat a uint128_t as a fixed-point value with `integerBits` bits in
// the integer portion. Return the integer portion and zero it out.
static int extractIntegerPart128(swift_uint128_t *fixed128, int integerBits) {
const int highFractionBits = 32 - integerBits;
int integerPart = (int)(fixed128->high >> highFractionBits);
fixed128->high &= ((uint32_t)1 << highFractionBits) - 1;
return integerPart;
}
static swift_uint128_t shiftRightRoundingDown128(swift_uint128_t lhs, int shift);
static swift_uint128_t shiftRightRoundingUp128(swift_uint128_t lhs, int shift);
#endif
static swift_uint128_t multiply128x64RoundingDown(swift_uint128_t lhs, uint64_t rhs);
static swift_uint128_t multiply128x64RoundingUp(swift_uint128_t lhs, uint64_t rhs);
static void intervalContainingPowerOf10_Binary64(int p, swift_uint128_t *lower, swift_uint128_t *upper, int *exponent);
#endif
//
// Helper functions used by the 256-bit backend needed for
// float80 and binary128
//
#if SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT
#if HAVE_UINT128_T
// A 256-bit unsigned integer type stored as 3 64-bit words
typedef struct {uint64_t low, midlow, midhigh, high;} swift_uint256_t;
#define initialize256WithHighMidLow64(dest, high64, midhigh64, midlow64, low64) \
((dest).low = (low64), \
(dest).midlow = (midlow64), \
(dest).midhigh = (midhigh64), \
(dest).high = (high64))
#define is256bitZero(dest) \
(((dest).low | (dest).midlow | (dest).midhigh | (dest).high) == 0)
static int extractIntegerPart256(swift_uint256_t *fixed256, int integerBits) {
int integerPart = (int)(fixed256->high >> (64 - integerBits));
const uint64_t fixedPointMask = (((uint64_t)1 << (64 - integerBits)) - 1);
fixed256->high &= fixedPointMask;
return integerPart;
}
#else
// A 256-bit unsigned integer type stored as 8 32-bit words
typedef struct { uint32_t elt[8]; } swift_uint256_t; // [0]=low, [7]=high
#define initialize256WithHighMidLow64(dest, high64, midhigh64, midlow64, low64) \
((dest).elt[0] = (uint64_t)(low64), \
(dest).elt[1] = (uint64_t)(low64) >> 32, \
(dest).elt[2] = (uint64_t)(midlow64), \
(dest).elt[3] = (uint64_t)(midlow64) >> 32, \
(dest).elt[4] = (uint64_t)(midhigh64), \
(dest).elt[5] = (uint64_t)(midhigh64) >> 32, \
(dest).elt[6] = (uint64_t)(high64), \
(dest).elt[7] = (uint64_t)(high64) >> 32)
#define is256bitZero(dest) \
(((dest).elt[0] | (dest).elt[1] | (dest).elt[2] | (dest).elt[3] \
| (dest).elt[4] | (dest).elt[5] | (dest).elt[6] | (dest).elt[7]) == 0)
static int extractIntegerPart256(swift_uint256_t *fixed256, int integerBits) {
int integerPart = (int)(fixed256->elt[7] >> (32 - integerBits));
const uint64_t fixedPointMask = (((uint64_t)1 << (32 - integerBits)) - 1);
fixed256->elt[7] &= fixedPointMask;
return integerPart;
}
#endif
static void multiply256xu32(swift_uint256_t *lhs, uint32_t rhs);
// Multiply a 256-bit fraction times a 128-bit fraction, with controlled rounding
static void multiply256x128RoundingDown(swift_uint256_t *lhs, swift_uint128_t rhs);
static void multiply256x128RoundingUp(swift_uint256_t *lhs, swift_uint128_t rhs);
static void subtract256x256(swift_uint256_t *lhs, swift_uint256_t rhs);
static int isLessThan256x256(swift_uint256_t lhs, swift_uint256_t rhs);
static void shiftRightRoundingDown256(swift_uint256_t *lhs, int shift);
static void shiftRightRoundingUp256(swift_uint256_t *lhs, int shift);
static void intervalContainingPowerOf10_Binary128(int p, swift_uint256_t *lower, swift_uint256_t *upper, int *exponent);
static size_t _swift_dtoa_256bit_backend(char *, size_t, swift_uint128_t, swift_uint128_t, int, int, int, int, bool);
#endif
// A table of all two-digit decimal numbers
#if SWIFT_DTOA_BINARY16_SUPPORT || SWIFT_DTOA_BINARY32_SUPPORT || SWIFT_DTOA_BINARY64_SUPPORT || SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT
static const char asciiDigitTable[] =
"0001020304050607080910111213141516171819"
"2021222324252627282930313233343536373839"
"4041424344454647484950515253545556575859"
"6061626364656667686970717273747576777879"
"8081828384858687888990919293949596979899";
#endif
// ================================================================
//
// Helpers to output formatted results for infinity, zero, and NaN
//
// ================================================================
static size_t infinity(char *dest, size_t len, int negative) {
if (negative) {
if (len >= 5) {
memcpy(dest, "-inf", 5);
return 4;
}
} else {
if (len >= 4) {
memcpy(dest, "inf", 4);
return 3;
}
}
if (len > 0) {
dest[0] = '\0';
}
return 0;
}
static size_t zero(char *dest, size_t len, int negative) {
if (negative) {
if (len >= 5) {
memcpy(dest, "-0.0", 5);
return 4;
}
} else {
if (len >= 4) {
memcpy(dest, "0.0", 4);
return 3;
}
}
if (len > 0) {
dest[0] = '\0';
}
return 0;
}
static size_t nan_details(char *dest, size_t len, int negative, int quiet, uint64_t payloadHigh, uint64_t payloadLow) {
const char *sign = negative ? "-" : "";
const char *signalingChar = quiet ? "" : "s";
char buff[64];
if (payloadLow != 0) {
if (payloadHigh != 0) {
snprintf(buff, sizeof(buff), "%s%snan(0x%" PRIx64 "%016" PRIx64 ")",
sign, signalingChar, payloadHigh, payloadLow);
} else {
snprintf(buff, sizeof(buff), "%s%snan(0x%" PRIx64 ")",
sign, signalingChar, payloadLow);
}
} else {
snprintf(buff, sizeof(buff), "%s%snan",
sign, signalingChar);
}
size_t nanlen = strlen(buff);
if (nanlen < len) {
memcpy(dest, buff, nanlen + 1);
return nanlen;
}
if (len > 0) {
dest[0] = '\0';
}
return 0;
}
// ================================================================
//
// BINARY16
//
// ================================================================
#if SWIFT_DTOA_BINARY16_SUPPORT
#if !SWIFT_DTOA_PASS_FLOAT16_AS_FLOAT
// Format a C `_Float16`
size_t swift_dtoa_optimal_binary16(_Float16 d, char *dest, size_t length) {
return swift_dtoa_optimal_binary16_p(&d, dest, length);
}
#endif
// Format an IEEE 754 binary16 half-precision floating point value
// into an optimal text form.
// This does not assume that the C environment has any support
// for binary16.
// Because binary16 has such a limited range, a simple exact
// implementation can fit in 32 bit arithmetic. Since we can easily
// verify every single binary16 value, this can be experimentally
// optimized.
size_t swift_dtoa_optimal_binary16_p(const void *f, char *dest, size_t length) {
static const int significandBitCount = 10;
static const uint32_t significandMask
= ((uint32_t)1 << significandBitCount) - 1;
static const int exponentBitCount = 5;
static const int exponentMask = (1 << exponentBitCount) - 1;
// See comments in swift_dtoa_optimal_binary64_p
static const int64_t exponentBias = (1 << (exponentBitCount - 1)) - 2; // 14
if (length < 1) {
return 0;
}
// Step 0: Deconstruct IEEE 754 binary16 format
uint16_t raw = *(const uint16_t *)f;
int exponentBitPattern = (raw >> significandBitCount) & exponentMask;
uint16_t significandBitPattern = raw & significandMask;
int negative = raw >> 15;
// Step 1: Handle the various input cases:
int binaryExponent;
uint16_t significand;
int isBoundary = significandBitPattern == 0;
if (exponentBitPattern == exponentMask) { // NaN or Infinity
if (isBoundary) { // Infinity
return infinity(dest, length, negative);
} else {
const int quiet = (significandBitPattern >> (significandBitCount - 1)) & 1;
uint16_t payload = significandBitPattern & ((1U << (significandBitCount - 2)) - 1);
return nan_details(dest, length, negative, quiet, 0, payload);
}
} else if (exponentBitPattern == 0) {
if (isBoundary) { // Zero
return zero(dest, length, negative);
} else { // Subnormal
binaryExponent = 1 - exponentBias;
significand = significandBitPattern;
}
} else { // normal
binaryExponent = exponentBitPattern - exponentBias;
uint16_t hiddenBit = (uint32_t)1 << (uint32_t)significandBitCount;
uint16_t fullSignificand = significandBitPattern + hiddenBit;
significand = fullSignificand;
}
// Step 2: Determine the exact target interval
significand <<= 2;
static const uint16_t halfUlp = 2;
uint32_t upperMidpointExact = significand + halfUlp;
static const uint16_t quarterUlp = 1;
uint32_t lowerMidpointExact
= significand - (isBoundary ? quarterUlp : halfUlp);
// Shortest output from here is "1.0" plus null byte
if (length < 4) {
dest[0] = '\0';
return 0;
}
char *p = dest;
if (negative) {
*p++ = '-';
}
if (binaryExponent < -13 || (binaryExponent == -13 && significand < 0x1a38)) {
// Format values < 10^-5 as exponential form
// We know value < 10^-5, so we can do the first scaling step unconditionally
int decimalExponent = -5;
uint32_t u = (upperMidpointExact << (28 - 13 + binaryExponent)) * 100000;
uint32_t l = (lowerMidpointExact << (28 - 13 + binaryExponent)) * 100000;
uint32_t t = (significand << (28 - 13 + binaryExponent)) * 100000;
const uint32_t mask = (1 << 28) - 1;
if (t < ((1 << 28) / 10)) {
u *= 100; l *= 100; t *= 100;
decimalExponent -= 2;
}
if (t < (1 << 28)) {
u *= 10; l *= 10; t *= 10;
decimalExponent -= 1;
}
const int uDigit = u >> 28, lDigit = l >> 28;
if (uDigit == lDigit) {
// There's more than one digit, emit a '.' and the rest
if (p > dest + length - 6) {
dest[0] = '\0';
return 0;
}
*p++ = (t >> 28) + '0';
*p++ = '.';
while (true) {
u = (u & mask) * 10; l = (l & mask) * 10;
const int uDigit = u >> 28, lDigit = l >> 28;
if (uDigit != lDigit) {
t = (t & mask) * 10;
break;
}
t *= 10;
*p++ = uDigit + '0';
}
}
t = (t + (1 << 27)) >> 28; // Add 1/2 to round
if (p > dest + length - 6) { // Exactly 6 bytes written below
dest[0] = '\0';
return 0;
}
*p++ = t + '0';
memcpy(p, "e-", 2);
p += 2;
memcpy(p, asciiDigitTable + (-decimalExponent) * 2, 2);
p += 2;
*p = '\0';
return p - dest;
}
// Format the value using decimal format
// There's an integer portion of no more than 5 digits
int intportion;
if (binaryExponent < 13) {
intportion = significand >> (13 - binaryExponent);
significand -= intportion << (13 - binaryExponent);
} else {
intportion = significand << (binaryExponent - 13);
significand -= intportion >> (binaryExponent - 13);
}
if (intportion < 10) {
if (p > dest + length - 3) {
dest[0] = '\0';
return 0;
}
*p++ = intportion + '0'; // One digit is the most common case
} else if (intportion < 1000) {
// 2 or 3 digits
if (p > dest + length - 4) {
dest[0] = '\0';
return 0;
}
if (intportion > 99) {
*p++ = intportion / 100 + '0';
}
memcpy(p, asciiDigitTable + (intportion % 100) * 2, 2);
p += 2;
} else {
// 4 or 5 digits
if (p > dest + length - 6) {
dest[0] = '\0';
return 0;
}
if (intportion > 9999) {
*p++ = intportion / 10000 + '0';
intportion %= 10000;
}
memcpy(p, asciiDigitTable + (intportion / 100) * 2, 2);
memcpy(p + 2, asciiDigitTable + (intportion % 100) * 2, 2);
p += 4;
}
if (p > dest + length - 3) {
dest[0] = '\0';
return 0;
}
*p++ = '.';
if (significand == 0) { // No fraction, so we're done.
*p++ = '0';
*p = '\0';
return p - dest;
}
// Format the fractional part
uint32_t u = upperMidpointExact << (28 - 13 + binaryExponent);
uint32_t l = lowerMidpointExact << (28 - 13 + binaryExponent);
uint32_t t = significand << (28 - 13 + binaryExponent);
const uint32_t mask = (1 << 28) - 1;
unsigned uDigit, lDigit;
while (true) {
u = (u & mask) * 10; l = (l & mask) * 10;
uDigit = u >> 28; lDigit = l >> 28;
if (uDigit != lDigit) {
t = (t & mask) * 10;
break;
}
t *= 10;
if (p > dest + length - 3) {
dest[0] = '\0';
return 0;
}
*p++ = uDigit + '0';
}
t += 1 << 27; // Add 1/2
if ((t & mask) == 0) { // Was exactly 1/2 (now zero)
t = (t >> 28) & ~1; // Round even
} else {
t >>= 28;
}
if (t <= lDigit && l > 0)
t += 1;
*p++ = t + '0';
*p = '\0';
return p - dest;
}
#endif
// ================================================================
//
// BINARY32
//
// ================================================================
#if SWIFT_DTOA_BINARY32_SUPPORT
#if FLOAT_IS_BINARY32
// Format a C `float`
size_t swift_dtoa_optimal_float(float d, char *dest, size_t length) {
return swift_dtoa_optimal_binary32_p(&d, dest, length);
}
#endif
// Format an IEEE 754 single-precision binary32 format floating-point number.
size_t swift_dtoa_optimal_binary32_p(const void *f, char *dest, size_t length)
{
static const int significandBitCount = FLT_MANT_DIG - 1;
static const uint32_t significandMask
= ((uint32_t)1 << significandBitCount) - 1;
static const int exponentBitCount = 8;
static const int exponentMask = (1 << exponentBitCount) - 1;
// See comments in swift_dtoa_optimal_binary64_p
static const int64_t exponentBias = (1 << (exponentBitCount - 1)) - 2; // 125
// Step 0: Deconstruct the target number
// Note: this strongly assumes IEEE 754 binary32 format
uint32_t raw = *(const uint32_t *)f;
int exponentBitPattern = (raw >> significandBitCount) & exponentMask;
uint32_t significandBitPattern = raw & significandMask;
int negative = raw >> 31;
// Step 1: Handle the various input cases:
int binaryExponent;
uint32_t significand;
if (length < 1) {
return 0;
} else if (exponentBitPattern == exponentMask) { // NaN or Infinity
if (significandBitPattern == 0) { // Infinity
return infinity(dest, length, negative);
} else { // NaN
const int quiet = (significandBitPattern >> (significandBitCount - 1)) & 1;
uint32_t payload = raw & ((1UL << (significandBitCount - 2)) - 1);
return nan_details(dest, length, negative, quiet != 0, 0, payload);
}
} else if (exponentBitPattern == 0) {
if (significandBitPattern == 0) { // Zero
return zero(dest, length, negative);
} else { // Subnormal
binaryExponent = 1 - exponentBias;
significand = significandBitPattern << (32 - significandBitCount - 1);
}
} else { // normal
binaryExponent = exponentBitPattern - exponentBias;
uint32_t hiddenBit = (uint32_t)1 << (uint32_t)significandBitCount;
uint32_t fullSignificand = significandBitPattern + hiddenBit;
significand = fullSignificand << (32 - significandBitCount - 1);
}
// Step 2: Determine the exact unscaled target interval
static const uint32_t halfUlp = (uint32_t)1 << (32 - significandBitCount - 2);
uint64_t upperMidpointExact = (uint64_t)(significand + halfUlp);
int isBoundary = significandBitPattern == 0;
static const uint32_t quarterUlp = halfUlp >> 1;
uint64_t lowerMidpointExact
= (uint64_t)(significand - (isBoundary ? quarterUlp : halfUlp));
// Step 3: Estimate the base 10 exponent
int base10Exponent = decimalExponentFor2ToThe(binaryExponent);
// Step 4: Compute a power-of-10 scale factor
uint64_t powerOfTenRoundedDown = 0;
uint64_t powerOfTenRoundedUp = 0;
int powerOfTenExponent = 0;
static const int bulkFirstDigits = 1;
intervalContainingPowerOf10_Binary32(-base10Exponent + bulkFirstDigits - 1,
&powerOfTenRoundedDown,
&powerOfTenRoundedUp,
&powerOfTenExponent);
const int extraBits = binaryExponent + powerOfTenExponent;
// Step 5: Scale the interval (with rounding)
static const int integerBits = 8;
const int shift = integerBits - extraBits;
const int roundUpBias = (1 << shift) - 1;
static const int fractionBits = 64 - integerBits;
static const uint64_t fractionMask = ((uint64_t)1 << fractionBits) - (uint64_t)1;
uint64_t u, l;
if (significandBitPattern & 1) {
// Narrow the interval (odd significand)
uint64_t u1 = multiply64x32RoundingDown(powerOfTenRoundedDown,
upperMidpointExact);
u = u1 >> shift; // Rounding down
uint64_t l1 = multiply64x32RoundingUp(powerOfTenRoundedUp,
lowerMidpointExact);
l = (l1 + roundUpBias) >> shift; // Rounding Up
} else {
// Widen the interval (even significand)
uint64_t u1 = multiply64x32RoundingUp(powerOfTenRoundedUp,
upperMidpointExact);
u = (u1 + roundUpBias) >> shift; // Rounding Up
uint64_t l1 = multiply64x32RoundingDown(powerOfTenRoundedDown,
lowerMidpointExact);
l = l1 >> shift; // Rounding down
}
// Step 6: Align first digit, adjust exponent
// In particular, this prunes leading zeros from subnormals
uint64_t t = u;
uint64_t delta = u - l;
while (t < (uint64_t)1 << fractionBits) {
base10Exponent -= 1;
t *= 10;
delta *= 10;
}
// Step 7: Generate decimal digits into the destination buffer
char *p = dest;
if (p > dest + length - 3) {
dest[0] = '\0';
return 0;
}
if (negative) {
*p++ = '-';
}
char * const firstOutputChar = p;
// Format first digit as a 2-digit value to get a leading '0'
memcpy(p, asciiDigitTable + (t >> fractionBits) * 2, 2);
t &= fractionMask;
p += 2;
// Emit two digits at a time
while ((delta * 10) < ((t * 10) & fractionMask)) {
if (p > dest + length - 3) {
dest[0] = '\0';
return 0;
}
delta *= 100;
t *= 100;
memcpy(p, asciiDigitTable + (t >> fractionBits) * 2, 2);
t &= fractionMask;
p += 2;
}
// Emit any final digit
if (delta < t) {
if (p > dest + length - 2) {
dest[0] = '\0';
return 0;
}
delta *= 10;
t *= 10;
*p++ = '0' + (t >> fractionBits);
t &= fractionMask;
}
// Adjust the final digit to be closer to the original value
if (delta > t + ((uint64_t)1 << fractionBits)) {
uint64_t skew;
if (isBoundary) {
skew = delta - delta / 3 - t;
} else {
skew = delta / 2 - t;
}
uint64_t one = (uint64_t)(1) << (64 - integerBits);
uint64_t lastAccurateBit = 1ULL << 24;
uint64_t fractionMask = (one - 1) & ~(lastAccurateBit - 1);
uint64_t oneHalf = one >> 1;
if (((skew + (lastAccurateBit >> 1)) & fractionMask) == oneHalf) {
// If the skew is exactly integer + 1/2, round the last
// digit even after adjustment
int adjust = (int)(skew >> (64 - integerBits));
p[-1] -= adjust;
p[-1] &= ~1;
} else {
// Else round to nearest...
int adjust = (int)((skew + oneHalf) >> (64 - integerBits));
p[-1] -= adjust;
}
}
int forceExponential = binaryExponent > 25 || (binaryExponent == 25 && !isBoundary);
return finishFormatting(dest, length, p, firstOutputChar, forceExponential, base10Exponent);
}
#endif
// ================================================================
//
// BINARY64
//
// ================================================================
#if SWIFT_DTOA_BINARY64_SUPPORT
#if LONG_DOUBLE_IS_BINARY64
size_t swift_dtoa_optimal_long_double(long double d, char *dest, size_t length) {
return swift_dtoa_optimal_binary64_p(&d, dest, length);
}
#endif
#if DOUBLE_IS_BINARY64
size_t swift_dtoa_optimal_double(double d, char *dest, size_t length) {
return swift_dtoa_optimal_binary64_p(&d, dest, length);
}
#endif
// Format an IEEE 754 double-precision binary64 format floating-point number.
// The calling convention here assumes that C `double` is this format,
// but otherwise, this does not utilize any floating-point arithmetic
// or library routines.
size_t swift_dtoa_optimal_binary64_p(const void *d, char *dest, size_t length)
{
// Bits in raw significand (not including hidden bit, if present)
static const int significandBitCount = DBL_MANT_DIG - 1;
static const uint64_t significandMask
= ((uint64_t)1 << significandBitCount) - 1;
// Bits in raw exponent
static const int exponentBitCount = 11;
static const int exponentMask = (1 << exponentBitCount) - 1;
// Note: IEEE 754 conventionally uses 1023 as the exponent
// bias. That's because they treat the significand as a
// fixed-point number with one bit (the hidden bit) integer
// portion. The logic here reconstructs the significand as a
// pure fraction, so we need to accommodate that when
// reconstructing the binary exponent.
static const int64_t exponentBias = (1 << (exponentBitCount - 1)) - 2; // 1022
// Step 0: Deconstruct an IEEE 754 binary64 double-precision value
uint64_t raw = *(const uint64_t *)d;
int exponentBitPattern = (raw >> significandBitCount) & exponentMask;
uint64_t significandBitPattern = raw & significandMask;
int negative = raw >> 63;
// Step 1: Handle the various input cases:
if (length < 1) {
return 0;
}
int binaryExponent;
int isBoundary = significandBitPattern == 0;
uint64_t significand;
if (exponentBitPattern == exponentMask) { // NaN or Infinity
if (isBoundary) { // Infinity
return infinity(dest, length, negative);
} else {
const int quiet = (raw >> (significandBitCount - 1)) & 1;
uint64_t payload = raw & ((1ull << (significandBitCount - 2)) - 1);
return nan_details(dest, length, negative, quiet, 0, payload);
}
} else if (exponentBitPattern == 0) {
if (isBoundary) { // Zero
return zero(dest, length, negative);
} else { // subnormal
binaryExponent = 1 - exponentBias;
significand = significandBitPattern
<< (64 - significandBitCount - 1);
}
} else { // normal
binaryExponent = exponentBitPattern - exponentBias;
uint64_t hiddenBit = (uint64_t)1 << significandBitCount;
uint64_t fullSignificand = significandBitPattern + hiddenBit;
significand = fullSignificand << (64 - significandBitCount - 1);
}
// Step 2: Determine the exact unscaled target interval
// Grisu-style algorithms construct the shortest decimal digit
// sequence within a specific interval. To build the appropriate
// interval, we start by computing the midpoints between this
// floating-point value and the adjacent ones. Note that this
// step is an exact computation.
uint64_t halfUlp = (uint64_t)1 << (64 - significandBitCount - 2);
uint64_t quarterUlp = halfUlp >> 1;
uint64_t upperMidpointExact = significand + halfUlp;
uint64_t lowerMidpointExact
= significand - (isBoundary ? quarterUlp : halfUlp);
int isOddSignificand = (significandBitPattern & 1) != 0;
// Step 3: Estimate the base 10 exponent
// Grisu algorithms are based in part on a simple technique for
// generating a base-10 form for a binary floating-point number.
// Start with a binary floating-point number `f * 2^e` and then
// estimate the decimal exponent `p`. You can then rewrite your
// original number as:
//
// ```
// f * 2^e * 10^-p * 10^p
// ```
//
// The last term is part of our output, and a good estimate for
// `p` will ensure that `2^e * 10^-p` is close to 1. Multiplying
// the first three terms then yields a fraction suitable for
// producing the decimal digits. Here we use a very fast estimate
// of `p` that is never off by more than 1; we'll have
// opportunities later to correct any error.
int base10Exponent = decimalExponentFor2ToThe(binaryExponent);
// Step 4: Compute a power-of-10 scale factor
// Compute `10^-p` to 128-bit precision. We generate
// both over- and under-estimates to ensure we can exactly
// bound the later use of these values.
swift_uint128_t powerOfTenRoundedDown;
swift_uint128_t powerOfTenRoundedUp;
int powerOfTenExponent = 0;
static const int bulkFirstDigits = 7;
static const int bulkFirstDigitFactor = 1000000; // 10^(bulkFirstDigits - 1)
// Note the extra factor of 10^bulkFirstDigits -- that will give
// us a headstart on digit generation later on. (In contrast, Ryu
// uses an extra factor of 10^17 here to get all the digits up
// front, but then has to back out any extra digits. Doing that
// with a 17-digit value requires 64-bit division, which is the
// root cause of Ryu's poor performance on 32-bit processors. We
// also might have to back out extra digits if 7 is too many, but
// will only need 32-bit division in that case.)
intervalContainingPowerOf10_Binary64(-base10Exponent + bulkFirstDigits - 1,
&powerOfTenRoundedDown,
&powerOfTenRoundedUp,
&powerOfTenExponent);
const int extraBits = binaryExponent + powerOfTenExponent;
// Step 5: Scale the interval (with rounding)
// As mentioned above, the final digit generation works
// with an interval, so we actually apply the scaling
// to the upper and lower midpoint values separately.
// As part of the scaling here, we'll switch from a pure
// fraction with zero bit integer portion and 128-bit fraction
// to a fixed-point form with 32 bits in the integer portion.
static const int integerBits = 32;
// We scale the interval in one of two different ways,
// depending on whether the significand is even or odd...
swift_uint128_t u, l;
if (isOddSignificand) {
// Case A: Narrow the interval (odd significand)
// Loitsch' original Grisu2 always rounds so as to narrow the
// interval. Since our digit generation will select a value
// within the scaled interval, narrowing the interval
// guarantees that we will find a digit sequence that converts
// back to the original value.
// This ensures accuracy but, as explained in Loitsch' paper,
// this carries a risk that there will be a shorter digit
// sequence outside of our narrowed interval that we will
// miss. This risk obviously gets lower with increased
// precision, but it wasn't until the Errol paper that anyone
// had a good way to test whether a particular implementation
// had sufficient precision. That paper shows a way to enumerate
// the worst-case numbers; those numbers that are extremely close
// to the mid-points between adjacent floating-point values.
// These are the values that might sit just outside of the
// narrowed interval. By testing these values, we can verify
// the correctness of our implementation.
// Multiply out the upper midpoint, rounding down...
swift_uint128_t u1 = multiply128x64RoundingDown(powerOfTenRoundedDown,
upperMidpointExact);
// Account for residual binary exponent and adjust
// to the fixed-point format
u = shiftRightRoundingDown128(u1, integerBits - extraBits);
// Conversely for the lower midpoint...
swift_uint128_t l1 = multiply128x64RoundingUp(powerOfTenRoundedUp,
lowerMidpointExact);
l = shiftRightRoundingUp128(l1, integerBits - extraBits);
} else {
// Case B: Widen the interval (even significand)
// As explained in Errol Theorem 6, in certain cases there is
// a short decimal representation at the exact boundary of the
// scaled interval. When such a number is converted back to
// binary, it will get rounded to the adjacent even
// significand.
// So when the significand is even, we round so as to widen
// the interval in order to ensure that the exact midpoints
// are considered. Of couse, this ensures that we find a
// short result but carries a risk of selecting a result
// outside of the exact scaled interval (which would be
// inaccurate).
// The same testing approach described above (based on results
// in the Errol paper) also applies
// to this case.
swift_uint128_t u1 = multiply128x64RoundingUp(powerOfTenRoundedUp,
upperMidpointExact);
u = shiftRightRoundingUp128(u1, integerBits - extraBits);
swift_uint128_t l1 = multiply128x64RoundingDown(powerOfTenRoundedDown,
lowerMidpointExact);
l = shiftRightRoundingDown128(l1, integerBits - extraBits);
}
// Step 6: Align first digit, adjust exponent
// Calculations above used an estimate for the power-of-ten scale.
// Here, we compensate for any error in that estimate by testing
// whether we have the expected number of digits in the integer
// portion and correcting as necessary. This also serves to
// prune leading zeros from subnormals.
// Except for subnormals, this loop should never run more than once.
// For subnormals, this might run as many as 16 + bulkFirstDigits
// times.
#if HAVE_UINT128_T
while (u < ((__uint128_t)bulkFirstDigitFactor << (128 - integerBits)))
#else
while (u.high < ((uint32_t)bulkFirstDigitFactor << (32 - integerBits)))
#endif
{
base10Exponent -= 1;
multiply128xu32(&l, 10);
multiply128xu32(&u, 10);
}
// Step 7: Produce decimal digits
// One standard approach generates digits for the scaled upper and
// lower boundaries and stops when at the first digit that
// differs. For example, note that 0.1234 is the shortest decimal
// between u = 0.123456 and l = 0.123345.