/
scalar_cast_numeric.cc
920 lines (784 loc) · 34 KB
/
scalar_cast_numeric.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Implementation of casting to integer, floating point, or decimal types
#include "arrow/array/builder_primitive.h"
#include "arrow/compute/kernels/common_internal.h"
#include "arrow/compute/kernels/scalar_cast_internal.h"
#include "arrow/compute/kernels/util_internal.h"
#include "arrow/scalar.h"
#include "arrow/util/bit_block_counter.h"
#include "arrow/util/float16.h"
#include "arrow/util/int_util.h"
#include "arrow/util/value_parsing.h"
namespace arrow {
using internal::BitBlockCount;
using internal::CheckIntegersInRange;
using internal::IntegersCanFit;
using internal::OptionalBitBlockCounter;
using internal::ParseValue;
using internal::PrimitiveScalarBase;
using util::Float16;
namespace compute {
namespace internal {
// Integer -> integer cast kernel. When the cast options do not allow integer
// overflow, the input is first validated with IntegersCanFit and any failure is
// returned before the values are converted.
Status CastIntegerToInteger(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
  const auto& source = batch[0];
  const auto* cast_state = checked_cast<const CastState*>(ctx->state());
  const bool overflow_permitted = cast_state->options.allow_int_overflow;
  if (!overflow_permitted) {
    RETURN_NOT_OK(IntegersCanFit(source.array, *out->type()));
  }
  CastNumberToNumberUnsafe(source.type()->id(), out->type()->id(), source.array,
                           out->array_span_mutable());
  return Status::OK();
}
// Floating point -> floating point cast kernel: converts the values without
// consulting any cast option.
Status CastFloatingToFloating(KernelContext*, const ExecSpan& batch, ExecResult* out) {
  const auto& source = batch[0];
  const auto in_id = source.type()->id();
  const auto out_id = out->type()->id();
  CastNumberToNumberUnsafe(in_id, out_id, source.array, out->array_span_mutable());
  return Status::OK();
}
// ----------------------------------------------------------------------
// Implement fast safe floating point to integer cast
//
// Detects a lossy conversion by converting the output value back to the input
// C type and comparing it with the original input value.
template <typename InType, typename OutType, typename InT = typename InType::c_type,
          typename OutT = typename OutType::c_type>
struct WasTruncated {
  // True when `out_val` does not convert back to exactly `in_val`.
  static bool Check(OutT out_val, InT in_val) {
    const InT round_tripped = static_cast<InT>(out_val);
    return round_tripped != in_val;
  }

  // Same as Check, but slots flagged as not valid are never reported.
  static bool CheckMaybeNull(OutT out_val, InT in_val, bool is_valid) {
    return is_valid && Check(out_val, in_val);
  }
};
// Half float to int
// Half float input: the value arrives as raw uint16_t bits, so both sides are
// compared as `float` after decoding the bits with Float16.
template <typename OutType>
struct WasTruncated<HalfFloatType, OutType> {
  using OutT = typename OutType::c_type;

  // True when `out_val`, as a float, differs from the decoded half float.
  static bool Check(OutT out_val, uint16_t in_val) {
    const float round_tripped = static_cast<float>(out_val);
    return round_tripped != Float16::FromBits(in_val).ToFloat();
  }

  // Same as Check, but slots flagged as not valid are never reported.
  static bool CheckMaybeNull(OutT out_val, uint16_t in_val, bool is_valid) {
    return is_valid && Check(out_val, in_val);
  }
};
// InType is a floating point type we are planning to cast to integer
// Verifies that the float -> integer conversion from `input` to `output` lost no
// information, applying WasTruncated to every (output, input) pair.
//
// InType is the floating point input type and OutType the integer output type.
// Values are visited in the blocks produced by an OptionalBitBlockCounter over the
// input validity bitmap; slots whose validity bit is unset are never reported.
// Returns Status::Invalid naming the first offending value, otherwise Status::OK.
template <typename InType, typename OutType, typename InT = typename InType::c_type,
          typename OutT = typename OutType::c_type>
ARROW_DISABLE_UBSAN("float-cast-overflow")
Status CheckFloatTruncation(const ArraySpan& input, const ArraySpan& output) {
  auto GetErrorMessage = [&](InT val) {
    if constexpr (InType::type_id == Type::HALF_FLOAT) {
      // Half floats are handled here as raw uint16_t bit patterns; decode them so
      // the message shows the numeric value instead of the bits.
      return Status::Invalid("Float value ", Float16::FromBits(val).ToFloat(),
                             " was truncated converting to ", *output.type);
    } else {
      return Status::Invalid("Float value ", val, " was truncated converting to ",
                             *output.type);
    }
  };

  const InT* in_data = input.GetValues<InT>(1);
  const OutT* out_data = output.GetValues<OutT>(1);

  const uint8_t* bitmap = input.buffers[0].data;
  OptionalBitBlockCounter bit_counter(bitmap, input.offset, input.length);
  int64_t position = 0;
  // Bit index into `bitmap` of the first slot of the current block.
  int64_t offset_position = input.offset;
  while (position < input.length) {
    BitBlockCount block = bit_counter.NextBlock();
    bool block_out_of_bounds = false;
    if (block.popcount == block.length) {
      // Fast path: branchless
      for (int64_t i = 0; i < block.length; ++i) {
        block_out_of_bounds |=
            WasTruncated<InType, OutType>::Check(out_data[i], in_data[i]);
      }
    } else if (block.popcount > 0) {
      // Indices have nulls, must only boundscheck non-null values
      for (int64_t i = 0; i < block.length; ++i) {
        block_out_of_bounds |= WasTruncated<InType, OutType>::CheckMaybeNull(
            out_data[i], in_data[i], bit_util::GetBit(bitmap, offset_position + i));
      }
    }
    // A block with popcount == 0 holds no valid slot and is skipped entirely.
    if (ARROW_PREDICT_FALSE(block_out_of_bounds)) {
      // Slow path: rescan the block to find the first offending value to report.
      if (input.GetNullCount() > 0) {
        for (int64_t i = 0; i < block.length; ++i) {
          if (WasTruncated<InType, OutType>::CheckMaybeNull(
                  out_data[i], in_data[i],
                  bit_util::GetBit(bitmap, offset_position + i))) {
            return GetErrorMessage(in_data[i]);
          }
        }
      } else {
        for (int64_t i = 0; i < block.length; ++i) {
          if (WasTruncated<InType, OutType>::Check(out_data[i], in_data[i])) {
            return GetErrorMessage(in_data[i]);
          }
        }
      }
    }
    // Advance the data pointers and both cursors past this block.
    in_data += block.length;
    out_data += block.length;
    position += block.length;
    offset_position += block.length;
  }
  return Status::OK();
}
// Selects the CheckFloatTruncation instantiation matching the integer type of
// `output`. Any other output type trips a debug assertion and yields Status::OK.
template <typename InType>
Status CheckFloatToIntTruncationImpl(const ArraySpan& input, const ArraySpan& output) {
  const auto out_id = output.type->id();
  if (out_id == Type::INT8) {
    return CheckFloatTruncation<InType, Int8Type>(input, output);
  }
  if (out_id == Type::INT16) {
    return CheckFloatTruncation<InType, Int16Type>(input, output);
  }
  if (out_id == Type::INT32) {
    return CheckFloatTruncation<InType, Int32Type>(input, output);
  }
  if (out_id == Type::INT64) {
    return CheckFloatTruncation<InType, Int64Type>(input, output);
  }
  if (out_id == Type::UINT8) {
    return CheckFloatTruncation<InType, UInt8Type>(input, output);
  }
  if (out_id == Type::UINT16) {
    return CheckFloatTruncation<InType, UInt16Type>(input, output);
  }
  if (out_id == Type::UINT32) {
    return CheckFloatTruncation<InType, UInt32Type>(input, output);
  }
  if (out_id == Type::UINT64) {
    return CheckFloatTruncation<InType, UInt64Type>(input, output);
  }
  // Not an integer output type: callers are not expected to get here.
  DCHECK(false);
  return Status::OK();
}
// Selects the truncation check matching the floating point type of `input`
// (float, double or half float). Any other input type trips a debug assertion
// and yields Status::OK.
Status CheckFloatToIntTruncation(const ExecValue& input, const ExecResult& output) {
  const auto in_id = input.type()->id();
  if (in_id == Type::FLOAT) {
    return CheckFloatToIntTruncationImpl<FloatType>(input.array, *output.array_span());
  }
  if (in_id == Type::DOUBLE) {
    return CheckFloatToIntTruncationImpl<DoubleType>(input.array, *output.array_span());
  }
  if (in_id == Type::HALF_FLOAT) {
    return CheckFloatToIntTruncationImpl<HalfFloatType>(input.array,
                                                        *output.array_span());
  }
  // Not a floating point input type: callers are not expected to get here.
  DCHECK(false);
  return Status::OK();
}
// Floating point -> integer cast kernel. The values are converted first; unless
// the cast options allow float truncation, the result is then compared against
// the input and a failing comparison is returned.
Status CastFloatingToInteger(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
  const auto* cast_state = checked_cast<const CastState*>(ctx->state());
  const auto& source = batch[0];
  CastNumberToNumberUnsafe(source.type()->id(), out->type()->id(), source.array,
                           out->array_span_mutable());
  const bool truncation_permitted = cast_state->options.allow_float_truncate;
  if (!truncation_permitted) {
    RETURN_NOT_OK(CheckFloatToIntTruncation(source, *out));
  }
  return Status::OK();
}
// ----------------------------------------------------------------------
// Implement fast integer to floating point cast
// These are the limits for exact representation of whole numbers in floating
// point numbers
// Primary template: intentionally empty, only the specializations carry a bound.
template <typename T>
struct FloatingIntegerBound {};

// Bound used for `float`: 2^24.
template <>
struct FloatingIntegerBound<float> {
  static constexpr int64_t value = int64_t{1} << 24;
};

// Bound used for `double`: 2^53.
template <>
struct FloatingIntegerBound<double> {
  static constexpr int64_t value = int64_t{1} << 53;
};
// Checks that every integer in `input` lies within the bound given by
// FloatingIntegerBound for the output C type. The lower bound is the negated
// limit for signed input types and 0 for unsigned ones.
template <typename InType, typename OutType, typename InT = typename InType::c_type,
          typename OutT = typename OutType::c_type,
          bool IsSigned = is_signed_integer_type<InType>::value>
Status CheckIntegerFloatTruncateImpl(const ExecValue& input) {
  using InScalarType = typename TypeTraits<InType>::ScalarType;
  const int64_t limit = FloatingIntegerBound<OutT>::value;
  const int64_t lowest = IsSigned ? -limit : 0;
  InScalarType bound_lower(lowest);
  InScalarType bound_upper(limit);
  return CheckIntegersInRange(input.array, bound_lower, bound_upper);
}
// Decides, from the input integer type and the requested output type, whether a
// range check is needed before an integer -> floating point cast, and runs it.
//
// NOTE(review): HALF_FLOAT is never tested for explicitly. A HALF_FLOAT output
// therefore takes the "not DOUBLE" branch for 32-bit inputs (float bound) and the
// "not FLOAT" branch for 64-bit inputs (double bound), and 8/16-bit inputs are
// accepted unchecked. Confirm whether that is intended.
Status CheckForIntegerToFloatingTruncation(const ExecValue& input, Type::type out_type) {
  const bool to_double = (out_type == Type::DOUBLE);
  const bool to_float = (out_type == Type::FLOAT);
  switch (input.type()->id()) {
    // Small integers are all exactly representable as whole numbers
    case Type::INT8:
    case Type::INT16:
    case Type::UINT8:
    case Type::UINT16:
      return Status::OK();
    case Type::INT32:
      // No check is made for 32-bit input when the output is DOUBLE.
      return to_double ? Status::OK()
                       : CheckIntegerFloatTruncateImpl<Int32Type, FloatType>(input);
    case Type::UINT32:
      return to_double ? Status::OK()
                       : CheckIntegerFloatTruncateImpl<UInt32Type, FloatType>(input);
    case Type::INT64:
      return to_float ? CheckIntegerFloatTruncateImpl<Int64Type, FloatType>(input)
                      : CheckIntegerFloatTruncateImpl<Int64Type, DoubleType>(input);
    case Type::UINT64:
      return to_float ? CheckIntegerFloatTruncateImpl<UInt64Type, FloatType>(input)
                      : CheckIntegerFloatTruncateImpl<UInt64Type, DoubleType>(input);
    default:
      break;
  }
  // Not an integer input type: callers are not expected to get here.
  DCHECK(false);
  return Status::OK();
}
// Integer -> floating point cast kernel. Unless the cast options allow float
// truncation, the input range is validated first and any failure is returned
// before the values are converted.
Status CastIntegerToFloating(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
  const auto* cast_state = checked_cast<const CastState*>(ctx->state());
  const auto& source = batch[0];
  Type::type out_type = out->type()->id();
  const bool truncation_permitted = cast_state->options.allow_float_truncate;
  if (!truncation_permitted) {
    RETURN_NOT_OK(CheckForIntegerToFloatingTruncation(source, out_type));
  }
  CastNumberToNumberUnsafe(source.type()->id(), out_type, source.array,
                           out->array_span_mutable());
  return Status::OK();
}
// ----------------------------------------------------------------------
// Boolean to number
// Element-wise operation mapping a boolean to 1 (true) or 0 (false) in the
// output value type.
struct BooleanToNumber {
  template <typename OutValue, typename Arg0Value>
  static OutValue Call(KernelContext*, Arg0Value val, Status*) {
    if (val) {
      return static_cast<OutValue>(1);
    }
    return static_cast<OutValue>(0);
  }
};
// Boolean -> number cast: runs BooleanToNumber through applicator::ScalarUnary.
template <typename O>
struct CastFunctor<O, BooleanType, enable_if_number<O>> {
  static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
    using Kernel = applicator::ScalarUnary<O, BooleanType, BooleanToNumber>;
    return Kernel::Exec(ctx, batch, out);
  }
};
// ----------------------------------------------------------------------
// String to number
template <typename OutType>
struct ParseString {
template <typename OutValue, typename Arg0Value>
OutValue Call(KernelContext* ctx, Arg0Value val, Status* st) const {
OutValue result = OutValue(0);
if (ARROW_PREDICT_FALSE(!ParseValue<OutType>(val.data(), val.size(), &result))) {
*st = Status::Invalid("Failed to parse string: '", val, "' as a scalar of type ",
TypeTraits<OutType>::type_singleton()->ToString());
}
return result;
}
};
// Base-binary (string-like) -> number cast: runs ParseString<O> through
// applicator::ScalarUnaryNotNull.
template <typename O, typename I>
struct CastFunctor<
    O, I, enable_if_t<(is_number_type<O>::value && is_base_binary_type<I>::value)>> {
  static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
    using Kernel = applicator::ScalarUnaryNotNull<O, I, ParseString<O>>;
    return Kernel::Exec(ctx, batch, out);
  }
};
// Explicit StringType -> HalfFloatType cast: runs ParseString<HalfFloatType>
// through applicator::ScalarUnaryNotNull.
template <>
struct CastFunctor<HalfFloatType, StringType, enable_if_t<true>> {
  static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
    using Parser = ParseString<HalfFloatType>;
    using Kernel = applicator::ScalarUnaryNotNull<HalfFloatType, StringType, Parser>;
    return Kernel::Exec(ctx, batch, out);
  }
};
// ----------------------------------------------------------------------
// Decimal to integer
// Shared state and helper for the decimal -> integer operations: the input
// scale and whether out-of-range values are tolerated.
struct DecimalToIntegerMixin {
  // Converts an already-rescaled decimal to OutValue using its low bits. Unless
  // overflow is allowed, a value outside OutValue's numeric limits sets `*st` to
  // Invalid and yields zero.
  template <typename OutValue, typename Arg0Value>
  OutValue ToInteger(KernelContext* ctx, const Arg0Value& val, Status* st) const {
    constexpr auto kLowest = std::numeric_limits<OutValue>::min();
    constexpr auto kHighest = std::numeric_limits<OutValue>::max();
    if (!allow_int_overflow_) {
      if (ARROW_PREDICT_FALSE(val < kLowest || val > kHighest)) {
        *st = Status::Invalid("Integer value out of bounds");
        return OutValue{};  // Zero
      }
    }
    return static_cast<OutValue>(val.low_bits());
  }

  DecimalToIntegerMixin(int32_t in_scale, bool allow_int_overflow)
      : in_scale_(in_scale), allow_int_overflow_(allow_int_overflow) {}

  int32_t in_scale_;
  bool allow_int_overflow_;
};
// Decimal -> integer operation that increases the scale by `-in_scale_` without
// checking the rescale, then converts with ToInteger.
struct UnsafeUpscaleDecimalToInteger : public DecimalToIntegerMixin {
  using DecimalToIntegerMixin::DecimalToIntegerMixin;

  template <typename OutValue, typename Arg0Value>
  OutValue Call(KernelContext* ctx, Arg0Value val, Status* st) const {
    const auto upscaled = val.IncreaseScaleBy(-in_scale_);
    return ToInteger<OutValue>(ctx, upscaled, st);
  }
};
// Decimal -> integer operation that reduces the scale by `in_scale_` (passing
// `false` as ReduceScaleBy's second argument), then converts with ToInteger.
struct UnsafeDownscaleDecimalToInteger : public DecimalToIntegerMixin {
  using DecimalToIntegerMixin::DecimalToIntegerMixin;

  template <typename OutValue, typename Arg0Value>
  OutValue Call(KernelContext* ctx, Arg0Value val, Status* st) const {
    const auto downscaled = val.ReduceScaleBy(in_scale_, false);
    return ToInteger<OutValue>(ctx, downscaled, st);
  }
};
// Decimal -> integer operation that rescales from `in_scale_` to scale 0 with
// Rescale and forwards its error, if any, before converting with ToInteger.
struct SafeRescaleDecimalToInteger : public DecimalToIntegerMixin {
  using DecimalToIntegerMixin::DecimalToIntegerMixin;

  template <typename OutValue, typename Arg0Value>
  OutValue Call(KernelContext* ctx, Arg0Value val, Status* st) const {
    auto rescaled = val.Rescale(in_scale_, 0);
    if (ARROW_PREDICT_TRUE(rescaled.ok())) {
      return ToInteger<OutValue>(ctx, *rescaled, st);
    }
    *st = rescaled.status();
    return OutValue{};  // Zero
  }
};
// Decimal -> integer cast. Picks the element-wise operation from the cast
// options and the sign of the input scale:
//   - truncation not allowed          -> SafeRescaleDecimalToInteger
//   - truncation allowed, scale < 0   -> UnsafeUpscaleDecimalToInteger
//   - truncation allowed, scale >= 0  -> UnsafeDownscaleDecimalToInteger
template <typename O, typename I>
struct CastFunctor<O, I,
                   enable_if_t<is_integer_type<O>::value && is_decimal_type<I>::value>> {
  using out_type = typename O::c_type;

  static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
    const auto& options = checked_cast<const CastState*>(ctx->state())->options;
    const auto& in_type_inst = checked_cast<const I&>(*batch[0].type());
    const auto in_scale = in_type_inst.scale();

    if (!options.allow_decimal_truncate) {
      // Safe rescale
      applicator::ScalarUnaryNotNullStateful<O, I, SafeRescaleDecimalToInteger> kernel(
          SafeRescaleDecimalToInteger{in_scale, options.allow_int_overflow});
      return kernel.Exec(ctx, batch, out);
    }
    if (in_scale < 0) {
      // Unsafe upscale
      applicator::ScalarUnaryNotNullStateful<O, I, UnsafeUpscaleDecimalToInteger>
          kernel(UnsafeUpscaleDecimalToInteger{in_scale, options.allow_int_overflow});
      return kernel.Exec(ctx, batch, out);
    }
    // Unsafe downscale
    applicator::ScalarUnaryNotNullStateful<O, I, UnsafeDownscaleDecimalToInteger>
        kernel(UnsafeDownscaleDecimalToInteger{in_scale, options.allow_int_overflow});
    return kernel.Exec(ctx, batch, out);
  }
};
// ----------------------------------------------------------------------
// Integer to decimal
// Element-wise operation building a decimal from an integer and rescaling it
// from scale 0 to `out_scale_`. A failed rescale sets `*st` and yields a
// default-constructed value.
struct IntegerToDecimal {
  template <typename OutValue, typename IntegerType>
  OutValue Call(KernelContext*, IntegerType val, Status* st) const {
    auto rescaled = OutValue(val).Rescale(0, out_scale_);
    if (ARROW_PREDICT_FALSE(!rescaled.ok())) {
      *st = rescaled.status();
      return OutValue{};
    }
    return rescaled.MoveValueUnsafe();
  }

  int32_t out_scale_;
};
// Integer -> decimal cast. Rejects a negative output scale and an output
// precision smaller than (max decimal digits of the input integer type +
// output scale), then runs IntegerToDecimal over the batch.
template <typename O, typename I>
struct CastFunctor<O, I,
                   enable_if_t<is_decimal_type<O>::value && is_integer_type<I>::value>> {
  static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
    const auto& out_type = checked_cast<const O&>(*out->type());
    const auto out_scale = out_type.scale();
    const auto out_precision = out_type.precision();

    // verify precision and scale
    if (out_scale < 0) {
      return Status::Invalid("Scale must be non-negative");
    }
    ARROW_ASSIGN_OR_RAISE(int32_t precision, MaxDecimalDigitsForInteger(I::type_id));
    precision += out_scale;
    const bool precision_sufficient = !(out_precision < precision);
    if (!precision_sufficient) {
      return Status::Invalid(
          "Precision is not great enough for the result. "
          "It should be at least ",
          precision);
    }

    using Kernel = applicator::ScalarUnaryNotNullStateful<O, I, IntegerToDecimal>;
    Kernel kernel(IntegerToDecimal{out_scale});
    return kernel.Exec(ctx, batch, out);
  }
};
// ----------------------------------------------------------------------
// Decimal to decimal
// Helper that converts the input and output decimals
// For instance, Decimal128 -> Decimal256 requires converting, then scaling
// Decimal256 -> Decimal128 requires scaling, then truncating
// Primary template, intentionally empty: only the specializations for the
// supported (output decimal, input decimal) pairs provide ConvertInput and
// ConvertOutput.
template <typename OutDecimal, typename InDecimal>
struct DecimalConversions {};
// Output is Decimal256: the input is converted to Decimal256 up front and the
// (already Decimal256) result is handed back unchanged.
template <typename InDecimal>
struct DecimalConversions<Decimal256, InDecimal> {
  // Convert then scale
  static Decimal256 ConvertInput(InDecimal&& val) {
    Decimal256 widened(val);
    return widened;
  }
  static Decimal256 ConvertOutput(Decimal256&& val) { return val; }
};
// Decimal256 -> Decimal128: the work is done in Decimal256 and the result is
// then narrowed to the two lowest little-endian 64-bit words.
template <>
struct DecimalConversions<Decimal128, Decimal256> {
  // Scale then truncate
  static Decimal256 ConvertInput(Decimal256&& val) { return val; }
  static Decimal128 ConvertOutput(Decimal256&& val) {
    const auto le_words = bit_util::little_endian::Make(val.native_endian_array());
    // Word 1 is passed as the first Decimal128 argument, word 0 as the second.
    return Decimal128(le_words[1], le_words[0]);
  }
};
// Decimal128 -> Decimal128: no representation change is needed, so both hooks
// return their argument as-is.
template <>
struct DecimalConversions<Decimal128, Decimal128> {
  static Decimal128 ConvertInput(Decimal128&& val) { return val; }
  static Decimal128 ConvertOutput(Decimal128&& val) { return val; }
};
// Decimal -> decimal operation that increases the scale by `by_` with no
// checks; never touches the Status argument.
struct UnsafeUpscaleDecimal {
  template <typename OutValue, typename Arg0Value>
  OutValue Call(KernelContext*, Arg0Value val, Status*) const {
    using Conv = DecimalConversions<OutValue, Arg0Value>;
    auto working = Conv::ConvertInput(std::move(val));
    auto upscaled = working.IncreaseScaleBy(by_);
    return Conv::ConvertOutput(std::move(upscaled));
  }

  int32_t by_;
};
// Decimal -> decimal operation that reduces the scale by `by_` (passing `false`
// as ReduceScaleBy's second argument) with no checks; never touches the Status
// argument.
struct UnsafeDownscaleDecimal {
  template <typename OutValue, typename Arg0Value>
  OutValue Call(KernelContext*, Arg0Value val, Status*) const {
    using Conv = DecimalConversions<OutValue, Arg0Value>;
    auto working = Conv::ConvertInput(std::move(val));
    auto downscaled = working.ReduceScaleBy(by_, false);
    return Conv::ConvertOutput(std::move(downscaled));
  }

  int32_t by_;
};
// Decimal -> decimal operation that rescales from `in_scale_` to `out_scale_`
// with Rescale and then requires the result to fit in `out_precision_`. Either
// failure sets `*st` and yields a default-constructed value.
struct SafeRescaleDecimal {
  template <typename OutValue, typename Arg0Value>
  OutValue Call(KernelContext*, Arg0Value val, Status* st) const {
    using Conv = DecimalConversions<OutValue, Arg0Value>;
    auto rescaled = Conv::ConvertInput(std::move(val)).Rescale(in_scale_, out_scale_);
    if (ARROW_PREDICT_FALSE(!rescaled.ok())) {
      *st = rescaled.status();
      return {};  // Zero
    }
    const bool fits = rescaled->FitsInPrecision(out_precision_);
    if (ARROW_PREDICT_FALSE(!fits)) {
      *st = Status::Invalid("Decimal value does not fit in precision ", out_precision_);
      return {};  // Zero
    }
    return Conv::ConvertOutput(rescaled.MoveValueUnsafe());
  }

  // Declaration order is relied upon by aggregate initialization at call sites.
  int32_t out_scale_;
  int32_t out_precision_;
  int32_t in_scale_;
};
// Decimal -> decimal cast. Picks the element-wise operation from the cast
// options and the two scales:
//   - truncation not allowed                     -> SafeRescaleDecimal
//   - truncation allowed, in_scale < out_scale   -> UnsafeUpscaleDecimal
//   - truncation allowed, in_scale >= out_scale  -> UnsafeDownscaleDecimal
template <typename O, typename I>
struct CastFunctor<O, I,
                   enable_if_t<is_decimal_type<O>::value && is_decimal_type<I>::value>> {
  static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
    const auto& options = checked_cast<const CastState*>(ctx->state())->options;
    const auto& in_type = checked_cast<const I&>(*batch[0].type());
    const auto& out_type = checked_cast<const O&>(*out->type());
    const auto in_scale = in_type.scale();
    const auto out_scale = out_type.scale();

    if (!options.allow_decimal_truncate) {
      // Safe rescale
      applicator::ScalarUnaryNotNullStateful<O, I, SafeRescaleDecimal> kernel(
          SafeRescaleDecimal{out_scale, out_type.precision(), in_scale});
      return kernel.Exec(ctx, batch, out);
    }
    if (in_scale < out_scale) {
      // Unsafe upscale
      applicator::ScalarUnaryNotNullStateful<O, I, UnsafeUpscaleDecimal> kernel(
          UnsafeUpscaleDecimal{out_scale - in_scale});
      return kernel.Exec(ctx, batch, out);
    }
    // Unsafe downscale
    applicator::ScalarUnaryNotNullStateful<O, I, UnsafeDownscaleDecimal> kernel(
        UnsafeDownscaleDecimal{in_scale - out_scale});
    return kernel.Exec(ctx, batch, out);
  }
};
// ----------------------------------------------------------------------
// Real to decimal
// Element-wise operation converting a real number to a decimal with
// `out_precision_` / `out_scale_` via FromReal. When the conversion fails a
// default-constructed value is returned, and the error is reported through
// `*st` only if truncation is not allowed.
struct RealToDecimal {
  template <typename OutValue, typename RealType>
  OutValue Call(KernelContext*, RealType val, Status* st) const {
    auto converted = OutValue::FromReal(val, out_precision_, out_scale_);
    if (ARROW_PREDICT_FALSE(!converted.ok())) {
      if (!allow_truncate_) {
        *st = converted.status();
      }
      return {};  // Zero
    }
    return converted.MoveValueUnsafe();
  }

  // Declaration order is relied upon by aggregate initialization at call sites.
  int32_t out_scale_;
  int32_t out_precision_;
  bool allow_truncate_;
};
// Floating point -> decimal cast: runs RealToDecimal, configured from the
// output type's scale/precision and the `allow_decimal_truncate` option.
template <typename O, typename I>
struct CastFunctor<O, I,
                   enable_if_t<is_decimal_type<O>::value && is_floating_type<I>::value>> {
  static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
    const auto& options = checked_cast<const CastState*>(ctx->state())->options;
    const auto& out_type = checked_cast<const O&>(*out->type());
    const auto out_scale = out_type.scale();
    const auto out_precision = out_type.precision();

    using Kernel = applicator::ScalarUnaryNotNullStateful<O, I, RealToDecimal>;
    Kernel kernel(
        RealToDecimal{out_scale, out_precision, options.allow_decimal_truncate});
    return kernel.Exec(ctx, batch, out);
  }
};
// ----------------------------------------------------------------------
// String to decimal
// Element-wise operation parsing a string as a decimal and bringing it to
// `out_scale_`.
//   - parse failure: `*st` receives the parse status, result is default-constructed
//   - truncation allowed: the scale is adjusted with no checks
//   - otherwise: Rescale must succeed and the value must fit `out_precision_`,
//     else `*st` is set and the result is default-constructed
struct StringToDecimal {
  template <typename OutValue, typename StringType>
  OutValue Call(KernelContext*, StringType val, Status* st) const {
    OutValue parsed_value;
    int32_t parsed_precision;
    int32_t parsed_scale;
    auto parse_status =
        OutValue::FromString(std::string_view(val.data(), val.size()), &parsed_value,
                             &parsed_precision, &parsed_scale);
    if (ARROW_PREDICT_FALSE(!parse_status.ok())) {
      *st = parse_status;
      return {};  // Zero
    }

    if (allow_truncate_) {
      if (parsed_scale < out_scale_) {
        return parsed_value.IncreaseScaleBy(out_scale_ - parsed_scale);
      }
      return parsed_value.ReduceScaleBy(parsed_scale - out_scale_, false);
    }

    auto rescaled = parsed_value.Rescale(parsed_scale, out_scale_);
    if (!rescaled.ok()) {
      *st = rescaled.status();
      return {};  // Zero
    }
    if (!rescaled->FitsInPrecision(out_precision_)) {
      *st = Status::Invalid("Decimal value does not fit in precision ", out_precision_);
      return {};  // Zero
    }
    return rescaled.MoveValueUnsafe();
  }

  // Declaration order is relied upon by aggregate initialization at call sites.
  int32_t out_scale_;
  int32_t out_precision_;
  bool allow_truncate_;
};
// Shared implementation of the string -> decimal casts: runs StringToDecimal,
// configured from the output type's scale/precision and the
// `allow_decimal_truncate` option.
template <typename ARROW_TYPE, typename I>
struct DecimalCastFunctor {
  static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
    const auto& options = checked_cast<const CastState*>(ctx->state())->options;
    const auto& out_type = checked_cast<const ARROW_TYPE&>(*out->type());
    const auto out_scale = out_type.scale();
    const auto out_precision = out_type.precision();

    using Kernel = applicator::ScalarUnaryNotNullStateful<ARROW_TYPE, I, StringToDecimal>;
    Kernel kernel(
        StringToDecimal{out_scale, out_precision, options.allow_decimal_truncate});
    return kernel.Exec(ctx, batch, out);
  }
};
// Base-binary (string-like) -> Decimal128 cast; the implementation is inherited
// from DecimalCastFunctor.
template <typename I>
struct CastFunctor<Decimal128Type, I, enable_if_t<is_base_binary_type<I>::value>>
: public DecimalCastFunctor<Decimal128Type, I> {};
// Base-binary (string-like) -> Decimal256 cast; the implementation is inherited
// from DecimalCastFunctor.
template <typename I>
struct CastFunctor<Decimal256Type, I, enable_if_t<is_base_binary_type<I>::value>>
: public DecimalCastFunctor<Decimal256Type, I> {};
// ----------------------------------------------------------------------
// Decimal to real
// Element-wise operation converting a decimal with scale `in_scale_` to a real
// number via ToReal; never touches the Status argument.
struct DecimalToReal {
  template <typename RealType, typename Arg0Value>
  RealType Call(KernelContext*, const Arg0Value& val, Status*) const {
    const RealType as_real = val.template ToReal<RealType>(in_scale_);
    return as_real;
  }

  int32_t in_scale_;
};
// Decimal -> floating point cast: runs DecimalToReal configured with the input
// type's scale.
template <typename O, typename I>
struct CastFunctor<O, I,
                   enable_if_t<is_floating_type<O>::value && is_decimal_type<I>::value>> {
  static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
    const auto in_scale = checked_cast<const I&>(*batch[0].type()).scale();
    using Kernel = applicator::ScalarUnaryNotNullStateful<O, I, DecimalToReal>;
    Kernel kernel(DecimalToReal{in_scale});
    return kernel.Exec(ctx, batch, out);
  }
};
// ----------------------------------------------------------------------
// Top-level kernel instantiation
namespace {
template <typename OutType>
void AddCommonNumberCasts(const std::shared_ptr<DataType>& out_ty, CastFunction* func) {
AddCommonCasts(out_ty->id(), out_ty, func);
// Cast from boolean to number
DCHECK_OK(func->AddKernel(Type::BOOL, {boolean()}, out_ty,
CastFunctor<OutType, BooleanType>::Exec));
// Cast from other strings
for (const std::shared_ptr<DataType>& in_ty : BaseBinaryTypes()) {
auto exec = GenerateVarBinaryBase<CastFunctor, OutType>(*in_ty);
DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, out_ty, exec));
}
}
// Builds the cast function named `name` targeting the integer type OutType,
// with kernels for integer, floating point, half float, boolean, base-binary
// and decimal inputs.
template <typename OutType>
std::shared_ptr<CastFunction> GetCastToInteger(std::string name) {
  auto func = std::make_shared<CastFunction>(std::move(name), OutType::type_id);
  auto out_ty = TypeTraits<OutType>::type_singleton();

  // Cast from integer
  for (const auto& int_ty : IntTypes()) {
    DCHECK_OK(func->AddKernel(int_ty->id(), {int_ty}, out_ty, CastIntegerToInteger));
  }

  // Cast from floating point
  for (const auto& float_ty : FloatingPointTypes()) {
    DCHECK_OK(func->AddKernel(float_ty->id(), {float_ty}, out_ty, CastFloatingToInteger));
  }

  // Cast from half-float
  DCHECK_OK(func->AddKernel(Type::HALF_FLOAT, {InputType(Type::HALF_FLOAT)}, out_ty,
                            CastFloatingToInteger));

  // From other numbers to integer
  AddCommonNumberCasts<OutType>(out_ty, func.get());

  // From decimal to integer
  DCHECK_OK(func->AddKernel(Type::DECIMAL, {InputType(Type::DECIMAL)}, out_ty,
                            CastFunctor<OutType, Decimal128Type>::Exec));
  DCHECK_OK(func->AddKernel(Type::DECIMAL256, {InputType(Type::DECIMAL256)}, out_ty,
                            CastFunctor<OutType, Decimal256Type>::Exec));
  return func;
}
// Builds the cast function named `name` targeting the floating point type
// OutType, with kernels for integer, floating point, half float, boolean,
// base-binary and decimal inputs.
template <typename OutType>
std::shared_ptr<CastFunction> GetCastToFloating(std::string name) {
  auto func = std::make_shared<CastFunction>(std::move(name), OutType::type_id);
  auto out_ty = TypeTraits<OutType>::type_singleton();

  // Casts from integer to floating point
  for (const auto& int_ty : IntTypes()) {
    DCHECK_OK(func->AddKernel(int_ty->id(), {int_ty}, out_ty, CastIntegerToFloating));
  }

  // Cast from floating point
  for (const auto& float_ty : FloatingPointTypes()) {
    DCHECK_OK(
        func->AddKernel(float_ty->id(), {float_ty}, out_ty, CastFloatingToFloating));
  }

  // From half-float to float/double
  DCHECK_OK(func->AddKernel(Type::HALF_FLOAT, {InputType(Type::HALF_FLOAT)}, out_ty,
                            CastFloatingToFloating));

  // From other numbers to floating point
  AddCommonNumberCasts<OutType>(out_ty, func.get());

  // From decimal to floating point
  DCHECK_OK(func->AddKernel(Type::DECIMAL, {InputType(Type::DECIMAL)}, out_ty,
                            CastFunctor<OutType, Decimal128Type>::Exec));
  DCHECK_OK(func->AddKernel(Type::DECIMAL256, {InputType(Type::DECIMAL256)}, out_ty,
                            CastFunctor<OutType, Decimal256Type>::Exec));
  return func;
}
// Builds the "cast_decimal" function targeting Decimal128, with kernels for
// float/double, integer, base-binary and decimal inputs. Every kernel uses an
// output type resolved through ResolveOutputFromOptions.
std::shared_ptr<CastFunction> GetCastToDecimal128() {
  OutputType sig_out_ty(ResolveOutputFromOptions);

  auto func = std::make_shared<CastFunction>("cast_decimal", Type::DECIMAL128);
  AddCommonCasts(Type::DECIMAL128, sig_out_ty, func.get());

  // Cast from floating point
  DCHECK_OK(func->AddKernel(Type::FLOAT, {float32()}, sig_out_ty,
                            CastFunctor<Decimal128Type, FloatType>::Exec));
  DCHECK_OK(func->AddKernel(Type::DOUBLE, {float64()}, sig_out_ty,
                            CastFunctor<Decimal128Type, DoubleType>::Exec));

  // Cast from integer
  for (const auto& int_ty : IntTypes()) {
    DCHECK_OK(
        func->AddKernel(int_ty->id(), {int_ty}, sig_out_ty,
                        GenerateInteger<CastFunctor, Decimal128Type>(int_ty->id())));
  }

  // Cast from other strings
  for (const auto& binary_ty : BaseBinaryTypes()) {
    DCHECK_OK(func->AddKernel(
        binary_ty->id(), {binary_ty}, sig_out_ty,
        GenerateVarBinaryBase<CastFunctor, Decimal128Type>(binary_ty->id())));
  }

  // Cast from other decimal
  // We resolve the output type of this kernel from the CastOptions
  DCHECK_OK(func->AddKernel(Type::DECIMAL128, {InputType(Type::DECIMAL128)}, sig_out_ty,
                            CastFunctor<Decimal128Type, Decimal128Type>::Exec));
  DCHECK_OK(func->AddKernel(Type::DECIMAL256, {InputType(Type::DECIMAL256)}, sig_out_ty,
                            CastFunctor<Decimal128Type, Decimal256Type>::Exec));
  return func;
}
std::shared_ptr<CastFunction> GetCastToDecimal256() {
OutputType sig_out_ty(ResolveOutputFromOptions);
auto func = std::make_shared<CastFunction>("cast_decimal256", Type::DECIMAL256);
AddCommonCasts(Type::DECIMAL256, sig_out_ty, func.get());
// Cast from floating point
DCHECK_OK(func->AddKernel(Type::FLOAT, {float32()}, sig_out_ty,
CastFunctor<Decimal256Type, FloatType>::Exec));
DCHECK_OK(func->AddKernel(Type::DOUBLE, {float64()}, sig_out_ty,
CastFunctor<Decimal256Type, DoubleType>::Exec));
// Cast from integer
for (const std::shared_ptr<DataType>& in_ty : IntTypes()) {
auto exec = GenerateInteger<CastFunctor, Decimal256Type>(in_ty->id());
DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, sig_out_ty, std::move(exec)));
}
// Cast from other strings
for (const std::shared_ptr<DataType>& in_ty : BaseBinaryTypes()) {
auto exec = GenerateVarBinaryBase<CastFunctor, Decimal256Type>(in_ty->id());
DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, sig_out_ty, std::move(exec)));
}
// Cast from other decimal
auto exec = CastFunctor<Decimal256Type, Decimal128Type>::Exec;
DCHECK_OK(
func->AddKernel(Type::DECIMAL128, {InputType(Type::DECIMAL128)}, sig_out_ty, exec));
exec = CastFunctor<Decimal256Type, Decimal256Type>::Exec;
DCHECK_OK(
func->AddKernel(Type::DECIMAL256, {InputType(Type::DECIMAL256)}, sig_out_ty, exec));
return func;
}
// Builds the cast function targeting HalfFloat, with kernels for the common
// casts and for integer, base-binary, float and double inputs.
//
// The function is named "cast_half_float", following the "cast_<type>" naming
// used by the other cast functions in this file (it was previously the
// placeholder "func").
std::shared_ptr<CastFunction> GetCastToHalfFloat() {
  // HalfFloat support is limited for now
  auto func = std::make_shared<CastFunction>("cast_half_float", Type::HALF_FLOAT);
  AddCommonCasts(Type::HALF_FLOAT, float16(), func.get());

  // Casts from integer to floating point
  for (const std::shared_ptr<DataType>& in_ty : IntTypes()) {
    DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, float16(), CastIntegerToFloating));
  }

  // Cast from other strings to half float.
  for (const std::shared_ptr<DataType>& in_ty : BaseBinaryTypes()) {
    auto exec = GenerateVarBinaryBase<CastFunctor, HalfFloatType>(*in_ty);
    DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, float16(), exec));
  }

  // Casts from float and double to half float
  DCHECK_OK(func->AddKernel(Type::FLOAT, {InputType(Type::FLOAT)}, float16(),
                            CastFloatingToFloating));
  DCHECK_OK(func->AddKernel(Type::DOUBLE, {InputType(Type::DOUBLE)}, float16(),
                            CastFloatingToFloating));
  return func;
}
} // namespace
// Assembles every cast function defined in this file, in a fixed order: null,
// signed integers, unsigned integers, half float, float, double, decimal128,
// decimal256.
std::vector<std::shared_ptr<CastFunction>> GetNumericCasts() {
  std::vector<std::shared_ptr<CastFunction>> functions;

  // Make a cast to null that does not do much. Not sure why we need to be able
  // to cast from dict<null> -> null but there are unit tests for it
  {
    auto cast_null = std::make_shared<CastFunction>("cast_null", Type::NA);
    DCHECK_OK(cast_null->AddKernel(Type::DICTIONARY, {InputType(Type::DICTIONARY)},
                                   null(), OutputAllNull));
    functions.push_back(std::move(cast_null));
  }

  functions.push_back(GetCastToInteger<Int8Type>("cast_int8"));
  functions.push_back(GetCastToInteger<Int16Type>("cast_int16"));

  {
    auto cast_int32 = GetCastToInteger<Int32Type>("cast_int32");
    // Convert DATE32 or TIME32 to INT32 zero copy
    AddZeroCopyCast(Type::DATE32, date32(), int32(), cast_int32.get());
    AddZeroCopyCast(Type::TIME32, InputType(Type::TIME32), int32(), cast_int32.get());
    functions.push_back(std::move(cast_int32));
  }

  {
    auto cast_int64 = GetCastToInteger<Int64Type>("cast_int64");
    // Convert DATE64, DURATION, TIMESTAMP, TIME64 to INT64 zero copy
    AddZeroCopyCast(Type::DATE64, InputType(Type::DATE64), int64(), cast_int64.get());
    AddZeroCopyCast(Type::DURATION, InputType(Type::DURATION), int64(),
                    cast_int64.get());
    AddZeroCopyCast(Type::TIMESTAMP, InputType(Type::TIMESTAMP), int64(),
                    cast_int64.get());
    AddZeroCopyCast(Type::TIME64, InputType(Type::TIME64), int64(), cast_int64.get());
    functions.push_back(std::move(cast_int64));
  }

  functions.push_back(GetCastToInteger<UInt8Type>("cast_uint8"));
  functions.push_back(GetCastToInteger<UInt16Type>("cast_uint16"));
  functions.push_back(GetCastToInteger<UInt32Type>("cast_uint32"));
  functions.push_back(GetCastToInteger<UInt64Type>("cast_uint64"));

  // HalfFloat support is limited for now
  functions.push_back(GetCastToHalfFloat());

  functions.push_back(GetCastToFloating<FloatType>("cast_float"));
  functions.push_back(GetCastToFloating<DoubleType>("cast_double"));

  functions.push_back(GetCastToDecimal128());
  functions.push_back(GetCastToDecimal256());
  return functions;
}
} // namespace internal
} // namespace compute
} // namespace arrow