From 76d2319b703630183370b903c2796fa253f08df2 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 3 Aug 2022 16:05:51 -0400 Subject: [PATCH 01/12] first pass of decimal256 --- go/arrow/array/array.go | 2 +- go/arrow/array/array_test.go | 2 +- go/arrow/array/builder.go | 3 + go/arrow/array/decimal256.go | 327 ++++++++++++++ go/arrow/array/dictionary.go | 27 +- go/arrow/datatype_fixedwidth.go | 20 + go/arrow/datatype_fixedwidth_test.go | 27 ++ go/arrow/decimal128/decimal128.go | 14 +- go/arrow/decimal256/decimal256.go | 569 +++++++++++++++++++++++++ go/arrow/decimal256/decimal256_test.go | 225 ++++++++++ go/arrow/internal/arrjson/arrjson.go | 30 ++ go/arrow/scalar/scalar.go | 51 ++- go/arrow/type_traits_decimal256.go | 70 +++ 13 files changed, 1355 insertions(+), 12 deletions(-) create mode 100644 go/arrow/array/decimal256.go create mode 100644 go/arrow/decimal256/decimal256.go create mode 100644 go/arrow/decimal256/decimal256_test.go create mode 100644 go/arrow/type_traits_decimal256.go diff --git a/go/arrow/array/array.go b/go/arrow/array/array.go index 07fa343fb69be..3a7231fb24342 100644 --- a/go/arrow/array/array.go +++ b/go/arrow/array/array.go @@ -166,7 +166,7 @@ func init() { arrow.INTERVAL_MONTHS: func(data arrow.ArrayData) arrow.Array { return NewMonthIntervalData(data) }, arrow.INTERVAL_DAY_TIME: func(data arrow.ArrayData) arrow.Array { return NewDayTimeIntervalData(data) }, arrow.DECIMAL128: func(data arrow.ArrayData) arrow.Array { return NewDecimal128Data(data) }, - arrow.DECIMAL256: unsupportedArrayType, + arrow.DECIMAL256: func(data arrow.ArrayData) arrow.Array { return NewDecimal256Data(data) }, arrow.LIST: func(data arrow.ArrayData) arrow.Array { return NewListData(data) }, arrow.STRUCT: func(data arrow.ArrayData) arrow.Array { return NewStructData(data) }, arrow.SPARSE_UNION: unsupportedArrayType, diff --git a/go/arrow/array/array_test.go b/go/arrow/array/array_test.go index bc01bc8a89e3c..445a3ea21aedd 100644 --- a/go/arrow/array/array_test.go +++ 
b/go/arrow/array/array_test.go @@ -74,6 +74,7 @@ func TestMakeFromData(t *testing.T) { {name: "month_interval", d: arrow.FixedWidthTypes.MonthInterval}, {name: "day_time_interval", d: arrow.FixedWidthTypes.DayTimeInterval}, {name: "decimal128", d: &testDataType{arrow.DECIMAL128}}, + {name: "decimal256", d: &testDataType{arrow.DECIMAL256}}, {name: "month_day_nano_interval", d: arrow.FixedWidthTypes.MonthDayNanoInterval}, {name: "list", d: &testDataType{arrow.LIST}, child: []arrow.ArrayData{ @@ -122,7 +123,6 @@ func TestMakeFromData(t *testing.T) { // unsupported types {name: "sparse union", d: &testDataType{arrow.SPARSE_UNION}, expPanic: true, expError: "unsupported data type: SPARSE_UNION"}, {name: "dense union", d: &testDataType{arrow.DENSE_UNION}, expPanic: true, expError: "unsupported data type: DENSE_UNION"}, - {name: "decimal256", d: &testDataType{arrow.DECIMAL256}, expPanic: true, expError: "unsupported data type: DECIMAL256"}, // invalid types {name: "invalid(-1)", d: &testDataType{arrow.Type(-1)}, expPanic: true, expError: "invalid data type: Type(-1)"}, diff --git a/go/arrow/array/builder.go b/go/arrow/array/builder.go index 321e8a95124f3..69dde22b0276e 100644 --- a/go/arrow/array/builder.go +++ b/go/arrow/array/builder.go @@ -291,6 +291,9 @@ func NewBuilder(mem memory.Allocator, dtype arrow.DataType) Builder { return NewDecimal128Builder(mem, typ) } case arrow.DECIMAL256: + if typ, ok := dtype.(*arrow.Decimal256Type); ok { + return NewDecimal256Builder(mem, typ) + } case arrow.LIST: typ := dtype.(*arrow.ListType) return NewListBuilder(mem, typ.Elem()) diff --git a/go/arrow/array/decimal256.go b/go/arrow/array/decimal256.go new file mode 100644 index 0000000000000..5d04334c27b3b --- /dev/null +++ b/go/arrow/array/decimal256.go @@ -0,0 +1,327 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package array + +import ( + "bytes" + "fmt" + "math" + "math/big" + "reflect" + "strings" + "sync/atomic" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/decimal256" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/goccy/go-json" +) + +// Decimal256 is a type that represents an immutable sequence of 256-bit decimal values. 
+type Decimal256 struct {
+	array
+
+	// values views the value buffer as decimal256 numbers, already narrowed
+	// to this array's offset and length by setData.
+	values []decimal256.Num
+}
+
+// NewDecimal256Data wraps data in a Decimal256 array whose reference count
+// starts at 1. data must hold a *Data; anything else makes the type
+// assertion panic.
+func NewDecimal256Data(data arrow.ArrayData) *Decimal256 {
+	arr := &Decimal256{}
+	arr.refCount = 1
+	arr.setData(data.(*Data))
+	return arr
+}
+
+// Value returns the i-th element without consulting the null bitmap.
+func (a *Decimal256) Value(i int) decimal256.Num { return a.values[i] }
+
+// Values returns the slice of elements backing this array.
+func (a *Decimal256) Values() []decimal256.Num { return a.values }
+
+// String renders the array as "[v0 v1 ...]", printing "(null)" for null slots.
+func (a *Decimal256) String() string {
+	var sb strings.Builder
+	sb.WriteString("[")
+	for i, n := 0, a.Len(); i < n; i++ {
+		if i != 0 {
+			sb.WriteString(" ")
+		}
+		if a.IsNull(i) {
+			sb.WriteString("(null)")
+			continue
+		}
+		fmt.Fprintf(&sb, "%v", a.Value(i))
+	}
+	sb.WriteString("]")
+	return sb.String()
+}
+
+// setData attaches data to the array and, when a value buffer (buffers[1]) is
+// present, reinterprets its bytes as decimal256 numbers restricted to the
+// [offset, offset+length) window.
+func (a *Decimal256) setData(data *Data) {
+	a.array.setData(data)
+	buf := data.buffers[1]
+	if buf == nil {
+		return
+	}
+	first := a.array.data.offset
+	last := first + a.array.data.length
+	a.values = arrow.Decimal256Traits.CastFromBytes(buf.Bytes())[first:last]
+}
+
+// getOneForMarshal returns the JSON value for slot i: nil for a null slot,
+// otherwise the stored integer divided by 10^Scale and formatted with 'g'
+// formatting at Precision digits.
+func (a *Decimal256) getOneForMarshal(i int) interface{} {
+	if a.IsNull(i) {
+		return nil
+	}
+
+	dt := a.DataType().(*arrow.Decimal256Type)
+	unscaled := new(big.Float).SetInt(a.Value(i).BigInt())
+	divisor := big.NewFloat(math.Pow10(int(dt.Scale)))
+	return unscaled.Quo(unscaled, divisor).Text('g', int(dt.Precision))
+}
+
+// MarshalJSON encodes the array as a JSON list with one entry per slot.
+func (a *Decimal256) MarshalJSON() ([]byte, error) {
+	out := make([]interface{}, a.Len())
+	for i := range out {
+		out[i] = a.getOneForMarshal(i)
+	}
+	return json.Marshal(out)
+}
+
+// arrayEqualDecimal256 reports whether every slot that is non-null in left
+// holds the same value in right. Only left's length and null bitmap are
+// inspected here.
+func arrayEqualDecimal256(left, right *Decimal256) bool {
+	for i := 0; i < left.Len(); i++ {
+		if !left.IsNull(i) && left.Value(i) != right.Value(i) {
+			return false
+		}
+	}
+	return true
+}
+
+// Decimal256Builder accumulates decimal256 values and nulls for a Decimal256 array.
+type Decimal256Builder struct {
+	builder
+
+	dtype   *arrow.Decimal256Type
+	data    *memory.Buffer   // resizable buffer backing rawData
+	rawData []decimal256.Num // data's bytes viewed as decimal256 numbers
+}
+
+// NewDecimal256Builder returns an empty builder for dtype that allocates from
+// mem. The builder starts with a reference count of 1.
+func NewDecimal256Builder(mem memory.Allocator, dtype *arrow.Decimal256Type) *Decimal256Builder {
+	return &Decimal256Builder{
+		dtype:   dtype,
+		builder: builder{mem: mem, refCount: 1},
+	}
+}
+
+// Release decreases the reference count by 1.
+// When the reference count goes to zero, the memory is freed.
+func (b *Decimal256Builder) Release() {
+	debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases")
+
+	if atomic.AddInt64(&b.refCount, -1) != 0 {
+		return
+	}
+	if b.nullBitmap != nil {
+		b.nullBitmap.Release()
+		b.nullBitmap = nil
+	}
+	if b.data != nil {
+		b.data.Release()
+		b.data = nil
+		b.rawData = nil
+	}
+}
+
+// Append reserves room for one element and stores v as a valid value.
+func (b *Decimal256Builder) Append(v decimal256.Num) {
+	b.Reserve(1)
+	b.UnsafeAppend(v)
+}
+
+// UnsafeAppend stores v as a valid value without reserving space first.
+func (b *Decimal256Builder) UnsafeAppend(v decimal256.Num) {
+	bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
+	b.rawData[b.length] = v
+	b.length++
+}
+
+// AppendNull reserves room for one element and records it as null.
+func (b *Decimal256Builder) AppendNull() {
+	b.Reserve(1)
+	b.UnsafeAppendBoolToBitmap(false)
+}
+
+// UnsafeAppendBoolToBitmap extends the builder by one slot without reserving
+// space: a valid slot sets its bit in the bitmap, an invalid one bumps the
+// null count.
+func (b *Decimal256Builder) UnsafeAppendBoolToBitmap(isValid bool) {
+	if !isValid {
+		b.nulls++
+	} else {
+		bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
+	}
+	b.length++
+}
+
+// AppendValues appends every element of v. valid marks which elements are
+// non-null; it must be empty or exactly as long as v, otherwise AppendValues
+// panics. An empty valid slice means all of v is valid.
+func (b *Decimal256Builder) AppendValues(v []decimal256.Num, valid []bool) {
+	if len(valid) != 0 && len(valid) != len(v) {
+		panic("len(v) != len(valid) && len(valid) != 0")
+	}
+
+	count := len(v)
+	if count == 0 {
+		return
+	}
+
+	b.Reserve(count)
+	// count > 0 is guaranteed by the early return above.
+	arrow.Decimal256Traits.Copy(b.rawData[b.length:], v)
+	b.builder.unsafeAppendBoolsToBitmap(valid, count)
+}
+
+// init sets up the embedded builder for capacity elements and allocates a
+// value buffer sized to match.
+func (b *Decimal256Builder) init(capacity int) {
+	b.builder.init(capacity)
+
+	b.data = memory.NewResizableBuffer(b.mem)
+	b.data.Resize(arrow.Decimal256Traits.BytesRequired(capacity))
+	b.rawData = arrow.Decimal256Traits.CastFromBytes(b.data.Bytes())
+}
+
+// Reserve ensures there is enough space for appending n elements
+// by checking the capacity and calling Resize if necessary.
+func (b *Decimal256Builder) Reserve(n int) {
+	b.builder.reserve(n, b.Resize)
+}
+
+// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(),
+// additional memory will be allocated. If n is smaller, the allocated memory may be reduced.
+// The value buffer is never sized for fewer than minBuilderCapacity elements.
+func (b *Decimal256Builder) Resize(n int) {
+	requested := n
+	if n < minBuilderCapacity {
+		n = minBuilderCapacity
+	}
+
+	if b.capacity == 0 {
+		b.init(n)
+		return
+	}
+
+	// The embedded builder is resized to the caller's request, while the
+	// value buffer uses the clamped n.
+	b.builder.resize(requested, b.init)
+	b.data.Resize(arrow.Decimal256Traits.BytesRequired(n))
+	b.rawData = arrow.Decimal256Traits.CastFromBytes(b.data.Bytes())
+}
+
+// NewArray returns the result of NewDecimal256Array as an arrow.Array.
+func (b *Decimal256Builder) NewArray() arrow.Array {
+	return b.NewDecimal256Array()
+}
+
+// NewDecimal256Array creates a Decimal256 array from the memory buffers used by the builder and resets the Decimal256Builder
+// so it can be used to build a new array.
+func (b *Decimal256Builder) NewDecimal256Array() (a *Decimal256) {
+	built := b.newData()
+	a = NewDecimal256Data(built)
+	built.Release()
+	return a
+}
+
+// newData packages the builder's null bitmap and value buffer into a Data of
+// b.length elements, then resets the builder and drops its value buffer.
+func (b *Decimal256Builder) newData() (data *Data) {
+	if want := arrow.Decimal256Traits.BytesRequired(b.length); want > 0 && want < b.data.Len() {
+		// shrink the value buffer to what the appended elements need
+		b.data.Resize(want)
+	}
+
+	buffers := []*memory.Buffer{b.nullBitmap, b.data}
+	data = NewData(b.dtype, b.length, buffers, nil, b.nulls, 0)
+	b.reset()
+
+	if b.data == nil {
+		return data
+	}
+	b.data.Release()
+	b.data = nil
+	b.rawData = nil
+	return data
+}
+
+// unmarshalOne reads a single JSON token from dec and appends it to the
+// builder. A JSON null appends a null; a float64, string or json.Number is
+// multiplied by 10^Scale, truncated to an integer and appended; any other
+// token yields a *json.UnmarshalTypeError.
+func (b *Decimal256Builder) unmarshalOne(dec *json.Decoder) error {
+	tok, err := dec.Token()
+	if err != nil {
+		return err
+	}
+
+	// The ToNearestAway rounding mode has no strong rationale: it simply gave
+	// the closest values in the original author's testing, and there is no
+	// good way to push an option all the way down here to control it.
+	// NOTE(review): a 128-bit mantissa cannot hold every 76-digit value
+	// exactly; confirm whether a wider precision was intended for decimal256.
+	parse := func(text string) (*big.Float, error) {
+		f, _, perr := big.ParseFloat(text, 10, 128, big.ToNearestAway)
+		return f, perr
+	}
+
+	var unscaled *big.Float
+	switch v := tok.(type) {
+	case nil:
+		b.AppendNull()
+		return nil
+	case float64:
+		unscaled = big.NewFloat(v)
+	case string:
+		if unscaled, err = parse(v); err != nil {
+			return err
+		}
+	case json.Number:
+		if unscaled, err = parse(v.String()); err != nil {
+			return err
+		}
+	default:
+		return &json.UnmarshalTypeError{
+			Value:  fmt.Sprint(tok),
+			Type:   reflect.TypeOf(decimal256.Num{}),
+			Offset: dec.InputOffset(),
+		}
+	}
+
+	scale := big.NewFloat(math.Pow10(int(b.dtype.Scale)))
+	val, _ := unscaled.Mul(unscaled, scale).Int(nil)
+	b.Append(decimal256.FromBigInt(val))
+	return nil
+}
+
+// unmarshal appends values from dec for as long as dec.More reports another
+// element, stopping at the first error.
+func (b *Decimal256Builder) unmarshal(dec *json.Decoder) error {
+	for dec.More() {
+		if err := b.unmarshalOne(dec); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// UnmarshalJSON will add the unmarshalled values to this builder.
+//
+// If the values are strings, they will get parsed with big.ParseFloat using
+// a rounding mode of big.ToNearestAway currently.
+func (b *Decimal256Builder) UnmarshalJSON(data []byte) error {
+	dec := json.NewDecoder(bytes.NewReader(data))
+	t, err := dec.Token()
+	if err != nil {
+		return err
+	}
+
+	if delim, ok := t.(json.Delim); !ok || delim != '[' {
+		// Report the token that was actually read: when it is not a
+		// json.Delim, delim is only the zero value.
+		return fmt.Errorf("decimal256 builder must unpack from json array, found %v", t)
+	}
+
+	return b.unmarshal(dec)
+}
+
+// Compile-time checks that the array and its builder satisfy the package interfaces.
+var (
+	_ arrow.Array = (*Decimal256)(nil)
+	_ Builder     = (*Decimal256Builder)(nil)
+)
diff --git a/go/arrow/array/dictionary.go b/go/arrow/array/dictionary.go
index c2c7717b6feed..8149dacb30664 100644
--- a/go/arrow/array/dictionary.go
+++ b/go/arrow/array/dictionary.go
@@ -382,7 +382,7 @@ func createMemoTable(mem memory.Allocator, dt arrow.DataType) (ret hashing.MemoT
 		ret = hashing.NewFloat32MemoTable(0)
 	case arrow.FLOAT64:
 		ret = hashing.NewFloat64MemoTable(0)
-	case arrow.BINARY, arrow.FIXED_SIZE_BINARY, arrow.DECIMAL128, arrow.INTERVAL_DAY_TIME, arrow.INTERVAL_MONTH_DAY_NANO:
+	case arrow.BINARY, arrow.FIXED_SIZE_BINARY, arrow.DECIMAL128, arrow.DECIMAL256, arrow.INTERVAL_DAY_TIME, arrow.INTERVAL_MONTH_DAY_NANO:
 		ret = hashing.NewBinaryMemoTable(0, 0, NewBinaryBuilder(mem, arrow.BinaryTypes.Binary))
 	case arrow.STRING:
 		ret = hashing.NewBinaryMemoTable(0, 0, NewBinaryBuilder(mem, arrow.BinaryTypes.String))
@@ -620,6 +620,13 @@ func NewDictionaryBuilderWithDict(mem memory.Allocator, dt *arrow.DictionaryType
 		}
 		return ret
 	case arrow.DECIMAL256:
+		ret := &Decimal256DictionaryBuilder{bldr}
+		if init != nil {
+			if err = ret.InsertDictValues(init.(*Decimal256)); err != nil {
+				panic(err)
+			}
+		}
+		return ret
 	case arrow.LIST:
 	case arrow.STRUCT:
 	case arrow.SPARSE_UNION:
@@ -1230,6 +1237,37 @@ func (b *Decimal128DictionaryBuilder) InsertDictValues(arr *Decimal128) (err err
 	return
 }
 
+// Decimal256DictionaryBuilder builds dictionary-encoded arrays whose
+// dictionary values are 256-bit decimals.
+type Decimal256DictionaryBuilder struct {
+	dictionaryBuilder
+}
+
+// Append adds v to the builder by passing Decimal256SizeBytes raw bytes
+// starting at &v to appendValue.
+//
+// NOTE(review): v is declared as decimal128.Num, which looks like a copy/paste
+// slip from Decimal128DictionaryBuilder; the parameter most likely needs to be
+// decimal256.Num so that the byte view does not extend past v. Left untouched
+// here because the fix also needs a decimal256 import in this file, which is
+// outside this hunk.
+func (b *Decimal256DictionaryBuilder) Append(v decimal128.Num) error {
+	return b.appendValue((*(*[arrow.Decimal256SizeBytes]byte)(unsafe.Pointer(&v)))[:])
+}
+
+// InsertDictValues inserts each value of arr into the dictionary, one
+// Decimal256SizeBytes-sized chunk at a time, stopping at the first error.
+func (b *Decimal256DictionaryBuilder) InsertDictValues(arr *Decimal256) (err error) {
+	data := arrow.Decimal256Traits.CastToBytes(arr.values)
+	for len(data) > 0 {
+		if err = b.insertDictValue(data[:arrow.Decimal256SizeBytes]); err != nil {
+			break
+		}
+		data = data[arrow.Decimal256SizeBytes:]
+	}
+	return
+}
+
 type MonthDayNanoDictionaryBuilder struct {
 	dictionaryBuilder
 }
diff --git a/go/arrow/datatype_fixedwidth.go b/go/arrow/datatype_fixedwidth.go
index bf64299a6f20e..255aea45481eb 100644
--- a/go/arrow/datatype_fixedwidth.go
+++ b/go/arrow/datatype_fixedwidth.go
@@ -529,6 +529,32 @@ func (Decimal128Type) Layout() DataTypeLayout {
 	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Decimal128SizeBytes)}}
 }
 
+// Decimal256Type represents a fixed-size 256-bit decimal type.
+type Decimal256Type struct {
+	Precision int32
+	Scale     int32
+}
+
+func (*Decimal256Type) ID() Type      { return DECIMAL256 }
+func (*Decimal256Type) Name() string  { return "decimal256" }
+func (*Decimal256Type) BitWidth() int { return 256 }
+
+// String returns the type name followed by "(precision, scale)".
+func (t *Decimal256Type) String() string {
+	return fmt.Sprintf("%s(%d, %d)", t.Name(), t.Precision, t.Scale)
+}
+
+// Fingerprint combines the type fingerprint with the bit width, precision and scale.
+func (t *Decimal256Type) Fingerprint() string {
+	return fmt.Sprintf("%s[%d,%d,%d]", typeFingerprint(t), t.BitWidth(), t.Precision, t.Scale)
+}
+
+// Layout describes the buffers of a decimal256 array: a bitmap followed by
+// fixed-width values of Decimal256SizeBytes bytes each.
+func (Decimal256Type) Layout() DataTypeLayout {
+	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Decimal256SizeBytes)}}
+}
+
 // MonthInterval represents a number of months.
type MonthInterval int32 diff --git a/go/arrow/datatype_fixedwidth_test.go b/go/arrow/datatype_fixedwidth_test.go index 65cdb7e75ca60..dcd44540ee404 100644 --- a/go/arrow/datatype_fixedwidth_test.go +++ b/go/arrow/datatype_fixedwidth_test.go @@ -69,6 +69,33 @@ func TestDecimal128Type(t *testing.T) { } } +func TestDecimal256Type(t *testing.T) { + for _, tc := range []struct { + precision int32 + scale int32 + want string + }{ + {1, 10, "decimal256(1, 10)"}, + {10, 10, "decimal256(10, 10)"}, + {10, 1, "decimal256(10, 1)"}, + } { + t.Run(tc.want, func(t *testing.T) { + dt := arrow.Decimal256Type{Precision: tc.precision, Scale: tc.scale} + if got, want := dt.BitWidth(), 256; got != want { + t.Fatalf("invalid bitwidth: got=%d, want=%d", got, want) + } + + if got, want := dt.ID(), arrow.DECIMAL256; got != want { + t.Fatalf("invalid type ID: got=%v, want=%v", got, want) + } + + if got, want := dt.String(), tc.want; got != want { + t.Fatalf("invalid stringer: got=%q, want=%q", got, want) + } + }) + } +} + func TestFixedSizeBinaryType(t *testing.T) { for _, tc := range []struct { byteWidth int diff --git a/go/arrow/decimal128/decimal128.go b/go/arrow/decimal128/decimal128.go index 308e95dc38b90..218b4c923175d 100644 --- a/go/arrow/decimal128/decimal128.go +++ b/go/arrow/decimal128/decimal128.go @@ -174,11 +174,10 @@ func (n Num) tofloat32Positive(scale int32) float32 { x := float32(n.hi) * twoTo64 x += float32(n.lo) if scale >= -38 && scale <= 38 { - x *= float32PowersOfTen[-scale+38] - } else { - x *= float32(math.Pow10(-int(scale))) + return x * float32PowersOfTen[-scale+38] } - return x + + return x * float32(math.Pow10(-int(scale))) } // ToFloat32 returns a float32 value representative of this decimal128.Num, @@ -195,11 +194,10 @@ func (n Num) tofloat64Positive(scale int32) float64 { x := float64(n.hi) * twoTo64 x += float64(n.lo) if scale >= -38 && scale <= 38 { - x *= float64PowersOfTen[-scale+38] - } else { - x *= math.Pow10(-int(scale)) + return x * 
float64PowersOfTen[-scale+38] } - return x + + return x * math.Pow10(-int(scale)) } // ToFloat64 returns a float64 value representative of this decimal128.Num, diff --git a/go/arrow/decimal256/decimal256.go b/go/arrow/decimal256/decimal256.go new file mode 100644 index 0000000000000..6b5bc3434dfd6 --- /dev/null +++ b/go/arrow/decimal256/decimal256.go @@ -0,0 +1,569 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+package decimal256
+
+import (
+	"errors"
+	"fmt"
+	"math"
+	"math/big"
+
+	"github.com/apache/arrow/go/v10/arrow/decimal128"
+	"github.com/apache/arrow/go/v10/arrow/internal/debug"
+)
+
+// MaxPrecision and MaxScale bound the precision and scale, in decimal digits,
+// handled by this package.
+const (
+	MaxPrecision = 76
+	MaxScale     = 76
+)
+
+// Num is a signed 256-bit integer stored in two's complement as four 64-bit
+// limbs.
+type Num struct {
+	// arr[0] is the lowest bits, arr[3] is the highest bits
+	arr [4]uint64
+}
+
+// New returns a new signed 256-bit integer value where x1 contains
+// the highest bits with the rest of the values in order down to the
+// lowest bits
+//
+// ie: New(1, 2, 3, 4) returns with the elements in little-endian order
+// {4, 3, 2, 1} but each value is still represented as the native endianness
+func New(x1, x2, x3, x4 uint64) Num {
+	return Num{[4]uint64{x4, x3, x2, x1}}
+}
+
+// Array returns the four limbs, lowest bits first.
+func (n Num) Array() [4]uint64 { return n.arr }
+
+// LowBits returns the lowest 64 bits of n.
+func (n Num) LowBits() uint64 { return n.arr[0] }
+
+// FromDecimal128 widens n to 256 bits, filling the two upper limbs with ones
+// when n is negative and with zeros otherwise.
+func FromDecimal128(n decimal128.Num) Num {
+	var topBits uint64
+	if n.Sign() < 0 {
+		topBits = math.MaxUint64
+	}
+	return New(topBits, topBits, uint64(n.HighBits()), n.LowBits())
+}
+
+// FromU64 returns v as a Num.
+func FromU64(v uint64) Num {
+	return Num{[4]uint64{v, 0, 0, 0}}
+}
+
+// FromI64 returns v as a Num, sign-extending negative values into the upper limbs.
+func FromI64(v int64) Num {
+	switch {
+	case v > 0:
+		return New(0, 0, 0, uint64(v))
+	case v < 0:
+		return New(math.MaxUint64, math.MaxUint64, math.MaxUint64, uint64(v))
+	default:
+		return Num{}
+	}
+}
+
+// Negate returns -n, computed as the bitwise complement plus one.
+func (n Num) Negate() Num {
+	var carry uint64 = 1
+	for i := range n.arr {
+		n.arr[i] = ^n.arr[i] + carry
+		// the carry only keeps propagating while a limb wrapped around to zero
+		if n.arr[i] != 0 {
+			carry = 0
+		}
+	}
+	return n
+}
+
+// FromFloat32 converts v to a Num holding v*10^scale rounded half to even.
+// It returns an error when v is infinite or NaN, or when the scaled value
+// does not fit in prec decimal digits.
+func FromFloat32(v float32, prec, scale int32) (Num, error) {
+	debug.Assert(prec > 0 && prec <= 76, "invalid precision for converting to decimal256")
+
+	// NaN compares false against every bound below, so reject it up front.
+	if math.IsInf(float64(v), 0) || math.IsNaN(float64(v)) {
+		return Num{}, fmt.Errorf("cannot convert %f to decimal256", v)
+	}
+
+	if v < 0 {
+		dec, err := fromPositiveFloat32(-v, prec, scale)
+		if err != nil {
+			return dec, err
+		}
+		return dec.Negate(), nil
+	}
+	return fromPositiveFloat32(v, prec, scale)
+}
+
+// fromPositiveFloat32 implements FromFloat32 for a non-negative v by scaling,
+// rounding and then peeling off one 64-bit limb at a time from the top.
+func fromPositiveFloat32(v float32, prec, scale int32) (Num, error) {
+	var pscale float32
+	if scale >= -76 && scale <= 76 {
+		pscale = float32PowersOfTen[scale+76]
+	} else {
+		pscale = float32(math.Pow10(int(scale)))
+	}
+
+	v *= pscale
+	v = float32(math.RoundToEven(float64(v)))
+	maxabs := float32PowersOfTen[prec+76]
+	if v <= -maxabs || v >= maxabs {
+		return Num{}, fmt.Errorf("cannot convert %f to decimal256(precision=%d, scale=%d): overflow",
+			v, prec, scale)
+	}
+
+	var arr [4]float32
+	arr[3] = float32(math.Floor(math.Ldexp(float64(v), -192)))
+	v -= float32(math.Ldexp(float64(arr[3]), 192))
+	arr[2] = float32(math.Floor(math.Ldexp(float64(v), -128)))
+	v -= float32(math.Ldexp(float64(arr[2]), 128))
+	arr[1] = float32(math.Floor(math.Ldexp(float64(v), -64)))
+	v -= float32(math.Ldexp(float64(arr[1]), 64))
+	arr[0] = v
+
+	debug.Assert(arr[3] >= 0, "bad conversion float32 to decimal256")
+	debug.Assert(arr[3] < 1.8446744e+19, "bad conversion float32 to decimal256") // 2**64
+	debug.Assert(arr[2] >= 0, "bad conversion float32 to decimal256")
+	debug.Assert(arr[2] < 1.8446744e+19, "bad conversion float32 to decimal256") // 2**64
+	debug.Assert(arr[1] >= 0, "bad conversion float32 to decimal256")
+	debug.Assert(arr[1] < 1.8446744e+19, "bad conversion float32 to decimal256") // 2**64
+	debug.Assert(arr[0] >= 0, "bad conversion float32 to decimal256")
+	debug.Assert(arr[0] < 1.8446744e+19, "bad conversion float32 to decimal256") // 2**64
+	return Num{[4]uint64{uint64(arr[0]), uint64(arr[1]), uint64(arr[2]), uint64(arr[3])}}, nil
+}
+
+// FromFloat64 converts v to a Num holding v*10^scale rounded half to even.
+// It returns an error when v is infinite or NaN, or when the scaled value
+// does not fit in prec decimal digits.
+func FromFloat64(v float64, prec, scale int32) (Num, error) {
+	debug.Assert(prec > 0 && prec <= 76, "invalid precision for converting to decimal256")
+
+	// NaN compares false against every bound below, so reject it up front.
+	if math.IsInf(v, 0) || math.IsNaN(v) {
+		return Num{}, fmt.Errorf("cannot convert %f to decimal256", v)
+	}
+
+	if v < 0 {
+		dec, err := fromPositiveFloat64(-v, prec, scale)
+		if err != nil {
+			return dec, err
+		}
+		return dec.Negate(), nil
+	}
+	return fromPositiveFloat64(v, prec, scale)
+}
+
+// fromPositiveFloat64 implements FromFloat64 for a non-negative v by scaling,
+// rounding and then peeling off one 64-bit limb at a time from the top.
+func fromPositiveFloat64(v float64, prec, scale int32) (Num, error) {
+	var pscale float64
+	if scale >= -76 && scale <= 76 {
+		pscale = float64PowersOfTen[scale+76]
+	} else {
+		pscale = math.Pow10(int(scale))
+	}
+
+	v *= pscale
+	v = math.RoundToEven(v)
+	maxabs := float64PowersOfTen[prec+76]
+	if v <= -maxabs || v >= maxabs {
+		return Num{}, fmt.Errorf("cannot convert %f to decimal256(precision=%d, scale=%d): overflow",
+			v, prec, scale)
+	}
+
+	var arr [4]float64
+	arr[3] = math.Floor(math.Ldexp(v, -192))
+	v -= math.Ldexp(arr[3], 192)
+	arr[2] = math.Floor(math.Ldexp(v, -128))
+	v -= math.Ldexp(arr[2], 128)
+	arr[1] = math.Floor(math.Ldexp(v, -64))
+	v -= math.Ldexp(arr[1], 64)
+	arr[0] = v
+
+	debug.Assert(arr[3] >= 0, "bad conversion float64 to decimal256")
+	debug.Assert(arr[3] < 1.8446744073709552e+19, "bad conversion float64 to decimal256") // 2**64
+	debug.Assert(arr[2] >= 0, "bad conversion float64 to decimal256")
+	debug.Assert(arr[2] < 1.8446744073709552e+19, "bad conversion float64 to decimal256") // 2**64
+	debug.Assert(arr[1] >= 0, "bad conversion float64 to decimal256")
+	debug.Assert(arr[1] < 1.8446744073709552e+19, "bad conversion float64 to decimal256") // 2**64
+	debug.Assert(arr[0] >= 0, "bad conversion float64 to decimal256")
+	debug.Assert(arr[0] < 1.8446744073709552e+19, "bad conversion float64 to decimal256") // 2**64
+	return Num{[4]uint64{uint64(arr[0]), uint64(arr[1]), uint64(arr[2]), uint64(arr[3])}}, nil
+}
+
+// tofloat32Positive converts a non-negative n to float32, dividing by
+// 10^scale. It returns +Inf as soon as either of the two highest limbs is
+// non-zero.
+func (n Num) tofloat32Positive(scale int32) float32 {
+	const twoTo64 float32 = 1.8446744e+19
+	if n.arr[3] != 0 || n.arr[2] != 0 {
+		return floatInf
+	}
+	x := float32(n.arr[1]) * twoTo64
+	x += float32(n.arr[0])
+	if scale >= -76 && scale <= 76 {
+		return x * float32PowersOfTen[-scale+76]
+	}
+
+	return x * float32(math.Pow10(-int(scale)))
+}
+
+// tofloat64Positive converts a non-negative n to float64, dividing by 10^scale.
+func (n Num) tofloat64Positive(scale int32) float64 {
+	const (
+		twoTo64  float64 = 1.8446744073709552e+19
+		twoTo128 float64 = 3.402823669209385e+38
+		twoTo192 float64 = 6.277101735386681e+57
+	)
+
+	x := float64(n.arr[3]) * twoTo192
+	x += float64(n.arr[2]) * twoTo128
+	x += float64(n.arr[1]) * twoTo64
+	x += float64(n.arr[0])
+
+	if scale >= -76 && scale <= 76 {
+		return x * float64PowersOfTen[-scale+76]
+	}
+
+	return x * math.Pow10(-int(scale))
+}
+
+// ToFloat32 returns n divided by 10^scale as a float32.
+func (n Num) ToFloat32(scale int32) float32 {
+	if n.Sign() < 0 {
+		return -n.Negate().tofloat32Positive(scale)
+	}
+	return n.tofloat32Positive(scale)
+}
+
+// ToFloat64 returns n divided by 10^scale as a float64.
+func (n Num) ToFloat64(scale int32) float64 {
+	if n.Sign() < 0 {
+		return -n.Negate().tofloat64Positive(scale)
+	}
+	return n.tofloat64Positive(scale)
+}
+
+// Sign returns 0 when n is zero, -1 when the top bit of the highest limb is
+// set and +1 otherwise.
+func (n Num) Sign() int {
+	if n == (Num{}) {
+		return 0
+	}
+	return int(1 | (int64(n.arr[3]) >> 63))
+}
+
+// FromBigInt converts v to a Num. It panics when v needs more than 256 bits.
+//
+// NOTE(review): the limb copy stores one big.Word per 64-bit limb, which
+// presumes 64-bit words; confirm the behaviour on 32-bit platforms.
+func FromBigInt(v *big.Int) (n Num) {
+	bitlen := v.BitLen()
+	if bitlen > 256 {
+		panic("arrow/decimal256: cannot represent value larger than 256bits")
+	} else if bitlen == 0 {
+		return
+	}
+
+	// Bits holds the magnitude only, so the sign is applied afterwards.
+	b := v.Bits()
+	for i, bits := range b {
+		n.arr[i] = uint64(bits)
+	}
+	if v.Sign() < 0 {
+		return n.Negate()
+	}
+	return
+}
+
+// toBigIntPositive returns the limbs of n, read as an unsigned magnitude, as a big.Int.
+func toBigIntPositive(n Num) *big.Int {
+	return new(big.Int).SetBits([]big.Word{big.Word(n.arr[0]), big.Word(n.arr[1]), big.Word(n.arr[2]), big.Word(n.arr[3])})
+}
+
+// BigInt returns n as a big.Int.
+func (n Num) BigInt() *big.Int {
+	if n.Sign() < 0 {
+		b := toBigIntPositive(n.Negate())
+		return b.Neg(b)
+	}
+	return toBigIntPositive(n)
+}
+
+// Less reports whether n < other as signed 256-bit integers.
+func (n Num) Less(other Num) bool {
+	switch {
+	case n.arr[3] != other.arr[3]:
+		// The highest limb carries the sign bit and must be compared as a
+		// signed value; the lower limbs are plain magnitudes.
+		return int64(n.arr[3]) < int64(other.arr[3])
+	case n.arr[2] != other.arr[2]:
+		return n.arr[2] < other.arr[2]
+	case n.arr[1] != other.arr[1]:
+		return n.arr[1] < other.arr[1]
+	}
+	return n.arr[0] < other.arr[0]
+}
+
+// IncreaseScaleBy returns n multiplied by 10^increase, where increase must be
+// in [0, 76]. The product goes through FromBigInt, which panics when it needs
+// more than 256 bits.
+func (n Num) IncreaseScaleBy(increase int32) Num {
+	debug.Assert(increase >= 0, "invalid amount to increase scale by")
+	debug.Assert(increase <= 76, "invalid amount to increase scale by")
+
+	v := scaleMultipliers[increase].BigInt()
+	return FromBigInt(v.Mul(n.BigInt(), v))
+}
+
+// ReduceScaleBy returns n divided by 10^reduce, where reduce must be in
+// [0, 76]. The quotient is truncated; when round is true it is moved one step
+// away from zero if the discarded remainder is at least half the divisor.
+func (n Num) ReduceScaleBy(reduce int32, round bool) Num {
+	debug.Assert(reduce >= 0, "invalid amount to reduce scale by")
+	debug.Assert(reduce <= 76, "invalid amount to reduce scale by")
+
+	if reduce == 0 {
+		return n
+	}
+
+	divisor := scaleMultipliers[reduce].BigInt()
+	result, remainder := divisor.QuoRem(n.BigInt(), divisor, new(big.Int))
+	if round {
+		divisorHalf := scaleMultipliersHalf[reduce]
+		if remainder.Abs(remainder).Cmp(divisorHalf.BigInt()) != -1 {
+			result.Add(result, big.NewInt(int64(n.Sign())))
+		}
+	}
+	return FromBigInt(result)
+}
+
+// rescaleWouldCauseDataLoss applies multiplier to n: for a negative
+// deltaScale it divides and reports loss when the remainder is non-zero,
+// otherwise it multiplies and reports loss when the product moved towards
+// zero relative to n.
+func (n Num) rescaleWouldCauseDataLoss(deltaScale int32, multiplier Num) (out Num, loss bool) {
+	var (
+		value, result, remainder *big.Int
+	)
+	value = n.BigInt()
+	if deltaScale < 0 {
+		result, remainder = new(big.Int).QuoRem(value, multiplier.BigInt(), new(big.Int))
+		return FromBigInt(result), remainder.Cmp(big.NewInt(0)) != 0
+	}
+
+	result = (&big.Int{}).Mul(value, multiplier.BigInt())
+	out = FromBigInt(result)
+	cmp := result.Cmp(value)
+	if n.Sign() < 0 {
+		loss = cmp == 1
+	} else {
+		loss = cmp == -1
+	}
+	return
+}
+
+// Rescale converts n from scale original to scale newscale. It returns an
+// error alongside the converted value when the conversion loses data, and an
+// error with a zero Num when the scale difference has no entry in
+// scaleMultipliers.
+func (n Num) Rescale(original, newscale int32) (out Num, err error) {
+	if original == newscale {
+		return n, nil
+	}
+
+	deltaScale := newscale - original
+	absDeltaScale := int32(math.Abs(float64(deltaScale)))
+	// Guard the table lookup below instead of panicking on an out-of-range index.
+	if absDeltaScale < 0 || int(absDeltaScale) >= len(scaleMultipliers) {
+		return Num{}, fmt.Errorf("cannot rescale decimal256 by %d digits", deltaScale)
+	}
+
+	multiplier := scaleMultipliers[absDeltaScale]
+	var wouldHaveLoss bool
+	out, wouldHaveLoss = n.rescaleWouldCauseDataLoss(deltaScale, multiplier)
+	if wouldHaveLoss {
+		err = errors.New("rescale data loss")
+	}
+	return
+}
+
+// Abs returns the absolute value of n.
+func (n Num) Abs() Num {
+	switch n.Sign() {
+	case -1:
+		return n.Negate()
+	}
+	return n
+}
+
+// FitsInPrecision reports whether the absolute value of n is below 10^prec,
+// where prec must be in (0, 76].
+func (n Num) FitsInPrecision(prec int32) bool {
+	debug.Assert(prec > 0, "precision must be > 0")
+	debug.Assert(prec <= 76, "precision must be <= 76")
+	return n.Abs().Less(scaleMultipliers[prec])
+}
+
+var (
+	scaleMultipliers = [...]Num{
+		FromU64(1),
+		FromU64(10),
+		FromU64(100),
+		FromU64(1000),
+		FromU64(10000),
+		FromU64(100000),
+		FromU64(1000000),
+		FromU64(10000000),
+		FromU64(100000000),
+		FromU64(1000000000),
+		FromU64(10000000000),
+		FromU64(100000000000),
+		FromU64(1000000000000),
+
FromU64(10000000000000), + FromU64(100000000000000), + FromU64(1000000000000000), + FromU64(10000000000000000), + FromU64(100000000000000000), + FromU64(1000000000000000000), + New(0, 0, 0, 10000000000000000000), + New(0, 0, 5, 7766279631452241920), + New(0, 0, 54, 3875820019684212736), + New(0, 0, 542, 1864712049423024128), + New(0, 0, 5421, 200376420520689664), + New(0, 0, 54210, 2003764205206896640), + New(0, 0, 542101, 1590897978359414784), + New(0, 0, 5421010, 15908979783594147840), + New(0, 0, 54210108, 11515845246265065472), + New(0, 0, 542101086, 4477988020393345024), + New(0, 0, 5421010862, 7886392056514347008), + New(0, 0, 54210108624, 5076944270305263616), + New(0, 0, 542101086242, 13875954555633532928), + New(0, 0, 5421010862427, 9632337040368467968), + New(0, 0, 54210108624275, 4089650035136921600), + New(0, 0, 542101086242752, 4003012203950112768), + New(0, 0, 5421010862427522, 3136633892082024448), + New(0, 0, 54210108624275221, 12919594847110692864), + New(0, 0, 542101086242752217, 68739955140067328), + New(0, 0, 5421010862427522170, 687399551400673280), + New(0, 2, 17316620476856118468, 6873995514006732800), + New(0, 29, 7145508105175220139, 13399722918938673152), + New(0, 293, 16114848830623546549, 4870020673419870208), + New(0, 2938, 13574535716559052564, 11806718586779598848), + New(0, 29387, 6618148649623664334, 7386721425538678784), + New(0, 293873, 10841254275107988496, 80237960548581376), + New(0, 2938735, 16178822382532126880, 802379605485813760), + New(0, 29387358, 14214271235644855872, 8023796054858137600), + New(0, 293873587, 13015503840481697412, 6450984253743169536), + New(0, 2938735877, 1027829888850112811, 9169610316303040512), + New(0, 29387358770, 10278298888501128114, 17909126868192198656), + New(0, 293873587705, 10549268516463523069, 13070572018536022016), + New(0, 2938735877055, 13258964796087472617, 1578511669393358848), + New(0, 29387358770557, 3462439444907864858, 15785116693933588480), + New(0, 293873587705571, 
16177650375369096972, 10277214349659471872), + New(0, 2938735877055718, 14202551164014556797, 10538423128046960640), + New(0, 29387358770557187, 12898303124178706663, 13150510911921848320), + New(0, 293873587705571876, 18302566799529756941, 2377900603251621888), + New(0, 2938735877055718769, 17004971331911604867, 5332261958806667264), + New(1, 10940614696847636083, 4029016655730084128, 16429131440647569408), + New(15, 17172426599928602752, 3396678409881738056, 16717361816799281152), + New(159, 5703569335900062977, 15520040025107828953, 1152921504606846976), + New(1593, 1695461137871974930, 7626447661401876602, 11529215046068469760), + New(15930, 16954611378719749304, 2477500319180559562, 4611686018427387904), + New(159309, 3525417123811528497, 6328259118096044006, 9223372036854775808), + New(1593091, 16807427164405733357, 7942358959831785217, 0), + New(15930919, 2053574980671369030, 5636613303479645706, 0), + New(159309191, 2089005733004138687, 1025900813667802212, 0), + New(1593091911, 2443313256331835254, 10259008136678022120, 0), + New(15930919111, 5986388489608800929, 10356360998232463120, 0), + New(159309191113, 4523652674959354447, 11329889613776873120, 0), + New(1593091911132, 8343038602174441244, 2618431695511421504, 0), + New(15930919111324, 9643409726906205977, 7737572881404663424, 0), + New(159309191113245, 4200376900514301694, 3588752519208427776, 0), + New(1593091911132452, 5110280857723913709, 17440781118374726144, 0), + New(15930919111324522, 14209320429820033867, 8387114520361296896, 0), + New(159309191113245227, 12965995782233477362, 10084168908774762496, 0), + New(1593091911132452277, 532749306367912313, 8607968719199866880, 0), + } + + scaleMultipliersHalf = [...]Num{ + FromU64(0), + FromU64(5), + FromU64(50), + FromU64(500), + FromU64(5000), + FromU64(50000), + FromU64(500000), + FromU64(5000000), + FromU64(50000000), + FromU64(500000000), + FromU64(5000000000), + FromU64(50000000000), + FromU64(500000000000), + FromU64(5000000000000), + 
FromU64(50000000000000), + FromU64(500000000000000), + FromU64(5000000000000000), + FromU64(50000000000000000), + FromU64(500000000000000000), + FromU64(5000000000000000000), + New(0, 0, 2, 13106511852580896768), + New(0, 0, 27, 1937910009842106368), + New(0, 0, 271, 932356024711512064), + New(0, 0, 2710, 9323560247115120640), + New(0, 0, 27105, 1001882102603448320), + New(0, 0, 271050, 10018821026034483200), + New(0, 0, 2710505, 7954489891797073920), + New(0, 0, 27105054, 5757922623132532736), + New(0, 0, 271050543, 2238994010196672512), + New(0, 0, 2710505431, 3943196028257173504), + New(0, 0, 27105054312, 2538472135152631808), + New(0, 0, 271050543121, 6937977277816766464), + New(0, 0, 2710505431213, 14039540557039009792), + New(0, 0, 27105054312137, 11268197054423236608), + New(0, 0, 271050543121376, 2001506101975056384), + New(0, 0, 2710505431213761, 1568316946041012224), + New(0, 0, 27105054312137610, 15683169460410122240), + New(0, 0, 271050543121376108, 9257742014424809472), + New(0, 0, 2710505431213761085, 343699775700336640), + New(0, 1, 8658310238428059234, 3436997757003366400), + New(0, 14, 12796126089442385877, 15923233496324112384), + New(0, 146, 17280796452166549082, 11658382373564710912), + New(0, 1469, 6787267858279526282, 5903359293389799424), + New(0, 14693, 12532446361666607975, 3693360712769339392), + New(0, 146936, 14643999174408770056, 40118980274290688), + New(0, 1469367, 17312783228120839248, 401189802742906880), + New(0, 14693679, 7107135617822427936, 4011898027429068800), + New(0, 146936793, 15731123957095624514, 3225492126871584768), + New(0, 1469367938, 9737286981279832213, 13808177195006296064), + New(0, 14693679385, 5139149444250564057, 8954563434096099328), + New(0, 146936793852, 14498006295086537342, 15758658046122786816), + New(0, 1469367938527, 15852854434898512116, 10012627871551455232), + New(0, 14693679385278, 10954591759308708237, 7892558346966794240), + New(0, 146936793852785, 17312197224539324294, 5138607174829735936), + 
New(0, 1469367938527859, 7101275582007278398, 14492583600878256128), + New(0, 14693679385278593, 15672523598944129139, 15798627492815699968), + New(0, 146936793852785938, 9151283399764878470, 10412322338480586752), + New(0, 1469367938527859384, 17725857702810578241, 11889503016258109440), + New(0, 14693679385278593849, 11237880364719817872, 8214565720323784704), + New(7, 17809585336819077184, 1698339204940869028, 8358680908399640576), + New(79, 12075156704804807296, 16983392049408690284, 9799832789158199296), + New(796, 10071102605790763273, 3813223830700938301, 5764607523034234880), + New(7965, 8477305689359874652, 1238750159590279781, 2305843009213693952), + New(79654, 10986080598760540056, 12387501595902797811, 4611686018427387904), + New(796545, 17627085619057642486, 13194551516770668416, 9223372036854775808), + New(7965459, 10250159527190460323, 2818306651739822853, 0), + New(79654595, 10267874903356845151, 9736322443688676914, 0), + New(796545955, 10445028665020693435, 5129504068339011060, 0), + New(7965459555, 12216566281659176272, 14401552535971007368, 0), + New(79654595556, 11485198374334453031, 14888316843743212368, 0), + New(796545955566, 4171519301087220622, 1309215847755710752, 0), + New(7965459555662, 4821704863453102988, 13092158477557107520, 0), + New(79654595556622, 11323560487111926655, 1794376259604213888, 0), + New(796545955566226, 2555140428861956854, 17943762596042138880, 0), + New(7965459555662261, 7104660214910016933, 13416929297035424256, 0), + New(79654595556622613, 15706369927971514489, 5042084454387381248, 0), + New(796545955566226138, 9489746690038731964, 13527356396454709248, 0), + } + + floatInf = float32(math.Inf(0)) + float32PowersOfTen = [...]float32{ + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1e-45, 1e-44, 1e-43, 1e-42, + 1e-41, 1e-40, 1e-39, 1e-38, 1e-37, 1e-36, 1e-35, + 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28, + 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, + 
1e-20, 1e-19, 1e-18, 1e-17, 1e-16, 1e-15, 1e-14, + 1e-13, 1e-12, 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, + 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, + 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, + 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, + 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, + 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, + 1e29, 1e30, 1e31, 1e32, 1e33, 1e34, 1e35, + 1e36, 1e37, 1e38, floatInf, floatInf, floatInf, floatInf, + floatInf, floatInf, floatInf, floatInf, floatInf, floatInf, floatInf, + floatInf, floatInf, floatInf, floatInf, floatInf, floatInf, floatInf, + floatInf, floatInf, floatInf, floatInf, floatInf, floatInf, floatInf, + floatInf, floatInf, floatInf, floatInf, floatInf, floatInf, floatInf, + floatInf, floatInf, floatInf, floatInf, floatInf, floatInf, + } + float64PowersOfTen = [...]float64{ + 1e-76, 1e-75, 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, 1e-65, + 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, 1e-56, 1e-55, 1e-54, 1e-53, + 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41, + 1e-40, 1e-39, 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, + 1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17, + 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, + 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, + 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, + 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, 1e30, 1e31, + 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, 1e40, 1e41, 1e42, 1e43, + 1e44, 1e45, 1e46, 1e47, 1e48, 1e49, 1e50, 1e51, 1e52, 1e53, 1e54, 1e55, + 1e56, 1e57, 1e58, 1e59, 1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, + 1e68, 1e69, 1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, + } +) diff --git a/go/arrow/decimal256/decimal256_test.go b/go/arrow/decimal256/decimal256_test.go new file mode 100644 index 0000000000000..719fb13e4eb31 --- /dev/null +++ 
b/go/arrow/decimal256/decimal256_test.go @@ -0,0 +1,225 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package decimal256_test + +import ( + "fmt" + "math" + "math/big" + "testing" + + "github.com/apache/arrow/go/v10/arrow/decimal256" + "github.com/stretchr/testify/assert" +) + +func TestFromU64(t *testing.T) { + for _, tc := range []struct { + v uint64 + want decimal256.Num + sign int + }{ + {0, decimal256.New(0, 0, 0, 0), 0}, + {1, decimal256.New(0, 0, 0, 1), +1}, + {2, decimal256.New(0, 0, 0, 2), +1}, + {math.MaxInt64, decimal256.New(0, 0, 0, math.MaxInt64), +1}, + {math.MaxUint64, decimal256.New(0, 0, 0, math.MaxUint64), +1}, + } { + t.Run(fmt.Sprintf("%+0#x", tc.v), func(t *testing.T) { + v := decimal256.FromU64(tc.v) + ref := new(big.Int).SetUint64(tc.v) + if got, want := v, tc.want; got != want { + t.Fatalf("invalid value. 
got=%+0#x, want=%+0#x (big-int=%+0#x)", got, want, ref) + } + if got, want := v.Sign(), tc.sign; got != want { + t.Fatalf("invalid sign for %+0#x: got=%v, want=%v", v, got, want) + } + if got, want := v.Sign(), ref.Sign(); got != want { + t.Fatalf("invalid sign for %+0#x: got=%v, want=%v", v, got, want) + } + if got, want := v.Array(), tc.want.Array(); got != want { + t.Fatalf("invalid array: got=%+0#v, want=%+0#v", got, want) + } + }) + } +} + +func u64Cnv(i int64) uint64 { return uint64(i) } + +func TestFromI64(t *testing.T) { + for _, tc := range []struct { + v int64 + want decimal256.Num + sign int + }{ + {0, decimal256.New(0, 0, 0, 0), 0}, + {1, decimal256.New(0, 0, 0, 1), 1}, + {2, decimal256.New(0, 0, 0, 2), 1}, + {math.MaxInt64, decimal256.New(0, 0, 0, math.MaxInt64), 1}, + {math.MinInt64, decimal256.New(math.MaxUint64, math.MaxUint64, math.MaxUint64, u64Cnv(math.MinInt64)), -1}, + } { + t.Run(fmt.Sprintf("%+0#x", tc.v), func(t *testing.T) { + v := decimal256.FromI64(tc.v) + ref := big.NewInt(tc.v) + if got, want := v, tc.want; got != want { + t.Fatalf("invalid value. 
got=%+0#x, want=%+0#x (big-int=%+0#x)", got, want, ref) + } + if got, want := v.Sign(), tc.sign; got != want { + t.Fatalf("invalid sign for %+0#x: got=%v, want=%v", v, got, want) + } + if got, want := v.Sign(), ref.Sign(); got != want { + t.Fatalf("invalid sign for %+0#x: got=%v, want=%v", v, got, want) + } + if got, want := v.Array(), tc.want.Array(); got != want { + t.Fatalf("invalid array: got=%+0#v, want=%+0#v", got, want) + } + }) + } +} + +func TestDecimalToBigInt(t *testing.T) { + tests := []struct { + arr [4]uint64 + exp string + }{ + {[4]uint64{0, 10084168908774762496, 12965995782233477362, 159309191113245227}, "1000000000000000000000000000000000000000000000000000000000000000000000000000"}, + {[4]uint64{0, 8362575164934789120, 5480748291476074253, 18287434882596306388}, "-1000000000000000000000000000000000000000000000000000000000000000000000000000"}, + {[4]uint64{0, 0, 0, 0}, "0"}, + {[4]uint64{17877984925544397504, 5352188884907840935, 234631617561833724, 196678011949953713}, "1234567890123456789012345678901234567890123456789012345678901234567890123456"}, + {[4]uint64{568759148165154112, 13094555188801710680, 18212112456147717891, 18250066061759597902}, "-1234567890123456789012345678901234567890123456789012345678901234567890123456"}, + } + for _, tc := range tests { + t.Run("", func(t *testing.T) { + n := decimal256.New(tc.arr[3], tc.arr[2], tc.arr[1], tc.arr[0]) + bi := n.BigInt() + + assert.Equal(t, tc.exp, bi.String()) + n2 := decimal256.FromBigInt(bi) + assert.Equal(t, n2.Array(), n.Array()) + }) + } +} + +func TestDecimalFromFloat(t *testing.T) { + tests := []struct { + val float64 + precision, scale int32 + expected string + }{ + {0, 1, 0, "0"}, + {math.Copysign(0, -1), 1, 0, "0"}, + {0, 19, 4, "0.0000"}, + {math.Copysign(0, -1), 19, 4, "0.0000"}, + {123.0, 7, 4, "123.0000"}, + {-123, 7, 4, "-123.0000"}, + {456.78, 7, 4, "456.7800"}, + {-456.78, 7, 4, "-456.7800"}, + {456.784, 5, 2, "456.78"}, + {-456.784, 5, 2, "-456.78"}, + {456.786, 5, 2, 
"456.79"}, + {-456.786, 5, 2, "-456.79"}, + {999.99, 5, 2, "999.99"}, + {-999.99, 5, 2, "-999.99"}, + {123, 19, 0, "123"}, + {-123, 19, 0, "-123"}, + {123.4, 19, 0, "123"}, + {-123.4, 19, 0, "-123"}, + {123.6, 19, 0, "124"}, + {-123.6, 19, 0, "-124"}, + // 2**62 + {4.611686018427387904e+18, 19, 0, "4611686018427387904"}, + {-4.611686018427387904e+18, 19, 0, "-4611686018427387904"}, + // 2**63 + {9.223372036854775808e+18, 19, 0, "9223372036854775808"}, + {-9.223372036854775808e+18, 19, 0, "-9223372036854775808"}, + // 2**64 + {1.8446744073709551616e+19, 20, 0, "18446744073709551616"}, + {-1.8446744073709551616e+19, 20, 0, "-18446744073709551616"}, + {9.999999999999999e+75, 76, 0, "9999999999999998863663300700064420349597509066704028242075715752105414230016"}, + {-9.999999999999999e+75, 76, 0, "-9999999999999998863663300700064420349597509066704028242075715752105414230016"}, + } + + t.Run("float64", func(t *testing.T) { + for _, tt := range tests { + t.Run(tt.expected, func(t *testing.T) { + n, err := decimal256.FromFloat64(tt.val, tt.precision, tt.scale) + assert.NoError(t, err) + + assert.Equal(t, tt.expected, big.NewFloat(n.ToFloat64(tt.scale)).Text('f', int(tt.scale))) + }) + } + + t.Run("large values", func(t *testing.T) { + // test entire float64 range + for scale := int32(-308); scale <= 308; scale++ { + val := math.Pow10(int(scale)) + n, err := decimal256.FromFloat64(val, 1, -scale) + assert.NoError(t, err) + assert.Equal(t, "1", n.BigInt().String()) + } + + for scale := int32(-307); scale <= 306; scale++ { + val := 123 * math.Pow10(int(scale)) + n, err := decimal256.FromFloat64(val, 2, -scale-1) + assert.NoError(t, err) + assert.Equal(t, "12", n.BigInt().String()) + n, err = decimal256.FromFloat64(val, 3, -scale) + assert.NoError(t, err) + assert.Equal(t, "123", n.BigInt().String()) + n, err = decimal256.FromFloat64(val, 4, -scale+1) + assert.NoError(t, err) + assert.Equal(t, "1230", n.BigInt().String()) + } + }) + }) + + t.Run("float32", func(t *testing.T) { 
+ for _, tt := range tests { + if tt.precision > 38 { + continue + } + t.Run(tt.expected, func(t *testing.T) { + n, err := decimal256.FromFloat32(float32(tt.val), tt.precision, tt.scale) + assert.NoError(t, err) + + assert.Equal(t, tt.expected, big.NewFloat(float64(n.ToFloat32(tt.scale))).Text('f', int(tt.scale))) + }) + } + + t.Run("large values", func(t *testing.T) { + // test entire float32 range + for scale := int32(-38); scale <= 38; scale++ { + val := float32(math.Pow10(int(scale))) + n, err := decimal256.FromFloat32(val, 1, -scale) + assert.NoError(t, err) + assert.Equal(t, "1", n.BigInt().String()) + } + + for scale := int32(-37); scale <= 36; scale++ { + val := 123 * float32(math.Pow10(int(scale))) + n, err := decimal256.FromFloat32(val, 2, -scale-1) + assert.NoError(t, err) + assert.Equal(t, "12", n.BigInt().String()) + n, err = decimal256.FromFloat32(val, 3, -scale) + assert.NoError(t, err) + assert.Equal(t, "123", n.BigInt().String()) + n, err = decimal256.FromFloat32(val, 4, -scale+1) + assert.NoError(t, err) + assert.Equal(t, "1230", n.BigInt().String()) + } + }) + }) +} diff --git a/go/arrow/internal/arrjson/arrjson.go b/go/arrow/internal/arrjson/arrjson.go index d8cd259e1ddce..4fae446e3f40d 100644 --- a/go/arrow/internal/arrjson/arrjson.go +++ b/go/arrow/internal/arrjson/arrjson.go @@ -31,6 +31,7 @@ import ( "github.com/apache/arrow/go/v10/arrow/array" "github.com/apache/arrow/go/v10/arrow/bitutil" "github.com/apache/arrow/go/v10/arrow/decimal128" + "github.com/apache/arrow/go/v10/arrow/decimal256" "github.com/apache/arrow/go/v10/arrow/float16" "github.com/apache/arrow/go/v10/arrow/internal/dictutils" "github.com/apache/arrow/go/v10/arrow/ipc" @@ -1447,6 +1448,14 @@ func arrayToJSON(field arrow.Field, arr arrow.Array) Array { Valids: validsToJSON(arr), } + case *array.Decimal256: + return Array{ + Name: field.Name, + Count: arr.Len(), + Data: decimal256ToJSON(arr), + Valids: validsToJSON(arr), + } + case array.ExtensionArray: return 
arrayToJSON(field, arr.Storage()) @@ -1743,6 +1752,27 @@ func decimal128FromJSON(vs []interface{}) []decimal128.Num { return o } +func decimal256ToJSON(arr *array.Decimal256) []interface{} { + o := make([]interface{}, arr.Len()) + for i := range o { + o[i] = arr.Value(i).BigInt().String() + } + return o +} + +func decimal256FromJSON(vs []interface{}) []decimal256.Num { + var tmp big.Int + o := make([]decimal256.Num, len(vs)) + for i, v := range vs { + if err := tmp.UnmarshalJSON([]byte(v.(string))); err != nil { + panic(fmt.Errorf("could not convert %v (%T) to decimal256: %w", v, v, err)) + } + + o[i] = decimal256.FromBigInt(&tmp) + } + return o +} + func strFromJSON(vs []interface{}) []string { o := make([]string, len(vs)) for i, v := range vs { diff --git a/go/arrow/scalar/scalar.go b/go/arrow/scalar/scalar.go index a96e0593c4045..82d0feb03ce14 100644 --- a/go/arrow/scalar/scalar.go +++ b/go/arrow/scalar/scalar.go @@ -30,6 +30,7 @@ import ( "github.com/apache/arrow/go/v10/arrow/array" "github.com/apache/arrow/go/v10/arrow/bitutil" "github.com/apache/arrow/go/v10/arrow/decimal128" + "github.com/apache/arrow/go/v10/arrow/decimal256" "github.com/apache/arrow/go/v10/arrow/endian" "github.com/apache/arrow/go/v10/arrow/float16" "github.com/apache/arrow/go/v10/arrow/internal/debug" @@ -297,6 +298,8 @@ func (s *Decimal128) CastTo(to arrow.DataType) (Scalar, error) { switch to.ID() { case arrow.DECIMAL128: return NewDecimal128Scalar(s.Value, to), nil + case arrow.DECIMAL256: + return NewDecimal256Scalar(decimal256.FromDecimal128(s.Value), to), nil case arrow.STRING: dt := s.Type.(*arrow.Decimal128Type) scale := big.NewFloat(math.Pow10(int(dt.Scale))) @@ -311,6 +314,50 @@ func NewDecimal128Scalar(val decimal128.Num, typ arrow.DataType) *Decimal128 { return &Decimal128{scalar{typ, true}, val} } +type Decimal256 struct { + scalar + Value decimal256.Num +} + +func (s *Decimal256) value() interface{} { return s.Value } + +func (s *Decimal256) String() string { + if !s.Valid {
+ return "null" + } + val, err := s.CastTo(arrow.BinaryTypes.String) + if err != nil { + return "..." + } + return string(val.(*String).Value.Bytes()) +} + +func (s *Decimal256) equals(rhs Scalar) bool { + return s.Value == rhs.(*Decimal256).Value +} + +func (s *Decimal256) CastTo(to arrow.DataType) (Scalar, error) { + if !s.Valid { + return MakeNullScalar(to), nil + } + + switch to.ID() { + case arrow.DECIMAL256: + return NewDecimal256Scalar(s.Value, to), nil + case arrow.STRING: + dt := s.Type.(*arrow.Decimal256Type) + scale := big.NewFloat(math.Pow10(int(dt.Scale))) + val := (&big.Float{}).SetInt(s.Value.BigInt()) + return NewStringScalar(val.Quo(val, scale).Text('g', int(dt.Precision))), nil + } + + return nil, fmt.Errorf("cannot cast non-nil decimal256 scalar to type %s", to) +} + +func NewDecimal256Scalar(val decimal256.Num, typ arrow.DataType) *Decimal256 { + return &Decimal256{scalar{typ, true}, val} +} + type Extension struct { scalar Value Scalar @@ -448,7 +495,7 @@ func init() { arrow.LARGE_STRING: func(dt arrow.DataType) Scalar { return &LargeString{&String{&Binary{scalar: scalar{dt, false}}}} }, arrow.LARGE_BINARY: func(dt arrow.DataType) Scalar { return &LargeBinary{&Binary{scalar: scalar{dt, false}}} }, arrow.LARGE_LIST: func(dt arrow.DataType) Scalar { return &LargeList{&List{scalar: scalar{dt, false}}} }, - arrow.DECIMAL256: unsupportedScalarType, + arrow.DECIMAL256: func(dt arrow.DataType) Scalar { return &Decimal256{scalar: scalar{dt, false}} }, arrow.MAP: func(dt arrow.DataType) Scalar { return &Map{&List{scalar: scalar{dt, false}}} }, arrow.EXTENSION: func(dt arrow.DataType) Scalar { return &Extension{scalar: scalar{dt, false}} }, arrow.FIXED_SIZE_LIST: func(dt arrow.DataType) Scalar { return &FixedSizeList{&List{scalar: scalar{dt, false}}} }, @@ -485,6 +532,8 @@ func GetScalar(arr arrow.Array, idx int) (Scalar, error) { return NewDayTimeIntervalScalar(arr.Value(idx)), nil case *array.Decimal128: return NewDecimal128Scalar(arr.Value(idx),
arr.DataType()), nil + case *array.Decimal256: + return NewDecimal256Scalar(arr.Value(idx), arr.DataType()), nil case *array.Duration: return NewDurationScalar(arr.Value(idx), arr.DataType()), nil case array.ExtensionArray: diff --git a/go/arrow/type_traits_decimal256.go b/go/arrow/type_traits_decimal256.go new file mode 100644 index 0000000000000..15e69e96799d8 --- /dev/null +++ b/go/arrow/type_traits_decimal256.go @@ -0,0 +1,70 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package arrow + +import ( + "reflect" + "unsafe" + + "github.com/apache/arrow/go/v10/arrow/decimal256" + "github.com/apache/arrow/go/v10/arrow/endian" +) + +// Decimal256 traits +var Decimal256Traits decimal256Traits + +const ( + Decimal256SizeBytes = int(unsafe.Sizeof(decimal256.Num{})) +) + +type decimal256Traits struct{} + +func (decimal256Traits) BytesRequired(n int) int { return Decimal256SizeBytes * n } + +func (decimal256Traits) PutValue(b []byte, v decimal256.Num) { + for i, a := range v.Array() { + start := i * 8 + endian.Native.PutUint64(b[start:], a) + } +} + +// CastFromBytes reinterprets the slice b to a slice of decimal256 +func (decimal256Traits) CastFromBytes(b []byte) []decimal256.Num { + h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) + + var res []decimal256.Num + s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) + s.Data = h.Data + s.Len = h.Len / Decimal256SizeBytes + s.Cap = h.Cap / Decimal256SizeBytes + + return res +} + +func (decimal256Traits) CastToBytes(b []decimal256.Num) []byte { + h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) + + var res []byte + s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) // header of the returned slice res, not of the input b + s.Data = h.Data + s.Len = h.Len * Decimal256SizeBytes + s.Cap = h.Cap * Decimal256SizeBytes + + return res +} + +func (decimal256Traits) Copy(dst, src []decimal256.Num) { copy(dst, src) } From 6d02ecb0dce9505ebb38742553c0868e64a1aff3 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 3 Aug 2022 17:37:42 -0400 Subject: [PATCH 02/12] ipc, arrjson and so on --- dev/archery/archery/integration/datagen.py | 3 +- docs/source/status.rst | 2 +- go/arrow/array/compare.go | 6 + go/arrow/array/decimal256.go | 4 +- go/arrow/array/decimal256_test.go | 179 +++++++++++++++++ go/arrow/array/decimal_test.go | 222 +++++++++++++++++++++ go/arrow/internal/arrjson/arrjson.go | 20 +- go/arrow/ipc/endian_swap.go | 13 ++ go/arrow/ipc/file_reader.go | 2 +- go/arrow/ipc/metadata.go | 18 +- 10 files changed, 460 insertions(+), 9 deletions(-) create mode 100644
go/arrow/array/decimal256_test.go create mode 100644 go/arrow/array/decimal_test.go diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py index 9069b04a4ecec..a0ec9d95840db 100644 --- a/dev/archery/archery/integration/datagen.py +++ b/dev/archery/archery/integration/datagen.py @@ -1599,8 +1599,7 @@ def _temp_path(): generate_decimal128_case(), - generate_decimal256_case() - .skip_category('Go') # TODO(ARROW-7948): Decimal + Go + generate_decimal256_case() .skip_category('JS'), generate_datetime_case(), diff --git a/docs/source/status.rst b/docs/source/status.rst index a1cac8af09791..e1d92200207c3 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -46,7 +46,7 @@ Data Types +-------------------+-------+-------+-------+------------+-------+-------+-------+ | Decimal128 | ✓ | ✓ | ✓ | | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+ -| Decimal256 | ✓ | ✓ | | | ✓ | | ✓ | +| Decimal256 | ✓ | ✓ | ✓ | | ✓ | | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+ | Date32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+ diff --git a/go/arrow/array/compare.go b/go/arrow/array/compare.go index 19c06de0b2846..d52aa6377438c 100644 --- a/go/arrow/array/compare.go +++ b/go/arrow/array/compare.go @@ -276,6 +276,9 @@ func Equal(left, right arrow.Array) bool { case *Decimal128: r := right.(*Decimal128) return arrayEqualDecimal128(l, r) + case *Decimal256: + r := right.(*Decimal256) + return arrayEqualDecimal256(l, r) case *Date32: r := right.(*Date32) return arrayEqualDate32(l, r) @@ -520,6 +523,9 @@ func arrayApproxEqual(left, right arrow.Array, opt equalOption) bool { case *Decimal128: r := right.(*Decimal128) return arrayEqualDecimal128(l, r) + case *Decimal256: + r := right.(*Decimal256) + return arrayEqualDecimal256(l, r) case *Date32: r := right.(*Date32) return 
arrayEqualDate32(l, r) diff --git a/go/arrow/array/decimal256.go b/go/arrow/array/decimal256.go index 5d04334c27b3b..bdbaca3af249b 100644 --- a/go/arrow/array/decimal256.go +++ b/go/arrow/array/decimal256.go @@ -269,12 +269,12 @@ func (b *Decimal256Builder) unmarshalOne(dec *json.Decoder) error { // what got me the closest equivalent values with the values // that I tested with, and there isn't a good way to push // an option all the way down here to control it. - out, _, err = big.ParseFloat(v, 10, 128, big.ToNearestAway) + out, _, err = big.ParseFloat(v, 10, 256, big.ToNearestAway) if err != nil { return err } case json.Number: - out, _, err = big.ParseFloat(v.String(), 10, 128, big.ToNearestAway) + out, _, err = big.ParseFloat(v.String(), 10, 256, big.ToNearestAway) if err != nil { return err } diff --git a/go/arrow/array/decimal256_test.go b/go/arrow/array/decimal256_test.go new file mode 100644 index 0000000000000..1206080f83798 --- /dev/null +++ b/go/arrow/array/decimal256_test.go @@ -0,0 +1,179 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package array_test + +import ( + "testing" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/decimal256" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/stretchr/testify/assert" +) + +func TestNewDecimal256Builder(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + ab := array.NewDecimal256Builder(mem, &arrow.Decimal256Type{Precision: 10, Scale: 1}) + defer ab.Release() + + ab.Retain() + ab.Release() + + want := []decimal256.Num{ + decimal256.New(1, 1, 1, 1), + decimal256.New(2, 2, 2, 2), + decimal256.New(3, 3, 3, 3), + {}, + decimal256.FromI64(-5), + decimal256.FromI64(-6), + {}, + decimal256.FromI64(8), + decimal256.FromI64(9), + decimal256.FromI64(10), + } + valids := []bool{true, true, true, false, true, true, false, true, true, true} + + for i, valid := range valids { + switch { + case valid: + ab.Append(want[i]) + default: + ab.AppendNull() + } + } + + // check state of builder before NewDecimal256Array + assert.Equal(t, 10, ab.Len(), "unexpected Len()") + assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") + + a := ab.NewArray().(*array.Decimal256) + a.Retain() + a.Release() + + // check state of builder after NewDecimal256Array + assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewDecimal256Array did not reset state") + assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewDecimal256Array did not reset state") + assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewDecimal256Array did not reset state") + + // check state of array + assert.Equal(t, 2, a.NullN(), "unexpected null count") + + assert.Equal(t, want, a.Values(), "unexpected Decimal256Values") + assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity + assert.Len(t, a.Values(), 10, "unexpected length of Decimal256Values") + + a.Release() + ab.Append(decimal256.FromI64(7)) + 
ab.Append(decimal256.FromI64(8)) + + a = ab.NewDecimal256Array() + + assert.Equal(t, 0, a.NullN()) + assert.Equal(t, []decimal256.Num{decimal256.FromI64(7), decimal256.FromI64(8)}, a.Values()) + assert.Len(t, a.Values(), 2) + + a.Release() +} + +func TestDecimal256Builder_Empty(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + ab := array.NewDecimal256Builder(mem, &arrow.Decimal256Type{Precision: 10, Scale: 1}) + defer ab.Release() + + want := []decimal256.Num{decimal256.FromI64(3), decimal256.FromI64(4)} + + ab.AppendValues([]decimal256.Num{}, nil) + a := ab.NewDecimal256Array() + assert.Zero(t, a.Len()) + a.Release() + + ab.AppendValues(nil, nil) + a = ab.NewDecimal256Array() + assert.Zero(t, a.Len()) + a.Release() + + ab.AppendValues(want, nil) + a = ab.NewDecimal256Array() + assert.Equal(t, want, a.Values()) + a.Release() + + ab.AppendValues([]decimal256.Num{}, nil) + ab.AppendValues(want, nil) + a = ab.NewDecimal256Array() + assert.Equal(t, want, a.Values()) + a.Release() + + ab.AppendValues(want, nil) + ab.AppendValues([]decimal256.Num{}, nil) + a = ab.NewDecimal256Array() + assert.Equal(t, want, a.Values()) + a.Release() +} + +func TestDecimal256Slice(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + dtype := &arrow.Decimal256Type{Precision: 10, Scale: 1} + b := array.NewDecimal256Builder(mem, dtype) + defer b.Release() + + var data = []decimal256.Num{ + decimal256.FromI64(-1), + decimal256.FromI64(+0), + decimal256.FromI64(+1), + decimal256.New(4, 4, 4, 4), + } + b.AppendValues(data[:2], nil) + b.AppendNull() + b.Append(data[3]) + + arr := b.NewDecimal256Array() + defer arr.Release() + + if got, want := arr.Len(), len(data); got != want { + t.Fatalf("invalid array length: got=%d, want=%d", got, want) + } + + slice := array.NewSliceData(arr.Data(), 2, 4) + defer slice.Release() + + sub1 := array.MakeFromData(slice) + defer sub1.Release() + 
+ + v, ok := sub1.(*array.Decimal256) + if !ok { + t.Fatalf("could not type-assert to array.Decimal256") + } + + if got, want := v.String(), `[(null) {[4 4 4 4]}]`; got != want { + t.Fatalf("got=%q, want=%q", got, want) + } + + if got, want := v.NullN(), 1; got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + if got, want := v.Data().Offset(), 2; got != want { + t.Fatalf("invalid offset: got=%d, want=%d", got, want) + } +} diff --git a/go/arrow/array/decimal_test.go b/go/arrow/array/decimal_test.go new file mode 100644 index 0000000000000..65980d25f333a --- /dev/null +++ b/go/arrow/array/decimal_test.go @@ -0,0 +1,222 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +package array_test + +import ( + "fmt" + "math/big" + "testing" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/decimal128" + "github.com/apache/arrow/go/v10/arrow/decimal256" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/stretchr/testify/suite" +) + +type decimalValue interface{} + +func bitmapFromSlice(vals []bool) []byte { + out := make([]byte, int(bitutil.BytesForBits(int64(len(vals))))) + writer := bitutil.NewBitmapWriter(out, 0, len(vals)) + for _, val := range vals { + if val { + writer.Set() + } else { + writer.Clear() + } + writer.Next() + } + writer.Finish() + return out +} + +type DecimalTestSuite struct { + suite.Suite + + dt arrow.DataType + mem *memory.CheckedAllocator +} + +func (d *DecimalTestSuite) SetupTest() { + d.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) +} + +func (d *DecimalTestSuite) TearDownTest() { + d.mem.AssertSize(d.T(), 0) +} + +func (d *DecimalTestSuite) makeData(input []decimalValue, out []byte) { + switch d.dt.ID() { + case arrow.DECIMAL128: + for _, v := range input { + arrow.Decimal128Traits.PutValue(out, v.(decimal128.Num)) + out = out[arrow.Decimal128SizeBytes:] + } + case arrow.DECIMAL256: + for _, v := range input { + arrow.Decimal256Traits.PutValue(out, v.(decimal256.Num)) + out = out[arrow.Decimal256SizeBytes:] + } + } +} + +func (d *DecimalTestSuite) testCreate(bitWidth int, prec int32, draw []decimalValue, valids []bool, offset int64) arrow.Array { + switch bitWidth { + case 128: + d.dt = &arrow.Decimal128Type{Precision: prec, Scale: 4} + case 256: + d.dt = &arrow.Decimal256Type{Precision: prec, Scale: 4} + } + + bldr := array.NewBuilder(d.mem, d.dt) + defer bldr.Release() + bldr.Reserve(len(draw)) + + nullCount := 0 + for i, b := range valids { + if b { + switch v := draw[i].(type) { + case decimal128.Num: + bldr.(*array.Decimal128Builder).Append(v) + case 
decimal256.Num: + bldr.(*array.Decimal256Builder).Append(v) + } + } else { + bldr.AppendNull() + nullCount++ + } + } + + arr := bldr.NewArray() + d.EqualValues(0, bldr.Len()) + + rawBytes := make([]byte, len(draw)*(d.dt.(arrow.FixedWidthDataType).BitWidth()/8)) + d.makeData(draw, rawBytes) + + expectedData := memory.NewBufferBytes(rawBytes) + expectedNullBitmap := bitmapFromSlice(valids) + expectedNullCount := len(draw) - bitutil.CountSetBits(expectedNullBitmap, 0, len(valids)) + + expected := array.NewData(d.dt, len(valids), []*memory.Buffer{memory.NewBufferBytes(expectedNullBitmap), expectedData}, nil, expectedNullCount, 0) + defer expected.Release() + + expectedArr := array.MakeFromData(expected) + defer expectedArr.Release() + + lhs := array.NewSlice(arr, offset, int64(arr.Len())-offset) + rhs := array.NewSlice(expectedArr, offset, int64(expectedArr.Len())-offset) + defer func() { + lhs.Release() + rhs.Release() + }() + + d.Truef(array.Equal(lhs, rhs), "expected: %s, got: %s\n", rhs, lhs) + return arr +} + +type Decimal128TestSuite struct { + DecimalTestSuite +} + +func (d *Decimal128TestSuite) runTest(f func(prec int32)) { + for prec := int32(1); prec <= 38; prec++ { + d.Run(fmt.Sprintf("prec=%d", prec), func() { f(prec) }) + } +} + +func (d *Decimal128TestSuite) TestNoNulls() { + d.runTest(func(prec int32) { + draw := []decimalValue{decimal128.FromU64(1), decimal128.FromI64(-2), + decimal128.FromU64(2389), decimal128.FromU64(4), + decimal128.FromI64(-12348)} + valids := []bool{true, true, true, true, true} + arr := d.testCreate(128, prec, draw, valids, 0) + arr.Release() + arr = d.testCreate(128, prec, draw, valids, 2) + arr.Release() + }) +} + +func (d *Decimal128TestSuite) TestWithNulls() { + d.runTest(func(prec int32) { + draw := []decimalValue{decimal128.FromU64(1), decimal128.FromU64(2), + decimal128.FromI64(-1), decimal128.FromI64(4), decimal128.FromI64(-1), + decimal128.FromI64(1), decimal128.FromI64(2)} + bigVal, _ := 
(&big.Int{}).SetString("230342903942234234", 10) + draw = append(draw, decimal128.FromBigInt(bigVal)) + + bigNeg, _ := (&big.Int{}).SetString("-23049302932235234", 10) + draw = append(draw, decimal128.FromBigInt(bigNeg)) + + valids := []bool{true, true, false, true, false, true, true, true, true} + arr := d.testCreate(128, prec, draw, valids, 0) + arr.Release() + arr = d.testCreate(128, prec, draw, valids, 2) + arr.Release() + }) +} + +type Decimal256TestSuite struct { + DecimalTestSuite +} + +func (d *Decimal256TestSuite) runTest(f func(prec int32)) { + for _, prec := range []int32{1, 2, 5, 10, 38, 39, 40, 75, 76} { + d.Run(fmt.Sprintf("prec=%d", prec), func() { f(prec) }) + } +} + +func (d *Decimal256TestSuite) TestNoNulls() { + d.runTest(func(prec int32) { + draw := []decimalValue{decimal256.FromU64(1), decimal256.FromI64(-2), + decimal256.FromU64(2389), decimal256.FromU64(4), + decimal256.FromI64(-12348)} + valids := []bool{true, true, true, true, true} + arr := d.testCreate(256, prec, draw, valids, 0) + arr.Release() + arr = d.testCreate(256, prec, draw, valids, 2) + arr.Release() + }) +} + +func (d *Decimal256TestSuite) TestWithNulls() { + d.runTest(func(prec int32) { + draw := []decimalValue{decimal256.FromU64(1), decimal256.FromU64(2), + decimal256.FromI64(-1), decimal256.FromI64(4), decimal256.FromI64(-1), + decimal256.FromI64(1), decimal256.FromI64(2)} + + // (pow(2, 255) - 1) + bigVal, _ := (&big.Int{}).SetString("57896044618658097711785492504343953926634992332820282019728792003956564819967", 10) + draw = append(draw, decimal256.FromBigInt(bigVal)) + + draw = append(draw, decimal256.FromBigInt(bigVal.Neg(bigVal))) + + valids := []bool{true, true, false, true, false, true, true, true, true} + arr := d.testCreate(256, prec, draw, valids, 0) + arr.Release() + arr = d.testCreate(256, prec, draw, valids, 2) + arr.Release() + }) +} + +func TestDecimal(t *testing.T) { + suite.Run(t, new(Decimal128TestSuite)) + suite.Run(t, new(Decimal256TestSuite)) +} diff 
--git a/go/arrow/internal/arrjson/arrjson.go b/go/arrow/internal/arrjson/arrjson.go index 4fae446e3f40d..f83cabfaf81d2 100644 --- a/go/arrow/internal/arrjson/arrjson.go +++ b/go/arrow/internal/arrjson/arrjson.go @@ -217,7 +217,9 @@ func typeToJSON(arrowType arrow.DataType) (json.RawMessage, error) { case *arrow.FixedSizeBinaryType: typ = byteWidthJSON{"fixedsizebinary", dt.ByteWidth} case *arrow.Decimal128Type: - typ = decimalJSON{"decimal", int(dt.Scale), int(dt.Precision)} + typ = decimalJSON{"decimal", int(dt.Scale), int(dt.Precision), 128} + case *arrow.Decimal256Type: + typ = decimalJSON{"decimal", int(dt.Scale), int(dt.Precision), 256} default: return nil, fmt.Errorf("unknown arrow.DataType %v", arrowType) } @@ -454,7 +456,12 @@ func typeFromJSON(typ json.RawMessage, children []FieldWrapper) (arrowType arrow if err = json.Unmarshal(typ, &t); err != nil { return } - arrowType = &arrow.Decimal128Type{Precision: int32(t.Precision), Scale: int32(t.Scale)} + switch t.BitWidth { + case 128: + arrowType = &arrow.Decimal128Type{Precision: int32(t.Precision), Scale: int32(t.Scale)} + case 256: + arrowType = &arrow.Decimal256Type{Precision: int32(t.Precision), Scale: int32(t.Scale)} + } } if arrowType == nil { @@ -578,6 +585,7 @@ type decimalJSON struct { Name string `json:"name"` Scale int `json:"scale,omitempty"` Precision int `json:"precision,omitempty"` + BitWidth int `json:"bitWidth,omitempty"` } type byteWidthJSON struct { @@ -1126,6 +1134,14 @@ func arrayFromJSON(mem memory.Allocator, dt arrow.DataType, arr Array) arrow.Arr bldr.AppendValues(data, valids) return returnNewArrayData(bldr) + case *arrow.Decimal256Type: + bldr := array.NewDecimal256Builder(mem, dt) + defer bldr.Release() + data := decimal256FromJSON(arr.Data) + valids := validsFromJSON(arr.Valids) + bldr.AppendValues(data, valids) + return returnNewArrayData(bldr) + case arrow.ExtensionType: storage := arrayFromJSON(mem, dt.StorageType(), arr) defer storage.Release() diff --git 
a/go/arrow/ipc/endian_swap.go b/go/arrow/ipc/endian_swap.go index f6ccc0d1a6c1e..b0625d30f4097 100644 --- a/go/arrow/ipc/endian_swap.go +++ b/go/arrow/ipc/endian_swap.go @@ -82,6 +82,19 @@ func swapType(dt arrow.DataType, data *array.Data) (err error) { rawdata[idx] = bits.ReverseBytes64(rawdata[idx+1]) rawdata[idx+1] = tmp } + case *arrow.Decimal256Type: + rawdata := arrow.Uint64Traits.CastFromBytes(data.Buffers()[1].Bytes()) + length := data.Buffers()[1].Len() / arrow.Decimal256SizeBytes + for i := 0; i < length; i++ { + idx := i * 4 + tmp0 := bits.ReverseBytes64(rawdata[idx]) + tmp1 := bits.ReverseBytes64(rawdata[idx+1]) + tmp2 := bits.ReverseBytes64(rawdata[idx+2]) + rawdata[idx] = bits.ReverseBytes64(rawdata[idx+3]) + rawdata[idx+1] = tmp2 + rawdata[idx+2] = tmp1 + rawdata[idx+3] = tmp0 + } case *arrow.ListType: swapOffsets(1, 32, data) case *arrow.LargeListType: diff --git a/go/arrow/ipc/file_reader.go b/go/arrow/ipc/file_reader.go index 90c145648d092..ccc97ca310578 100644 --- a/go/arrow/ipc/file_reader.go +++ b/go/arrow/ipc/file_reader.go @@ -461,7 +461,7 @@ func (ctx *arrayLoaderContext) loadArray(dt arrow.DataType) arrow.ArrayData { *arrow.Int8Type, *arrow.Int16Type, *arrow.Int32Type, *arrow.Int64Type, *arrow.Uint8Type, *arrow.Uint16Type, *arrow.Uint32Type, *arrow.Uint64Type, *arrow.Float16Type, *arrow.Float32Type, *arrow.Float64Type, - *arrow.Decimal128Type, + *arrow.Decimal128Type, *arrow.Decimal256Type, *arrow.Time32Type, *arrow.Time64Type, *arrow.TimestampType, *arrow.Date32Type, *arrow.Date64Type, diff --git a/go/arrow/ipc/metadata.go b/go/arrow/ipc/metadata.go index 69f808ae25f52..3a8c237313a1b 100644 --- a/go/arrow/ipc/metadata.go +++ b/go/arrow/ipc/metadata.go @@ -285,6 +285,15 @@ func (fv *fieldVisitor) visit(field arrow.Field) { flatbuf.DecimalStart(fv.b) flatbuf.DecimalAddPrecision(fv.b, dt.Precision) flatbuf.DecimalAddScale(fv.b, dt.Scale) + flatbuf.DecimalAddBitWidth(fv.b, 128) + fv.offset = flatbuf.DecimalEnd(fv.b) + + case 
*arrow.Decimal256Type: + fv.dtype = flatbuf.TypeDecimal + flatbuf.DecimalStart(fv.b) + flatbuf.DecimalAddPrecision(fv.b, dt.Precision) + flatbuf.DecimalAddScale(fv.b, dt.Scale) + flatbuf.DecimalAddBitWidth(fv.b, 256) fv.offset = flatbuf.DecimalEnd(fv.b) case *arrow.FixedSizeBinaryType: @@ -809,7 +818,14 @@ func floatToFB(b *flatbuffers.Builder, bw int32) flatbuffers.UOffsetT { } func decimalFromFB(data flatbuf.Decimal) (arrow.DataType, error) { - return &arrow.Decimal128Type{Precision: data.Precision(), Scale: data.Scale()}, nil + switch data.BitWidth() { + case 128: + return &arrow.Decimal128Type{Precision: data.Precision(), Scale: data.Scale()}, nil + case 256: + return &arrow.Decimal256Type{Precision: data.Precision(), Scale: data.Scale()}, nil + default: + return nil, fmt.Errorf("arrow/ipc: invalid decimal bitwidth: %d", data.BitWidth()) + } } func timeFromFB(data flatbuf.Time) (arrow.DataType, error) { From aa4cfa9b56fe9ee9bc3fbbedc2dbe46ca8972a4b Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Thu, 4 Aug 2022 11:01:12 -0400 Subject: [PATCH 03/12] forgot to add the bitWidth to the expected JSON --- go/arrow/internal/arrjson/arrjson_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/go/arrow/internal/arrjson/arrjson_test.go b/go/arrow/internal/arrjson/arrjson_test.go index eccdb9aee37ed..c4ffc3bbbf633 100644 --- a/go/arrow/internal/arrjson/arrjson_test.go +++ b/go/arrow/internal/arrjson/arrjson_test.go @@ -3352,7 +3352,8 @@ func makeDecimal128sWantJSONs() string { "type": { "name": "decimal", "scale": 1, - "precision": 10 + "precision": 10, + "bitWidth": 128 }, "nullable": true, "children": [] From f85b07ba1f395c3d461eefb5ccf74fc5f54fe5b7 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Thu, 4 Aug 2022 12:09:53 -0400 Subject: [PATCH 04/12] Update go/arrow/datatype_fixedwidth.go Co-authored-by: Antoine Pitrou --- go/arrow/datatype_fixedwidth.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/go/arrow/datatype_fixedwidth.go b/go/arrow/datatype_fixedwidth.go index 255aea45481eb..fbbd12ba03b93 100644 --- a/go/arrow/datatype_fixedwidth.go +++ b/go/arrow/datatype_fixedwidth.go @@ -529,7 +529,7 @@ func (Decimal128Type) Layout() DataTypeLayout { return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Decimal128SizeBytes)}} } -// Decimal128Type represents a fixed-size 128-bit decimal type. +// Decimal256Type represents a fixed-size 256-bit decimal type. type Decimal256Type struct { Precision int32 Scale int32 From b6e95e7ce27df31a0cd9e64cdd065e0a87abdec9 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Thu, 4 Aug 2022 12:10:05 -0400 Subject: [PATCH 05/12] Update go/arrow/datatype_fixedwidth.go Co-authored-by: Antoine Pitrou --- go/arrow/datatype_fixedwidth.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/arrow/datatype_fixedwidth.go b/go/arrow/datatype_fixedwidth.go index fbbd12ba03b93..5589ad637a2f5 100644 --- a/go/arrow/datatype_fixedwidth.go +++ b/go/arrow/datatype_fixedwidth.go @@ -546,7 +546,7 @@ func (t *Decimal256Type) Fingerprint() string { } func (Decimal256Type) Layout() DataTypeLayout { - return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Decimal128SizeBytes)}} + return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Decimal256SizeBytes)}} } // MonthInterval represents a number of months. 
From 21aef6df1d0a6c17a2e2d03273afe5a285d9755c Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Thu, 4 Aug 2022 12:42:43 -0400 Subject: [PATCH 06/12] changes from review feedback --- go/arrow/array/decimal128_test.go | 3 + go/arrow/array/decimal256_test.go | 4 ++ go/arrow/decimal128/decimal128.go | 58 +----------------- go/arrow/decimal256/decimal256.go | 99 +------------------------------ go/arrow/scalar/scalar.go | 35 +++++++++-- 5 files changed, 44 insertions(+), 155 deletions(-) diff --git a/go/arrow/array/decimal128_test.go b/go/arrow/array/decimal128_test.go index 33644a5f7ac1b..123da6c4ab3b3 100644 --- a/go/arrow/array/decimal128_test.go +++ b/go/arrow/array/decimal128_test.go @@ -77,7 +77,9 @@ func TestNewDecimal128Builder(t *testing.T) { assert.Equal(t, want, a.Values(), "unexpected Decimal128Values") assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity + assert.Equal(t, 4, a.Data().Buffers()[0].Len(), "should be 4 bytes due to minBuilderCapacity") assert.Len(t, a.Values(), 10, "unexpected length of Decimal128Values") + assert.Equal(t, 10*arrow.Decimal128SizeBytes, a.Data().Buffers()[1].Len()) a.Release() ab.Append(decimal128.FromI64(7)) @@ -88,6 +90,7 @@ func TestNewDecimal128Builder(t *testing.T) { assert.Equal(t, 0, a.NullN()) assert.Equal(t, []decimal128.Num{decimal128.FromI64(7), decimal128.FromI64(8)}, a.Values()) assert.Len(t, a.Values(), 2) + assert.Equal(t, 2*arrow.Decimal128SizeBytes, a.Data().Buffers()[1].Len()) a.Release() } diff --git a/go/arrow/array/decimal256_test.go b/go/arrow/array/decimal256_test.go index 1206080f83798..6f44fd3e01eb0 100644 --- a/go/arrow/array/decimal256_test.go +++ b/go/arrow/array/decimal256_test.go @@ -77,7 +77,9 @@ func TestNewDecimal256Builder(t *testing.T) { assert.Equal(t, want, a.Values(), "unexpected Decimal256Values") assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity + assert.Equal(t, 4, a.Data().Buffers()[0].Len(), "should be 4 
bytes due to minBuilderCapacity") assert.Len(t, a.Values(), 10, "unexpected length of Decimal256Values") + assert.Equal(t, 10*arrow.Decimal256SizeBytes, a.Data().Buffers()[1].Len()) a.Release() ab.Append(decimal256.FromI64(7)) @@ -86,8 +88,10 @@ func TestNewDecimal256Builder(t *testing.T) { a = ab.NewDecimal256Array() assert.Equal(t, 0, a.NullN()) + assert.Equal(t, 4, a.Data().Buffers()[0].Len(), "should be 4 bytes due to minBuilderCapacity") assert.Equal(t, []decimal256.Num{decimal256.FromI64(7), decimal256.FromI64(8)}, a.Values()) assert.Len(t, a.Values(), 2) + assert.Equal(t, 2*arrow.Decimal256SizeBytes, a.Data().Buffers()[1].Len()) a.Release() } diff --git a/go/arrow/decimal128/decimal128.go b/go/arrow/decimal128/decimal128.go index 218b4c923175d..fa044f46ec58f 100644 --- a/go/arrow/decimal128/decimal128.go +++ b/go/arrow/decimal128/decimal128.go @@ -67,7 +67,7 @@ func FromI64(v int64) Num { // BitLen > 128, this will panic. func FromBigInt(v *big.Int) (n Num) { bitlen := v.BitLen() - if bitlen > 128 { + if bitlen > 127 { panic("arrow/decimal128: cannot represent value larger than 128bits") } else if bitlen == 0 { // if bitlen is 0, then the value is 0 so return the default zeroed @@ -101,26 +101,6 @@ func (n Num) Negate() Num { return n } -func fromPositiveFloat32(v float32, prec, scale int32) (Num, error) { - var pscale float32 - if scale >= -38 && scale <= 38 { - pscale = float32PowersOfTen[scale+38] - } else { - pscale = float32(math.Pow10(int(scale))) - } - - v *= pscale - v = float32(math.RoundToEven(float64(v))) - maxabs := float32PowersOfTen[prec+38] - if v <= -maxabs || v >= maxabs { - return Num{}, fmt.Errorf("cannot convert %f to decimal128(precision=%d, scale=%d): overflow", v, prec, scale) - } - - hi := float32(math.Floor(math.Ldexp(float64(v), -64))) - low := v - float32(math.Ldexp(float64(hi), 64)) - return Num{hi: int64(hi), lo: uint64(low)}, nil -} - func fromPositiveFloat64(v float64, prec, scale int32) (Num, error) { var pscale float64 if 
scale >= -38 && scale <= 38 { @@ -145,14 +125,7 @@ func fromPositiveFloat64(v float64, prec, scale int32) (Num, error) { // value using the provided precision and scale. Will return an error if the // value cannot be accurately represented with the desired precision and scale. func FromFloat32(v float32, prec, scale int32) (Num, error) { - if v < 0 { - dec, err := fromPositiveFloat32(-v, prec, scale) - if err != nil { - return dec, err - } - return dec.Negate(), nil - } - return fromPositiveFloat32(v, prec, scale) + return FromFloat64(float64(v), prec, scale) } // FromFloat64 returns a new decimal128.Num constructed from the given float64 @@ -169,24 +142,10 @@ func FromFloat64(v float64, prec, scale int32) (Num, error) { return fromPositiveFloat64(v, prec, scale) } -func (n Num) tofloat32Positive(scale int32) float32 { - const twoTo64 float32 = 1.8446744e+19 - x := float32(n.hi) * twoTo64 - x += float32(n.lo) - if scale >= -38 && scale <= 38 { - return x * float32PowersOfTen[-scale+38] - } - - return x * float32(math.Pow10(-int(scale))) -} - // ToFloat32 returns a float32 value representative of this decimal128.Num, // but with the given scale. 
func (n Num) ToFloat32(scale int32) float32 { - if n.hi < 0 { - return -n.Negate().tofloat32Positive(scale) - } - return n.tofloat32Positive(scale) + return float32(n.ToFloat64(scale)) } func (n Num) tofloat64Positive(scale int32) float64 { @@ -429,17 +388,6 @@ var ( New(2710505431213761085, 343699775700336640), } - float32PowersOfTen = [...]float32{ - 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, - 1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, - 1e-18, 1e-17, 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9, - 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, - 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, - 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, - 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, 1e30, 1e31, - 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, - } - float64PowersOfTen = [...]float64{ 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, diff --git a/go/arrow/decimal256/decimal256.go b/go/arrow/decimal256/decimal256.go index 6b5bc3434dfd6..3dbaa56024f4d 100644 --- a/go/arrow/decimal256/decimal256.go +++ b/go/arrow/decimal256/decimal256.go @@ -85,56 +85,7 @@ func (n Num) Negate() Num { } func FromFloat32(v float32, prec, scale int32) (Num, error) { - debug.Assert(prec > 0 && prec <= 76, "invalid precision for converting to decimal256") - - if math.IsInf(float64(v), 0) { - return Num{}, fmt.Errorf("cannot convert %f to decimal256", v) - } - - if v < 0 { - dec, err := fromPositiveFloat32(-v, prec, scale) - if err != nil { - return dec, err - } - return dec.Negate(), nil - } - return fromPositiveFloat32(v, prec, scale) -} - -func fromPositiveFloat32(v float32, prec, scale int32) (Num, error) { - var pscale float32 - if scale >= -76 && scale <= 76 { - pscale = float32PowersOfTen[scale+76] - } else { - pscale = float32(math.Pow10(int(scale))) - } - - v *= pscale - v = 
float32(math.RoundToEven(float64(v))) - maxabs := float32PowersOfTen[prec+76] - if v <= -maxabs || v >= maxabs { - return Num{}, fmt.Errorf("cannot convert %f to decimal256(precision=%d, scale=%d): overflow", - v, prec, scale) - } - - var arr [4]float32 - arr[3] = float32(math.Floor(math.Ldexp(float64(v), -192))) - v -= float32(math.Ldexp(float64(arr[3]), 192)) - arr[2] = float32(math.Floor(math.Ldexp(float64(v), -128))) - v -= float32(math.Ldexp(float64(arr[2]), 128)) - arr[1] = float32(math.Floor(math.Ldexp(float64(v), -64))) - v -= float32(math.Ldexp(float64(arr[1]), 64)) - arr[0] = v - - debug.Assert(arr[3] >= 0, "bad conversion float64 to decimal256") - debug.Assert(arr[3] < 1.8446744e+19, "bad conversion float32 to decimal256") // 2**64 - debug.Assert(arr[2] >= 0, "bad conversion float64 to decimal256") - debug.Assert(arr[2] < 1.8446744e+19, "bad conversion float32 to decimal256") // 2**64 - debug.Assert(arr[1] >= 0, "bad conversion float64 to decimal256") - debug.Assert(arr[1] < 1.8446744e+19, "bad conversion float32 to decimal256") // 2**64 - debug.Assert(arr[0] >= 0, "bad conversion float64 to decimal256") - debug.Assert(arr[0] < 1.8446744e+19, "bad conversion float32 to decimal256") // 2**64 - return Num{[4]uint64{uint64(arr[0]), uint64(arr[1]), uint64(arr[2]), uint64(arr[3])}}, nil + return FromFloat64(float64(v), prec, scale) } func FromFloat64(v float64, prec, scale int32) (Num, error) { @@ -190,20 +141,6 @@ func fromPositiveFloat64(v float64, prec, scale int32) (Num, error) { return Num{[4]uint64{uint64(arr[0]), uint64(arr[1]), uint64(arr[2]), uint64(arr[3])}}, nil } -func (n Num) tofloat32Positive(scale int32) float32 { - const twoTo64 float32 = 1.8446744e+19 - if n.arr[3] != 0 || n.arr[2] != 0 { - return floatInf - } - x := float32(n.arr[1]) * twoTo64 - x += float32(n.arr[0]) - if scale >= -76 && scale <= 76 { - return x * float32PowersOfTen[-scale+76] - } - - return x * float32(math.Pow10(-int(scale))) -} - func (n Num) tofloat64Positive(scale 
int32) float64 { const ( twoTo64 float64 = 1.8446744073709552e+19 @@ -223,12 +160,7 @@ func (n Num) tofloat64Positive(scale int32) float64 { return x * math.Pow10(-int(scale)) } -func (n Num) ToFloat32(scale int32) float32 { - if n.Sign() < 0 { - return -n.Negate().tofloat32Positive(scale) - } - return n.tofloat32Positive(scale) -} +func (n Num) ToFloat32(scale int32) float32 { return float32(n.ToFloat64(scale)) } func (n Num) ToFloat64(scale int32) float64 { if n.Sign() < 0 { @@ -246,7 +178,7 @@ func (n Num) Sign() int { func FromBigInt(v *big.Int) (n Num) { bitlen := v.BitLen() - if bitlen > 256 { + if bitlen > 255 { panic("arrow/decimal256: cannot represent value larger than 256bits") } else if bitlen == 0 { return @@ -526,31 +458,6 @@ var ( New(796545955566226138, 9489746690038731964, 13527356396454709248, 0), } - floatInf = float32(math.Inf(0)) - float32PowersOfTen = [...]float32{ - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1e-45, 1e-44, 1e-43, 1e-42, - 1e-41, 1e-40, 1e-39, 1e-38, 1e-37, 1e-36, 1e-35, - 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28, - 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, - 1e-20, 1e-19, 1e-18, 1e-17, 1e-16, 1e-15, 1e-14, - 1e-13, 1e-12, 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, - 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, - 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, - 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, - 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, - 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, - 1e29, 1e30, 1e31, 1e32, 1e33, 1e34, 1e35, - 1e36, 1e37, 1e38, floatInf, floatInf, floatInf, floatInf, - floatInf, floatInf, floatInf, floatInf, floatInf, floatInf, floatInf, - floatInf, floatInf, floatInf, floatInf, floatInf, floatInf, floatInf, - floatInf, floatInf, floatInf, floatInf, floatInf, floatInf, floatInf, - floatInf, floatInf, floatInf, floatInf, floatInf, floatInf, floatInf, - floatInf, floatInf, floatInf, floatInf, floatInf, floatInf, - } float64PowersOfTen = [...]float64{ 1e-76, 1e-75, 
1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, 1e-65, 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, 1e-56, 1e-55, 1e-54, 1e-53, diff --git a/go/arrow/scalar/scalar.go b/go/arrow/scalar/scalar.go index 82d0feb03ce14..d32b9f4652d5b 100644 --- a/go/arrow/scalar/scalar.go +++ b/go/arrow/scalar/scalar.go @@ -295,11 +295,23 @@ func (s *Decimal128) CastTo(to arrow.DataType) (Scalar, error) { return MakeNullScalar(to), nil } + dt := s.Type.(*arrow.Decimal128Type) + switch to.ID() { case arrow.DECIMAL128: - return NewDecimal128Scalar(s.Value, to), nil + to := to.(*arrow.Decimal128Type) + newVal, err := s.Value.Rescale(dt.Scale, to.Scale) + if err != nil { + return nil, err + } + return NewDecimal128Scalar(newVal, to), nil case arrow.DECIMAL256: - return NewDecimal256Scalar(decimal256.FromDecimal128(s.Value), to), nil + to := to.(*arrow.Decimal256Type) + newVal, err := decimal256.FromDecimal128(s.Value).Rescale(dt.Scale, to.Scale) + if err != nil { + return nil, err + } + return NewDecimal256Scalar(newVal, to), nil case arrow.STRING: dt := s.Type.(*arrow.Decimal128Type) scale := big.NewFloat(math.Pow10(int(dt.Scale))) @@ -341,11 +353,17 @@ func (s *Decimal256) CastTo(to arrow.DataType) (Scalar, error) { return MakeNullScalar(to), nil } + dt := s.Type.(*arrow.Decimal256Type) + switch to.ID() { case arrow.DECIMAL256: - return NewDecimal256Scalar(s.Value, to), nil + to := to.(*arrow.Decimal256Type) + newVal, err := s.Value.Rescale(dt.Scale, to.Scale) + if err != nil { + return nil, err + } + return NewDecimal256Scalar(newVal, to), nil case arrow.STRING: - dt := s.Type.(*arrow.Decimal256Type) scale := big.NewFloat(math.Pow10(int(dt.Scale))) val := (&big.Float{}).SetInt(s.Value.BigInt()) return NewStringScalar(val.Quo(val, scale).Text('g', int(dt.Precision))), nil @@ -852,6 +870,15 @@ func Hash(seed maphash.Seed, s Scalar) uint64 { binary.Write(&h, endian.Native, v.LowBits()) hash() binary.Write(&h, endian.Native, uint64(v.HighBits())) + case 
decimal256.Num: + arr := v.Array() + binary.Write(&h, endian.Native, arr[3]) + hash() + binary.Write(&h, endian.Native, arr[2]) + hash() + binary.Write(&h, endian.Native, arr[1]) + hash() + binary.Write(&h, endian.Native, arr[0]) } hash() return out From c47ef3ea4873bc5d949c87919ba5e60d14224001 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Thu, 4 Aug 2022 12:46:43 -0400 Subject: [PATCH 07/12] check FitsInPrecision when casting scalars --- go/arrow/scalar/scalar.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/go/arrow/scalar/scalar.go b/go/arrow/scalar/scalar.go index d32b9f4652d5b..5edc98584b5b3 100644 --- a/go/arrow/scalar/scalar.go +++ b/go/arrow/scalar/scalar.go @@ -304,6 +304,9 @@ func (s *Decimal128) CastTo(to arrow.DataType) (Scalar, error) { if err != nil { return nil, err } + if !newVal.FitsInPrecision(to.Precision) { + return nil, fmt.Errorf("decimal128 value %v will not fit in new precision %d", newVal, to.Precision) + } return NewDecimal128Scalar(newVal, to), nil case arrow.DECIMAL256: to := to.(*arrow.Decimal256Type) @@ -311,6 +314,9 @@ func (s *Decimal128) CastTo(to arrow.DataType) (Scalar, error) { if err != nil { return nil, err } + if !newVal.FitsInPrecision(to.Precision) { + return nil, fmt.Errorf("decimal256 value %v will not fit in new precision %d", newVal, to.Precision) + } return NewDecimal256Scalar(newVal, to), nil case arrow.STRING: dt := s.Type.(*arrow.Decimal128Type) @@ -362,6 +368,9 @@ func (s *Decimal256) CastTo(to arrow.DataType) (Scalar, error) { if err != nil { return nil, err } + if !newVal.FitsInPrecision(to.Precision) { + return nil, fmt.Errorf("decimal256 value %v will not fit in new precision %d", newVal, to.Precision) + } return NewDecimal256Scalar(newVal, to), nil case arrow.STRING: scale := big.NewFloat(math.Pow10(int(dt.Scale))) From 45baa6ddc06e9a0afbdd6460b6aae0482a533f17 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Thu, 4 Aug 2022 13:48:19 -0400 Subject: [PATCH 08/12] default to decimal128 when missing 
bitWidth in JSON integration test --- go/arrow/internal/arrjson/arrjson.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/go/arrow/internal/arrjson/arrjson.go b/go/arrow/internal/arrjson/arrjson.go index f83cabfaf81d2..86618d11a8afc 100644 --- a/go/arrow/internal/arrjson/arrjson.go +++ b/go/arrow/internal/arrjson/arrjson.go @@ -457,10 +457,10 @@ func typeFromJSON(typ json.RawMessage, children []FieldWrapper) (arrowType arrow return } switch t.BitWidth { - case 128: - arrowType = &arrow.Decimal128Type{Precision: int32(t.Precision), Scale: int32(t.Scale)} case 256: arrowType = &arrow.Decimal256Type{Precision: int32(t.Precision), Scale: int32(t.Scale)} + case 128, 0: // default to 128 bits when missing + arrowType = &arrow.Decimal128Type{Precision: int32(t.Precision), Scale: int32(t.Scale)} } } From d5df467bb8f2fecd8a85d6076c575f4ff62bf233 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Thu, 4 Aug 2022 21:43:05 -0400 Subject: [PATCH 09/12] add missing AppendEmptyValue --- go/arrow/array/decimal256.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/go/arrow/array/decimal256.go b/go/arrow/array/decimal256.go index bdbaca3af249b..b3e60924e7e84 100644 --- a/go/arrow/array/decimal256.go +++ b/go/arrow/array/decimal256.go @@ -160,6 +160,10 @@ func (b *Decimal256Builder) AppendNull() { b.UnsafeAppendBoolToBitmap(false) } +func (b *Decimal256Builder) AppendEmptyValue() { + b.Append(decimal256.Num{}) +} + func (b *Decimal256Builder) UnsafeAppendBoolToBitmap(isValid bool) { if isValid { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) From 0c69de066278ea522f1033f863c444e089a9a93f Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Fri, 5 Aug 2022 10:19:06 -0400 Subject: [PATCH 10/12] add Type method to decimal256 --- go/arrow/array/decimal256.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/go/arrow/array/decimal256.go b/go/arrow/array/decimal256.go index b3e60924e7e84..c85a0fae75be0 100644 --- a/go/arrow/array/decimal256.go +++ 
b/go/arrow/array/decimal256.go @@ -164,6 +164,8 @@ func (b *Decimal256Builder) AppendEmptyValue() { b.Append(decimal256.Num{}) } +func (b *Decimal256Builder) Type() arrow.DataType { return b.dtype } + func (b *Decimal256Builder) UnsafeAppendBoolToBitmap(isValid bool) { if isValid { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) From 71312b73ec4f4289ef72dca6f4a7b526ed27ec79 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Fri, 5 Aug 2022 10:20:01 -0400 Subject: [PATCH 11/12] error conventions --- go/arrow/array/decimal256.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/go/arrow/array/decimal256.go b/go/arrow/array/decimal256.go index c85a0fae75be0..8ad45a6b8d659 100644 --- a/go/arrow/array/decimal256.go +++ b/go/arrow/array/decimal256.go @@ -180,7 +180,7 @@ func (b *Decimal256Builder) UnsafeAppendBoolToBitmap(isValid bool) { // all values in v are appended and considered valid. func (b *Decimal256Builder) AppendValues(v []decimal256.Num, valid []bool) { if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") + panic("arrow/array: len(v) != len(valid) && len(valid) != 0") } if len(v) == 0 { @@ -321,7 +321,7 @@ func (b *Decimal256Builder) UnmarshalJSON(data []byte) error { } if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("decimal256 builder must unpack from json array, found %s", delim) + return fmt.Errorf("arrow/array: decimal256 builder must unpack from json array, found %s", delim) } return b.unmarshal(dec) From fe152cc654cad61f098cf0bbad1ae588234f08ec Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Fri, 5 Aug 2022 10:24:59 -0400 Subject: [PATCH 12/12] no more need for unsupportedArrayType --- go/arrow/array/array.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/go/arrow/array/array.go b/go/arrow/array/array.go index d322be87dd546..7db2a8f004bf6 100644 --- a/go/arrow/array/array.go +++ b/go/arrow/array/array.go @@ -114,10 +114,6 @@ var ( makeArrayFn 
[64]arrayConstructorFn ) -func unsupportedArrayType(data arrow.ArrayData) arrow.Array { - panic("unsupported data type: " + data.DataType().ID().String()) -} - func invalidDataType(data arrow.ArrayData) arrow.Array { panic("invalid data type: " + data.DataType().ID().String()) }