Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-17880: [Go] Add support for Decimal128 and Decimal256 to CSV writer #14278

Merged
merged 7 commits into from
Sep 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion go/arrow/csv/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ func WithNullWriter(null string) Option {
}

// WithBoolWriter overrides the default bool formatter with a function that returns
// a string representation of bool states, e.g. True, False, 1, 0
// a string representation of bool states, e.g. True, False, 1, 0
func WithBoolWriter(fmtr func(bool) string) Option {
return func(cfg config) {
switch cfg := cfg.(type) {
Expand Down Expand Up @@ -221,6 +221,7 @@ func validate(schema *arrow.Schema) {
case *arrow.StringType:
case *arrow.TimestampType:
case *arrow.Date32Type, *arrow.Date64Type:
case *arrow.Decimal128Type, *arrow.Decimal256Type:
default:
panic(fmt.Errorf("arrow/csv: field %d (%s) has invalid data type %T", i, f.Name, ft))
}
Expand Down
30 changes: 30 additions & 0 deletions go/arrow/csv/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ package csv
import (
"encoding/csv"
"io"
"math"
"math/big"
"strconv"
"sync"

Expand Down Expand Up @@ -219,6 +221,34 @@ func (w *Writer) Write(record arrow.Record) error {
recs[i][j] = w.nullValue
}
}
case *arrow.Decimal128Type:
fieldType := w.schema.Field(j).Type.(*arrow.Decimal128Type)
scale := fieldType.Scale
precision := fieldType.Precision
arr := col.(*array.Decimal128)
for i := 0; i < arr.Len(); i++ {
if arr.IsValid(i) {
f := (&big.Float{}).SetInt(arr.Value(i).BigInt())
f.Quo(f, big.NewFloat(math.Pow10(int(scale))))
recs[i][j] = f.Text('g', int(precision))
} else {
recs[i][j] = w.nullValue
}
}
case *arrow.Decimal256Type:
fieldType := w.schema.Field(j).Type.(*arrow.Decimal256Type)
scale := fieldType.Scale
precision := fieldType.Precision
arr := col.(*array.Decimal256)
for i := 0; i < arr.Len(); i++ {
if arr.IsValid(i) {
f := (&big.Float{}).SetInt(arr.Value(i).BigInt())
f.Quo(f, big.NewFloat(math.Pow10(int(scale))))
recs[i][j] = f.Text('g', int(precision))
} else {
recs[i][j] = w.nullValue
}
}
}
}

Expand Down
30 changes: 20 additions & 10 deletions go/arrow/csv/writer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ import (
"github.com/apache/arrow/go/v10/arrow"
"github.com/apache/arrow/go/v10/arrow/array"
"github.com/apache/arrow/go/v10/arrow/csv"
"github.com/apache/arrow/go/v10/arrow/decimal128"
"github.com/apache/arrow/go/v10/arrow/decimal256"
"github.com/apache/arrow/go/v10/arrow/memory"
)

Expand Down Expand Up @@ -129,18 +131,18 @@ func Example_writer() {

var (
fullData = [][]string{
{"bool", "i8", "i16", "i32", "i64", "u8", "u16", "u32", "u64", "f32", "f64", "str", "ts_s", "d32", "d64"},
{"true", "-1", "-1", "-1", "-1", "0", "0", "0", "0", "0", "0", "str-0", "2014-07-28 15:04:05", "2017-05-18", "2028-04-26"},
{"false", "0", "0", "0", "0", "1", "1", "1", "1", "0.1", "0.1", "str-1", "2016-09-08 15:04:05", "2022-11-08", "2031-06-28"},
{"true", "1", "1", "1", "1", "2", "2", "2", "2", "0.2", "0.2", "str-2", "2021-09-18 15:04:05", "2025-08-04", "2034-08-28"},
{nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal},
{"bool", "i8", "i16", "i32", "i64", "u8", "u16", "u32", "u64", "f32", "f64", "str", "ts_s", "d32", "d64", "dec128", "dec256"},
{"true", "-1", "-1", "-1", "-1", "0", "0", "0", "0", "0", "0", "str-0", "2014-07-28 15:04:05", "2017-05-18", "2028-04-26", "-123.45", "-123.45"},
{"false", "0", "0", "0", "0", "1", "1", "1", "1", "0.1", "0.1", "str-1", "2016-09-08 15:04:05", "2022-11-08", "2031-06-28", "0", "0"},
{"true", "1", "1", "1", "1", "2", "2", "2", "2", "0.2", "0.2", "str-2", "2021-09-18 15:04:05", "2025-08-04", "2034-08-28", "123.45", "123.45"},
{nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal},
}
bananaData = [][]string{
{"bool", "i8", "i16", "i32", "i64", "u8", "u16", "u32", "u64", "f32", "f64", "str", "ts_s", "d32", "d64"},
{"BANANA", "-1", "-1", "-1", "-1", "0", "0", "0", "0", "0", "0", "str-0", "2014-07-28 15:04:05", "2017-05-18", "2028-04-26"},
{"MANGO", "0", "0", "0", "0", "1", "1", "1", "1", "0.1", "0.1", "str-1", "2016-09-08 15:04:05", "2022-11-08", "2031-06-28"},
{"BANANA", "1", "1", "1", "1", "2", "2", "2", "2", "0.2", "0.2", "str-2", "2021-09-18 15:04:05", "2025-08-04", "2034-08-28"},
{nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal},
{"bool", "i8", "i16", "i32", "i64", "u8", "u16", "u32", "u64", "f32", "f64", "str", "ts_s", "d32", "d64", "dec128", "dec256"},
{"BANANA", "-1", "-1", "-1", "-1", "0", "0", "0", "0", "0", "0", "str-0", "2014-07-28 15:04:05", "2017-05-18", "2028-04-26", "-123.45", "-123.45"},
{"MANGO", "0", "0", "0", "0", "1", "1", "1", "1", "0.1", "0.1", "str-1", "2016-09-08 15:04:05", "2022-11-08", "2031-06-28", "0", "0"},
{"BANANA", "1", "1", "1", "1", "2", "2", "2", "2", "0.2", "0.2", "str-2", "2021-09-18 15:04:05", "2025-08-04", "2034-08-28", "123.45", "123.45"},
{nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal},
}
)

Expand Down Expand Up @@ -213,6 +215,8 @@ func testCSVWriter(t *testing.T, data [][]string, writeHeader bool, fmtr func(bo
{Name: "ts_s", Type: arrow.FixedWidthTypes.Timestamp_s},
{Name: "d32", Type: arrow.FixedWidthTypes.Date32},
{Name: "d64", Type: arrow.FixedWidthTypes.Date64},
{Name: "dec128", Type: &arrow.Decimal128Type{Precision: 5, Scale: 2}},
{Name: "dec256", Type: &arrow.Decimal256Type{Precision: 5, Scale: 2}},
},
nil,
)
Expand All @@ -235,6 +239,8 @@ func testCSVWriter(t *testing.T, data [][]string, writeHeader bool, fmtr func(bo
b.Field(12).(*array.TimestampBuilder).AppendValues(genTimestamps(arrow.Second), nil)
b.Field(13).(*array.Date32Builder).AppendValues([]arrow.Date32{17304, 19304, 20304}, nil)
b.Field(14).(*array.Date64Builder).AppendValues([]arrow.Date64{1840400000000, 1940400000000, 2040400000000}, nil)
b.Field(15).(*array.Decimal128Builder).AppendValues([]decimal128.Num{decimal128.FromI64(-12345), decimal128.FromI64(0), decimal128.FromI64(12345)}, nil)
b.Field(16).(*array.Decimal256Builder).AppendValues([]decimal256.Num{decimal256.FromI64(-12345), decimal256.FromI64(0), decimal256.FromI64(12345)}, nil)

for _, field := range b.Fields() {
field.AppendNull()
Expand Down Expand Up @@ -327,6 +333,8 @@ func BenchmarkWrite(b *testing.B) {
{Name: "f32", Type: arrow.PrimitiveTypes.Float32},
{Name: "f64", Type: arrow.PrimitiveTypes.Float64},
{Name: "str", Type: arrow.BinaryTypes.String},
{Name: "dec128", Type: &arrow.Decimal128Type{Precision: 4, Scale: 3}},
{Name: "dec128", Type: &arrow.Decimal256Type{Precision: 4, Scale: 3}},
},
nil,
)
Expand All @@ -348,6 +356,8 @@ func BenchmarkWrite(b *testing.B) {
bldr.Field(9).(*array.Float32Builder).Append(float32(i))
bldr.Field(10).(*array.Float64Builder).Append(float64(i))
bldr.Field(11).(*array.StringBuilder).Append(fmt.Sprintf("str-%d", i))
bldr.Field(12).(*array.Decimal128Builder).Append(decimal128.FromI64(int64(i)))
bldr.Field(13).(*array.Decimal256Builder).Append(decimal256.FromI64(int64(i)))
}

rec := bldr.NewRecord()
Expand Down