diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 3c9100c20b774..e696de98871bb 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -75,6 +75,39 @@ jobs: continue-on-error: true run: archery docker push debian-go + docker_cgo_python: + name: AMD64 Debian 10 GO ${{ matrix.go }} - CGO Python + runs-on: ubuntu-latest + if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 15 + strategy: + fail-fast: false + matrix: + go: [1.15] + env: + GO: ${{ matrix.go }} + steps: + - name: Checkout Arrow + uses: actions/checkout@v2 + with: + fetch-depth: 0 + - name: Fetch Submodules and Tags + run: ci/scripts/util_checkout.sh + - name: Free Up Disk Space + run: ci/scripts/util_cleanup.sh + - name: Setup Python + uses: actions/setup-python@v1 + with: + python-version: 3.8 + - name: Setup Archery + run: pip install -e dev/archery[docker] + - name: Execute Docker Build + run: archery docker run debian-go-cgo-python + - name: Docker Push + if: success() && github.event_name == 'push' && github.repository == 'apache/arrow' + continue-on-error: true + run: archery docker push debian-go-cgo-python + windows: name: AMD64 Windows 2019 Go ${{ matrix.go }} runs-on: windows-latest diff --git a/ci/docker/debian-10-go-cgo-python.dockerfile b/ci/docker/debian-10-go-cgo-python.dockerfile new file mode 100644 index 0000000000000..46455a42bb3a9 --- /dev/null +++ b/ci/docker/debian-10-go-cgo-python.dockerfile @@ -0,0 +1,36 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG base +FROM ${base} + +ENV DEBIAN_FRONTEND noninteractive + +# Install python3 and pip so we can install pyarrow to test the C data interface. +RUN apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + python3 \ + python3-pip && \ + apt-get clean + +RUN ln -s /usr/bin/python3 /usr/local/bin/python && \ + ln -s /usr/bin/pip3 /usr/local/bin/pip + +# Need a newer pip than Debian's to install manylinux201x wheels +RUN pip install -U pip + +RUN pip install pyarrow cffi --only-binary pyarrow diff --git a/ci/docker/debian-10-go.dockerfile b/ci/docker/debian-10-go.dockerfile index 199f09e24fcb3..3a24b8afee66d 100644 --- a/ci/docker/debian-10-go.dockerfile +++ b/ci/docker/debian-10-go.dockerfile @@ -17,7 +17,8 @@ ARG arch=amd64 ARG go=1.15 -FROM ${arch}/golang:${go} +FROM ${arch}/golang:${go}-buster + # TODO(kszucs): # 1. add the files required to install the dependencies to .dockerignore diff --git a/ci/scripts/go_cgo_python_test.sh b/ci/scripts/go_cgo_python_test.sh new file mode 100755 index 0000000000000..564b5e3f4168c --- /dev/null +++ b/ci/scripts/go_cgo_python_test.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +source_dir=${1}/go + +pushd ${source_dir}/arrow/cdata/test + +case "$(uname)" in + Linux) + testlib="cgotest.so" + ;; + Darwin) + testlib="cgotest.so" + ;; + MINGW*) + testlib="cgotest.dll" + ;; +esac + +go build -tags cdata_test -buildmode=c-shared -o $testlib . + +python test_export_to_cgo.py + +rm $testlib +rm "${testlib%.*}.h" + +popd diff --git a/ci/scripts/go_test.sh b/ci/scripts/go_test.sh index 9b2572e1b43d2..18855ac5000be 100755 --- a/ci/scripts/go_test.sh +++ b/ci/scripts/go_test.sh @@ -31,6 +31,10 @@ esac pushd ${source_dir}/arrow +# the cgo implementation of the c data interface requires the "test" +# tag in order to run its tests so that the testing functions implemented +# in .c files don't get included in non-test builds. + for d in $(go list ./... | grep -v vendor); do go test $testargs -tags "test" $d done diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index e2aa6285ea53a..b07f9623e5266 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -129,6 +129,7 @@ go/arrow/flight/Flight.pb.go go/arrow/flight/Flight_grpc.pb.go go/arrow/internal/cpu/* go/arrow/type_string.go +go/arrow/cdata/test/go.sum go/*.tmpldata go/*.s go/parquet/go.sum diff --git a/docker-compose.yml b/docker-compose.yml index 2c3736d8a00ec..edf432a38b18e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -112,7 +112,8 @@ x-hierarchy: - debian-c-glib: - debian-ruby - debian-python - - debian-go + - debian-go: + - debian-go-cgo-python - debian-java: - debian-java-jni - debian-js @@ -1198,6 +1199,24 @@ services: /arrow/ci/scripts/go_build.sh /arrow && /arrow/ci/scripts/go_test.sh /arrow" + debian-go-cgo-python: + # Usage: + # docker-compose build debian-go-cgo-python + # docker-compose run debian-go-cgo-python + image: ${REPO}:${ARCH}-debian-10-go-${GO}-cgo-python + build: + context: . + dockerfile: ci/docker/debian-10-go-cgo-python.dockerfile + cache_from: + - ${REPO}:${ARCH}-debian-10-go-${GO}-cgo-python + args: + base: ${REPO}:${ARCH}-debian-10-go-${GO} + shm_size: *shm-size + volumes: *debian-volumes + command: &go-cgo-python-command > + /bin/bash -c " + /arrow/ci/scripts/go_cgo_python_test.sh /arrow" + ############################# JavaScript #################################### debian-js: diff --git a/docs/source/status.rst b/docs/source/status.rst index 48084187ef91f..037e74fb217f4 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -177,18 +177,18 @@ Notes: C Data Interface ================ -+-----------------------------+-------+--------+-------+-------+ -| Feature | C++ | Python | R | Rust | -| | | | | | -+=============================+=======+========+=======+=======+ -| Schema export | ✓ | ✓ | ✓ | ✓ | -+-----------------------------+-------+--------+-------+-------+ -| Array export | ✓ | ✓ | ✓ | ✓ | -+-----------------------------+-------+--------+-------+-------+ -| Schema import | ✓ | ✓ | ✓ | ✓ | -+-----------------------------+-------+--------+-------+-------+ -| Array import | ✓ | ✓ | ✓ | ✓ | -+-----------------------------+-------+--------+-------+-------+ ++-----------------------------+-------+--------+-------+-------+----+ +| Feature | C++ | Python | R | Rust | Go | +| | | | | | | ++=============================+=======+========+=======+=======+====+ +| Schema export | ✓ | ✓ | ✓ | ✓ | | ++-----------------------------+-------+--------+-------+-------+----+ +| Array export | ✓ | ✓ | ✓ | ✓ | | ++-----------------------------+-------+--------+-------+-------+----+ +| Schema import | ✓ | ✓ | ✓ | ✓ | ✓ | ++-----------------------------+-------+--------+-------+-------+-----+ +| Array import | ✓ | ✓ | ✓ | ✓ | ✓ | ++-----------------------------+-------+--------+-------+-------+-----+ .. seealso:: The :ref:`C Data Interface ` specification. @@ -197,14 +197,14 @@ C Data Interface C Stream Interface (experimental) ================================= -+-----------------------------+-------+--------+ -| Feature | C++ | Python | -| | | | -+=============================+=======+========+ -| Stream export | ✓ | ✓ | -+-----------------------------+-------+--------+ -| Stream import | ✓ | ✓ | -+-----------------------------+-------+--------+ ++-----------------------------+-------+--------+----+ +| Feature | C++ | Python | Go | +| | | | | ++=============================+=======+========+====+ +| Stream export | ✓ | ✓ | | ++-----------------------------+-------+--------+----+ +| Stream import | ✓ | ✓ | ✓ | ++-----------------------------+-------+--------+----+ .. seealso:: The :ref:`C Stream Interface ` specification. diff --git a/go/arrow/cdata/arrow/c/abi.h b/go/arrow/cdata/arrow/c/abi.h new file mode 100644 index 0000000000000..a78170dbdbcb0 --- /dev/null +++ b/go/arrow/cdata/arrow/c/abi.h @@ -0,0 +1,103 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define ARROW_FLAG_DICTIONARY_ORDERED 1 +#define ARROW_FLAG_NULLABLE 2 +#define ARROW_FLAG_MAP_KEYS_SORTED 4 + +struct ArrowSchema { + // Array type description + const char* format; + const char* name; + const char* metadata; + int64_t flags; + int64_t n_children; + struct ArrowSchema** children; + struct ArrowSchema* dictionary; + + // Release callback + void (*release)(struct ArrowSchema*); + // Opaque producer-specific data + void* private_data; +}; + +struct ArrowArray { + // Array data description + int64_t length; + int64_t null_count; + int64_t offset; + int64_t n_buffers; + int64_t n_children; + const void** buffers; + struct ArrowArray** children; + struct ArrowArray* dictionary; + + // Release callback + void (*release)(struct ArrowArray*); + // Opaque producer-specific data + void* private_data; +}; + +// EXPERIMENTAL: C stream interface + +struct ArrowArrayStream { + // Callback to get the stream type + // (will be the same for all arrays in the stream). + // + // Return value: 0 if successful, an `errno`-compatible error code otherwise. + // + // If successful, the ArrowSchema must be released independently from the stream. + int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out); + + // Callback to get the next array + // (if no error and the array is released, the stream has ended) + // + // Return value: 0 if successful, an `errno`-compatible error code otherwise. + // + // If successful, the ArrowArray must be released independently from the stream. + int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out); + + // Callback to get optional detailed error information. + // This must only be called if the last stream operation failed + // with a non-0 return code. + // + // Return value: pointer to a null-terminated character array describing + // the last error, or NULL if no description is available. + // + // The returned pointer is only valid until the next operation on this stream + // (including release). + const char* (*get_last_error)(struct ArrowArrayStream*); + + // Release callback: release the stream's own resources. + // Note that arrays returned by `get_next` must be individually released. + void (*release)(struct ArrowArrayStream*); + + // Opaque producer-specific data + void* private_data; +}; + +#ifdef __cplusplus +} +#endif diff --git a/go/arrow/cdata/arrow/c/helpers.h b/go/arrow/cdata/arrow/c/helpers.h new file mode 100644 index 0000000000000..a5c1f6fe4bab5 --- /dev/null +++ b/go/arrow/cdata/arrow/c/helpers.h @@ -0,0 +1,117 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "arrow/c/abi.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/// Query whether the C schema is released +inline int ArrowSchemaIsReleased(const struct ArrowSchema* schema) { + return schema->release == NULL; +} + +/// Mark the C schema released (for use in release callbacks) +inline void ArrowSchemaMarkReleased(struct ArrowSchema* schema) { + schema->release = NULL; +} + +/// Move the C schema from `src` to `dest` +/// +/// Note `dest` must *not* point to a valid schema already, otherwise there +/// will be a memory leak. +inline void ArrowSchemaMove(struct ArrowSchema* src, struct ArrowSchema* dest) { + assert(dest != src); + assert(!ArrowSchemaIsReleased(src)); + memcpy(dest, src, sizeof(struct ArrowSchema)); + ArrowSchemaMarkReleased(src); +} + +/// Release the C schema, if necessary, by calling its release callback +inline void ArrowSchemaRelease(struct ArrowSchema* schema) { + if (!ArrowSchemaIsReleased(schema)) { + schema->release(schema); + assert(ArrowSchemaIsReleased(schema)); + } +} + +/// Query whether the C array is released +inline int ArrowArrayIsReleased(const struct ArrowArray* array) { + return array->release == NULL; +} + +/// Mark the C array released (for use in release callbacks) +inline void ArrowArrayMarkReleased(struct ArrowArray* array) { array->release = NULL; } + +/// Move the C array from `src` to `dest` +/// +/// Note `dest` must *not* point to a valid array already, otherwise there +/// will be a memory leak. +inline void ArrowArrayMove(struct ArrowArray* src, struct ArrowArray* dest) { + assert(dest != src); + assert(!ArrowArrayIsReleased(src)); + memcpy(dest, src, sizeof(struct ArrowArray)); + ArrowArrayMarkReleased(src); +} + +/// Release the C array, if necessary, by calling its release callback +inline void ArrowArrayRelease(struct ArrowArray* array) { + if (!ArrowArrayIsReleased(array)) { + array->release(array); + assert(ArrowArrayIsReleased(array)); + } +} + +/// Query whether the C array stream is released +inline int ArrowArrayStreamIsReleased(const struct ArrowArrayStream* stream) { + return stream->release == NULL; +} + +/// Mark the C array stream released (for use in release callbacks) +inline void ArrowArrayStreamMarkReleased(struct ArrowArrayStream* stream) { + stream->release = NULL; +} + +/// Move the C array stream from `src` to `dest` +/// +/// Note `dest` must *not* point to a valid stream already, otherwise there +/// will be a memory leak. +inline void ArrowArrayStreamMove(struct ArrowArrayStream* src, + struct ArrowArrayStream* dest) { + assert(dest != src); + assert(!ArrowArrayStreamIsReleased(src)); + memcpy(dest, src, sizeof(struct ArrowArrayStream)); + ArrowArrayStreamMarkReleased(src); +} + +/// Release the C array stream, if necessary, by calling its release callback +inline void ArrowArrayStreamRelease(struct ArrowArrayStream* stream) { + if (!ArrowArrayStreamIsReleased(stream)) { + stream->release(stream); + assert(ArrowArrayStreamIsReleased(stream)); + } +} + +#ifdef __cplusplus +} +#endif diff --git a/go/arrow/cdata/cdata.go b/go/arrow/cdata/cdata.go new file mode 100644 index 0000000000000..7cda454fc1876 --- /dev/null +++ b/go/arrow/cdata/cdata.go @@ -0,0 +1,574 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build cgo + +package cdata + +// implement handling of the Arrow C Data Interface. At least from a consuming side. + +// #include "arrow/c/abi.h" +// #include "arrow/c/helpers.h" +// typedef struct ArrowSchema ArrowSchema; +// typedef struct ArrowArray ArrowArray; +// typedef struct ArrowArrayStream ArrowArrayStream; +// +// int stream_get_schema(struct ArrowArrayStream* st, struct ArrowSchema* out) { return st->get_schema(st, out); } +// int stream_get_next(struct ArrowArrayStream* st, struct ArrowArray* out) { return st->get_next(st, out); } +// const char* stream_get_last_error(struct ArrowArrayStream* st) { return st->get_last_error(st); } +// struct ArrowArray get_arr() { struct ArrowArray arr; return arr; } +// struct ArrowArrayStream get_stream() { struct ArrowArrayStream stream; return stream; } +// +import "C" + +import ( + "io" + "reflect" + "runtime" + "strconv" + "strings" + "syscall" + "unsafe" + + "github.com/apache/arrow/go/arrow" + "github.com/apache/arrow/go/arrow/array" + "github.com/apache/arrow/go/arrow/bitutil" + "github.com/apache/arrow/go/arrow/memory" + "golang.org/x/xerrors" +) + +type ( + // CArrowSchema is the C Data Interface for ArrowSchemas defined in abi.h + CArrowSchema = C.ArrowSchema + // CArrowArray is the C Data Interface object for Arrow Arrays as defined in abi.h + CArrowArray = C.ArrowArray + // CArrowArrayStream is the Experimental API for handling streams of record batches + // through the C Data interface. + CArrowArrayStream = C.ArrowArrayStream +) + +// Map from the defined strings to their corresponding arrow.DataType interface +// object instances, for types that don't require params. +var formatToSimpleType = map[string]arrow.DataType{ + "n": arrow.Null, + "b": arrow.FixedWidthTypes.Boolean, + "c": arrow.PrimitiveTypes.Int8, + "C": arrow.PrimitiveTypes.Uint8, + "s": arrow.PrimitiveTypes.Int16, + "S": arrow.PrimitiveTypes.Uint16, + "i": arrow.PrimitiveTypes.Int32, + "I": arrow.PrimitiveTypes.Uint32, + "l": arrow.PrimitiveTypes.Int64, + "L": arrow.PrimitiveTypes.Uint64, + "e": arrow.FixedWidthTypes.Float16, + "f": arrow.PrimitiveTypes.Float32, + "g": arrow.PrimitiveTypes.Float64, + "z": arrow.BinaryTypes.Binary, + "u": arrow.BinaryTypes.String, + "tdD": arrow.FixedWidthTypes.Date32, + "tdm": arrow.FixedWidthTypes.Date64, + "tts": arrow.FixedWidthTypes.Time32s, + "ttm": arrow.FixedWidthTypes.Time32ms, + "ttu": arrow.FixedWidthTypes.Time64us, + "ttn": arrow.FixedWidthTypes.Time64ns, + "tDs": arrow.FixedWidthTypes.Duration_s, + "tDm": arrow.FixedWidthTypes.Duration_ms, + "tDu": arrow.FixedWidthTypes.Duration_us, + "tDn": arrow.FixedWidthTypes.Duration_ns, + "tiM": arrow.FixedWidthTypes.MonthInterval, + "tiD": arrow.FixedWidthTypes.DayTimeInterval, +} + +// decode metadata from C which is encoded as +// +// [int32] -> number of metadata pairs +// for 0..n +// [int32] -> number of bytes in key +// [n bytes] -> key value +// [int32] -> number of bytes in value +// [n bytes] -> value +func decodeCMetadata(md *C.char) arrow.Metadata { + if md == nil { + return arrow.Metadata{} + } + + // don't copy the bytes, just reference them directly + const maxlen = 0x7fffffff + data := (*[maxlen]byte)(unsafe.Pointer(md))[:] + + readint32 := func() int32 { + v := *(*int32)(unsafe.Pointer(&data[0])) + data = data[arrow.Int32SizeBytes:] + return v + } + + readstr := func() string { + l := readint32() + s := string(data[:l]) + data = data[l:] + return s + } + + npairs := readint32() + if npairs == 0 { + return arrow.Metadata{} + } + + keys := make([]string, npairs) + vals := make([]string, npairs) + + for i := int32(0); i < npairs; i++ { + keys[i] = readstr() + vals[i] = readstr() + } + + return arrow.NewMetadata(keys, vals) +} + +// convert a C.ArrowSchema to an arrow.Field to maintain metadata with the schema +func importSchema(schema *CArrowSchema) (ret arrow.Field, err error) { + // always release, even on error + defer C.ArrowSchemaRelease(schema) + + var childFields []arrow.Field + if schema.n_children > 0 { + // call ourselves recursively if there are children. + var schemaChildren []*C.ArrowSchema + // set up a slice to reference safely + s := (*reflect.SliceHeader)(unsafe.Pointer(&schemaChildren)) + s.Data = uintptr(unsafe.Pointer(schema.children)) + s.Len = int(schema.n_children) + s.Cap = int(schema.n_children) + + childFields = make([]arrow.Field, schema.n_children) + for i, c := range schemaChildren { + childFields[i], err = importSchema((*CArrowSchema)(c)) + if err != nil { + return + } + } + } + + // copy the schema name from the c-string + ret.Name = C.GoString(schema.name) + ret.Nullable = (schema.flags & C.ARROW_FLAG_NULLABLE) != 0 + ret.Metadata = decodeCMetadata(schema.metadata) + + // copies the c-string here, but it's very small + f := C.GoString(schema.format) + // handle our non-parameterized simple types. + dt, ok := formatToSimpleType[f] + if ok { + ret.Type = dt + return + } + + // handle types with params via colon + typs := strings.Split(f, ":") + defaulttz := "UTC" + switch typs[0] { + case "tss": + tz := typs[1] + if len(typs[1]) == 0 { + tz = defaulttz + } + dt = &arrow.TimestampType{Unit: arrow.Second, TimeZone: tz} + case "tsm": + tz := typs[1] + if len(typs[1]) == 0 { + tz = defaulttz + } + dt = &arrow.TimestampType{Unit: arrow.Millisecond, TimeZone: tz} + case "tsu": + tz := typs[1] + if len(typs[1]) == 0 { + tz = defaulttz + } + dt = &arrow.TimestampType{Unit: arrow.Microsecond, TimeZone: tz} + case "tsn": + tz := typs[1] + if len(typs[1]) == 0 { + tz = defaulttz + } + dt = &arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: tz} + case "w": // fixed size binary is "w:##" where ## is the byteWidth + byteWidth, err := strconv.Atoi(typs[1]) + if err != nil { + return ret, err + } + dt = &arrow.FixedSizeBinaryType{ByteWidth: byteWidth} + case "d": // decimal types are d:,[,] size is assumed 128 if left out + props := typs[1] + propList := strings.Split(props, ",") + if len(propList) == 3 { + err = xerrors.New("only decimal128 is supported") + return + } + + precision, _ := strconv.Atoi(propList[0]) + scale, _ := strconv.Atoi(propList[1]) + dt = &arrow.Decimal128Type{Precision: int32(precision), Scale: int32(scale)} + } + + if f[0] == '+' { // types with children + switch f[1] { + case 'l': // list + dt = arrow.ListOf(childFields[0].Type) + case 'w': // fixed size list is w:# where # is the list size. + listSize, err := strconv.Atoi(strings.Split(f, ":")[1]) + if err != nil { + return ret, err + } + + dt = arrow.FixedSizeListOf(int32(listSize), childFields[0].Type) + case 's': // struct + dt = arrow.StructOf(childFields...) + case 'm': // map type is basically a list of structs. + st := childFields[0].Type.(*arrow.StructType) + dt = arrow.MapOf(st.Field(0).Type, st.Field(1).Type) + dt.(*arrow.MapType).KeysSorted = (schema.flags & C.ARROW_FLAG_MAP_KEYS_SORTED) != 0 + } + } + + if dt == nil { + // if we didn't find a type, then it's something we haven't implemented. + err = xerrors.New("unimplemented type") + } else { + ret.Type = dt + } + return +} + +// importer to keep track when importing C ArrowArray objects. +type cimporter struct { + dt arrow.DataType + arr *C.ArrowArray + data *array.Data + parent *cimporter + children []cimporter + cbuffers []*C.void +} + +func (imp *cimporter) importChild(parent *cimporter, src *C.ArrowArray) error { + imp.parent = parent + return imp.doImport(src) +} + +// import any child arrays for lists, structs, and so on. +func (imp *cimporter) doImportChildren() error { + var children []*C.ArrowArray + // create a proper slice for our children + s := (*reflect.SliceHeader)(unsafe.Pointer(&children)) + s.Data = uintptr(unsafe.Pointer(imp.arr.children)) + s.Len = int(imp.arr.n_children) + s.Cap = int(imp.arr.n_children) + + if len(children) > 0 { + imp.children = make([]cimporter, len(children)) + } + + // handle the cases + switch imp.dt.ID() { + case arrow.LIST: // only one child to import + imp.children[0].dt = imp.dt.(*arrow.ListType).Elem() + if err := imp.children[0].importChild(imp, children[0]); err != nil { + return err + } + case arrow.FIXED_SIZE_LIST: // only one child to import + imp.children[0].dt = imp.dt.(*arrow.FixedSizeListType).Elem() + if err := imp.children[0].importChild(imp, children[0]); err != nil { + return err + } + case arrow.STRUCT: // import all the children + st := imp.dt.(*arrow.StructType) + for i, c := range children { + imp.children[i].dt = st.Field(i).Type + imp.children[i].importChild(imp, c) + } + case arrow.MAP: // only one child to import, it's a struct array + imp.children[0].dt = imp.dt.(*arrow.MapType).ValueType() + if err := imp.children[0].importChild(imp, children[0]); err != nil { + return err + } + } + + return nil +} + +func (imp *cimporter) initarr() { + arr := C.get_arr() + imp.arr = &arr +} + +// import is called recursively as needed for importing an array and its children +// in order to generate array.Data objects +func (imp *cimporter) doImport(src *C.ArrowArray) error { + imp.initarr() + // move the array from the src object passed in to the one referenced by + // this importer. That way we can set up a finalizer on the created + // *array.Data object so we clean up our Array's memory when garbage collected. + C.ArrowArrayMove(src, imp.arr) + defer func(arr *C.ArrowArray) { + if imp.data != nil { + runtime.SetFinalizer(imp.data, func(*array.Data) { + C.ArrowArrayRelease(arr) + if C.ArrowArrayIsReleased(arr) != 1 { + panic("did not release C mem") + } + }) + } + }(imp.arr) + + // import any children + if err := imp.doImportChildren(); err != nil { + return err + } + + // get a view of the buffers, zero-copy. we're just looking at the pointers + const maxlen = 0x7fffffff + imp.cbuffers = (*[maxlen]*C.void)(unsafe.Pointer(imp.arr.buffers))[:imp.arr.n_buffers:imp.arr.n_buffers] + + // handle each of our type cases + switch dt := imp.dt.(type) { + case *arrow.NullType: + if err := imp.checkNoChildren(); err != nil { + return err + } + imp.data = array.NewData(dt, int(imp.arr.length), nil, nil, int(imp.arr.null_count), int(imp.arr.offset)) + case arrow.FixedWidthDataType: + return imp.importFixedSizePrimitive() + case *arrow.StringType: + return imp.importStringLike() + case *arrow.BinaryType: + return imp.importStringLike() + case *arrow.ListType: + return imp.importListLike() + case *arrow.MapType: + return imp.importListLike() + case *arrow.FixedSizeListType: + if err := imp.checkNumChildren(1); err != nil { + return err + } + + if err := imp.checkNumBuffers(1); err != nil { + return err + } + + nulls, err := imp.importNullBitmap(0) + if err != nil { + return err + } + + imp.data = array.NewData(dt, int(imp.arr.length), []*memory.Buffer{nulls}, []*array.Data{imp.children[0].data}, int(imp.arr.null_count), int(imp.arr.offset)) + case *arrow.StructType: + if err := imp.checkNumBuffers(1); err != nil { + return err + } + + nulls, err := imp.importNullBitmap(0) + if err != nil { + return err + } + + children := make([]*array.Data, len(imp.children)) + for i := range imp.children { + children[i] = imp.children[i].data + } + + imp.data = array.NewData(dt, int(imp.arr.length), []*memory.Buffer{nulls}, children, int(imp.arr.null_count), int(imp.arr.offset)) + default: + return xerrors.Errorf("unimplemented type %s", dt) + } + + return nil +} + +func (imp *cimporter) importStringLike() error { + if err := imp.checkNoChildren(); err != nil { + return err + } + + if err := imp.checkNumBuffers(3); err != nil { + return err + } + + nulls, err := imp.importNullBitmap(0) + if err != nil { + return err + } + + offsets := imp.importOffsetsBuffer(1) + values := imp.importVariableValuesBuffer(2, 1, arrow.Int32Traits.CastFromBytes(offsets.Bytes())) + imp.data = array.NewData(imp.dt, int(imp.arr.length), []*memory.Buffer{nulls, offsets, values}, nil, int(imp.arr.null_count), int(imp.arr.offset)) + return nil +} + +func (imp *cimporter) importListLike() error { + if err := imp.checkNumChildren(1); err != nil { + return err + } + + if err := imp.checkNumBuffers(2); err != nil { + return err + } + + nulls, err := imp.importNullBitmap(0) + if err != nil { + return err + } + + offsets := imp.importOffsetsBuffer(1) + imp.data = array.NewData(imp.dt, int(imp.arr.length), []*memory.Buffer{nulls, offsets}, []*array.Data{imp.children[0].data}, int(imp.arr.null_count), int(imp.arr.offset)) + return nil +} + +func (imp *cimporter) importFixedSizePrimitive() error { + if err := imp.checkNoChildren(); err != nil { + return err + } + + if err := imp.checkNumBuffers(2); err != nil { + return err + } + + nulls, err := imp.importNullBitmap(0) + if err != nil { + return err + } + + var values *memory.Buffer + + fw := imp.dt.(arrow.FixedWidthDataType) + if bitutil.IsMultipleOf8(int64(fw.BitWidth())) { + values = imp.importFixedSizeBuffer(1, bitutil.BytesForBits(int64(fw.BitWidth()))) + } else { + if fw.BitWidth() != 1 { + return xerrors.New("invalid bitwidth") + } + values = imp.importBitsBuffer(1) + } + imp.data = array.NewData(imp.dt, int(imp.arr.length), []*memory.Buffer{nulls, values}, nil, int(imp.arr.null_count), int(imp.arr.offset)) + return nil +} + +func (imp *cimporter) checkNoChildren() error { return imp.checkNumChildren(0) } + +func (imp *cimporter) checkNumChildren(n int64) error { + if int64(imp.arr.n_children) != n { + return xerrors.Errorf("expected %d children, for imported type %s, ArrowArray has %d", n, imp.dt, imp.arr.n_children) + } + return nil +} + +func (imp *cimporter) checkNumBuffers(n int64) error { + if int64(imp.arr.n_buffers) != n { + return xerrors.Errorf("expected %d buffers for imported type %s, ArrowArray has %d", n, imp.dt, imp.arr.n_buffers) + } + return nil +} + +func (imp *cimporter) importBuffer(bufferID int, sz int64) *memory.Buffer { + // this is not a copy, we're just having a slice which points at the data + // it's still owned by the C.ArrowArray object and its backing C++ object. + const maxLen = 0x7fffffff + data := (*[maxLen]byte)(unsafe.Pointer(imp.cbuffers[bufferID]))[:sz:sz] + return memory.NewBufferBytes(data) +} + +func (imp *cimporter) importBitsBuffer(bufferID int) *memory.Buffer { + bufsize := bitutil.BytesForBits(int64(imp.arr.length) + int64(imp.arr.offset)) + return imp.importBuffer(bufferID, bufsize) +} + +func (imp *cimporter) importNullBitmap(bufferID int) (*memory.Buffer, error) { + if imp.arr.null_count > 0 && imp.cbuffers[bufferID] == nil { + return nil, xerrors.Errorf("arrowarray struct has null bitmap buffer, but non-zero null_count %d", imp.arr.null_count) + } + + if imp.arr.null_count == 0 && imp.cbuffers[bufferID] == nil { + return nil, nil + } + + return imp.importBitsBuffer(bufferID), nil +} + +func (imp *cimporter) importFixedSizeBuffer(bufferID int, byteWidth int64) *memory.Buffer { + bufsize := byteWidth * int64(imp.arr.length+imp.arr.offset) + return imp.importBuffer(bufferID, bufsize) +} + +func (imp *cimporter) importOffsetsBuffer(bufferID int) *memory.Buffer { + const offsetsize = int64(arrow.Int32SizeBytes) // go doesn't implement int64 offsets yet + bufsize := offsetsize * int64((imp.arr.length + imp.arr.offset + 1)) + return imp.importBuffer(bufferID, bufsize) +} + +func (imp *cimporter) importVariableValuesBuffer(bufferID int, byteWidth int, offsets []int32) *memory.Buffer { + bufsize := byteWidth * int(offsets[imp.arr.length]) + return imp.importBuffer(bufferID, int64(bufsize)) +} + +func importCArrayAsType(arr *C.ArrowArray, dt arrow.DataType) (imp *cimporter, err error) { + imp = &cimporter{dt: dt} + err = imp.doImport(arr) + return +} + +func initReader(rdr *nativeCRecordBatchReader, stream *C.ArrowArrayStream) { + st := C.get_stream() + rdr.stream = &st + C.ArrowArrayStreamMove(stream, rdr.stream) + runtime.SetFinalizer(rdr, func(r *nativeCRecordBatchReader) { C.ArrowArrayStreamRelease(r.stream) }) +} + +// Record Batch reader that conforms to arrio.Reader for the ArrowArrayStream interface +type nativeCRecordBatchReader struct { + stream *C.ArrowArrayStream + schema *arrow.Schema +} + +func (n *nativeCRecordBatchReader) getError(errno int) error { + return xerrors.Errorf("%w: %s", syscall.Errno(errno), C.GoString(C.stream_get_last_error(n.stream))) +} + +func (n *nativeCRecordBatchReader) Read() (array.Record, error) { + if n.schema == nil { + var sc C.ArrowSchema + errno := C.stream_get_schema(n.stream, &sc) + if errno != 0 { + return nil, n.getError(int(errno)) + } + defer C.ArrowSchemaRelease(&sc) + s, err := ImportCArrowSchema((*CArrowSchema)(&sc)) + if err != nil { + return nil, err + } + + n.schema = s + } + + arr := C.get_arr() + errno := C.stream_get_next(n.stream, &arr) + if errno != 0 { + return nil, n.getError(int(errno)) + } + + if C.ArrowArrayIsReleased(&arr) == 1 { + return nil, io.EOF + } + + return ImportCRecordBatchWithSchema(&arr, n.schema) +} diff --git a/go/arrow/cdata/cdata_fulltest.c b/go/arrow/cdata/cdata_fulltest.c new file mode 100644 index 0000000000000..5c4ca49edf6f0 --- /dev/null +++ b/go/arrow/cdata/cdata_fulltest.c @@ -0,0 +1,379 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build cgo +// +build test + +#include +#include +#include +#include +#include +#include "arrow/c/abi.h" +#include "arrow/c/helpers.h" +#include "utils.h" + +static const int64_t kDefaultFlags = ARROW_FLAG_NULLABLE; + +static void release_int32_type(struct ArrowSchema* schema) { + // mark released + schema->release = NULL; +} + +void export_int32_type(struct ArrowSchema* schema) { + const char* encoded_metadata; + if (is_little_endian() == 1) { + encoded_metadata = kEncodedMeta1LE; + } else { + encoded_metadata = kEncodedMeta1BE; + } + *schema = (struct ArrowSchema) { + // Type description + .format = "i", + .name = "", + .metadata = encoded_metadata, + .flags = 0, + .n_children = 0, + .children = NULL, + .dictionary = NULL, + // bookkeeping + .release = &release_int32_type, + }; +} + +static bool test1_released = false; + +int test1_is_released() { return test1_released; } + +static void release_int32_array(struct ArrowArray* array) { + assert(array->n_buffers == 2); + // free the buffers and buffers array + free((void *) array->buffers[1]); + free(array->buffers); + // mark released + array->release = NULL; + test1_released = true; +} + +void export_int32_array(const int32_t* data, int64_t nitems, struct ArrowArray* array) { + // initialize primitive fields + *array = (struct ArrowArray) { + .length = nitems, + .offset = 0, + .null_count = 0, + .n_buffers = 2, + .n_children = 0, + .children = NULL, + .dictionary = NULL, + // bookkeeping + .release = &release_int32_array + }; + + // allocate list of buffers + array->buffers = (const void**)malloc(sizeof(void*) * array->n_buffers); + assert(array->buffers != NULL); + array->buffers[0] = NULL; // no nulls, null bitmap can be omitted + array->buffers[1] = data; +} + + +static void release_primitive(struct ArrowSchema* schema) { + free((void *)schema->format); + schema->release = NULL; +} + +static void release_nested_internal(struct ArrowSchema* schema, + int is_dynamic) { + assert(!ArrowSchemaIsReleased(schema)); + for (int i = 0; i < schema->n_children; ++i) { + ArrowSchemaRelease(schema->children[i]); + free(schema->children[i]); + } + if (is_dynamic) { + free((void*)schema->format); + free((void*)schema->name); + } + ArrowSchemaMarkReleased(schema); +} + +static void release_nested_static(struct ArrowSchema* schema) { + release_nested_internal(schema, /*is_dynamic=*/0); +} + +static void release_nested_dynamic(struct ArrowSchema* schema) { + release_nested_internal(schema, /*is_dynamic=*/1); +} + +static void release_nested_dynamic_toplevel(struct ArrowSchema* schema) { + assert(!ArrowSchemaIsReleased(schema)); + for (int i = 0; i < schema->n_children; ++i) { + ArrowSchemaRelease(schema->children[i]); + free(schema->children[i]); + } + free((void*)schema->format); + if (strlen(schema->name) > 0) { + free((void*)schema->name); + } + ArrowSchemaMarkReleased(schema); +} + +void test_primitive(struct ArrowSchema* schema, const char* fmt) { + *schema = (struct ArrowSchema) { + // Type description + .format = fmt, + .name = "", + .metadata = NULL, + .flags = 0, + .n_children = 0, + .children = NULL, + .dictionary = NULL, + // bookkeeping + .release = &release_primitive, + }; +} + +// Since test_lists et al. allocate an entirely array of ArrowSchema pointers, +// need to expose a function to free it. +void free_malloced_schemas(struct ArrowSchema** schemas) { + free(schemas); +} + +struct ArrowSchema** test_lists(const char** fmts, const char** names, const int n) { + struct ArrowSchema** schemas = malloc(sizeof(struct ArrowSchema*)*n); + for (int i = 0; i < n; ++i) { + schemas[i] = malloc(sizeof(struct ArrowSchema)); + *schemas[i] = (struct ArrowSchema) { + .format = fmts[i], + .name = names[i], + .metadata = NULL, + .flags = 0, + .children = NULL, + .n_children = 0, + .dictionary = NULL, + .release = &release_nested_dynamic, + }; + if (i != 0) { + schemas[i-1]->n_children = 1; + schemas[i-1]->children = &schemas[i]; + } + } + return schemas; +} + +struct ArrowSchema** fill_structs(const char** fmts, const char** names, int64_t* flags, const int n) { + struct ArrowSchema** schemas = malloc(sizeof(struct ArrowSchema*)*n); + for (int i = 0; i < n; ++i) { + schemas[i] = malloc(sizeof(struct ArrowSchema)); + *schemas[i] = (struct ArrowSchema) { + .format = fmts[i], + .name = names[i], + .metadata = NULL, + .flags = flags[i], + .children = NULL, + .n_children = 0, + .dictionary = NULL, + .release = &release_nested_dynamic, + }; + } + + schemas[0]->children = &schemas[1]; + schemas[0]->n_children = n-1; + return schemas; +} + +struct ArrowSchema** test_struct(const char** fmts, const char** names, int64_t* flags, const int n) { + struct ArrowSchema** schemas = fill_structs(fmts, names, flags, n); + + if (is_little_endian() == 1) { + schemas[n-1]->metadata = kEncodedMeta2LE; + } else { + schemas[n-1]->metadata = kEncodedMeta2BE; + } + + return schemas; +} + +struct ArrowSchema** test_schema(const char** fmts, const char** names, int64_t* flags, const int n) { + struct ArrowSchema** schemas = fill_structs(fmts, names, flags, n); + + if (is_little_endian() == 1) { + schemas[0]->metadata = kEncodedMeta2LE; + schemas[n-1]->metadata = kEncodedMeta1LE; + } else { + schemas[0]->metadata = kEncodedMeta2BE; + schemas[n-1]->metadata = kEncodedMeta1BE; + } + return schemas; +} + +struct ArrowSchema** test_map(const char** fmts, const char** names, int64_t* flags, const int n) { + struct ArrowSchema** schemas = malloc(sizeof(struct ArrowSchema*)*n); + for (int i = 0; i < n; ++i) { + schemas[i] = malloc(sizeof(struct ArrowSchema)); + *schemas[i] = (struct ArrowSchema) { + .format = fmts[i], + .name = names[i], + .metadata = NULL, + .flags = flags[i], + .children = NULL, + .n_children = 0, + .dictionary = NULL, + .release = &release_nested_dynamic, + }; + } + + schemas[0]->n_children = 1; + schemas[0]->children = &schemas[1]; + schemas[1]->n_children = n-2; + schemas[1]->children = &schemas[2]; + + return schemas; +} + +struct streamcounter { + int n; + int max; +}; + +static int stream_schema(struct ArrowArrayStream* st, struct ArrowSchema* out) { + out->children = malloc(sizeof(struct ArrowSchema*)*2); + out->n_children = 2; + + out->children[0] = malloc(sizeof(struct ArrowSchema)); + *out->children[0] = (struct ArrowSchema) { + .format = "i", + .name = "a", + .metadata = NULL, + .flags = ARROW_FLAG_NULLABLE, + .children = NULL, + .n_children = 0, + .dictionary = NULL, + .release = &release_nested_static, + }; + + out->children[1] = malloc(sizeof(struct ArrowSchema)); + *out->children[1] = (struct ArrowSchema) { + .format = "u", + .name = "b", + .metadata = NULL, + .flags = ARROW_FLAG_NULLABLE, + .children = NULL, + .n_children = 0, + .dictionary = NULL, + .release = &release_nested_static, + }; + + out->format = "+s"; + out->release = &release_nested_static; + + return 0; +} + +static void release_stream(struct ArrowArrayStream* st) { + free(st->private_data); + ArrowArrayStreamMarkReleased(st); +} + +static void release_the_array(struct ArrowArray* out) { + for (int i = 0; i < out->n_children; ++i) { + ArrowArrayRelease(out->children[i]); + } + free((void*)out->children); + free(out->buffers); + out->release = NULL; +} + +void export_int32_array(const int32_t*, int64_t, struct ArrowArray*); + +static void release_str_array(struct ArrowArray* array) { + assert(array->n_buffers == 3); + free((void*) array->buffers[1]); + free((void*) array->buffers[2]); + free(array->buffers); + array->release = NULL; +} + +void export_str_array(const char* data, const int32_t* offsets, int64_t nitems, struct ArrowArray* out) { + *out = (struct ArrowArray) { + .length = nitems, + .offset = 0, + .null_count = 0, + .n_buffers = 3, + .n_children = 0, + .children = NULL, + .dictionary = NULL, + // bookkeeping + .release = &release_str_array + }; + + out->buffers = (const void**)malloc(sizeof(void*) * out->n_buffers); + assert(out->buffers != NULL); + out->buffers[0] = NULL; + out->buffers[1] = offsets; + out->buffers[2] = data; +} + +static int next_record(struct ArrowArrayStream* st, struct ArrowArray* out) { + struct streamcounter* cnter = (struct streamcounter*)(st->private_data); + if (cnter->n == cnter->max) { + ArrowArrayMarkReleased(out); + return 0; + } + + cnter->n++; + + *out = (struct ArrowArray) { + .offset = 0, + .dictionary = NULL, + .length = 3, + .null_count = 0, + .buffers = (const void**)malloc(sizeof(void*)), + .n_children = 2, + .n_buffers = 1, + .release = &release_the_array + }; + + out->buffers[0] = NULL; + out->children = (struct ArrowArray**)malloc(sizeof(struct ArrowArray*)*2); + int32_t* intdata = malloc(sizeof(int32_t)*3); + for (int i = 0; i < 3; ++i) { + intdata[i] = cnter->n * (i+1); + } + + out->children[0] = malloc(sizeof(struct ArrowArray)); + export_int32_array(intdata, 3, out->children[0]); + out->children[1] = malloc(sizeof(struct ArrowArray)); + char* strdata = strdup("foobarbaz"); + int32_t* offsets = malloc(sizeof(int32_t)*4); + offsets[0] = 0; + offsets[1] = 3; + offsets[2] = 6; + offsets[3] = 9; + export_str_array(strdata, offsets, 3, out->children[1]); + + return 0; +} + +void setup_array_stream_test(const int n_batches, struct ArrowArrayStream* out) { + struct streamcounter* cnt = malloc(sizeof(struct streamcounter)); + cnt->max = n_batches; + cnt->n = 0; + + out->get_next = &next_record; + out->get_schema = &stream_schema; + out->release = &release_stream; + out->private_data = cnt; +} diff --git a/go/arrow/cdata/cdata_test.go b/go/arrow/cdata/cdata_test.go new file mode 100644 index 0000000000000..e5e43deddb732 --- /dev/null +++ b/go/arrow/cdata/cdata_test.go @@ -0,0 +1,621 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build cgo +// +build test + +// use test tag so that we only run these tests when the "test" tag is present +// so that the .c and other framework infrastructure is only compiled in during +// testing, and the .c files and symbols are not present in release builds. + +package cdata + +import ( + "io" + "runtime" + "testing" + "time" + "unsafe" + + "github.com/apache/arrow/go/arrow" + "github.com/apache/arrow/go/arrow/array" + "github.com/apache/arrow/go/arrow/decimal128" + "github.com/apache/arrow/go/arrow/memory" + "github.com/stretchr/testify/assert" +) + +func TestSchemaExport(t *testing.T) { + sc := exportInt32TypeSchema() + f, err := importSchema(&sc) + assert.NoError(t, err) + + keys, _ := getMetadataKeys() + vals, _ := getMetadataValues() + + assert.Equal(t, arrow.PrimitiveTypes.Int32, f.Type) + assert.Equal(t, keys, f.Metadata.Keys()) + assert.Equal(t, vals, f.Metadata.Values()) + + // schema was released when importing + assert.True(t, schemaIsReleased(&sc)) +} + +func TestSimpleArrayExport(t *testing.T) { + assert.False(t, test1IsReleased()) + + testarr := exportInt32Array() + arr, err := ImportCArrayWithType(&testarr, arrow.PrimitiveTypes.Int32) + assert.NoError(t, err) + + assert.False(t, test1IsReleased()) + assert.True(t, isReleased(&testarr)) + + arr.Release() + runtime.GC() + assert.Eventually(t, test1IsReleased, 1*time.Second, 10*time.Millisecond) +} + +func TestSimpleArrayAndSchema(t *testing.T) { + sc := exportInt32TypeSchema() + testarr := exportInt32Array() + + // grab address of the buffer we stuck into the ArrowArray object + buflist := (*[2]unsafe.Pointer)(unsafe.Pointer(testarr.buffers)) + origvals := (*[10]int32)(unsafe.Pointer(buflist[1])) + + fld, arr, err := ImportCArray(&testarr, &sc) + assert.NoError(t, err) + assert.Equal(t, arrow.PrimitiveTypes.Int32, fld.Type) + assert.EqualValues(t, 10, arr.Len()) + + // verify that the address is the same of the first integer for the + // slice that is being used by the array.Interface and the original buffer + vals := arr.(*array.Int32).Int32Values() + assert.Same(t, &vals[0], &origvals[0]) + + // and that the values are correct + for i, v := range vals { + assert.Equal(t, int32(i+1), v) + } +} + +func TestPrimitiveSchemas(t *testing.T) { + tests := []struct { + typ arrow.DataType + fmt string + }{ + {arrow.PrimitiveTypes.Int8, "c"}, + {arrow.PrimitiveTypes.Int16, "s"}, + {arrow.PrimitiveTypes.Int32, "i"}, + {arrow.PrimitiveTypes.Int64, "l"}, + {arrow.PrimitiveTypes.Uint8, "C"}, + {arrow.PrimitiveTypes.Uint16, "S"}, + {arrow.PrimitiveTypes.Uint32, "I"}, + {arrow.PrimitiveTypes.Uint64, "L"}, + {arrow.FixedWidthTypes.Boolean, "b"}, + {arrow.Null, "n"}, + {arrow.FixedWidthTypes.Float16, "e"}, + {arrow.PrimitiveTypes.Float32, "f"}, + {arrow.PrimitiveTypes.Float64, "g"}, + {&arrow.FixedSizeBinaryType{ByteWidth: 3}, "w:3"}, + {arrow.BinaryTypes.Binary, "z"}, + {arrow.BinaryTypes.String, "u"}, + {&arrow.Decimal128Type{Precision: 16, Scale: 4}, "d:16,4"}, + {&arrow.Decimal128Type{Precision: 15, Scale: 0}, "d:15,0"}, + {&arrow.Decimal128Type{Precision: 15, Scale: -4}, "d:15,-4"}, + } + + for _, tt := range tests { + t.Run(tt.typ.Name(), func(t *testing.T) { + sc := testPrimitive(tt.fmt) + + f, err := ImportCArrowField(&sc) + assert.NoError(t, err) + + assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) + + assert.True(t, schemaIsReleased(&sc)) + }) + } +} + +func TestImportTemporalSchema(t *testing.T) { + tests := []struct { + typ arrow.DataType + fmt string + }{ + {arrow.FixedWidthTypes.Date32, "tdD"}, + {arrow.FixedWidthTypes.Date64, "tdm"}, + {arrow.FixedWidthTypes.Time32s, "tts"}, + {arrow.FixedWidthTypes.Time32ms, "ttm"}, + {arrow.FixedWidthTypes.Time64us, "ttu"}, + {arrow.FixedWidthTypes.Time64ns, "ttn"}, + {arrow.FixedWidthTypes.Duration_s, "tDs"}, + {arrow.FixedWidthTypes.Duration_ms, "tDm"}, + {arrow.FixedWidthTypes.Duration_us, "tDu"}, + {arrow.FixedWidthTypes.Duration_ns, "tDn"}, + {arrow.FixedWidthTypes.MonthInterval, "tiM"}, + {arrow.FixedWidthTypes.DayTimeInterval, "tiD"}, + {arrow.FixedWidthTypes.Timestamp_s, "tss:"}, + {&arrow.TimestampType{Unit: arrow.Second, TimeZone: "Europe/Paris"}, "tss:Europe/Paris"}, + {arrow.FixedWidthTypes.Timestamp_ms, "tsm:"}, + {&arrow.TimestampType{Unit: arrow.Millisecond, TimeZone: "Europe/Paris"}, "tsm:Europe/Paris"}, + {arrow.FixedWidthTypes.Timestamp_us, "tsu:"}, + {&arrow.TimestampType{Unit: arrow.Microsecond, TimeZone: "Europe/Paris"}, "tsu:Europe/Paris"}, + {arrow.FixedWidthTypes.Timestamp_ns, "tsn:"}, + {&arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: "Europe/Paris"}, "tsn:Europe/Paris"}, + } + + for _, tt := range tests { + t.Run(tt.typ.Name(), func(t *testing.T) { + sc := testPrimitive(tt.fmt) + + f, err := ImportCArrowField(&sc) + assert.NoError(t, err) + + assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) + + assert.True(t, schemaIsReleased(&sc)) + }) + } +} + +func TestListSchemas(t *testing.T) { + tests := []struct { + typ arrow.DataType + fmts []string + names []string + }{ + {arrow.ListOf(arrow.PrimitiveTypes.Int8), []string{"+l", "c"}, []string{"", "item"}}, + {arrow.FixedSizeListOf(2, arrow.PrimitiveTypes.Int64), []string{"+w:2", "l"}, []string{"", "item"}}, + {arrow.ListOf(arrow.ListOf(arrow.PrimitiveTypes.Int32)), []string{"+l", "+l", "i"}, []string{"", "item", "item"}}, + } + + for _, tt := range tests { + t.Run(tt.typ.Name(), func(t *testing.T) { + sc := testNested(tt.fmts, tt.names) + defer freeMallocedSchemas(sc) + + top := (*[1]*CArrowSchema)(unsafe.Pointer(sc))[0] + f, err := ImportCArrowField(top) + assert.NoError(t, err) + + assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) + + assert.True(t, schemaIsReleased(top)) + }) + } +} + +func TestStructSchemas(t *testing.T) { + tests := []struct { + typ arrow.DataType + fmts []string + names []string + flags []int64 + }{ + {arrow.StructOf( + arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, + arrow.Field{Name: "b", Type: arrow.BinaryTypes.String, Nullable: true, Metadata: metadata2}, + ), []string{"+s", "c", "u"}, []string{"", "a", "b"}, []int64{flagIsNullable, flagIsNullable, flagIsNullable}}, + } + + for _, tt := range tests { + t.Run(tt.typ.Name(), func(t *testing.T) { + sc := testStruct(tt.fmts, tt.names, tt.flags) + defer freeMallocedSchemas(sc) + + top := (*[1]*CArrowSchema)(unsafe.Pointer(sc))[0] + f, err := ImportCArrowField(top) + assert.NoError(t, err) + + assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) + + assert.True(t, schemaIsReleased(top)) + }) + } +} + +func TestMapSchemas(t *testing.T) { + tests := []struct { + typ *arrow.MapType + keysSorted bool + fmts []string + names []string + flags []int64 + }{ + {arrow.MapOf(arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String), false, []string{"+m", "+s", "c", "u"}, []string{"", "entries", "key", "value"}, []int64{flagIsNullable, 0, 0, flagIsNullable}}, + {arrow.MapOf(arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String), true, []string{"+m", "+s", "c", "u"}, []string{"", "entries", "key", "value"}, []int64{flagIsNullable | flagMapKeysSorted, 0, 0, flagIsNullable}}, + } + + for _, tt := range tests { + t.Run(tt.typ.Name(), func(t *testing.T) { + sc := testMap(tt.fmts, tt.names, tt.flags) + defer freeMallocedSchemas(sc) + + top := (*[1]*CArrowSchema)(unsafe.Pointer(sc))[0] + f, err := ImportCArrowField(top) + assert.NoError(t, err) + + tt.typ.KeysSorted = tt.keysSorted + assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) + + assert.True(t, schemaIsReleased(top)) + }) + } +} + +func TestSchema(t *testing.T) { + // schema is exported as an equivalent struct type (+ top-level metadata) + sc := arrow.NewSchema([]arrow.Field{ + {Name: "nulls", Type: arrow.Null, Nullable: false}, + {Name: "values", Type: arrow.PrimitiveTypes.Int64, Nullable: true, Metadata: metadata1}, + }, &metadata2) + + cst := testSchema([]string{"+s", "n", "l"}, []string{"", "nulls", "values"}, []int64{0, 0, flagIsNullable}) + defer freeMallocedSchemas(cst) + + top := (*[1]*CArrowSchema)(unsafe.Pointer(cst))[0] + out, err := ImportCArrowSchema(top) + assert.NoError(t, err) + + assert.True(t, sc.Equal(out)) + assert.True(t, sc.Metadata().Equal(out.Metadata())) + + assert.True(t, schemaIsReleased(top)) +} + +func createTestInt8Arr() array.Interface { + bld := array.NewInt8Builder(memory.DefaultAllocator) + defer bld.Release() + + bld.AppendValues([]int8{1, 2, 0, -3}, []bool{true, true, false, true}) + return bld.NewInt8Array() +} + +func createTestInt16Arr() array.Interface { + bld := array.NewInt16Builder(memory.DefaultAllocator) + defer bld.Release() + + bld.AppendValues([]int16{1, 2, -3}, []bool{true, true, true}) + return bld.NewInt16Array() +} + +func createTestInt32Arr() array.Interface { + bld := array.NewInt32Builder(memory.DefaultAllocator) + defer bld.Release() + + bld.AppendValues([]int32{1, 2, 0, -3}, []bool{true, true, false, true}) + return bld.NewInt32Array() +} + +func createTestInt64Arr() array.Interface { + bld := array.NewInt64Builder(memory.DefaultAllocator) + defer bld.Release() + + bld.AppendValues([]int64{1, 2, -3}, []bool{true, true, true}) + return bld.NewInt64Array() +} + +func createTestUint8Arr() array.Interface { + bld := array.NewUint8Builder(memory.DefaultAllocator) + defer bld.Release() + + bld.AppendValues([]uint8{1, 2, 0, 3}, []bool{true, true, false, true}) + return bld.NewUint8Array() +} + +func createTestUint16Arr() array.Interface { + bld := array.NewUint16Builder(memory.DefaultAllocator) + defer bld.Release() + + bld.AppendValues([]uint16{1, 2, 3}, []bool{true, true, true}) + return bld.NewUint16Array() +} + +func createTestUint32Arr() array.Interface { + bld := array.NewUint32Builder(memory.DefaultAllocator) + defer bld.Release() + + bld.AppendValues([]uint32{1, 2, 0, 3}, []bool{true, true, false, true}) + return bld.NewUint32Array() +} + +func createTestUint64Arr() array.Interface { + bld := array.NewUint64Builder(memory.DefaultAllocator) + defer bld.Release() + + bld.AppendValues([]uint64{1, 2, 3}, []bool{true, true, true}) + return bld.NewUint64Array() +} + +func createTestBoolArr() array.Interface { + bld := array.NewBooleanBuilder(memory.DefaultAllocator) + defer bld.Release() + + bld.AppendValues([]bool{true, false, false}, []bool{true, true, false}) + return bld.NewBooleanArray() +} + +func createTestNullArr() array.Interface { + return array.NewNull(2) +} + +func createTestFloat32Arr() array.Interface { + bld := array.NewFloat32Builder(memory.DefaultAllocator) + defer bld.Release() + + bld.AppendValues([]float32{1.5, 0}, []bool{true, false}) + return bld.NewFloat32Array() +} + +func createTestFloat64Arr() array.Interface { + bld := array.NewFloat64Builder(memory.DefaultAllocator) + defer bld.Release() + + bld.AppendValues([]float64{1.5, 0}, []bool{true, false}) + return bld.NewFloat64Array() +} + +func createTestFSBArr() array.Interface { + bld := array.NewFixedSizeBinaryBuilder(memory.DefaultAllocator, &arrow.FixedSizeBinaryType{ByteWidth: 3}) + defer bld.Release() + + bld.AppendValues([][]byte{[]byte("foo"), []byte("bar"), nil}, []bool{true, true, false}) + return bld.NewFixedSizeBinaryArray() +} + +func createTestBinaryArr() array.Interface { + bld := array.NewBinaryBuilder(memory.DefaultAllocator, arrow.BinaryTypes.Binary) + defer bld.Release() + + bld.AppendValues([][]byte{[]byte("foo"), []byte("bar"), nil}, []bool{true, true, false}) + return bld.NewBinaryArray() +} + +func createTestStrArr() array.Interface { + bld := array.NewStringBuilder(memory.DefaultAllocator) + defer bld.Release() + + bld.AppendValues([]string{"foo", "bar", ""}, []bool{true, true, false}) + return bld.NewStringArray() +} + +func createTestDecimalArr() array.Interface { + bld := array.NewDecimal128Builder(memory.DefaultAllocator, &arrow.Decimal128Type{Precision: 16, Scale: 4}) + defer bld.Release() + + bld.AppendValues([]decimal128.Num{decimal128.FromU64(12345670), decimal128.FromU64(0)}, []bool{true, false}) + return bld.NewDecimal128Array() +} + +func TestPrimitiveArrs(t *testing.T) { + tests := []struct { + name string + fn func() array.Interface + }{ + {"int8", createTestInt8Arr}, + {"uint8", createTestUint8Arr}, + {"int16", createTestInt16Arr}, + {"uint16", createTestUint16Arr}, + {"int32", createTestInt32Arr}, + {"uint32", createTestUint32Arr}, + {"int64", createTestInt64Arr}, + {"uint64", createTestUint64Arr}, + {"bool", createTestBoolArr}, + {"null", createTestNullArr}, + {"float32", createTestFloat32Arr}, + {"float64", createTestFloat64Arr}, + {"fixed size binary", createTestFSBArr}, + {"binary", createTestBinaryArr}, + {"utf8", createTestStrArr}, + {"decimal128", createTestDecimalArr}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + arr := tt.fn() + defer arr.Release() + + carr := createCArr(arr) + defer freeTestArr(carr) + + imported, err := ImportCArrayWithType(carr, arr.DataType()) + assert.NoError(t, err) + assert.True(t, array.ArrayEqual(arr, imported)) + assert.True(t, isReleased(carr)) + + imported.Release() + }) + } +} + +func TestPrimitiveSliced(t *testing.T) { + arr := createTestInt16Arr() + defer arr.Release() + + sl := array.NewSlice(arr, 1, 2) + defer sl.Release() + + carr := createCArr(sl) + defer freeTestArr(carr) + + imported, err := ImportCArrayWithType(carr, arr.DataType()) + assert.NoError(t, err) + assert.True(t, array.ArrayEqual(sl, imported)) + assert.True(t, array.ArraySliceEqual(arr, 1, 2, imported, 0, int64(imported.Len()))) + assert.True(t, isReleased(carr)) + + imported.Release() +} + +func createTestListArr() array.Interface { + bld := array.NewListBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Int8) + defer bld.Release() + + vb := bld.ValueBuilder().(*array.Int8Builder) + + bld.Append(true) + vb.AppendValues([]int8{1, 2}, []bool{true, true}) + + bld.Append(true) + vb.AppendValues([]int8{3, 0}, []bool{true, false}) + + bld.AppendNull() + + return bld.NewArray() +} + +func createTestFixedSizeList() array.Interface { + bld := array.NewFixedSizeListBuilder(memory.DefaultAllocator, 2, arrow.PrimitiveTypes.Int64) + defer bld.Release() + + vb := bld.ValueBuilder().(*array.Int64Builder) + + bld.Append(true) + vb.AppendValues([]int64{1, 2}, []bool{true, true}) + + bld.Append(true) + vb.AppendValues([]int64{3, 0}, []bool{true, false}) + + bld.AppendNull() + return bld.NewArray() +} + +func createTestStructArr() array.Interface { + bld := array.NewStructBuilder(memory.DefaultAllocator, arrow.StructOf( + arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, + arrow.Field{Name: "b", Type: arrow.BinaryTypes.String, Nullable: true}, + )) + defer bld.Release() + + f1bld := bld.FieldBuilder(0).(*array.Int8Builder) + f2bld := bld.FieldBuilder(1).(*array.StringBuilder) + + bld.Append(true) + f1bld.Append(1) + f2bld.Append("foo") + + bld.Append(true) + f1bld.Append(2) + f2bld.AppendNull() + + return bld.NewArray() +} + +func createTestMapArr() array.Interface { + bld := array.NewMapBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String, false) + defer bld.Release() + + kb := bld.KeyBuilder().(*array.Int8Builder) + vb := bld.ItemBuilder().(*array.StringBuilder) + + bld.Append(true) + kb.Append(1) + vb.Append("foo") + kb.Append(2) + vb.AppendNull() + + bld.Append(true) + kb.Append(3) + vb.Append("bar") + + return bld.NewArray() +} + +func TestNestedArrays(t *testing.T) { + tests := []struct { + name string + fn func() array.Interface + }{ + {"list", createTestListArr}, + {"fixed size list", createTestFixedSizeList}, + {"struct", createTestStructArr}, + {"map", createTestMapArr}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + arr := tt.fn() + defer arr.Release() + + carr := createCArr(arr) + defer freeTestArr(carr) + + imported, err := ImportCArrayWithType(carr, arr.DataType()) + assert.NoError(t, err) + assert.True(t, array.ArrayEqual(arr, imported)) + assert.True(t, isReleased(carr)) + + imported.Release() + }) + } +} + +func TestRecordBatch(t *testing.T) { + arr := createTestStructArr() + defer arr.Release() + + carr := createCArr(arr) + defer freeTestArr(carr) + + sc := testStruct([]string{"+s", "c", "u"}, []string{"", "a", "b"}, []int64{0, flagIsNullable, flagIsNullable}) + defer freeMallocedSchemas(sc) + + top := (*[1]*CArrowSchema)(unsafe.Pointer(sc))[0] + rb, err := ImportCRecordBatch(carr, top) + assert.NoError(t, err) + defer rb.Release() + + assert.EqualValues(t, 2, rb.NumCols()) + rbschema := rb.Schema() + assert.Equal(t, "a", rbschema.Field(0).Name) + assert.Equal(t, "b", rbschema.Field(1).Name) + + rec := array.NewRecord(rbschema, []array.Interface{arr.(*array.Struct).Field(0), arr.(*array.Struct).Field(1)}, -1) + defer rec.Release() + + assert.True(t, array.RecordEqual(rb, rec)) +} + +func TestRecordReaderStream(t *testing.T) { + stream := arrayStreamTest() + defer releaseStream(stream) + + rdr := ImportCArrayStream(stream, nil) + i := 0 + for { + rec, err := rdr.Read() + if err != nil { + if err == io.EOF { + break + } + assert.NoError(t, err) + } + defer rec.Release() + + assert.EqualValues(t, 2, rec.NumCols()) + assert.Equal(t, "a", rec.ColumnName(0)) + assert.Equal(t, "b", rec.ColumnName(1)) + i++ + for j := 0; j < int(rec.NumRows()); j++ { + assert.Equal(t, int32((j+1)*i), rec.Column(0).(*array.Int32).Value(j)) + } + assert.Equal(t, "foo", rec.Column(1).(*array.String).Value(0)) + assert.Equal(t, "bar", rec.Column(1).(*array.String).Value(1)) + assert.Equal(t, "baz", rec.Column(1).(*array.String).Value(2)) + } +} diff --git a/go/arrow/cdata/cdata_test_framework.go b/go/arrow/cdata/cdata_test_framework.go new file mode 100644 index 0000000000000..c9ccd74e0a2ee --- /dev/null +++ b/go/arrow/cdata/cdata_test_framework.go @@ -0,0 +1,248 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build test + +package cdata + +// #include +// #include +// #include "arrow/c/abi.h" +// #include "arrow/c/helpers.h" +// +// void setup_array_stream_test(const int n_batches, struct ArrowArrayStream* out); +// struct ArrowArray* get_test_arr() { return (struct ArrowArray*)(malloc(sizeof(struct ArrowArray))); } +// struct ArrowArrayStream* get_test_stream() { return (struct ArrowArrayStream*)malloc(sizeof(struct ArrowArrayStream)); } +// +// void release_test_arr(struct ArrowArray* arr) { +// for (int i = 0; i < arr->n_buffers; ++i) { +// free((void*)arr->buffers[i]); +// } +// ArrowArrayMarkReleased(arr); +// } +// +// int32_t* get_data() { +// int32_t* data = malloc(sizeof(int32_t)*10); +// for (int i = 0; i < 10; ++i) { data[i] = i+1; } +// return data; +// } +// void export_int32_type(struct ArrowSchema* schema); +// void export_int32_array(const int32_t*, int64_t, struct ArrowArray*); +// int test1_is_released(); +// void test_primitive(struct ArrowSchema* schema, const char* fmt); +// void free_malloced_schemas(struct ArrowSchema**); +// struct ArrowSchema** test_lists(const char** fmts, const char** names, const int n); +// struct ArrowSchema** test_struct(const char** fmts, const char** names, int64_t* flags, const int n); +// struct ArrowSchema** test_map(const char** fmts, const char** names, int64_t* flags, const int n); +// struct ArrowSchema** test_schema(const char** fmts, const char** names, int64_t* flags, const int n); +import "C" +import ( + "unsafe" + + "github.com/apache/arrow/go/arrow" + "github.com/apache/arrow/go/arrow/array" +) + +const ( + flagIsNullable = C.ARROW_FLAG_NULLABLE + flagMapKeysSorted = C.ARROW_FLAG_MAP_KEYS_SORTED +) + +var ( + metadata1 = arrow.NewMetadata([]string{"key1", "key2"}, []string{"", "bar"}) + metadata2 = arrow.NewMetadata([]string{"key"}, []string{"abcde"}) +) + +func exportInt32TypeSchema() CArrowSchema { + var s CArrowSchema + C.export_int32_type(&s) + return s +} + +func releaseStream(s *CArrowArrayStream) { + C.ArrowArrayStreamRelease(s) +} + +func releaseSchema(s *CArrowSchema) { + C.ArrowSchemaRelease(s) +} + +func schemaIsReleased(s *CArrowSchema) bool { + return C.ArrowSchemaIsReleased(s) == 1 +} + +func getMetadataKeys() ([]string, []string) { + return []string{"key1", "key2"}, []string{"key"} +} + +func getMetadataValues() ([]string, []string) { + return []string{"", "bar"}, []string{"abcde"} +} + +func exportInt32Array() CArrowArray { + var arr CArrowArray + C.export_int32_array(C.get_data(), C.int64_t(10), &arr) + return arr +} + +func isReleased(arr *CArrowArray) bool { + return C.ArrowArrayIsReleased(arr) == 1 +} + +func test1IsReleased() bool { + return C.test1_is_released() == 1 +} + +func testPrimitive(fmtstr string) CArrowSchema { + var s CArrowSchema + fmt := C.CString(fmtstr) + C.test_primitive(&s, fmt) + return s +} + +func freeMallocedSchemas(schemas **CArrowSchema) { + C.free_malloced_schemas(schemas) +} + +func testNested(fmts, names []string) **CArrowSchema { + if len(fmts) != len(names) { + panic("testing nested lists must have same size fmts and names") + } + cfmts := make([]*C.char, len(fmts)) + cnames := make([]*C.char, len(names)) + + for i := range fmts { + cfmts[i] = C.CString(fmts[i]) + cnames[i] = C.CString(names[i]) + } + + return C.test_lists((**C.char)(unsafe.Pointer(&cfmts[0])), (**C.char)(unsafe.Pointer(&cnames[0])), C.int(len(fmts))) +} + +func testStruct(fmts, names []string, flags []int64) **CArrowSchema { + if len(fmts) != len(names) || len(names) != len(flags) { + panic("testing structs must all have the same size slices in args") + } + + cfmts := make([]*C.char, len(fmts)) + cnames := make([]*C.char, len(names)) + cflags := make([]C.int64_t, len(flags)) + + for i := range fmts { + cfmts[i] = C.CString(fmts[i]) + cnames[i] = C.CString(names[i]) + cflags[i] = C.int64_t(flags[i]) + } + + return C.test_struct((**C.char)(unsafe.Pointer(&cfmts[0])), (**C.char)(unsafe.Pointer(&cnames[0])), (*C.int64_t)(unsafe.Pointer(&cflags[0])), C.int(len(fmts))) +} + +func testMap(fmts, names []string, flags []int64) **CArrowSchema { + if len(fmts) != len(names) || len(names) != len(flags) { + panic("testing maps must all have the same size slices in args") + } + + cfmts := make([]*C.char, len(fmts)) + cnames := make([]*C.char, len(names)) + cflags := make([]C.int64_t, len(flags)) + + for i := range fmts { + cfmts[i] = C.CString(fmts[i]) + cnames[i] = C.CString(names[i]) + cflags[i] = C.int64_t(flags[i]) + } + + return C.test_map((**C.char)(unsafe.Pointer(&cfmts[0])), (**C.char)(unsafe.Pointer(&cnames[0])), (*C.int64_t)(unsafe.Pointer(&cflags[0])), C.int(len(fmts))) +} + +func testSchema(fmts, names []string, flags []int64) **CArrowSchema { + if len(fmts) != len(names) || len(names) != len(flags) { + panic("testing structs must all have the same size slices in args") + } + + cfmts := make([]*C.char, len(fmts)) + cnames := make([]*C.char, len(names)) + cflags := make([]C.int64_t, len(flags)) + + for i := range fmts { + cfmts[i] = C.CString(fmts[i]) + cnames[i] = C.CString(names[i]) + cflags[i] = C.int64_t(flags[i]) + } + + return C.test_schema((**C.char)(unsafe.Pointer(&cfmts[0])), (**C.char)(unsafe.Pointer(&cnames[0])), (*C.int64_t)(unsafe.Pointer(&cflags[0])), C.int(len(fmts))) +} + +func freeTestArr(carr *CArrowArray) { + C.free(unsafe.Pointer(carr)) +} + +func createCArr(arr array.Interface) *CArrowArray { + var ( + carr = C.get_test_arr() + children = (**CArrowArray)(nil) + nchildren = C.int64_t(0) + ) + + switch arr := arr.(type) { + case *array.List: + clist := []*CArrowArray{createCArr(arr.ListValues())} + children = (**CArrowArray)(unsafe.Pointer(&clist[0])) + nchildren += 1 + case *array.FixedSizeList: + clist := []*CArrowArray{createCArr(arr.ListValues())} + children = (**CArrowArray)(unsafe.Pointer(&clist[0])) + nchildren += 1 + case *array.Struct: + clist := []*CArrowArray{} + for i := 0; i < arr.NumField(); i++ { + clist = append(clist, createCArr(arr.Field(i))) + nchildren += 1 + } + children = (**CArrowArray)(unsafe.Pointer(&clist[0])) + case *array.Map: + clist := []*CArrowArray{createCArr(arr.ListValues())} + children = (**CArrowArray)(unsafe.Pointer(&clist[0])) + nchildren += 1 + } + + carr.children = children + carr.n_children = nchildren + carr.dictionary = nil + carr.length = C.int64_t(arr.Len()) + carr.null_count = C.int64_t(arr.NullN()) + carr.offset = C.int64_t(arr.Data().Offset()) + buffers := arr.Data().Buffers() + cbuf := []unsafe.Pointer{} + for _, b := range buffers { + if b != nil { + cbuf = append(cbuf, C.CBytes(b.Bytes())) + } + } + carr.n_buffers = C.int64_t(len(cbuf)) + if len(cbuf) > 0 { + carr.buffers = &cbuf[0] + } + carr.release = (*[0]byte)(C.release_test_arr) + + return carr +} + +func arrayStreamTest() *CArrowArrayStream { + st := C.get_test_stream() + C.setup_array_stream_test(2, st) + return st +} diff --git a/go/arrow/cdata/interface.go b/go/arrow/cdata/interface.go new file mode 100644 index 0000000000000..d6258a5abbe5c --- /dev/null +++ b/go/arrow/cdata/interface.go @@ -0,0 +1,161 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build cgo + +package cdata + +import ( + "github.com/apache/arrow/go/arrow" + "github.com/apache/arrow/go/arrow/array" + "github.com/apache/arrow/go/arrow/arrio" + "golang.org/x/xerrors" +) + +// ImportCArrowField takes in an ArrowSchema from the C Data interface, it +// will copy the metadata and type definitions rather than keep direct references +// to them. It is safe to call C.ArrowSchemaRelease after receiving the field +// from this function. +func ImportCArrowField(out *CArrowSchema) (arrow.Field, error) { + return importSchema(out) +} + +// ImportCArrowSchema takes in the ArrowSchema from the C Data Interface, it +// will copy the metadata and schema definitions over from the C object rather +// than keep direct references to them. This function will call ArrowSchemaRelease +// on the passed in schema regardless of whether or not there is an error returned. +// +// This version is intended to take in a schema for a record batch, which means +// that the top level of the schema should be a struct of the schema fields. If +// importing a single array's schema, then use ImportCArrowField instead. +func ImportCArrowSchema(out *CArrowSchema) (*arrow.Schema, error) { + ret, err := importSchema(out) + if err != nil { + return nil, err + } + + return arrow.NewSchema(ret.Type.(*arrow.StructType).Fields(), &ret.Metadata), nil +} + +// ImportCArrayWithType takes a pointer to a C Data ArrowArray and interprets the values +// as an array with the given datatype. If err is not nil, then ArrowArrayRelease must still +// be called on arr to release the memory. +// +// The underlying buffers will not be copied, but will instead be referenced directly +// by the resulting array interface object. The passed in ArrowArray will have it's ownership +// transferred to the resulting array.Interface via ArrowArrayMove. The underlying array.Data +// object that is owned by the Array will now be the owner of the memory pointer and +// will call ArrowArrayRelease when it is released and garbage collected via runtime.SetFinalizer. +// +// NOTE: The array takes ownership of the underlying memory buffers via ArrowArrayMove, +// it does not take ownership of the actual arr object itself. +func ImportCArrayWithType(arr *CArrowArray, dt arrow.DataType) (array.Interface, error) { + imp, err := importCArrayAsType(arr, dt) + if err != nil { + return nil, err + } + defer imp.data.Release() + return array.MakeFromData(imp.data), nil +} + +// ImportCArray takes a pointer to both a C Data ArrowArray and C Data ArrowSchema in order +// to import them into usable Go Objects. If err is not nil, then ArrowArrayRelease must still +// be called on arr to release the memory. The ArrowSchemaRelease will be called on the passed in +// schema regardless of whether there is an error or not. +// +// The Schema will be copied with the information used to populate the returned Field, complete +// with metadata. The array will reference the same memory that is referred to by the ArrowArray +// object and take ownership of it as per ImportCArrayWithType. The returned array.Interface will +// own the C memory and call ArrowArrayRelease when the array.Data object is cleaned up. +// +// NOTE: The array takes ownership of the underlying memory buffers via ArrowArrayMove, +// it does not take ownership of the actual arr object itself. +func ImportCArray(arr *CArrowArray, schema *CArrowSchema) (arrow.Field, array.Interface, error) { + field, err := importSchema(schema) + if err != nil { + return field, nil, err + } + + ret, err := ImportCArrayWithType(arr, field.Type) + return field, ret, err +} + +// ImportCRecordBatchWithSchema is used for importing a Record Batch array when the schema +// is already known such as when receiving record batches through a stream. +// +// All of the semantics regarding memory ownership are the same as when calling +// ImportCRecordBatch directly with a schema. +// +// NOTE: The array takes ownership of the underlying memory buffers via ArrowArrayMove, +// it does not take ownership of the actual arr object itself. +func ImportCRecordBatchWithSchema(arr *CArrowArray, sc *arrow.Schema) (array.Record, error) { + imp, err := importCArrayAsType(arr, arrow.StructOf(sc.Fields()...)) + if err != nil { + return nil, err + } + + st := array.NewStructData(imp.data) + defer st.Release() + + // now that we have our fields, we can split them out into the slice of arrays + // and construct a record batch from them to return. + cols := make([]array.Interface, st.NumField()) + for i := 0; i < st.NumField(); i++ { + cols[i] = st.Field(i) + } + + return array.NewRecord(sc, cols, int64(st.Len())), nil +} + +// ImportCRecordBatch imports an ArrowArray from C as a record batch. If err is not nil, +// then ArrowArrayRelease must still be called to release the memory. +// +// A record batch is represented in the C Data Interface as a Struct Array whose fields +// are the columns of the record batch. Thus after importing the schema passed in here, +// if it is not a Struct type, this will return an error. As with ImportCArray, the +// columns in the record batch will take ownership of the CArrowArray memory if successful. +// Since ArrowArrayMove is used, it's still safe to call ArrowArrayRelease on the source +// regardless. But if there is an error, it *MUST* be called to ensure there is no memory leak. +// +// NOTE: The array takes ownership of the underlying memory buffers via ArrowArrayMove, +// it does not take ownership of the actual arr object itself. +func ImportCRecordBatch(arr *CArrowArray, sc *CArrowSchema) (array.Record, error) { + field, err := importSchema(sc) + if err != nil { + return nil, err + } + + if field.Type.ID() != arrow.STRUCT { + return nil, xerrors.New("recordbatch array import must be of struct type") + } + + return ImportCRecordBatchWithSchema(arr, arrow.NewSchema(field.Type.(*arrow.StructType).Fields(), &field.Metadata)) +} + +// ImportCArrayStream creates an arrio.Reader from an ArrowArrayStream taking ownership +// of the underlying stream object via ArrowArrayStreamMove. +// +// The records returned by this reader must be released manually after they are returned. +// The reader itself will release the stream via SetFinalizer when it is garbage collected. +// It will return (nil, io.EOF) from the Read function when there are no more records to return. +// +// NOTE: The reader takes ownership of the underlying memory buffers via ArrowArrayStreamMove, +// it does not take ownership of the actual stream object itself. +func ImportCArrayStream(stream *CArrowArrayStream, schema *arrow.Schema) arrio.Reader { + out := &nativeCRecordBatchReader{schema: schema} + initReader(out, stream) + return out +} diff --git a/go/arrow/cdata/test/go.mod b/go/arrow/cdata/test/go.mod new file mode 100644 index 0000000000000..153b1272db590 --- /dev/null +++ b/go/arrow/cdata/test/go.mod @@ -0,0 +1,23 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +module cdatatest + +go 1.15 + +replace github.com/apache/arrow/go/arrow => ../../ + +require github.com/apache/arrow/go/arrow v0.0.0-00010101000000-000000000000 diff --git a/go/arrow/cdata/test/go.sum b/go/arrow/cdata/test/go.sum new file mode 100644 index 0000000000000..9cbf776cb2c1c --- /dev/null +++ b/go/arrow/cdata/test/go.sum @@ -0,0 +1,150 @@ +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= +github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= +github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= +github.com/cncf/xds/go v0.0.0-20210312221358-fbca930ec8ed/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= +github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= +github.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= +github.com/envoyproxy/go-control-plane v0.9.9-0.20210512163311-63b5d3c536b0/go.mod h1:hliV/p42l8fGbc6Y9bQ70uLwIvmJyVE5k4iMKlh8wCQ= +github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= +github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA= +github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/google/flatbuffers v2.0.0+incompatible h1:dicJ2oXwypfwUGnB2/TYWYEKiuk9eYQlQO/AnOHl5mI= +github.com/google/flatbuffers v2.0.0+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= +github.com/klauspost/compress v1.13.1 h1:wXr2uRxZTJXHLly6qhJabee5JqIhTRoLBhDOA74hDEQ= +github.com/klauspost/compress v1.13.1/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= +github.com/pierrec/lz4/v4 v4.1.8 h1:ieHkV+i2BRzngO4Wd/3HGowuZStgq6QkPsD1eolNAO4= +github.com/pierrec/lz4/v4 v4.1.8/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= +github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= +golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= +golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= +golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/genproto v0.0.0-20210630183607-d20f26d13c79/go.mod h1:yiaVoXHpRzHGyxV3o4DktVWY4mSUErTKaeEOq6C3t3U= +google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= +google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= +google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= +google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= +google.golang.org/grpc v1.39.0/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnDzfrE= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= diff --git a/go/arrow/cdata/test/test_cimport.go b/go/arrow/cdata/test/test_cimport.go new file mode 100644 index 0000000000000..0fc5f86ea6f75 --- /dev/null +++ b/go/arrow/cdata/test/test_cimport.go @@ -0,0 +1,101 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build cdata_test + +package main + +import ( + "fmt" + "runtime" + "unsafe" + + "github.com/apache/arrow/go/arrow" + "github.com/apache/arrow/go/arrow/array" + "github.com/apache/arrow/go/arrow/cdata" + "github.com/apache/arrow/go/arrow/memory" +) + +// #include +import "C" + +//export runGC +func runGC() { + runtime.GC() +} + +//export importSchema +func importSchema(ptr uintptr) { + sc := (*cdata.CArrowSchema)(unsafe.Pointer(ptr)) + + schema, err := cdata.ImportCArrowSchema(sc) + if err != nil { + panic(err) + } + + expectedMetadata := arrow.NewMetadata([]string{"key1"}, []string{"value1"}) + expectedSchema := arrow.NewSchema([]arrow.Field{{Name: "ints", Type: arrow.ListOf(arrow.PrimitiveTypes.Int32), Nullable: true}}, &expectedMetadata) + if !schema.Equal(expectedSchema) { + panic(fmt.Sprintf("schema didn't match: expected %s, got %s", expectedSchema, schema)) + } + if !schema.Metadata().Equal(expectedMetadata) { + panic(fmt.Sprintf("metadata didn't match: expected %s, got %s", expectedMetadata, schema.Metadata())) + } + + fmt.Println("schema matches! Huzzah!") +} + +//export importRecordBatch +func importRecordBatch(scptr, rbptr uintptr) { + sc := (*cdata.CArrowSchema)(unsafe.Pointer(scptr)) + rb := (*cdata.CArrowArray)(unsafe.Pointer(rbptr)) + + rec, err := cdata.ImportCRecordBatch(rb, sc) + if err != nil { + panic(err) + } + defer rec.Release() + + expectedMetadata := arrow.NewMetadata([]string{"key1"}, []string{"value1"}) + expectedSchema := arrow.NewSchema([]arrow.Field{{Name: "ints", Type: arrow.ListOf(arrow.PrimitiveTypes.Int32), Nullable: true}}, &expectedMetadata) + + bldr := array.NewRecordBuilder(memory.DefaultAllocator, expectedSchema) + defer bldr.Release() + + lb := bldr.Field(0).(*array.ListBuilder) + vb := lb.ValueBuilder().(*array.Int32Builder) + + // [[[1], [], None [2, 42]]] + lb.Append(true) + vb.Append(int32(1)) + + lb.Append(true) + lb.Append(false) + + lb.Append(true) + vb.AppendValues([]int32{2, 42}, nil) + + expectedRec := bldr.NewRecord() + defer expectedRec.Release() + + if !array.RecordEqual(expectedRec, rec) { + panic(fmt.Sprintf("records didn't match: expected %s\n got %s", expectedRec, rec)) + } + + fmt.Println("record batch matches huzzah!") +} + +func main() {} diff --git a/go/arrow/cdata/test/test_export_to_cgo.py b/go/arrow/cdata/test/test_export_to_cgo.py new file mode 100644 index 0000000000000..118aebf035a1e --- /dev/null +++ b/go/arrow/cdata/test/test_export_to_cgo.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import contextlib +import gc +import os +import unittest + +import pyarrow as pa +from pyarrow.cffi import ffi + + +def load_cgotest(): + # XXX what about Darwin? + libext = 'so' + if os.name == 'nt': + libext = 'dll' + + ffi.cdef( + """ + void importSchema(uintptr_t ptr); + void importRecordBatch(uintptr_t scptr, uintptr_t rbptr); + void runGC(); + """) + return ffi.dlopen(f'./cgotest.{libext}') + + +cgotest = load_cgotest() + + +class TestPythonToGo(unittest.TestCase): + + def setUp(self): + self.c_schema = ffi.new("struct ArrowSchema*") + self.ptr_schema = int(ffi.cast("uintptr_t", self.c_schema)) + self.c_array = ffi.new("struct ArrowArray*") + self.ptr_array = int(ffi.cast("uintptr_t", self.c_array)) + + def make_schema(self): + return pa.schema([('ints', pa.list_(pa.int32()))], + metadata={b'key1': b'value1'}) + + def make_batch(self): + return pa.record_batch([[[1], [], None, [2, 42]]], + self.make_schema()) + + def run_gc(self): + # Several Go GC runs can be required to run all finalizers + for i in range(5): + cgotest.runGC() + gc.collect() + + @contextlib.contextmanager + def assert_pyarrow_memory_released(self): + self.run_gc() + old_allocated = pa.total_allocated_bytes() + yield + self.run_gc() + diff = pa.total_allocated_bytes() - old_allocated + self.assertEqual( + pa.total_allocated_bytes(), old_allocated, + f"PyArrow memory was not adequately released: {diff} bytes lost") + + def test_schema(self): + with self.assert_pyarrow_memory_released(): + self.make_schema()._export_to_c(self.ptr_schema) + # Will panic if expectations are not met + cgotest.importSchema(self.ptr_schema) + + def test_record_batch(self): + with self.assert_pyarrow_memory_released(): + self.make_schema()._export_to_c(self.ptr_schema) + self.make_batch()._export_to_c(self.ptr_array) + # Will panic if expectations are not met + cgotest.importRecordBatch(self.ptr_schema, self.ptr_array) + + +if __name__ == '__main__': + unittest.main(verbosity=2) diff --git a/go/arrow/cdata/utils.h b/go/arrow/cdata/utils.h new file mode 100644 index 0000000000000..f38281057b495 --- /dev/null +++ b/go/arrow/cdata/utils.h @@ -0,0 +1,56 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build cgo +// +build test + +/* + Function check_for_endianness() returns 1, if architecture + is little endian, 0 in case of big endian. +*/ +inline int is_little_endian() +{ + unsigned int x = 1; + char *c = (char*) &x; + return (int)*c; +} + +// metadata keys 1: {"key1", "key2"} +// metadata values 1: {"", "bar"} +static const char kEncodedMeta1LE[] = { + 2, 0, 0, 0, + 4, 0, 0, 0, 'k', 'e', 'y', '1', 0, 0, 0, 0, + 4, 0, 0, 0, 'k', 'e', 'y', '2', 3, 0, 0, 0, 'b', 'a', 'r'}; + +static const char kEncodedMeta1BE[] = { + 0, 0, 0, 2, + 0, 0, 0, 4, 'k', 'e', 'y', '1', 0, 0, 0, 0, + 0, 0, 0, 4, 'k', 'e', 'y', '2', 0, 0, 0, 3, 'b', 'a', 'r'}; + +static const char* kMetadataKeys2[] = {"key"}; +static const char* kMetadataValues2[] = {"abcde"}; + +// metadata keys 2: {"key"} +// metadata values 2: {"abcde"} +static const char kEncodedMeta2LE[] = { + 1, 0, 0, 0, + 3, 0, 0, 0, 'k', 'e', 'y', 5, 0, 0, 0, 'a', 'b', 'c', 'd', 'e'}; + +static const char kEncodedMeta2BE[] = { + 0, 0, 0, 1, + 0, 0, 0, 3, 'k', 'e', 'y', 0, 0, 0, 5, 'a', 'b', 'c', 'd', 'e'}; + + diff --git a/go/arrow/datatype_fixedwidth.go b/go/arrow/datatype_fixedwidth.go index 79bb878f36ae6..c90ee686b2ee6 100644 --- a/go/arrow/datatype_fixedwidth.go +++ b/go/arrow/datatype_fixedwidth.go @@ -132,7 +132,7 @@ type Decimal128Type struct { func (*Decimal128Type) ID() Type { return DECIMAL } func (*Decimal128Type) Name() string { return "decimal" } -func (*Decimal128Type) BitWidth() int { return 16 } +func (*Decimal128Type) BitWidth() int { return 128 } func (t *Decimal128Type) String() string { return fmt.Sprintf("%s(%d, %d)", t.Name(), t.Precision, t.Scale) } diff --git a/go/arrow/datatype_fixedwidth_test.go b/go/arrow/datatype_fixedwidth_test.go index 3f5153c5024c0..3349703b40b7e 100644 --- a/go/arrow/datatype_fixedwidth_test.go +++ b/go/arrow/datatype_fixedwidth_test.go @@ -53,7 +53,7 @@ func TestDecimal128Type(t *testing.T) { } { t.Run(tc.want, func(t *testing.T) { dt := arrow.Decimal128Type{Precision: tc.precision, Scale: tc.scale} - if got, want := dt.BitWidth(), 16; got != want { + if got, want := dt.BitWidth(), 128; got != want { t.Fatalf("invalid bitwidth: got=%d, want=%d", got, want) } @@ -80,7 +80,7 @@ func TestFixedSizeBinaryType(t *testing.T) { } { t.Run(tc.want, func(t *testing.T) { dt := arrow.FixedSizeBinaryType{tc.byteWidth} - if got, want := dt.BitWidth(), 8 * tc.byteWidth; got != want { + if got, want := dt.BitWidth(), 8*tc.byteWidth; got != want { t.Fatalf("invalid bitwidth: got=%d, want=%d", got, want) } @@ -101,9 +101,9 @@ func TestFixedSizeBinaryType(t *testing.T) { func TestTimestampType(t *testing.T) { for _, tc := range []struct { - unit arrow.TimeUnit - timeZone string - want string + unit arrow.TimeUnit + timeZone string + want string }{ {arrow.Nanosecond, "CST", "timestamp[ns, tz=CST]"}, {arrow.Microsecond, "EST", "timestamp[us, tz=EST]"},