Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-14061: [Go][C++] Add Cgo Arrow Memory Pool Allocator #11206

Closed
wants to merge 24 commits into from
Closed
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
e91c738
implementation
Sep 21, 2021
1c6e8c7
ensure base package builds with cgo disabled and cgo image builds
Sep 21, 2021
558a3e3
add tag ccalloc to properly test
Sep 21, 2021
570e648
macos using old bash doesn't have -v test
Sep 21, 2021
842c5f8
forgot to fix go_test for macos
Sep 21, 2021
aed85a3
macos test with cgo
Sep 21, 2021
e7bf430
add -std flag to cxx compiler
Sep 21, 2021
6cf5559
don't static link on windows yet
Sep 21, 2021
3a48d1a
setup ming paths
Sep 21, 2021
e3f36f8
fixup tests
Sep 21, 2021
968d3c3
Merge branch 'apache:master' into arrow-14061
zeroshade Sep 21, 2021
37fd8fb
Merge branch 'apache:master' into arrow-14061
zeroshade Sep 27, 2021
c37df43
Merge branch 'apache:master' into arrow-14061
zeroshade Sep 27, 2021
b0e643d
adding a bunch of comments and docs for the allocator and helpers
Sep 28, 2021
21d159f
Merge branch 'arrow-14061' of https://github.com/zeroshade/arrow into…
Sep 28, 2021
70770e8
comment about costs of small allocations with cgo
Sep 28, 2021
90fbeab
Merge branch 'apache:master' into arrow-14061
zeroshade Sep 29, 2021
8edd1f9
default memory pool and comments for clarification. don't leak 0 byte…
Sep 29, 2021
3608d0a
Merge branch 'arrow-14061' of https://github.com/zeroshade/arrow into…
Sep 29, 2021
3afa43b
use default allocator, simplify the mem_holder
Sep 29, 2021
9558fc8
cleanup dockerfile and comments
Sep 29, 2021
6547b24
update to debian 11 via base argument
Sep 29, 2021
b33c65f
move cgo env vars to msys2_setup.sh
Sep 30, 2021
a13bacd
add ming_prefix/bin to path
Sep 30, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 122 additions & 0 deletions .github/workflows/go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,40 @@ jobs:
continue-on-error: true
run: archery docker push debian-go

docker_cgo:
name: AMD64 Debian 10 GO ${{ matrix.go }} - CGO
runs-on: ubuntu-latest
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
timeout-minutes: 15
strategy:
fail-fast: false
matrix:
go: [1.15]
env:
GO: ${{ matrix.go }}
steps:
- name: Checkout Arrow
uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Fetch Submodules and Tags
run: ci/scripts/util_checkout.sh
- name: Free Up Disk Space
run: ci/scripts/util_cleanup.sh
- name: Setup Python
uses: actions/setup-python@v1
with:
python-version: 3.8
- name: Setup Archery
run: pip install -e dev/archery[docker]
- name: Execute Docker Build
run: archery docker run debian-go-cgo
- name: Docker Push
if: success() && github.event_name == 'push' && github.repository == 'apache/arrow'
continue-on-error: true
run: archery docker push debian-go-cgo


docker_cgo_python:
name: AMD64 Debian 11 GO ${{ matrix.go }} - CGO Python
runs-on: ubuntu-latest
Expand Down Expand Up @@ -163,3 +197,91 @@ jobs:
- name: Test
shell: bash
run: ci/scripts/go_test.sh .

macos-cgo:
name: AMD64 MacOS 10.15 Go ${{ matrix.go }} - CGO
runs-on: macos-latest
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
timeout-minutes: 60
strategy:
fail-fast: false
matrix:
go: [1.15]
env:
ARROW_GO_TESTCGO: "1"
steps:
- name: Install go
uses: actions/setup-go@v1
with:
go-version: ${{ matrix.go }}
- name: Checkout Arrow
uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Fetch Submodules and Tags
shell: bash
run: ci/scripts/util_checkout.sh
- name: Brew Install Arrow
shell: bash
run: brew install apache-arrow
- name: Build
shell: bash
run: ci/scripts/go_build.sh .
- name: Test
shell: bash
run: ci/scripts/go_test.sh .

windows-mingw:
name: AMD64 Windows MinGW ${{ matrix.mingw-n-bits }} CGO
runs-on: windows-latest
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
timeout-minutes: 60
strategy:
fail-fast: false
matrix:
go: [1.15]
mingw-n-bits:
# - 32 (disabled: runtime handling for CGO currently needs 64-bit)
- 64
env:
ARROW_GO_TESTCGO: "1"
steps:
- name: Disable Crash Dialogs
run: |
reg add `
"HKCU\SOFTWARE\Microsoft\Windows\Windows Error Reporting" `
/v DontShowUI `
/t REG_DWORD `
/d 1 `
/f
- name: Install go
uses: actions/setup-go@v1
with:
go-version: ${{ matrix.go }}
- name: Checkout Arrow
uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Fetch Submodules and Tags
shell: bash
run: ci/scripts/util_checkout.sh
- uses: msys2/setup-msys2@v2
with:
msystem: MINGW${{ matrix.mingw-n-bits }}
update: true
- name: Setup MSYS2
shell: msys2 {0}
run: |
ci/scripts/msys2_setup.sh cgo
- name: Update CGO Env vars
shell: msys2 {0}
run: |
echo "CGO_CPPFLAGS=-I$(cygpath --windows ${MINGW_PREFIX}/include)" >> $GITHUB_ENV
echo "CGO_LDFLAGS=-g -O2 -L$(cygpath --windows ${MINGW_PREFIX}/lib) -L$(cygpath --windows ${MINGW_PREFIX}/bin)" >> $GITHUB_ENV
echo "$(cygpath --windows ${MINGW_PREFIX}/bin)" >> $GITHUB_PATH
zeroshade marked this conversation as resolved.
Show resolved Hide resolved
- name: Build
shell: bash
run: ci/scripts/go_build.sh .
- name: Test
shell: bash
run: ci/scripts/go_test.sh .
34 changes: 34 additions & 0 deletions ci/docker/debian-10-go-cgo.dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Licensed to the Apache Software Foundation (ASF) under one
zeroshade marked this conversation as resolved.
Show resolved Hide resolved
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

ARG base
FROM ${base}

ENV DEBIAN_FRONTEND noninteractive

# install libarrow-dev and libarrow-python-dev so we can use pyarrow in some tests
# in order to test the C Data Interface
RUN apt-get update -y -q && \
apt-get install -y -q --no-install-recommends ca-certificates lsb-release wget && \
wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \
apt-get install -y -q --no-install-recommends ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \
apt-get update -y -q && \
apt-get install -y -q --no-install-recommends \
cmake \
libarrow-dev \
libarrow-python-dev && \
apt-get clean
6 changes: 5 additions & 1 deletion ci/scripts/go_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,12 @@ source_dir=${1}/go

pushd ${source_dir}/arrow

if [[ -n "${ARROW_GO_TESTCGO}" ]]; then
TAGS="-tags ccalloc"
fi

go get -d -t -v ./...
go install -v ./...
go install $TAGS -v ./...

popd

Expand Down
8 changes: 7 additions & 1 deletion ci/scripts/go_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,18 @@ esac

pushd ${source_dir}/arrow

TAGS="test"
if [[ -n "${ARROW_GO_TESTCGO}" ]]; then
TAGS="${TAGS},ccalloc"
fi


# The cgo implementation of the C Data Interface requires the "test"
# tag in order to run its tests, so that the testing functions implemented
# in .c files don't get included in non-test builds.

for d in $(go list ./... | grep -v vendor); do
go test $testargs -tags "test" $d
go test $testargs -tags $TAGS $d
done

popd
Expand Down
7 changes: 7 additions & 0 deletions ci/scripts/msys2_setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,13 @@ case "${target}" in
;;
esac

case "${target}" in
cgo)
packages+=(${MINGW_PACKAGE_PREFIX}-arrow)
packages+=(${MINGW_PACKAGE_PREFIX}-gcc)
;;
esac

pacman \
--needed \
--noconfirm \
Expand Down
19 changes: 19 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ x-hierarchy:
- debian-ruby
- debian-python
- debian-go:
- debian-go-cgo
- debian-go-cgo-python
- debian-java:
- debian-java-jni
Expand Down Expand Up @@ -1199,6 +1200,24 @@ services:
/arrow/ci/scripts/go_build.sh /arrow &&
/arrow/ci/scripts/go_test.sh /arrow"

debian-go-cgo:
# Usage:
# docker-compose build debian-go-cgo
# docker-compose run debian-go-cgo
image: ${REPO}:${ARCH}-debian-10-go-${GO}-cgo
build:
context: .
dockerfile: ci/docker/debian-10-go-cgo.dockerfile
cache_from:
- ${REPO}:${ARCH}-debian-10-go-${GO}-cgo
args:
base: ${REPO}:${ARCH}-debian-10-go-${GO}
shm_size: *shm-size
volumes: *debian-volumes
environment:
ARROW_GO_TESTCGO: "1"
command: *go-command

debian-go-cgo-python:
# Usage:
# docker-compose build debian-go-cgo-python
Expand Down
33 changes: 16 additions & 17 deletions go/arrow/cdata/cdata.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,6 @@ package cdata

// #include "arrow/c/abi.h"
// #include "arrow/c/helpers.h"
// typedef struct ArrowSchema ArrowSchema;
// typedef struct ArrowArray ArrowArray;
// typedef struct ArrowArrayStream ArrowArrayStream;
//
// int stream_get_schema(struct ArrowArrayStream* st, struct ArrowSchema* out) { return st->get_schema(st, out); }
// int stream_get_next(struct ArrowArrayStream* st, struct ArrowArray* out) { return st->get_next(st, out); }
Expand Down Expand Up @@ -52,12 +49,12 @@ import (

type (
// CArrowSchema is the C Data Interface object for Arrow Schemas as defined in abi.h
CArrowSchema = C.ArrowSchema
CArrowSchema = C.struct_ArrowSchema
// CArrowArray is the C Data Interface object for Arrow Arrays as defined in abi.h
CArrowArray = C.ArrowArray
CArrowArray = C.struct_ArrowArray
// CArrowArrayStream is the Experimental API for handling streams of record batches
// through the C Data interface.
CArrowArrayStream = C.ArrowArrayStream
CArrowArrayStream = C.struct_ArrowArrayStream
)

// Map from the defined strings to their corresponding arrow.DataType interface
Expand Down Expand Up @@ -146,7 +143,7 @@ func importSchema(schema *CArrowSchema) (ret arrow.Field, err error) {
var childFields []arrow.Field
if schema.n_children > 0 {
// call ourselves recursively if there are children.
var schemaChildren []*C.ArrowSchema
var schemaChildren []*CArrowSchema
// set up a slice to reference safely
s := (*reflect.SliceHeader)(unsafe.Pointer(&schemaChildren))
s.Data = uintptr(unsafe.Pointer(schema.children))
Expand Down Expand Up @@ -255,21 +252,21 @@ func importSchema(schema *CArrowSchema) (ret arrow.Field, err error) {
// importer to keep track when importing C ArrowArray objects.
type cimporter struct {
dt arrow.DataType
arr *C.ArrowArray
arr *CArrowArray
data *array.Data
parent *cimporter
children []cimporter
cbuffers []*C.void
}

func (imp *cimporter) importChild(parent *cimporter, src *C.ArrowArray) error {
func (imp *cimporter) importChild(parent *cimporter, src *CArrowArray) error {
imp.parent = parent
return imp.doImport(src)
}

// import any child arrays for lists, structs, and so on.
func (imp *cimporter) doImportChildren() error {
var children []*C.ArrowArray
var children []*CArrowArray
// create a proper slice for our children
s := (*reflect.SliceHeader)(unsafe.Pointer(&children))
s.Data = uintptr(unsafe.Pointer(imp.arr.children))
Expand Down Expand Up @@ -315,13 +312,13 @@ func (imp *cimporter) initarr() {

// import is called recursively as needed for importing an array and its children
// in order to generate array.Data objects
func (imp *cimporter) doImport(src *C.ArrowArray) error {
func (imp *cimporter) doImport(src *CArrowArray) error {
imp.initarr()
// move the array from the src object passed in to the one referenced by
// this importer. That way we can set up a finalizer on the created
// *array.Data object so we clean up our Array's memory when garbage collected.
C.ArrowArrayMove(src, imp.arr)
defer func(arr *C.ArrowArray) {
defer func(arr *CArrowArray) {
if imp.data != nil {
runtime.SetFinalizer(imp.data, func(*array.Data) {
C.ArrowArrayRelease(arr)
Expand All @@ -339,7 +336,9 @@ func (imp *cimporter) doImport(src *C.ArrowArray) error {

// get a view of the buffers, zero-copy. we're just looking at the pointers
const maxlen = 0x7fffffff
imp.cbuffers = (*[maxlen]*C.void)(unsafe.Pointer(imp.arr.buffers))[:imp.arr.n_buffers:imp.arr.n_buffers]
if imp.arr.n_buffers > 0 {
imp.cbuffers = (*[maxlen]*C.void)(unsafe.Pointer(imp.arr.buffers))[:imp.arr.n_buffers:imp.arr.n_buffers]
}

// handle each of our type cases
switch dt := imp.dt.(type) {
Expand Down Expand Up @@ -521,13 +520,13 @@ func (imp *cimporter) importVariableValuesBuffer(bufferID int, byteWidth int, of
return imp.importBuffer(bufferID, int64(bufsize))
}

func importCArrayAsType(arr *C.ArrowArray, dt arrow.DataType) (imp *cimporter, err error) {
func importCArrayAsType(arr *CArrowArray, dt arrow.DataType) (imp *cimporter, err error) {
imp = &cimporter{dt: dt}
err = imp.doImport(arr)
return
}

func initReader(rdr *nativeCRecordBatchReader, stream *C.ArrowArrayStream) {
func initReader(rdr *nativeCRecordBatchReader, stream *CArrowArrayStream) {
st := C.get_stream()
rdr.stream = &st
C.ArrowArrayStreamMove(stream, rdr.stream)
Expand All @@ -536,7 +535,7 @@ func initReader(rdr *nativeCRecordBatchReader, stream *C.ArrowArrayStream) {

// Record Batch reader that conforms to arrio.Reader for the ArrowArrayStream interface
type nativeCRecordBatchReader struct {
stream *C.ArrowArrayStream
stream *CArrowArrayStream
schema *arrow.Schema
}

Expand All @@ -546,7 +545,7 @@ func (n *nativeCRecordBatchReader) getError(errno int) error {

func (n *nativeCRecordBatchReader) Read() (array.Record, error) {
if n.schema == nil {
var sc C.ArrowSchema
var sc CArrowSchema
errno := C.stream_get_schema(n.stream, &sc)
if errno != 0 {
return nil, n.getError(int(errno))
Expand Down
Loading