diff --git a/dev/release/README.md b/dev/release/README.md
index 16c985d..ca637fa 100644
--- a/dev/release/README.md
+++ b/dev/release/README.md
@@ -158,3 +158,53 @@ svn ls https://dist.apache.org/repos/dist/release/datafusion | grep datafusion-j
 svn delete -m "delete old DataFusion Java release" \
   https://dist.apache.org/repos/dist/release/datafusion/datafusion-java-0.1.0
 ```
+
+## Binary Release: Multi-Platform JAR
+
+Source tarballs are the official Apache release artifact, but consumers
+also expect a published JAR on Maven Central that bundles native libs
+for the common platforms. This section covers building that JAR.
+
+### Prerequisites (release manager machine)
+
+- macOS host (Apple Silicon or Intel)
+- Docker Desktop running with BuildKit enabled
+- Java 17+
+- Rust toolchain via rustup
+- `gpg` configured with a key listed in the ASF KEYS file
+- `xmllint` on `PATH` (pre-installed on macOS; `libxml2-utils` on Debian/Ubuntu)
+
+### Build the multi-platform JAR
+
+`build-release.sh` clones the repo into two Linux Docker containers
+(one for `linux/amd64`, one for `linux/arm64`), builds the native
+`.so` libraries inside each, then builds the two macOS `.dylib`
+libraries directly on the host. All four libraries are placed in the
+JAR's resource tree at
+`org/apache/datafusion/<os>/<arch>/lib<name>.<ext>`, and the
+JAR is installed into a temporary local Maven repository.
+
+```shell
+./dev/release/build-release.sh
+```
+
+The script prints the local Maven repo path at the end. Inspect the JAR
+to verify all four native libraries are bundled:
+
+```shell
+unzip -l "$JAR" | grep org/apache/datafusion/
+```
+
+### Publish to Apache Nexus staging
+
+Once the local Maven repo from `build-release.sh` looks correct, sign
+and upload to Apache Nexus staging using `publish-to-maven.sh`:
+
+```shell
+./dev/release/publish-to-maven.sh -u <ASF_USERNAME> -r <LOCAL_REPO>
+```
+
+The script prompts for the ASF password and GPG passphrase, creates a
+staging repository on `repository.apache.org`, signs every artifact,
+uploads it, and closes the staging repository. Verify in the Nexus UI
+that the staged artifacts look correct before promoting to release.
diff --git a/dev/release/build-release.sh b/dev/release/build-release.sh
new file mode 100755
index 0000000..2b033bb
--- /dev/null
+++ b/dev/release/build-release.sh
@@ -0,0 +1,175 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# Build a multi-platform datafusion-java JAR bundling native libs for
+# linux/amd64, linux/aarch64, darwin/x86_64, and darwin/aarch64. The
+# resulting JAR is installed into a temporary local Maven repository
+# whose path is printed at the end. This script must run on a macOS host.
+# Strict mode: abort on command failure, unset variables, and pipeline errors.
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null && pwd)"  # directory containing this script
+PROJECT_HOME="$(cd "${SCRIPT_DIR}/../.." >/dev/null && pwd)"  # two levels above SCRIPT_DIR
+
+REPO="https://github.com/apache/datafusion-java.git"  # handed to the Linux builder containers
+BRANCH="main"  # handed to the Linux builder containers
+IMGTAG="latest"  # tag applied to both builder images
+# NOTE(review): the text between "cat <" and "&2" below appears truncated (usage heredoc and whatever followed it) - restore from the original commit.
+function usage {
+  cat <&2
+  exit 1
+fi
+# Take the major version from the quoted string in `java -version` and require at least 17.
+JAVA_VERSION=$(java -version 2>&1 | awk -F '"' '/version/ {print $2}' | awk -F '.' '{print $1}')
+if [ -z "$JAVA_VERSION" ] || [ "$JAVA_VERSION" -lt 17 ]; then
+  echo "Java 17+ is required. Found: $(java -version 2>&1 | head -n 1)" >&2
+  exit 1
+fi
+
+HOST_ARCH="$(uname -m)" # arm64 (Apple Silicon) or x86_64 (Intel)
+# Map the host arch to its resource directory name; the other macOS arch becomes the --target build.
+case "$HOST_ARCH" in
+  arm64)
+    HOST_DARWIN_DIR="aarch64"
+    OTHER_DARWIN_TARGET="x86_64-apple-darwin"
+    OTHER_DARWIN_DIR="x86_64"
+    ;;
+  x86_64)
+    HOST_DARWIN_DIR="x86_64"
+    OTHER_DARWIN_TARGET="aarch64-apple-darwin"
+    OTHER_DARWIN_DIR="aarch64"
+    ;;
+  *)
+    echo "Unsupported macOS arch: $HOST_ARCH" >&2
+    exit 1
+    ;;
+esac
+
+CONTAINER_AMD64="datafusion-java-amd64-builder-container"
+CONTAINER_ARM64="datafusion-java-arm64-builder-container"
+IMAGE_AMD64="datafusion-java-rm-amd64:${IMGTAG}"
+IMAGE_ARM64="datafusion-java-rm-arm64:${IMGTAG}"
+# cleanup force-removes both builder containers; the CLEANUP flag makes later calls no-ops (the trap can fire for a signal and again on EXIT).
+CLEANUP=1
+cleanup() {
+  [ "$CLEANUP" != "0" ] || return 0
+  echo "Cleaning up build containers..."
+  docker rm -f "$CONTAINER_AMD64" "$CONTAINER_ARM64" >/dev/null 2>&1 || true
+  CLEANUP=0
+}
+trap cleanup SIGINT SIGTERM EXIT
+
+echo "Cleaning leftover builder containers from any prior interrupted run"
+docker rm -f "$CONTAINER_AMD64" "$CONTAINER_ARM64" >/dev/null 2>&1 || true
+
+echo "Cleaning previous Java and native build output"
+(cd "$PROJECT_HOME" && ./mvnw -q clean)
+(cd "$PROJECT_HOME/native" && cargo clean)
+# Both images are built from the same context directory, once per platform.
+echo "Building amd64 builder image"
+docker build --no-cache \
+  --platform=linux/amd64 \
+  -t "$IMAGE_AMD64" \
+  "$SCRIPT_DIR/datafusion-java-rm"
+
+echo "Building arm64 builder image"
+docker build --no-cache \
+  --platform=linux/arm64 \
+  -t "$IMAGE_ARM64" \
+  "$SCRIPT_DIR/datafusion-java-rm"
+# The containers are run without --rm so `docker cp` below can copy the built library out; cleanup removes them.
+echo "Building linux/amd64 native lib"
+docker run --name "$CONTAINER_AMD64" \
+  --platform=linux/amd64 \
+  "$IMAGE_AMD64" "$REPO" "$BRANCH"
+
+echo "Building linux/aarch64 native lib"
+docker run --name "$CONTAINER_ARM64" \
+  --platform=linux/arm64 \
+  "$IMAGE_ARM64" "$REPO" "$BRANCH"
+# Native libraries are staged under core/target/classes before `./mvnw install` runs further down.
+JVM_TARGET_DIR="$PROJECT_HOME/core/target/classes/org/apache/datafusion"
+
+mkdir -p "$JVM_TARGET_DIR/linux/amd64"
+docker cp \
+  "$CONTAINER_AMD64:/opt/datafusion-java-rm/datafusion-java/native/target/release/libdatafusion_jni.so" \
+  "$JVM_TARGET_DIR/linux/amd64/"
+
+mkdir -p "$JVM_TARGET_DIR/linux/aarch64"
+docker cp \
+  "$CONTAINER_ARM64:/opt/datafusion-java-rm/datafusion-java/native/target/release/libdatafusion_jni.so" \
+  "$JVM_TARGET_DIR/linux/aarch64/"
+
+echo "Building macOS native libs on the host (host=$HOST_ARCH)"
+rustup target add "$OTHER_DARWIN_TARGET"
+# The first build uses the host's default target (output in target/release); the second cross-compiles (output in target/<triple>/release).
+(cd "$PROJECT_HOME/native" && cargo build --release)
+(cd "$PROJECT_HOME/native" && cargo build --release --target "$OTHER_DARWIN_TARGET")
+
+mkdir -p "$JVM_TARGET_DIR/darwin/$HOST_DARWIN_DIR"
+cp "$PROJECT_HOME/native/target/release/libdatafusion_jni.dylib" \
+  "$JVM_TARGET_DIR/darwin/$HOST_DARWIN_DIR/"
+
+mkdir -p "$JVM_TARGET_DIR/darwin/$OTHER_DARWIN_DIR"
+cp "$PROJECT_HOME/native/target/$OTHER_DARWIN_TARGET/release/libdatafusion_jni.dylib" \
+  "$JVM_TARGET_DIR/darwin/$OTHER_DARWIN_DIR/"
+# Install into a fresh temporary Maven repository rather than the user's default one.
+echo "Installing JAR into local Maven repo"
+LOCAL_REPO=$(mktemp -d /tmp/datafusion-java-staging-repo-XXXXXX)
+(cd "$PROJECT_HOME" && ./mvnw \
+  "-Dmaven.repo.local=$LOCAL_REPO" \
+  "-Ddatafusion.native.profile=release" \
+  -DskipTests install)
+
+echo ""
+echo "===================================================================="
+echo "Multi-platform JAR installed to local Maven repo: $LOCAL_REPO"
+JAR_PATH=$(find "$LOCAL_REPO/org/apache/datafusion/datafusion-java" -name 'datafusion-java-*.jar' \
+  -not -name '*-sources.jar' -not -name '*-javadoc.jar' | head -n 1)
+echo "JAR: $JAR_PATH"
+echo "Bundled native libraries:"
+unzip -l "$JAR_PATH" | grep -E 'libdatafusion_jni\.(so|dylib)$' || true  # listing is best-effort: no match does not fail the script
+echo "===================================================================="
diff --git a/dev/release/datafusion-java-rm/Dockerfile b/dev/release/datafusion-java-rm/Dockerfile
new file mode 100644
index 0000000..0583167
--- /dev/null
+++ b/dev/release/datafusion-java-rm/Dockerfile
@@ -0,0 +1,71 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+FROM ubuntu:20.04
+
+USER root
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV DEBCONF_NONINTERACTIVE_SEEN=true
+ENV LC_ALL=C
+# C/C++ toolchain, a JDK, and supporting packages; gcc-10/g++-10 are selected as CC/CXX below.
+RUN export LC_ALL=C \
+    && apt-get update \
+    && apt-get install --no-install-recommends -y \
+      ca-certificates \
+      build-essential \
+      curl \
+      wget \
+      git \
+      llvm \
+      clang \
+      libssl-dev \
+      cmake \
+      cpio \
+      libxml2-dev \
+      patch \
+      bzip2 \
+      libbz2-dev \
+      zlib1g-dev \
+      default-jdk \
+      unzip \
+      gcc-10 \
+      g++-10 \
+      cpp-10
+ENV CC="gcc-10"
+ENV CXX="g++-10"
+
+# protoc — picks the host arch automatically by reading $(uname -m)
+RUN PB_REL="https://github.com/protocolbuffers/protobuf/releases" \
+    && ARCH="$(uname -m)" \
+    && case "$ARCH" in \
+         x86_64) PB_ARCH="x86_64" ;; \
+         aarch64) PB_ARCH="aarch_64" ;; \
+         *) echo "Unsupported arch: $ARCH" >&2; exit 1 ;; \
+       esac \
+    && curl -LO "$PB_REL/download/v30.2/protoc-30.2-linux-${PB_ARCH}.zip" \
+    && unzip "protoc-30.2-linux-${PB_ARCH}.zip" -d /root/.local \
+    && rm "protoc-30.2-linux-${PB_ARCH}.zip"
+ENV PATH="$PATH:/root/.local/bin"
+
+# Rust
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+ENV PATH="/root/.cargo/bin:${PATH}"
+# Arguments given to `docker run` after the image name are passed to this entrypoint script.
+COPY build-native-libs.sh /opt/datafusion-java-rm/build-native-libs.sh
+WORKDIR /opt/datafusion-java-rm
+
+ENTRYPOINT ["/opt/datafusion-java-rm/build-native-libs.sh"]
diff --git a/dev/release/datafusion-java-rm/build-native-libs.sh b/dev/release/datafusion-java-rm/build-native-libs.sh
new file mode 100755
index 0000000..5f273cc
--- /dev/null
+++ b/dev/release/datafusion-java-rm/build-native-libs.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# Build the datafusion_jni release native lib inside a Linux container.
+# The container's --platform determines the target arch; we just build.
+
+set -euo pipefail
+# Positional arguments default to empty so the check below, not `set -u`, reports a missing one.
+REPO=${1:-}
+BRANCH=${2:-}
+
+if [ -z "$REPO" ] || [ -z "$BRANCH" ]; then
+  echo "Usage: $0 <repo> <branch>" >&2
+  exit 1
+fi
+
+echo "Building datafusion_jni for $(uname -m) from ${REPO}/${BRANCH}"
+# Always start from a fresh clone, then switch to the requested ref.
+rm -rf datafusion-java
+git clone "$REPO" datafusion-java
+cd datafusion-java
+git checkout "$BRANCH"
+
+cd native
+cargo build --release
+# `ls` exits non-zero if the library is missing, which fails the container run.
+echo "Built $(pwd)/target/release/libdatafusion_jni.so"
+ls -l target/release/libdatafusion_jni.so
diff --git a/dev/release/publish-to-maven.sh b/dev/release/publish-to-maven.sh
new file mode 100755
index 0000000..f9ab966
--- /dev/null
+++ b/dev/release/publish-to-maven.sh
@@ -0,0 +1,140 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+###
+# Based on Apache DataFusion Comet's publish-to-maven.sh (itself adapted
+# from Spark's release-build.sh).
+###
+# Print the help text and exit with status 1 (also used for -h and unknown options).
+function usage {
+  local NAME="${0##*/}"
+  cat << EOF
+usage: $NAME options
+
+Publish signed artifacts to Apache Nexus staging.
+
+Options:
+  -u ASF_USERNAME   Username of ASF committer account
+  -r LOCAL_REPO     Path to the local Maven repo created by build-release.sh
+
+The following will be prompted for:
+  ASF_PASSWORD      Password of ASF committer account
+  GPG_KEY           Optional: specific GPG key id to sign with
+  GPG_PASSPHRASE    Passphrase for the GPG signing key
+EOF
+  exit 1
+}
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null && pwd)"
+PROJECT_HOME="$(cd "${SCRIPT_DIR}/../.." >/dev/null && pwd)"
+
+ASF_USERNAME=""
+LOCAL_REPO=""
+
+NEXUS_ROOT=https://repository.apache.org/service/local/staging
+NEXUS_PROFILE=789e15c00fd47
+
+while getopts "u:r:h" opt; do
+  case "$opt" in
+    u) ASF_USERNAME="$OPTARG" ;;
+    r) LOCAL_REPO="$OPTARG" ;;
+    h|*) usage ;;
+  esac
+done
+# Validate the repo layout up front so a bad -r cannot fail after a staging repository has been opened.
+if [ -z "$LOCAL_REPO" ] || [ ! -d "$LOCAL_REPO/org/apache/datafusion" ]; then
+  echo "Please provide the local Maven repo path (-r), as produced by build-release.sh." >&2
+  usage
+fi
+# read -r keeps backslashes in credentials literal.
+if [ -z "$ASF_USERNAME" ]; then
+  read -r -p "ASF Username : " ASF_USERNAME && echo ""
+fi
+read -r -s -p "ASF Password : " ASF_PASSWORD && echo ""
+read -r -s -p "GPG Key (optional) : " GPG_KEY && echo ""
+read -r -s -p "GPG Passphrase : " GPG_PASSPHRASE && echo ""
+
+if [ -z "$ASF_USERNAME" ] || [ -z "$ASF_PASSWORD" ] || [ -z "$GPG_PASSPHRASE" ]; then
+  echo "Missing credentials" >&2
+  exit 1
+fi
+# The gpg command is an array so a key id containing spaces stays a single argument.
+GPG=(gpg --pinentry-mode loopback)
+if [ -n "$GPG_KEY" ]; then
+  GPG+=(-u "$GPG_KEY")
+fi
+
+SHA1SUM=$(command -v sha1sum || command -v shasum)
+
+GIT_HASH=$(cd "$PROJECT_HOME" && git rev-parse --short HEAD)
+# NOTE(review): credentials are passed to curl via -u on the command line, where they are visible in the process list.
+echo "Creating Nexus staging repository"
+REPO_REQUEST="<promoteRequest><data><description>Apache DataFusion Java (commit $GIT_HASH)</description></data></promoteRequest>"
+REPO_REQUEST_RESPONSE=$(curl -X POST -d "$REPO_REQUEST" -u "$ASF_USERNAME:$ASF_PASSWORD" \
+  -H "Content-Type:application/xml" \
+  "$NEXUS_ROOT/profiles/$NEXUS_PROFILE/start")
+
+STAGED_REPO_ID=$(echo "$REPO_REQUEST_RESPONSE" | xmllint --xpath "//stagedRepositoryId/text()" - || true)
+# `|| true` above lets an unparsable or id-less response reach this check instead of aborting under set -e.
+if [ -z "$STAGED_REPO_ID" ]; then
+  echo "Error creating staged repository" >&2
+  echo "$REPO_REQUEST_RESPONSE" >&2
+  exit 1
+fi
+echo "Created Nexus staging repository: $STAGED_REPO_ID"
+echo "Deploying artifacts from $LOCAL_REPO"
+pushd "$LOCAL_REPO/org/apache/datafusion" >/dev/null
+
+# Remove any extra files that mvn install might have written alongside
+# the jar/pom (e.g. -lastUpdated metadata files).
+find . -type f ! -name '*.jar' ! -name '*.pom' -exec rm -f -- {} +
+# The file list is expanded once, before the loop runs, so the .asc/.md5/.sha1 files created here are not themselves signed.
+echo "Creating hash and signature files"
+for file in $(find . -type f); do
+  printf '%s\n' "$GPG_PASSPHRASE" | "${GPG[@]}" --passphrase-fd 0 --output "$file.asc" \
+    --detach-sig --armour "$file"
+  if command -v md5 >/dev/null; then
+    md5 -q "$file" > "$file.md5"
+  else
+    md5sum "$file" | cut -f1 -d' ' > "$file.md5"
+  fi
+  "$SHA1SUM" "$file" | cut -f1 -d' ' > "$file.sha1"
+done
+# This second listing also picks up the signature and checksum files generated above.
+NEXUS_UPLOAD=$NEXUS_ROOT/deployByRepositoryId/$STAGED_REPO_ID
+echo "Uploading files to $NEXUS_UPLOAD"
+for file in $(find . -type f); do
+  FILE_SHORT="${file#./}"
+  DEST_URL="$NEXUS_UPLOAD/org/apache/datafusion/$FILE_SHORT"
+  echo " Uploading $FILE_SHORT"
+  curl --fail-with-body \
+    -u "$ASF_USERNAME:$ASF_PASSWORD" \
+    --upload-file "$FILE_SHORT" "$DEST_URL"
+done
+# The finish request must name the staging repository being closed.
+echo "Closing nexus staging repository"
+REPO_REQUEST="<promoteRequest><data><stagedRepositoryId>$STAGED_REPO_ID</stagedRepositoryId><description>Apache DataFusion Java (commit $GIT_HASH)</description></data></promoteRequest>"
+curl --fail-with-body -X POST -d "$REPO_REQUEST" -u "$ASF_USERNAME:$ASF_PASSWORD" \
+  -H "Content-Type:application/xml" \
+  "$NEXUS_ROOT/profiles/$NEXUS_PROFILE/finish"
+echo "Closed Nexus staging repository: $STAGED_REPO_ID"
+
+popd >/dev/null
diff --git a/pom.xml b/pom.xml
index 0a92f4b..8827f31 100644
--- a/pom.xml
+++ b/pom.xml
@@ -159,6 +159,8 @@ under the License.
 .vscode/** **/*.iml **/.DS_Store + + .github/** mvnw mvnw.cmd
@@ -169,6 +171,8 @@ under the License.
 tpch-data/** native/Cargo.lock + + dev/release/rat_exclude_files.txt