From 283e79ee4b8aca3b865e0901be22381f211efdd2 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 20 May 2026 18:15:54 -0600 Subject: [PATCH] feat(loader): bundle native library inside the JAR Rework NativeLibraryLoader so it can load datafusion_jni from a JAR-bundled native library instead of requiring java.library.path: 1. Try System.loadLibrary so operators can still override with a system-installed build via java.library.path / LD_LIBRARY_PATH. 2. On UnsatisfiedLinkError, detect the host OS/arch, look up the bundled resource at org/apache/datafusion/<os>/<arch>/lib<name>.<ext>, extract it to $TMPDIR/datafusion-java/<sha256>/ and load via System.load. Concurrent JVMs sharing the temp directory converge on the same SHA-256 hash directory; the extraction uses ATOMIC_MOVE so racing writers don't clobber each other. Wire core/pom.xml to copy the host's locally built native lib from native/target/<profile>/lib<name>.<ext> into target/classes at the matching resource path, so the produced JAR works out of the box on the build host. Per-platform Maven profiles set the OS/arch directory segments and library filename; -Ddatafusion.native.profile=release switches the copy source from debug to release. Drop -Djava.library.path from the surefire and exec-maven-plugin argLines so the test path now exercises the same resource-extraction code path users will hit. Refs #33 (work items 1 and 3); cross-build CI, Sonatype publishing, and release docs remain as follow-ups. 
--- core/pom.xml | 102 ++++++++++++- .../datafusion/NativeLibraryLoader.java | 142 +++++++++++++++++- .../java/org/apache/datafusion/Platform.java | 136 +++++++++++++++++ .../org/apache/datafusion/PlatformTest.java | 111 ++++++++++++++ examples/pom.xml | 1 - 5 files changed, 486 insertions(+), 6 deletions(-) create mode 100644 core/src/main/java/org/apache/datafusion/Platform.java create mode 100644 core/src/test/java/org/apache/datafusion/PlatformTest.java diff --git a/core/pom.xml b/core/pom.xml index 5eddb3b..d232b88 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -34,6 +34,12 @@ under the License. Apache DataFusion Java + + + debug + + org.junit.jupiter @@ -77,9 +83,34 @@ under the License. org.apache.maven.plugins maven-surefire-plugin - -Djava.library.path=${maven.multiModuleProjectDirectory}/native/target/debug --add-opens=java.base/java.nio=ALL-UNNAMED + --add-opens=java.base/java.nio=ALL-UNNAMED + + org.apache.maven.plugins + maven-antrun-plugin + + + copy-native-lib + process-classes + run + + + + + + + + + + + + + com.googlecode.maven-download-plugin download-maven-plugin @@ -149,4 +180,73 @@ under the License. 
+ + + + + native-linux-amd64 + + unixlinuxamd64 + + + linux + amd64 + libdatafusion_jni.so + + + + native-linux-aarch64 + + unixlinuxaarch64 + + + linux + aarch64 + libdatafusion_jni.so + + + + native-mac-x86_64 + + macx86_64 + + + darwin + x86_64 + libdatafusion_jni.dylib + + + + + native-mac-amd64 + + macamd64 + + + darwin + x86_64 + libdatafusion_jni.dylib + + + + native-mac-aarch64 + + macaarch64 + + + darwin + aarch64 + libdatafusion_jni.dylib + + + diff --git a/core/src/main/java/org/apache/datafusion/NativeLibraryLoader.java b/core/src/main/java/org/apache/datafusion/NativeLibraryLoader.java index 2c74341..11771a0 100644 --- a/core/src/main/java/org/apache/datafusion/NativeLibraryLoader.java +++ b/core/src/main/java/org/apache/datafusion/NativeLibraryLoader.java @@ -19,16 +19,150 @@ package org.apache.datafusion; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.FileAlreadyExistsException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardCopyOption; +import java.security.DigestInputStream; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; + +/** + * Loads the {@code datafusion_jni} native library on demand. + * + *

The loader first tries {@link System#loadLibrary(String)} so that + * operators can override the bundled library by placing a build on + * {@code java.library.path} (for example via + * {@code -Djava.library.path=...} or {@code LD_LIBRARY_PATH}). If that + * fails the loader extracts the platform-specific library from the JAR + * resource tree and loads it via {@link System#load(String)}. + * + *

The bundled libraries live at the conventional path + * {@code org/apache/datafusion/<os>/<arch>/<libfile>}. + * Extracted files are written under + * {@code $TMPDIR/datafusion-java/<sha256>/} so that concurrent JVMs + * sharing a temp directory converge on the same file rather than each + * extracting their own copy. + */ public final class NativeLibraryLoader { - private static final String LIBRARY_NAME = "datafusion_jni"; - private static boolean loaded = false; + + private static final String TMP_DIR_NAME = "datafusion-java"; + + private static volatile boolean loaded; private NativeLibraryLoader() {} public static synchronized void loadLibrary() { - if (!loaded) { - System.loadLibrary(LIBRARY_NAME); + if (loaded) { + return; + } + if (tryLoadFromLibraryPath()) { loaded = true; + return; + } + loadFromClasspath(); + loaded = true; + } + + private static boolean tryLoadFromLibraryPath() { + try { + System.loadLibrary(Platform.LIBRARY_NAME); + return true; + } catch (UnsatisfiedLinkError ignored) { + return false; + } + } + + private static void loadFromClasspath() { + Platform platform = Platform.current(); + String resource = platform.resourcePath(); + try (InputStream check = NativeLibraryLoader.class.getResourceAsStream(resource)) { + if (check == null) { + throw new UnsatisfiedLinkError( + "No bundled datafusion_jni library for " + platform + + " (expected classpath:" + resource + ")." 
+ + " Build the native crate and add it to java.library.path," + + " or depend on a JAR built for this platform."); + } + } catch (IOException e) { + throw linkError("Failed to probe " + resource, e); + } + + try { + Path extracted = extractToTempDir(resource, platform.libFileName()); + System.load(extracted.toAbsolutePath().toString()); + } catch (IOException e) { + throw linkError("Failed to extract " + resource, e); + } + } + + private static Path extractToTempDir(String resource, String fileName) throws IOException { + Path tmpRoot = Files.createDirectories( + Paths.get(System.getProperty("java.io.tmpdir"), TMP_DIR_NAME)); + Path staging = Files.createTempFile(tmpRoot, fileName + ".", ".part"); + + String hash; + try (InputStream raw = NativeLibraryLoader.class.getResourceAsStream(resource); + DigestInputStream in = new DigestInputStream(raw, sha256()); + OutputStream out = Files.newOutputStream(staging)) { + in.transferTo(out); + hash = toHex(in.getMessageDigest().digest()); + } catch (IOException e) { + Files.deleteIfExists(staging); + throw e; + } + + Path versionedDir = Files.createDirectories(tmpRoot.resolve(hash)); + Path target = versionedDir.resolve(fileName); + + if (Files.exists(target) && Files.size(target) == Files.size(staging)) { + Files.deleteIfExists(staging); + return target; + } + + try { + Files.move(staging, target, StandardCopyOption.ATOMIC_MOVE); + } catch (FileAlreadyExistsException e) { + // Another JVM extracted the same content while we were writing. + // Their copy is identical (same SHA-256), so discard ours. + Files.deleteIfExists(staging); + } catch (IOException e) { + // Atomic move not supported on this filesystem. Fall back to a + // replacement move; the hash directory guarantees content equality. 
+ try { + Files.move(staging, target, StandardCopyOption.REPLACE_EXISTING); + } catch (IOException retry) { + Files.deleteIfExists(staging); + throw retry; + } } + return target; + } + + private static MessageDigest sha256() { + try { + return MessageDigest.getInstance("SHA-256"); + } catch (NoSuchAlgorithmException e) { + throw new IllegalStateException("SHA-256 not available", e); + } + } + + private static String toHex(byte[] bytes) { + StringBuilder sb = new StringBuilder(bytes.length * 2); + for (byte b : bytes) { + sb.append(Character.forDigit((b >> 4) & 0xf, 16)); + sb.append(Character.forDigit(b & 0xf, 16)); + } + return sb.toString(); + } + + private static UnsatisfiedLinkError linkError(String message, Throwable cause) { + UnsatisfiedLinkError err = new UnsatisfiedLinkError(message + ": " + cause.getMessage()); + err.initCause(cause); + return err; } } diff --git a/core/src/main/java/org/apache/datafusion/Platform.java b/core/src/main/java/org/apache/datafusion/Platform.java new file mode 100644 index 0000000..cf86873 --- /dev/null +++ b/core/src/main/java/org/apache/datafusion/Platform.java @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datafusion; + +import java.util.Locale; + +/** + * Identifies a target OS/arch pair and the path at which the bundled + * native library is published inside the JAR. + * + *

Resource layout follows the convention from the + * {@code datafusion-java} packaging design: + * + *

+ *   org/apache/datafusion/linux/amd64/libdatafusion_jni.so
+ *   org/apache/datafusion/linux/aarch64/libdatafusion_jni.so
+ *   org/apache/datafusion/darwin/x86_64/libdatafusion_jni.dylib
+ *   org/apache/datafusion/darwin/aarch64/libdatafusion_jni.dylib
+ * 
+ * + *

Package-private; consumed only by {@link NativeLibraryLoader}. + */ +final class Platform { + + static final String LIBRARY_NAME = "datafusion_jni"; + static final String RESOURCE_PREFIX = "org/apache/datafusion"; + + enum Os { + LINUX("linux", "lib", "so"), + DARWIN("darwin", "lib", "dylib"), + WINDOWS("windows", "", "dll"); + + final String dirName; + final String libPrefix; + final String libSuffix; + + Os(String dirName, String libPrefix, String libSuffix) { + this.dirName = dirName; + this.libPrefix = libPrefix; + this.libSuffix = libSuffix; + } + } + + final Os os; + final String arch; + + private Platform(Os os, String arch) { + this.os = os; + this.arch = arch; + } + + static Platform current() { + return of(System.getProperty("os.name"), System.getProperty("os.arch")); + } + + static Platform of(String osName, String osArch) { + Os os = detectOs(osName); + String arch = detectArch(os, osArch); + return new Platform(os, arch); + } + + static Os detectOs(String osName) { + if (osName == null) { + throw new UnsupportedOperationException("os.name is not set"); + } + String n = osName.toLowerCase(Locale.ROOT); + if (n.startsWith("linux")) { + return Os.LINUX; + } + if (n.startsWith("mac") || n.contains("darwin")) { + return Os.DARWIN; + } + if (n.startsWith("windows")) { + return Os.WINDOWS; + } + throw new UnsupportedOperationException( + "Unsupported OS for datafusion_jni: " + osName); + } + + /** + * Returns the architecture segment used in the resource path for {@code os}. + * + *

Linux uses {@code amd64} (Java's preferred name for x86_64), while + * macOS uses {@code x86_64}; both use {@code aarch64} for ARM64. + */ + static String detectArch(Os os, String osArch) { + if (osArch == null) { + throw new UnsupportedOperationException("os.arch is not set"); + } + String n = osArch.toLowerCase(Locale.ROOT); + boolean isX64 = n.equals("amd64") || n.equals("x86_64") || n.equals("x64"); + boolean isArm64 = n.equals("aarch64") || n.equals("arm64"); + if (isX64) { + return os == Os.LINUX ? "amd64" : "x86_64"; + } + if (isArm64) { + return "aarch64"; + } + throw new UnsupportedOperationException( + "Unsupported CPU architecture for datafusion_jni: " + osArch); + } + + String libFileName() { + return os.libPrefix + LIBRARY_NAME + "." + os.libSuffix; + } + + /** + * Absolute classpath resource path (with leading slash) of the bundled + * native library for this platform. + */ + String resourcePath() { + return "/" + RESOURCE_PREFIX + "/" + os.dirName + "/" + arch + "/" + libFileName(); + } + + @Override + public String toString() { + return os.dirName + "/" + arch; + } +} diff --git a/core/src/test/java/org/apache/datafusion/PlatformTest.java b/core/src/test/java/org/apache/datafusion/PlatformTest.java new file mode 100644 index 0000000..bc7936b --- /dev/null +++ b/core/src/test/java/org/apache/datafusion/PlatformTest.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datafusion; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import org.junit.jupiter.api.Test; + +class PlatformTest { + + @Test + void detectsLinuxFromOsName() { + assertEquals(Platform.Os.LINUX, Platform.detectOs("Linux")); + assertEquals(Platform.Os.LINUX, Platform.detectOs("linux")); + } + + @Test + void detectsDarwinFromOsName() { + assertEquals(Platform.Os.DARWIN, Platform.detectOs("Mac OS X")); + assertEquals(Platform.Os.DARWIN, Platform.detectOs("macOS")); + assertEquals(Platform.Os.DARWIN, Platform.detectOs("Darwin")); + } + + @Test + void detectsWindowsFromOsName() { + assertEquals(Platform.Os.WINDOWS, Platform.detectOs("Windows 10")); + assertEquals(Platform.Os.WINDOWS, Platform.detectOs("Windows Server 2019")); + } + + @Test + void rejectsUnknownOs() { + assertThrows(UnsupportedOperationException.class, + () -> Platform.detectOs("Solaris")); + } + + @Test + void rejectsNullOs() { + assertThrows(UnsupportedOperationException.class, + () -> Platform.detectOs(null)); + } + + @Test + void usesAmd64OnLinuxForX86_64Aliases() { + assertEquals("amd64", Platform.detectArch(Platform.Os.LINUX, "amd64")); + assertEquals("amd64", Platform.detectArch(Platform.Os.LINUX, "x86_64")); + assertEquals("amd64", Platform.detectArch(Platform.Os.LINUX, "x64")); + } + + @Test + void usesX86_64OnDarwinForX86_64Aliases() { + assertEquals("x86_64", Platform.detectArch(Platform.Os.DARWIN, "amd64")); + assertEquals("x86_64", 
Platform.detectArch(Platform.Os.DARWIN, "x86_64")); + } + + @Test + void usesAarch64ForArm64Aliases() { + assertEquals("aarch64", Platform.detectArch(Platform.Os.LINUX, "aarch64")); + assertEquals("aarch64", Platform.detectArch(Platform.Os.LINUX, "arm64")); + assertEquals("aarch64", Platform.detectArch(Platform.Os.DARWIN, "aarch64")); + assertEquals("aarch64", Platform.detectArch(Platform.Os.DARWIN, "arm64")); + } + + @Test + void rejectsUnknownArch() { + assertThrows(UnsupportedOperationException.class, + () -> Platform.detectArch(Platform.Os.LINUX, "ppc64le")); + } + + @Test + void libFileNameUsesPlatformConventions() { + assertEquals("libdatafusion_jni.so", Platform.of("Linux", "amd64").libFileName()); + assertEquals("libdatafusion_jni.so", Platform.of("Linux", "aarch64").libFileName()); + assertEquals("libdatafusion_jni.dylib", Platform.of("Mac OS X", "x86_64").libFileName()); + assertEquals("libdatafusion_jni.dylib", Platform.of("Mac OS X", "aarch64").libFileName()); + assertEquals("datafusion_jni.dll", Platform.of("Windows 11", "amd64").libFileName()); + } + + @Test + void resourcePathMatchesSpec() { + assertEquals( + "/org/apache/datafusion/linux/amd64/libdatafusion_jni.so", + Platform.of("Linux", "amd64").resourcePath()); + assertEquals( + "/org/apache/datafusion/linux/aarch64/libdatafusion_jni.so", + Platform.of("Linux", "aarch64").resourcePath()); + assertEquals( + "/org/apache/datafusion/darwin/x86_64/libdatafusion_jni.dylib", + Platform.of("Mac OS X", "x86_64").resourcePath()); + assertEquals( + "/org/apache/datafusion/darwin/aarch64/libdatafusion_jni.dylib", + Platform.of("Mac OS X", "aarch64").resourcePath()); + } +} diff --git a/examples/pom.xml b/examples/pom.xml index 97a6b40..96c9ad5 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -75,7 +75,6 @@ under the License. java - -Djava.library.path=${maven.multiModuleProjectDirectory}/native/target/debug --add-opens=java.base/java.nio=ALL-UNNAMED -classpath