Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 101 additions & 1 deletion core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@ under the License.

<name>Apache DataFusion Java</name>

<properties>
<!-- Which cargo build profile (debug/release) the bundled native
library is copied from. Override with -Ddatafusion.native.profile=release. -->
<datafusion.native.profile>debug</datafusion.native.profile>
</properties>

<dependencies>
<dependency>
<groupId>org.junit.jupiter</groupId>
Expand Down Expand Up @@ -77,9 +83,34 @@ under the License.
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<argLine>-Djava.library.path=${maven.multiModuleProjectDirectory}/native/target/debug --add-opens=java.base/java.nio=ALL-UNNAMED</argLine>
<argLine>--add-opens=java.base/java.nio=ALL-UNNAMED</argLine>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
<executions>
<execution>
<id>copy-native-lib</id>
<phase>process-classes</phase>
<goals><goal>run</goal></goals>
<configuration>
<target>
<property name="datafusion.native.lib.source"
value="${maven.multiModuleProjectDirectory}/native/target/${datafusion.native.profile}/${datafusion.lib.filename}"/>
<fail message="Native library not found at ${datafusion.native.lib.source}. Run 'cd native &amp;&amp; cargo build' (or 'make') before building the JAR.">
<condition><not><available file="${datafusion.native.lib.source}"/></not></condition>
</fail>
Comment on lines +99 to +103
Copy link
Copy Markdown
Contributor

@LantaoJin LantaoJin May 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On Windows, none of the new native profiles defines datafusion.lib.filename, datafusion.lib.os, or datafusion.lib.arch, so this path remains unresolved and the new process-classes fail check aborts mvn test/package even after cargo build creates native/target/debug/datafusion_jni.dll. This regresses the previous java.library.path flow, because the build now fails before NativeLibraryLoader can fall back to System.loadLibrary. One fix is to add a Windows profile mirroring the Linux/Mac ones: <datafusion.lib.os>windows</datafusion.lib.os>, <datafusion.lib.arch>amd64</datafusion.lib.arch>, <datafusion.lib.filename>datafusion_jni.dll</datafusion.lib.filename>. Even though no Windows binary will be bundled today, the build at least stops failing. Another option is to skip the copy step on unsupported platforms.

<mkdir dir="${project.build.outputDirectory}/org/apache/datafusion/${datafusion.lib.os}/${datafusion.lib.arch}"/>
<copy file="${datafusion.native.lib.source}"
tofile="${project.build.outputDirectory}/org/apache/datafusion/${datafusion.lib.os}/${datafusion.lib.arch}/${datafusion.lib.filename}"
preservelastmodified="true"
verbose="true"/>
</target>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>com.googlecode.maven-download-plugin</groupId>
<artifactId>download-maven-plugin</artifactId>
Expand Down Expand Up @@ -149,4 +180,73 @@ under the License.
</plugin>
</plugins>
</build>

<profiles>
  <!--
    One of these profiles activates per host so the antrun copy
    knows the OS/arch directory and library filename to use.
    Resource layout follows the bundling convention:

      org/apache/datafusion/linux/amd64/libdatafusion_jni.so
      org/apache/datafusion/linux/aarch64/libdatafusion_jni.so
      org/apache/datafusion/darwin/x86_64/libdatafusion_jni.dylib
      org/apache/datafusion/darwin/aarch64/libdatafusion_jni.dylib
      org/apache/datafusion/windows/amd64/datafusion_jni.dll

    A host that matches none of these profiles leaves
    datafusion.lib.os / datafusion.lib.arch / datafusion.lib.filename
    undefined, so the copy-native-lib step cannot resolve its paths.
  -->
  <profile>
    <id>native-linux-amd64</id>
    <activation>
      <os><family>unix</family><name>linux</name><arch>amd64</arch></os>
    </activation>
    <properties>
      <datafusion.lib.os>linux</datafusion.lib.os>
      <datafusion.lib.arch>amd64</datafusion.lib.arch>
      <datafusion.lib.filename>libdatafusion_jni.so</datafusion.lib.filename>
    </properties>
  </profile>
  <profile>
    <id>native-linux-aarch64</id>
    <activation>
      <os><family>unix</family><name>linux</name><arch>aarch64</arch></os>
    </activation>
    <properties>
      <datafusion.lib.os>linux</datafusion.lib.os>
      <datafusion.lib.arch>aarch64</datafusion.lib.arch>
      <datafusion.lib.filename>libdatafusion_jni.so</datafusion.lib.filename>
    </properties>
  </profile>
  <profile>
    <id>native-mac-x86_64</id>
    <activation>
      <os><family>mac</family><arch>x86_64</arch></os>
    </activation>
    <properties>
      <datafusion.lib.os>darwin</datafusion.lib.os>
      <datafusion.lib.arch>x86_64</datafusion.lib.arch>
      <datafusion.lib.filename>libdatafusion_jni.dylib</datafusion.lib.filename>
    </properties>
  </profile>
  <profile>
    <!-- Some JVMs report os.arch=amd64 even on macOS x86_64. -->
    <id>native-mac-amd64</id>
    <activation>
      <os><family>mac</family><arch>amd64</arch></os>
    </activation>
    <properties>
      <datafusion.lib.os>darwin</datafusion.lib.os>
      <datafusion.lib.arch>x86_64</datafusion.lib.arch>
      <datafusion.lib.filename>libdatafusion_jni.dylib</datafusion.lib.filename>
    </properties>
  </profile>
  <profile>
    <id>native-mac-aarch64</id>
    <activation>
      <os><family>mac</family><arch>aarch64</arch></os>
    </activation>
    <properties>
      <datafusion.lib.os>darwin</datafusion.lib.os>
      <datafusion.lib.arch>aarch64</datafusion.lib.arch>
      <datafusion.lib.filename>libdatafusion_jni.dylib</datafusion.lib.filename>
    </properties>
  </profile>
  <profile>
    <!-- Windows cdylib output has no "lib" prefix: datafusion_jni.dll.
         Defining these properties keeps the copy-native-lib paths
         resolvable so the build does not abort on Windows hosts. -->
    <id>native-windows-amd64</id>
    <activation>
      <os><family>windows</family><arch>amd64</arch></os>
    </activation>
    <properties>
      <datafusion.lib.os>windows</datafusion.lib.os>
      <datafusion.lib.arch>amd64</datafusion.lib.arch>
      <datafusion.lib.filename>datafusion_jni.dll</datafusion.lib.filename>
    </properties>
  </profile>
</profiles>
</project>
142 changes: 138 additions & 4 deletions core/src/main/java/org/apache/datafusion/NativeLibraryLoader.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,150 @@

package org.apache.datafusion;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.FileAlreadyExistsException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

/**
* Loads the {@code datafusion_jni} native library on demand.
*
* <p>The loader first tries {@link System#loadLibrary(String)} so that
* operators can override the bundled library by placing a build on
* {@code java.library.path} (for example via
* {@code -Djava.library.path=...} or {@code LD_LIBRARY_PATH}). If that
* fails the loader extracts the platform-specific library from the JAR
* resource tree and loads it via {@link System#load(String)}.
*
* <p>The bundled libraries live at the conventional path
* {@code org/apache/datafusion/<os>/<arch>/<libfile>}.
* Extracted files are written under
* {@code $TMPDIR/datafusion-java/<sha256>/} so that concurrent JVMs
* sharing a temp directory converge on the same file rather than each
* extracting their own copy.
*/
public final class NativeLibraryLoader {
private static final String LIBRARY_NAME = "datafusion_jni";
private static boolean loaded = false;

private static final String TMP_DIR_NAME = "datafusion-java";

private static volatile boolean loaded;

private NativeLibraryLoader() {}

/**
 * Loads the {@code datafusion_jni} native library, doing the work at most once
 * per successful load.
 *
 * <p>The method is {@code synchronized}, and the {@code loaded} flag is only set
 * after one of the two load paths returns normally: first
 * {@code tryLoadFromLibraryPath()}, then {@code loadFromClasspath()}.
 */
public static synchronized void loadLibrary() {
// NOTE(review): the next two lines look like the removed side of the diff
// (the old implementation) interleaved with the added lines that follow;
// the braces in this span do not balance as shown. Confirm against the file.
if (!loaded) {
System.loadLibrary(LIBRARY_NAME);
// Fast path: a previous call already loaded the library.
if (loaded) {
return;
}
// Try the library search path first; a hit there means the bundled copy is not used.
if (tryLoadFromLibraryPath()) {
loaded = true;
return;
}
// Otherwise load the bundled copy; the flag is set only if this returns normally.
loadFromClasspath();
loaded = true;
}

/**
 * Attempts to load the library through {@link System#loadLibrary(String)}.
 *
 * @return {@code true} when the call completed normally, {@code false} when it
 *     raised an {@link UnsatisfiedLinkError}
 */
private static boolean tryLoadFromLibraryPath() {
    boolean resolved;
    try {
        System.loadLibrary(Platform.LIBRARY_NAME);
        resolved = true;
    } catch (UnsatisfiedLinkError notOnLibraryPath) {
        // Intentionally not rethrown: the caller decides what to do when
        // the library search path does not yield a loadable library.
        resolved = false;
    }
    return resolved;
}

/**
 * Loads the library bundled on the classpath for the current platform.
 *
 * <p>The resource is probed first so that a missing bundle is reported with a
 * dedicated message; it is then extracted to a temp directory and loaded via
 * {@link System#load(String)}.
 *
 * @throws UnsatisfiedLinkError if the resource is absent, or if probing or
 *     extracting it raises an {@link IOException}
 */
private static void loadFromClasspath() {
    final Platform host = Platform.current();
    final String resourcePath = host.resourcePath();

    // Step 1: confirm the resource exists before doing any file-system work.
    try (InputStream probe = NativeLibraryLoader.class.getResourceAsStream(resourcePath)) {
        if (probe == null) {
            String notBundled =
                "No bundled datafusion_jni library for " + host
                    + " (expected classpath:" + resourcePath + ")."
                    + " Build the native crate and add it to java.library.path,"
                    + " or depend on a JAR built for this platform.";
            throw new UnsatisfiedLinkError(notBundled);
        }
    } catch (IOException probeFailure) {
        throw linkError("Failed to probe " + resourcePath, probeFailure);
    }

    // Step 2: materialize the library on disk.
    final Path libraryFile;
    try {
        libraryFile = extractToTempDir(resourcePath, host.libFileName());
    } catch (IOException extractFailure) {
        throw linkError("Failed to extract " + resourcePath, extractFailure);
    }

    // Step 3: System.load requires an absolute path.
    System.load(libraryFile.toAbsolutePath().toString());
}

/**
 * Extracts a classpath resource into a content-addressed directory under the
 * JVM temp directory and returns the path of the extracted file.
 *
 * <p>The resource is first streamed into a uniquely named staging file while its
 * SHA-256 is computed. The final location is
 * {@code <java.io.tmpdir>/<TMP_DIR_NAME>/<sha256>/<fileName>}. An existing file
 * at that location is reused only if its own SHA-256 matches; a size match
 * alone is not accepted, because the directory name is predictable and may be
 * shared. This costs one extra read of the existing file per call.
 *
 * @param resource classpath resource path of the library
 * @param fileName file name to give the extracted library
 * @return path of the extracted (or already present and verified) library
 * @throws IOException if the resource is missing, or the file cannot be written
 *     or published and no verified copy exists at the target
 */
private static Path extractToTempDir(String resource, String fileName) throws IOException {
    Path tmpRoot = Files.createDirectories(
        Paths.get(System.getProperty("java.io.tmpdir"), TMP_DIR_NAME));
    Path staging = Files.createTempFile(tmpRoot, fileName + ".", ".part");
    try {
        String hash;
        try (InputStream raw = NativeLibraryLoader.class.getResourceAsStream(resource)) {
            if (raw == null) {
                // Without this guard the digest stream would fail with a NullPointerException.
                throw new IOException("Classpath resource not found: " + resource);
            }
            try (DigestInputStream in = new DigestInputStream(raw, sha256());
                 OutputStream out = Files.newOutputStream(staging)) {
                in.transferTo(out);
                hash = toHex(in.getMessageDigest().digest());
            }
        }

        Path target = Files.createDirectories(tmpRoot.resolve(hash)).resolve(fileName);
        if (matchesDigest(target, hash)) {
            // A verified copy is already published; the staging file is discarded below.
            return target;
        }

        try {
            Files.move(staging, target, StandardCopyOption.ATOMIC_MOVE);
        } catch (IOException atomicFailure) {
            // Either atomic moves are unsupported on this filesystem, or the target
            // already exists (ATOMIC_MOVE may fail instead of replacing it).
            if (matchesDigest(target, hash)) {
                // Another JVM published identical content while we were writing.
                return target;
            }
            try {
                Files.move(staging, target, StandardCopyOption.REPLACE_EXISTING);
            } catch (IOException retry) {
                // The target may be locked because another JVM has it loaded;
                // that is fine as long as its content is the expected one.
                if (!matchesDigest(target, hash)) {
                    retry.addSuppressed(atomicFailure);
                    throw retry;
                }
            }
        }
        return target;
    } finally {
        // No-op after a successful move; otherwise removes the leftover staging
        // file on every exit path, including unchecked exceptions.
        discardQuietly(staging);
    }
}

/** Returns true if {@code file} is a readable regular file whose SHA-256 equals {@code expectedHex}. */
private static boolean matchesDigest(Path file, String expectedHex) {
    if (!Files.isRegularFile(file)) {
        return false;
    }
    MessageDigest digest = sha256();
    byte[] buffer = new byte[8192];
    try (InputStream in = Files.newInputStream(file)) {
        int read;
        while ((read = in.read(buffer)) != -1) {
            digest.update(buffer, 0, read);
        }
    } catch (IOException unreadable) {
        // A file that cannot be read cannot be trusted as a verified copy.
        return false;
    }
    return toHex(digest.digest()).equals(expectedHex);
}

/** Best-effort delete of a staging file; a failure here must not mask the primary outcome. */
private static void discardQuietly(Path file) {
    try {
        Files.deleteIfExists(file);
    } catch (IOException ignored) {
        // Leaving a stray .part file in the temp directory is harmless.
    }
}

/**
 * Creates a fresh SHA-256 {@link MessageDigest}.
 *
 * @return a new digest instance
 * @throws IllegalStateException if the runtime does not provide SHA-256
 */
private static MessageDigest sha256() {
    final String algorithm = "SHA-256";
    try {
        return MessageDigest.getInstance(algorithm);
    } catch (NoSuchAlgorithmException unavailable) {
        throw new IllegalStateException("SHA-256 not available", unavailable);
    }
}

/**
 * Encodes bytes as a lowercase hexadecimal string, two characters per byte.
 *
 * @param bytes the bytes to encode
 * @return the hex encoding; empty for an empty array
 */
private static String toHex(byte[] bytes) {
    final char[] digits = "0123456789abcdef".toCharArray();
    char[] hex = new char[bytes.length * 2];
    int pos = 0;
    for (byte value : bytes) {
        hex[pos++] = digits[(value >>> 4) & 0x0f]; // high nibble
        hex[pos++] = digits[value & 0x0f];         // low nibble
    }
    return new String(hex);
}

/**
 * Builds an {@link UnsatisfiedLinkError} that carries {@code cause}.
 *
 * <p>{@code UnsatisfiedLinkError} has no constructor taking a cause, so the cause
 * is attached with {@link Throwable#initCause(Throwable)}. When the cause has no
 * message of its own, its {@code toString()} is used instead, so the resulting
 * message never ends in a literal "null".
 *
 * @param message context describing what was being attempted
 * @param cause the underlying failure
 * @return the new error, with {@code cause} set as its cause
 */
private static UnsatisfiedLinkError linkError(String message, Throwable cause) {
    String detail = cause.getMessage();
    if (detail == null) {
        detail = cause.toString();
    }
    UnsatisfiedLinkError err = new UnsatisfiedLinkError(message + ": " + detail);
    err.initCause(cause);
    return err;
}
}
Loading
Loading