From 994f1942acc3aef786f8b7a22bbed7d1c21c6581 Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Thu, 21 May 2026 06:50:34 +0000 Subject: [PATCH] feat: typed DataFusionException hierarchy across the JNI boundary --- .../datafusion/ConfigurationException.java | 37 +++ .../datafusion/DataFusionException.java | 45 +++ .../apache/datafusion/ExecutionException.java | 42 +++ .../org/apache/datafusion/IoException.java | 44 +++ .../datafusion/NotImplementedException.java | 37 +++ .../org/apache/datafusion/PlanException.java | 37 +++ .../ResourcesExhaustedException.java | 40 +++ .../datafusion/DataFusionExceptionTest.java | 84 ++++++ .../SessionContextTypedErrorTest.java | 114 ++++++++ native/Cargo.toml | 5 +- native/src/errors.rs | 265 +++++++++++++++++- 11 files changed, 744 insertions(+), 6 deletions(-) create mode 100644 core/src/main/java/org/apache/datafusion/ConfigurationException.java create mode 100644 core/src/main/java/org/apache/datafusion/DataFusionException.java create mode 100644 core/src/main/java/org/apache/datafusion/ExecutionException.java create mode 100644 core/src/main/java/org/apache/datafusion/IoException.java create mode 100644 core/src/main/java/org/apache/datafusion/NotImplementedException.java create mode 100644 core/src/main/java/org/apache/datafusion/PlanException.java create mode 100644 core/src/main/java/org/apache/datafusion/ResourcesExhaustedException.java create mode 100644 core/src/test/java/org/apache/datafusion/DataFusionExceptionTest.java create mode 100644 core/src/test/java/org/apache/datafusion/SessionContextTypedErrorTest.java diff --git a/core/src/main/java/org/apache/datafusion/ConfigurationException.java b/core/src/main/java/org/apache/datafusion/ConfigurationException.java new file mode 100644 index 0000000..3901f3d --- /dev/null +++ b/core/src/main/java/org/apache/datafusion/ConfigurationException.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datafusion; + +/** + * Invalid or unrecognised DataFusion configuration option. Surfaces upstream {@code + * DataFusionError::Configuration}. + */ +public class ConfigurationException extends DataFusionException { + + private static final long serialVersionUID = 1L; + + public ConfigurationException(String message) { + super(message); + } + + public ConfigurationException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/core/src/main/java/org/apache/datafusion/DataFusionException.java b/core/src/main/java/org/apache/datafusion/DataFusionException.java new file mode 100644 index 0000000..9ae7453 --- /dev/null +++ b/core/src/main/java/org/apache/datafusion/DataFusionException.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datafusion; + +/** + * Base unchecked exception for every error surfaced from the native DataFusion side. + * + *

Concrete subclasses correspond to caller-relevant error categories (planning, execution, IO, + * resources, configuration, not-implemented). Variants of {@code DataFusionError} on the Rust side + * that don't fit a clean caller-facing category surface as the parent class itself. + * + *

All subclasses extend {@link RuntimeException} so existing callers that {@code catch + * (RuntimeException)} keep working unchanged. Callers that want to discriminate can {@code catch + * (PlanException)} / {@code catch (ResourcesExhaustedException)} etc., or {@code catch + * (DataFusionException)} for "anything from DataFusion". + */ +public class DataFusionException extends RuntimeException { + + private static final long serialVersionUID = 1L; + + public DataFusionException(String message) { + super(message); + } + + public DataFusionException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/core/src/main/java/org/apache/datafusion/ExecutionException.java b/core/src/main/java/org/apache/datafusion/ExecutionException.java new file mode 100644 index 0000000..a503eda --- /dev/null +++ b/core/src/main/java/org/apache/datafusion/ExecutionException.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datafusion; + +/** + * Runtime execution failure: a UDF threw, a join task panicked, an external (non-DataFusion) error + * propagated up, or an FFI-level failure surfaced. Surfaces upstream {@code + * DataFusionError::Execution}, {@code ExecutionJoin}, {@code External}, and {@code Ffi}. + * + *

Note: this is {@code org.apache.datafusion.ExecutionException}, distinct from {@code + * java.util.concurrent.ExecutionException}. A file that needs both should fully-qualify one or + * import them with care. + */ +public class ExecutionException extends DataFusionException { + + private static final long serialVersionUID = 1L; + + public ExecutionException(String message) { + super(message); + } + + public ExecutionException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/core/src/main/java/org/apache/datafusion/IoException.java b/core/src/main/java/org/apache/datafusion/IoException.java new file mode 100644 index 0000000..79f196e --- /dev/null +++ b/core/src/main/java/org/apache/datafusion/IoException.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datafusion; + +/** + * IO-shaped failure: a local filesystem read failed, an object store request failed, or a parquet / + * arrow / avro decoder reported a malformed file. Surfaces upstream {@code + * DataFusionError::IoError}, {@code ObjectStore}, {@code ArrowError}, {@code ParquetError}, and + * {@code AvroError}. + * + *

Note: this is {@code org.apache.datafusion.IoException} (lowercase {@code o}), distinct from + * {@code java.io.IOException}. The {@code IoException} spelling matches the orthography of the + * underlying {@code DataFusionError::IoError} variant; a file that needs both should fully-qualify + * one. + */ +public class IoException extends DataFusionException { + + private static final long serialVersionUID = 1L; + + public IoException(String message) { + super(message); + } + + public IoException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/core/src/main/java/org/apache/datafusion/NotImplementedException.java b/core/src/main/java/org/apache/datafusion/NotImplementedException.java new file mode 100644 index 0000000..b5d59c3 --- /dev/null +++ b/core/src/main/java/org/apache/datafusion/NotImplementedException.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datafusion; + +/** + * The requested feature is recognised by DataFusion but not implemented yet. Surfaces upstream + * {@code DataFusionError::NotImplemented}. + */ +public class NotImplementedException extends DataFusionException { + + private static final long serialVersionUID = 1L; + + public NotImplementedException(String message) { + super(message); + } + + public NotImplementedException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/core/src/main/java/org/apache/datafusion/PlanException.java b/core/src/main/java/org/apache/datafusion/PlanException.java new file mode 100644 index 0000000..a4d9370 --- /dev/null +++ b/core/src/main/java/org/apache/datafusion/PlanException.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datafusion; + +/** + * SQL parsing, logical planning, or schema-resolution failure. Surfaces upstream {@code + * DataFusionError::Plan}, {@code DataFusionError::SQL}, and {@code DataFusionError::SchemaError}. + */ +public class PlanException extends DataFusionException { + + private static final long serialVersionUID = 1L; + + public PlanException(String message) { + super(message); + } + + public PlanException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/core/src/main/java/org/apache/datafusion/ResourcesExhaustedException.java b/core/src/main/java/org/apache/datafusion/ResourcesExhaustedException.java new file mode 100644 index 0000000..04c91c3 --- /dev/null +++ b/core/src/main/java/org/apache/datafusion/ResourcesExhaustedException.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datafusion; + +/** + * The DataFusion runtime exhausted a configured resource budget — typically the memory pool, but + * applies to any guard upstream surfaces as {@code DataFusionError::ResourcesExhausted}. + * + *

Distinguishing this from {@link ExecutionException} lets callers retry transient + * resource-pressure failures without retrying genuine query bugs. + */ +public class ResourcesExhaustedException extends DataFusionException { + + private static final long serialVersionUID = 1L; + + public ResourcesExhaustedException(String message) { + super(message); + } + + public ResourcesExhaustedException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/core/src/test/java/org/apache/datafusion/DataFusionExceptionTest.java b/core/src/test/java/org/apache/datafusion/DataFusionExceptionTest.java new file mode 100644 index 0000000..ca99211 --- /dev/null +++ b/core/src/test/java/org/apache/datafusion/DataFusionExceptionTest.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datafusion; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertSame; + +import org.junit.jupiter.api.Test; + +class DataFusionExceptionTest { + + @Test + void parentExtendsRuntimeException() { + DataFusionException e = new DataFusionException("x"); + assertInstanceOf(RuntimeException.class, e); + assertEquals("x", e.getMessage()); + } + + @Test + void parentCauseConstructorPropagatesCause() { + Throwable cause = new IllegalStateException("inner"); + DataFusionException e = new DataFusionException("outer", cause); + assertEquals("outer", e.getMessage()); + assertSame(cause, e.getCause()); + } + + @Test + void allSubclassesExtendDataFusionException() { + // Each subclass must extend the parent so callers can `catch + // (DataFusionException)` for "anything from DataFusion" and `catch + // (RuntimeException)` keeps working unchanged. + assertInstanceOf(DataFusionException.class, new PlanException("x")); + assertInstanceOf(DataFusionException.class, new ExecutionException("x")); + assertInstanceOf(DataFusionException.class, new ResourcesExhaustedException("x")); + assertInstanceOf(DataFusionException.class, new IoException("x")); + assertInstanceOf(DataFusionException.class, new NotImplementedException("x")); + assertInstanceOf(DataFusionException.class, new ConfigurationException("x")); + } + + @Test + void allSubclassesExtendRuntimeException() { + // Existing callers use `catch (RuntimeException)`. A typed-exception PR + // that breaks them is a regression even if the new types are correct. + assertInstanceOf(RuntimeException.class, new PlanException("x")); + assertInstanceOf(RuntimeException.class, new ExecutionException("x")); + assertInstanceOf(RuntimeException.class, new ResourcesExhaustedException("x")); + assertInstanceOf(RuntimeException.class, new IoException("x")); + assertInstanceOf(RuntimeException.class, new NotImplementedException("x")); + assertInstanceOf(RuntimeException.class, new ConfigurationException("x")); + } + + @Test + void allSubclassesAcceptCauseConstructor() { + // The (String, Throwable) constructor is the seam upstream issue #55 + // plugs into for Java-throwable propagation from JVM upcalls. v1 has no + // production caller for it, but it must exist on every subclass so #55 + // doesn't have to add it later. + Throwable cause = new IllegalStateException("c"); + assertSame(cause, new PlanException("m", cause).getCause()); + assertSame(cause, new ExecutionException("m", cause).getCause()); + assertSame(cause, new ResourcesExhaustedException("m", cause).getCause()); + assertSame(cause, new IoException("m", cause).getCause()); + assertSame(cause, new NotImplementedException("m", cause).getCause()); + assertSame(cause, new ConfigurationException("m", cause).getCause()); + } +} diff --git a/core/src/test/java/org/apache/datafusion/SessionContextTypedErrorTest.java b/core/src/test/java/org/apache/datafusion/SessionContextTypedErrorTest.java new file mode 100644 index 0000000..3f6720e --- /dev/null +++ b/core/src/test/java/org/apache/datafusion/SessionContextTypedErrorTest.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datafusion; + +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import org.junit.jupiter.api.Test; + +/** + * End-to-end checks that real native call sites surface as the right typed Java exception. The + * assertions are on exception type, not message text — the whole point of the typed + * hierarchy is to free callers from message-string sniffing. + */ +class SessionContextTypedErrorTest { + + @Test + void unknownTableInSqlIsPlanException() { + try (SessionContext ctx = new SessionContext()) { + DataFusionException e = + assertThrows( + DataFusionException.class, () -> ctx.sql("SELECT * FROM nonexistent_table_xyz_42")); + assertInstanceOf(PlanException.class, e); + } + } + + @Test + void malformedParquetFileIsIoException(@org.junit.jupiter.api.io.TempDir java.nio.file.Path dir) + throws java.io.IOException { + // Triggering a clean DataFusionError::IoError / ParquetError requires a + // file that exists but isn't a valid parquet -- a missing path is + // classified as "no table paths" (Execution) rather than IoError. + java.nio.file.Path bogus = dir.resolve("not-actually-parquet.parquet"); + java.nio.file.Files.writeString(bogus, "this is not a parquet file"); + try (SessionContext ctx = new SessionContext()) { + DataFusionException e = + assertThrows( + DataFusionException.class, + () -> ctx.registerParquet("t", bogus.toAbsolutePath().toString())); + assertInstanceOf(IoException.class, e); + } + } + + @Test + void unknownConfigKeyIsConfigurationException() { + // SessionContextBuilder.setOption defers config-key validation to build() + // time, where DataFusion's `ConfigOptions::set` returns + // DataFusionError::Configuration for unrecognised keys. + SessionContextBuilder b = + SessionContext.builder().setOption("datafusion.bogus_made_up_key_for_typed_test", "x"); + DataFusionException e = assertThrows(DataFusionException.class, b::build); + assertInstanceOf(ConfigurationException.class, e); + } + + @Test + void divideByZeroIsExecutionException() throws java.io.IOException { + // SELECT 1/0 surfaces as DataFusionError::ArrowError(ArrowError::DivideByZero) + // -- the case Codex flagged that previously routed to IoException because + // the outer arm matched *any* ArrowError. ExecutionException is the right + // typed class for an arithmetic failure produced during query execution. + try (SessionContext ctx = new SessionContext(); + org.apache.arrow.memory.RootAllocator allocator = + new org.apache.arrow.memory.RootAllocator()) { + DataFusionException e = + assertThrows( + DataFusionException.class, + () -> { + try (DataFrame df = ctx.sql("SELECT 1/0")) { + df.collect(allocator).close(); + } + }); + assertInstanceOf(ExecutionException.class, e); + } + } + + @Test + void runtimeKeyRejectionIsParentDataFusionException() { + // The native-side guard against `datafusion.runtime.*` keys uses a + // stringly-typed `Err("...")?` (not a DataFusionError variant), so it + // surfaces as the parent DataFusionException — the catch-all path that + // protects callers from a wrong typed subclass when the underlying error + // doesn't carry a clean variant signal. + SessionContextBuilder b = + SessionContext.builder().setOption("datafusion.runtime.memory_limit", "2G"); + DataFusionException e = assertThrows(DataFusionException.class, b::build); + // Strictly the parent class, not any subclass. + if (e instanceof PlanException + || e instanceof IoException + || e instanceof ConfigurationException + || e instanceof ExecutionException + || e instanceof ResourcesExhaustedException + || e instanceof NotImplementedException) { + throw new AssertionError( + "expected parent DataFusionException, got " + e.getClass().getName()); + } + } +} diff --git a/native/Cargo.toml b/native/Cargo.toml index 42fab85..1b7eb09 100644 --- a/native/Cargo.toml +++ b/native/Cargo.toml @@ -22,7 +22,10 @@ edition = "2021" publish = false [lib] -crate-type = ["cdylib"] +# `rlib` alongside `cdylib` so `cargo test` has a Rust-level harness for +# native-only invariants (e.g. error-classification routing through wrapped +# DataFusionError chains). The `cdylib` is still the artifact the JVM loads. +crate-type = ["cdylib", "rlib"] [dependencies] arrow = { version = "58", features = ["ffi"] } diff --git a/native/src/errors.rs b/native/src/errors.rs index e779bb7..d926544 100644 --- a/native/src/errors.rs +++ b/native/src/errors.rs @@ -19,32 +19,146 @@ use std::any::Any; use std::error::Error; use std::panic::{catch_unwind, AssertUnwindSafe}; +use datafusion::arrow::error::ArrowError; +use datafusion::error::DataFusionError; use jni::JNIEnv; pub type JniResult = Result>; +/// Run `f`, catching panics and translating its `Err` into the appropriate +/// Java exception. When the boxed error is a [`DataFusionError`], the variant +/// drives which Java exception class is thrown -- callers can `catch +/// (PlanException)` / `catch (ResourcesExhaustedException)` etc. without +/// scraping message strings. Anything else (including Rust panics) falls +/// through to the parent [`DataFusionException`] class. pub fn try_unwrap_or_throw(env: &mut JNIEnv, default: T, f: F) -> T where F: FnOnce(&mut JNIEnv) -> JniResult, { match catch_unwind(AssertUnwindSafe(|| f(env))) { Ok(Ok(value)) => value, - Ok(Err(err)) => { - throw_runtime_exception(env, &err.to_string()); + Ok(Err(boxed)) => { + // Try to recover the typed DataFusionError so we can pick a + // matching Java subclass. If the error came from elsewhere + // (jni::errors::Error, prost::DecodeError, a stringly-typed + // Err("...")?), it falls through to the parent class. + match boxed.downcast::() { + Ok(df_err) => { + // Walk the cause chain so a ResourcesExhausted / Plan / + // etc. wrapped in ArrowError::External (or any other + // upstream-mandated wrapper) is classified by its real + // root, not by the outer wrapper. Same shape as upstream's + // `DataFusionError::find_root()`. The thrown message + // preserves the outer error's full chain so wrapping + // context isn't lost; only the *class* picks the inner + // variant. + let class = classify(df_err.find_root()); + throw(env, class, &df_err.to_string()); + } + Err(other) => { + throw(env, PARENT, &other.to_string()); + } + } default } Err(panic) => { - throw_runtime_exception(env, &panic_message(&panic)); + // Rust panics are upstream-bug-shaped, not caller-actionable. + // Surface as the parent class with a `panic: ` prefix so the + // caller can grep without inspecting a typed subclass. + throw(env, PARENT, &format!("panic: {}", panic_message(&panic))); default } } } -fn throw_runtime_exception(env: &mut JNIEnv, message: &str) { +const PARENT: &str = "org/apache/datafusion/DataFusionException"; + +/// Map a [`DataFusionError`] variant onto the Java exception class to throw. +/// Multiple Rust variants fold into a single Java class when they're the same +/// kind of problem from the caller's standpoint -- the proposed mapping is +/// the public Java contract; the underlying variant set is not. +/// +/// Callers should pass the result of [`DataFusionError::find_root`] so a +/// `ResourcesExhausted` / `Plan` / etc. buried inside a wrapping `ArrowError` +/// or `Context(...)` is classified by its real root. Variants without a +/// clean caller-facing category fall through to the parent class. +fn classify(err: &DataFusionError) -> &'static str { + match err { + DataFusionError::Plan(_) + | DataFusionError::SQL(_, _) + | DataFusionError::SchemaError(_, _) => "org/apache/datafusion/PlanException", + DataFusionError::Execution(_) + | DataFusionError::ExecutionJoin(_) + | DataFusionError::External(_) + | DataFusionError::Ffi(_) => "org/apache/datafusion/ExecutionException", + DataFusionError::ResourcesExhausted(_) => { + "org/apache/datafusion/ResourcesExhaustedException" + } + DataFusionError::IoError(_) + | DataFusionError::ObjectStore(_) + | DataFusionError::ParquetError(_) + | DataFusionError::AvroError(_) => "org/apache/datafusion/IoException", + // ArrowError is a 21-variant grab bag -- only some of those variants + // are actually IO-shaped. DivideByZero / ArithmeticOverflow / Compute + // / Cast / InvalidArgument / Memory etc. are execution-time failures + // produced by a normal query (e.g. SELECT 1/0), not IO. Routing them + // through IoException would hide ordinary execution errors from + // callers catching ExecutionException. + DataFusionError::ArrowError(arrow_err, _) => classify_arrow(arrow_err), + DataFusionError::NotImplemented(_) => "org/apache/datafusion/NotImplementedException", + DataFusionError::Configuration(_) => "org/apache/datafusion/ConfigurationException", + // Variants with no clean caller-facing category -- Internal (upstream + // bug-shaped), Substrait, Collection (multi-error), and any new + // variants a future DataFusion bump introduces -- fall through to + // the parent. The catch-all is deliberate: "I don't know what to do + // with this" surfaces as the parent class rather than a wrong typed + // subclass. Wrapper variants like Context / Diagnostic / Shared also + // land here when find_root() can't dig past them, which is rare + // because find_root walks `source()`. + _ => PARENT, + } +} + +/// Map an [`ArrowError`] variant onto the Java exception class to throw. +/// Only the genuinely IO-shaped variants (`IoError`, `IpcError`) land on +/// `IoException`; everything else is execution-time and routes through +/// `ExecutionException`. Schema/parse-shaped variants route through +/// `PlanException` so a malformed IPC schema or a parse error surfaces as a +/// query problem rather than an execution failure. +/// +/// Variants without a clean caller-facing category (`CDataInterface`, the +/// various overflow/index-overflow markers) fall through to the parent. +fn classify_arrow(err: &ArrowError) -> &'static str { + match err { + ArrowError::IoError(_, _) | ArrowError::IpcError(_) => "org/apache/datafusion/IoException", + ArrowError::SchemaError(_) | ArrowError::ParseError(_) => { + "org/apache/datafusion/PlanException" + } + ArrowError::DivideByZero + | ArrowError::ArithmeticOverflow(_) + | ArrowError::ComputeError(_) + | ArrowError::CastError(_) + | ArrowError::InvalidArgumentError(_) + | ArrowError::MemoryError(_) + | ArrowError::CsvError(_) + | ArrowError::JsonError(_) + | ArrowError::AvroError(_) + | ArrowError::ParquetError(_) + | ArrowError::ExternalError(_) => "org/apache/datafusion/ExecutionException", + ArrowError::NotYetImplemented(_) => "org/apache/datafusion/NotImplementedException", + // CDataInterface, DictionaryKeyOverflowError, RunEndIndexOverflowError, + // OffsetOverflowError, and any new variants in future Arrow bumps fall + // through to the parent -- "I don't know what to do with this" stays + // at the parent rather than getting routed to the wrong subclass. + _ => PARENT, + } +} + +fn throw(env: &mut JNIEnv, class: &str, message: &str) { if env.exception_check().unwrap_or(false) { return; } - let _ = env.throw_new("java/lang/RuntimeException", message); + let _ = env.throw_new(class, message); } fn panic_message(panic: &Box) -> String { @@ -56,3 +170,144 @@ fn panic_message(panic: &Box) -> String { "rust panic with non-string payload".to_string() } } + +#[cfg(test)] +mod tests { + use super::*; + use datafusion::arrow::error::ArrowError; + + #[test] + fn classify_unwrapped_variants_picks_typed_class() { + // Sanity check: the simple unwrapped cases land on the right classes. + assert_eq!( + classify(&DataFusionError::Plan("x".into())), + "org/apache/datafusion/PlanException" + ); + assert_eq!( + classify(&DataFusionError::ResourcesExhausted("x".into())), + "org/apache/datafusion/ResourcesExhaustedException" + ); + assert_eq!( + classify(&DataFusionError::Configuration("x".into())), + "org/apache/datafusion/ConfigurationException" + ); + } + + #[test] + fn classify_unrecognised_variant_falls_through_to_parent() { + // `Internal` has no clean caller-facing category; must surface as + // the parent class rather than getting routed to a wrong subclass. + assert_eq!(classify(&DataFusionError::Internal("x".into())), PARENT); + } + + #[test] + fn classify_via_find_root_unwraps_nested_resources_exhausted() { + // Codex regression: a DataFusionError::ResourcesExhausted buried + // inside an ArrowError::ExternalError(Box) + // wrapper would otherwise land on the outer ArrowError arm and be + // classified as IoException -- the wrong typed exception. Confirm + // that calling classify on `find_root` recovers the inner variant + // (the same shape upstream's own find_root() doctest pins). + let inner = DataFusionError::ResourcesExhausted("memory pool full".into()); + let contexted = DataFusionError::Context("aggregate stream".into(), Box::new(inner)); + let arrow_wrapped = ArrowError::ExternalError(Box::new(contexted)); + let outer = DataFusionError::ArrowError(Box::new(arrow_wrapped), None); + + // Without find_root, the outer would route to IoException; with it, + // the real root (ResourcesExhausted) wins. + assert_eq!( + classify(outer.find_root()), + "org/apache/datafusion/ResourcesExhaustedException" + ); + } + + #[test] + fn classify_via_find_root_unwraps_plan_through_context() { + // A Plan error wrapped only in DataFusionError::Context still has to + // reach PlanException, not the parent class. + let inner = DataFusionError::Plan("table 't' not found".into()); + let contexted = DataFusionError::Context("logical planning".into(), Box::new(inner)); + + assert_eq!( + classify(contexted.find_root()), + "org/apache/datafusion/PlanException" + ); + } + + #[test] + fn arrow_divide_by_zero_routes_to_execution_not_io() { + // Codex regression: DataFusionError::ArrowError(ArrowError::DivideByZero) + // is the actual shape produced by `SELECT 1/0`. The 21-variant + // ArrowError grab bag means routing the whole ArrowError arm to + // IoException would put ordinary divide-by-zero / overflow / cast + // errors on the wrong typed exception. classify_arrow inspects the + // inner variant. + let outer = DataFusionError::ArrowError(Box::new(ArrowError::DivideByZero), None); + assert_eq!( + classify(outer.find_root()), + "org/apache/datafusion/ExecutionException" + ); + + let outer = DataFusionError::ArrowError( + Box::new(ArrowError::ArithmeticOverflow("i32 overflow".into())), + None, + ); + assert_eq!( + classify(outer.find_root()), + "org/apache/datafusion/ExecutionException" + ); + + let outer = DataFusionError::ArrowError( + Box::new(ArrowError::CastError("Int32 -> Date32".into())), + None, + ); + assert_eq!( + classify(outer.find_root()), + "org/apache/datafusion/ExecutionException" + ); + } + + #[test] + fn arrow_io_variants_still_route_to_io() { + // Genuinely IO-shaped ArrowError variants stay on IoException. + let io = std::io::Error::other("disk full"); + let outer = DataFusionError::ArrowError( + Box::new(ArrowError::IoError("write failed".into(), io)), + None, + ); + assert_eq!( + classify(outer.find_root()), + "org/apache/datafusion/IoException" + ); + + let outer = + DataFusionError::ArrowError(Box::new(ArrowError::IpcError("bad framing".into())), None); + assert_eq!( + classify(outer.find_root()), + "org/apache/datafusion/IoException" + ); + } + + #[test] + fn arrow_schema_and_parse_errors_route_to_plan() { + // SchemaError and ParseError are query-shaped problems (you wrote a + // bad query / supplied a bad schema), not execution failures. + let outer = DataFusionError::ArrowError( + Box::new(ArrowError::SchemaError("column 'foo' not found".into())), + None, + ); + assert_eq!( + classify(outer.find_root()), + "org/apache/datafusion/PlanException" + ); + + let outer = DataFusionError::ArrowError( + Box::new(ArrowError::ParseError("expected int".into())), + None, + ); + assert_eq!( + classify(outer.find_root()), + "org/apache/datafusion/PlanException" + ); + } +}