apache · gene-bordegaray · Jun 4, 2026 · stuhood · Jun 5, 2026 · gene-bordegaray
diff --git a/datafusion/common/src/lib.rs b/datafusion/common/src/lib.rs
@@ -30,6 +30,7 @@ mod dfschema;
 mod functional_dependencies;
 mod join_type;
 mod param_value;
+mod partitioning;
 mod schema_reference;
 mod table_reference;
 mod unnest;
@@ -92,6 +93,7 @@ pub use join_type::{JoinConstraint, JoinSide, JoinType};
 pub use nested_struct::cast_column;
 pub use null_equality::NullEquality;
 pub use param_value::ParamValues;
+pub use partitioning::{SplitPoint, validate_range_split_points};
 pub use scalar::{ScalarType, ScalarValue};
 pub use schema_reference::SchemaReference;
 pub use spans::{Location, Span, Spans};

diff --git a/datafusion/common/src/partitioning.rs b/datafusion/common/src/partitioning.rs
@@ -0,0 +1,104 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::utils::compare_rows;
+use crate::{Result, ScalarValue, error::_plan_err};
+use arrow::compute::SortOptions;
+use std::cmp::Ordering;
+use std::fmt::{self, Display};
+
+/// A boundary between two adjacent range partitions.
+///
+/// A split point is a tuple with one [`ScalarValue`] per partitioning
+/// expression. Split points are interpreted lexicographically according to the
+/// ordering of the range partitioning that owns them.
+///
+/// `N` split points define `N + 1` partitions. A key equal to split point `i`
+/// belongs to partition `i + 1`: lower bounds inclusive, upper bounds exclusive.
+///
+/// ```text
+/// partition 0: key < split_points[0]
+/// partition 1: split_points[0] <= key < split_points[1]
+/// ...
+/// partition N - 1: split_points[N - 2] <= key < split_points[N - 1]
+/// partition N: split_points[N - 1] <= key
+/// ```
+///
+/// The derived `PartialOrd` compares raw values and ignores any sort options.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
+pub struct SplitPoint {
+    values: Vec<ScalarValue>,
+}
+
+impl SplitPoint {
+    /// Builds a split point that owns the given tuple of boundary values.
+    pub fn new(values: Vec<ScalarValue>) -> Self {
+        SplitPoint { values }
+    }
+
+    /// Borrows the boundary values held by this split point.
+    pub fn values(&self) -> &[ScalarValue] {
+        self.values.as_slice()
+    }
+}
+
+impl Display for SplitPoint {
+    /// Renders the tuple as `(v0, v1, ...)`, with values separated by `", "`.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let mut rendered = String::new();
+        for (position, value) in self.values.iter().enumerate() {
+            rendered.push_str(if position == 0 { "" } else { ", " });
+            rendered.push_str(&value.to_string());
+        }
+        write!(f, "({rendered})")
+    }
+}
+
+/// Checks that each split point has one value per sort option and that the
+/// split points are strictly increasing under `sort_options`.
+///
+/// Returns a plan error on the first violation; comparison errors propagate.
+pub fn validate_range_split_points(
+    split_points: &[SplitPoint],
+    sort_options: &[SortOptions],
+) -> Result<()> {
+    let width = sort_options.len();
+    let mismatch = split_points
+        .iter()
+        .map(|split_point| split_point.values().len())
+        .enumerate()
+        .find(|(_, split_point_width)| *split_point_width != width);
+    if let Some((idx, split_point_width)) = mismatch {
+        return _plan_err!(
+            "Range partitioning split point {idx} has width {split_point_width}, but ordering has width {width}"
+        );
+    }
+
+    for (idx, pair) in split_points.windows(2).enumerate() {
+        let order = compare_rows(pair[0].values(), pair[1].values(), sort_options)?;
+        if order != Ordering::Less {
+            return _plan_err!(
+                "Range partitioning split points must be strictly ordered: split point {idx} ({}) must be less than split point {} ({})",
+                pair[0],
+                idx + 1,
+                pair[1]
+            );
+        }
+    }
+
+    Ok(())
+}
diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs
@@ -1264,6 +1264,11 @@ impl DefaultPhysicalPlanner {
                             .collect::<Result<Vec<_>>>()?;
                         Partitioning::Hash(runtime_expr, *n)
                     }
+                    LogicalPartitioning::Range(_) => {
+                        return not_impl_err!(
+                            "Physical plan does not support Range repartitioning"
+                        );
+                    }
                     LogicalPartitioning::DistributeBy(_) => {
                         return not_impl_err!(
                             "Physical plan does not support DistributeBy partitioning"
@@ -3245,8 +3250,8 @@ mod tests {
     use arrow_schema::{FieldRef, SchemaRef};
     use datafusion_common::config::ConfigOptions;
     use datafusion_common::{
-        DFSchemaRef, ScalarValue, TableReference, ToDFSchema as _, assert_batches_eq,
-        assert_contains,
+        DFSchemaRef, ScalarValue, SplitPoint, TableReference, ToDFSchema as _,
+        assert_batches_eq, assert_contains,
     };
     use datafusion_execution::TaskContext;
     use datafusion_execution::runtime_env::RuntimeEnv;
@@ -3255,8 +3260,8 @@ mod tests {
     use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
     use datafusion_expr::{
         Accumulator, AggregateUDF, AggregateUDFImpl, ExprFunctionExt, LogicalPlanBuilder,
-        Signature, TableSource, UserDefinedLogicalNodeCore, Volatility,
-        WindowFunctionDefinition, col, lit,
+        RangePartitioning, Signature, TableSource, UserDefinedLogicalNodeCore,
+        Volatility, WindowFunctionDefinition, col, lit,
     };
     use datafusion_functions_aggregate::count::{count_all, count_udaf};
     use datafusion_functions_aggregate::expr_fn::sum;
@@ -3300,6 +3305,25 @@ mod tests {
         aggregate_explain(&logical_plan).await
     }
 
+    #[tokio::test]
+    async fn logical_range_repartition_is_not_supported() -> Result<()> {
+        // The physical planner is expected to reject logical Range repartitioning.
+        let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
+        let split_points = vec![SplitPoint::new(vec![ScalarValue::Int32(Some(10))])];
+        let range =
+            RangePartitioning::try_new(vec![col("a").sort(true, true)], split_points)?;
+        let logical_plan = scan_empty(None, &schema, None)?
+            .repartition(LogicalPartitioning::Range(range))?
+            .build()?;
+
+        let message = plan(&logical_plan).await.unwrap_err().to_string();
+        assert_contains!(
+            message,
+            "Physical plan does not support Range repartitioning"
+        );
+        Ok(())
+    }
+
     fn int64_field(name: &str, nullable: bool) -> Field {
         Field::new(name, DataType::Int64, nullable)
     }

diff --git a/datafusion/expr/src/logical_plan/display.rs b/datafusion/expr/src/logical_plan/display.rs
@@ -515,6 +515,23 @@ impl<'a, 'b> PgJsonVisitor<'a, 'b> {
                         "Partitioning Key": hash_expr
                     })
                 }
+                Partitioning::Range(range) => {
+                    let range_expr: Vec<String> =
+                        range.ordering().iter().map(|e| format!("{e}")).collect();
+                    let split_points: Vec<String> = range
+                        .split_points()
+                        .iter()
+                        .map(|e| format!("{e}"))
+                        .collect();
+
+                    json!({
+                        "Node Type": "Repartition",
+                        "Partitioning Scheme": "Range",
+                        "Partition Count": range.partition_count(),
+                        "Partitioning Key": range_expr,
+                        "Split Points": split_points
+                    })
+                }
                 Partitioning::DistributeBy(expr) => {
                     let dist_by_expr: Vec<String> =
                         expr.iter().map(|e| format!("{e}")).collect();

diff --git a/datafusion/expr/src/logical_plan/mod.rs b/datafusion/expr/src/logical_plan/mod.rs
@@ -41,9 +41,9 @@ pub use plan::{
     Aggregate, Analyze, ColumnUnnestList, DescribeTable, Distinct, DistinctOn,
     EmptyRelation, Explain, ExplainOption, Extension, FetchType, Filter, Join,
     JoinConstraint, JoinType, Limit, LogicalPlan, Partitioning, PlanType, Projection,
-    RecursiveQuery, Repartition, SkipType, Sort, StringifiedPlan, Subquery,
-    SubqueryAlias, TableScan, TableScanBuilder, ToStringifiedPlan, Union, Unnest, Values,
-    Window, projection_schema,
+    RangePartitioning, RecursiveQuery, Repartition, SkipType, Sort, StringifiedPlan,
+    Subquery, SubqueryAlias, TableScan, TableScanBuilder, ToStringifiedPlan, Union,
+    Unnest, Values, Window, projection_schema,
 };
 pub use statement::{
     Deallocate, Execute, Prepare, ResetVariable, SetVariable, Statement,