diff --git a/datafusion/datasource/src/file_scan_config/mod.rs b/datafusion/datasource/src/file_scan_config/mod.rs index e1fd10324373..4bf86e17d387 100644 --- a/datafusion/datasource/src/file_scan_config/mod.rs +++ b/datafusion/datasource/src/file_scan_config/mod.rs @@ -937,14 +937,19 @@ impl DataSource for FileScanConfig { /// │ → SortExec removed, fetch (LIMIT) pushed to DataSourceExec /// │ /// ├─► FileSource returns Inexact - /// │ (reverse_row_groups=true) - /// │ → SortExec kept, scan optimized + /// │ (e.g. column_in_file_schema: opener will reorder RGs at runtime) + /// │ → rebuild_with_source: sort files by stats; if the post-sort + /// │ file groups are non-overlapping AND the request now validates + /// │ AND no NULLs sit in the sort columns of non-last files, + /// │ upgrade back to Exact (SortExec removed). Otherwise stays + /// │ Inexact and SortExec is kept while the scan is still + /// │ optimised via `sort_order_for_reorder` / `reverse_row_groups`. /// │ /// └─► FileSource returns Unsupported - /// (ordering stripped because files in wrong order) + /// (e.g. expression sort key or partition column) /// → try_sort_file_groups_by_statistics(): /// 1. Sort files within each group by min/max statistics - /// 2. Re-check: non-overlapping + ordering valid? + /// 2. Re-check: non-overlapping + ordering valid + no NULLs? /// YES → Exact → SortExec removed /// NO → Inexact (files reordered, Sort stays) /// ``` @@ -973,8 +978,42 @@ impl DataSource for FileScanConfig { } } SortOrderPushdownResult::Inexact { inner } => { - Ok(SortOrderPushdownResult::Inexact { - inner: Arc::new(self.rebuild_with_source(inner, false, order)?), + let mut config = self.rebuild_with_source(inner, false, order)?; + // `rebuild_with_source` reorders files by stats; if the + // post-sort files are non-overlapping AND the request now + // validates against the new file groups, `output_ordering` + // is preserved and we can upgrade back to Exact. 
This + // restores the sort-elimination behaviour that lived in + // the `Unsupported` → `try_sort_file_groups_by_statistics` + // path before #21956 routed `column_in_file_schema` cases + // here. + if config.output_ordering.is_empty() { + return Ok(SortOrderPushdownResult::Inexact { + inner: Arc::new(config), + }); + } + // Upgrading to Exact: the post-sort file groups are + // non-overlapping and each file's declared ordering + // re-validates, so reading the files in their natural + // (declared-sorted) order already yields the requested + // ordering — exactly like the `Unsupported` → Exact path, + // which reads files in natural order too. + // + // Drop the runtime row-group reorder hints the Inexact + // source carried (`sort_order_for_reorder` / + // `reverse_row_groups`) by restoring the original, + // hint-free source. With the `SortExec` removed those + // hints are not just redundant but unsafe: for a DESC + // request the opener sorts row groups ASC-by-min and then + // reverses them, which reorders two row groups within a + // single file that share the same `min` incorrectly + // (e.g. a file `[10,8,8,8]` whose row groups are + // `[10,8]` and `[8,8]` would stream as `8,8,10,8`). + // The `SortExec` used to mask this; once it is gone the + // reordered stream is the final, wrong answer. + config.file_source = Arc::clone(&self.file_source); + Ok(SortOrderPushdownResult::Exact { + inner: Arc::new(config), }) } SortOrderPushdownResult::Unsupported => { diff --git a/datafusion/datasource/src/file_scan_config/sort_pushdown.rs b/datafusion/datasource/src/file_scan_config/sort_pushdown.rs index af08ed71b9a6..98ed9ba621e7 100644 --- a/datafusion/datasource/src/file_scan_config/sort_pushdown.rs +++ b/datafusion/datasource/src/file_scan_config/sort_pushdown.rs @@ -138,31 +138,76 @@ impl FileScanConfig { false }; - if is_exact && all_non_overlapping { - // Truly exact: within-file ordering guaranteed and files are non-overlapping. 
- // Keep output_ordering so SortExec can be eliminated for each partition. - // - // We intentionally do NOT redistribute files across groups here. - // The planning-phase bin-packing may interleave file ranges across groups: - // - // Group 0: [f1(1-10), f3(21-30)] ← interleaved with group 1 - // Group 1: [f2(11-20), f4(31-40)] - // - // This interleaving is actually beneficial because SPM pulls from both - // partitions concurrently, keeping parallel I/O active: - // - // SPM: pull P0 [1-10] → pull P1 [11-20] → pull P0 [21-30] → pull P1 [31-40] - // ^^^^^^^^^^^^ ^^^^^^^^^^^^ - // both partitions scanning files simultaneously - // - // If we were to redistribute files consecutively: - // Group 0: [f1(1-10), f2(11-20)] ← all values < group 1 - // Group 1: [f3(21-30), f4(31-40)] + // Decide whether to keep `output_ordering` (i.e. let the outer + // pushdown report `Exact` and drop `SortExec`). + // + // Two paths can produce a keep: + // + // 1. `is_exact && all_non_overlapping`: the source already had + // validated ordering and the post-sort files still don't + // overlap — Exact carries through unchanged. + // + // 2. `!is_exact && all_non_overlapping`: source returned + // `Inexact` because pre-sort `validated_output_ordering()` + // stripped the declaration (files were listed out of order + // on disk). After our stats-based sort the files are now + // non-overlapping — re-validate against the new file + // groups and, if it passes, upgrade back to Exact so the + // outer wrapper drops the `SortExec`. Without this, the + // `Inexact` branch stayed Inexact even when reorder + // restored a perfectly valid ordering, leaving an + // unnecessary `SortExec` above the source (regression + // after #21956's `column_in_file_schema` signal pushed + // this scenario into the Inexact branch instead of the + // `try_sort_file_groups_by_statistics` fallback). + // + // We intentionally do NOT redistribute files across groups here. 
+ // The planning-phase bin-packing may interleave file ranges across groups: + // + // Group 0: [f1(1-10), f3(21-30)] ← interleaved with group 1 + // Group 1: [f2(11-20), f4(31-40)] + // + // This interleaving is actually beneficial because SPM pulls from both + // partitions concurrently, keeping parallel I/O active. + let keep_ordering = match (all_non_overlapping, is_exact) { + // Files still overlap after the stats sort — the combined + // stream isn't ordered, so `output_ordering` must be dropped. + (false, _) => false, + // Source already had validated ordering and the post-sort + // files still don't overlap — Exact carries through. + (true, true) => true, + // Source returned `Inexact`; re-validate against the + // reordered file groups to decide whether to upgrade. // - // SPM would read ALL of group 0 first (values always smaller), then group 1. - // This degrades to single-threaded sequential I/O — the other partition - // sits idle the entire time, losing the parallelism benefit. - } else { + // Same NULL guard as `try_sort_file_groups_by_statistics`: + // we cannot claim Exact if any non-last file contains + // NULLs in the sort columns. With NULLS LAST those + // NULLs sit after all non-null rows in the file, so + // the next file's non-null rows (which must sort before + // every NULL) would appear *after* those NULLs in the + // concatenated stream — breaking the ordering. + (true, false) => { + let projected_schema = new_config.projected_schema()?; + let projection_indices = new_config + .file_source + .projection() + .as_ref() + .and_then(|p| ordered_column_indices_from_projection(p)); + if any_file_has_nulls_in_sort_columns( + &new_config.file_groups, + order, + &projected_schema, + projection_indices.as_deref(), + ) { + false + } else { + let new_eq_props = new_config.eq_properties(); + new_eq_props.ordering_satisfy(order.iter().cloned())? 
+ } + } + }; + + if !keep_ordering { new_config.output_ordering = vec![]; } diff --git a/datafusion/sqllogictest/test_files/sort_pushdown.slt b/datafusion/sqllogictest/test_files/sort_pushdown.slt index 540562eb3bc8..36fb38f5b402 100644 --- a/datafusion/sqllogictest/test_files/sort_pushdown.slt +++ b/datafusion/sqllogictest/test_files/sort_pushdown.slt @@ -1100,8 +1100,9 @@ CREATE EXTERNAL TABLE reversed_parquet(id INT, value INT) STORED AS PARQUET LOCATION 'test_files/scratch/sort_pushdown/reversed/'; -# Test 4.1: PushdownSort reorders files by min/max statistics so they are -# already in correct sort order → non-overlapping → no SortExec needed. +# Test 4.1: PushdownSort reorders files by min/max statistics; the +# post-sort file groups are non-overlapping, the inferred ordering +# re-validates, and the SortExec above can be eliminated. # (files reordered from [a_high, b_mid, c_low] to [c_low, b_mid, a_high]) query TT EXPLAIN SELECT * FROM reversed_parquet ORDER BY id ASC; @@ -1109,9 +1110,7 @@ EXPLAIN SELECT * FROM reversed_parquet ORDER BY id ASC; logical_plan 01)Sort: reversed_parquet.id ASC NULLS LAST 02)--TableScan: reversed_parquet projection=[id, value] -physical_plan -01)SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false] -02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/c_low.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/b_mid.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/a_high.parquet]]}, projection=[id, value], file_type=parquet, sort_order_for_reorder=[id@0 ASC NULLS LAST] +physical_plan DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/c_low.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/b_mid.parquet, 
WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/a_high.parquet]]}, projection=[id, value], output_ordering=[id@0 ASC NULLS LAST], file_type=parquet # Test 4.2: Results must be correct query II @@ -1175,10 +1174,153 @@ SELECT * FROM overlap_parquet ORDER BY id ASC; 5 500 6 600 +# Test 5b: Safety case — no WITH ORDER, files written without ORDER BY (no +# sorting_columns metadata). Source has no way to declare per-file ordering, +# so even though min/max stats happen to be non-overlapping, the optimizer +# must NOT eliminate SortExec. +statement ok +CREATE TABLE no_decl_low(id INT, value INT) AS VALUES (1, 100), (3, 300), (2, 200); + +statement ok +CREATE TABLE no_decl_mid(id INT, value INT) AS VALUES (6, 600), (4, 400), (5, 500); + +statement ok +CREATE TABLE no_decl_high(id INT, value INT) AS VALUES (9, 900), (8, 800), (7, 700); + +# Write WITHOUT ORDER BY so each file lacks sorting_columns metadata. +query I +COPY no_decl_low TO 'test_files/scratch/sort_pushdown/no_decl/a_low.parquet'; +---- +3 + +query I +COPY no_decl_mid TO 'test_files/scratch/sort_pushdown/no_decl/b_mid.parquet'; +---- +3 + +query I +COPY no_decl_high TO 'test_files/scratch/sort_pushdown/no_decl/c_high.parquet'; +---- +3 + +statement ok +CREATE EXTERNAL TABLE no_decl_parquet(id INT, value INT) +STORED AS PARQUET +LOCATION 'test_files/scratch/sort_pushdown/no_decl/'; + +# Min/max stats per file happen to be non-overlapping (1-3, 4-6, 7-9) but the +# rows inside each file are NOT sorted by id. Without an ordering declaration +# (WITH ORDER or parquet sorting_columns), the optimizer cannot prove the +# output would be sorted — SortExec must stay. 
+query TT +EXPLAIN SELECT * FROM no_decl_parquet ORDER BY id ASC; +---- +logical_plan +01)Sort: no_decl_parquet.id ASC NULLS LAST +02)--TableScan: no_decl_parquet projection=[id, value] +physical_plan +01)SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/no_decl/a_low.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/no_decl/b_mid.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/no_decl/c_high.parquet]]}, projection=[id, value], file_type=parquet, sort_order_for_reorder=[id@0 ASC NULLS LAST] + +# Results must still be correct (SortExec does the final sort) +query II +SELECT * FROM no_decl_parquet ORDER BY id ASC; +---- +1 100 +2 200 +3 300 +4 400 +5 500 +6 600 +7 700 +8 800 +9 900 + +# Cleanup Test 5b +statement ok +DROP TABLE no_decl_low; + +statement ok +DROP TABLE no_decl_mid; + +statement ok +DROP TABLE no_decl_high; + +statement ok +DROP TABLE no_decl_parquet; + +# Test 5c: NULL safety — files in **wrong** filesystem order so the +# Inexact branch fires; the file that becomes non-last after the stats +# reorder contains NULLs in the sort column. With NULLS LAST, NULLs +# inside a file sit after all non-null rows, so the next file's non-null +# values (which must sort before every NULL) would land AFTER those +# NULLs in the concatenated stream — breaking the ordering. The fix must +# NOT upgrade to Exact here even though stats are non-overlapping. + +statement ok +CREATE TABLE null_safety_high(id INT, value INT) AS VALUES (4, 400), (5, 500), (6, 600); + +statement ok +CREATE TABLE null_safety_low_with_nulls(id INT, value INT) AS VALUES (1, 100), (2, 200), (3, 300), (NULL, 999); + +# Name files so alphabetical order is REVERSED relative to id order +# (a_high before b_low) — triggers the Inexact / re-validate path. 
+query I +COPY (SELECT * FROM null_safety_high ORDER BY id ASC NULLS LAST) +TO 'test_files/scratch/sort_pushdown/null_safety/a_high.parquet'; +---- +3 + +query I +COPY (SELECT * FROM null_safety_low_with_nulls ORDER BY id ASC NULLS LAST) +TO 'test_files/scratch/sort_pushdown/null_safety/b_low_nulls.parquet'; +---- +4 + +statement ok +CREATE EXTERNAL TABLE null_safety_parquet(id INT, value INT) +STORED AS PARQUET +LOCATION 'test_files/scratch/sort_pushdown/null_safety/' +WITH ORDER (id ASC NULLS LAST); + +# After Phase 2 reorder file_groups would be [b_low_nulls, a_high] and +# min/max would be non-overlapping — but b_low_nulls has NULLs in the +# sort column, so we must NOT upgrade to Exact. SortExec stays. +query TT +EXPLAIN SELECT * FROM null_safety_parquet ORDER BY id ASC NULLS LAST; +---- +logical_plan +01)Sort: null_safety_parquet.id ASC NULLS LAST +02)--TableScan: null_safety_parquet projection=[id, value] +physical_plan +01)SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/null_safety/b_low_nulls.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/null_safety/a_high.parquet]]}, projection=[id, value], file_type=parquet, sort_order_for_reorder=[id@0 ASC NULLS LAST] + +# Results must still be correct (SortExec does the final sort) +query II +SELECT * FROM null_safety_parquet ORDER BY id ASC NULLS LAST; +---- +1 100 +2 200 +3 300 +4 400 +5 500 +6 600 +NULL 999 + +statement ok +DROP TABLE null_safety_high; + +statement ok +DROP TABLE null_safety_low_with_nulls; + +statement ok +DROP TABLE null_safety_parquet; + # Test 6: WITH ORDER + reversed filesystem order # Same file setup as Test 4 but explicitly declaring ordering via WITH ORDER. -# Even with WITH ORDER, the optimizer should detect that inter-file order is wrong -# and keep SortExec. 
+# PushdownSort reorders files by min/max stats; after reorder the inter-file +# ordering re-validates and the SortExec above is eliminated. statement ok CREATE EXTERNAL TABLE reversed_with_order_parquet(id INT, value INT) @@ -1194,9 +1336,7 @@ EXPLAIN SELECT * FROM reversed_with_order_parquet ORDER BY id ASC; logical_plan 01)Sort: reversed_with_order_parquet.id ASC NULLS LAST 02)--TableScan: reversed_with_order_parquet projection=[id, value] -physical_plan -01)SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false] -02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/c_low.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/b_mid.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/a_high.parquet]]}, projection=[id, value], file_type=parquet, sort_order_for_reorder=[id@0 ASC NULLS LAST] +physical_plan DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/c_low.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/b_mid.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/reversed/a_high.parquet]]}, projection=[id, value], output_ordering=[id@0 ASC NULLS LAST], file_type=parquet # Test 6.2: Results must be correct query II @@ -1333,9 +1473,7 @@ EXPLAIN SELECT * FROM desc_reversed_parquet ORDER BY id DESC; logical_plan 01)Sort: desc_reversed_parquet.id DESC NULLS FIRST 02)--TableScan: desc_reversed_parquet projection=[id, value] -physical_plan -01)SortExec: expr=[id@0 DESC], preserve_partitioning=[false] -02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/desc_reversed/b_high.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/desc_reversed/a_low.parquet]]}, projection=[id, value], 
file_type=parquet, sort_order_for_reorder=[id@0 DESC], reverse_row_groups=true +physical_plan DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/desc_reversed/b_high.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/desc_reversed/a_low.parquet]]}, projection=[id, value], output_ordering=[id@0 DESC], file_type=parquet # Test 8.2: Results must be correct query II @@ -1348,6 +1486,78 @@ SELECT * FROM desc_reversed_parquet ORDER BY id DESC; 2 200 1 100 +# Test 8b: DESC with multiple row groups per file sharing a min value. +# Regression test for the Inexact→Exact upgrade: when SortExec is eliminated +# the files must be read in natural order. The opener's runtime row-group +# reorder (sort ASC-by-min then reverse) mis-orders two row groups in one file +# that share the same min — so the upgrade must NOT leave those hints active. +# +# File b_high is DESC-sorted [10,8,8,8] written with 2 rows per row group: +# RG0 = [10, 8] (min 8, max 10) +# RG1 = [ 8, 8] (min 8, max 8) +# Both row groups have min=8. Naively reordering RGs ASC-by-min then reversing +# yields [RG1, RG0] → 8,8,10,8 (wrong). Natural order [RG0, RG1] is correct. 
+ +statement ok +CREATE TABLE rg_desc_high(id INT, value INT) AS VALUES (10, 100), (8, 801), (8, 802), (8, 803); + +statement ok +CREATE TABLE rg_desc_low(id INT, value INT) AS VALUES (3, 300), (2, 200), (1, 100); + +query I +COPY (SELECT * FROM rg_desc_high ORDER BY id DESC) +TO 'test_files/scratch/sort_pushdown/rg_desc/b_high.parquet' +OPTIONS ('format.max_row_group_size' '2'); +---- +4 + +query I +COPY (SELECT * FROM rg_desc_low ORDER BY id DESC) +TO 'test_files/scratch/sort_pushdown/rg_desc/a_low.parquet' +OPTIONS ('format.max_row_group_size' '2'); +---- +3 + +# Files named so filesystem order [a_low, b_high] is wrong for DESC → the +# Inexact path fires, stats reorder makes file groups [b_high, a_low] +# non-overlapping, and the upgrade eliminates SortExec. +statement ok +CREATE EXTERNAL TABLE rg_desc_parquet(id INT, value INT) +STORED AS PARQUET +LOCATION 'test_files/scratch/sort_pushdown/rg_desc/' +WITH ORDER (id DESC); + +# SortExec eliminated, files reordered, NO sort_order_for_reorder / +# reverse_row_groups (natural read is correct after the upgrade). +query TT +EXPLAIN SELECT id FROM rg_desc_parquet ORDER BY id DESC; +---- +logical_plan +01)Sort: rg_desc_parquet.id DESC NULLS FIRST +02)--TableScan: rg_desc_parquet projection=[id] +physical_plan DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/rg_desc/b_high.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/rg_desc/a_low.parquet]]}, projection=[id], output_ordering=[id@0 DESC], file_type=parquet + +# Results must be in DESC order — id=10 first. +query I +SELECT id FROM rg_desc_parquet ORDER BY id DESC; +---- +10 +8 +8 +8 +3 +2 +1 + +statement ok +DROP TABLE rg_desc_parquet; + +statement ok +DROP TABLE rg_desc_high; + +statement ok +DROP TABLE rg_desc_low; + # Test 9: Multi-column sort key validation # Files have (category, id) ordering. 
Files share a boundary value on category='B' # so column-level min/max statistics overlap on the primary key column. @@ -2218,7 +2428,7 @@ STORED AS PARQUET LOCATION 'test_files/scratch/sort_pushdown/tg_buffer/' WITH ORDER (id ASC); -# Test G.1: BufferExec appears between SPM and DataSourceExec +# Test G.1: SortExec eliminated; BufferExec replaces it between SPM and DataSourceExec query TT EXPLAIN SELECT * FROM tg_buffer ORDER BY id ASC; ---- @@ -2227,8 +2437,8 @@ logical_plan 02)--TableScan: tg_buffer projection=[id, value] physical_plan 01)SortPreservingMergeExec: [id@0 ASC NULLS LAST] -02)--SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[true] -03)----DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/tg_buffer/b_mid.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/tg_buffer/a_high.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/tg_buffer/c_low.parquet]]}, projection=[id, value], file_type=parquet, sort_order_for_reorder=[id@0 ASC NULLS LAST] +02)--BufferExec: capacity=1073741824 +03)----DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/tg_buffer/b_mid.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/tg_buffer/a_high.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/tg_buffer/c_low.parquet]]}, projection=[id, value], output_ordering=[id@0 ASC NULLS LAST], file_type=parquet # Verify correctness query II @@ -2245,7 +2455,7 @@ SELECT * FROM tg_buffer ORDER BY id ASC; 9 900 10 1000 -# Test G.2: LIMIT query with BufferExec +# Test G.2: LIMIT query — SortExec eliminated, limit pushed to source; BufferExec stays query TT EXPLAIN SELECT * FROM tg_buffer ORDER BY id ASC LIMIT 3; ---- @@ -2254,8 +2464,8 @@ logical_plan 02)--TableScan: tg_buffer projection=[id, value] physical_plan 
01)SortPreservingMergeExec: [id@0 ASC NULLS LAST], fetch=3 -02)--SortExec: TopK(fetch=3), expr=[id@0 ASC NULLS LAST], preserve_partitioning=[true] -03)----DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/tg_buffer/b_mid.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/tg_buffer/a_high.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/tg_buffer/c_low.parquet]]}, projection=[id, value], file_type=parquet, predicate=DynamicFilter [ empty ], sort_order_for_reorder=[id@0 ASC NULLS LAST] +02)--BufferExec: capacity=1073741824 +03)----DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/tg_buffer/b_mid.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/tg_buffer/a_high.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/tg_buffer/c_low.parquet]]}, projection=[id, value], limit=3, output_ordering=[id@0 ASC NULLS LAST], file_type=parquet query II SELECT * FROM tg_buffer ORDER BY id ASC LIMIT 3;