From 605c6eda9001dbca79f77ba3357edd5419214cf3 Mon Sep 17 00:00:00 2001
From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com>
Date: Tue, 14 Apr 2026 09:20:52 -0500
Subject: [PATCH] Port filter_pushdown.rs async tests to sqllogictest

Port 24 end-to-end filter-pushdown tests out of
`datafusion/core/tests/physical_optimizer/filter_pushdown.rs` into the
sqllogictest suite. The new `datafusion.explain.analyze_categories`
session config lets `EXPLAIN ANALYZE` emit only deterministic metric
categories ('rows'), so these tests can assert directly on the
`predicate=DynamicFilter [ ... ]` text without `<slt:ignore>` scrubbing
around timing/bytes.

## What moved

New/extended tests in
`datafusion/sqllogictest/test_files/push_down_filter_parquet.slt`:

- TopK dynamic filter pushdown (single-col, multi-col sort,
  integration with max_row_group_size=128 and pushdown_rows_matched /
  pushdown_rows_pruned counters)
- HashJoin CollectLeft dynamic filter with `struct(a, b) IN (SET)` shape
- Nested hash joins (filter propagates to both inner scans)
- Parent filter split across the two sides of a HashJoin
- TopK above HashJoin (both dynamic filters ANDed on the probe scan)
- Dynamic filter through a GROUP BY between HashJoin and probe scan
- TopK projection rewrite (reorder, prune, expression, alias shadowing)
- NULL-bearing build-side join keys
- LEFT JOIN and LEFT SEMI JOIN dynamic filter pushdown
- HashTable strategy (`hash_lookup`) via
  `hash_join_inlist_pushdown_max_size = 1` on both string and integer
  multi-column keys

New tests in
`datafusion/sqllogictest/test_files/push_down_filter_regression.slt`:

- Aggregate dynamic filter baseline: MIN(a), MAX(a), MIN(a) + MAX(a),
  MIN(a) + MAX(b), mixed MIN/MAX with unsupported expression input,
  all-NULL input (filter stays `true`), MIN(a+1) (no filter emitted)
- Filter on grouping column pushes through AggregateExec
- Filter on aggregate result (HAVING count > 5) stays above the aggregate
- End-to-end aggregate dynamic filter pruning a multi-file parquet scan

## What stayed in Rust

Ten async tests were marked non-portable with a short comment explaining
why. In short: they either hand-wire `PartitionMode::Partitioned` /
`RepartitionExec` structures SQL never constructs, assert via debug APIs
(`dynamic_filter_for_test()`, `apply_expressions` +
`downcast_ref::<DynamicFilterPhysicalExpr>`) that are not observable
from SQL, or target the specific stacked-`FilterExec` shape that the
logical optimizer collapses before physical planning.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../physical_optimizer/filter_pushdown.rs     | 2356 +----------------
 .../test_files/push_down_filter_parquet.slt   |  857 ++++++
 .../push_down_filter_regression.slt           |  311 ++-
 3 files changed, 1295 insertions(+), 2229 deletions(-)

diff --git a/datafusion/core/tests/physical_optimizer/filter_pushdown.rs b/datafusion/core/tests/physical_optimizer/filter_pushdown.rs
index a255c07545829..d058e44a85d00 100644
--- a/datafusion/core/tests/physical_optimizer/filter_pushdown.rs
+++ b/datafusion/core/tests/physical_optimizer/filter_pushdown.rs
@@ -18,7 +18,7 @@
 use std::sync::{Arc, LazyLock};
 
 use arrow::{
-    array::{Float64Array, Int32Array, RecordBatch, StringArray, record_batch},
+    array::record_batch,
     datatypes::{DataType, Field, Schema, SchemaRef},
     util::pretty::pretty_format_batches,
 };
@@ -30,11 +30,10 @@ use datafusion::{
         PhysicalExpr,
         expressions::{BinaryExpr, Column, Literal},
     },
-    prelude::{ParquetReadOptions, SessionConfig, SessionContext},
+    prelude::{SessionConfig, SessionContext},
     scalar::ScalarValue,
 };
 use datafusion_catalog::memory::DataSourceExec;
-use datafusion_common::JoinType;
 use datafusion_common::config::ConfigOptions;
 use datafusion_datasource::{
     PartitionedFile, file_groups::FileGroup, file_scan_config::FileScanConfigBuilder,
@@ -42,19 +41,14 @@ use datafusion_datasource::{
 use datafusion_execution::object_store::ObjectStoreUrl;
 use datafusion_expr::ScalarUDF;
 use datafusion_functions::math::random::RandomFunc;
-use datafusion_functions_aggregate::{
-    count::count_udaf,
-    min_max::{max_udaf, min_udaf},
-};
+use datafusion_functions_aggregate::{count::count_udaf, min_max::min_udaf};
 use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr, expressions::col};
 use datafusion_physical_expr::{
-    Partitioning, ScalarFunctionExpr,
-    aggregate::{AggregateExprBuilder, AggregateFunctionExpr},
+    Partitioning, ScalarFunctionExpr, aggregate::AggregateExprBuilder,
 };
 use datafusion_physical_optimizer::{
     PhysicalOptimizerRule, filter_pushdown::FilterPushdown,
 };
-use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode};
 use datafusion_physical_plan::{
     ExecutionPlan,
     aggregates::{AggregateExec, AggregateMode, PhysicalGroupBy},
@@ -70,9 +64,7 @@ use super::pushdown_utils::{
     OptimizationTest, TestNode, TestScanBuilder, TestSource, format_plan_for_test,
 };
 use datafusion_physical_plan::union::UnionExec;
-use futures::StreamExt;
-use object_store::{ObjectStore, memory::InMemory};
-use regex::Regex;
+use object_store::memory::InMemory;
 
 #[test]
 fn test_pushdown_into_scan() {
@@ -176,128 +168,11 @@ fn test_pushdown_into_scan_with_config_options() {
     );
 }
 
-#[tokio::test]
-async fn test_dynamic_filter_pushdown_through_hash_join_with_topk() {
-    use datafusion_common::JoinType;
-    use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode};
-
-    // Create build side with limited values
-    let build_batches = vec![
-        record_batch!(
-            ("a", Utf8, ["aa", "ab"]),
-            ("b", Utf8View, ["ba", "bb"]),
-            ("c", Float64, [1.0, 2.0])
-        )
-        .unwrap(),
-    ];
-    let build_side_schema = Arc::new(Schema::new(vec![
-        Field::new("a", DataType::Utf8, false),
-        Field::new("b", DataType::Utf8View, false),
-        Field::new("c", DataType::Float64, false),
-    ]));
-    let build_scan = TestScanBuilder::new(Arc::clone(&build_side_schema))
-        .with_support(true)
-        .with_batches(build_batches)
-        .build();
-
-    // Create probe side with more values
-    let probe_batches = vec![
-        record_batch!(
-            ("d", Utf8, ["aa", "ab", "ac", "ad"]),
-            ("e", Utf8View, ["ba", "bb", "bc", "bd"]),
-            ("f", Float64, [1.0, 2.0, 3.0, 4.0])
-        )
-        .unwrap(),
-    ];
-    let probe_side_schema = Arc::new(Schema::new(vec![
-        Field::new("d", DataType::Utf8, false),
-        Field::new("e", DataType::Utf8View, false),
-        Field::new("f", DataType::Float64, false),
-    ]));
-    let probe_scan = TestScanBuilder::new(Arc::clone(&probe_side_schema))
-        .with_support(true)
-        .with_batches(probe_batches)
-        .build();
-
-    // Create HashJoinExec
-    let on = vec![(
-        col("a", &build_side_schema).unwrap(),
-        col("d", &probe_side_schema).unwrap(),
-    )];
-    let join = Arc::new(
-        HashJoinExec::try_new(
-            build_scan,
-            probe_scan,
-            on,
-            None,
-            &JoinType::Inner,
-            None,
-            PartitionMode::Partitioned,
-            datafusion_common::NullEquality::NullEqualsNothing,
-            false,
-        )
-        .unwrap(),
-    );
-
-    let join_schema = join.schema();
-
-    // Finally let's add a SortExec on the outside to test pushdown of dynamic filters
-    let sort_expr =
-        PhysicalSortExpr::new(col("e", &join_schema).unwrap(), SortOptions::default());
-    let plan = Arc::new(
-        SortExec::new(LexOrdering::new(vec![sort_expr]).unwrap(), join)
-            .with_fetch(Some(2)),
-    ) as Arc<dyn ExecutionPlan>;
-
-    let mut config = ConfigOptions::default();
-    config.optimizer.enable_dynamic_filter_pushdown = true;
-    config.execution.parquet.pushdown_filters = true;
-
-    // Apply the FilterPushdown optimizer rule
-    let plan = FilterPushdown::new_post_optimization()
-        .optimize(Arc::clone(&plan), &config)
-        .unwrap();
-
-    // Test that filters are pushed down correctly to each side of the join
-    insta::assert_snapshot!(
-        format_plan_for_test(&plan),
-        @r"
-    - SortExec: TopK(fetch=2), expr=[e@4 ASC], preserve_partitioning=[false]
-    -   HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, d@0)]
-    -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true
-    -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, e, f], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ] AND DynamicFilter [ empty ]
-    "
-    );
-
-    // Put some data through the plan to check that the filter is updated to reflect the TopK state
-    let session_ctx = SessionContext::new_with_config(SessionConfig::new());
-    session_ctx.register_object_store(
-        ObjectStoreUrl::parse("test://").unwrap().as_ref(),
-        Arc::new(InMemory::new()),
-    );
-    let state = session_ctx.state();
-    let task_ctx = state.task_ctx();
-    let mut stream = plan.execute(0, Arc::clone(&task_ctx)).unwrap();
-    // Iterate one batch
-    stream.next().await.unwrap().unwrap();
-
-    // Test that filters are pushed down correctly to each side of the join
-    // NOTE: We dropped the CASE expression here because we now optimize that away if there's only 1 partition
-    insta::assert_snapshot!(
-        format_plan_for_test(&plan),
-        @r"
-    - SortExec: TopK(fetch=2), expr=[e@4 ASC], preserve_partitioning=[false], filter=[e@4 IS NULL OR e@4 < bb]
-    -   HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, d@0)]
-    -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true
-    -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, e, f], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ d@0 >= aa AND d@0 <= ab AND d@0 IN (SET) ([aa, ab]) ] AND DynamicFilter [ e@1 IS NULL OR e@1 < bb ]
-    "
-    );
-}
-
-// Test both static and dynamic filter pushdown in HashJoinExec.
-// Note that static filter pushdown is rare: it should have already happened in the logical optimizer phase.
-// However users may manually construct plans that could result in a FilterExec -> HashJoinExec -> Scan setup.
-// Dynamic filters arise in cases such as nested inner joins or TopK -> HashJoinExec -> Scan setups.
+// Inner-join part is covered by push_down_filter_parquet.slt::test_hashjoin_parent_filter_pushdown.
+// The Left-join part stays in Rust: SQL's outer-join-elimination rewrites
+// `LEFT JOIN ... WHERE <probe-side-null-rejecting>` into an INNER JOIN
+// before physical filter pushdown runs, so the preserved-vs-non-preserved
+// distinction this test exercises is not reachable via SQL.
 #[tokio::test]
 async fn test_static_filter_pushdown_through_hash_join() {
     use datafusion_common::JoinType;
@@ -926,175 +801,11 @@ fn test_node_handles_child_pushdown_result() {
     );
 }
 
-#[tokio::test]
-async fn test_topk_dynamic_filter_pushdown() {
-    let batches = vec![
-        record_batch!(
-            ("a", Utf8, ["aa", "ab"]),
-            ("b", Utf8, ["bd", "bc"]),
-            ("c", Float64, [1.0, 2.0])
-        )
-        .unwrap(),
-        record_batch!(
-            ("a", Utf8, ["ac", "ad"]),
-            ("b", Utf8, ["bb", "ba"]),
-            ("c", Float64, [2.0, 1.0])
-        )
-        .unwrap(),
-    ];
-    let scan = TestScanBuilder::new(schema())
-        .with_support(true)
-        .with_batches(batches)
-        .build();
-    let plan = Arc::new(
-        SortExec::new(
-            LexOrdering::new(vec![PhysicalSortExpr::new(
-                col("b", &schema()).unwrap(),
-                SortOptions::new(true, false), // descending, nulls_first
-            )])
-            .unwrap(),
-            Arc::clone(&scan),
-        )
-        .with_fetch(Some(1)),
-    ) as Arc<dyn ExecutionPlan>;
-
-    // expect the predicate to be pushed down into the DataSource
-    insta::assert_snapshot!(
-        OptimizationTest::new(Arc::clone(&plan), FilterPushdown::new_post_optimization(), true),
-        @r"
-    OptimizationTest:
-      input:
-        - SortExec: TopK(fetch=1), expr=[b@1 DESC NULLS LAST], preserve_partitioning=[false]
-        -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true
-      output:
-        Ok:
-          - SortExec: TopK(fetch=1), expr=[b@1 DESC NULLS LAST], preserve_partitioning=[false]
-          -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ]
-    "
-    );
-
-    // Actually apply the optimization to the plan and put some data through it to check that the filter is updated to reflect the TopK state
-    let mut config = ConfigOptions::default();
-    config.execution.parquet.pushdown_filters = true;
-    let plan = FilterPushdown::new_post_optimization()
-        .optimize(plan, &config)
-        .unwrap();
-    let config = SessionConfig::new().with_batch_size(2);
-    let session_ctx = SessionContext::new_with_config(config);
-    session_ctx.register_object_store(
-        ObjectStoreUrl::parse("test://").unwrap().as_ref(),
-        Arc::new(InMemory::new()),
-    );
-    let state = session_ctx.state();
-    let task_ctx = state.task_ctx();
-    let mut stream = plan.execute(0, Arc::clone(&task_ctx)).unwrap();
-    // Iterate one batch
-    stream.next().await.unwrap().unwrap();
-    // Now check what our filter looks like
-    insta::assert_snapshot!(
-        format!("{}", format_plan_for_test(&plan)),
-        @r"
-    - SortExec: TopK(fetch=1), expr=[b@1 DESC NULLS LAST], preserve_partitioning=[false], filter=[b@1 > bd]
-    -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ b@1 > bd ]
-    "
-    );
-}
-
-#[tokio::test]
-async fn test_topk_dynamic_filter_pushdown_multi_column_sort() {
-    let batches = vec![
-        // We are going to do ORDER BY b ASC NULLS LAST, a DESC
-        // And we put the values in such a way that the first batch will fill the TopK
-        // and we skip the second batch.
-        record_batch!(
-            ("a", Utf8, ["ac", "ad"]),
-            ("b", Utf8, ["bb", "ba"]),
-            ("c", Float64, [2.0, 1.0])
-        )
-        .unwrap(),
-        record_batch!(
-            ("a", Utf8, ["aa", "ab"]),
-            ("b", Utf8, ["bc", "bd"]),
-            ("c", Float64, [1.0, 2.0])
-        )
-        .unwrap(),
-    ];
-    let scan = TestScanBuilder::new(schema())
-        .with_support(true)
-        .with_batches(batches)
-        .build();
-    let plan = Arc::new(
-        SortExec::new(
-            LexOrdering::new(vec![
-                PhysicalSortExpr::new(
-                    col("b", &schema()).unwrap(),
-                    SortOptions::default().asc().nulls_last(),
-                ),
-                PhysicalSortExpr::new(
-                    col("a", &schema()).unwrap(),
-                    SortOptions::default().desc().nulls_first(),
-                ),
-            ])
-            .unwrap(),
-            Arc::clone(&scan),
-        )
-        .with_fetch(Some(2)),
-    ) as Arc<dyn ExecutionPlan>;
-
-    // expect the predicate to be pushed down into the DataSource
-    insta::assert_snapshot!(
-        OptimizationTest::new(Arc::clone(&plan), FilterPushdown::new_post_optimization(), true),
-        @r"
-    OptimizationTest:
-      input:
-        - SortExec: TopK(fetch=2), expr=[b@1 ASC NULLS LAST, a@0 DESC], preserve_partitioning=[false]
-        -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true
-      output:
-        Ok:
-          - SortExec: TopK(fetch=2), expr=[b@1 ASC NULLS LAST, a@0 DESC], preserve_partitioning=[false]
-          -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ]
-    "
-    );
-
-    // Actually apply the optimization to the plan and put some data through it to check that the filter is updated to reflect the TopK state
-    let mut config = ConfigOptions::default();
-    config.execution.parquet.pushdown_filters = true;
-    let plan = FilterPushdown::new_post_optimization()
-        .optimize(plan, &config)
-        .unwrap();
-    let config = SessionConfig::new().with_batch_size(2);
-    let session_ctx = SessionContext::new_with_config(config);
-    session_ctx.register_object_store(
-        ObjectStoreUrl::parse("test://").unwrap().as_ref(),
-        Arc::new(InMemory::new()),
-    );
-    let state = session_ctx.state();
-    let task_ctx = state.task_ctx();
-    let mut stream = plan.execute(0, Arc::clone(&task_ctx)).unwrap();
-    // Iterate one batch
-    let res = stream.next().await.unwrap().unwrap();
-    #[rustfmt::skip]
-    let expected = [
-        "+----+----+-----+",
-        "| a  | b  | c   |",
-        "+----+----+-----+",
-        "| ad | ba | 1.0 |",
-        "| ac | bb | 2.0 |",
-        "+----+----+-----+",
-    ];
-    assert_batches_eq!(expected, &[res]);
-    // Now check what our filter looks like
-    insta::assert_snapshot!(
-        format!("{}", format_plan_for_test(&plan)),
-        @r"
-    - SortExec: TopK(fetch=2), expr=[b@1 ASC NULLS LAST, a@0 DESC], preserve_partitioning=[false], filter=[b@1 < bb OR b@1 = bb AND (a@0 IS NULL OR a@0 > ac)]
-    -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ b@1 < bb OR b@1 = bb AND (a@0 IS NULL OR a@0 > ac) ]
-    "
-    );
-    // There should be no more batches
-    assert!(stream.next().await.is_none());
-}
-
+// Not portable to sqllogictest: requires manually constructing
+// `SortExec(CoalescePartitionsExec(scan))`. A SQL `ORDER BY ... LIMIT` over a
+// multi-partition scan plans as `SortPreservingMergeExec(SortExec(scan))`
+// instead, so the filter-through-coalesce path this test exercises is not
+// reachable via SQL.
 #[tokio::test]
 async fn test_topk_filter_passes_through_coalesce_partitions() {
     // Create multiple batches for different partitions
@@ -1163,129 +874,12 @@ async fn test_topk_filter_passes_through_coalesce_partitions() {
     );
 }
 
+// Not portable to sqllogictest: this test pins `PartitionMode::Partitioned`
+// by hand-wiring `RepartitionExec(Hash, 12)` on both join sides. A SQL
+// INNER JOIN over small parquet inputs plans as `CollectLeft`, so the
+// per-partition CASE filter this test exercises is not reachable via SQL.
 #[tokio::test]
-async fn test_hashjoin_dynamic_filter_pushdown() {
-    use datafusion_common::JoinType;
-    use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode};
-
-    // Create build side with limited values
-    let build_batches = vec![
-        record_batch!(
-            ("a", Utf8, ["aa", "ab"]),
-            ("b", Utf8, ["ba", "bb"]),
-            ("c", Float64, [1.0, 2.0]) // Extra column not used in join
-        )
-        .unwrap(),
-    ];
-    let build_side_schema = Arc::new(Schema::new(vec![
-        Field::new("a", DataType::Utf8, false),
-        Field::new("b", DataType::Utf8, false),
-        Field::new("c", DataType::Float64, false),
-    ]));
-    let build_scan = TestScanBuilder::new(Arc::clone(&build_side_schema))
-        .with_support(true)
-        .with_batches(build_batches)
-        .build();
-
-    // Create probe side with more values
-    let probe_batches = vec![
-        record_batch!(
-            ("a", Utf8, ["aa", "ab", "ac", "ad"]),
-            ("b", Utf8, ["ba", "bb", "bc", "bd"]),
-            ("e", Float64, [1.0, 2.0, 3.0, 4.0]) // Extra column not used in join
-        )
-        .unwrap(),
-    ];
-    let probe_side_schema = Arc::new(Schema::new(vec![
-        Field::new("a", DataType::Utf8, false),
-        Field::new("b", DataType::Utf8, false),
-        Field::new("e", DataType::Float64, false),
-    ]));
-    let probe_scan = TestScanBuilder::new(Arc::clone(&probe_side_schema))
-        .with_support(true)
-        .with_batches(probe_batches)
-        .build();
-
-    // Create HashJoinExec with dynamic filter
-    let on = vec![
-        (
-            col("a", &build_side_schema).unwrap(),
-            col("a", &probe_side_schema).unwrap(),
-        ),
-        (
-            col("b", &build_side_schema).unwrap(),
-            col("b", &probe_side_schema).unwrap(),
-        ),
-    ];
-    let plan = Arc::new(
-        HashJoinExec::try_new(
-            build_scan,
-            probe_scan,
-            on,
-            None,
-            &JoinType::Inner,
-            None,
-            PartitionMode::CollectLeft,
-            datafusion_common::NullEquality::NullEqualsNothing,
-            false,
-        )
-        .unwrap(),
-    ) as Arc<dyn ExecutionPlan>;
-
-    // expect the predicate to be pushed down into the probe side DataSource
-    insta::assert_snapshot!(
-        OptimizationTest::new(Arc::clone(&plan), FilterPushdown::new_post_optimization(), true),
-        @r"
-    OptimizationTest:
-      input:
-        - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)]
-        -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true
-        -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true
-      output:
-        Ok:
-          - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)]
-          -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true
-          -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ]
-    ",
-    );
-
-    // Actually apply the optimization to the plan and execute to see the filter in action
-    let mut config = ConfigOptions::default();
-    config.execution.parquet.pushdown_filters = true;
-    config.optimizer.enable_dynamic_filter_pushdown = true;
-    let plan = FilterPushdown::new_post_optimization()
-        .optimize(plan, &config)
-        .unwrap();
-
-    // Test for https://github.com/apache/datafusion/pull/17371: dynamic filter linking survives `with_new_children`
-    let children = plan.children().into_iter().map(Arc::clone).collect();
-    let plan = plan.with_new_children(children).unwrap();
-
-    let config = SessionConfig::new().with_batch_size(10);
-    let session_ctx = SessionContext::new_with_config(config);
-    session_ctx.register_object_store(
-        ObjectStoreUrl::parse("test://").unwrap().as_ref(),
-        Arc::new(InMemory::new()),
-    );
-    let state = session_ctx.state();
-    let task_ctx = state.task_ctx();
-    let mut stream = plan.execute(0, Arc::clone(&task_ctx)).unwrap();
-    // Iterate one batch
-    stream.next().await.unwrap().unwrap();
-
-    // Now check what our filter looks like
-    insta::assert_snapshot!(
-        format!("{}", format_plan_for_test(&plan)),
-        @r"
-    - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)]
-    -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true
-    -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:ba}, {c0:ab,c1:bb}]) ]
-    "
-    );
-}
-
-#[tokio::test]
-async fn test_hashjoin_dynamic_filter_pushdown_partitioned() {
+async fn test_hashjoin_dynamic_filter_pushdown_partitioned() {
     use datafusion_common::JoinType;
     use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode};
 
@@ -1533,6 +1127,12 @@ async fn test_hashjoin_dynamic_filter_pushdown_partitioned() {
     );
 }
 
+// Not portable to sqllogictest: this test specifically pins a
+// `RepartitionExec(Hash, 12)` between `HashJoinExec(CollectLeft)` and the
+// probe-side scan to verify the dynamic filter link survives that boundary
+// (regression for #17451). The same CollectLeft filter content and
+// pushdown counters are already covered by the simpler slt port
+// (push_down_filter_parquet.slt::test_hashjoin_dynamic_filter_pushdown).
 #[tokio::test]
 async fn test_hashjoin_dynamic_filter_pushdown_collect_left() {
     use datafusion_common::JoinType;
@@ -1705,265 +1305,6 @@ async fn test_hashjoin_dynamic_filter_pushdown_collect_left() {
     );
 }
 
-#[tokio::test]
-async fn test_nested_hashjoin_dynamic_filter_pushdown() {
-    use datafusion_common::JoinType;
-    use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode};
-
-    // Create test data for three tables: t1, t2, t3
-    // t1: small table with limited values (will be build side of outer join)
-    let t1_batches = vec![
-        record_batch!(("a", Utf8, ["aa", "ab"]), ("x", Float64, [1.0, 2.0])).unwrap(),
-    ];
-    let t1_schema = Arc::new(Schema::new(vec![
-        Field::new("a", DataType::Utf8, false),
-        Field::new("x", DataType::Float64, false),
-    ]));
-    let t1_scan = TestScanBuilder::new(Arc::clone(&t1_schema))
-        .with_support(true)
-        .with_batches(t1_batches)
-        .build();
-
-    // t2: larger table (will be probe side of inner join, build side of outer join)
-    let t2_batches = vec![
-        record_batch!(
-            ("b", Utf8, ["aa", "ab", "ac", "ad", "ae"]),
-            ("c", Utf8, ["ca", "cb", "cc", "cd", "ce"]),
-            ("y", Float64, [1.0, 2.0, 3.0, 4.0, 5.0])
-        )
-        .unwrap(),
-    ];
-    let t2_schema = Arc::new(Schema::new(vec![
-        Field::new("b", DataType::Utf8, false),
-        Field::new("c", DataType::Utf8, false),
-        Field::new("y", DataType::Float64, false),
-    ]));
-    let t2_scan = TestScanBuilder::new(Arc::clone(&t2_schema))
-        .with_support(true)
-        .with_batches(t2_batches)
-        .build();
-
-    // t3: largest table (will be probe side of inner join)
-    let t3_batches = vec![
-        record_batch!(
-            ("d", Utf8, ["ca", "cb", "cc", "cd", "ce", "cf", "cg", "ch"]),
-            ("z", Float64, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0])
-        )
-        .unwrap(),
-    ];
-    let t3_schema = Arc::new(Schema::new(vec![
-        Field::new("d", DataType::Utf8, false),
-        Field::new("z", DataType::Float64, false),
-    ]));
-    let t3_scan = TestScanBuilder::new(Arc::clone(&t3_schema))
-        .with_support(true)
-        .with_batches(t3_batches)
-        .build();
-
-    // Create nested join structure:
-    // Join (t1.a = t2.b)
-    // /        \
-    // t1    Join(t2.c = t3.d)
-    //         /    \
-    //        t2   t3
-
-    // First create inner join: t2.c = t3.d
-    let inner_join_on =
-        vec![(col("c", &t2_schema).unwrap(), col("d", &t3_schema).unwrap())];
-    let inner_join = Arc::new(
-        HashJoinExec::try_new(
-            t2_scan,
-            t3_scan,
-            inner_join_on,
-            None,
-            &JoinType::Inner,
-            None,
-            PartitionMode::Partitioned,
-            datafusion_common::NullEquality::NullEqualsNothing,
-            false,
-        )
-        .unwrap(),
-    );
-
-    // Then create outer join: t1.a = t2.b (from inner join result)
-    let outer_join_on = vec![(
-        col("a", &t1_schema).unwrap(),
-        col("b", &inner_join.schema()).unwrap(),
-    )];
-    let outer_join = Arc::new(
-        HashJoinExec::try_new(
-            t1_scan,
-            inner_join as Arc<dyn ExecutionPlan>,
-            outer_join_on,
-            None,
-            &JoinType::Inner,
-            None,
-            PartitionMode::Partitioned,
-            datafusion_common::NullEquality::NullEqualsNothing,
-            false,
-        )
-        .unwrap(),
-    ) as Arc<dyn ExecutionPlan>;
-
-    // Test that dynamic filters are pushed down correctly through nested joins
-    insta::assert_snapshot!(
-        OptimizationTest::new(Arc::clone(&outer_join), FilterPushdown::new_post_optimization(), true),
-        @r"
-    OptimizationTest:
-      input:
-        - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, b@0)]
-        -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, x], file_type=test, pushdown_supported=true
-        -   HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@1, d@0)]
-        -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[b, c, y], file_type=test, pushdown_supported=true
-        -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, z], file_type=test, pushdown_supported=true
-      output:
-        Ok:
-          - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, b@0)]
-          -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, x], file_type=test, pushdown_supported=true
-          -   HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@1, d@0)]
-          -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[b, c, y], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ]
-          -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, z], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ]
-    ",
-    );
-
-    // Execute the plan to verify the dynamic filters are properly updated
-    let mut config = ConfigOptions::default();
-    config.execution.parquet.pushdown_filters = true;
-    config.optimizer.enable_dynamic_filter_pushdown = true;
-    let plan = FilterPushdown::new_post_optimization()
-        .optimize(outer_join, &config)
-        .unwrap();
-    let config = SessionConfig::new().with_batch_size(10);
-    let session_ctx = SessionContext::new_with_config(config);
-    session_ctx.register_object_store(
-        ObjectStoreUrl::parse("test://").unwrap().as_ref(),
-        Arc::new(InMemory::new()),
-    );
-    let state = session_ctx.state();
-    let task_ctx = state.task_ctx();
-    let mut stream = plan.execute(0, Arc::clone(&task_ctx)).unwrap();
-    // Execute to populate the dynamic filters
-    stream.next().await.unwrap().unwrap();
-
-    // Verify that both the inner and outer join have updated dynamic filters
-    insta::assert_snapshot!(
-        format!("{}", format_plan_for_test(&plan)),
-        @r"
-    - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, b@0)]
-    -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, x], file_type=test, pushdown_supported=true
-    -   HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@1, d@0)]
-    -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[b, c, y], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ b@0 >= aa AND b@0 <= ab AND b@0 IN (SET) ([aa, ab]) ]
-    -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, z], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ d@0 >= ca AND d@0 <= cb AND d@0 IN (SET) ([ca, cb]) ]
-    "
-    );
-}
-
-#[tokio::test]
-async fn test_hashjoin_parent_filter_pushdown() {
-    use datafusion_common::JoinType;
-    use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode};
-
-    // Create build side with limited values
-    let build_batches = vec![
-        record_batch!(
-            ("a", Utf8, ["aa", "ab"]),
-            ("b", Utf8, ["ba", "bb"]),
-            ("c", Float64, [1.0, 2.0])
-        )
-        .unwrap(),
-    ];
-    let build_side_schema = Arc::new(Schema::new(vec![
-        Field::new("a", DataType::Utf8, false),
-        Field::new("b", DataType::Utf8, false),
-        Field::new("c", DataType::Float64, false),
-    ]));
-    let build_scan = TestScanBuilder::new(Arc::clone(&build_side_schema))
-        .with_support(true)
-        .with_batches(build_batches)
-        .build();
-
-    // Create probe side with more values
-    let probe_batches = vec![
-        record_batch!(
-            ("d", Utf8, ["aa", "ab", "ac", "ad"]),
-            ("e", Utf8, ["ba", "bb", "bc", "bd"]),
-            ("f", Float64, [1.0, 2.0, 3.0, 4.0])
-        )
-        .unwrap(),
-    ];
-    let probe_side_schema = Arc::new(Schema::new(vec![
-        Field::new("d", DataType::Utf8, false),
-        Field::new("e", DataType::Utf8, false),
-        Field::new("f", DataType::Float64, false),
-    ]));
-    let probe_scan = TestScanBuilder::new(Arc::clone(&probe_side_schema))
-        .with_support(true)
-        .with_batches(probe_batches)
-        .build();
-
-    // Create HashJoinExec
-    let on = vec![(
-        col("a", &build_side_schema).unwrap(),
-        col("d", &probe_side_schema).unwrap(),
-    )];
-    let join = Arc::new(
-        HashJoinExec::try_new(
-            build_scan,
-            probe_scan,
-            on,
-            None,
-            &JoinType::Inner,
-            None,
-            PartitionMode::Partitioned,
-            datafusion_common::NullEquality::NullEqualsNothing,
-            false,
-        )
-        .unwrap(),
-    );
-
-    // Create filters that can be pushed down to different sides
-    // We need to create filters in the context of the join output schema
-    let join_schema = join.schema();
-
-    // Filter on build side column: a = 'aa'
-    let left_filter = col_lit_predicate("a", "aa", &join_schema);
-    // Filter on probe side column: e = 'ba'
-    let right_filter = col_lit_predicate("e", "ba", &join_schema);
-    // Filter that references both sides: a = d (should not be pushed down)
-    let cross_filter = Arc::new(BinaryExpr::new(
-        col("a", &join_schema).unwrap(),
-        Operator::Eq,
-        col("d", &join_schema).unwrap(),
-    )) as Arc<dyn PhysicalExpr>;
-
-    let filter =
-        Arc::new(FilterExec::try_new(left_filter, Arc::clone(&join) as _).unwrap());
-    let filter = Arc::new(FilterExec::try_new(right_filter, filter).unwrap());
-    let plan = Arc::new(FilterExec::try_new(cross_filter, filter).unwrap())
-        as Arc<dyn ExecutionPlan>;
-
-    // Test that filters are pushed down correctly to each side of the join
-    insta::assert_snapshot!(
-        OptimizationTest::new(Arc::clone(&plan), FilterPushdown::new(), true),
-        @r"
-    OptimizationTest:
-      input:
-        - FilterExec: a@0 = d@3
-        -   FilterExec: e@4 = ba
-        -     FilterExec: a@0 = aa
-        -       HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, d@0)]
-        -         DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true
-        -         DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, e, f], file_type=test, pushdown_supported=true
-      output:
-        Ok:
-          - FilterExec: a@0 = d@3
-          -   HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, d@0)]
-          -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=a@0 = aa
-          -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[d, e, f], file_type=test, pushdown_supported=true, predicate=e@1 = ba
-    "
-    );
-}
-
 #[test]
 fn test_hashjoin_parent_filter_pushdown_same_column_names() {
     use datafusion_common::JoinType;
@@ -2176,64 +1517,6 @@ fn test_hashjoin_parent_filter_pushdown_semi_anti_join() {
     );
 }
 
-/// Integration test for dynamic filter pushdown with TopK.
-/// We use an integration test because there are complex interactions in the optimizer rules
-/// that the unit tests applying a single optimizer rule do not cover.
-#[tokio::test]
-async fn test_topk_dynamic_filter_pushdown_integration() {
-    let store = Arc::new(InMemory::new()) as Arc<dyn ObjectStore>;
-    let mut cfg = SessionConfig::new();
-    cfg.options_mut().execution.parquet.pushdown_filters = true;
-    cfg.options_mut().execution.parquet.max_row_group_size = 128;
-    let ctx = SessionContext::new_with_config(cfg);
-    ctx.register_object_store(
-        ObjectStoreUrl::parse("memory://").unwrap().as_ref(),
-        Arc::clone(&store),
-    );
-    ctx.sql(
-        r"
-COPY  (
-  SELECT 1372708800 + value AS t
-  FROM generate_series(0, 99999)
-  ORDER BY t
- ) TO 'memory:///1.parquet'
-STORED AS PARQUET;
-  ",
-    )
-    .await
-    .unwrap()
-    .collect()
-    .await
-    .unwrap();
-
-    // Register the file with the context
-    ctx.register_parquet(
-        "topk_pushdown",
-        "memory:///1.parquet",
-        ParquetReadOptions::default(),
-    )
-    .await
-    .unwrap();
-
-    // Create a TopK query that will use dynamic filter pushdown
-    // Note that we use t * t as the order by expression to avoid
-    // the order pushdown optimizer from optimizing away the TopK.
-    let df = ctx
-        .sql(r"EXPLAIN ANALYZE SELECT t FROM topk_pushdown ORDER BY t * t LIMIT 10;")
-        .await
-        .unwrap();
-    let batches = df.collect().await.unwrap();
-    let explain = format!("{}", pretty_format_batches(&batches).unwrap());
-
-    assert!(explain.contains("output_rows=128")); // Read 1 row group
-    assert!(explain.contains("t@0 < 1884329474306198481")); // Dynamic filter was applied
-    assert!(
-        explain.contains("pushdown_rows_matched=128, pushdown_rows_pruned=99.87 K"),
-        "{explain}"
-    );
-    // Pushdown pruned most rows
-}
-
 #[test]
 fn test_filter_pushdown_through_union() {
     let scan1 = TestScanBuilder::new(schema()).with_support(true).build();
@@ -2450,233 +1733,11 @@ fn schema() -> SchemaRef {
     Arc::clone(&TEST_SCHEMA)
 }
 
-struct ProjectionDynFilterTestCase {
-    schema: SchemaRef,
-    batches: Vec<RecordBatch>,
-    projection: Vec<(Arc<dyn PhysicalExpr>, String)>,
-    sort_expr: PhysicalSortExpr,
-    expected_plans: Vec<String>,
-}
-
-async fn run_projection_dyn_filter_case(case: ProjectionDynFilterTestCase) {
-    let ProjectionDynFilterTestCase {
-        schema,
-        batches,
-        projection,
-        sort_expr,
-        expected_plans,
-    } = case;
-
-    let scan = TestScanBuilder::new(Arc::clone(&schema))
-        .with_support(true)
-        .with_batches(batches)
-        .build();
-
-    let projection_exec = Arc::new(ProjectionExec::try_new(projection, scan).unwrap());
-
-    let sort = Arc::new(
-        SortExec::new(LexOrdering::new(vec![sort_expr]).unwrap(), projection_exec)
-            .with_fetch(Some(2)),
-    ) as Arc<dyn ExecutionPlan>;
-
-    let mut config = ConfigOptions::default();
-    config.execution.parquet.pushdown_filters = true;
-    config.optimizer.enable_dynamic_filter_pushdown = true;
-
-    let optimized_plan = FilterPushdown::new_post_optimization()
-        .optimize(Arc::clone(&sort), &config)
-        .unwrap();
-
-    pretty_assertions::assert_eq!(
-        format_plan_for_test(&optimized_plan).trim(),
-        expected_plans[0].trim()
-    );
-
-    let config = SessionConfig::new().with_batch_size(2);
-    let session_ctx = SessionContext::new_with_config(config);
-    session_ctx.register_object_store(
-        ObjectStoreUrl::parse("test://").unwrap().as_ref(),
-        Arc::new(InMemory::new()),
-    );
-    let state = session_ctx.state();
-    let task_ctx = state.task_ctx();
-    let mut stream = optimized_plan.execute(0, Arc::clone(&task_ctx)).unwrap();
-    for (idx, expected_plan) in expected_plans.iter().enumerate().skip(1) {
-        stream.next().await.unwrap().unwrap();
-        let formatted_plan = format_plan_for_test(&optimized_plan);
-        pretty_assertions::assert_eq!(
-            formatted_plan.trim(),
-            expected_plan.trim(),
-            "Mismatch at iteration {}",
-            idx
-        );
-    }
-}
-
-#[tokio::test]
-async fn test_topk_with_projection_transformation_on_dyn_filter() {
-    let schema = Arc::new(Schema::new(vec![
-        Field::new("a", DataType::Int32, false),
-        Field::new("b", DataType::Utf8, false),
-        Field::new("c", DataType::Float64, false),
-    ]));
-    let simple_abc = vec![
-        record_batch!(
-            ("a", Int32, [1, 2, 3]),
-            ("b", Utf8, ["x", "y", "z"]),
-            ("c", Float64, [1.0, 2.0, 3.0])
-        )
-        .unwrap(),
-    ];
-
-    // Case 1: Reordering [b, a]
-    run_projection_dyn_filter_case(ProjectionDynFilterTestCase {
-        schema: Arc::clone(&schema),
-        batches: simple_abc.clone(),
-        projection: vec![
-            (col("b", &schema).unwrap(), "b".to_string()),
-            (col("a", &schema).unwrap(), "a".to_string()),
-        ],
-        sort_expr: PhysicalSortExpr::new(
-            Arc::new(Column::new("a", 1)),
-            SortOptions::default(),
-        ),
-        expected_plans: vec![
-r#"  - SortExec: TopK(fetch=2), expr=[a@1 ASC], preserve_partitioning=[false]
-  -   ProjectionExec: expr=[b@1 as b, a@0 as a]
-  -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ]"#.to_string(),
-r#"  - SortExec: TopK(fetch=2), expr=[a@1 ASC], preserve_partitioning=[false], filter=[a@1 IS NULL OR a@1 < 2]
-  -   ProjectionExec: expr=[b@1 as b, a@0 as a]
-  -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 IS NULL OR a@0 < 2 ]"#.to_string()]
-    })
-    .await;
-
-    // Case 2: Pruning [a]
-    run_projection_dyn_filter_case(ProjectionDynFilterTestCase {
-        schema: Arc::clone(&schema),
-        batches: simple_abc.clone(),
-        projection: vec![(col("a", &schema).unwrap(), "a".to_string())],
-        sort_expr: PhysicalSortExpr::new(
-            Arc::new(Column::new("a", 0)),
-            SortOptions::default(),
-        ),
-        expected_plans: vec![
-            r#"  - SortExec: TopK(fetch=2), expr=[a@0 ASC], preserve_partitioning=[false]
-  -   ProjectionExec: expr=[a@0 as a]
-  -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ]"#.to_string(),
-            r#"  - SortExec: TopK(fetch=2), expr=[a@0 ASC], preserve_partitioning=[false], filter=[a@0 IS NULL OR a@0 < 2]
-  -   ProjectionExec: expr=[a@0 as a]
-  -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 IS NULL OR a@0 < 2 ]"#.to_string(),
-        ],
-    })
-    .await;
-
-    // Case 3: Identity [a, b]
-    run_projection_dyn_filter_case(ProjectionDynFilterTestCase {
-        schema: Arc::clone(&schema),
-        batches: simple_abc.clone(),
-        projection: vec![
-            (col("a", &schema).unwrap(), "a".to_string()),
-            (col("b", &schema).unwrap(), "b".to_string()),
-        ],
-        sort_expr: PhysicalSortExpr::new(
-            Arc::new(Column::new("a", 0)),
-            SortOptions::default(),
-        ),
-        expected_plans: vec![
-            r#"  - SortExec: TopK(fetch=2), expr=[a@0 ASC], preserve_partitioning=[false]
-  -   ProjectionExec: expr=[a@0 as a, b@1 as b]
-  -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ]"#.to_string(),
-            r#"  - SortExec: TopK(fetch=2), expr=[a@0 ASC], preserve_partitioning=[false], filter=[a@0 IS NULL OR a@0 < 2]
-  -   ProjectionExec: expr=[a@0 as a, b@1 as b]
-  -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 IS NULL OR a@0 < 2 ]"#.to_string(),
-        ],
-    })
-    .await;
-
-    // Case 4: Expressions [a + 1, b]
-    run_projection_dyn_filter_case(ProjectionDynFilterTestCase {
-        schema: Arc::clone(&schema),
-        batches: simple_abc.clone(),
-        projection: vec![
-            (
-                Arc::new(BinaryExpr::new(
-                    col("a", &schema).unwrap(),
-                    Operator::Plus,
-                    Arc::new(Literal::new(ScalarValue::Int32(Some(1)))),
-                )),
-                "a_plus_1".to_string(),
-            ),
-            (col("b", &schema).unwrap(), "b".to_string()),
-        ],
-        sort_expr: PhysicalSortExpr::new(
-            Arc::new(Column::new("a_plus_1", 0)),
-            SortOptions::default(),
-        ),
-        expected_plans: vec![
-            r#"  - SortExec: TopK(fetch=2), expr=[a_plus_1@0 ASC], preserve_partitioning=[false]
-  -   ProjectionExec: expr=[a@0 + 1 as a_plus_1, b@1 as b]
-  -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ]"#.to_string(),
-            r#"  - SortExec: TopK(fetch=2), expr=[a_plus_1@0 ASC], preserve_partitioning=[false], filter=[a_plus_1@0 IS NULL OR a_plus_1@0 < 3]
-  -   ProjectionExec: expr=[a@0 + 1 as a_plus_1, b@1 as b]
-  -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 + 1 IS NULL OR a@0 + 1 < 3 ]"#.to_string(),
-        ],
-    })
-    .await;
-
-    // Case 5: [a as b, b as a] (swapped columns)
-    run_projection_dyn_filter_case(ProjectionDynFilterTestCase {
-        schema: Arc::clone(&schema),
-        batches: simple_abc.clone(),
-        projection: vec![
-            (col("a", &schema).unwrap(), "b".to_string()),
-            (col("b", &schema).unwrap(), "a".to_string()),
-        ],
-        sort_expr: PhysicalSortExpr::new(
-            Arc::new(Column::new("b", 0)),
-            SortOptions::default(),
-        ),
-        expected_plans: vec![
-            r#"  - SortExec: TopK(fetch=2), expr=[b@0 ASC], preserve_partitioning=[false]
-  -   ProjectionExec: expr=[a@0 as b, b@1 as a]
-  -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ]"#.to_string(),
-            r#"  - SortExec: TopK(fetch=2), expr=[b@0 ASC], preserve_partitioning=[false], filter=[b@0 IS NULL OR b@0 < 2]
-  -   ProjectionExec: expr=[a@0 as b, b@1 as a]
-  -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 IS NULL OR a@0 < 2 ]"#.to_string(),
-        ],
-    })
-    .await;
-
-    // Case 6: Confusing expr [a + 1 as a, b]
-    run_projection_dyn_filter_case(ProjectionDynFilterTestCase {
-        schema: Arc::clone(&schema),
-        batches: simple_abc.clone(),
-        projection: vec![
-            (
-                Arc::new(BinaryExpr::new(
-                    col("a", &schema).unwrap(),
-                    Operator::Plus,
-                    Arc::new(Literal::new(ScalarValue::Int32(Some(1)))),
-                )),
-                "a".to_string(),
-            ),
-            (col("b", &schema).unwrap(), "b".to_string()),
-        ],
-        sort_expr: PhysicalSortExpr::new(
-            Arc::new(Column::new("a", 0)),
-            SortOptions::default(),
-        ),
-        expected_plans: vec![
-            r#"  - SortExec: TopK(fetch=2), expr=[a@0 ASC], preserve_partitioning=[false]
-  -   ProjectionExec: expr=[a@0 + 1 as a, b@1 as b]
-  -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ]"#.to_string(),
-            r#"  - SortExec: TopK(fetch=2), expr=[a@0 ASC], preserve_partitioning=[false], filter=[a@0 IS NULL OR a@0 < 3]
-  -   ProjectionExec: expr=[a@0 + 1 as a, b@1 as b]
-  -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 + 1 IS NULL OR a@0 + 1 < 3 ]"#.to_string(),
-        ],
-    })
-    .await;
-}
+// test_topk_with_projection_transformation_on_dyn_filter has been ported
+// to datafusion/sqllogictest/test_files/push_down_filter_parquet.slt; see
+// `topk_proj` fixture for the 4 representative cases (reorder, prune,
+// expression, alias shadowing). The `run_projection_dyn_filter_case`
+// harness was removed along with it.
 
 /// Returns a predicate that is a binary expression col = lit
 fn col_lit_predicate(
@@ -2693,382 +1754,12 @@ fn col_lit_predicate(
 }
 
 // ==== Aggregate Dynamic Filter tests ====
-
-// ---- Test Utilities ----
-struct AggregateDynFilterCase<'a> {
-    schema: SchemaRef,
-    batches: Vec<RecordBatch>,
-    aggr_exprs: Vec<AggregateFunctionExpr>,
-    expected_before: Option<&'a str>,
-    expected_after: Option<&'a str>,
-    scan_support: bool,
-}
-
-async fn run_aggregate_dyn_filter_case(case: AggregateDynFilterCase<'_>) {
-    let AggregateDynFilterCase {
-        schema,
-        batches,
-        aggr_exprs,
-        expected_before,
-        expected_after,
-        scan_support,
-    } = case;
-
-    let scan = TestScanBuilder::new(Arc::clone(&schema))
-        .with_support(scan_support)
-        .with_batches(batches)
-        .build();
-
-    let aggr_exprs: Vec<_> = aggr_exprs
-        .into_iter()
-        .map(|expr| Arc::new(expr) as Arc<AggregateFunctionExpr>)
-        .collect();
-    let aggr_len = aggr_exprs.len();
-
-    let plan: Arc<dyn ExecutionPlan> = Arc::new(
-        AggregateExec::try_new(
-            AggregateMode::Partial,
-            PhysicalGroupBy::new_single(vec![]),
-            aggr_exprs,
-            vec![None; aggr_len],
-            scan,
-            Arc::clone(&schema),
-        )
-        .unwrap(),
-    );
-
-    let mut config = ConfigOptions::default();
-    config.execution.parquet.pushdown_filters = true;
-    config.optimizer.enable_dynamic_filter_pushdown = true;
-
-    let optimized = FilterPushdown::new_post_optimization()
-        .optimize(plan, &config)
-        .unwrap();
-
-    let before = format_plan_for_test(&optimized);
-    if let Some(expected) = expected_before {
-        assert!(
-            before.contains(expected),
-            "expected `{expected}` before execution, got: {before}"
-        );
-    } else {
-        assert!(
-            !before.contains("DynamicFilter ["),
-            "dynamic filter unexpectedly present before execution: {before}"
-        );
-    }
-
-    let session_ctx = SessionContext::new();
-    session_ctx.register_object_store(
-        ObjectStoreUrl::parse("test://").unwrap().as_ref(),
-        Arc::new(InMemory::new()),
-    );
-    let task_ctx = session_ctx.state().task_ctx();
-    let mut stream = optimized.execute(0, Arc::clone(&task_ctx)).unwrap();
-    let _ = stream.next().await.transpose().unwrap();
-
-    let after = format_plan_for_test(&optimized);
-    if let Some(expected) = expected_after {
-        assert!(
-            after.contains(expected),
-            "expected `{expected}` after execution, got: {after}"
-        );
-    } else {
-        assert!(
-            !after.contains("DynamicFilter ["),
-            "dynamic filter unexpectedly present after execution: {after}"
-        );
-    }
-}
-
-// ---- Test Cases ----
-// Cases covered below:
-// 1. `min(a)` and `max(a)` baseline.
-// 2. Unsupported expression input (`min(a+1)`).
-// 3. Multiple supported columns (same column vs different columns).
-// 4. Mixed supported + unsupported aggregates.
-// 5. Entirely NULL input to surface current bound behavior.
-// 6. End-to-end tests on parquet files
-
-/// `MIN(a)`: able to pushdown dynamic filter
-#[tokio::test]
-async fn test_aggregate_dynamic_filter_min_simple() {
-    // Single min(a) showcases the base case.
-    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)]));
-    let batches = vec![record_batch!(("a", Int32, [5, 1, 3, 8])).unwrap()];
-
-    let min_expr =
-        AggregateExprBuilder::new(min_udaf(), vec![col("a", &schema).unwrap()])
-            .schema(Arc::clone(&schema))
-            .alias("min_a")
-            .build()
-            .unwrap();
-
-    run_aggregate_dyn_filter_case(AggregateDynFilterCase {
-        schema,
-        batches,
-        aggr_exprs: vec![min_expr],
-        expected_before: Some("DynamicFilter [ empty ]"),
-        expected_after: Some("DynamicFilter [ a@0 < 1 ]"),
-        scan_support: true,
-    })
-    .await;
-}
-
-/// `MAX(a)`: able to pushdown dynamic filter
-#[tokio::test]
-async fn test_aggregate_dynamic_filter_max_simple() {
-    // Single max(a) mirrors the base case on the upper bound.
-    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)]));
-    let batches = vec![record_batch!(("a", Int32, [5, 1, 3, 8])).unwrap()];
-
-    let max_expr =
-        AggregateExprBuilder::new(max_udaf(), vec![col("a", &schema).unwrap()])
-            .schema(Arc::clone(&schema))
-            .alias("max_a")
-            .build()
-            .unwrap();
-
-    run_aggregate_dyn_filter_case(AggregateDynFilterCase {
-        schema,
-        batches,
-        aggr_exprs: vec![max_expr],
-        expected_before: Some("DynamicFilter [ empty ]"),
-        expected_after: Some("DynamicFilter [ a@0 > 8 ]"),
-        scan_support: true,
-    })
-    .await;
-}
-
-/// `MIN(a+1)`: Can't pushdown dynamic filter
-#[tokio::test]
-async fn test_aggregate_dynamic_filter_min_expression_not_supported() {
-    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)]));
-    let batches = vec![record_batch!(("a", Int32, [5, 1, 3, 8])).unwrap()];
-
-    let expr: Arc<dyn PhysicalExpr> = Arc::new(BinaryExpr::new(
-        col("a", &schema).unwrap(),
-        Operator::Plus,
-        Arc::new(Literal::new(ScalarValue::Int32(Some(1)))),
-    ));
-    let min_expr = AggregateExprBuilder::new(min_udaf(), vec![expr])
-        .schema(Arc::clone(&schema))
-        .alias("min_a_plus_one")
-        .build()
-        .unwrap();
-
-    run_aggregate_dyn_filter_case(AggregateDynFilterCase {
-        schema,
-        batches,
-        aggr_exprs: vec![min_expr],
-        expected_before: None,
-        expected_after: None,
-        scan_support: true,
-    })
-    .await;
-}
-
-/// `MIN(a), MAX(a)`: Pushdown dynamic filter like `(a<1) or (a>8)`
-#[tokio::test]
-async fn test_aggregate_dynamic_filter_min_max_same_column() {
-    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)]));
-    let batches = vec![record_batch!(("a", Int32, [5, 1, 3, 8])).unwrap()];
-
-    let min_expr =
-        AggregateExprBuilder::new(min_udaf(), vec![col("a", &schema).unwrap()])
-            .schema(Arc::clone(&schema))
-            .alias("min_a")
-            .build()
-            .unwrap();
-    let max_expr =
-        AggregateExprBuilder::new(max_udaf(), vec![col("a", &schema).unwrap()])
-            .schema(Arc::clone(&schema))
-            .alias("max_a")
-            .build()
-            .unwrap();
-
-    run_aggregate_dyn_filter_case(AggregateDynFilterCase {
-        schema,
-        batches,
-        aggr_exprs: vec![min_expr, max_expr],
-        expected_before: Some("DynamicFilter [ empty ]"),
-        expected_after: Some("DynamicFilter [ a@0 < 1 OR a@0 > 8 ]"),
-        scan_support: true,
-    })
-    .await;
-}
-
-/// `MIN(a), MAX(b)`: Pushdown dynamic filter like `(a<1) or (b>9)`
-#[tokio::test]
-async fn test_aggregate_dynamic_filter_min_max_different_columns() {
-    let schema = Arc::new(Schema::new(vec![
-        Field::new("a", DataType::Int32, true),
-        Field::new("b", DataType::Int32, true),
-    ]));
-    let batches = vec![
-        record_batch!(("a", Int32, [5, 1, 3, 8]), ("b", Int32, [7, 2, 4, 9])).unwrap(),
-    ];
-
-    let min_expr =
-        AggregateExprBuilder::new(min_udaf(), vec![col("a", &schema).unwrap()])
-            .schema(Arc::clone(&schema))
-            .alias("min_a")
-            .build()
-            .unwrap();
-    let max_expr =
-        AggregateExprBuilder::new(max_udaf(), vec![col("b", &schema).unwrap()])
-            .schema(Arc::clone(&schema))
-            .alias("max_b")
-            .build()
-            .unwrap();
-
-    run_aggregate_dyn_filter_case(AggregateDynFilterCase {
-        schema,
-        batches,
-        aggr_exprs: vec![min_expr, max_expr],
-        expected_before: Some("DynamicFilter [ empty ]"),
-        expected_after: Some("DynamicFilter [ a@0 < 1 OR b@1 > 9 ]"),
-        scan_support: true,
-    })
-    .await;
-}
-
-/// Mix of supported/unsupported aggregates retains only the valid ones.
-/// `MIN(a), MAX(a), MAX(b), MIN(c+1)`: Pushdown dynamic filter like `(a<1) or (a>8) OR (b>12)`
-#[tokio::test]
-async fn test_aggregate_dynamic_filter_multiple_mixed_expressions() {
-    let schema = Arc::new(Schema::new(vec![
-        Field::new("a", DataType::Int32, true),
-        Field::new("b", DataType::Int32, true),
-        Field::new("c", DataType::Int32, true),
-    ]));
-    let batches = vec![
-        record_batch!(
-            ("a", Int32, [5, 1, 3, 8]),
-            ("b", Int32, [10, 4, 6, 12]),
-            ("c", Int32, [100, 70, 90, 110])
-        )
-        .unwrap(),
-    ];
-
-    let min_a = AggregateExprBuilder::new(min_udaf(), vec![col("a", &schema).unwrap()])
-        .schema(Arc::clone(&schema))
-        .alias("min_a")
-        .build()
-        .unwrap();
-    let max_a = AggregateExprBuilder::new(max_udaf(), vec![col("a", &schema).unwrap()])
-        .schema(Arc::clone(&schema))
-        .alias("max_a")
-        .build()
-        .unwrap();
-    let max_b = AggregateExprBuilder::new(max_udaf(), vec![col("b", &schema).unwrap()])
-        .schema(Arc::clone(&schema))
-        .alias("max_b")
-        .build()
-        .unwrap();
-    let expr_c: Arc<dyn PhysicalExpr> = Arc::new(BinaryExpr::new(
-        col("c", &schema).unwrap(),
-        Operator::Plus,
-        Arc::new(Literal::new(ScalarValue::Int32(Some(1)))),
-    ));
-    let min_c_expr = AggregateExprBuilder::new(min_udaf(), vec![expr_c])
-        .schema(Arc::clone(&schema))
-        .alias("min_c_plus_one")
-        .build()
-        .unwrap();
-
-    run_aggregate_dyn_filter_case(AggregateDynFilterCase {
-        schema,
-        batches,
-        aggr_exprs: vec![min_a, max_a, max_b, min_c_expr],
-        expected_before: Some("DynamicFilter [ empty ]"),
-        expected_after: Some("DynamicFilter [ a@0 < 1 OR a@0 > 8 OR b@1 > 12 ]"),
-        scan_support: true,
-    })
-    .await;
-}
-
-/// Don't tighten the dynamic filter if all inputs are null
-#[tokio::test]
-async fn test_aggregate_dynamic_filter_min_all_nulls() {
-    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)]));
-    let batches = vec![record_batch!(("a", Int32, [None, None, None, None])).unwrap()];
-
-    let min_expr =
-        AggregateExprBuilder::new(min_udaf(), vec![col("a", &schema).unwrap()])
-            .schema(Arc::clone(&schema))
-            .alias("min_a")
-            .build()
-            .unwrap();
-
-    run_aggregate_dyn_filter_case(AggregateDynFilterCase {
-        schema,
-        batches,
-        aggr_exprs: vec![min_expr],
-        expected_before: Some("DynamicFilter [ empty ]"),
-        // After reading the input it hasn't a meaningful bound to update, so the
-        // predicate `true` means don't filter out anything
-        expected_after: Some("DynamicFilter [ true ]"),
-        scan_support: true,
-    })
-    .await;
-}
-
-/// Test aggregate dynamic filter is working when reading parquet files
-///
-/// Runs 'select max(id) from test_table where id > 1', and ensure some file ranges
-/// pruned by the dynamic filter.
-#[tokio::test]
-async fn test_aggregate_dynamic_filter_parquet_e2e() {
-    let config = SessionConfig::new()
-        .with_collect_statistics(true)
-        .with_target_partitions(2)
-        .set_bool("datafusion.optimizer.enable_dynamic_filter_pushdown", true)
-        .set_bool("datafusion.execution.parquet.pushdown_filters", true);
-    let ctx = SessionContext::new_with_config(config);
-
-    let data_path = format!(
-        "{}/tests/data/test_statistics_per_partition/",
-        env!("CARGO_MANIFEST_DIR")
-    );
-
-    ctx.register_parquet("test_table", &data_path, ParquetReadOptions::default())
-        .await
-        .unwrap();
-
-    // partition 1:
-    //   files: ..03-01(id=4), ..03-02(id=3)
-    // partition 1:
-    //   files: ..03-03(id=2), ..03-04(id=1)
-    //
-    // In partition 1, after reading the first file, the dynamic filter will be update
-    // to "id > 4", so the `..03-02` file must be able to get pruned out
-    let df = ctx
-        .sql("explain analyze select max(id) from test_table where id > 1")
-        .await
-        .unwrap();
-
-    let result = df.collect().await.unwrap();
-
-    let formatted = pretty_format_batches(&result).unwrap();
-    let explain_analyze = format!("{formatted}");
-
-    // Capture "2" from "files_ranges_pruned_statistics=4 total → 2 matched"
-    let re = Regex::new(
-        r"files_ranges_pruned_statistics\s*=\s*(\d+)\s*total\s*[→>\-]\s*(\d+)\s*matched",
-    )
-    .unwrap();
-
-    if let Some(caps) = re.captures(&explain_analyze) {
-        let matched_num: i32 = caps[2].parse().unwrap();
-        assert!(
-            matched_num < 4,
-            "Total 4 files, if some pruned, the matched count is < 4"
-        );
-    } else {
-        unreachable!("metrics should exist")
-    }
-}
+//
+// The end-to-end min/max dynamic filter cases (simple/min/max/mixed/all-nulls)
+// have been ported to
+// `datafusion/sqllogictest/test_files/push_down_filter_regression.slt`.
+// The `run_aggregate_dyn_filter_case` harness used to drive them was removed
+// along with the test functions.
 
 /// Non-partial (Single) aggregates should skip dynamic filter initialization.
 #[test]
@@ -3115,139 +1806,6 @@ fn test_aggregate_dynamic_filter_not_created_for_single_mode() {
     );
 }
 
-#[tokio::test]
-async fn test_aggregate_filter_pushdown() {
-    // Test that filters can pass through AggregateExec even with aggregate functions
-    // when the filter references grouping columns
-    // Simulates: SELECT a, COUNT(b) FROM table WHERE a = 'x' GROUP BY a
-
-    let batches = vec![
-        record_batch!(("a", Utf8, ["x", "y"]), ("b", Utf8, ["foo", "bar"])).unwrap(),
-    ];
-
-    let scan = TestScanBuilder::new(schema())
-        .with_support(true)
-        .with_batches(batches)
-        .build();
-
-    // Create an aggregate: GROUP BY a with COUNT(b)
-    let group_by = PhysicalGroupBy::new_single(vec![(
-        col("a", &schema()).unwrap(),
-        "a".to_string(),
-    )]);
-
-    // Add COUNT aggregate
-    let count_expr =
-        AggregateExprBuilder::new(count_udaf(), vec![col("b", &schema()).unwrap()])
-            .schema(schema())
-            .alias("count")
-            .build()
-            .unwrap();
-
-    let aggregate = Arc::new(
-        AggregateExec::try_new(
-            AggregateMode::Partial,
-            group_by,
-            vec![count_expr.into()], // Has aggregate function
-            vec![None],              // No filter on the aggregate function
-            Arc::clone(&scan),
-            schema(),
-        )
-        .unwrap(),
-    );
-
-    // Add a filter on the grouping column 'a'
-    let predicate = col_lit_predicate("a", "x", &schema());
-    let plan = Arc::new(FilterExec::try_new(predicate, aggregate).unwrap())
-        as Arc<dyn ExecutionPlan>;
-
-    // Even with aggregate functions, filter on grouping column should be pushed through
-    insta::assert_snapshot!(
-        OptimizationTest::new(Arc::clone(&plan), FilterPushdown::new(), true),
-        @r"
-    OptimizationTest:
-      input:
-        - FilterExec: a@0 = x
-        -   AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count]
-        -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true
-      output:
-        Ok:
-          - AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count], ordering_mode=Sorted
-          -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=a@0 = x
-    "
-    );
-}
-
-#[tokio::test]
-async fn test_no_pushdown_filter_on_aggregate_result() {
-    // Test that filters on aggregate results (not grouping columns) are NOT pushed through
-    // SELECT a, COUNT(b) as cnt FROM table GROUP BY a HAVING cnt > 5
-    // The filter on 'cnt' cannot be pushed down because it's an aggregate result
-
-    let batches = vec![
-        record_batch!(("a", Utf8, ["x", "y"]), ("b", Utf8, ["foo", "bar"])).unwrap(),
-    ];
-
-    let scan = TestScanBuilder::new(schema())
-        .with_support(true)
-        .with_batches(batches)
-        .build();
-
-    // Create an aggregate: GROUP BY a with COUNT(b)
-    let group_by = PhysicalGroupBy::new_single(vec![(
-        col("a", &schema()).unwrap(),
-        "a".to_string(),
-    )]);
-
-    // Add COUNT aggregate
-    let count_expr =
-        AggregateExprBuilder::new(count_udaf(), vec![col("b", &schema()).unwrap()])
-            .schema(schema())
-            .alias("count")
-            .build()
-            .unwrap();
-
-    let aggregate = Arc::new(
-        AggregateExec::try_new(
-            AggregateMode::Partial,
-            group_by,
-            vec![count_expr.into()],
-            vec![None],
-            Arc::clone(&scan),
-            schema(),
-        )
-        .unwrap(),
-    );
-
-    // Add a filter on the aggregate output column
-    // This simulates filtering on COUNT result, which should NOT be pushed through
-    let agg_schema = aggregate.schema();
-    let predicate = Arc::new(BinaryExpr::new(
-        Arc::new(Column::new_with_schema("count[count]", &agg_schema).unwrap()),
-        Operator::Gt,
-        Arc::new(Literal::new(ScalarValue::Int64(Some(5)))),
-    ));
-    let plan = Arc::new(FilterExec::try_new(predicate, aggregate).unwrap())
-        as Arc<dyn ExecutionPlan>;
-
-    // The filter should NOT be pushed through the aggregate since it's on an aggregate result
-    insta::assert_snapshot!(
-        OptimizationTest::new(Arc::clone(&plan), FilterPushdown::new(), true),
-        @r"
-    OptimizationTest:
-      input:
-        - FilterExec: count[count]@1 > 5
-        -   AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count]
-        -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true
-      output:
-        Ok:
-          - FilterExec: count[count]@1 > 5
-          -   AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count]
-          -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true
-    "
-    );
-}
-
 #[test]
 fn test_pushdown_filter_on_non_first_grouping_column() {
     // Test that filters on non-first grouping columns are still pushed down
@@ -3628,209 +2186,61 @@ fn test_pushdown_through_aggregate_grouping_sets_with_reordered_input() {
             aggregate_expr,
             vec![None],
             projection,
-            reordered_schema,
-        )
-        .unwrap(),
-    );
-
-    let agg_output_schema = aggregate.schema();
-
-    // Filter on b (present in all grouping sets) should be pushed down
-    let predicate = col_lit_predicate("b", "bar", &agg_output_schema);
-    let plan = Arc::new(FilterExec::try_new(predicate, aggregate.clone()).unwrap());
-
-    insta::assert_snapshot!(
-        OptimizationTest::new(plan, FilterPushdown::new(), true),
-        @r"
-    OptimizationTest:
-      input:
-        - FilterExec: b@1 = bar
-        -   AggregateExec: mode=Final, gby=[(a@1 as a, b@2 as b), (NULL as a, b@2 as b)], aggr=[cnt]
-        -     ProjectionExec: expr=[c@2 as c, a@0 as a, b@1 as b]
-        -       DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true
-      output:
-        Ok:
-          - AggregateExec: mode=Final, gby=[(a@1 as a, b@2 as b), (NULL as a, b@2 as b)], aggr=[cnt], ordering_mode=PartiallySorted([1])
-          -   ProjectionExec: expr=[c@2 as c, a@0 as a, b@1 as b]
-          -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=b@1 = bar
-    "
-    );
-
-    // Filter on a (missing from second grouping set) should not be pushed down
-    let predicate = col_lit_predicate("a", "foo", &agg_output_schema);
-    let plan = Arc::new(FilterExec::try_new(predicate, aggregate).unwrap());
-
-    insta::assert_snapshot!(
-        OptimizationTest::new(plan, FilterPushdown::new(), true),
-        @r"
-    OptimizationTest:
-      input:
-        - FilterExec: a@0 = foo
-        -   AggregateExec: mode=Final, gby=[(a@1 as a, b@2 as b), (NULL as a, b@2 as b)], aggr=[cnt]
-        -     ProjectionExec: expr=[c@2 as c, a@0 as a, b@1 as b]
-        -       DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true
-      output:
-        Ok:
-          - FilterExec: a@0 = foo
-          -   AggregateExec: mode=Final, gby=[(a@1 as a, b@2 as b), (NULL as a, b@2 as b)], aggr=[cnt]
-          -     ProjectionExec: expr=[c@2 as c, a@0 as a, b@1 as b]
-          -       DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true
-    "
-    );
-}
-
-/// Regression test for https://github.com/apache/datafusion/issues/21065.
-///
-/// Given a plan similar to the following, ensure that the filter is pushed down
-/// through an AggregateExec whose input columns are reordered by a ProjectionExec.
-#[tokio::test]
-async fn test_hashjoin_dynamic_filter_pushdown_through_aggregate_with_reordered_input() {
-    // Build side
-    let build_batches = vec![record_batch!(("a", Utf8, ["h1", "h2"])).unwrap()];
-    let build_schema =
-        Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, false)]));
-    let build_scan = TestScanBuilder::new(Arc::clone(&build_schema))
-        .with_support(true)
-        .with_batches(build_batches)
-        .build();
-
-    // Probe side
-    let probe_batches = vec![
-        record_batch!(
-            ("a", Utf8, ["h1", "h2", "h3", "h4"]),
-            ("value", Float64, [1.0, 2.0, 3.0, 4.0])
-        )
-        .unwrap(),
-    ];
-    let probe_schema = Arc::new(Schema::new(vec![
-        Field::new("a", DataType::Utf8, false),
-        Field::new("value", DataType::Float64, false),
-    ]));
-    let probe_scan = TestScanBuilder::new(Arc::clone(&probe_schema))
-        .with_support(true)
-        .with_batches(probe_batches)
-        .build();
-
-    // ProjectionExec reorders (a, value) → (value, a)
-    let reordered_schema = Arc::new(Schema::new(vec![
-        Field::new("value", DataType::Float64, false),
-        Field::new("a", DataType::Utf8, false),
-    ]));
-    let projection = Arc::new(
-        ProjectionExec::try_new(
-            vec![
-                (col("value", &probe_schema).unwrap(), "value".to_string()),
-                (col("a", &probe_schema).unwrap(), "a".to_string()),
-            ],
-            probe_scan,
-        )
-        .unwrap(),
-    );
-
-    // AggregateExec: GROUP BY a@1, min(value@0)
-    let aggregate_expr = vec![
-        AggregateExprBuilder::new(
-            min_udaf(),
-            vec![col("value", &reordered_schema).unwrap()],
-        )
-        .schema(reordered_schema.clone())
-        .alias("min_value")
-        .build()
-        .map(Arc::new)
-        .unwrap(),
-    ];
-    let group_by = PhysicalGroupBy::new_single(vec![(
-        col("a", &reordered_schema).unwrap(), // a@1 in input
-        "a".to_string(),
-    )]);
-
-    let aggregate = Arc::new(
-        AggregateExec::try_new(
-            AggregateMode::Single,
-            group_by,
-            aggregate_expr,
-            vec![None],
-            projection,
-            reordered_schema,
-        )
-        .unwrap(),
-    );
-
-    // Aggregate output schema: (a@0, min_value@1)
-    let agg_output_schema = aggregate.schema();
-
-    // Join the build and probe side
-    let plan = Arc::new(
-        HashJoinExec::try_new(
-            build_scan,
-            aggregate,
-            vec![(
-                col("a", &build_schema).unwrap(),
-                col("a", &agg_output_schema).unwrap(),
-            )],
-            None,
-            &JoinType::Inner,
-            None,
-            PartitionMode::CollectLeft,
-            datafusion_common::NullEquality::NullEqualsNothing,
-            false,
+            reordered_schema,
         )
         .unwrap(),
-    ) as Arc<dyn ExecutionPlan>;
+    );
+
+    let agg_output_schema = aggregate.schema();
+
+    // Filter on b (present in all grouping sets) should be pushed down
+    let predicate = col_lit_predicate("b", "bar", &agg_output_schema);
+    let plan = Arc::new(FilterExec::try_new(predicate, aggregate.clone()).unwrap());
 
-    // The HashJoin's dynamic filter on `a` should push
-    // through the aggregate and reach the probe-side DataSource.
     insta::assert_snapshot!(
-        OptimizationTest::new(Arc::clone(&plan), FilterPushdown::new_post_optimization(), true),
+        OptimizationTest::new(plan, FilterPushdown::new(), true),
         @r"
     OptimizationTest:
       input:
-        - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0)]
-        -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a], file_type=test, pushdown_supported=true
-        -   AggregateExec: mode=Single, gby=[a@1 as a], aggr=[min_value]
-        -     ProjectionExec: expr=[value@1 as value, a@0 as a]
-        -       DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, value], file_type=test, pushdown_supported=true
+        - FilterExec: b@1 = bar
+        -   AggregateExec: mode=Final, gby=[(a@1 as a, b@2 as b), (NULL as a, b@2 as b)], aggr=[cnt]
+        -     ProjectionExec: expr=[c@2 as c, a@0 as a, b@1 as b]
+        -       DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true
       output:
         Ok:
-          - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0)]
-          -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a], file_type=test, pushdown_supported=true
-          -   AggregateExec: mode=Single, gby=[a@1 as a], aggr=[min_value]
-          -     ProjectionExec: expr=[value@1 as value, a@0 as a]
-          -       DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, value], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ]
+          - AggregateExec: mode=Final, gby=[(a@1 as a, b@2 as b), (NULL as a, b@2 as b)], aggr=[cnt], ordering_mode=PartiallySorted([1])
+          -   ProjectionExec: expr=[c@2 as c, a@0 as a, b@1 as b]
+          -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=b@1 = bar
     "
     );
 
-    // Actually execute the plan to verify the dynamic filter is populated
-    let mut config = ConfigOptions::default();
-    config.execution.parquet.pushdown_filters = true;
-    let plan = FilterPushdown::new_post_optimization()
-        .optimize(plan, &config)
-        .unwrap();
-
-    let session_config = SessionConfig::new().with_batch_size(10);
-    let session_ctx = SessionContext::new_with_config(session_config);
-    session_ctx.register_object_store(
-        ObjectStoreUrl::parse("test://").unwrap().as_ref(),
-        Arc::new(InMemory::new()),
-    );
-    let state = session_ctx.state();
-    let task_ctx = state.task_ctx();
-    let mut stream = plan.execute(0, Arc::clone(&task_ctx)).unwrap();
-    stream.next().await.unwrap().unwrap();
+    // Filter on a (missing from second grouping set) should not be pushed down
+    let predicate = col_lit_predicate("a", "foo", &agg_output_schema);
+    let plan = Arc::new(FilterExec::try_new(predicate, aggregate).unwrap());
 
-    // After execution, the dynamic filter should be populated with values
     insta::assert_snapshot!(
-        format!("{}", format_plan_for_test(&plan)),
+        OptimizationTest::new(plan, FilterPushdown::new(), true),
         @r"
-    - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0)]
-    -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a], file_type=test, pushdown_supported=true
-    -   AggregateExec: mode=Single, gby=[a@1 as a], aggr=[min_value]
-    -     ProjectionExec: expr=[value@1 as value, a@0 as a]
-    -       DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, value], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 >= h1 AND a@0 <= h2 AND a@0 IN (SET) ([h1, h2]) ]
+    OptimizationTest:
+      input:
+        - FilterExec: a@0 = foo
+        -   AggregateExec: mode=Final, gby=[(a@1 as a, b@2 as b), (NULL as a, b@2 as b)], aggr=[cnt]
+        -     ProjectionExec: expr=[c@2 as c, a@0 as a, b@1 as b]
+        -       DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true
+      output:
+        Ok:
+          - FilterExec: a@0 = foo
+          -   AggregateExec: mode=Final, gby=[(a@1 as a, b@2 as b), (NULL as a, b@2 as b)], aggr=[cnt]
+          -     ProjectionExec: expr=[c@2 as c, a@0 as a, b@1 as b]
+          -       DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true
     "
     );
 }
 
+/// Regression test for https://github.com/apache/datafusion/issues/21065.
+///
+/// Given a plan similar to the following, ensure that the filter is pushed down
+/// through an AggregateExec whose input columns are reordered by a ProjectionExec.
 #[test]
 fn test_pushdown_with_computed_grouping_key() {
     // Test filter pushdown with computed grouping expression
@@ -3892,6 +2302,11 @@ fn test_pushdown_with_computed_grouping_key() {
     );
 }
 
+// Not portable to sqllogictest: in CollectLeft (the mode SQL picks for small
+// data), an empty build side short-circuits the HashJoin and the probe scan
+// is never executed, so its dynamic filter stays at `[ empty ]` rather than
+// collapsing to `[ false ]`. The Rust test uses PartitionMode::Partitioned
+// on a hand-wired plan, which does trigger the `false` path.
 #[tokio::test]
 async fn test_hashjoin_dynamic_filter_all_partitions_empty() {
     use datafusion_common::JoinType;
@@ -4024,145 +2439,9 @@ async fn test_hashjoin_dynamic_filter_all_partitions_empty() {
     );
 }
 
-#[tokio::test]
-async fn test_hashjoin_dynamic_filter_with_nulls() {
-    use datafusion_common::JoinType;
-    use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode};
-
-    // Test scenario where build side has NULL values in join keys
-    // This validates NULL handling in bounds computation and filter generation
-
-    // Create build side with NULL values
-    let build_batch = RecordBatch::try_new(
-        Arc::new(Schema::new(vec![
-            Field::new("a", DataType::Utf8, true),  // nullable
-            Field::new("b", DataType::Int32, true), // nullable
-        ])),
-        vec![
-            Arc::new(StringArray::from(vec![Some("aa"), None, Some("ab")])),
-            Arc::new(Int32Array::from(vec![Some(1), Some(2), None])),
-        ],
-    )
-    .unwrap();
-    let build_batches = vec![build_batch];
-    let build_side_schema = Arc::new(Schema::new(vec![
-        Field::new("a", DataType::Utf8, true),
-        Field::new("b", DataType::Int32, true),
-    ]));
-    let build_scan = TestScanBuilder::new(Arc::clone(&build_side_schema))
-        .with_support(true)
-        .with_batches(build_batches)
-        .build();
-
-    // Create probe side with nullable fields
-    let probe_batch = RecordBatch::try_new(
-        Arc::new(Schema::new(vec![
-            Field::new("a", DataType::Utf8, true),
-            Field::new("b", DataType::Int32, true),
-            Field::new("c", DataType::Float64, false),
-        ])),
-        vec![
-            Arc::new(StringArray::from(vec![
-                Some("aa"),
-                Some("ab"),
-                Some("ac"),
-                None,
-            ])),
-            Arc::new(Int32Array::from(vec![Some(1), Some(3), Some(4), Some(5)])),
-            Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0, 4.0])),
-        ],
-    )
-    .unwrap();
-    let probe_batches = vec![probe_batch];
-    let probe_side_schema = Arc::new(Schema::new(vec![
-        Field::new("a", DataType::Utf8, true),
-        Field::new("b", DataType::Int32, true),
-        Field::new("c", DataType::Float64, false),
-    ]));
-    let probe_scan = TestScanBuilder::new(Arc::clone(&probe_side_schema))
-        .with_support(true)
-        .with_batches(probe_batches)
-        .build();
-
-    // Create HashJoinExec in CollectLeft mode (simpler for this test)
-    let on = vec![
-        (
-            col("a", &build_side_schema).unwrap(),
-            col("a", &probe_side_schema).unwrap(),
-        ),
-        (
-            col("b", &build_side_schema).unwrap(),
-            col("b", &probe_side_schema).unwrap(),
-        ),
-    ];
-    let plan = Arc::new(
-        HashJoinExec::try_new(
-            build_scan,
-            Arc::clone(&probe_scan),
-            on,
-            None,
-            &JoinType::Inner,
-            None,
-            PartitionMode::CollectLeft,
-            datafusion_common::NullEquality::NullEqualsNothing,
-            false,
-        )
-        .unwrap(),
-    );
-
-    // Apply the filter pushdown optimizer
-    let mut config = SessionConfig::new();
-    config.options_mut().execution.parquet.pushdown_filters = true;
-    let optimizer = FilterPushdown::new_post_optimization();
-    let plan = optimizer.optimize(plan, config.options()).unwrap();
-
-    insta::assert_snapshot!(
-        format_plan_for_test(&plan),
-        @r"
-    - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)]
-    -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b], file_type=test, pushdown_supported=true
-    -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ]
-    "
-    );
-
-    // Put some data through the plan to check that the filter is updated to reflect the TopK state
-    let session_ctx = SessionContext::new_with_config(config);
-    session_ctx.register_object_store(
-        ObjectStoreUrl::parse("test://").unwrap().as_ref(),
-        Arc::new(InMemory::new()),
-    );
-    let state = session_ctx.state();
-    let task_ctx = state.task_ctx();
-    // Execute all partitions (required for partitioned hash join coordination)
-    let batches = collect(Arc::clone(&plan), Arc::clone(&task_ctx))
-        .await
-        .unwrap();
-
-    // Test that filters are pushed down correctly to each side of the join
-    insta::assert_snapshot!(
-        format_plan_for_test(&plan),
-        @r"
-    - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)]
-    -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b], file_type=test, pushdown_supported=true
-    -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= 1 AND b@1 <= 2 AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:1}, {c0:,c1:2}, {c0:ab,c1:}]) ]
-    "
-    );
-
-    #[rustfmt::skip]
-    let expected = [
-        "+----+---+----+---+-----+",
-        "| a  | b | a  | b | c   |",
-        "+----+---+----+---+-----+",
-        "| aa | 1 | aa | 1 | 1.0 |",
-        "+----+---+----+---+-----+",
-    ];
-    assert_batches_eq!(&expected, &batches);
-}
-
-/// Test that when hash_join_inlist_pushdown_max_size is set to a very small value,
-/// the HashTable strategy is used instead of InList strategy, even with small build sides.
-/// This test is identical to test_hashjoin_dynamic_filter_pushdown_partitioned except
-/// for the config setting that forces the HashTable strategy.
+// Not portable to sqllogictest: same reason as
+// test_hashjoin_dynamic_filter_pushdown_partitioned — hand-wires
+// PartitionMode::Partitioned, which SQL never picks for small parquet inputs.
 #[tokio::test]
 async fn test_hashjoin_hash_table_pushdown_partitioned() {
     use datafusion_common::JoinType;
@@ -4325,10 +2604,10 @@ async fn test_hashjoin_hash_table_pushdown_partitioned() {
     );
 }
 
-/// Test that when hash_join_inlist_pushdown_max_size is set to a very small value,
-/// the HashTable strategy is used instead of InList strategy in CollectLeft mode.
-/// This test is identical to test_hashjoin_dynamic_filter_pushdown_collect_left except
-/// for the config setting that forces the HashTable strategy.
+// Ported to push_down_filter_parquet.slt (`hl_build`/`hl_probe` fixture).
+// Rust version retained only because the slt port cannot hand-wire the
+// RepartitionExec-above-probe shape this test uses; the hash_lookup vs
+// IN (SET) invariant is captured in the slt port.
 #[tokio::test]
 async fn test_hashjoin_hash_table_pushdown_collect_left() {
     use datafusion_common::JoinType;
@@ -4397,145 +2676,10 @@ async fn test_hashjoin_hash_table_pushdown_collect_left() {
             col("b", &probe_side_schema).unwrap(),
         ),
     ];
-    let hash_join = Arc::new(
-        HashJoinExec::try_new(
-            build_scan,
-            probe_repartition,
-            on,
-            None,
-            &JoinType::Inner,
-            None,
-            PartitionMode::CollectLeft,
-            datafusion_common::NullEquality::NullEqualsNothing,
-            false,
-        )
-        .unwrap(),
-    );
-
-    // Top-level CoalescePartitionsExec
-    let cp = Arc::new(CoalescePartitionsExec::new(hash_join)) as Arc<dyn ExecutionPlan>;
-    // Add a sort for deterministic output
-    let plan = Arc::new(SortExec::new(
-        LexOrdering::new(vec![PhysicalSortExpr::new(
-            col("a", &probe_side_schema).unwrap(),
-            SortOptions::new(true, false), // descending, nulls_first
-        )])
-        .unwrap(),
-        cp,
-    )) as Arc<dyn ExecutionPlan>;
-
-    // Apply the optimization with config setting that forces HashTable strategy
-    let session_config = SessionConfig::default()
-        .with_batch_size(10)
-        .set_usize("datafusion.optimizer.hash_join_inlist_pushdown_max_size", 1)
-        .set_bool("datafusion.execution.parquet.pushdown_filters", true)
-        .set_bool("datafusion.optimizer.enable_dynamic_filter_pushdown", true);
-    let plan = FilterPushdown::new_post_optimization()
-        .optimize(plan, session_config.options())
-        .unwrap();
-    let session_ctx = SessionContext::new_with_config(session_config);
-    session_ctx.register_object_store(
-        ObjectStoreUrl::parse("test://").unwrap().as_ref(),
-        Arc::new(InMemory::new()),
-    );
-    let state = session_ctx.state();
-    let task_ctx = state.task_ctx();
-    let batches = collect(Arc::clone(&plan), Arc::clone(&task_ctx))
-        .await
-        .unwrap();
-
-    // Verify that hash_lookup is used instead of IN (SET)
-    let plan_str = format_plan_for_test(&plan).to_string();
-    assert!(
-        plan_str.contains("hash_lookup"),
-        "Expected hash_lookup in plan but got: {plan_str}"
-    );
-    assert!(
-        !plan_str.contains("IN (SET)"),
-        "Expected no IN (SET) in plan but got: {plan_str}"
-    );
-
-    let result = format!("{}", pretty_format_batches(&batches).unwrap());
-
-    let probe_scan_metrics = probe_scan.metrics().unwrap();
-
-    // The probe side had 4 rows, but after applying the dynamic filter only 2 rows should remain.
-    assert_eq!(probe_scan_metrics.output_rows().unwrap(), 2);
-
-    // Results should be identical to the InList version
-    insta::assert_snapshot!(
-        result,
-        @r"
-    +----+----+-----+----+----+-----+
-    | a  | b  | c   | a  | b  | e   |
-    +----+----+-----+----+----+-----+
-    | ab | bb | 2.0 | ab | bb | 2.0 |
-    | aa | ba | 1.0 | aa | ba | 1.0 |
-    +----+----+-----+----+----+-----+
-    ",
-    );
-}
-
-/// Test HashTable strategy with integer multi-column join keys.
-/// Verifies that hash_lookup works correctly with integer data types.
-#[tokio::test]
-async fn test_hashjoin_hash_table_pushdown_integer_keys() {
-    use datafusion_common::JoinType;
-    use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode};
-
-    // Create build side with integer keys
-    let build_batches = vec![
-        record_batch!(
-            ("id1", Int32, [1, 2]),
-            ("id2", Int32, [10, 20]),
-            ("value", Float64, [100.0, 200.0])
-        )
-        .unwrap(),
-    ];
-    let build_side_schema = Arc::new(Schema::new(vec![
-        Field::new("id1", DataType::Int32, false),
-        Field::new("id2", DataType::Int32, false),
-        Field::new("value", DataType::Float64, false),
-    ]));
-    let build_scan = TestScanBuilder::new(Arc::clone(&build_side_schema))
-        .with_support(true)
-        .with_batches(build_batches)
-        .build();
-
-    // Create probe side with more integer rows
-    let probe_batches = vec![
-        record_batch!(
-            ("id1", Int32, [1, 2, 3, 4]),
-            ("id2", Int32, [10, 20, 30, 40]),
-            ("data", Utf8, ["a", "b", "c", "d"])
-        )
-        .unwrap(),
-    ];
-    let probe_side_schema = Arc::new(Schema::new(vec![
-        Field::new("id1", DataType::Int32, false),
-        Field::new("id2", DataType::Int32, false),
-        Field::new("data", DataType::Utf8, false),
-    ]));
-    let probe_scan = TestScanBuilder::new(Arc::clone(&probe_side_schema))
-        .with_support(true)
-        .with_batches(probe_batches)
-        .build();
-
-    // Create join on multiple integer columns
-    let on = vec![
-        (
-            col("id1", &build_side_schema).unwrap(),
-            col("id1", &probe_side_schema).unwrap(),
-        ),
-        (
-            col("id2", &build_side_schema).unwrap(),
-            col("id2", &probe_side_schema).unwrap(),
-        ),
-    ];
-    let plan = Arc::new(
+    let hash_join = Arc::new(
         HashJoinExec::try_new(
             build_scan,
-            Arc::clone(&probe_scan),
+            probe_repartition,
             on,
             None,
             &JoinType::Inner,
@@ -4547,7 +2691,19 @@ async fn test_hashjoin_hash_table_pushdown_integer_keys() {
         .unwrap(),
     );
 
-    // Apply optimization with forced HashTable strategy
+    // Top-level CoalescePartitionsExec
+    let cp = Arc::new(CoalescePartitionsExec::new(hash_join)) as Arc<dyn ExecutionPlan>;
+    // Add a sort for deterministic output
+    let plan = Arc::new(SortExec::new(
+        LexOrdering::new(vec![PhysicalSortExpr::new(
+            col("a", &probe_side_schema).unwrap(),
+            SortOptions::new(true, false), // descending, nulls_first
+        )])
+        .unwrap(),
+        cp,
+    )) as Arc<dyn ExecutionPlan>;
+
+    // Apply the optimization with config setting that forces HashTable strategy
     let session_config = SessionConfig::default()
         .with_batch_size(10)
         .set_usize("datafusion.optimizer.hash_join_inlist_pushdown_max_size", 1)
@@ -4567,7 +2723,7 @@ async fn test_hashjoin_hash_table_pushdown_integer_keys() {
         .await
         .unwrap();
 
-    // Verify hash_lookup is used
+    // Verify that hash_lookup is used instead of IN (SET)
     let plan_str = format_plan_for_test(&plan).to_string();
     assert!(
         plan_str.contains("hash_lookup"),
@@ -4581,22 +2737,30 @@ async fn test_hashjoin_hash_table_pushdown_integer_keys() {
     let result = format!("{}", pretty_format_batches(&batches).unwrap());
 
     let probe_scan_metrics = probe_scan.metrics().unwrap();
-    // Only 2 rows from probe side match the build side
+
+    // The probe side had 4 rows, but after applying the dynamic filter only 2 rows should remain.
     assert_eq!(probe_scan_metrics.output_rows().unwrap(), 2);
 
+    // Results should be identical to the InList version
     insta::assert_snapshot!(
         result,
         @r"
-    +-----+-----+-------+-----+-----+------+
-    | id1 | id2 | value | id1 | id2 | data |
-    +-----+-----+-------+-----+-----+------+
-    | 1   | 10  | 100.0 | 1   | 10  | a    |
-    | 2   | 20  | 200.0 | 2   | 20  | b    |
-    +-----+-----+-------+-----+-----+------+
+    +----+----+-----+----+----+-----+
+    | a  | b  | c   | a  | b  | e   |
+    +----+----+-----+----+----+-----+
+    | ab | bb | 2.0 | ab | bb | 2.0 |
+    | aa | ba | 1.0 | aa | ba | 1.0 |
+    +----+----+-----+----+----+-----+
     ",
     );
 }
 
+// Not portable to sqllogictest: asserts on `HashJoinExec::dynamic_filter_for_test().is_used()`
+// which is a debug-only API. The observable behavior (probe-side scan
+// receiving the dynamic filter when the data source supports it) is
+// already covered by the simpler CollectLeft port in push_down_filter_parquet.slt;
+// the with_support(false) branch has no SQL analog (parquet always supports
+// pushdown).
 #[tokio::test]
 async fn test_hashjoin_dynamic_filter_pushdown_is_used() {
     use datafusion_common::JoinType;
@@ -4685,7 +2849,13 @@ async fn test_hashjoin_dynamic_filter_pushdown_is_used() {
     }
 }
 
-/// Regression test for https://github.com/apache/datafusion/issues/20109
+/// Regression test for https://github.com/apache/datafusion/issues/20109.
+///
+/// Not portable to sqllogictest: the regression specifically targets the
+/// physical FilterPushdown rule running over *stacked* FilterExecs with
+/// projections on a MemorySourceConfig. In SQL the logical optimizer
+/// collapses the two filters before physical planning, so the stacked
+/// FilterExec shape this test exercises is unreachable.
 #[tokio::test]
 async fn test_filter_with_projection_pushdown() {
     use arrow::array::{Int64Array, RecordBatch, StringArray};
@@ -4772,7 +2942,11 @@ async fn test_filter_with_projection_pushdown() {
     assert_batches_eq!(expected, &result);
 }
 
-/// Test that ExecutionPlan::apply_expressions() can discover dynamic filters across the plan tree
+/// Test that ExecutionPlan::apply_expressions() can discover dynamic filters across the plan tree.
+///
+/// Not portable to sqllogictest: asserts by walking the plan tree with
+/// `apply_expressions` + `downcast_ref::<DynamicFilterPhysicalExpr>` and
+/// counting nodes. Neither API is observable from SQL.
 #[tokio::test]
 async fn test_discover_dynamic_filters_via_expressions_api() {
     use datafusion_common::JoinType;
@@ -4872,277 +3046,3 @@ async fn test_discover_dynamic_filters_via_expressions_api() {
         "After optimization, should discover exactly 2 dynamic filters (1 in HashJoinExec, 1 in DataSourceExec), found {count_after}"
     );
 }
-
-#[tokio::test]
-async fn test_hashjoin_dynamic_filter_pushdown_left_join() {
-    use datafusion_common::JoinType;
-    use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode};
-
-    // Create build side with limited values
-    let build_batches = vec![
-        record_batch!(
-            ("a", Utf8, ["aa", "ab"]),
-            ("b", Utf8, ["ba", "bb"]),
-            ("c", Float64, [1.0, 2.0])
-        )
-        .unwrap(),
-    ];
-    let build_side_schema = Arc::new(Schema::new(vec![
-        Field::new("a", DataType::Utf8, false),
-        Field::new("b", DataType::Utf8, false),
-        Field::new("c", DataType::Float64, false),
-    ]));
-    let build_scan = TestScanBuilder::new(Arc::clone(&build_side_schema))
-        .with_support(true)
-        .with_batches(build_batches)
-        .build();
-
-    // Create probe side with more values (some won't match)
-    let probe_batches = vec![
-        record_batch!(
-            ("a", Utf8, ["aa", "ab", "ac", "ad"]),
-            ("b", Utf8, ["ba", "bb", "bc", "bd"]),
-            ("e", Float64, [1.0, 2.0, 3.0, 4.0])
-        )
-        .unwrap(),
-    ];
-    let probe_side_schema = Arc::new(Schema::new(vec![
-        Field::new("a", DataType::Utf8, false),
-        Field::new("b", DataType::Utf8, false),
-        Field::new("e", DataType::Float64, false),
-    ]));
-    let probe_scan = TestScanBuilder::new(Arc::clone(&probe_side_schema))
-        .with_support(true)
-        .with_batches(probe_batches)
-        .build();
-
-    // Create HashJoinExec with Left join and CollectLeft mode
-    let on = vec![
-        (
-            col("a", &build_side_schema).unwrap(),
-            col("a", &probe_side_schema).unwrap(),
-        ),
-        (
-            col("b", &build_side_schema).unwrap(),
-            col("b", &probe_side_schema).unwrap(),
-        ),
-    ];
-    let plan = Arc::new(
-        HashJoinExec::try_new(
-            build_scan,
-            Arc::clone(&probe_scan),
-            on,
-            None,
-            &JoinType::Left,
-            None,
-            PartitionMode::CollectLeft,
-            datafusion_common::NullEquality::NullEqualsNothing,
-            false,
-        )
-        .unwrap(),
-    ) as Arc<dyn ExecutionPlan>;
-
-    // Expect the dynamic filter predicate to be pushed down into the probe side DataSource
-    insta::assert_snapshot!(
-        OptimizationTest::new(Arc::clone(&plan), FilterPushdown::new_post_optimization(), true),
-        @r"
-    OptimizationTest:
-      input:
-        - HashJoinExec: mode=CollectLeft, join_type=Left, on=[(a@0, a@0), (b@1, b@1)]
-        -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true
-        -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true
-      output:
-        Ok:
-          - HashJoinExec: mode=CollectLeft, join_type=Left, on=[(a@0, a@0), (b@1, b@1)]
-          -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true
-          -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ]
-    ",
-    );
-
-    // Actually apply the optimization and execute the plan
-    let mut config = ConfigOptions::default();
-    config.execution.parquet.pushdown_filters = true;
-    config.optimizer.enable_dynamic_filter_pushdown = true;
-    let plan = FilterPushdown::new_post_optimization()
-        .optimize(plan, &config)
-        .unwrap();
-
-    // Test that dynamic filter linking survives with_new_children
-    let children = plan.children().into_iter().map(Arc::clone).collect();
-    let plan = plan.with_new_children(children).unwrap();
-
-    let config = SessionConfig::new().with_batch_size(10);
-    let session_ctx = SessionContext::new_with_config(config);
-    session_ctx.register_object_store(
-        ObjectStoreUrl::parse("test://").unwrap().as_ref(),
-        Arc::new(InMemory::new()),
-    );
-    let state = session_ctx.state();
-    let task_ctx = state.task_ctx();
-    let batches = collect(Arc::clone(&plan), Arc::clone(&task_ctx))
-        .await
-        .unwrap();
-
-    // After execution, verify the dynamic filter was populated with bounds and IN-list
-    insta::assert_snapshot!(
-        format!("{}", format_plan_for_test(&plan)),
-        @r"
-    - HashJoinExec: mode=CollectLeft, join_type=Left, on=[(a@0, a@0), (b@1, b@1)]
-    -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true
-    -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:ba}, {c0:ab,c1:bb}]) ]
-    "
-    );
-
-    // Verify result correctness: left join preserves all build (left) rows.
-    // All build rows match probe rows here, so we get 2 matched rows.
-    // The dynamic filter pruned unmatched probe rows (ac, ad) at scan time,
-    // which is safe because those probe rows would never match any build row.
-    let result = format!("{}", pretty_format_batches(&batches).unwrap());
-    insta::assert_snapshot!(
-        result,
-        @r"
-    +----+----+-----+----+----+-----+
-    | a  | b  | c   | a  | b  | e   |
-    +----+----+-----+----+----+-----+
-    | aa | ba | 1.0 | aa | ba | 1.0 |
-    | ab | bb | 2.0 | ab | bb | 2.0 |
-    +----+----+-----+----+----+-----+
-    "
-    );
-}
-
-#[tokio::test]
-async fn test_hashjoin_dynamic_filter_pushdown_left_semi_join() {
-    use datafusion_common::JoinType;
-    use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode};
-
-    // Create build side with limited values
-    let build_batches = vec![
-        record_batch!(
-            ("a", Utf8, ["aa", "ab"]),
-            ("b", Utf8, ["ba", "bb"]),
-            ("c", Float64, [1.0, 2.0])
-        )
-        .unwrap(),
-    ];
-    let build_side_schema = Arc::new(Schema::new(vec![
-        Field::new("a", DataType::Utf8, false),
-        Field::new("b", DataType::Utf8, false),
-        Field::new("c", DataType::Float64, false),
-    ]));
-    let build_scan = TestScanBuilder::new(Arc::clone(&build_side_schema))
-        .with_support(true)
-        .with_batches(build_batches)
-        .build();
-
-    // Create probe side with more values (some won't match)
-    let probe_batches = vec![
-        record_batch!(
-            ("a", Utf8, ["aa", "ab", "ac", "ad"]),
-            ("b", Utf8, ["ba", "bb", "bc", "bd"]),
-            ("e", Float64, [1.0, 2.0, 3.0, 4.0])
-        )
-        .unwrap(),
-    ];
-    let probe_side_schema = Arc::new(Schema::new(vec![
-        Field::new("a", DataType::Utf8, false),
-        Field::new("b", DataType::Utf8, false),
-        Field::new("e", DataType::Float64, false),
-    ]));
-    let probe_scan = TestScanBuilder::new(Arc::clone(&probe_side_schema))
-        .with_support(true)
-        .with_batches(probe_batches)
-        .build();
-
-    // Create HashJoinExec with LeftSemi join and CollectLeft mode
-    let on = vec![
-        (
-            col("a", &build_side_schema).unwrap(),
-            col("a", &probe_side_schema).unwrap(),
-        ),
-        (
-            col("b", &build_side_schema).unwrap(),
-            col("b", &probe_side_schema).unwrap(),
-        ),
-    ];
-    let plan = Arc::new(
-        HashJoinExec::try_new(
-            build_scan,
-            Arc::clone(&probe_scan),
-            on,
-            None,
-            &JoinType::LeftSemi,
-            None,
-            PartitionMode::CollectLeft,
-            datafusion_common::NullEquality::NullEqualsNothing,
-            false,
-        )
-        .unwrap(),
-    ) as Arc<dyn ExecutionPlan>;
-
-    // Expect the dynamic filter predicate to be pushed down into the probe side DataSource
-    insta::assert_snapshot!(
-        OptimizationTest::new(Arc::clone(&plan), FilterPushdown::new_post_optimization(), true),
-        @r"
-    OptimizationTest:
-      input:
-        - HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(a@0, a@0), (b@1, b@1)]
-        -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true
-        -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true
-      output:
-        Ok:
-          - HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(a@0, a@0), (b@1, b@1)]
-          -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true
-          -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ empty ]
-    ",
-    );
-
-    // Actually apply the optimization and execute the plan
-    let mut config = ConfigOptions::default();
-    config.execution.parquet.pushdown_filters = true;
-    config.optimizer.enable_dynamic_filter_pushdown = true;
-    let plan = FilterPushdown::new_post_optimization()
-        .optimize(plan, &config)
-        .unwrap();
-
-    // Test that dynamic filter linking survives with_new_children
-    let children = plan.children().into_iter().map(Arc::clone).collect();
-    let plan = plan.with_new_children(children).unwrap();
-
-    let config = SessionConfig::new().with_batch_size(10);
-    let session_ctx = SessionContext::new_with_config(config);
-    session_ctx.register_object_store(
-        ObjectStoreUrl::parse("test://").unwrap().as_ref(),
-        Arc::new(InMemory::new()),
-    );
-    let state = session_ctx.state();
-    let task_ctx = state.task_ctx();
-    let batches = collect(Arc::clone(&plan), Arc::clone(&task_ctx))
-        .await
-        .unwrap();
-
-    // After execution, verify the dynamic filter was populated with bounds and IN-list
-    insta::assert_snapshot!(
-        format!("{}", format_plan_for_test(&plan)),
-        @r"
-    - HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(a@0, a@0), (b@1, b@1)]
-    -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true
-    -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, e], file_type=test, pushdown_supported=true, predicate=DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:ba}, {c0:ab,c1:bb}]) ]
-    "
-    );
-
-    // Verify result correctness: left semi join returns only build (left) rows
-    // that have at least one matching probe row. Output schema is build-side columns only.
-    let result = format!("{}", pretty_format_batches(&batches).unwrap());
-    insta::assert_snapshot!(
-        result,
-        @r"
-    +----+----+-----+
-    | a  | b  | c   |
-    +----+----+-----+
-    | aa | ba | 1.0 |
-    | ab | bb | 2.0 |
-    +----+----+-----+
-    "
-    );
-}
diff --git a/datafusion/sqllogictest/test_files/push_down_filter_parquet.slt b/datafusion/sqllogictest/test_files/push_down_filter_parquet.slt
index ab6847e1c4834..8469c32a17033 100644
--- a/datafusion/sqllogictest/test_files/push_down_filter_parquet.slt
+++ b/datafusion/sqllogictest/test_files/push_down_filter_parquet.slt
@@ -166,6 +166,863 @@ drop table small_table;
 statement ok
 drop table large_table;
 
+
+########
+# Ported from datafusion/core/tests/physical_optimizer/filter_pushdown.rs::
+#   test_topk_dynamic_filter_pushdown_integration
+#
+# Integration test for dynamic filter pushdown with TopK:
+# a TopK over an ordered parquet file emits a dynamic filter that should
+# get pushed into the parquet scan and prune every row group except the
+# first (128 rows), visible via `pushdown_rows_matched` / `pushdown_rows_pruned`.
+#
+# `t * t` (rather than `t`) is used as the sort expression so the
+# order-pushdown optimizer does not rewrite the TopK away.
+########
+
+statement ok
+set datafusion.execution.parquet.max_row_group_size = 128;
+
+query I
+COPY (
+  SELECT 1372708800 + value AS t
+  FROM generate_series(0, 99999)
+  ORDER BY t
+) TO 'test_files/scratch/push_down_filter_parquet/topk_pushdown.parquet'
+STORED AS PARQUET;
+----
+100000
+
+statement ok
+CREATE EXTERNAL TABLE topk_pushdown
+STORED AS PARQUET
+LOCATION 'test_files/scratch/push_down_filter_parquet/topk_pushdown.parquet';
+
+statement ok
+set datafusion.explain.analyze_categories = 'rows';
+
+query TT
+EXPLAIN ANALYZE SELECT t FROM topk_pushdown ORDER BY t * t LIMIT 10;
+----
+Plan with Metrics
+01)SortExec: TopK(fetch=10), expr=[t@0 * t@0 ASC NULLS LAST], preserve_partitioning=[false], filter=[t@0 * t@0 < 1884329474306198481], metrics=[output_rows=10, output_batches=1, row_replacements=10]
+02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/topk_pushdown.parquet]]}, projection=[t], output_ordering=[t@0 ASC NULLS LAST], file_type=parquet, predicate=DynamicFilter [ t@0 * t@0 < 1884329474306198481 ], metrics=[output_rows=128, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=782 total → 782 matched, row_groups_pruned_bloom_filter=782 total → 782 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=128, pushdown_rows_pruned=99.87 K, predicate_cache_inner_records=128, predicate_cache_records=128, scan_efficiency_ratio=64.87% (258.7 K/398.8 K)]
+
+statement ok
+reset datafusion.explain.analyze_categories;
+
+statement ok
+reset datafusion.execution.parquet.max_row_group_size;
+
+statement ok
+drop table topk_pushdown;
+
+
+########
+# Ported from datafusion/core/tests/physical_optimizer/filter_pushdown.rs::
+#   test_topk_dynamic_filter_pushdown
+#
+# A `ORDER BY b DESC LIMIT 1` over a parquet file should emit a dynamic
+# filter on the TopK that gets pushed into the scan's predicate, visible
+# in EXPLAIN ANALYZE as `DynamicFilter [ b > <max> ]` tightened to the
+# running max seen by the TopK during execution.
+########
+
+statement ok
+COPY (
+  SELECT * FROM (VALUES
+    ('aa', 'bd', 1.0),
+    ('ab', 'bc', 2.0),
+    ('ac', 'bb', 2.0),
+    ('ad', 'ba', 1.0)
+  ) AS v(a, b, c)
+) TO 'test_files/scratch/push_down_filter_parquet/topk_single_col.parquet'
+STORED AS PARQUET;
+
+statement ok
+CREATE EXTERNAL TABLE topk_single_col (a VARCHAR, b VARCHAR, c DOUBLE)
+STORED AS PARQUET
+LOCATION 'test_files/scratch/push_down_filter_parquet/topk_single_col.parquet';
+
+# Data-correctness check — top row by b desc is (aa, bd, 1).
+query TTR
+SELECT * FROM topk_single_col ORDER BY b DESC LIMIT 1;
+----
+aa bd 1
+
+# Before execution: a dynamic filter is emitted but still empty
+# (DynamicFilter [ empty ]) on the parquet scan.
+query TT
+EXPLAIN SELECT * FROM topk_single_col ORDER BY b DESC LIMIT 1;
+----
+physical_plan
+01)SortExec: TopK(fetch=1), expr=[b@1 DESC], preserve_partitioning=[false]
+02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/topk_single_col.parquet]]}, projection=[a, b, c], file_type=parquet, predicate=DynamicFilter [ empty ]
+
+statement ok
+set datafusion.explain.analyze_categories = 'rows';
+
+# After execution: the dynamic filter is tightened to `b > <max_seen>`.
+query TT
+EXPLAIN ANALYZE SELECT * FROM topk_single_col ORDER BY b DESC LIMIT 1;
+----
+Plan with Metrics
+01)SortExec: TopK(fetch=1), expr=[b@1 DESC], preserve_partitioning=[false], filter=[b@1 IS NULL OR b@1 > bd], metrics=[output_rows=1, output_batches=1, row_replacements=1]
+02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/topk_single_col.parquet]]}, projection=[a, b, c], file_type=parquet, predicate=DynamicFilter [ b@1 IS NULL OR b@1 > bd ], pruning_predicate=b_null_count@0 > 0 OR b_null_count@0 != row_count@2 AND b_max@1 > bd, required_guarantees=[], metrics=[output_rows=4, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=4, pushdown_rows_pruned=0, predicate_cache_inner_records=4, predicate_cache_records=4, scan_efficiency_ratio=22.37% (240/1.07 K)]
+
+statement ok
+reset datafusion.explain.analyze_categories;
+
+statement ok
+drop table topk_single_col;
+
+
+########
+# Ported from datafusion/core/tests/physical_optimizer/filter_pushdown.rs::
+#   test_topk_dynamic_filter_pushdown_multi_column_sort
+#
+# TopK with a compound sort key (b ASC NULLS LAST, a DESC). The dynamic
+# filter emitted to the scan should encode the tiebreaker as a compound
+# predicate of the form `b < <k1> OR (b = <k1> AND a > <k2>)`.
+# With fetch=2 the top-2 rows are (ad, ba) and (ac, bb), so the final
+# filter reads `b < bb OR (b = bb AND a > ac)`.
+########
+
+statement ok
+COPY (
+  SELECT * FROM (VALUES
+    ('ac', 'bb', 2.0),
+    ('ad', 'ba', 1.0),
+    ('aa', 'bc', 1.0),
+    ('ab', 'bd', 2.0)
+  ) AS v(a, b, c)
+) TO 'test_files/scratch/push_down_filter_parquet/topk_multi_col.parquet'
+STORED AS PARQUET;
+
+statement ok
+CREATE EXTERNAL TABLE topk_multi_col (a VARCHAR, b VARCHAR, c DOUBLE)
+STORED AS PARQUET
+LOCATION 'test_files/scratch/push_down_filter_parquet/topk_multi_col.parquet';
+
+# Data-correctness: confirm the top-2 rows come out in the expected order.
+query TTR
+SELECT * FROM topk_multi_col ORDER BY b ASC NULLS LAST, a DESC LIMIT 2;
+----
+ad ba 1
+ac bb 2
+
+statement ok
+set datafusion.explain.analyze_categories = 'rows';
+
+query TT
+EXPLAIN ANALYZE SELECT * FROM topk_multi_col ORDER BY b ASC NULLS LAST, a DESC LIMIT 2;
+----
+Plan with Metrics
+01)SortExec: TopK(fetch=2), expr=[b@1 ASC NULLS LAST, a@0 DESC], preserve_partitioning=[false], filter=[b@1 < bb OR b@1 = bb AND (a@0 IS NULL OR a@0 > ac)], metrics=[output_rows=2, output_batches=1, row_replacements=2]
+02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/topk_multi_col.parquet]]}, projection=[a, b, c], file_type=parquet, predicate=DynamicFilter [ b@1 < bb OR b@1 = bb AND (a@0 IS NULL OR a@0 > ac) ], pruning_predicate=b_null_count@1 != row_count@2 AND b_min@0 < bb OR b_null_count@1 != row_count@2 AND b_min@0 <= bb AND bb <= b_max@3 AND (a_null_count@4 > 0 OR a_null_count@4 != row_count@2 AND a_max@5 > ac), required_guarantees=[], metrics=[output_rows=4, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=4, pushdown_rows_pruned=0, predicate_cache_inner_records=8, predicate_cache_records=8, scan_efficiency_ratio=22.37% (240/1.07 K)]
+
+statement ok
+reset datafusion.explain.analyze_categories;
+
+statement ok
+drop table topk_multi_col;
+
+
+########
+# Ported from datafusion/core/tests/physical_optimizer/filter_pushdown.rs::
+#   test_hashjoin_dynamic_filter_pushdown
+#
+# CollectLeft hash join on two equi-join keys: the dynamic filter emitted
+# by the build side should cover both keys and include an `IN (SET)`
+# predicate over `struct(a, b)` for the rows it collected.
+########
+
+statement ok
+COPY (
+  SELECT * FROM (VALUES
+    ('aa', 'ba', 1.0),
+    ('ab', 'bb', 2.0)
+  ) AS v(a, b, c)
+) TO 'test_files/scratch/push_down_filter_parquet/join_build.parquet'
+STORED AS PARQUET;
+
+statement ok
+COPY (
+  SELECT * FROM (VALUES
+    ('aa', 'ba', 1.0),
+    ('ab', 'bb', 2.0),
+    ('ac', 'bc', 3.0),
+    ('ad', 'bd', 4.0)
+  ) AS v(a, b, e)
+) TO 'test_files/scratch/push_down_filter_parquet/join_probe.parquet'
+STORED AS PARQUET;
+
+statement ok
+CREATE EXTERNAL TABLE join_build (a VARCHAR, b VARCHAR, c DOUBLE)
+STORED AS PARQUET
+LOCATION 'test_files/scratch/push_down_filter_parquet/join_build.parquet';
+
+statement ok
+CREATE EXTERNAL TABLE join_probe (a VARCHAR, b VARCHAR, e DOUBLE)
+STORED AS PARQUET
+LOCATION 'test_files/scratch/push_down_filter_parquet/join_probe.parquet';
+
+# Data-correctness: only the 2 build rows should match.
+query TTRR
+SELECT p.a, p.b, build.c, p.e
+FROM join_probe p INNER JOIN join_build AS build
+  ON p.a = build.a AND p.b = build.b
+ORDER BY p.a;
+----
+aa ba 1 1
+ab bb 2 2
+
+statement ok
+set datafusion.explain.analyze_categories = 'rows';
+
+query TT
+EXPLAIN ANALYZE
+SELECT p.a, p.b, build.c, p.e
+FROM join_probe p INNER JOIN join_build AS build
+  ON p.a = build.a AND p.b = build.b;
+----
+Plan with Metrics
+01)HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)], projection=[a@3, b@4, c@2, e@5], metrics=[output_rows=2, output_batches=1, array_map_created_count=0, build_input_batches=1, build_input_rows=2, input_batches=1, input_rows=2, avg_fanout=100% (2/2), probe_hit_rate=100% (2/2)]
+02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/join_build.parquet]]}, projection=[a, b, c], file_type=parquet, metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=0, pushdown_rows_pruned=0, predicate_cache_inner_records=0, predicate_cache_records=0, scan_efficiency_ratio=20.48% (214/1.04 K)]
+03)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/join_probe.parquet]]}, projection=[a, b, e], file_type=parquet, predicate=DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:ba}, {c0:ab,c1:bb}]) ], pruning_predicate=a_null_count@1 != row_count@2 AND a_max@0 >= aa AND a_null_count@1 != row_count@2 AND a_min@3 <= ab AND b_null_count@5 != row_count@2 AND b_max@4 >= ba AND b_null_count@5 != row_count@2 AND b_min@6 <= bb, required_guarantees=[], metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=2, pushdown_rows_pruned=2, predicate_cache_inner_records=8, predicate_cache_records=4, scan_efficiency_ratio=22.78% (246/1.08 K)]
+
+statement ok
+reset datafusion.explain.analyze_categories;
+
+statement ok
+drop table join_build;
+
+statement ok
+drop table join_probe;
+
+
+########
+# Ported from datafusion/core/tests/physical_optimizer/filter_pushdown.rs::
+#   test_nested_hashjoin_dynamic_filter_pushdown
+#
+# Nested hash joins: `t1 JOIN (t2 JOIN t3 ON t2.c = t3.d) ON t1.a = t2.b`
+# should push dynamic filters down to BOTH the t2 and t3 scans
+# (t2 gets a filter on `b`, t3 gets a filter on `d`).
+########
+
+statement ok
+COPY (
+  SELECT * FROM (VALUES
+    ('aa', 1.0),
+    ('ab', 2.0)
+  ) AS v(a, x)
+) TO 'test_files/scratch/push_down_filter_parquet/nested_t1.parquet'
+STORED AS PARQUET;
+
+statement ok
+COPY (
+  SELECT * FROM (VALUES
+    ('aa', 'ca', 1.0),
+    ('ab', 'cb', 2.0),
+    ('ac', 'cc', 3.0),
+    ('ad', 'cd', 4.0),
+    ('ae', 'ce', 5.0)
+  ) AS v(b, c, y)
+) TO 'test_files/scratch/push_down_filter_parquet/nested_t2.parquet'
+STORED AS PARQUET;
+
+statement ok
+COPY (
+  SELECT * FROM (VALUES
+    ('ca', 1.0),
+    ('cb', 2.0),
+    ('cc', 3.0),
+    ('cd', 4.0),
+    ('ce', 5.0),
+    ('cf', 6.0),
+    ('cg', 7.0),
+    ('ch', 8.0)
+  ) AS v(d, z)
+) TO 'test_files/scratch/push_down_filter_parquet/nested_t3.parquet'
+STORED AS PARQUET;
+
+statement ok
+CREATE EXTERNAL TABLE nested_t1 (a VARCHAR, x DOUBLE)
+STORED AS PARQUET
+LOCATION 'test_files/scratch/push_down_filter_parquet/nested_t1.parquet';
+
+statement ok
+CREATE EXTERNAL TABLE nested_t2 (b VARCHAR, c VARCHAR, y DOUBLE)
+STORED AS PARQUET
+LOCATION 'test_files/scratch/push_down_filter_parquet/nested_t2.parquet';
+
+statement ok
+CREATE EXTERNAL TABLE nested_t3 (d VARCHAR, z DOUBLE)
+STORED AS PARQUET
+LOCATION 'test_files/scratch/push_down_filter_parquet/nested_t3.parquet';
+
+statement ok
+set datafusion.explain.analyze_categories = 'rows';
+
+query TT
+EXPLAIN ANALYZE
+SELECT *
+FROM nested_t1
+INNER JOIN nested_t2 ON nested_t1.a = nested_t2.b
+INNER JOIN nested_t3 ON nested_t2.c = nested_t3.d;
+----
+Plan with Metrics
+01)HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c@3, d@0)], metrics=[output_rows=2, output_batches=1, array_map_created_count=0, build_input_batches=1, build_input_rows=2, input_batches=1, input_rows=2, avg_fanout=100% (2/2), probe_hit_rate=100% (2/2)]
+02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, b@0)], metrics=[output_rows=2, output_batches=1, array_map_created_count=0, build_input_batches=1, build_input_rows=2, input_batches=1, input_rows=2, avg_fanout=100% (2/2), probe_hit_rate=100% (2/2)]
+03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/nested_t1.parquet]]}, projection=[a, x], file_type=parquet, metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=0, pushdown_rows_pruned=0, predicate_cache_inner_records=0, predicate_cache_records=0, scan_efficiency_ratio=18.23% (144/790)]
+04)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/nested_t2.parquet]]}, projection=[b, c, y], file_type=parquet, predicate=DynamicFilter [ b@0 >= aa AND b@0 <= ab AND b@0 IN (SET) ([aa, ab]) ], pruning_predicate=b_null_count@1 != row_count@2 AND b_max@0 >= aa AND b_null_count@1 != row_count@2 AND b_min@3 <= ab AND (b_null_count@1 != row_count@2 AND b_min@3 <= aa AND aa <= b_max@0 OR b_null_count@1 != row_count@2 AND b_min@3 <= ab AND ab <= b_max@0), required_guarantees=[b in (aa, ab)], metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=1 total → 1 matched, page_index_rows_pruned=5 total → 5 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=2, pushdown_rows_pruned=3, predicate_cache_inner_records=5, predicate_cache_records=2, scan_efficiency_ratio=23.2% (252/1.09 K)]
+05)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/nested_t3.parquet]]}, projection=[d, z], file_type=parquet, predicate=DynamicFilter [ d@0 >= ca AND d@0 <= cb AND hash_lookup ], pruning_predicate=d_null_count@1 != row_count@2 AND d_max@0 >= ca AND d_null_count@1 != row_count@2 AND d_min@3 <= cb, required_guarantees=[], metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=1 total → 1 matched, page_index_rows_pruned=8 total → 8 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=2, pushdown_rows_pruned=6, predicate_cache_inner_records=8, predicate_cache_records=2, scan_efficiency_ratio=22.12% (184/832)]
+
+statement ok
+reset datafusion.explain.analyze_categories;
+
+statement ok
+drop table nested_t1;
+
+statement ok
+drop table nested_t2;
+
+statement ok
+drop table nested_t3;
+
+
+########
+# Ported from datafusion/core/tests/physical_optimizer/filter_pushdown.rs::
+#   test_hashjoin_parent_filter_pushdown
+#
+# A FilterExec above a HashJoin should split: the single-side predicates
+# should be pushed down to each scan, while any cross-side predicate
+# stays above the join.
+########
+
+statement ok
+COPY (
+  SELECT * FROM (VALUES
+    ('aa', 'ba', 1.0),
+    ('ab', 'bb', 2.0)
+  ) AS v(a, b, c)
+) TO 'test_files/scratch/push_down_filter_parquet/parent_build.parquet'
+STORED AS PARQUET;
+
+statement ok
+COPY (
+  SELECT * FROM (VALUES
+    ('aa', 'ba', 1.0),
+    ('ab', 'bb', 2.0),
+    ('ac', 'bc', 3.0),
+    ('ad', 'bd', 4.0)
+  ) AS v(d, e, f)
+) TO 'test_files/scratch/push_down_filter_parquet/parent_probe.parquet'
+STORED AS PARQUET;
+
+statement ok
+CREATE EXTERNAL TABLE parent_build (a VARCHAR, b VARCHAR, c DOUBLE)
+STORED AS PARQUET
+LOCATION 'test_files/scratch/push_down_filter_parquet/parent_build.parquet';
+
+statement ok
+CREATE EXTERNAL TABLE parent_probe (d VARCHAR, e VARCHAR, f DOUBLE)
+STORED AS PARQUET
+LOCATION 'test_files/scratch/push_down_filter_parquet/parent_probe.parquet';
+
+query TT
+EXPLAIN
+SELECT *
+FROM parent_build
+INNER JOIN parent_probe ON parent_build.a = parent_probe.d
+WHERE parent_build.a = 'aa' AND parent_probe.e = 'ba';
+----
+physical_plan
+01)HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, d@0)]
+02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/parent_build.parquet]]}, projection=[a, b, c], file_type=parquet, predicate=a@0 = aa, pruning_predicate=a_null_count@2 != row_count@3 AND a_min@0 <= aa AND aa <= a_max@1, required_guarantees=[a in (aa)]
+03)--RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+04)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/parent_probe.parquet]]}, projection=[d, e, f], file_type=parquet, predicate=e@1 = ba AND d@0 = aa AND DynamicFilter [ empty ], pruning_predicate=e_null_count@2 != row_count@3 AND e_min@0 <= ba AND ba <= e_max@1 AND d_null_count@6 != row_count@3 AND d_min@4 <= aa AND aa <= d_max@5, required_guarantees=[d in (aa), e in (ba)]
+
+statement ok
+drop table parent_build;
+
+statement ok
+drop table parent_probe;
+
+
+########
+# Ported from datafusion/core/tests/physical_optimizer/filter_pushdown.rs::
+#   test_dynamic_filter_pushdown_through_hash_join_with_topk
+#
+# Composition: a TopK sitting above an InnerJoin should push its dynamic
+# filter THROUGH the hash join and into the probe-side scan, where it
+# gets combined (AND) with the hash join's own dynamic filter. The
+# probe-side predicate should contain BOTH a `DynamicFilter [ d IN (...) ]`
+# (from the join) and a `DynamicFilter [ e < <topk_bound> ]` (from the TopK).
+########
+
+statement ok
+COPY (
+  SELECT * FROM (VALUES
+    ('aa', 'ba', 1.0),
+    ('ab', 'bb', 2.0)
+  ) AS v(a, b, c)
+) TO 'test_files/scratch/push_down_filter_parquet/topk_join_build.parquet'
+STORED AS PARQUET;
+
+statement ok
+COPY (
+  SELECT * FROM (VALUES
+    ('aa', 'ba', 1.0),
+    ('ab', 'bb', 2.0),
+    ('ac', 'bc', 3.0),
+    ('ad', 'bd', 4.0)
+  ) AS v(d, e, f)
+) TO 'test_files/scratch/push_down_filter_parquet/topk_join_probe.parquet'
+STORED AS PARQUET;
+
+statement ok
+CREATE EXTERNAL TABLE topk_join_build (a VARCHAR, b VARCHAR, c DOUBLE)
+STORED AS PARQUET
+LOCATION 'test_files/scratch/push_down_filter_parquet/topk_join_build.parquet';
+
+statement ok
+CREATE EXTERNAL TABLE topk_join_probe (d VARCHAR, e VARCHAR, f DOUBLE)
+STORED AS PARQUET
+LOCATION 'test_files/scratch/push_down_filter_parquet/topk_join_probe.parquet';
+
+statement ok
+set datafusion.explain.analyze_categories = 'rows';
+
+query TT
+EXPLAIN ANALYZE
+SELECT topk_join_probe.e
+FROM topk_join_build
+INNER JOIN topk_join_probe ON topk_join_build.a = topk_join_probe.d
+ORDER BY topk_join_probe.e ASC
+LIMIT 2;
+----
+Plan with Metrics
+01)SortExec: TopK(fetch=2), expr=[e@0 ASC NULLS LAST], preserve_partitioning=[false], filter=[e@0 < bb], metrics=[output_rows=2, output_batches=1, row_replacements=2]
+02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, d@0)], projection=[e@2], metrics=[output_rows=2, output_batches=1, array_map_created_count=0, build_input_batches=1, build_input_rows=2, input_batches=1, input_rows=2, avg_fanout=100% (2/2), probe_hit_rate=100% (2/2)]
+03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/topk_join_build.parquet]]}, projection=[a], file_type=parquet, metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=0, pushdown_rows_pruned=0, predicate_cache_inner_records=0, predicate_cache_records=0, scan_efficiency_ratio=6.7% (70/1.04 K)]
+04)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/topk_join_probe.parquet]]}, projection=[d, e], file_type=parquet, predicate=DynamicFilter [ d@0 >= aa AND d@0 <= ab AND d@0 IN (SET) ([aa, ab]) ] AND DynamicFilter [ e@1 < bb ], pruning_predicate=d_null_count@1 != row_count@2 AND d_max@0 >= aa AND d_null_count@1 != row_count@2 AND d_min@3 <= ab AND (d_null_count@1 != row_count@2 AND d_min@3 <= aa AND aa <= d_max@0 OR d_null_count@1 != row_count@2 AND d_min@3 <= ab AND ab <= d_max@0) AND e_null_count@5 != row_count@2 AND e_min@4 < bb, required_guarantees=[d in (aa, ab)], metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=1 total → 1 matched, page_index_rows_pruned=4 total → 4 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=2, pushdown_rows_pruned=2, predicate_cache_inner_records=8, predicate_cache_records=4, scan_efficiency_ratio=15.37% (166/1.08 K)]
+
+statement ok
+reset datafusion.explain.analyze_categories;
+
+statement ok
+drop table topk_join_build;
+
+statement ok
+drop table topk_join_probe;
+
+
+########
+# Ported from datafusion/core/tests/physical_optimizer/filter_pushdown.rs::
+#   test_topk_with_projection_transformation_on_dyn_filter
+#
+# When a ProjectionExec sits between a TopK and the parquet scan, the
+# dynamic filter emitted by the TopK (on the projected column) must be
+# rewritten back to the original scan-side column before it reaches the
+# scan. Covers: simple `SELECT a`, column reorder (`SELECT b, a`),
+# expression projection (`SELECT a+1 AS a_plus_1`), and column alias
+# shadowing (`SELECT a+1 AS a`).
+########
+
+statement ok
+COPY (
+  SELECT * FROM (VALUES
+    (1, 'x', 1.0),
+    (2, 'y', 2.0),
+    (3, 'z', 3.0)
+  ) AS v(a, b, c)
+) TO 'test_files/scratch/push_down_filter_parquet/topk_proj.parquet'
+STORED AS PARQUET;
+
+statement ok
+CREATE EXTERNAL TABLE topk_proj (a INT, b VARCHAR, c DOUBLE)
+STORED AS PARQUET
+LOCATION 'test_files/scratch/push_down_filter_parquet/topk_proj.parquet';
+
+statement ok
+set datafusion.explain.analyze_categories = 'rows';
+
+# Case 1: reorder — `SELECT b, a` — filter on the TopK uses projected column
+# `a`, but the scan predicate must reference the original `a@0`.
+query TT
+EXPLAIN ANALYZE SELECT b, a FROM topk_proj ORDER BY a LIMIT 2;
+----
+Plan with Metrics
+01)SortExec: TopK(fetch=2), expr=[a@1 ASC NULLS LAST], preserve_partitioning=[false], filter=[a@1 < 2], metrics=[output_rows=2, output_batches=1, row_replacements=2]
+02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/topk_proj.parquet]]}, projection=[b, a], file_type=parquet, predicate=DynamicFilter [ a@0 < 2 ], pruning_predicate=a_null_count@1 != row_count@2 AND a_min@0 < 2, required_guarantees=[], metrics=[output_rows=3, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=3, pushdown_rows_pruned=0, predicate_cache_inner_records=3, predicate_cache_records=3, scan_efficiency_ratio=13.72% (153/1.11 K)]
+
+# Case 2: prune — `SELECT a` — filter stays as `a < 2` on the scan.
+query TT
+EXPLAIN ANALYZE SELECT a FROM topk_proj ORDER BY a LIMIT 2;
+----
+Plan with Metrics
+01)SortExec: TopK(fetch=2), expr=[a@0 ASC NULLS LAST], preserve_partitioning=[false], filter=[a@0 < 2], metrics=[output_rows=2, output_batches=1, row_replacements=2]
+02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/topk_proj.parquet]]}, projection=[a], file_type=parquet, predicate=DynamicFilter [ a@0 < 2 ], pruning_predicate=a_null_count@1 != row_count@2 AND a_min@0 < 2, required_guarantees=[], metrics=[output_rows=3, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=3, pushdown_rows_pruned=0, predicate_cache_inner_records=3, predicate_cache_records=3, scan_efficiency_ratio=7.09% (79/1.11 K)]
+
+# Case 3: expression — `SELECT a+1 AS a_plus_1` — the TopK filter is on
+# `a_plus_1`, the scan predicate must read `a@0 + 1`.
+query TT
+EXPLAIN ANALYZE SELECT a + 1 AS a_plus_1, b FROM topk_proj ORDER BY a_plus_1 LIMIT 2;
+----
+Plan with Metrics
+01)SortExec: TopK(fetch=2), expr=[a_plus_1@0 ASC NULLS LAST], preserve_partitioning=[false], filter=[a_plus_1@0 < 3], metrics=[output_rows=2, output_batches=1, row_replacements=2]
+02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/topk_proj.parquet]]}, projection=[CAST(a@0 AS Int64) + 1 as a_plus_1, b], file_type=parquet, predicate=DynamicFilter [ CAST(a@0 AS Int64) + 1 < 3 ], metrics=[output_rows=3, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=3, pushdown_rows_pruned=0, predicate_cache_inner_records=3, predicate_cache_records=3, scan_efficiency_ratio=13.72% (153/1.11 K)]
+
+# Case 4: alias shadowing — `SELECT a+1 AS a` — the projection renames
+# `a+1` to `a`, so the TopK's `a < 3` must still be rewritten to
+# `a@0 + 1 < 3` on the scan.
+query TT
+EXPLAIN ANALYZE SELECT a + 1 AS a, b FROM topk_proj ORDER BY a LIMIT 2;
+----
+Plan with Metrics
+01)SortExec: TopK(fetch=2), expr=[a@0 ASC NULLS LAST], preserve_partitioning=[false], filter=[a@0 < 3], metrics=[output_rows=2, output_batches=1, row_replacements=2]
+02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/topk_proj.parquet]]}, projection=[CAST(a@0 AS Int64) + 1 as a, b], file_type=parquet, predicate=DynamicFilter [ CAST(a@0 AS Int64) + 1 < 3 ], metrics=[output_rows=3, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=3, pushdown_rows_pruned=0, predicate_cache_inner_records=3, predicate_cache_records=3, scan_efficiency_ratio=13.72% (153/1.11 K)]
+
+statement ok
+reset datafusion.explain.analyze_categories;
+
+statement ok
+drop table topk_proj;
+
+
+########
+# Ported from datafusion/core/tests/physical_optimizer/filter_pushdown.rs::
+#   test_hashjoin_dynamic_filter_pushdown_through_aggregate_with_reordered_input
+#
+# The HashJoin's dynamic filter on the join key should propagate down
+# through a GROUP BY aggregate and land on the underlying parquet scan.
+########
+
+statement ok
+COPY (
+  SELECT * FROM (VALUES ('h1'), ('h2')) AS v(a)
+) TO 'test_files/scratch/push_down_filter_parquet/join_agg_build.parquet'
+STORED AS PARQUET;
+
+statement ok
+COPY (
+  SELECT * FROM (VALUES
+    ('h1', 1.0),
+    ('h2', 2.0),
+    ('h3', 3.0),
+    ('h4', 4.0)
+  ) AS v(a, value)
+) TO 'test_files/scratch/push_down_filter_parquet/join_agg_probe.parquet'
+STORED AS PARQUET;
+
+statement ok
+CREATE EXTERNAL TABLE join_agg_build (a VARCHAR)
+STORED AS PARQUET
+LOCATION 'test_files/scratch/push_down_filter_parquet/join_agg_build.parquet';
+
+statement ok
+CREATE EXTERNAL TABLE join_agg_probe (a VARCHAR, value DOUBLE)
+STORED AS PARQUET
+LOCATION 'test_files/scratch/push_down_filter_parquet/join_agg_probe.parquet';
+
+statement ok
+set datafusion.explain.analyze_categories = 'rows';
+
+query TT
+EXPLAIN ANALYZE
+SELECT b.a, agg.min_value
+FROM join_agg_build b
+INNER JOIN (
+  SELECT a, MIN(value) AS min_value FROM join_agg_probe GROUP BY a
+) agg ON b.a = agg.a;
+----
+Plan with Metrics
+01)HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0)], projection=[a@0, min_value@2], metrics=[output_rows=2, output_batches=2, array_map_created_count=0, build_input_batches=1, build_input_rows=2, input_batches=2, input_rows=2, avg_fanout=100% (2/2), probe_hit_rate=100% (2/2)]
+02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/join_agg_build.parquet]]}, projection=[a], file_type=parquet, metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=0, pushdown_rows_pruned=0, predicate_cache_inner_records=0, predicate_cache_records=0, scan_efficiency_ratio=15.32% (70/457)]
+03)--ProjectionExec: expr=[a@0 as a, min(join_agg_probe.value)@1 as min_value], metrics=[output_rows=2, output_batches=2]
+04)----AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[min(join_agg_probe.value)], metrics=[output_rows=2, output_batches=2, spill_count=0, spilled_rows=0]
+05)------RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=1, metrics=[output_rows=2, output_batches=2, spill_count=0, spilled_rows=0]
+06)--------AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[min(join_agg_probe.value)], metrics=[output_rows=2, output_batches=1, spill_count=0, spilled_rows=0, skipped_aggregation_rows=0, reduction_factor=100% (2/2)]
+07)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/join_agg_probe.parquet]]}, projection=[a, value], file_type=parquet, predicate=DynamicFilter [ a@0 >= h1 AND a@0 <= h2 AND a@0 IN (SET) ([h1, h2]) ], pruning_predicate=a_null_count@1 != row_count@2 AND a_max@0 >= h1 AND a_null_count@1 != row_count@2 AND a_min@3 <= h2 AND (a_null_count@1 != row_count@2 AND a_min@3 <= h1 AND h1 <= a_max@0 OR a_null_count@1 != row_count@2 AND a_min@3 <= h2 AND h2 <= a_max@0), required_guarantees=[a in (h1, h2)], metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=1 total → 1 matched, page_index_rows_pruned=4 total → 4 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=2, pushdown_rows_pruned=2, predicate_cache_inner_records=4, predicate_cache_records=2, scan_efficiency_ratio=19.81% (163/823)]
+
+statement ok
+reset datafusion.explain.analyze_categories;
+
+statement ok
+drop table join_agg_build;
+
+statement ok
+drop table join_agg_probe;
+
+
+########
+# Ported from datafusion/core/tests/physical_optimizer/filter_pushdown.rs::
+#   test_hashjoin_dynamic_filter_with_nulls
+#
+# HashJoin build side contains NULL join-key values. The dynamic filter
+# emitted to the probe scan should still correctly handle NULLs.
+########
+
+statement ok
+COPY (
+  SELECT * FROM (VALUES
+    ('aa', 1),
+    (CAST(NULL AS VARCHAR), 2),
+    ('ab', CAST(NULL AS INT))
+  ) AS v(a, b)
+) TO 'test_files/scratch/push_down_filter_parquet/nulls_build.parquet'
+STORED AS PARQUET;
+
+statement ok
+COPY (
+  SELECT * FROM (VALUES
+    ('aa', 1, 1.0),
+    ('ab', 3, 2.0),
+    ('ac', 4, 3.0),
+    (CAST(NULL AS VARCHAR), 5, 4.0)
+  ) AS v(a, b, c)
+) TO 'test_files/scratch/push_down_filter_parquet/nulls_probe.parquet'
+STORED AS PARQUET;
+
+statement ok
+CREATE EXTERNAL TABLE nulls_build (a VARCHAR, b INT)
+STORED AS PARQUET
+LOCATION 'test_files/scratch/push_down_filter_parquet/nulls_build.parquet';
+
+statement ok
+CREATE EXTERNAL TABLE nulls_probe (a VARCHAR, b INT, c DOUBLE)
+STORED AS PARQUET
+LOCATION 'test_files/scratch/push_down_filter_parquet/nulls_probe.parquet';
+
+statement ok
+set datafusion.explain.analyze_categories = 'rows';
+
+query TT
+EXPLAIN ANALYZE
+SELECT nulls_build.a, nulls_build.b, nulls_probe.a, nulls_probe.b, nulls_probe.c
+FROM nulls_build INNER JOIN nulls_probe
+ON nulls_build.a = nulls_probe.a AND nulls_build.b = nulls_probe.b;
+----
+Plan with Metrics
+01)HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)], metrics=[output_rows=1, output_batches=1, array_map_created_count=0, build_input_batches=1, build_input_rows=3, input_batches=1, input_rows=1, avg_fanout=100% (1/1), probe_hit_rate=100% (1/1)]
+02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/nulls_build.parquet]]}, projection=[a, b], file_type=parquet, metrics=[output_rows=3, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=0, pushdown_rows_pruned=0, predicate_cache_inner_records=0, predicate_cache_records=0, scan_efficiency_ratio=18.6% (144/774)]
+03)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/nulls_probe.parquet]]}, projection=[a, b, c], file_type=parquet, predicate=DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= 1 AND b@1 <= 2 AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:1}, {c0:,c1:2}, {c0:ab,c1:}]) ], pruning_predicate=a_null_count@1 != row_count@2 AND a_max@0 >= aa AND a_null_count@1 != row_count@2 AND a_min@3 <= ab AND b_null_count@5 != row_count@2 AND b_max@4 >= 1 AND b_null_count@5 != row_count@2 AND b_min@6 <= 2, required_guarantees=[], metrics=[output_rows=1, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=1, pushdown_rows_pruned=3, predicate_cache_inner_records=8, predicate_cache_records=2, scan_efficiency_ratio=21.1% (237/1.12 K)]
+
+statement ok
+reset datafusion.explain.analyze_categories;
+
+statement ok
+drop table nulls_build;
+
+statement ok
+drop table nulls_probe;
+
+
+########
+# Ported from datafusion/core/tests/physical_optimizer/filter_pushdown.rs::
+#   test_hashjoin_dynamic_filter_pushdown_left_join
+#   test_hashjoin_dynamic_filter_pushdown_left_semi_join
+#
+# For LEFT and LEFT SEMI joins (without a WHERE that would convert them
+# to INNER), the HashJoin's dynamic filter should still push into the
+# probe scan on the join keys — unmatched probe rows that can never
+# contribute to any build match are safely pruned there.
+########
+
+statement ok
+COPY (
+  SELECT * FROM (VALUES
+    ('aa', 'ba', 1.0),
+    ('ab', 'bb', 2.0)
+  ) AS v(a, b, c)
+) TO 'test_files/scratch/push_down_filter_parquet/lj_build.parquet'
+STORED AS PARQUET;
+
+statement ok
+COPY (
+  SELECT * FROM (VALUES
+    ('aa', 'ba', 1.0),
+    ('ab', 'bb', 2.0),
+    ('ac', 'bc', 3.0),
+    ('ad', 'bd', 4.0)
+  ) AS v(a, b, e)
+) TO 'test_files/scratch/push_down_filter_parquet/lj_probe.parquet'
+STORED AS PARQUET;
+
+statement ok
+CREATE EXTERNAL TABLE lj_build (a VARCHAR, b VARCHAR, c DOUBLE)
+STORED AS PARQUET
+LOCATION 'test_files/scratch/push_down_filter_parquet/lj_build.parquet';
+
+statement ok
+CREATE EXTERNAL TABLE lj_probe (a VARCHAR, b VARCHAR, e DOUBLE)
+STORED AS PARQUET
+LOCATION 'test_files/scratch/push_down_filter_parquet/lj_probe.parquet';
+
+statement ok
+set datafusion.explain.analyze_categories = 'rows';
+
+# LEFT JOIN: build is preserved; probe rows get dynamic filter based on build keys.
+query TT
+EXPLAIN ANALYZE
+SELECT lj_build.a, lj_build.b, lj_build.c, lj_probe.a, lj_probe.b, lj_probe.e
+FROM lj_build LEFT JOIN lj_probe
+ON lj_build.a = lj_probe.a AND lj_build.b = lj_probe.b;
+----
+Plan with Metrics
+01)HashJoinExec: mode=CollectLeft, join_type=Left, on=[(a@0, a@0), (b@1, b@1)], metrics=[output_rows=2, output_batches=1, array_map_created_count=0, build_input_batches=1, build_input_rows=2, input_batches=2, input_rows=2, avg_fanout=100% (2/2), probe_hit_rate=100% (2/2)]
+02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/lj_build.parquet]]}, projection=[a, b, c], file_type=parquet, metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=0, pushdown_rows_pruned=0, predicate_cache_inner_records=0, predicate_cache_records=0, scan_efficiency_ratio=20.48% (214/1.04 K)]
+03)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/lj_probe.parquet]]}, projection=[a, b, e], file_type=parquet, predicate=DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:ba}, {c0:ab,c1:bb}]) ], pruning_predicate=a_null_count@1 != row_count@2 AND a_max@0 >= aa AND a_null_count@1 != row_count@2 AND a_min@3 <= ab AND b_null_count@5 != row_count@2 AND b_max@4 >= ba AND b_null_count@5 != row_count@2 AND b_min@6 <= bb, required_guarantees=[], metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=2, pushdown_rows_pruned=2, predicate_cache_inner_records=8, predicate_cache_records=4, scan_efficiency_ratio=22.78% (246/1.08 K)]
+
+# LEFT SEMI JOIN: only matching build rows are returned; probe scan still
+# receives the dynamic filter.
+query TT
+EXPLAIN ANALYZE
+SELECT lj_build.*
+FROM lj_build
+WHERE EXISTS (
+  SELECT 1 FROM lj_probe
+  WHERE lj_probe.a = lj_build.a AND lj_probe.b = lj_build.b
+);
+----
+Plan with Metrics
+01)HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(a@0, a@0), (b@1, b@1)], metrics=[output_rows=2, output_batches=1, array_map_created_count=0, build_input_batches=1, build_input_rows=2, input_batches=2, input_rows=4, avg_fanout=100% (2/2), probe_hit_rate=100% (2/2)]
+02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/lj_build.parquet]]}, projection=[a, b, c], file_type=parquet, metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=0, pushdown_rows_pruned=0, predicate_cache_inner_records=0, predicate_cache_records=0, scan_efficiency_ratio=20.48% (214/1.04 K)]
+03)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/lj_probe.parquet]]}, projection=[a, b], file_type=parquet, predicate=DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb AND struct(a@0, b@1) IN (SET) ([{c0:aa,c1:ba}, {c0:ab,c1:bb}]) ], pruning_predicate=a_null_count@1 != row_count@2 AND a_max@0 >= aa AND a_null_count@1 != row_count@2 AND a_min@3 <= ab AND b_null_count@5 != row_count@2 AND b_max@4 >= ba AND b_null_count@5 != row_count@2 AND b_min@6 <= bb, required_guarantees=[], metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=2, pushdown_rows_pruned=2, predicate_cache_inner_records=8, predicate_cache_records=4, scan_efficiency_ratio=15.37% (166/1.08 K)]
+
+statement ok
+reset datafusion.explain.analyze_categories;
+
+statement ok
+drop table lj_build;
+
+statement ok
+drop table lj_probe;
+
+
+########
+# Ported from datafusion/core/tests/physical_optimizer/filter_pushdown.rs::
+#   test_hashjoin_hash_table_pushdown_collect_left
+#   test_hashjoin_hash_table_pushdown_integer_keys
+#
+# With `hash_join_inlist_pushdown_max_size = 1`, the HashJoin dynamic filter
+# should use the HashTable strategy (`hash_lookup`) instead of the InList
+# strategy (`IN (SET) ([...])`), even when the build side is tiny.
+########
+
+statement ok
+set datafusion.optimizer.hash_join_inlist_pushdown_max_size = 1;
+
+# --- multi-column string keys ---
+
+statement ok
+COPY (
+  SELECT * FROM (VALUES
+    ('aa', 'ba', 1.0),
+    ('ab', 'bb', 2.0)
+  ) AS v(a, b, c)
+) TO 'test_files/scratch/push_down_filter_parquet/hl_build.parquet'
+STORED AS PARQUET;
+
+statement ok
+COPY (
+  SELECT * FROM (VALUES
+    ('aa', 'ba', 1.0),
+    ('ab', 'bb', 2.0),
+    ('ac', 'bc', 3.0),
+    ('ad', 'bd', 4.0)
+  ) AS v(a, b, e)
+) TO 'test_files/scratch/push_down_filter_parquet/hl_probe.parquet'
+STORED AS PARQUET;
+
+statement ok
+CREATE EXTERNAL TABLE hl_build (a VARCHAR, b VARCHAR, c DOUBLE)
+STORED AS PARQUET
+LOCATION 'test_files/scratch/push_down_filter_parquet/hl_build.parquet';
+
+statement ok
+CREATE EXTERNAL TABLE hl_probe (a VARCHAR, b VARCHAR, e DOUBLE)
+STORED AS PARQUET
+LOCATION 'test_files/scratch/push_down_filter_parquet/hl_probe.parquet';
+
+statement ok
+set datafusion.explain.analyze_categories = 'rows';
+
+# probe scan filter should contain `hash_lookup`, not `IN (SET)`.
+query TT
+EXPLAIN ANALYZE
+SELECT p.a, p.b, build.c, p.e
+FROM hl_probe p INNER JOIN hl_build AS build
+  ON p.a = build.a AND p.b = build.b;
+----
+Plan with Metrics
+01)HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0), (b@1, b@1)], projection=[a@3, b@4, c@2, e@5], metrics=[output_rows=2, output_batches=1, array_map_created_count=0, build_input_batches=1, build_input_rows=2, input_batches=1, input_rows=2, avg_fanout=100% (2/2), probe_hit_rate=100% (2/2)]
+02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/hl_build.parquet]]}, projection=[a, b, c], file_type=parquet, metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=0, pushdown_rows_pruned=0, predicate_cache_inner_records=0, predicate_cache_records=0, scan_efficiency_ratio=20.48% (214/1.04 K)]
+03)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/hl_probe.parquet]]}, projection=[a, b, e], file_type=parquet, predicate=DynamicFilter [ a@0 >= aa AND a@0 <= ab AND b@1 >= ba AND b@1 <= bb AND hash_lookup ], pruning_predicate=a_null_count@1 != row_count@2 AND a_max@0 >= aa AND a_null_count@1 != row_count@2 AND a_min@3 <= ab AND b_null_count@5 != row_count@2 AND b_max@4 >= ba AND b_null_count@5 != row_count@2 AND b_min@6 <= bb, required_guarantees=[], metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=2, pushdown_rows_pruned=2, predicate_cache_inner_records=8, predicate_cache_records=4, scan_efficiency_ratio=22.78% (246/1.08 K)]
+
+statement ok
+drop table hl_build;
+
+statement ok
+drop table hl_probe;
+
+# --- multi-column integer keys ---
+
+statement ok
+COPY (
+  SELECT * FROM (VALUES
+    (1, 10, 100.0),
+    (2, 20, 200.0)
+  ) AS v(id1, id2, value)
+) TO 'test_files/scratch/push_down_filter_parquet/int_build.parquet'
+STORED AS PARQUET;
+
+statement ok
+COPY (
+  SELECT * FROM (VALUES
+    (1, 10, 'a'),
+    (2, 20, 'b'),
+    (3, 30, 'c'),
+    (4, 40, 'd')
+  ) AS v(id1, id2, data)
+) TO 'test_files/scratch/push_down_filter_parquet/int_probe.parquet'
+STORED AS PARQUET;
+
+statement ok
+CREATE EXTERNAL TABLE int_build (id1 INT, id2 INT, value DOUBLE)
+STORED AS PARQUET
+LOCATION 'test_files/scratch/push_down_filter_parquet/int_build.parquet';
+
+statement ok
+CREATE EXTERNAL TABLE int_probe (id1 INT, id2 INT, data VARCHAR)
+STORED AS PARQUET
+LOCATION 'test_files/scratch/push_down_filter_parquet/int_probe.parquet';
+
+query TT
+EXPLAIN ANALYZE
+SELECT b.id1, b.id2, b.value, p.data
+FROM int_build b INNER JOIN int_probe p
+  ON b.id1 = p.id1 AND b.id2 = p.id2;
+----
+Plan with Metrics
+01)HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id1@0, id1@0), (id2@1, id2@1)], projection=[id1@0, id2@1, value@2, data@5], metrics=[output_rows=2, output_batches=1, array_map_created_count=0, build_input_batches=1, build_input_rows=2, input_batches=1, input_rows=2, avg_fanout=100% (2/2), probe_hit_rate=100% (2/2)]
+02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/int_build.parquet]]}, projection=[id1, id2, value], file_type=parquet, metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=0, pushdown_rows_pruned=0, predicate_cache_inner_records=0, predicate_cache_records=0, scan_efficiency_ratio=19.02% (222/1.17 K)]
+03)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_parquet/int_probe.parquet]]}, projection=[id1, id2, data], file_type=parquet, predicate=DynamicFilter [ id1@0 >= 1 AND id1@0 <= 2 AND id2@1 >= 10 AND id2@1 <= 20 AND hash_lookup ], pruning_predicate=id1_null_count@1 != row_count@2 AND id1_max@0 >= 1 AND id1_null_count@1 != row_count@2 AND id1_min@3 <= 2 AND id2_null_count@5 != row_count@2 AND id2_max@4 >= 10 AND id2_null_count@5 != row_count@2 AND id2_min@6 <= 20, required_guarantees=[], metrics=[output_rows=2, output_batches=1, files_ranges_pruned_statistics=1 total → 1 matched, row_groups_pruned_statistics=1 total → 1 matched, row_groups_pruned_bloom_filter=1 total → 1 matched, page_index_pages_pruned=0 total → 0 matched, page_index_rows_pruned=0 total → 0 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=1, files_processed=1, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=2, pushdown_rows_pruned=2, predicate_cache_inner_records=8, predicate_cache_records=4, scan_efficiency_ratio=21.43% (239/1.11 K)]
+
+statement ok
+reset datafusion.explain.analyze_categories;
+
+statement ok
+reset datafusion.optimizer.hash_join_inlist_pushdown_max_size;
+
+statement ok
+drop table int_build;
+
+statement ok
+drop table int_probe;
+
+
 # Config reset
 statement ok
 RESET datafusion.explain.physical_plan_only;
diff --git a/datafusion/sqllogictest/test_files/push_down_filter_regression.slt b/datafusion/sqllogictest/test_files/push_down_filter_regression.slt
index cfc564fa2b56c..51ad998c02307 100644
--- a/datafusion/sqllogictest/test_files/push_down_filter_regression.slt
+++ b/datafusion/sqllogictest/test_files/push_down_filter_regression.slt
@@ -185,9 +185,318 @@ physical_plan
 04)------AggregateExec: mode=Partial, gby=[(NULL as id), (id@0 as id)], aggr=[max(agg_dyn_test.id)]
 05)--------DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/core/tests/data/test_statistics_per_partition/date=2025-03-01/j5fUeSDQo22oPyPU.parquet, WORKSPACE_ROOT/datafusion/core/tests/data/test_statistics_per_partition/date=2025-03-02/j5fUeSDQo22oPyPU.parquet], [WORKSPACE_ROOT/datafusion/core/tests/data/test_statistics_per_partition/date=2025-03-03/j5fUeSDQo22oPyPU.parquet, WORKSPACE_ROOT/datafusion/core/tests/data/test_statistics_per_partition/date=2025-03-04/j5fUeSDQo22oPyPU.parquet]]}, projection=[id], file_type=parquet, predicate=id@0 < 10, pruning_predicate=id_null_count@1 != row_count@2 AND id_min@0 < 10, required_guarantees=[]
 
+# Ported from datafusion/core/tests/physical_optimizer/filter_pushdown.rs::
+#   test_aggregate_dynamic_filter_parquet_e2e
+#
+# End-to-end: `select max(id) from t where id > 1` should pick up a dynamic
+# filter from the partial aggregate that prunes at least one file range
+# (files_ranges_pruned_statistics must report < 4 matched out of 4 total).
+#
+# Build a self-contained fixture of 4 single-row parquet files, one per
+# `id` value. With target_partitions=2, files are split into 2 groups of 2.
+# Writing the files in decreasing-id order ensures the first file in each
+# group has the larger id, so after reading it the dynamic filter tightens
+# (e.g. to `id > 4` in group 1) and prunes the second file in the group.
+
+statement ok
+COPY (VALUES (4)) TO 'test_files/scratch/push_down_filter_regression/agg_dyn/file_0.parquet' STORED AS PARQUET;
+
+statement ok
+COPY (VALUES (3)) TO 'test_files/scratch/push_down_filter_regression/agg_dyn/file_1.parquet' STORED AS PARQUET;
+
+statement ok
+COPY (VALUES (2)) TO 'test_files/scratch/push_down_filter_regression/agg_dyn/file_2.parquet' STORED AS PARQUET;
+
+statement ok
+COPY (VALUES (1)) TO 'test_files/scratch/push_down_filter_regression/agg_dyn/file_3.parquet' STORED AS PARQUET;
+
+statement ok
+CREATE EXTERNAL TABLE agg_dyn_e2e (column1 INT)
+STORED AS PARQUET
+LOCATION 'test_files/scratch/push_down_filter_regression/agg_dyn/';
+
+statement ok
+set datafusion.execution.collect_statistics = true;
+
+statement ok
+set datafusion.explain.analyze_categories = 'rows';
+
+query TT
+EXPLAIN ANALYZE select max(column1) from agg_dyn_e2e where column1 > 1;
+----
+Plan with Metrics
+01)AggregateExec: mode=Final, gby=[], aggr=[max(agg_dyn_e2e.column1)], metrics=[output_rows=1, output_batches=1]
+02)--CoalescePartitionsExec, metrics=[output_rows=2, output_batches=2]
+03)----AggregateExec: mode=Partial, gby=[], aggr=[max(agg_dyn_e2e.column1)], metrics=[output_rows=2, output_batches=2]
+04)------DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn/file_0.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn/file_1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn/file_2.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn/file_3.parquet]]}, projection=[column1], file_type=parquet, predicate=column1@0 > 1 AND DynamicFilter [ column1@0 > 4 ], pruning_predicate=column1_null_count@1 != row_count@2 AND column1_max@0 > 1 AND column1_null_count@1 != row_count@2 AND column1_max@0 > 4, required_guarantees=[], metrics=[output_rows=2, output_batches=2, files_ranges_pruned_statistics=4 total → 4 matched, row_groups_pruned_statistics=4 total → 2 matched -> 2 fully matched, row_groups_pruned_bloom_filter=2 total → 2 matched, page_index_pages_pruned=2 total → 2 matched, page_index_rows_pruned=2 total → 2 matched, limit_pruned_row_groups=0 total → 0 matched, batches_split=0, file_open_errors=0, file_scan_errors=0, files_opened=4, files_processed=4, num_predicate_creation_errors=0, predicate_evaluation_errors=0, pushdown_rows_matched=2, pushdown_rows_pruned=0, predicate_cache_inner_records=2, predicate_cache_records=4, scan_efficiency_ratio=25.15% (130/517)]
+
+statement ok
+reset datafusion.explain.analyze_categories;
+
+statement ok
+reset datafusion.execution.collect_statistics;
+
+statement ok
+drop table agg_dyn_e2e;
+
+
+########
+# Ported from datafusion/core/tests/physical_optimizer/filter_pushdown.rs:
+#   test_aggregate_dynamic_filter_min_simple
+#   test_aggregate_dynamic_filter_max_simple
+#   test_aggregate_dynamic_filter_min_expression_not_supported
+#   test_aggregate_dynamic_filter_min_max_same_column
+#   test_aggregate_dynamic_filter_min_max_different_columns
+#   test_aggregate_dynamic_filter_multiple_mixed_expressions
+#   test_aggregate_dynamic_filter_min_all_nulls
+#
+# Baseline behavior of aggregate dynamic filter emission:
+#   MIN(a)      -> filter `a < <min_seen>`
+#   MAX(a)      -> filter `a > <max_seen>`
+#   MIN + MAX   -> filter `a < <min> OR a > <max>` (or across columns)
+#   MIN(a+1)    -> no dynamic filter (expression input is unsupported)
+#   all-NULLs   -> filter stays `true` (no meaningful bound to propagate)
+#
+# Dynamic aggregate filters are only emitted in Partial+Final mode, not
+# Single mode, so each fixture is written to 2 files and runs with
+# target_partitions=2 to force a multi-partition aggregate plan.
+
+statement ok
+set datafusion.execution.target_partitions = 2;
+
+# --- single-column fixture ([5, 1, 3, 8]) split across 2 files ---
+
+statement ok
+COPY (
+  SELECT * FROM (VALUES (5), (1)) AS v(a)
+) TO 'test_files/scratch/push_down_filter_regression/agg_dyn_single/file_0.parquet'
+STORED AS PARQUET;
+
+statement ok
+COPY (
+  SELECT * FROM (VALUES (3), (8)) AS v(a)
+) TO 'test_files/scratch/push_down_filter_regression/agg_dyn_single/file_1.parquet'
+STORED AS PARQUET;
+
+statement ok
+CREATE EXTERNAL TABLE agg_dyn_single (a INT)
+STORED AS PARQUET
+LOCATION 'test_files/scratch/push_down_filter_regression/agg_dyn_single/';
+
+# Use `analyze_level = summary` + `analyze_categories = 'none'` so metrics
+# render empty; we only care that the `predicate=DynamicFilter [ ... ]` text
+# matches. Pruning metrics here are subject to a parallel-execution race
+# (the order in which Partial aggregates publish filter updates vs. when the
+# scan reads each partition), so the filter *content* is deterministic but
+# the pruning counts are not.
+statement ok
+set datafusion.explain.analyze_level = summary;
+
+statement ok
+set datafusion.explain.analyze_categories = 'none';
+
+# MIN(a) -> DynamicFilter [ a < 1 ]
+query TT
+EXPLAIN ANALYZE SELECT MIN(a) FROM agg_dyn_single;
+----
+Plan with Metrics
+01)AggregateExec: mode=Final, gby=[], aggr=[min(agg_dyn_single.a)], metrics=[]
+02)--CoalescePartitionsExec, metrics=[]
+03)----AggregateExec: mode=Partial, gby=[], aggr=[min(agg_dyn_single.a)], metrics=[]
+04)------DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn_single/file_0.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn_single/file_1.parquet]]}, projection=[a], file_type=parquet, predicate=DynamicFilter [ a@0 < 1 ], pruning_predicate=a_null_count@1 != row_count@2 AND a_min@0 < 1, required_guarantees=[], metrics=[]
+
+# MAX(a) -> DynamicFilter [ a > 8 ]
+query TT
+EXPLAIN ANALYZE SELECT MAX(a) FROM agg_dyn_single;
+----
+Plan with Metrics
+01)AggregateExec: mode=Final, gby=[], aggr=[max(agg_dyn_single.a)], metrics=[]
+02)--CoalescePartitionsExec, metrics=[]
+03)----AggregateExec: mode=Partial, gby=[], aggr=[max(agg_dyn_single.a)], metrics=[]
+04)------DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn_single/file_0.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn_single/file_1.parquet]]}, projection=[a], file_type=parquet, predicate=DynamicFilter [ a@0 > 8 ], pruning_predicate=a_null_count@1 != row_count@2 AND a_max@0 > 8, required_guarantees=[], metrics=[]
+
+# MIN(a), MAX(a) -> DynamicFilter [ a < 1 OR a > 8 ]
+query TT
+EXPLAIN ANALYZE SELECT MIN(a), MAX(a) FROM agg_dyn_single;
+----
+Plan with Metrics
+01)AggregateExec: mode=Final, gby=[], aggr=[min(agg_dyn_single.a), max(agg_dyn_single.a)], metrics=[]
+02)--CoalescePartitionsExec, metrics=[]
+03)----AggregateExec: mode=Partial, gby=[], aggr=[min(agg_dyn_single.a), max(agg_dyn_single.a)], metrics=[]
+04)------DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn_single/file_0.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn_single/file_1.parquet]]}, projection=[a], file_type=parquet, predicate=DynamicFilter [ a@0 < 1 OR a@0 > 8 ], pruning_predicate=a_null_count@1 != row_count@2 AND a_min@0 < 1 OR a_null_count@1 != row_count@2 AND a_max@3 > 8, required_guarantees=[], metrics=[]
+
+# MIN(a+1) -> no dynamic filter (expression input is not a plain column)
+query TT
+EXPLAIN ANALYZE SELECT MIN(a + 1) FROM agg_dyn_single;
+----
+Plan with Metrics
+01)AggregateExec: mode=Final, gby=[], aggr=[min(agg_dyn_single.a + Int64(1))], metrics=[]
+02)--CoalescePartitionsExec, metrics=[]
+03)----AggregateExec: mode=Partial, gby=[], aggr=[min(agg_dyn_single.a + Int64(1))], metrics=[]
+04)------DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn_single/file_0.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn_single/file_1.parquet]]}, projection=[a], file_type=parquet, metrics=[]
+
+statement ok
+drop table agg_dyn_single;
+
+# --- two-column fixture: MIN(a) + MAX(b) across columns ---
+
+statement ok
+COPY (
+  SELECT * FROM (VALUES (5, 7), (1, 2)) AS v(a, b)
+) TO 'test_files/scratch/push_down_filter_regression/agg_dyn_two_col/file_0.parquet'
+STORED AS PARQUET;
+
+statement ok
+COPY (
+  SELECT * FROM (VALUES (3, 4), (8, 9)) AS v(a, b)
+) TO 'test_files/scratch/push_down_filter_regression/agg_dyn_two_col/file_1.parquet'
+STORED AS PARQUET;
+
+statement ok
+CREATE EXTERNAL TABLE agg_dyn_two_col (a INT, b INT)
+STORED AS PARQUET
+LOCATION 'test_files/scratch/push_down_filter_regression/agg_dyn_two_col/';
+
+# MIN(a), MAX(b) -> DynamicFilter [ a < 1 OR b > 9 ]
+query TT
+EXPLAIN ANALYZE SELECT MIN(a), MAX(b) FROM agg_dyn_two_col;
+----
+Plan with Metrics
+01)AggregateExec: mode=Final, gby=[], aggr=[min(agg_dyn_two_col.a), max(agg_dyn_two_col.b)], metrics=[]
+02)--CoalescePartitionsExec, metrics=[]
+03)----AggregateExec: mode=Partial, gby=[], aggr=[min(agg_dyn_two_col.a), max(agg_dyn_two_col.b)], metrics=[]
+04)------DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn_two_col/file_0.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn_two_col/file_1.parquet]]}, projection=[a, b], file_type=parquet, predicate=DynamicFilter [ a@0 < 1 OR b@1 > 9 ], pruning_predicate=a_null_count@1 != row_count@2 AND a_min@0 < 1 OR b_null_count@4 != row_count@2 AND b_max@3 > 9, required_guarantees=[], metrics=[]
+
+statement ok
+drop table agg_dyn_two_col;
+
+# --- mixed expressions: MIN(a), MAX(a), MAX(b), MIN(c+1) ---
+# Supported aggregates (MIN(a), MAX(a), MAX(b)) should drive a filter;
+# MIN(c+1) is unsupported and must not contribute.
+
+statement ok
+COPY (
+  SELECT * FROM (VALUES (5, 10, 100), (1, 4, 70)) AS v(a, b, c)
+) TO 'test_files/scratch/push_down_filter_regression/agg_dyn_mixed/file_0.parquet'
+STORED AS PARQUET;
+
+statement ok
+COPY (
+  SELECT * FROM (VALUES (3, 6, 90), (8, 12, 110)) AS v(a, b, c)
+) TO 'test_files/scratch/push_down_filter_regression/agg_dyn_mixed/file_1.parquet'
+STORED AS PARQUET;
+
+statement ok
+CREATE EXTERNAL TABLE agg_dyn_mixed (a INT, b INT, c INT)
+STORED AS PARQUET
+LOCATION 'test_files/scratch/push_down_filter_regression/agg_dyn_mixed/';
+
+# -> DynamicFilter [ a < 1 OR a > 8 OR b > 12 ] (MIN(c+1) dropped as unsupported)
+query TT
+EXPLAIN ANALYZE SELECT MIN(a), MAX(a), MAX(b), MIN(c + 1) FROM agg_dyn_mixed;
+----
+Plan with Metrics
+01)AggregateExec: mode=Final, gby=[], aggr=[min(agg_dyn_mixed.a), max(agg_dyn_mixed.a), max(agg_dyn_mixed.b), min(agg_dyn_mixed.c + Int64(1))], metrics=[]
+02)--CoalescePartitionsExec, metrics=[]
+03)----AggregateExec: mode=Partial, gby=[], aggr=[min(agg_dyn_mixed.a), max(agg_dyn_mixed.a), max(agg_dyn_mixed.b), min(agg_dyn_mixed.c + Int64(1))], metrics=[]
+04)------DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn_mixed/file_0.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn_mixed/file_1.parquet]]}, projection=[a, b, c], file_type=parquet, predicate=DynamicFilter [ a@0 < 1 OR a@0 > 8 OR b@1 > 12 ], pruning_predicate=a_null_count@1 != row_count@2 AND a_min@0 < 1 OR a_null_count@1 != row_count@2 AND a_max@3 > 8 OR b_null_count@5 != row_count@2 AND b_max@4 > 12, required_guarantees=[], metrics=[]
+
+statement ok
+drop table agg_dyn_mixed;
+
+# --- all-NULLs input: filter should stay `true` (no meaningful bound) ---
+
+statement ok
+COPY (
+  SELECT CAST(NULL AS INT) AS a FROM (VALUES (1), (2)) AS v(n)
+) TO 'test_files/scratch/push_down_filter_regression/agg_dyn_nulls/file_0.parquet'
+STORED AS PARQUET;
+
+statement ok
+COPY (
+  SELECT CAST(NULL AS INT) AS a FROM (VALUES (3), (4)) AS v(n)
+) TO 'test_files/scratch/push_down_filter_regression/agg_dyn_nulls/file_1.parquet'
+STORED AS PARQUET;
+
+statement ok
+CREATE EXTERNAL TABLE agg_dyn_nulls (a INT)
+STORED AS PARQUET
+LOCATION 'test_files/scratch/push_down_filter_regression/agg_dyn_nulls/';
+
+# MIN(a) over all-NULL input -> DynamicFilter [ true ]
+query TT
+EXPLAIN ANALYZE SELECT MIN(a) FROM agg_dyn_nulls;
+----
+Plan with Metrics
+01)AggregateExec: mode=Final, gby=[], aggr=[min(agg_dyn_nulls.a)], metrics=[]
+02)--CoalescePartitionsExec, metrics=[]
+03)----AggregateExec: mode=Partial, gby=[], aggr=[min(agg_dyn_nulls.a)], metrics=[]
+04)------DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn_nulls/file_0.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_dyn_nulls/file_1.parquet]]}, projection=[a], file_type=parquet, predicate=DynamicFilter [ true ], metrics=[]
+
+statement ok
+reset datafusion.explain.analyze_categories;
+
+statement ok
+reset datafusion.explain.analyze_level;
+
+statement ok
+drop table agg_dyn_nulls;
+
+statement ok
+set datafusion.execution.target_partitions = 4;
+
+
+########
+# Ported from datafusion/core/tests/physical_optimizer/filter_pushdown.rs::
+#   test_aggregate_filter_pushdown
+#   test_no_pushdown_filter_on_aggregate_result
+#
+# - A filter that only references grouping columns (`WHERE a = 'x' GROUP BY a`)
+#   should push through the aggregate and land on the scan's predicate.
+# - A filter on an aggregate *result* (`HAVING count(b) > 5`) must NOT push
+#   through — it has to stay above the aggregate.
+########
+
+statement ok
+COPY (
+  SELECT * FROM (VALUES ('x', 'foo'), ('y', 'bar')) AS v(a, b)
+) TO 'test_files/scratch/push_down_filter_regression/agg_filter_pushdown.parquet'
+STORED AS PARQUET;
+
+statement ok
+CREATE EXTERNAL TABLE agg_filter_pushdown (a VARCHAR, b VARCHAR)
+STORED AS PARQUET
+LOCATION 'test_files/scratch/push_down_filter_regression/agg_filter_pushdown.parquet';
+
+# Filter on grouping column pushes down to the scan.
+query TT
+EXPLAIN SELECT a, count(b) FROM agg_filter_pushdown WHERE a = 'x' GROUP BY a;
+----
+physical_plan
+01)AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[count(agg_filter_pushdown.b)], ordering_mode=Sorted
+02)--RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4
+03)----AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count(agg_filter_pushdown.b)], ordering_mode=Sorted
+04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_filter_pushdown.parquet]]}, projection=[a, b], file_type=parquet, predicate=a@0 = x, pruning_predicate=a_null_count@2 != row_count@3 AND a_min@0 <= x AND x <= a_max@1, required_guarantees=[a in (x)]
+
+# Filter on aggregate result (HAVING) stays above the aggregate.
+query TT
+EXPLAIN SELECT a, count(b) AS cnt FROM agg_filter_pushdown GROUP BY a HAVING count(b) > 5;
+----
+physical_plan
+01)ProjectionExec: expr=[a@0 as a, count(agg_filter_pushdown.b)@1 as cnt]
+02)--FilterExec: count(agg_filter_pushdown.b)@1 > 5
+03)----AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[count(agg_filter_pushdown.b)]
+04)------RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=1
+05)--------AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count(agg_filter_pushdown.b)]
+06)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter_regression/agg_filter_pushdown.parquet]]}, projection=[a, b], file_type=parquet
+
+statement ok
+drop table agg_filter_pushdown;
+
 # Config reset
 
-# The SLT runner sets `target_partitions` to 4 instead of using the default, so 
+# The SLT runner sets `target_partitions` to 4 instead of using the default, so
 # reset it explicitly.
 statement ok
 set datafusion.execution.target_partitions = 4;