From d4228feca341cd707a3a26372cae71a94a93b4fd Mon Sep 17 00:00:00 2001 From: Trent Hauck Date: Sun, 16 Jun 2024 18:54:11 -0700 Subject: [PATCH 1/4] refactor: remove extra default in max rows (#10941) --- datafusion-cli/src/main.rs | 2 +- docs/source/user-guide/cli/usage.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs index 187f856894b2..f2b29fe78690 100644 --- a/datafusion-cli/src/main.rs +++ b/datafusion-cli/src/main.rs @@ -133,7 +133,7 @@ struct Args { #[clap( long, - help = "The max number of rows to display for 'Table' format\n[default: 40] [possible values: numbers(0/10/...), inf(no limit)]", + help = "The max number of rows to display for 'Table' format\n[possible values: numbers(0/10/...), inf(no limit)]", default_value = "40" )] maxrows: MaxRows, diff --git a/docs/source/user-guide/cli/usage.md b/docs/source/user-guide/cli/usage.md index 617b462875c7..6a620fc69252 100644 --- a/docs/source/user-guide/cli/usage.md +++ b/docs/source/user-guide/cli/usage.md @@ -52,7 +52,7 @@ OPTIONS: --maxrows The max number of rows to display for 'Table' format - [default: 40] [possible values: numbers(0/10/...), inf(no limit)] + [possible values: numbers(0/10/...), inf(no limit)] [default: 40] --mem-pool-type Specify the memory pool type 'greedy' or 'fair', default to 'greedy' From 378b9eecd4a77386a59953209f75fc5c192d7af4 Mon Sep 17 00:00:00 2001 From: Alex Huang Date: Mon, 17 Jun 2024 17:43:20 +0800 Subject: [PATCH 2/4] chore: Improve performance of Parquet statistics conversion (#10932) --- .../physical_plan/parquet/statistics.rs | 32 +++---------------- 1 file changed, 4 insertions(+), 28 deletions(-) diff --git a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs index a2e0d8fa66be..327a516f1af1 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs @@ -303,24 +303,12 @@ macro_rules! get_statistics { ))), DataType::Int8 => Ok(Arc::new(Int8Array::from_iter( [<$stat_type_prefix Int32StatsIterator>]::new($iterator).map(|x| { - x.and_then(|x| { - if let Ok(v) = i8::try_from(*x) { - Some(v) - } else { - None - } - }) + x.and_then(|x| i8::try_from(*x).ok()) }), ))), DataType::Int16 => Ok(Arc::new(Int16Array::from_iter( [<$stat_type_prefix Int32StatsIterator>]::new($iterator).map(|x| { - x.and_then(|x| { - if let Ok(v) = i16::try_from(*x) { - Some(v) - } else { - None - } - }) + x.and_then(|x| i16::try_from(*x).ok()) }), ))), DataType::Int32 => Ok(Arc::new(Int32Array::from_iter( @@ -331,24 +319,12 @@ macro_rules! 
get_statistics { ))), DataType::UInt8 => Ok(Arc::new(UInt8Array::from_iter( [<$stat_type_prefix Int32StatsIterator>]::new($iterator).map(|x| { - x.and_then(|x| { - if let Ok(v) = u8::try_from(*x) { - Some(v) - } else { - None - } - }) + x.and_then(|x| u8::try_from(*x).ok()) }), ))), DataType::UInt16 => Ok(Arc::new(UInt16Array::from_iter( [<$stat_type_prefix Int32StatsIterator>]::new($iterator).map(|x| { - x.and_then(|x| { - if let Ok(v) = u16::try_from(*x) { - Some(v) - } else { - None - } - }) + x.and_then(|x| u16::try_from(*x).ok()) }), ))), DataType::UInt32 => Ok(Arc::new(UInt32Array::from_iter( From c4fd7545ba7719d6d12473694fcdf6f34d25b8cb Mon Sep 17 00:00:00 2001 From: Leonardo Yvens Date: Mon, 17 Jun 2024 12:17:58 +0100 Subject: [PATCH 3/4] Add catalog::resolve_table_references (#10876) * resolve information_schema references only when necessary * add `catalog::resolve_table_references` as a public utility * collect CTEs separately in resolve_table_references * test CTE name shadowing * handle CTE name shadowing in resolve_table_references * handle unions, recursive and nested CTEs in resolve_table_references --- datafusion/core/src/catalog/mod.rs | 239 +++++++++++++++++- .../core/src/execution/session_state.rs | 96 +------ datafusion/sqllogictest/test_files/cte.slt | 7 + 3 files changed, 256 insertions(+), 86 deletions(-) diff --git a/datafusion/core/src/catalog/mod.rs b/datafusion/core/src/catalog/mod.rs index 209d9b2af297..53b133339924 100644 --- a/datafusion/core/src/catalog/mod.rs +++ b/datafusion/core/src/catalog/mod.rs @@ -27,6 +27,8 @@ use crate::catalog::schema::SchemaProvider; use dashmap::DashMap; use datafusion_common::{exec_err, not_impl_err, Result}; use std::any::Any; +use std::collections::BTreeSet; +use std::ops::ControlFlow; use std::sync::Arc; /// Represent a list of named [`CatalogProvider`]s. @@ -157,11 +159,11 @@ impl CatalogProviderList for MemoryCatalogProviderList { /// access required to read table details (e.g. statistics). /// /// The pattern that DataFusion itself uses to plan SQL queries is to walk over -/// the query to [find all schema / table references in an `async` function], +/// the query to [find all table references], /// performing required remote catalog in parallel, and then plans the query /// using that snapshot. /// -/// [find all schema / table references in an `async` function]: crate::execution::context::SessionState::resolve_table_references +/// [find all table references]: resolve_table_references /// /// # Example Catalog Implementations /// @@ -295,6 +297,182 @@ impl CatalogProvider for MemoryCatalogProvider { } } +/// Collects all tables and views referenced in the SQL statement. CTEs are collected separately. +/// This can be used to determine which tables need to be in the catalog for a query to be planned. +/// +/// # Returns +/// +/// A `(table_refs, ctes)` tuple, the first element contains table and view references and the second +/// element contains any CTE aliases that were defined and possibly referenced. 
+/// +/// ## Example +/// +/// ``` +/// # use datafusion_sql::parser::DFParser; +/// # use datafusion::catalog::resolve_table_references; +/// let query = "SELECT a FROM foo where x IN (SELECT y FROM bar)"; +/// let statement = DFParser::parse_sql(query).unwrap().pop_back().unwrap(); +/// let (table_refs, ctes) = resolve_table_references(&statement, true).unwrap(); +/// assert_eq!(table_refs.len(), 2); +/// assert_eq!(table_refs[0].to_string(), "bar"); +/// assert_eq!(table_refs[1].to_string(), "foo"); +/// assert_eq!(ctes.len(), 0); +/// ``` +/// +/// ## Example with CTEs +/// +/// ``` +/// # use datafusion_sql::parser::DFParser; +/// # use datafusion::catalog::resolve_table_references; +/// let query = "with my_cte as (values (1), (2)) SELECT * from my_cte;"; +/// let statement = DFParser::parse_sql(query).unwrap().pop_back().unwrap(); +/// let (table_refs, ctes) = resolve_table_references(&statement, true).unwrap(); +/// assert_eq!(table_refs.len(), 0); +/// assert_eq!(ctes.len(), 1); +/// assert_eq!(ctes[0].to_string(), "my_cte"); +/// ``` +pub fn resolve_table_references( + statement: &datafusion_sql::parser::Statement, + enable_ident_normalization: bool, +) -> datafusion_common::Result<(Vec, Vec)> { + use crate::sql::planner::object_name_to_table_reference; + use datafusion_sql::parser::{ + CopyToSource, CopyToStatement, Statement as DFStatement, + }; + use information_schema::INFORMATION_SCHEMA; + use information_schema::INFORMATION_SCHEMA_TABLES; + use sqlparser::ast::*; + + struct RelationVisitor { + relations: BTreeSet, + all_ctes: BTreeSet, + ctes_in_scope: Vec, + } + + impl RelationVisitor { + /// Record the reference to `relation`, if it's not a CTE reference. + fn insert_relation(&mut self, relation: &ObjectName) { + if !self.relations.contains(relation) + && !self.ctes_in_scope.contains(relation) + { + self.relations.insert(relation.clone()); + } + } + } + + impl Visitor for RelationVisitor { + type Break = (); + + fn pre_visit_relation(&mut self, relation: &ObjectName) -> ControlFlow<()> { + self.insert_relation(relation); + ControlFlow::Continue(()) + } + + fn pre_visit_query(&mut self, q: &Query) -> ControlFlow { + if let Some(with) = &q.with { + for cte in &with.cte_tables { + // The non-recursive CTE name is not in scope when evaluating the CTE itself, so this is valid: + // `WITH t AS (SELECT * FROM t) SELECT * FROM t` + // Where the first `t` refers to a predefined table. So we are careful here + // to visit the CTE first, before putting it in scope. + if !with.recursive { + // This is a bit hackish as the CTE will be visited again as part of visiting `q`, + // but thankfully `insert_relation` is idempotent. + cte.visit(self); + } + self.ctes_in_scope + .push(ObjectName(vec![cte.alias.name.clone()])); + } + } + ControlFlow::Continue(()) + } + + fn post_visit_query(&mut self, q: &Query) -> ControlFlow { + if let Some(with) = &q.with { + for _ in &with.cte_tables { + // Unwrap: We just pushed these in `pre_visit_query` + self.all_ctes.insert(self.ctes_in_scope.pop().unwrap()); + } + } + ControlFlow::Continue(()) + } + + fn pre_visit_statement(&mut self, statement: &Statement) -> ControlFlow<()> { + if let Statement::ShowCreate { + obj_type: ShowCreateObject::Table | ShowCreateObject::View, + obj_name, + } = statement + { + self.insert_relation(obj_name) + } + + // SHOW statements will later be rewritten into a SELECT from the information_schema + let requires_information_schema = matches!( + statement, + Statement::ShowFunctions { .. } + | Statement::ShowVariable { .. 
} + | Statement::ShowStatus { .. } + | Statement::ShowVariables { .. } + | Statement::ShowCreate { .. } + | Statement::ShowColumns { .. } + | Statement::ShowTables { .. } + | Statement::ShowCollation { .. } + ); + if requires_information_schema { + for s in INFORMATION_SCHEMA_TABLES { + self.relations.insert(ObjectName(vec![ + Ident::new(INFORMATION_SCHEMA), + Ident::new(*s), + ])); + } + } + ControlFlow::Continue(()) + } + } + + let mut visitor = RelationVisitor { + relations: BTreeSet::new(), + all_ctes: BTreeSet::new(), + ctes_in_scope: vec![], + }; + + fn visit_statement(statement: &DFStatement, visitor: &mut RelationVisitor) { + match statement { + DFStatement::Statement(s) => { + let _ = s.as_ref().visit(visitor); + } + DFStatement::CreateExternalTable(table) => { + visitor + .relations + .insert(ObjectName(vec![Ident::from(table.name.as_str())])); + } + DFStatement::CopyTo(CopyToStatement { source, .. }) => match source { + CopyToSource::Relation(table_name) => { + visitor.insert_relation(table_name); + } + CopyToSource::Query(query) => { + query.visit(visitor); + } + }, + DFStatement::Explain(explain) => visit_statement(&explain.statement, visitor), + } + } + + visit_statement(statement, &mut visitor); + + let table_refs = visitor + .relations + .into_iter() + .map(|x| object_name_to_table_reference(x, enable_ident_normalization)) + .collect::>()?; + let ctes = visitor + .all_ctes + .into_iter() + .map(|x| object_name_to_table_reference(x, enable_ident_normalization)) + .collect::>()?; + Ok((table_refs, ctes)) +} + #[cfg(test)] mod tests { use super::*; @@ -363,4 +541,61 @@ mod tests { let cat = Arc::new(MemoryCatalogProvider::new()) as Arc; assert!(cat.deregister_schema("foo", false).unwrap().is_none()); } + + #[test] + fn resolve_table_references_shadowed_cte() { + use datafusion_sql::parser::DFParser; + + // An interesting edge case where the `t` name is used both as an ordinary table reference + // and as a CTE reference. + let query = "WITH t AS (SELECT * FROM t) SELECT * FROM t"; + let statement = DFParser::parse_sql(query).unwrap().pop_back().unwrap(); + let (table_refs, ctes) = resolve_table_references(&statement, true).unwrap(); + assert_eq!(table_refs.len(), 1); + assert_eq!(ctes.len(), 1); + assert_eq!(ctes[0].to_string(), "t"); + assert_eq!(table_refs[0].to_string(), "t"); + + // UNION is a special case where the CTE is not in scope for the second branch. + let query = "(with t as (select 1) select * from t) union (select * from t)"; + let statement = DFParser::parse_sql(query).unwrap().pop_back().unwrap(); + let (table_refs, ctes) = resolve_table_references(&statement, true).unwrap(); + assert_eq!(table_refs.len(), 1); + assert_eq!(ctes.len(), 1); + assert_eq!(ctes[0].to_string(), "t"); + assert_eq!(table_refs[0].to_string(), "t"); + + // Nested CTEs are also handled. + // Here the first `u` is a CTE, but the second `u` is a table reference. + // While `t` is always a CTE. 
+ let query = "(with t as (with u as (select 1) select * from u) select * from u cross join t)"; + let statement = DFParser::parse_sql(query).unwrap().pop_back().unwrap(); + let (table_refs, ctes) = resolve_table_references(&statement, true).unwrap(); + assert_eq!(table_refs.len(), 1); + assert_eq!(ctes.len(), 2); + assert_eq!(ctes[0].to_string(), "t"); + assert_eq!(ctes[1].to_string(), "u"); + assert_eq!(table_refs[0].to_string(), "u"); + } + + #[test] + fn resolve_table_references_recursive_cte() { + use datafusion_sql::parser::DFParser; + + let query = " + WITH RECURSIVE nodes AS ( + SELECT 1 as id + UNION ALL + SELECT id + 1 as id + FROM nodes + WHERE id < 10 + ) + SELECT * FROM nodes + "; + let statement = DFParser::parse_sql(query).unwrap().pop_back().unwrap(); + let (table_refs, ctes) = resolve_table_references(&statement, true).unwrap(); + assert_eq!(table_refs.len(), 0); + assert_eq!(ctes.len(), 1); + assert_eq!(ctes[0].to_string(), "nodes"); + } } diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index fed101bd239b..1df77a1f9e0b 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -66,15 +66,12 @@ use datafusion_optimizer::{ use datafusion_physical_expr::create_physical_expr; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_plan::ExecutionPlan; -use datafusion_sql::parser::{CopyToSource, CopyToStatement, DFParser, Statement}; -use datafusion_sql::planner::{ - object_name_to_table_reference, ContextProvider, ParserOptions, SqlToRel, -}; +use datafusion_sql::parser::{DFParser, Statement}; +use datafusion_sql::planner::{ContextProvider, ParserOptions, SqlToRel}; use sqlparser::dialect::dialect_from_str; use std::collections::hash_map::Entry; use std::collections::{HashMap, HashSet}; use std::fmt::Debug; -use std::ops::ControlFlow; use std::sync::Arc; use url::Url; use uuid::Uuid; @@ -493,91 +490,22 @@ impl SessionState { Ok(statement) } - /// Resolve all table references in the SQL statement. + /// Resolve all table references in the SQL statement. Does not include CTE references. + /// + /// See [`catalog::resolve_table_references`] for more information. 
+ /// + /// [`catalog::resolve_table_references`]: crate::catalog::resolve_table_references pub fn resolve_table_references( &self, statement: &datafusion_sql::parser::Statement, ) -> datafusion_common::Result> { - use crate::catalog::information_schema::INFORMATION_SCHEMA_TABLES; - use datafusion_sql::parser::Statement as DFStatement; - use sqlparser::ast::*; - - // Getting `TableProviders` is async but planing is not -- thus pre-fetch - // table providers for all relations referenced in this query - let mut relations = hashbrown::HashSet::with_capacity(10); - - struct RelationVisitor<'a>(&'a mut hashbrown::HashSet); - - impl<'a> RelationVisitor<'a> { - /// Record that `relation` was used in this statement - fn insert(&mut self, relation: &ObjectName) { - self.0.get_or_insert_with(relation, |_| relation.clone()); - } - } - - impl<'a> Visitor for RelationVisitor<'a> { - type Break = (); - - fn pre_visit_relation(&mut self, relation: &ObjectName) -> ControlFlow<()> { - self.insert(relation); - ControlFlow::Continue(()) - } - - fn pre_visit_statement(&mut self, statement: &Statement) -> ControlFlow<()> { - if let Statement::ShowCreate { - obj_type: ShowCreateObject::Table | ShowCreateObject::View, - obj_name, - } = statement - { - self.insert(obj_name) - } - ControlFlow::Continue(()) - } - } - - let mut visitor = RelationVisitor(&mut relations); - fn visit_statement(statement: &DFStatement, visitor: &mut RelationVisitor<'_>) { - match statement { - DFStatement::Statement(s) => { - let _ = s.as_ref().visit(visitor); - } - DFStatement::CreateExternalTable(table) => { - visitor - .0 - .insert(ObjectName(vec![Ident::from(table.name.as_str())])); - } - DFStatement::CopyTo(CopyToStatement { source, .. }) => match source { - CopyToSource::Relation(table_name) => { - visitor.insert(table_name); - } - CopyToSource::Query(query) => { - query.visit(visitor); - } - }, - DFStatement::Explain(explain) => { - visit_statement(&explain.statement, visitor) - } - } - } - - visit_statement(statement, &mut visitor); - - // Always include information_schema if available - if self.config.information_schema() { - for s in INFORMATION_SCHEMA_TABLES { - relations.insert(ObjectName(vec![ - Ident::new(INFORMATION_SCHEMA), - Ident::new(*s), - ])); - } - } - let enable_ident_normalization = self.config.options().sql_parser.enable_ident_normalization; - relations - .into_iter() - .map(|x| object_name_to_table_reference(x, enable_ident_normalization)) - .collect::>() + let (table_refs, _) = crate::catalog::resolve_table_references( + statement, + enable_ident_normalization, + )?; + Ok(table_refs) } /// Convert an AST Statement into a LogicalPlan diff --git a/datafusion/sqllogictest/test_files/cte.slt b/datafusion/sqllogictest/test_files/cte.slt index 1ff108cf6c5f..d8eaa51fc88a 100644 --- a/datafusion/sqllogictest/test_files/cte.slt +++ b/datafusion/sqllogictest/test_files/cte.slt @@ -828,3 +828,10 @@ SELECT * FROM non_recursive_cte, recursive_cte; ---- 1 1 1 3 + +# Name shadowing: +# The first `t` refers to the table, the second to the CTE. 
+query I +WITH t AS (SELECT * FROM t where t.a < 2) SELECT * FROM t +---- +1 \ No newline at end of file From a923c659cf932f6369f2d5257e5b99128b67091a Mon Sep 17 00:00:00 2001 From: Alex Huang Date: Mon, 17 Jun 2024 19:22:55 +0800 Subject: [PATCH 4/4] feat: Add support for Int8 and Int16 data types in data page statistics (#10931) --- .../physical_plan/parquet/statistics.rs | 30 +++++++++++++++++++ .../core/tests/parquet/arrow_statistics.rs | 24 ++------------- 2 files changed, 33 insertions(+), 21 deletions(-) diff --git a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs index 327a516f1af1..a2f17ca9b7a7 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs @@ -548,6 +548,8 @@ macro_rules! make_data_page_stats_iterator { }; } +make_data_page_stats_iterator!(MinInt32DataPageStatsIterator, min, Index::INT32, i32); +make_data_page_stats_iterator!(MaxInt32DataPageStatsIterator, max, Index::INT32, i32); make_data_page_stats_iterator!(MinInt64DataPageStatsIterator, min, Index::INT64, i64); make_data_page_stats_iterator!(MaxInt64DataPageStatsIterator, max, Index::INT64, i64); @@ -555,6 +557,29 @@ macro_rules! get_data_page_statistics { ($stat_type_prefix: ident, $data_type: ident, $iterator: ident) => { paste! { match $data_type { + Some(DataType::Int8) => Ok(Arc::new( + Int8Array::from_iter( + [<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator) + .map(|x| { + x.into_iter().filter_map(|x| { + x.and_then(|x| i8::try_from(x).ok()) + }) + }) + .flatten() + ) + )), + Some(DataType::Int16) => Ok(Arc::new( + Int16Array::from_iter( + [<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator) + .map(|x| { + x.into_iter().filter_map(|x| { + x.and_then(|x| i16::try_from(x).ok()) + }) + }) + .flatten() + ) + )), + Some(DataType::Int32) => Ok(Arc::new(Int32Array::from_iter([<$stat_type_prefix Int32DataPageStatsIterator>]::new($iterator).flatten()))), Some(DataType::Int64) => Ok(Arc::new(Int64Array::from_iter([<$stat_type_prefix Int64DataPageStatsIterator>]::new($iterator).flatten()))), _ => unimplemented!() } @@ -642,6 +667,11 @@ where { let iter = iterator.flat_map(|(len, index)| match index { Index::NONE => vec![None; len], + Index::INT32(native_index) => native_index + .indexes + .iter() + .map(|x| x.null_count.map(|x| x as u64)) + .collect::>(), Index::INT64(native_index) => native_index .indexes .iter() diff --git a/datafusion/core/tests/parquet/arrow_statistics.rs b/datafusion/core/tests/parquet/arrow_statistics.rs index 6b8705441d12..87bd1372225f 100644 --- a/datafusion/core/tests/parquet/arrow_statistics.rs +++ b/datafusion/core/tests/parquet/arrow_statistics.rs @@ -550,16 +550,11 @@ async fn test_int_32() { // row counts are [5, 5, 5, 5] expected_row_counts: UInt64Array::from(vec![5, 5, 5, 5]), column_name: "i32", - check: Check::RowGroup, + check: Check::Both, } .run(); } -// BUG: ignore this test for now -// https://github.com/apache/datafusion/issues/10585 -// Note that the file has 4 columns named "i8", "i16", "i32", "i64". -// - The tests on column i32 and i64 passed. -// - The tests on column i8 and i16 failed. 
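The narrowing idiom that patch 2 applies to `get_statistics` and that patch 4 reuses in `get_data_page_statistics` is what lets the `i8`/`i16` tests below exercise both row-group and data-page statistics. A minimal standalone sketch of that idiom — this is not the macro code itself, it assumes the `arrow` crate as a dependency, and the input values are invented:

```rust
use arrow::array::{Array, Int16Array};

fn main() {
    // Optional i32 statistics, as the parquet Int32 statistics/page iterators yield them.
    let stats: Vec<Option<i32>> = vec![Some(-5), None, Some(40_000), Some(9)];

    // `i16::try_from(x).ok()` replaces the verbose `if let Ok(v) { Some(v) } else { None }`:
    // values that do not fit in the narrower type simply become NULL entries.
    let array = Int16Array::from_iter(
        stats.into_iter().map(|x| x.and_then(|x| i16::try_from(x).ok())),
    );

    assert_eq!(array.len(), 4);
    assert!(array.is_null(1)); // a missing statistic stays NULL
    assert!(array.is_null(2)); // 40_000 does not fit in i16
    assert_eq!(array.value(3), 9);
}
```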
#[tokio::test] async fn test_int_16() { // This creates a parquet files of 4 columns named "i8", "i16", "i32", "i64" @@ -573,16 +568,6 @@ async fn test_int_16() { Test { reader: &reader, // mins are [-5, -4, 0, 5] - // BUG: not sure why this returns same data but in Int32Array type even though I debugged and the columns name is "i16" an its data is Int16 - // My debugging tells me the bug is either at: - // 1. The new code to get "iter". See the code in this PR with - // // Get an iterator over the column statistics - // let iter = row_groups - // .iter() - // .map(|x| x.column(parquet_idx).statistics()); - // OR - // 2. in the function (and/or its marco) `pub(crate) fn min_statistics<'a, I: Iterator>>` here - // https://github.com/apache/datafusion/blob/ea023e2d4878240eece870cf4b346c7a0667aeed/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs#L179 expected_min: Arc::new(Int16Array::from(vec![-5, -4, 0, 5])), // panic here because the actual data is Int32Array // maxes are [-1, 0, 4, 9] expected_max: Arc::new(Int16Array::from(vec![-1, 0, 4, 9])), @@ -591,13 +576,11 @@ async fn test_int_16() { // row counts are [5, 5, 5, 5] expected_row_counts: UInt64Array::from(vec![5, 5, 5, 5]), column_name: "i16", - check: Check::RowGroup, + check: Check::Both, } .run(); } -// BUG (same as above): ignore this test for now -// https://github.com/apache/datafusion/issues/10585 #[tokio::test] async fn test_int_8() { // This creates a parquet files of 4 columns named "i8", "i16", "i32", "i64" @@ -611,7 +594,6 @@ async fn test_int_8() { Test { reader: &reader, // mins are [-5, -4, 0, 5] - // BUG: not sure why this returns same data but in Int32Array even though I debugged and the columns name is "i8" an its data is Int8 expected_min: Arc::new(Int8Array::from(vec![-5, -4, 0, 5])), // panic here because the actual data is Int32Array // maxes are [-1, 0, 4, 9] expected_max: Arc::new(Int8Array::from(vec![-1, 0, 4, 9])), @@ -620,7 +602,7 @@ async fn test_int_8() { // row counts are [5, 5, 5, 5] expected_row_counts: UInt64Array::from(vec![5, 5, 5, 5]), column_name: "i8", - check: Check::RowGroup, + check: Check::Both, } .run(); }
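Taken together, the new `catalog::resolve_table_references` entry point from the third patch supports the workflow its doc comment describes: discover which tables a statement needs, fetch or register them (possibly from a remote catalog) up front, then plan. A sketch of that flow, adapted from the doc examples added above — the `orders`/`customers` table names are invented, and the `datafusion`, `datafusion-sql`, and `datafusion-common` crates are assumed at a version that includes this change:

```rust
use datafusion::catalog::resolve_table_references;
use datafusion_sql::parser::DFParser;

fn main() -> datafusion_common::Result<()> {
    // Hypothetical query: `orders` and `customers` are real tables, `recent` is only a CTE.
    let sql = "WITH recent AS (SELECT * FROM orders WHERE region = 'EU') \
               SELECT c.name, r.total FROM recent r JOIN customers c ON c.id = r.customer_id";
    let statement = DFParser::parse_sql(sql).unwrap().pop_back().unwrap();

    // `true` enables identifier normalization, matching the default SQL parser options.
    let (table_refs, ctes) = resolve_table_references(&statement, true)?;

    // Only the real tables need to exist in the catalog before planning;
    // CTE aliases are reported separately and never require catalog entries.
    for table in &table_refs {
        println!("needs table: {table}");
    }
    for cte in &ctes {
        println!("defines cte: {cte}");
    }
    Ok(())
}
```

Within DataFusion itself, `SessionState::resolve_table_references` now simply delegates to this function and discards the CTE list, as shown in the session_state.rs hunk above.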