From fff55ac8311207cbc2100a54909376e39be1f5d7 Mon Sep 17 00:00:00 2001
From: LFC <990479+MichaelScofield@users.noreply.github.com>
Date: Wed, 7 May 2025 11:23:48 +0800
Subject: [PATCH 1/5] Update Arrow to 56.0 and chrono to 0.4.41

---
 Cargo.toml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index d563db9..5308c2d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -32,10 +32,10 @@ rust-version = "1.73"
 all-features = true
 
 [dependencies]
-arrow = { version = ">= 53.1.0, < 55.0.0", features = ["prettyprint", "chrono-tz", "ipc_compression"] }
+arrow = { version = "56.0", features = ["prettyprint", "chrono-tz", "ipc_compression"] }
 bytemuck = { version = "1.18.0", features = ["must_cast"] }
 bytes = "1.4"
-chrono = { version = ">= 0.4.37, < 0.4.40", default-features = false, features = ["std"] }
+chrono = { version = "0.4.41", default-features = false, features = ["std"] }
 chrono-tz = "0.10"
 fallible-streaming-iterator = { version = "0.1" }
 flate2 = "1"
@@ -68,8 +68,8 @@ clap = { version = "4.5.4", features = ["derive"], optional = true }
 opendal = { version = "0.50", optional = true, default-features = false }
 
 [dev-dependencies]
-arrow-ipc = { version = "53.0.0", features = ["lz4"] }
-arrow-json = "53.0.0"
+arrow-ipc = { version = "56.0", features = ["lz4"] }
+arrow-json = "56.0"
 criterion = { version = "0.5", default-features = false, features = ["async_tokio"] }
 opendal = { version = "0.50", default-features = false, features = ["services-memory"] }
 pretty_assertions = "1.3.0"

From 24caf3c14e7714ab1612fc2946002e4c3a79545d Mon Sep 17 00:00:00 2001
From: luofucong <luofc@foxmail.com>
Date: Thu, 14 Aug 2025 19:29:41 +0800
Subject: [PATCH 2/5] Fix CI: drop some proto messages, avoid a clone in a test, allow unused in rle_v1

---
 format/orc_proto.proto      | 23 -----------------------
 src/arrow_writer.rs         |  2 +-
 src/encoding/integer/mod.rs |  1 +
 src/proto.rs                | 35 -----------------------------------
 4 files changed, 2 insertions(+), 59 deletions(-)

diff --git a/format/orc_proto.proto b/format/orc_proto.proto
index ff71659..e899912 100644
--- a/format/orc_proto.proto
+++ b/format/orc_proto.proto
@@ -100,24 +100,12 @@ message ColumnStatistics {
   optional CollectionStatistics collectionStatistics = 12;
 }
 
-message RowIndexEntry {
-  repeated uint64 positions = 1 [packed=true];
-  optional ColumnStatistics statistics = 2;
-}
-
-message RowIndex {
-  repeated RowIndexEntry entry = 1;
-}
-
 message BloomFilter {
   optional uint32 numHashFunctions = 1;
   repeated fixed64 bitset = 2;
   optional bytes utf8bitset = 3;
 }
 
-message BloomFilterIndex {
-  repeated BloomFilter bloomFilter = 1;
-}
 
 message Stream {
   // if you add new index stream kinds, you need to make sure to update
@@ -270,23 +258,12 @@ message Metadata {
   repeated StripeStatistics stripeStats = 1;
 }
 
-// In ORC v2 (and for encrypted columns in v1), each column has
-// their column statistics written separately.
-message ColumnarStripeStatistics {
-  // one value for each stripe in the file
-  repeated ColumnStatistics colStats = 1;
-}
-
 enum EncryptionAlgorithm {
   UNKNOWN_ENCRYPTION = 0;  // used for detecting future algorithms
   AES_CTR_128 = 1;
   AES_CTR_256 = 2;
 }
 
-message FileStatistics {
-  repeated ColumnStatistics column = 1;
-}
-
 // How was the data masked? This isn't necessary for reading the file, but
 // is documentation about how the file was written.
 message DataMask {
diff --git a/src/arrow_writer.rs b/src/arrow_writer.rs
index 0b4085d..e322493 100644
--- a/src/arrow_writer.rs
+++ b/src/arrow_writer.rs
@@ -350,7 +350,7 @@ mod tests {
         )
         .unwrap();
 
-        let rows = roundtrip(&[batch.clone()]);
+        let rows = roundtrip(std::slice::from_ref(&batch));
         assert_eq!(batch, rows[0]);
     }
 
diff --git a/src/encoding/integer/mod.rs b/src/encoding/integer/mod.rs
index f652d4e..be8919b 100644
--- a/src/encoding/integer/mod.rs
+++ b/src/encoding/integer/mod.rs
@@ -39,6 +39,7 @@ use crate::{
 
 use super::PrimitiveValueDecoder;
 
+#[allow(unused)]
 pub mod rle_v1;
 pub mod rle_v2;
 mod util;
diff --git a/src/proto.rs b/src/proto.rs
index ae71cdb..c425e5b 100644
--- a/src/proto.rs
+++ b/src/proto.rs
@@ -145,20 +145,6 @@ pub struct ColumnStatistics {
 }
 #[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
-pub struct RowIndexEntry {
-    #[prost(uint64, repeated, tag = "1")]
-    pub positions: ::prost::alloc::vec::Vec<u64>,
-    #[prost(message, optional, tag = "2")]
-    pub statistics: ::core::option::Option<ColumnStatistics>,
-}
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct RowIndex {
-    #[prost(message, repeated, tag = "1")]
-    pub entry: ::prost::alloc::vec::Vec<RowIndexEntry>,
-}
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
 pub struct BloomFilter {
     #[prost(uint32, optional, tag = "1")]
     pub num_hash_functions: ::core::option::Option<u32>,
@@ -169,12 +155,6 @@ pub struct BloomFilter {
 }
 #[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
-pub struct BloomFilterIndex {
-    #[prost(message, repeated, tag = "1")]
-    pub bloom_filter: ::prost::alloc::vec::Vec<BloomFilter>,
-}
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
 pub struct Stream {
     #[prost(enumeration = "stream::Kind", optional, tag = "1")]
     pub kind: ::core::option::Option<i32>,
@@ -499,21 +479,6 @@ pub struct Metadata {
     #[prost(message, repeated, tag = "1")]
     pub stripe_stats: ::prost::alloc::vec::Vec<StripeStatistics>,
 }
-/// In ORC v2 (and for encrypted columns in v1), each column has
-/// their column statistics written separately.
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct ColumnarStripeStatistics {
-    /// one value for each stripe in the file
-    #[prost(message, repeated, tag = "1")]
-    pub col_stats: ::prost::alloc::vec::Vec<ColumnStatistics>,
-}
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct FileStatistics {
-    #[prost(message, repeated, tag = "1")]
-    pub column: ::prost::alloc::vec::Vec<ColumnStatistics>,
-}
 /// How was the data masked? This isn't necessary for reading the file, but
 /// is documentation about how the file was written.
 #[allow(clippy::derive_partial_eq_without_eq)]

From 92231ca5fae5c14d07bb93d31f2fdcc203c5024d Mon Sep 17 00:00:00 2001
From: LFC <990479+MichaelScofield@users.noreply.github.com>
Date: Fri, 15 Aug 2025 12:48:25 +0800
Subject: [PATCH 3/5] Restore the messages removed from orc_proto.proto in PATCH 2/5

---
 format/orc_proto.proto | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/format/orc_proto.proto b/format/orc_proto.proto
index e899912..ff71659 100644
--- a/format/orc_proto.proto
+++ b/format/orc_proto.proto
@@ -100,12 +100,24 @@ message ColumnStatistics {
   optional CollectionStatistics collectionStatistics = 12;
 }
 
+message RowIndexEntry {
+  repeated uint64 positions = 1 [packed=true];
+  optional ColumnStatistics statistics = 2;
+}
+
+message RowIndex {
+  repeated RowIndexEntry entry = 1;
+}
+
 message BloomFilter {
   optional uint32 numHashFunctions = 1;
   repeated fixed64 bitset = 2;
   optional bytes utf8bitset = 3;
 }
 
+message BloomFilterIndex {
+  repeated BloomFilter bloomFilter = 1;
+}
 
 message Stream {
   // if you add new index stream kinds, you need to make sure to update
@@ -258,12 +270,23 @@ message Metadata {
   repeated StripeStatistics stripeStats = 1;
 }
 
+// In ORC v2 (and for encrypted columns in v1), each column has
+// their column statistics written separately.
+message ColumnarStripeStatistics {
+  // one value for each stripe in the file
+  repeated ColumnStatistics colStats = 1;
+}
+
 enum EncryptionAlgorithm {
   UNKNOWN_ENCRYPTION = 0;  // used for detecting future algorithms
   AES_CTR_128 = 1;
   AES_CTR_256 = 2;
 }
 
+message FileStatistics {
+  repeated ColumnStatistics column = 1;
+}
+
 // How was the data masked? This isn't necessary for reading the file, but
 // is documentation about how the file was written.
 message DataMask {

From 97c9ce82c6d3a7c3834007604e61d458cbf659c1 Mon Sep 17 00:00:00 2001
From: LFC <990479+MichaelScofield@users.noreply.github.com>
Date: Fri, 15 Aug 2025 12:49:03 +0800
Subject: [PATCH 4/5] Remove #[allow(unused)] from the rle_v1 module declaration in mod.rs

---
 src/encoding/integer/mod.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/encoding/integer/mod.rs b/src/encoding/integer/mod.rs
index be8919b..f652d4e 100644
--- a/src/encoding/integer/mod.rs
+++ b/src/encoding/integer/mod.rs
@@ -39,7 +39,6 @@ use crate::{
 
 use super::PrimitiveValueDecoder;
 
-#[allow(unused)]
 pub mod rle_v1;
 pub mod rle_v2;
 mod util;

From cd9054a47ff51aa2a898c3395917650a06e35f37 Mon Sep 17 00:00:00 2001
From: LFC <990479+MichaelScofield@users.noreply.github.com>
Date: Fri, 15 Aug 2025 12:52:04 +0800
Subject: [PATCH 5/5] Restore the structs removed from proto.rs in PATCH 2/5

---
 src/proto.rs | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/src/proto.rs b/src/proto.rs
index c425e5b..ae71cdb 100644
--- a/src/proto.rs
+++ b/src/proto.rs
@@ -145,6 +145,20 @@ pub struct ColumnStatistics {
 }
 #[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
+pub struct RowIndexEntry {
+    #[prost(uint64, repeated, tag = "1")]
+    pub positions: ::prost::alloc::vec::Vec<u64>,
+    #[prost(message, optional, tag = "2")]
+    pub statistics: ::core::option::Option<ColumnStatistics>,
+}
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct RowIndex {
+    #[prost(message, repeated, tag = "1")]
+    pub entry: ::prost::alloc::vec::Vec<RowIndexEntry>,
+}
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
 pub struct BloomFilter {
     #[prost(uint32, optional, tag = "1")]
     pub num_hash_functions: ::core::option::Option<u32>,
@@ -155,6 +169,12 @@ pub struct BloomFilter {
 }
 #[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
+pub struct BloomFilterIndex {
+    #[prost(message, repeated, tag = "1")]
+    pub bloom_filter: ::prost::alloc::vec::Vec<BloomFilter>,
+}
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
 pub struct Stream {
     #[prost(enumeration = "stream::Kind", optional, tag = "1")]
     pub kind: ::core::option::Option<i32>,
@@ -479,6 +499,21 @@ pub struct Metadata {
     #[prost(message, repeated, tag = "1")]
     pub stripe_stats: ::prost::alloc::vec::Vec<StripeStatistics>,
 }
+/// In ORC v2 (and for encrypted columns in v1), each column has
+/// their column statistics written separately.
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct ColumnarStripeStatistics {
+    /// one value for each stripe in the file
+    #[prost(message, repeated, tag = "1")]
+    pub col_stats: ::prost::alloc::vec::Vec<ColumnStatistics>,
+}
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct FileStatistics {
+    #[prost(message, repeated, tag = "1")]
+    pub column: ::prost::alloc::vec::Vec<ColumnStatistics>,
+}
 /// How was the data masked? This isn't necessary for reading the file, but
 /// is documentation about how the file was written.
 #[allow(clippy::derive_partial_eq_without_eq)]