From 1b718f495756c3273ac83ca074c996ea264db907 Mon Sep 17 00:00:00 2001 From: Will Temperley Date: Mon, 13 Oct 2025 19:31:11 +0800 Subject: [PATCH 1/2] fix: Correct padding now added to arrow filemarker. Recordbatches are now written with correct alignment and metadata length. --- Arrow/Sources/Arrow/ArrowWriter.swift | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/Arrow/Sources/Arrow/ArrowWriter.swift b/Arrow/Sources/Arrow/ArrowWriter.swift index 3aa25b6..262bb53 100644 --- a/Arrow/Sources/Arrow/ArrowWriter.swift +++ b/Arrow/Sources/Arrow/ArrowWriter.swift @@ -138,18 +138,26 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length var rbBlocks = [org_apache_arrow_flatbuf_Block]() for batch in batches { + addPadForAlignment(&writer) let startIndex = writer.count switch writeRecordBatch(batch: batch) { case .success(let rbResult): withUnsafeBytes(of: CONTINUATIONMARKER.littleEndian) {writer.append(Data($0))} withUnsafeBytes(of: rbResult.1.o.littleEndian) {writer.append(Data($0))} writer.append(rbResult.0) + addPadForAlignment(&writer) + let metadataEnd = writer.count + let metadataLength = metadataEnd - startIndex switch writeRecordBatchData(&writer, fields: batch.schema.fields, columns: batch.columns) { case .success: + addPadForAlignment(&writer) rbBlocks.append( - org_apache_arrow_flatbuf_Block(offset: Int64(startIndex), - metaDataLength: Int32(0), - bodyLength: Int64(rbResult.1.o))) + org_apache_arrow_flatbuf_Block( + offset: Int64(startIndex), + metaDataLength: Int32(metadataLength), + bodyLength: Int64(rbResult.1.o) + ) + ) case .failure(let error): return .failure(error) } @@ -379,7 +387,7 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length addPadForAlignment(&markerData) var writer: any DataWriter = FileDataWriter(fileHandle) - writer.append(FILEMARKER.data(using: .utf8)!) + writer.append(markerData) switch writeFile(&writer, info: info) { case .success: writer.append(FILEMARKER.data(using: .utf8)!) From a190494e9ca6d098513d01052f39f08d0fd024ed Mon Sep 17 00:00:00 2001 From: Will Temperley Date: Tue, 14 Oct 2025 15:09:17 +0800 Subject: [PATCH 2/2] fix: Pad schema message instead of record batch start. Calculate record batch body length from writer offsets. Add guard to check total block size. --- Arrow/Sources/Arrow/ArrowWriter.swift | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/Arrow/Sources/Arrow/ArrowWriter.swift b/Arrow/Sources/Arrow/ArrowWriter.swift index 262bb53..24176d3 100644 --- a/Arrow/Sources/Arrow/ArrowWriter.swift +++ b/Arrow/Sources/Arrow/ArrowWriter.swift @@ -138,7 +138,6 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length var rbBlocks = [org_apache_arrow_flatbuf_Block]() for batch in batches { - addPadForAlignment(&writer) let startIndex = writer.count switch writeRecordBatch(batch: batch) { case .success(let rbResult): @@ -146,16 +145,22 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length withUnsafeBytes(of: rbResult.1.o.littleEndian) {writer.append(Data($0))} writer.append(rbResult.0) addPadForAlignment(&writer) - let metadataEnd = writer.count - let metadataLength = metadataEnd - startIndex + let metadataLength = writer.count - startIndex + let bodyStart = writer.count switch writeRecordBatchData(&writer, fields: batch.schema.fields, columns: batch.columns) { case .success: - addPadForAlignment(&writer) + let bodyLength = writer.count - bodyStart + let expectedSize = startIndex + metadataLength + bodyLength + guard expectedSize == writer.count else { + return .failure(.invalid( + "Invalid Block. Expected \(expectedSize), got \(writer.count)" + )) + } rbBlocks.append( org_apache_arrow_flatbuf_Block( offset: Int64(startIndex), metaDataLength: Int32(metadataLength), - bodyLength: Int64(rbResult.1.o) + bodyLength: Int64(bodyLength) ) ) case .failure(let error): @@ -301,6 +306,7 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length case .success(let schemaOffset): fbb.finish(offset: schemaOffset) writer.append(fbb.data) + addPadForAlignment(&writer) case .failure(let error): return .failure(error) }