From 019569df7aff4f96cdc98e3da6ec39df070c9699 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Thu, 14 May 2026 15:12:33 +0100 Subject: [PATCH] Harden Variant Reading With help from Claude Fixes #16334 Change-Id: I0fc34bfcb62ccab1d4111df5ca5db9e22ce5dfcc --- .../iceberg/variants/SerializedArray.java | 18 +++++++++- .../iceberg/variants/SerializedMetadata.java | 33 ++++++++++++++++--- .../iceberg/variants/SerializedObject.java | 23 +++++++++++-- 3 files changed, 66 insertions(+), 8 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/variants/SerializedArray.java b/api/src/main/java/org/apache/iceberg/variants/SerializedArray.java index b390d96f31ed..9d313f63f62e 100644 --- a/api/src/main/java/org/apache/iceberg/variants/SerializedArray.java +++ b/api/src/main/java/org/apache/iceberg/variants/SerializedArray.java @@ -55,9 +55,25 @@ private SerializedArray(VariantMetadata metadata, ByteBuffer value, int header) this.value = value; this.offsetSize = 1 + ((header & OFFSET_SIZE_MASK) >> OFFSET_SIZE_SHIFT); int numElementsSize = ((header & IS_LARGE) == IS_LARGE) ? 4 : 1; + int remaining = value.remaining(); + Preconditions.checkArgument( + HEADER_SIZE + numElementsSize <= remaining, + "Variant array truncated: cannot read element count (numElementsSize=%s remaining=%s)", + numElementsSize, + remaining); int numElements = VariantUtil.readLittleEndianUnsigned(value, HEADER_SIZE, numElementsSize); + Preconditions.checkArgument( + numElements >= 0, "Invalid variant array element count: %s", numElements); this.offsetListOffset = HEADER_SIZE + numElementsSize; - this.dataOffset = offsetListOffset + ((1 + numElements) * offsetSize); + long offsetTableBytes = (1L + numElements) * offsetSize; + long computedDataOffset = (long) offsetListOffset + offsetTableBytes; + Preconditions.checkArgument( + computedDataOffset <= remaining, + "Variant array offset table extends past buffer: numElements=%s offsetSize=%s remaining=%s", + numElements, + offsetSize, + remaining); + this.dataOffset = (int) computedDataOffset; this.array = new VariantValue[numElements]; } diff --git a/api/src/main/java/org/apache/iceberg/variants/SerializedMetadata.java b/api/src/main/java/org/apache/iceberg/variants/SerializedMetadata.java index 9113a8c1c969..cadced07777f 100644 --- a/api/src/main/java/org/apache/iceberg/variants/SerializedMetadata.java +++ b/api/src/main/java/org/apache/iceberg/variants/SerializedMetadata.java @@ -58,15 +58,38 @@ static SerializedMetadata from(ByteBuffer metadata) { private SerializedMetadata(ByteBuffer metadata, int header) { this.isSorted = (header & SORTED_STRINGS) == SORTED_STRINGS; this.offsetSize = 1 + ((header & OFFSET_SIZE_MASK) >> OFFSET_SIZE_SHIFT); + int remaining = metadata.remaining(); + Preconditions.checkArgument( + HEADER_SIZE + offsetSize <= remaining, + "Variant metadata truncated: cannot read dictionary size (offsetSize=%s remaining=%s)", + offsetSize, + remaining); int dictSize = VariantUtil.readLittleEndianUnsigned(metadata, HEADER_SIZE, offsetSize); - this.dict = new String[dictSize]; + Preconditions.checkArgument( + dictSize >= 0, "Invalid variant metadata dictionary size: %s", dictSize); this.offsetListOffset = HEADER_SIZE + offsetSize; - this.dataOffset = offsetListOffset + ((1 + dictSize) * offsetSize); - int endOffset = - dataOffset + // Use long arithmetic so a malformed dictSize/offsetSize cannot overflow into a small int. + long offsetTableBytes = (1L + dictSize) * offsetSize; + long computedDataOffset = (long) offsetListOffset + offsetTableBytes; + Preconditions.checkArgument( + computedDataOffset <= remaining, + "Variant metadata offset table extends past buffer: dictSize=%s offsetSize=%s remaining=%s", + dictSize, + offsetSize, + remaining); + long computedEnd = + computedDataOffset + VariantUtil.readLittleEndianUnsigned( metadata, offsetListOffset + (offsetSize * dictSize), offsetSize); - if (endOffset < metadata.limit()) { + Preconditions.checkArgument( + computedEnd <= remaining, + "Variant metadata data section extends past buffer: end=%s remaining=%s", + computedEnd, + remaining); + this.dict = new String[dictSize]; + this.dataOffset = (int) computedDataOffset; + int endOffset = (int) computedEnd; + if (endOffset < remaining) { this.metadata = VariantUtil.slice(metadata, 0, endOffset); } else { this.metadata = metadata; diff --git a/api/src/main/java/org/apache/iceberg/variants/SerializedObject.java b/api/src/main/java/org/apache/iceberg/variants/SerializedObject.java index d74565891a8a..cd24c9c4437c 100644 --- a/api/src/main/java/org/apache/iceberg/variants/SerializedObject.java +++ b/api/src/main/java/org/apache/iceberg/variants/SerializedObject.java @@ -67,13 +67,32 @@ private SerializedObject(VariantMetadata metadata, ByteBuffer value, int header) this.offsetSize = 1 + ((header & OFFSET_SIZE_MASK) >> OFFSET_SIZE_SHIFT); this.fieldIdSize = 1 + ((header & FIELD_ID_SIZE_MASK) >> FIELD_ID_SIZE_SHIFT); int numElementsSize = ((header & IS_LARGE) == IS_LARGE) ? 4 : 1; + int remaining = value.remaining(); + Preconditions.checkArgument( + HEADER_SIZE + numElementsSize <= remaining, + "Variant object truncated: cannot read element count (numElementsSize=%s remaining=%s)", + numElementsSize, + remaining); int numElements = VariantUtil.readLittleEndianUnsigned(value, HEADER_SIZE, numElementsSize); + Preconditions.checkArgument( + numElements >= 0, "Invalid variant object element count: %s", numElements); this.fieldIdListOffset = HEADER_SIZE + numElementsSize; + long fieldIdBytes = (long) numElements * fieldIdSize; + long offsetTableBytes = (1L + numElements) * offsetSize; + long computedOffsetListOffset = (long) fieldIdListOffset + fieldIdBytes; + long computedDataOffset = computedOffsetListOffset + offsetTableBytes; + Preconditions.checkArgument( + computedDataOffset <= remaining, + "Variant object tables extend past buffer: numElements=%s fieldIdSize=%s offsetSize=%s remaining=%s", + numElements, + fieldIdSize, + offsetSize, + remaining); this.fieldIds = new Integer[numElements]; - this.offsetListOffset = fieldIdListOffset + (numElements * fieldIdSize); + this.offsetListOffset = (int) computedOffsetListOffset; this.offsets = new int[numElements]; this.lengths = new int[numElements]; - this.dataOffset = offsetListOffset + ((1 + numElements) * offsetSize); + this.dataOffset = (int) computedDataOffset; this.values = new VariantValue[numElements]; if (numElements > 0) {