AVRO-3819: Centralize system properties that limit allocations (#2432)
RyanSkraba committed Aug 19, 2023
1 parent 426c593 commit a12a7e4
Showing 9 changed files with 605 additions and 134 deletions.
3 changes: 1 addition & 2 deletions lang/java/avro/src/main/java/org/apache/avro/Schema.java
@@ -1385,8 +1385,7 @@ private static class FixedSchema extends NamedSchema {

public FixedSchema(Name name, String doc, int size) {
super(Type.FIXED, name, doc);

if (size < 0)
throw new IllegalArgumentException("Invalid fixed size: " + size);
SystemLimitException.checkMaxBytesLength(size);
this.size = size;
}

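Not part of the commit, but useful context for the hunk above: with this change the size check fires at schema-definition time rather than only when data is read. A minimal sketch, assuming SystemLimitException has not yet been initialized (its limits are read once in a static initializer); the class name and the 1024-byte limit are invented for illustration.

import org.apache.avro.Schema;
import org.apache.avro.SystemLimitException;

public class FixedSizeLimitSketch {
  public static void main(String[] args) {
    // Hypothetical limit: reject byte allocations above 1 KiB. The property name is a
    // real constant from this commit; the 1024 value is only an example.
    System.setProperty(SystemLimitException.MAX_BYTES_LENGTH_PROPERTY, "1024");

    // Within the limit: a 16-byte fixed (e.g. an MD5 digest) is created normally.
    Schema digest = Schema.createFixed("digest", null, "org.example", 16);
    System.out.println(digest);

    // Over the limit: the FixedSchema constructor now delegates to checkMaxBytesLength,
    // so this throws SystemLimitException instead of accepting the size.
    Schema tooBig = Schema.createFixed("blob", null, "org.example", 1 << 20);
  }
}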
190 changes: 190 additions & 0 deletions lang/java/avro/src/main/java/org/apache/avro/SystemLimitException.java
@@ -0,0 +1,190 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.avro;

import org.slf4j.LoggerFactory;

/**
* Thrown to prevent making large allocations when reading potentially
* pathological input data from an untrusted source.
* <p/>
* The following system properties can be set to limit the size of bytes,
* strings and collection types to be allocated:
* <ul>
* <li><tt>org.apache.avro.limits.bytes.maxLength</tt> limits the maximum
* size of <tt>bytes</tt> types.</li>
* <li><tt>org.apache.avro.limits.collectionItems.maxLength</tt> limits the
* maximum number of <tt>map</tt> and <tt>list</tt> items that can be read in
* a single sequence.</li>
* <li><tt>org.apache.avro.limits.string.maxLength</tt> limits the maximum
* size of <tt>string</tt> types.</li>
* </ul>
*
* The default is to permit sizes up to {@link #MAX_ARRAY_VM_LIMIT}.
*/
public class SystemLimitException extends AvroRuntimeException {

/**
* The maximum length of array to allocate (unless necessary). Some VMs reserve
* some header words in an array. Attempts to allocate larger arrays may result
* in {@code OutOfMemoryError: Requested array size exceeds VM limit}
*
* @see <a href="https://bugs.openjdk.org/browse/JDK-8246725">JDK-8246725</a>
*/
// VisibleForTesting
static final int MAX_ARRAY_VM_LIMIT = Integer.MAX_VALUE - 8;

public static final String MAX_BYTES_LENGTH_PROPERTY = "org.apache.avro.limits.bytes.maxLength";
public static final String MAX_COLLECTION_LENGTH_PROPERTY = "org.apache.avro.limits.collectionItems.maxLength";
public static final String MAX_STRING_LENGTH_PROPERTY = "org.apache.avro.limits.string.maxLength";

private static int maxBytesLength = MAX_ARRAY_VM_LIMIT;
private static int maxCollectionLength = MAX_ARRAY_VM_LIMIT;
private static int maxStringLength = MAX_ARRAY_VM_LIMIT;

static {
resetLimits();
}

public SystemLimitException(String message) {
super(message);
}

/**
* Get an integer value stored in a system property, used to configure the
* system behaviour of decoders.
*
* @param property The system property to fetch
* @param defaultValue The value to use if the system property is not present or
* not parsable as an int
* @return The value from the system property, or the default value if the
* property is absent or not parsable
*/
private static int getLimitFromProperty(String property, int defaultValue) {
String o = System.getProperty(property);
int i = defaultValue;
if (o != null) {
try {
i = Integer.parseUnsignedInt(o);
} catch (NumberFormatException nfe) {
LoggerFactory.getLogger(SystemLimitException.class).warn("Could not parse property " + property + ": " + o,
nfe);
}
}
return i;
}

/**
* Check to ensure that reading the bytes is within the specified limits.
*
* @param length The proposed size of the bytes to read
* @return The size of the bytes if and only if it is within the limit and
* non-negative.
@throws UnsupportedOperationException if reading the datum would allocate a
byte array that the Java VM would be
unable to handle
* @throws SystemLimitException if the decoding should fail because it
* would otherwise result in an allocation
* exceeding the set limit
* @throws AvroRuntimeException if the length is negative
*/
public static int checkMaxBytesLength(long length) {
if (length < 0) {
throw new AvroRuntimeException("Malformed data. Length is negative: " + length);
}
if (length > MAX_ARRAY_VM_LIMIT) {
throw new UnsupportedOperationException(
"Cannot read arrays longer than " + MAX_ARRAY_VM_LIMIT + " bytes in Java library");
}
if (length > maxBytesLength) {
throw new SystemLimitException("Bytes length " + length + " exceeds maximum allowed");
}
return (int) length;
}

/**
* Check to ensure that reading the specified number of items remains within the
* specified limits.
*
@param existing The number of items already read in the collection
* @param items The next number of items to read. In normal usage, this is
* always a positive, permitted value. Negative and zero values
* have a special meaning in Avro decoding.
* @return The total number of items in the collection if and only if it is
* within the limit and non-negative.
* @throws UnsupportedOperationException if reading the items would allocate a
* collection that the Java VM would be
* unable to handle
* @throws SystemLimitException if the decoding should fail because it
* would otherwise result in an allocation
* exceeding the set limit
* @throws AvroRuntimeException if the length is negative
*/
public static int checkMaxCollectionLength(long existing, long items) {
long length = existing + items;
if (existing < 0) {
throw new AvroRuntimeException("Malformed data. Length is negative: " + existing);
}
if (items < 0) {
throw new AvroRuntimeException("Malformed data. Length is negative: " + items);
}
if (length > MAX_ARRAY_VM_LIMIT || length < existing) {
throw new UnsupportedOperationException(
"Cannot read collections larger than " + MAX_ARRAY_VM_LIMIT + " items in Java library");
}
if (length > maxCollectionLength) {
throw new SystemLimitException("Collection length " + length + " exceeds maximum allowed");
}
return (int) length;
}

/**
* Check to ensure that reading the string size is within the specified limits.
*
* @param length The proposed size of the string to read
* @return The size of the string if and only if it is within the limit and
* non-negative.
@throws UnsupportedOperationException if reading the string would allocate an
array that the Java VM would be
unable to handle
* @throws SystemLimitException if the decoding should fail because it
* would otherwise result in an allocation
* exceeding the set limit
* @throws AvroRuntimeException if the length is negative
*/
public static int checkMaxStringLength(long length) {
if (length < 0) {
throw new AvroRuntimeException("Malformed data. Length is negative: " + length);
}
if (length > MAX_ARRAY_VM_LIMIT) {
throw new UnsupportedOperationException("Cannot read strings longer than " + MAX_ARRAY_VM_LIMIT + " bytes");
}
if (length > maxStringLength) {
throw new SystemLimitException("String length " + length + " exceeds maximum allowed");
}
return (int) length;
}

/** Reread the limits from the system properties. */
// VisibleForTesting
static void resetLimits() {
maxBytesLength = getLimitFromProperty(MAX_BYTES_LENGTH_PROPERTY, MAX_ARRAY_VM_LIMIT);
maxCollectionLength = getLimitFromProperty(MAX_COLLECTION_LENGTH_PROPERTY, MAX_ARRAY_VM_LIMIT);
maxStringLength = getLimitFromProperty(MAX_STRING_LENGTH_PROPERTY, MAX_ARRAY_VM_LIMIT);
}
}
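To make the check semantics above concrete, here is a minimal, hypothetical sketch (not from the repository): the property constant is real, but the class name and numeric limits are invented for the example. In practice the properties would usually be passed as -D JVM flags, since the limits are read only once in the static initializer.

import org.apache.avro.AvroRuntimeException;
import org.apache.avro.SystemLimitException;

public class StringLimitSketch {
  public static void main(String[] args) {
    // Example value only: cap decoded strings at 1 MiB of UTF-8 bytes.
    System.setProperty(SystemLimitException.MAX_STRING_LENGTH_PROPERTY, "1048576");

    // Under the limit: the length is returned, narrowed to int.
    int ok = SystemLimitException.checkMaxStringLength(1024L);
    System.out.println("accepted length " + ok);

    // Over the configured limit but within the VM limit: SystemLimitException.
    try {
      SystemLimitException.checkMaxStringLength(5_000_000L);
    } catch (SystemLimitException e) {
      System.out.println("limit: " + e.getMessage());
    }

    // A negative length signals corrupt input: AvroRuntimeException.
    try {
      SystemLimitException.checkMaxStringLength(-1L);
    } catch (AvroRuntimeException e) {
      System.out.println("malformed: " + e.getMessage());
    }
  }
}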
74 changes: 22 additions & 52 deletions lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java
@@ -26,8 +26,8 @@

import org.apache.avro.AvroRuntimeException;
import org.apache.avro.InvalidNumberEncodingException;
import org.apache.avro.SystemLimitException;
import org.apache.avro.util.Utf8;
import org.slf4j.LoggerFactory;

/**
* An {@link Decoder} for binary-format data.
@@ -39,27 +39,20 @@
* can be accessed by inputStream().remaining(), if the BinaryDecoder is not
* 'direct'.
* <p/>
* To prevent this class from making large allocations when handling potentially
* pathological input data, set Java properties
* <tt>org.apache.avro.limits.string.maxLength</tt> and
* <tt>org.apache.avro.limits.bytes.maxLength</tt> before instantiating this
* class to limit the maximum sizes of <tt>string</tt> and <tt>bytes</tt> types
* handled. The default is to permit sizes up to Java's maximum array length.
*
* @see Encoder
* @see SystemLimitException
*/

public class BinaryDecoder extends Decoder {

/**
* The maximum size of array to allocate. Some VMs reserve some header words in
* an array. Attempts to allocate larger arrays may result in OutOfMemoryError:
* Requested array size exceeds VM limit
* When reading a collection (MAP or ARRAY), this keeps track of the number of
* elements to ensure that the
* {@link SystemLimitException#checkMaxCollectionLength} constraint is
* respected.
*/
static final long MAX_ARRAY_SIZE = (long) Integer.MAX_VALUE - 8L;

private static final String MAX_BYTES_LENGTH_PROPERTY = "org.apache.avro.limits.bytes.maxLength";
protected final int maxBytesLength;
private long collectionCount = 0L;

private ByteSource source = null;
// we keep the buffer and its state variables in this class and not in a
@@ -99,17 +92,6 @@ void clearBuf() {
/** protected constructor for child classes */
protected BinaryDecoder() {
super();
String o = System.getProperty(MAX_BYTES_LENGTH_PROPERTY);
int i = Integer.MAX_VALUE;
if (o != null) {
try {
i = Integer.parseUnsignedInt(o);
} catch (NumberFormatException nfe) {
LoggerFactory.getLogger(BinaryDecoder.class)
.warn("Could not parse property " + MAX_BYTES_LENGTH_PROPERTY + ": " + o, nfe);
}
}
maxBytesLength = i;
}

BinaryDecoder(InputStream in, int bufferSize) {
@@ -300,17 +282,11 @@ public double readDouble() throws IOException {

@Override
public Utf8 readString(Utf8 old) throws IOException {
long length = readLong();
if (length > MAX_ARRAY_SIZE) {
throw new UnsupportedOperationException("Cannot read strings longer than " + MAX_ARRAY_SIZE + " bytes");
}
if (length < 0L) {
throw new AvroRuntimeException("Malformed data. Length is negative: " + length);
}
int length = SystemLimitException.checkMaxStringLength(readLong());
Utf8 result = (old != null ? old : new Utf8());
result.setByteLength((int) length);
if (0L != length) {
doReadBytes(result.getBytes(), 0, (int) length);
result.setByteLength(length);
if (0 != length) {
doReadBytes(result.getBytes(), 0, length);
}
return result;
}
@@ -329,17 +305,7 @@ public void skipString() throws IOException {

@Override
public ByteBuffer readBytes(ByteBuffer old) throws IOException {
long length = readLong();
if (length > MAX_ARRAY_SIZE) {
throw new UnsupportedOperationException(
"Cannot read arrays longer than " + MAX_ARRAY_SIZE + " bytes in Java library");
}
if (length > maxBytesLength) {
throw new AvroRuntimeException("Bytes length " + length + " exceeds maximum allowed");
}
if (length < 0L) {
throw new AvroRuntimeException("Malformed data. Length is negative: " + length);
}
int length = SystemLimitException.checkMaxBytesLength(readLong());
final ByteBuffer result;
if (old != null && length <= old.capacity()) {
result = old;
@@ -444,7 +410,6 @@ protected long doReadItemCount() throws IOException {
* @return Zero if there are no more items to skip and end of array/map is
* reached. Positive number if some items are found that cannot be
* skipped and the client needs to skip them individually.
*
* @throws IOException If the first byte cannot be read for any reason other
* than the end of the file, if the input stream has been
* closed, or if some other I/O error occurs.
@@ -461,12 +426,15 @@ private long doSkipItems() throws IOException {

@Override
public long readArrayStart() throws IOException {
return doReadItemCount();
collectionCount = SystemLimitException.checkMaxCollectionLength(0L, doReadItemCount());
return collectionCount;
}

@Override
public long arrayNext() throws IOException {
return doReadItemCount();
long length = doReadItemCount();

@xjin Oct 7, 2023

This seems to be the only extra size check added to BinaryDecoder. This will be used to allocate the next buffer size. But where is this used? The reader will actually enforce a buffer of at most Integer.MAX_VALUE, which is around 2 GB. Is that the cause of the OOM and thus https://nvd.nist.gov/vuln/detail/CVE-2023-39410? I think this test is missing from the PR. We should have a unit test for this corrupted byte array.

collectionCount = SystemLimitException.checkMaxCollectionLength(collectionCount, length);
return length;
}

@Override
@@ -476,12 +444,15 @@ public long skipArray() throws IOException {

@Override
public long readMapStart() throws IOException {
return doReadItemCount();
collectionCount = SystemLimitException.checkMaxCollectionLength(0L, doReadItemCount());
return collectionCount;
}

@Override
public long mapNext() throws IOException {
return doReadItemCount();
long length = doReadItemCount();
collectionCount = SystemLimitException.checkMaxCollectionLength(collectionCount, length);
return length;
}

@Override
@@ -933,7 +904,6 @@ public void close() throws IOException {
/**
* This byte source is special. It will avoid copying data by using the source's
* byte[] as a buffer in the decoder.
*
*/
private static class ByteArrayByteSource extends ByteSource {
private static final int MIN_SIZE = 16;
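On the review comment above asking for a test of the corrupted-length case: a sketch of what such a unit test might look like against the new public check (not part of this commit; JUnit 5 and the test class name are assumptions, and the assertions assume the default, VM-limit configuration).

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;

import org.apache.avro.AvroRuntimeException;
import org.apache.avro.SystemLimitException;
import org.junit.jupiter.api.Test;

public class TestCollectionLimitSketch {

  @Test
  void accumulatedCountIsReturned() {
    // Normal case: the running total of items already read plus the next block.
    assertEquals(300, SystemLimitException.checkMaxCollectionLength(100L, 200L));
  }

  @Test
  void negativeCountsAreMalformed() {
    // Corrupt data: a negative block count is rejected before any allocation.
    assertThrows(AvroRuntimeException.class, () -> SystemLimitException.checkMaxCollectionLength(0L, -1L));
  }

  @Test
  void totalsBeyondTheVmLimitAreRejected() {
    // A total past Integer.MAX_VALUE - 8, or a long overflow, can never be allocated.
    assertThrows(UnsupportedOperationException.class,
        () -> SystemLimitException.checkMaxCollectionLength(Integer.MAX_VALUE - 8, 1L));
    assertThrows(UnsupportedOperationException.class,
        () -> SystemLimitException.checkMaxCollectionLength(Long.MAX_VALUE, Long.MAX_VALUE));
  }
}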
