Skip to content

Commit

Permalink
PARQUET-1835: [C++] Fix crashes on invalid input
Browse files: browse the repository at this point in the history
This change is expected to fix the following OSS-Fuzz issues:
* https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=21377
* https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=21567

Closes #6848 from pitrou/PARQUET-1835-oss-fuzz

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Wes McKinney <wesm+git@apache.org>
  • Loading branch information
pitrou authored and wesm committed Apr 6, 2020
1 parent 42ef1f9 commit 2898577
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 8 deletions.
6 changes: 5 additions & 1 deletion cpp/src/parquet/column_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ int LevelDecoder::SetData(Encoding::type encoding, int16_t max_level,
case Encoding::BIT_PACKED: {
num_bytes =
static_cast<int32_t>(BitUtil::BytesForBits(num_buffered_values * bit_width_));
if (num_bytes > data_size) {
if (num_bytes < 0 || num_bytes > data_size - 4) {
throw ParquetException("Received invalid number of bytes (corrupt data page?)");
}
if (!bit_packed_decoder_) {
Expand Down Expand Up @@ -375,6 +375,10 @@ std::shared_ptr<Page> SerializedPageReader::NextPage() {
if (header.num_values < 0) {
throw ParquetException("Invalid page header (negative number of values)");
}
if (header.definition_levels_byte_length < 0 ||
header.repetition_levels_byte_length < 0) {
throw ParquetException("Invalid page header (negative levels byte length)");
}
bool is_compressed = header.__isset.is_compressed ? header.is_compressed : false;
EncodedStatistics page_statistics = ExtractStatsFromHeader(header);
seen_num_rows_ += header.num_values;
Expand Down
12 changes: 6 additions & 6 deletions cpp/src/parquet/encoding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1045,15 +1045,15 @@ int PlainDecoder<DType>::DecodeArrow(
template <typename T>
inline int DecodePlain(const uint8_t* data, int64_t data_size, int num_values,
int type_length, T* out) {
int bytes_to_decode = num_values * static_cast<int>(sizeof(T));
if (data_size < bytes_to_decode) {
int64_t bytes_to_decode = num_values * static_cast<int64_t>(sizeof(T));
if (bytes_to_decode > data_size || bytes_to_decode > INT_MAX) {
ParquetException::EofException();
}
// If bytes_to_decode == 0, data could be null
if (bytes_to_decode > 0) {
memcpy(out, data, bytes_to_decode);
}
return bytes_to_decode;
return static_cast<int>(bytes_to_decode);
}

template <typename DType>
Expand Down Expand Up @@ -1108,16 +1108,16 @@ template <>
inline int DecodePlain<FixedLenByteArray>(const uint8_t* data, int64_t data_size,
int num_values, int type_length,
FixedLenByteArray* out) {
int bytes_to_decode = type_length * num_values;
if (data_size < bytes_to_decode) {
int64_t bytes_to_decode = static_cast<int64_t>(type_length) * num_values;
if (bytes_to_decode > data_size || bytes_to_decode > INT_MAX) {
ParquetException::EofException();
}
for (int i = 0; i < num_values; ++i) {
out[i].ptr = data;
data += type_length;
data_size -= type_length;
}
return bytes_to_decode;
return static_cast<int>(bytes_to_decode);
}

template <typename DType>
Expand Down

0 comments on commit 2898577

Please sign in to comment.