From e3b923ce3924218a3737fbac3a97e7faaa286280 Mon Sep 17 00:00:00 2001 From: stiga-huang Date: Fri, 2 Mar 2018 21:54:35 -0800 Subject: [PATCH 1/3] ORC-312: fix buffer overflow in corrupt StringDictionaryColumn --- c++/src/ColumnReader.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc index 5462becbc4..5c6c3a3b5c 100644 --- a/c++/src/ColumnReader.cc +++ b/c++/src/ColumnReader.cc @@ -468,6 +468,9 @@ namespace orc { if (!stream->Next(&chunk, &length)) { throw ParseError("bad read in readFully"); } + if (posn + length > bufferSize) { + throw ParseError("Corrupt dictionary blob in StringDictionaryColumn"); + } memcpy(buffer + posn, chunk, static_cast(length)); posn += length; } @@ -549,6 +552,9 @@ namespace orc { for(uint64_t i=0; i < numValues; ++i) { if (notNull[i]) { int64_t entry = outputLengths[i]; + if (entry < 0 || (uint64_t)entry >= dictionaryCount) { + throw ParseError("Entry index out of range in StringDictionaryColumn"); + } outputStarts[i] = blob + dictionaryOffsets[entry]; outputLengths[i] = dictionaryOffsets[entry+1] - dictionaryOffsets[entry]; @@ -557,6 +563,9 @@ namespace orc { } else { for(uint64_t i=0; i < numValues; ++i) { int64_t entry = outputLengths[i]; + if (entry < 0 || (uint64_t)entry >= dictionaryCount) { + throw ParseError("Entry index out of range in StringDictionaryColumn"); + } outputStarts[i] = blob + dictionaryOffsets[entry]; outputLengths[i] = dictionaryOffsets[entry+1] - dictionaryOffsets[entry]; From 972e8a1cf531a02cb580d37903e8d91626659f6e Mon Sep 17 00:00:00 2001 From: stiga-huang Date: Fri, 2 Mar 2018 22:24:07 -0800 Subject: [PATCH 2/3] fix compile errors in clang --- c++/src/ColumnReader.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc index 5c6c3a3b5c..0fb1a60bca 100644 --- a/c++/src/ColumnReader.cc +++ b/c++/src/ColumnReader.cc @@ -552,7 +552,7 @@ namespace orc { for(uint64_t i=0; i < numValues; 
++i) { if (notNull[i]) { int64_t entry = outputLengths[i]; - if (entry < 0 || (uint64_t)entry >= dictionaryCount) { + if (entry < 0 || static_cast(entry) >= dictionaryCount) { throw ParseError("Entry index out of range in StringDictionaryColumn"); } outputStarts[i] = blob + dictionaryOffsets[entry]; @@ -563,7 +563,7 @@ namespace orc { } else { for(uint64_t i=0; i < numValues; ++i) { int64_t entry = outputLengths[i]; - if (entry < 0 || (uint64_t)entry >= dictionaryCount) { + if (entry < 0 || static_cast(entry) >= dictionaryCount) { throw ParseError("Entry index out of range in StringDictionaryColumn"); } outputStarts[i] = blob + dictionaryOffsets[entry]; From 8e9823ec5767f57b7ae3260615bf1d4f1036f9cb Mon Sep 17 00:00:00 2001 From: stiga-huang Date: Tue, 6 Mar 2018 04:46:29 -0800 Subject: [PATCH 3/3] Check dictionary entry length to avoid crash on negative length In the constructor of StringDictionaryColumnReader, if the decoded lengthArray contains negative values, the calculated blobSize may be negative. The cast in dictionaryBlob.resize(static_cast<uint64_t>(blobSize)) then transforms it into a huge unsigned integer, and we crash in DataBuffer::resize. This patch checks for negative lengths and throws a ParseError before the resize is attempted. --- c++/src/ColumnReader.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc index 0fb1a60bca..53997a422e 100644 --- a/c++/src/ColumnReader.cc +++ b/c++/src/ColumnReader.cc @@ -517,6 +517,8 @@ namespace orc { lengthDecoder->next(lengthArray + 1, dictionaryCount, nullptr); lengthArray[0] = 0; for(uint64_t i=1; i < dictionaryCount + 1; ++i) { + if (lengthArray[i] < 0) + throw ParseError("Negative dictionary entry length"); lengthArray[i] += lengthArray[i-1]; } int64_t blobSize = lengthArray[dictionaryCount];