From 1b3831fb7f92b630a8703269bb8aebbc87f298c7 Mon Sep 17 00:00:00 2001 From: stiga-huang Date: Sat, 3 Mar 2018 01:21:23 -0800 Subject: [PATCH 1/4] ORC-313: Check subtype count in converting protobuf Types We need to verify the subtype count of LIST and MAP types. Otherwise, the ill types will lead the c++ reader to crash. The java reader also has this problem. --- c++/src/TypeImpl.cc | 4 ++++ java/core/src/java/org/apache/orc/OrcUtils.java | 11 ++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/c++/src/TypeImpl.cc b/c++/src/TypeImpl.cc index f9e4cd6cc9..303ca1d1e8 100644 --- a/c++/src/TypeImpl.cc +++ b/c++/src/TypeImpl.cc @@ -379,6 +379,10 @@ namespace orc { case proto::Type_Kind_MAP: case proto::Type_Kind_UNION: { TypeImpl* result = new TypeImpl(static_cast(type.kind())); + if (type.kind() == proto::Type_Kind_LIST && type.subtypes_size() != 1) + throw ParseError("Illegal LIST type that doesn't contain one subtype"); + if (type.kind() == proto::Type_Kind_MAP && type.subtypes_size() != 2) + throw ParseError("Illegal MAP type that doesn't contain two subtypes"); for(int i=0; i < type.subtypes_size(); ++i) { result->addUnionChild(convertType(footer.types(static_cast (type.subtypes(i))), diff --git a/java/core/src/java/org/apache/orc/OrcUtils.java b/java/core/src/java/org/apache/orc/OrcUtils.java index 37e624ccea..671a6715a5 100644 --- a/java/core/src/java/org/apache/orc/OrcUtils.java +++ b/java/core/src/java/org/apache/orc/OrcUtils.java @@ -458,7 +458,8 @@ public static int appendOrcTypesRebuildSubtypes(List result, */ public static TypeDescription convertTypeFromProtobuf(List types, - int rootColumn) { + int rootColumn) + throws FileFormatException { OrcProto.Type type = types.get(rootColumn); switch (type.getKind()) { case BOOLEAN: @@ -503,9 +504,17 @@ TypeDescription convertTypeFromProtobuf(List types, return result; } case LIST: + if (type.getSubtypesCount() != 1) { + throw new FileFormatException("LIST type should contains exactly " + + "one subtype but has " + type.getSubtypesCount()); + } return TypeDescription.createList( convertTypeFromProtobuf(types, type.getSubtypes(0))); case MAP: + if (type.getSubtypesCount() != 2) { + throw new FileFormatException("MAP type should contains exactly " + + "two subtypes but has " + type.getSubtypesCount()); + } return TypeDescription.createMap( convertTypeFromProtobuf(types, type.getSubtypes(0)), convertTypeFromProtobuf(types, type.getSubtypes(1))); From 94ecddfc07a85ab578f157896e24d63b4ccdbc0b Mon Sep 17 00:00:00 2001 From: stiga-huang Date: Wed, 7 Mar 2018 17:17:30 -0800 Subject: [PATCH 2/4] ORC-317: check that indices in the type tree are valid --- c++/src/Reader.cc | 31 ++++++++++++++++ c++/src/TypeImpl.cc | 4 -- .../java/org/apache/orc/impl/ReaderImpl.java | 37 +++++++++++++++++-- 3 files changed, 65 insertions(+), 7 deletions(-) diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc index fd22f5f7b9..529d28d8da 100644 --- a/c++/src/Reader.cc +++ b/c++/src/Reader.cc @@ -897,6 +897,34 @@ namespace orc { return REDUNDANT_MOVE(postscript); } + void checkTypes(int& index, const proto::Footer& footer) { + if (index >= footer.types_size()) + throw ParseError(std::string("Footer is corrupt that it lost types(") + + std::to_string(index) + ")"); + const proto::Type& type = footer.types(index); + + // ORC-313: check subtype count of LIST and MAP + if (type.kind() == proto::Type_Kind_LIST && type.subtypes_size() != 1) + throw ParseError("LIST type should contains exactly one subtype but has " + + std::to_string(type.subtypes_size())); + if (type.kind() == proto::Type_Kind_MAP && type.subtypes_size() != 2) + throw ParseError("MAP type should contains exactly two subtypes but has " + + std::to_string(type.subtypes_size())); + + // ORC-317: check that indices in the type tree are valid + int origin_index = index; + for (int i = 0; i < type.subtypes_size(); ++i) { + int proto_index = static_cast(type.subtypes(i)); + if (++index != proto_index) { + std::stringstream msg; + msg << "Footer is corrupt: subType(" << i << ") should be " << index + << " but was " << proto_index << " in types(" << origin_index << ")"; + throw ParseError(msg.str()); + } + checkTypes(index, footer); + } + } + /** * Parse the footer from the given buffer. * @param stream the file's stream @@ -926,6 +954,9 @@ namespace orc { throw ParseError("Failed to parse the footer from " + stream->getName()); } + + int index = 0; + checkTypes(index, *footer); return REDUNDANT_MOVE(footer); } diff --git a/c++/src/TypeImpl.cc b/c++/src/TypeImpl.cc index 303ca1d1e8..f9e4cd6cc9 100644 --- a/c++/src/TypeImpl.cc +++ b/c++/src/TypeImpl.cc @@ -379,10 +379,6 @@ namespace orc { case proto::Type_Kind_MAP: case proto::Type_Kind_UNION: { TypeImpl* result = new TypeImpl(static_cast(type.kind())); - if (type.kind() == proto::Type_Kind_LIST && type.subtypes_size() != 1) - throw ParseError("Illegal LIST type that doesn't contain one subtype"); - if (type.kind() == proto::Type_Kind_MAP && type.subtypes_size() != 2) - throw ParseError("Illegal MAP type that doesn't contain two subtypes"); for(int i=0; i < type.subtypes_size(); ++i) { result->addUnionChild(convertType(footer.types(static_cast (type.subtypes(i))), diff --git a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java index 0daa0ed2ed..d0928e70a6 100644 --- a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java +++ b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java @@ -410,8 +410,39 @@ private static OrcProto.Footer extractFooter(ByteBuffer bb, int footerAbsPos, int footerSize, CompressionCodec codec, int bufferSize) throws IOException { bb.position(footerAbsPos); bb.limit(footerAbsPos + footerSize); - return OrcProto.Footer.parseFrom(InStream.createCodedInputStream("footer", - singleton(new BufferChunk(bb, 0)), footerSize, codec, bufferSize)); + OrcProto.Footer footer = OrcProto.Footer.parseFrom(InStream.createCodedInputStream( + "footer", singleton(new BufferChunk(bb, 0)), footerSize, codec, bufferSize)); + checkTypes(0, footer); + return footer; + } + + private static int checkTypes(int index, OrcProto.Footer footer) throws FileFormatException { + if (index >= footer.getTypesCount()) { + throw new FileFormatException("Footer is corrupt that it lost types(" + index + ")"); + } + OrcProto.Type type = footer.getTypes(index); + + // ORC-313: check subtype count of LIST and MAP + if (type.getKind() == OrcProto.Type.Kind.LIST && type.getSubtypesCount() != 1) { + throw new FileFormatException("LIST type should contains exactly " + + "one subtype but has " + type.getSubtypesCount()); + } + if (type.getKind() == OrcProto.Type.Kind.MAP && type.getSubtypesCount() != 2) { + throw new FileFormatException("MAP type should contains exactly " + + "two subtypes but has " + type.getSubtypesCount()); + } + + // ORC-317: check that indices in the type tree are valid + int origin_index = index; + for (int i = 0; i < type.getSubtypesCount(); ++i) { + if (++index != type.getSubtypes(i)) { + throw new FileFormatException(String.format( + "Footer is corrupt: subType(%d) should be %d but was %d in types(%d)", + i, index, type.getSubtypes(i), origin_index)); + } + index = checkTypes(index, footer); + } + return index; } public static OrcProto.Metadata extractMetadata(ByteBuffer bb, int metadataAbsPos, @@ -537,7 +568,7 @@ protected OrcTail extractFileTail(FileSystem fs, Path path, return buildEmptyTail(); } else if (size <= OrcFile.MAGIC.length()) { // Anything smaller than MAGIC header cannot be valid (valid ORC files - // are actually around 40 bytes, this is more conservative) + // are actually around 40 bytes, this is more conservative) throw new FileFormatException("Not a valid ORC file " + path + " (maxFileLength= " + maxFileLength + ")"); } From 0f17474e91ec6c84b4ad02385a8ce8a68c33e486 Mon Sep 17 00:00:00 2001 From: stiga-huang Date: Wed, 7 Mar 2018 17:29:32 -0800 Subject: [PATCH 3/4] remove redundant checks --- java/core/src/java/org/apache/orc/OrcUtils.java | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/java/core/src/java/org/apache/orc/OrcUtils.java b/java/core/src/java/org/apache/orc/OrcUtils.java index 671a6715a5..37e624ccea 100644 --- a/java/core/src/java/org/apache/orc/OrcUtils.java +++ b/java/core/src/java/org/apache/orc/OrcUtils.java @@ -458,8 +458,7 @@ public static int appendOrcTypesRebuildSubtypes(List result, */ public static TypeDescription convertTypeFromProtobuf(List types, - int rootColumn) - throws FileFormatException { + int rootColumn) { OrcProto.Type type = types.get(rootColumn); switch (type.getKind()) { case BOOLEAN: @@ -504,17 +503,9 @@ TypeDescription convertTypeFromProtobuf(List types, return result; } case LIST: - if (type.getSubtypesCount() != 1) { - throw new FileFormatException("LIST type should contains exactly " + - "one subtype but has " + type.getSubtypesCount()); - } return TypeDescription.createList( convertTypeFromProtobuf(types, type.getSubtypes(0))); case MAP: - if (type.getSubtypesCount() != 2) { - throw new FileFormatException("MAP type should contains exactly " + - "two subtypes but has " + type.getSubtypesCount()); - } return TypeDescription.createMap( convertTypeFromProtobuf(types, type.getSubtypes(0)), convertTypeFromProtobuf(types, type.getSubtypes(1))); From 5f849a2b68449f26e66b27ed80dbf508e557a7d0 Mon Sep 17 00:00:00 2001 From: stiga-huang Date: Thu, 8 Mar 2018 23:32:30 -0800 Subject: [PATCH 4/4] check subtype count of UNION type --- c++/src/Reader.cc | 6 ++++-- java/core/src/java/org/apache/orc/impl/ReaderImpl.java | 3 +++ java/tools/src/java/org/apache/orc/tools/PrintData.java | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc index 529d28d8da..8915df1628 100644 --- a/c++/src/Reader.cc +++ b/c++/src/Reader.cc @@ -905,11 +905,13 @@ namespace orc { // ORC-313: check subtype count of LIST and MAP if (type.kind() == proto::Type_Kind_LIST && type.subtypes_size() != 1) - throw ParseError("LIST type should contains exactly one subtype but has " + + throw ParseError("LIST type should contain exactly one subtype but has " + std::to_string(type.subtypes_size())); if (type.kind() == proto::Type_Kind_MAP && type.subtypes_size() != 2) - throw ParseError("MAP type should contains exactly two subtypes but has " + + throw ParseError("MAP type should contain exactly two subtypes but has " + std::to_string(type.subtypes_size())); + if (type.kind() == proto::Type_Kind_UNION && type.subtypes_size() == 0) + throw ParseError("UNION type should contain subtypes but has none"); // ORC-317: check that indices in the type tree are valid int origin_index = index; diff --git a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java index d0928e70a6..a7c15eebb5 100644 --- a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java +++ b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java @@ -431,6 +431,9 @@ private static int checkTypes(int index, OrcProto.Footer footer) throws FileForm throw new FileFormatException("MAP type should contains exactly " + "two subtypes but has " + type.getSubtypesCount()); } + if (type.getKind() == OrcProto.Type.Kind.UNION && type.getSubtypesCount() == 0) { + throw new FileFormatException("UNION type should contain subtypes but has none"); + } // ORC-317: check that indices in the type tree are valid int origin_index = index; diff --git a/java/tools/src/java/org/apache/orc/tools/PrintData.java b/java/tools/src/java/org/apache/orc/tools/PrintData.java index ebd9ae17e3..5d74a21655 100644 --- a/java/tools/src/java/org/apache/orc/tools/PrintData.java +++ b/java/tools/src/java/org/apache/orc/tools/PrintData.java @@ -242,6 +242,7 @@ static void main(Configuration conf, String[] args System.out.println(FileDump.SEPARATOR); } catch (Exception e) { System.err.println("Unable to dump data for file: " + file); + e.printStackTrace(); continue; } }