From 8661fc43ef76030cfb5d67844a7e6a3c26167cda Mon Sep 17 00:00:00 2001 From: stiga-huang Date: Sat, 3 Mar 2018 01:21:23 -0800 Subject: [PATCH 1/3] ORC-313: Check subtype count in converting protobuf Types We need to verify the subtype count of LIST, MAP and UNION types. Otherwise, the ill types will lead the c++ reader to crash. The java reader also has this problem. --- c++/src/TypeImpl.cc | 6 ++++++ java/core/src/java/org/apache/orc/OrcUtils.java | 15 ++++++++++++++- .../src/java/org/apache/orc/tools/PrintData.java | 1 + 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/c++/src/TypeImpl.cc b/c++/src/TypeImpl.cc index f9e4cd6cc9..63a5038095 100644 --- a/c++/src/TypeImpl.cc +++ b/c++/src/TypeImpl.cc @@ -379,6 +379,12 @@ namespace orc { case proto::Type_Kind_MAP: case proto::Type_Kind_UNION: { TypeImpl* result = new TypeImpl(static_cast(type.kind())); + if (type.kind() == proto::Type_Kind_LIST && type.subtypes_size() != 1) + throw ParseError("Illegal LIST type that doesn't contain one subtype"); + if (type.kind() == proto::Type_Kind_MAP && type.subtypes_size() != 2) + throw ParseError("Illegal MAP type that doesn't contain two subtypes"); + if (type.kind() == proto::Type_Kind_UNION && type.subtypes_size() == 0) + throw ParseError("Illegal UNION type that doesn't contain any subtypes"); for(int i=0; i < type.subtypes_size(); ++i) { result->addUnionChild(convertType(footer.types(static_cast (type.subtypes(i))), diff --git a/java/core/src/java/org/apache/orc/OrcUtils.java b/java/core/src/java/org/apache/orc/OrcUtils.java index 37e624ccea..407d365d3c 100644 --- a/java/core/src/java/org/apache/orc/OrcUtils.java +++ b/java/core/src/java/org/apache/orc/OrcUtils.java @@ -458,7 +458,8 @@ public static int appendOrcTypesRebuildSubtypes(List result, */ public static TypeDescription convertTypeFromProtobuf(List types, - int rootColumn) { + int rootColumn) + throws FileFormatException { OrcProto.Type type = types.get(rootColumn); switch (type.getKind()) { case BOOLEAN: @@ -503,9 +504,17 @@ TypeDescription convertTypeFromProtobuf(List types, return result; } case LIST: + if (type.getSubtypesCount() != 1) { + throw new FileFormatException("LIST type should contains exactly " + + "one subtype but has " + type.getSubtypesCount()); + } return TypeDescription.createList( convertTypeFromProtobuf(types, type.getSubtypes(0))); case MAP: + if (type.getSubtypesCount() != 2) { + throw new FileFormatException("MAP type should contains exactly " + + "two subtypes but has " + type.getSubtypesCount()); + } return TypeDescription.createMap( convertTypeFromProtobuf(types, type.getSubtypes(0)), convertTypeFromProtobuf(types, type.getSubtypes(1))); @@ -518,6 +527,10 @@ TypeDescription convertTypeFromProtobuf(List types, return result; } case UNION: { + if (type.getSubtypesCount() == 0) { + throw new FileFormatException("UNION type should contains at least" + + " one subtype but has none"); + } TypeDescription result = TypeDescription.createUnion(); for(int f=0; f < type.getSubtypesCount(); ++f) { result.addUnionChild( diff --git a/java/tools/src/java/org/apache/orc/tools/PrintData.java b/java/tools/src/java/org/apache/orc/tools/PrintData.java index ebd9ae17e3..5d74a21655 100644 --- a/java/tools/src/java/org/apache/orc/tools/PrintData.java +++ b/java/tools/src/java/org/apache/orc/tools/PrintData.java @@ -242,6 +242,7 @@ static void main(Configuration conf, String[] args System.out.println(FileDump.SEPARATOR); } catch (Exception e) { System.err.println("Unable to dump data for file: " + file); + e.printStackTrace(); continue; } } From 023f124550fc843d0d1aa0fe4d5c9e27e4d3bea9 Mon Sep 17 00:00:00 2001 From: stiga-huang Date: Mon, 12 Mar 2018 05:57:38 -0700 Subject: [PATCH 2/3] add unit tests --- c++/test/TestType.cc | 42 +++++++++++++++++ .../src/java/org/apache/orc/OrcUtils.java | 6 +-- .../test/org/apache/orc/TestCorruptTypes.java | 46 +++++++++++++++++++ 3 files changed, 91 insertions(+), 3 deletions(-) create mode 100644 java/core/src/test/org/apache/orc/TestCorruptTypes.java diff --git a/c++/test/TestType.cc b/c++/test/TestType.cc index 8ce9313228..d23c6b7dbf 100644 --- a/c++/test/TestType.cc +++ b/c++/test/TestType.cc @@ -18,6 +18,7 @@ #include "Adaptor.hh" #include "OrcTest.hh" +#include "orc/Exceptions.hh" #include "orc/Type.hh" #include "wrap/gtest-wrapper.h" @@ -297,4 +298,45 @@ namespace orc { type = Type::buildTypeFromString(typeStr); EXPECT_EQ(typeStr, type->toString()); } + + void testCorruptHelper(const proto::Type& type, + const proto::Footer& footer, + const char* errMsg) { + try { + convertType(type, footer); + FAIL() << "Should throw ParseError for ill types"; + } catch (ParseError& e) { + EXPECT_EQ(e.what(), std::string(errMsg)); + } catch (...) { + FAIL() << "Should only throw ParseError for ill types"; + } + } + + TEST(TestType, testCorruptNestType) { + proto::Footer footer; // not used + + proto::Type illListType; + illListType.set_kind(proto::Type_Kind_LIST); + testCorruptHelper(illListType, footer, + "Illegal LIST type that doesn't contain one subtype"); + illListType.add_subtypes(2); + illListType.add_subtypes(3); + testCorruptHelper(illListType, footer, + "Illegal LIST type that doesn't contain one subtype"); + + proto::Type illMapType; + illMapType.set_kind(proto::Type_Kind_MAP); + illMapType.add_subtypes(2); + testCorruptHelper(illMapType, footer, + "Illegal MAP type that doesn't contain two subtypes"); + illMapType.add_subtypes(3); + illMapType.add_subtypes(4); + testCorruptHelper(illMapType, footer, + "Illegal MAP type that doesn't contain two subtypes"); + + proto::Type illUnionType; + illUnionType.set_kind(proto::Type_Kind_UNION); + testCorruptHelper(illUnionType, footer, + "Illegal UNION type that doesn't contain any subtypes"); + } } diff --git a/java/core/src/java/org/apache/orc/OrcUtils.java b/java/core/src/java/org/apache/orc/OrcUtils.java index 407d365d3c..604660ac62 100644 --- a/java/core/src/java/org/apache/orc/OrcUtils.java +++ b/java/core/src/java/org/apache/orc/OrcUtils.java @@ -505,14 +505,14 @@ TypeDescription convertTypeFromProtobuf(List types, } case LIST: if (type.getSubtypesCount() != 1) { - throw new FileFormatException("LIST type should contains exactly " + + throw new FileFormatException("LIST type should contain exactly " + "one subtype but has " + type.getSubtypesCount()); } return TypeDescription.createList( convertTypeFromProtobuf(types, type.getSubtypes(0))); case MAP: if (type.getSubtypesCount() != 2) { - throw new FileFormatException("MAP type should contains exactly " + + throw new FileFormatException("MAP type should contain exactly " + "two subtypes but has " + type.getSubtypesCount()); } return TypeDescription.createMap( @@ -528,7 +528,7 @@ TypeDescription convertTypeFromProtobuf(List types, } case UNION: { if (type.getSubtypesCount() == 0) { - throw new FileFormatException("UNION type should contains at least" + + throw new FileFormatException("UNION type should contain at least" + " one subtype but has none"); } TypeDescription result = TypeDescription.createUnion(); diff --git a/java/core/src/test/org/apache/orc/TestCorruptTypes.java b/java/core/src/test/org/apache/orc/TestCorruptTypes.java new file mode 100644 index 0000000000..bc123b7a32 --- /dev/null +++ b/java/core/src/test/org/apache/orc/TestCorruptTypes.java @@ -0,0 +1,46 @@ +package org.apache.orc; + +import org.junit.Test; + +import java.util.ArrayList; +import java.util.List; + +import static junit.framework.Assert.assertEquals; +import static junit.framework.Assert.fail; + +public class TestCorruptTypes { + + @Test + public void testIllType() { + testCorruptHelper(OrcProto.Type.Kind.LIST, 0, + "LIST type should contain exactly one subtype but has 0"); + testCorruptHelper(OrcProto.Type.Kind.LIST, 2, + "LIST type should contain exactly one subtype but has 2"); + testCorruptHelper(OrcProto.Type.Kind.MAP, 1, + "MAP type should contain exactly two subtypes but has 1"); + testCorruptHelper(OrcProto.Type.Kind.MAP, 3, + "MAP type should contain exactly two subtypes but has 3"); + testCorruptHelper(OrcProto.Type.Kind.UNION, 0, + "UNION type should contain at least one subtype but has none"); + } + + private void testCorruptHelper(OrcProto.Type.Kind type, + int subTypesCnt, + String errMsg) { + + List types = new ArrayList(); + OrcProto.Type.Builder builder = OrcProto.Type.newBuilder().setKind(type); + for (int i = 0; i < subTypesCnt; ++i) { + builder.addSubtypes(i + 2); + } + types.add(builder.build()); + try { + OrcUtils.convertTypeFromProtobuf(types, 0); + fail("Should throw FileFormatException for ill types"); + } catch (FileFormatException e) { + assertEquals(errMsg, e.getMessage()); + } catch (Throwable e) { + fail("Should only trow FileFormatException for ill types"); + } + } +} From 071c3b44e841b4bb269d83588f8600b2fce1d9a7 Mon Sep 17 00:00:00 2001 From: stiga-huang Date: Mon, 12 Mar 2018 06:32:22 -0700 Subject: [PATCH 3/3] add missing license --- .../test/org/apache/orc/TestCorruptTypes.java | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/java/core/src/test/org/apache/orc/TestCorruptTypes.java b/java/core/src/test/org/apache/orc/TestCorruptTypes.java index bc123b7a32..8808a08f82 100644 --- a/java/core/src/test/org/apache/orc/TestCorruptTypes.java +++ b/java/core/src/test/org/apache/orc/TestCorruptTypes.java @@ -1,3 +1,20 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.orc; import org.junit.Test;