Skip to content

Commit

Permalink
PARQUET-1128: [Java] Upgrade the Apache Arrow version to 0.8.0 for Sc…
Browse files Browse the repository at this point in the history
…hemaConverter

When I converted a Parquet (1.9.1-SNAPSHOT) schema to Arrow (0.4.0) with SchemaConverter, the following exception was raised:
```
java.lang.NoClassDefFoundError: org/apache/arrow/vector/types/pojo/ArrowType$Struct_

	at net.wrap_trap.parquet_arrow.ParquetToArrowConverter.convertToArrow(ParquetToArrowConverter.java:67)
	at net.wrap_trap.parquet_arrow.ParquetToArrowConverter.convertToArrow(ParquetToArrowConverter.java:40)
	at net.wrap_trap.parquet_arrow.ParquetToArrowConverterTest.parquetToArrowConverterTest(ParquetToArrowConverterTest.java:27)
```

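The reason is that SchemaConverter is built against Apache Arrow 0.1.0, so it still refers to classes such as `ArrowType.Struct_` that are not present in newer Arrow releases.
This change upgrades the Apache Arrow version used by SchemaConverter to 0.8.0 (the latest release).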

Author: Masayuki Takahashi <masayuki038@gmail.com>

Closes #443 from masayuki038/PARQUET-1128 and squashes the following commits:

8ba4781 [Masayuki Takahashi] PARQUET-1128: [Java] Upgrade the Apache Arrow version to 0.8.0 for SchemaConverter
b80d793 [Masayuki Takahashi] PARQUET-1128: [Java] Upgrade the Apache Arrow version to 0.8.0 for SchemaConverter
  • Loading branch information
masayuki038 authored and xhochy committed Apr 21, 2018
1 parent f937112 commit af977ad
Show file tree
Hide file tree
Showing 3 changed files with 299 additions and 274 deletions.
2 changes: 1 addition & 1 deletion parquet-arrow/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
<url>https://parquet.apache.org</url>

<properties>
<arrow.version>0.1.0</arrow.version>
<arrow.version>0.8.0</arrow.version>
</properties>

<dependencies>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@
import java.util.ArrayList;
import java.util.List;

import org.apache.arrow.flatbuf.Precision;
import org.apache.arrow.flatbuf.TimeUnit;
import org.apache.arrow.vector.types.DateUnit;
import org.apache.arrow.vector.types.FloatingPointPrecision;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeVisitor;
import org.apache.arrow.vector.types.pojo.ArrowType.Binary;
Expand All @@ -59,7 +59,7 @@
import org.apache.arrow.vector.types.pojo.ArrowType.Int;
import org.apache.arrow.vector.types.pojo.ArrowType.Interval;
import org.apache.arrow.vector.types.pojo.ArrowType.Null;
import org.apache.arrow.vector.types.pojo.ArrowType.Struct_;
import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
import org.apache.arrow.vector.types.pojo.ArrowType.Time;
import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp;
import org.apache.arrow.vector.types.pojo.ArrowType.Union;
Expand Down Expand Up @@ -141,13 +141,22 @@ public TypeMapping visit(Null type) {
}

@Override
public TypeMapping visit(Struct_ type) {
public TypeMapping visit(Struct type) {
List<TypeMapping> parquetTypes = fromArrow(children);
return new StructTypeMapping(field, addToBuilder(parquetTypes, Types.buildGroup(OPTIONAL)).named(fieldName), parquetTypes);
}

@Override
public TypeMapping visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) {
return createListTypeMapping();
}

@Override
public TypeMapping visit(org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList type) {
return createListTypeMapping();
}

private ListTypeMapping createListTypeMapping() {
if (children.size() != 1) {
throw new IllegalArgumentException("list fields must have exactly one child: " + field);
}
Expand All @@ -167,31 +176,31 @@ public TypeMapping visit(Union type) {
public TypeMapping visit(Int type) {
boolean signed = type.getIsSigned();
switch (type.getBitWidth()) {
case 8:
return primitive(INT32, signed ? INT_8 : UINT_8);
case 16:
return primitive(INT32, signed ? INT_16 : UINT_16);
case 32:
return primitive(INT32, signed ? INT_32 : UINT_32);
case 64:
return primitive(INT64, signed ? INT_64 : UINT_64);
default:
throw new IllegalArgumentException("Illegal int type: " + field);
case 8:
return primitive(INT32, signed ? INT_8 : UINT_8);
case 16:
return primitive(INT32, signed ? INT_16 : UINT_16);
case 32:
return primitive(INT32, signed ? INT_32 : UINT_32);
case 64:
return primitive(INT64, signed ? INT_64 : UINT_64);
default:
throw new IllegalArgumentException("Illegal int type: " + field);
}
}

@Override
public TypeMapping visit(FloatingPoint type) {
switch (type.getPrecision()) {
case Precision.HALF:
// TODO(PARQUET-757): original type HalfFloat
return primitive(FLOAT);
case Precision.SINGLE:
return primitive(FLOAT);
case Precision.DOUBLE:
return primitive(DOUBLE);
default:
throw new IllegalArgumentException("Illegal float type: " + field);
case HALF:
// TODO(PARQUET-757): original type HalfFloat
return primitive(FLOAT);
case SINGLE:
return primitive(FLOAT);
case DOUBLE:
return primitive(DOUBLE);
default:
throw new IllegalArgumentException("Illegal float type: " + field);
}
}

Expand Down Expand Up @@ -336,7 +345,7 @@ private TypeMapping fromParquetGroup(GroupType type, String name) {
OriginalType ot = type.getOriginalType();
if (ot == null) {
List<TypeMapping> typeMappings = fromParquet(type.getFields());
Field arrowField = new Field(name, type.isRepetition(OPTIONAL), new Struct_(), fields(typeMappings));
Field arrowField = new Field(name, type.isRepetition(OPTIONAL), new Struct(), fields(typeMappings));
return new StructTypeMapping(arrowField, type, typeMappings);
} else {
switch (ot) {
Expand Down Expand Up @@ -366,12 +375,12 @@ private TypeMapping field(ArrowType arrowType) {

@Override
public TypeMapping convertFLOAT(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
return field(new ArrowType.FloatingPoint(Precision.SINGLE));
return field(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE));
}

@Override
public TypeMapping convertDOUBLE(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
return field(new ArrowType.FloatingPoint(Precision.DOUBLE));
return field(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE));
}

@Override
Expand All @@ -381,41 +390,41 @@ public TypeMapping convertINT32(PrimitiveTypeName primitiveTypeName) throws Runt
return integer(32, true);
}
switch (ot) {
case INT_8:
return integer(8, true);
case INT_16:
return integer(16, true);
case INT_32:
return integer(32, true);
case UINT_8:
return integer(8, false);
case UINT_16:
return integer(16, false);
case UINT_32:
return integer(32, false);
case DECIMAL:
return decimal(type.getDecimalMetadata());
case DATE:
return field(new ArrowType.Date());
case TIMESTAMP_MICROS:
return field(new ArrowType.Timestamp(TimeUnit.MICROSECOND));
case TIMESTAMP_MILLIS:
return field(new ArrowType.Timestamp(TimeUnit.MILLISECOND));
case TIME_MILLIS:
return field(new ArrowType.Time());
default:
case TIME_MICROS:
case INT_64:
case UINT_64:
case UTF8:
case ENUM:
case BSON:
case INTERVAL:
case JSON:
case LIST:
case MAP:
case MAP_KEY_VALUE:
throw new IllegalArgumentException("illegal type " + type);
case INT_8:
return integer(8, true);
case INT_16:
return integer(16, true);
case INT_32:
return integer(32, true);
case UINT_8:
return integer(8, false);
case UINT_16:
return integer(16, false);
case UINT_32:
return integer(32, false);
case DECIMAL:
return decimal(type.getDecimalMetadata());
case DATE:
return field(new ArrowType.Date(DateUnit.DAY));
case TIMESTAMP_MICROS:
return field(new ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MICROSECOND, "UTC"));
case TIMESTAMP_MILLIS:
return field(new ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, "UTC"));
case TIME_MILLIS:
return field(new ArrowType.Time(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, 32));
default:
case TIME_MICROS:
case INT_64:
case UINT_64:
case UTF8:
case ENUM:
case BSON:
case INTERVAL:
case JSON:
case LIST:
case MAP:
case MAP_KEY_VALUE:
throw new IllegalArgumentException("illegal type " + type);
}
}

Expand All @@ -426,43 +435,42 @@ public TypeMapping convertINT64(PrimitiveTypeName primitiveTypeName) throws Runt
return integer(64, true);
}
switch (ot) {
case INT_8:
return integer(8, true);
case INT_16:
return integer(16, true);
case INT_32:
return integer(32, true);
case INT_64:
return integer(64, true);
case UINT_8:
return integer(8, false);
case UINT_16:
return integer(16, false);
case UINT_32:
return integer(32, false);
case UINT_64:
return integer(64, false);
case DECIMAL:
return decimal(type.getDecimalMetadata());
case DATE:
return field(new ArrowType.Date());
case TIMESTAMP_MICROS:
return field(new ArrowType.Timestamp(TimeUnit.MICROSECOND));
case TIMESTAMP_MILLIS:
return field(new ArrowType.Timestamp(TimeUnit.MILLISECOND));
case TIME_MILLIS:
return field(new ArrowType.Time());
default:
case TIME_MICROS:
case UTF8:
case ENUM:
case BSON:
case INTERVAL:
case JSON:
case LIST:
case MAP:
case MAP_KEY_VALUE:
throw new IllegalArgumentException("illegal type " + type);
case INT_8:
return integer(8, true);
case INT_16:
return integer(16, true);
case INT_32:
return integer(32, true);
case INT_64:
return integer(64, true);
case UINT_8:
return integer(8, false);
case UINT_16:
return integer(16, false);
case UINT_32:
return integer(32, false);
case UINT_64:
return integer(64, false);
case DECIMAL:
return decimal(type.getDecimalMetadata());
case DATE:
return field(new ArrowType.Date(DateUnit.DAY));
case TIMESTAMP_MICROS:
return field(new ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MICROSECOND, "UTC"));
case TIMESTAMP_MILLIS:
return field(new ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, "UTC"));
default:
case TIME_MICROS:
case UTF8:
case ENUM:
case BSON:
case INTERVAL:
case JSON:
case LIST:
case MAP:
case MAP_KEY_VALUE:
case TIME_MILLIS:
throw new IllegalArgumentException("illegal type " + type);
}
}

Expand All @@ -489,12 +497,12 @@ public TypeMapping convertBINARY(PrimitiveTypeName primitiveTypeName) throws Run
return field(new ArrowType.Binary());
}
switch (ot) {
case UTF8:
return field(new ArrowType.Utf8());
case DECIMAL:
return decimal(type.getDecimalMetadata());
default:
throw new IllegalArgumentException("illegal type " + type);
case UTF8:
return field(new ArrowType.Utf8());
case DECIMAL:
return decimal(type.getDecimalMetadata());
default:
throw new IllegalArgumentException("illegal type " + type);
}
}

Expand Down Expand Up @@ -545,7 +553,7 @@ public TypeMapping visit(Null type) {
}

@Override
public TypeMapping visit(Struct_ type) {
public TypeMapping visit(Struct type) {
if (parquetField.isPrimitive()) {
throw new IllegalArgumentException("Parquet type not a group: " + parquetField);
}
Expand All @@ -555,6 +563,15 @@ public TypeMapping visit(Struct_ type) {

@Override
public TypeMapping visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) {
return createListTypeMapping(type);
}

@Override
public TypeMapping visit(org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList type) {
return createListTypeMapping(type);
}

private TypeMapping createListTypeMapping(ArrowType.ComplexType type) {
if (arrowField.getChildren().size() != 1) {
throw new IllegalArgumentException("Invalid list type: " + type);
}
Expand Down

0 comments on commit af977ad

Please sign in to comment.