diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConvertVisitor.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConvertVisitor.java index 88effc50e0..82175a2fa4 100644 --- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConvertVisitor.java +++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConvertVisitor.java @@ -331,7 +331,7 @@ public ConvertedField visit(I64Type i64Type, State state) { @Override public ConvertedField visit(StringType stringType, State state) { - return visitPrimitiveType(BINARY, UTF8, state); + return stringType.isBinary() ? visitPrimitiveType(BINARY, state) : visitPrimitiveType(BINARY, UTF8, state); } private static boolean isUnion(StructOrUnionType s) { diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConverter.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConverter.java index b72f605ac3..c3a166a2d4 100644 --- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConverter.java +++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConverter.java @@ -35,6 +35,7 @@ import org.apache.parquet.thrift.struct.ThriftType.*; import org.apache.parquet.thrift.struct.ThriftType.StructType.StructOrUnionType; import org.apache.parquet.thrift.struct.ThriftTypeID; +import org.apache.thrift.meta_data.FieldMetaData; import java.util.ArrayList; import java.util.Collection; @@ -162,7 +163,12 @@ private static ThriftField toThriftField(String name, Field field, ThriftField.R type = new I64Type(); break; case STRING: - type = new StringType(); + StringType stringType = new StringType(); + FieldMetaData fieldMetaData = field.getFieldMetaData(); + if (fieldMetaData != null && fieldMetaData.valueMetaData.isBinary()) { + stringType.setBinary(true); + } + type = stringType; break; case STRUCT: type = toStructType(field.gettStructDescriptor()); diff --git 
a/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftType.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftType.java index 19c7c9fd4c..4c2d66231e 100644 --- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftType.java +++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftType.java @@ -641,11 +641,21 @@ public void accept(TypeVisitor visitor) { } public static class StringType extends ThriftType { + private boolean binary = false; @JsonCreator public StringType() { super(STRING); } + + public boolean isBinary() { + return binary; + } + + public void setBinary(boolean binary) { + this.binary = binary; + } + @Override public <R, S> R accept(StateVisitor<R, S> visitor, S state) { return visitor.visit(this, state); diff --git a/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestBinary.java b/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestBinary.java index c00daf90f0..b0d4feb0f2 100644 --- a/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestBinary.java +++ b/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestBinary.java @@ -21,9 +21,17 @@ import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; +import java.util.List; import java.util.UUID; + +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; -import org.junit.Assert; +import org.apache.parquet.hadoop.ParquetFileReader; +import org.apache.parquet.hadoop.metadata.ParquetMetadata; +import org.apache.parquet.schema.OriginalType; +import org.apache.parquet.schema.PrimitiveType; +import org.apache.parquet.schema.Type; +import org.apache.parquet.schema.Types; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; @@ -33,6 +41,9 @@ import org.apache.parquet.thrift.ThriftParquetWriter; import org.apache.parquet.thrift.test.binary.StringAndBinary; +import static org.junit.Assert.assertEquals; +import static 
org.junit.Assert.assertNull; + public class TestBinary { @Rule public TemporaryFolder tempDir = new TemporaryFolder(); @@ -57,10 +68,22 @@ public void testBinary() throws IOException { build(path) .withThriftClass(StringAndBinary.class) .build(); + + StringAndBinary record = reader.read(); reader.close(); - Assert.assertEquals("Should match after serialization round trip", + assertSchema(ParquetFileReader.readFooter(new Configuration(), path)); + assertEquals("Should match after serialization round trip", expected, record); } + + private void assertSchema(ParquetMetadata parquetMetadata) { + List<Type> fields = parquetMetadata.getFileMetaData().getSchema().getFields(); + assertEquals(2, fields.size()); + assertEquals(Types.required(PrimitiveType.PrimitiveTypeName.BINARY).named("s"), fields.get(0)); + assertEquals(OriginalType.UTF8, fields.get(0).getOriginalType()); + assertEquals(Types.required(PrimitiveType.PrimitiveTypeName.BINARY).named("b"), fields.get(1)); + assertNull(fields.get(1).getOriginalType()); + } }