Skip to content

Commit

Permalink
PARQUET-2468: ParquetMetadata must convert to json (#1349) (#1360)
Browse files Browse the repository at this point in the history
Co-authored-by: Michel Davit <michel@davit.fr>
  • Loading branch information
wgtmac and RustedBones committed May 31, 2024
1 parent 78a36be commit 9c8fde0
Show file tree
Hide file tree
Showing 8 changed files with 100 additions and 13 deletions.
10 changes: 10 additions & 0 deletions parquet-hadoop/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -118,11 +118,21 @@
<artifactId>jackson-core</artifactId>
<version>${jackson.version}</version>
</dependency>
<dependency>
<groupId>${jackson.groupId}</groupId>
<artifactId>jackson-annotations</artifactId>
<version>${jackson.version}</version>
</dependency>
<dependency>
<groupId>${jackson.groupId}</groupId>
<artifactId>jackson-databind</artifactId>
<version>${jackson-databind.version}</version>
</dependency>
<dependency>
<groupId>${jackson.datatype.groupId}</groupId>
<artifactId>jackson-datatype-jdk8</artifactId>
<version>${jackson-modules-java8.version}</version>
</dependency>
<dependency>
<groupId>org.xerial.snappy</groupId>
<artifactId>snappy-java</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import static org.apache.parquet.column.Encoding.RLE_DICTIONARY;
import static org.apache.parquet.format.Util.readColumnMetaData;

import com.fasterxml.jackson.annotation.JsonIgnore;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.Set;
Expand Down Expand Up @@ -338,6 +339,7 @@ public ColumnPath getPath() {
* @deprecated will be removed in 2.0.0. Use {@link #getPrimitiveType()} instead.
*/
@Deprecated
@JsonIgnore
public PrimitiveTypeName getType() {
decryptIfNeeded();
return properties.getType();
Expand Down Expand Up @@ -380,13 +382,15 @@ public PrimitiveType getPrimitiveType() {
/**
* @return the stats for this column
*/
@JsonIgnore
public abstract Statistics getStatistics();

/**
* Method should be considered private
*
* @return the size stats for this column
*/
@JsonIgnore
public SizeStatistics getSizeStatistics() {
throw new UnsupportedOperationException("SizeStatistics is not implemented");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/
package org.apache.parquet.hadoop.metadata;

import com.fasterxml.jackson.annotation.JsonIgnore;
import java.util.Arrays;
import java.util.Set;
import org.apache.parquet.column.Encoding;
Expand Down Expand Up @@ -76,6 +77,7 @@ public ColumnPath getPath() {
* @deprecated will be removed in 2.0.0. Use {@link #getPrimitiveType()} instead.
*/
@Deprecated
@JsonIgnore
public PrimitiveTypeName getType() {
return type.getPrimitiveTypeName();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import static java.util.Collections.unmodifiableMap;

import com.fasterxml.jackson.annotation.JsonIgnore;
import java.io.Serializable;
import java.util.Map;
import java.util.Objects;
Expand Down Expand Up @@ -109,6 +110,7 @@ public String getCreatedBy() {
return createdBy;
}

@JsonIgnore
public InternalFileDecryptor getFileDecryptor() {
return fileDecryptor;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
package org.apache.parquet.hadoop.metadata;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectWriter;
import com.fasterxml.jackson.databind.SerializationFeature;
import com.fasterxml.jackson.datatype.jdk8.Jdk8Module;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
Expand All @@ -32,6 +35,14 @@ public class ParquetMetadata {

private static final ObjectMapper objectMapper = new ObjectMapper();

static {
// Disable FAIL_ON_EMPTY_BEANS on the ObjectMapper. Without disabling this feature, parquet-cascading tests fail,
// because LogicalTypeAnnotation implementations are classes without any property.
objectMapper.disable(SerializationFeature.FAIL_ON_EMPTY_BEANS);
// Add support for Java 8 Optional
objectMapper.registerModule(new Jdk8Module());
}

/**
* @param parquetMetaData an instance of parquet metadata to convert
* @return the json representation
Expand All @@ -50,19 +61,23 @@ public static String toPrettyJSON(ParquetMetadata parquetMetaData) {

private static String toJSON(ParquetMetadata parquetMetaData, boolean isPrettyPrint) {
try (StringWriter stringWriter = new StringWriter()) {
Object objectToPrint;
if (parquetMetaData.getFileMetaData() == null
|| parquetMetaData.getFileMetaData().getEncryptionType()
== FileMetaData.EncryptionType.UNENCRYPTED) {
objectToPrint = parquetMetaData;
} else {
objectToPrint = parquetMetaData.getFileMetaData();
}

ObjectWriter writer;
if (isPrettyPrint) {
Object objectToPrint;
if (parquetMetaData.getFileMetaData() == null
|| parquetMetaData.getFileMetaData().getEncryptionType()
== FileMetaData.EncryptionType.UNENCRYPTED) {
objectToPrint = parquetMetaData;
} else {
objectToPrint = parquetMetaData.getFileMetaData();
}
objectMapper.writerWithDefaultPrettyPrinter().writeValue(stringWriter, objectToPrint);
writer = objectMapper.writerWithDefaultPrettyPrinter();
} else {
objectMapper.writeValue(stringWriter, parquetMetaData);
writer = objectMapper.writer();
}

writer.writeValue(stringWriter, objectToPrint);
return stringWriter.toString();
} catch (IOException e) {
throw new RuntimeException(e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@
import org.apache.parquet.column.statistics.LongStatistics;
import org.apache.parquet.column.statistics.SizeStatistics;
import org.apache.parquet.column.statistics.Statistics;
import org.apache.parquet.crypto.DecryptionPropertiesFactory;
import org.apache.parquet.crypto.EncryptionPropertiesFactory;
import org.apache.parquet.crypto.FileDecryptionProperties;
import org.apache.parquet.crypto.InternalFileDecryptor;
import org.apache.parquet.example.Paper;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroup;
Expand Down Expand Up @@ -635,18 +639,49 @@ public void randomTestFilterMetaData() {
}

@Test
public void testNullFieldMetadataDebugLogging() {
public void testFieldMetadataDebugLogging() {
MessageType schema = parseMessageType("message test { optional binary some_null_field; }");
org.apache.parquet.hadoop.metadata.FileMetaData fileMetaData =
new org.apache.parquet.hadoop.metadata.FileMetaData(schema, new HashMap<String, String>(), null);
List<BlockMetaData> blockMetaDataList = new ArrayList<BlockMetaData>();
new org.apache.parquet.hadoop.metadata.FileMetaData(
schema,
new HashMap<>(),
null,
org.apache.parquet.hadoop.metadata.FileMetaData.EncryptionType.UNENCRYPTED,
null);
List<BlockMetaData> blockMetaDataList = new ArrayList<>();
BlockMetaData blockMetaData = new BlockMetaData();
blockMetaData.addColumn(createColumnChunkMetaData());
blockMetaDataList.add(blockMetaData);
ParquetMetadata metadata = new ParquetMetadata(fileMetaData, blockMetaDataList);
ParquetMetadata.toJSON(metadata);
}

@Test
public void testEncryptedFieldMetadataDebugLogging() {
Configuration conf = new Configuration();
conf.set(
EncryptionPropertiesFactory.CRYPTO_FACTORY_CLASS_PROPERTY_NAME,
"org.apache.parquet.crypto.SampleDecryptionPropertiesFactory");
DecryptionPropertiesFactory decryptionPropertiesFactory = DecryptionPropertiesFactory.loadFactory(conf);
FileDecryptionProperties decryptionProperties =
decryptionPropertiesFactory.getFileDecryptionProperties(conf, null);

MessageType schema = parseMessageType("message test { optional binary some_null_field; }");

org.apache.parquet.hadoop.metadata.FileMetaData fileMetaData =
new org.apache.parquet.hadoop.metadata.FileMetaData(
schema,
new HashMap<>(),
null,
org.apache.parquet.hadoop.metadata.FileMetaData.EncryptionType.ENCRYPTED_FOOTER,
new InternalFileDecryptor(decryptionProperties));

List<BlockMetaData> blockMetaDataList = new ArrayList<>();
ParquetMetadata metadata = new ParquetMetadata(fileMetaData, blockMetaDataList);
ParquetMetadata.toJSON(metadata);
System.out.println(ParquetMetadata.toPrettyJSON(metadata));
}

@Test
public void testMetadataToJson() {
ParquetMetadata metadata = new ParquetMetadata(null, null);
Expand Down
18 changes: 18 additions & 0 deletions parquet-jackson/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,22 @@
<artifactId>jackson-core</artifactId>
<version>${jackson.version}</version>
</dependency>
<dependency>
<groupId>${jackson.groupId}</groupId>
<artifactId>jackson-annotations</artifactId>
<version>${jackson.version}</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>${jackson-databind.version}</version>
</dependency>
<!-- Add support for Java 8 Optional -->
<dependency>
<groupId>com.fasterxml.jackson.datatype</groupId>
<artifactId>jackson-datatype-jdk8</artifactId>
<version>${jackson-modules-java8.version}</version>
</dependency>
</dependencies>

<properties>
Expand Down Expand Up @@ -70,6 +81,7 @@
<artifactSet>
<includes>
<include>${jackson.groupId}:*</include>
<include>${jackson.datatype.groupId}:*</include>
</includes>
</artifactSet>
<filters>
Expand All @@ -79,6 +91,12 @@
<include>**</include>
</includes>
</filter>
<filter>
<artifact>${jackson.datatype.groupId}:*</artifact>
<includes>
<include>**</include>
</includes>
</filter>
</filters>
<relocations>
<relocation>
Expand Down
1 change: 1 addition & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
<jackson.package>com.fasterxml.jackson</jackson.package>
<jackson.version>2.17.0</jackson.version>
<jackson-databind.version>2.17.0</jackson-databind.version>
<jackson-modules-java8.version>2.17.0</jackson-modules-java8.version>
<japicmp.version>0.21.0</japicmp.version>
<javax.annotation.version>1.3.2</javax.annotation.version>
<spotless.version>2.30.0</spotless.version>
Expand Down

0 comments on commit 9c8fde0

Please sign in to comment.