Skip to content
Permalink
Browse files
feat: add support for AvroOptions (#1630)
Adds a new AvroOptions class and wires it into ExternalTableDefinition
  • Loading branch information
stephaniewang526 committed Oct 1, 2021
1 parent 6bdbc2d commit 10c1961f53ab6ba1b71ead9c51a369bf14389c49
@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- see http://www.mojohaus.org/clirr-maven-plugin/examples/ignored-differences.html -->
<differences>
<!--
Ignored difference for FormatOptions.avro(): the method's declared return type is now
com.google.cloud.bigquery.AvroOptions rather than FormatOptions.
NOTE(review): 7006 is presumably Clirr's "method return type changed" code; confirm against the
Clirr documentation linked above.
-->
<difference>
<differenceType>7006</differenceType>
<className>com/google/cloud/bigquery/FormatOptions</className>
<method>com.google.cloud.bigquery.FormatOptions avro()</method>
<to>com.google.cloud.bigquery.AvroOptions</to>
</difference>
</differences>
@@ -0,0 +1,112 @@
/*
* Copyright 2021 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.google.cloud.bigquery;

import com.google.common.base.MoreObjects;
import java.util.Objects;

/**
* Google BigQuery options for AVRO format. This class wraps some properties of AVRO files used by
* BigQuery to parse external data.
*/
public final class AvroOptions extends FormatOptions {

private static final long serialVersionUID = 2293570529308612712L;

private final Boolean useAvroLogicalTypes;

public static final class Builder {

private Boolean useAvroLogicalTypes;

private Builder() {}

private Builder(AvroOptions avroOptions) {
this.useAvroLogicalTypes = avroOptions.useAvroLogicalTypes;
}

/**
* [Optional] Sets whether BigQuery should interpret logical types as the corresponding BigQuery
* data type (for example, TIMESTAMP), instead of using the raw type (for example, INTEGER).
*/
public Builder setUseAvroLogicalTypes(boolean useAvroLogicalTypes) {
this.useAvroLogicalTypes = useAvroLogicalTypes;
return this;
}

/** Creates a {@code AvroOptions} object. */
public AvroOptions build() {
return new AvroOptions(this);
}
}

private AvroOptions(Builder builder) {
super(FormatOptions.AVRO);
this.useAvroLogicalTypes = builder.useAvroLogicalTypes;
}

/**
* Returns whether BigQuery should interpret logical types as the corresponding BigQuery data type
* (for example, TIMESTAMP), instead of using the raw type (for example, INTEGER).
*/
public Boolean useAvroLogicalTypes() {
return useAvroLogicalTypes;
}

public Builder toBuilder() {
return new Builder(this);
}

@Override
public String toString() {
return MoreObjects.toStringHelper(this)
.add("type", getType())
.add("useAvroLogicalTypes", useAvroLogicalTypes)
.toString();
}

@Override
public int hashCode() {
return Objects.hash(getType(), useAvroLogicalTypes);
}

@Override
public boolean equals(Object obj) {
return obj == this
|| obj instanceof AvroOptions && Objects.equals(toPb(), ((AvroOptions) obj).toPb());
}

com.google.api.services.bigquery.model.AvroOptions toPb() {
com.google.api.services.bigquery.model.AvroOptions avroOptions =
new com.google.api.services.bigquery.model.AvroOptions();
avroOptions.setUseAvroLogicalTypes(useAvroLogicalTypes);
return avroOptions;
}

/** Returns a builder for a AvroOptions object. */
public static AvroOptions.Builder newBuilder() {
return new AvroOptions.Builder();
}

static AvroOptions fromPb(com.google.api.services.bigquery.model.AvroOptions avroOptions) {
Builder builder = newBuilder();
if (avroOptions.getUseAvroLogicalTypes() != null) {
builder.setUseAvroLogicalTypes(avroOptions.getUseAvroLogicalTypes());
}
return builder.build();
}
}
@@ -300,6 +300,9 @@ com.google.api.services.bigquery.model.ExternalDataConfiguration toExternalDataC
if (getDecimalTargetTypes() != null) {
externalConfigurationPb.setDecimalTargetTypes(getDecimalTargetTypes());
}
if (getFormatOptions() != null && FormatOptions.AVRO.equals(getFormatOptions().getType())) {
externalConfigurationPb.setAvroOptions(((AvroOptions) getFormatOptions()).toPb());
}
if (getFormatOptions() != null && FormatOptions.CSV.equals(getFormatOptions().getType())) {
externalConfigurationPb.setCsvOptions(((CsvOptions) getFormatOptions()).toPb());
}
@@ -459,6 +462,9 @@ static ExternalTableDefinition fromPb(Table tablePb) {
builder.setConnectionId(externalDataConfiguration.getConnectionId());
}
builder.setIgnoreUnknownValues(externalDataConfiguration.getIgnoreUnknownValues());
if (externalDataConfiguration.getAvroOptions() != null) {
builder.setFormatOptions(AvroOptions.fromPb(externalDataConfiguration.getAvroOptions()));
}
if (externalDataConfiguration.getCsvOptions() != null) {
builder.setFormatOptions(CsvOptions.fromPb(externalDataConfiguration.getCsvOptions()));
}
@@ -508,6 +514,9 @@ static ExternalTableDefinition fromExternalDataConfiguration(
if (externalDataConfiguration.getIgnoreUnknownValues() != null) {
builder.setIgnoreUnknownValues(externalDataConfiguration.getIgnoreUnknownValues());
}
if (externalDataConfiguration.getAvroOptions() != null) {
builder.setFormatOptions(AvroOptions.fromPb(externalDataConfiguration.getAvroOptions()));
}
if (externalDataConfiguration.getCsvOptions() != null) {
builder.setFormatOptions(CsvOptions.fromPb(externalDataConfiguration.getCsvOptions()));
}
@@ -91,8 +91,8 @@ public static FormatOptions datastoreBackup() {
}

/** Default options for AVRO format. */
public static FormatOptions avro() {
return new FormatOptions(AVRO);
public static AvroOptions avro() {
return AvroOptions.newBuilder().build();
}

/** Default options for BIGTABLE format. */
@@ -120,6 +120,8 @@ public static FormatOptions of(String format) {
checkArgument(!isNullOrEmpty(format), "Provided format is null or empty");
if (format.equals(CSV)) {
return csv();
} else if (format.equals(AVRO)) {
return avro();
} else if (format.equals(DATASTORE_BACKUP)) {
return datastoreBackup();
} else if (format.equals(GOOGLE_SHEETS)) {
@@ -0,0 +1,56 @@
/*
* Copyright 2021 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.google.cloud.bigquery;

import static org.junit.Assert.assertEquals;

import org.junit.Test;

/**
 * Unit tests for {@code AvroOptions}: builder behaviour, {@code toBuilder()} copies and the
 * {@code toPb()}/{@code fromPb()} round trip.
 */
public class AvroOptionsTest {

  private static final Boolean USE_AVRO_LOGICAL_TYPES = true;
  private static final AvroOptions AVRO_OPTIONS =
      AvroOptions.newBuilder().setUseAvroLogicalTypes(USE_AVRO_LOGICAL_TYPES).build();

  /** A copy made through toBuilder() equals the source, and changes to the copy are honoured. */
  @Test
  public void testToBuilder() {
    compareAvroOptions(AVRO_OPTIONS, AVRO_OPTIONS.toBuilder().build());
    AvroOptions avroOptions = AVRO_OPTIONS.toBuilder().setUseAvroLogicalTypes(false).build();
    assertEquals(false, avroOptions.useAvroLogicalTypes());
    avroOptions = avroOptions.toBuilder().setUseAvroLogicalTypes(true).build();
    compareAvroOptions(AVRO_OPTIONS, avroOptions);
  }

  /** The builder produces an AVRO-typed instance carrying the configured flag. */
  @Test
  public void testBuilder() {
    assertEquals(FormatOptions.AVRO, AVRO_OPTIONS.getType());
    assertEquals(USE_AVRO_LOGICAL_TYPES, AVRO_OPTIONS.useAvroLogicalTypes());
  }

  /** Round-trips through the API model with the flag both set and unset. */
  @Test
  public void testToAndFromPb() {
    compareAvroOptions(AVRO_OPTIONS, AvroOptions.fromPb(AVRO_OPTIONS.toPb()));
    // Build without setting the flag so the null branch of fromPb() is exercised too; the previous
    // second case was built exactly like AVRO_OPTIONS and only repeated the first assertion.
    AvroOptions avroOptions = AvroOptions.newBuilder().build();
    compareAvroOptions(avroOptions, AvroOptions.fromPb(avroOptions.toPb()));
  }

  /** Asserts that both instances are equal, hash alike and expose the same flag value. */
  private void compareAvroOptions(AvroOptions expected, AvroOptions value) {
    assertEquals(expected, value);
    assertEquals(expected.hashCode(), value.hashCode());
    assertEquals(expected.useAvroLogicalTypes(), value.useAvroLogicalTypes());
  }
}
@@ -50,6 +50,7 @@ public class ExternalTableDefinitionTest {
private static final String COMPRESSION = "GZIP";
private static final String CONNECTION_ID = "123456789";
private static final Boolean AUTODETECT = true;
private static final AvroOptions AVRO_OPTIONS = AvroOptions.newBuilder().build();
private static final CsvOptions CSV_OPTIONS = CsvOptions.newBuilder().build();
private static final HivePartitioningOptions HIVE_PARTITIONING_OPTIONS =
HivePartitioningOptions.newBuilder()
@@ -67,6 +68,9 @@ public class ExternalTableDefinitionTest {
.setHivePartitioningOptions(HIVE_PARTITIONING_OPTIONS)
.build();

private static final ExternalTableDefinition EXTERNAL_TABLE_DEFINITION_AVRO =
ExternalTableDefinition.newBuilder(SOURCE_URIS, TABLE_SCHEMA, AVRO_OPTIONS).build();

@Test
public void testToBuilder() {
compareExternalTableDefinition(
@@ -109,6 +113,7 @@ public void testBuilder() {
assertEquals(TableDefinition.Type.EXTERNAL, EXTERNAL_TABLE_DEFINITION.getType());
assertEquals(COMPRESSION, EXTERNAL_TABLE_DEFINITION.getCompression());
assertEquals(CONNECTION_ID, EXTERNAL_TABLE_DEFINITION.getConnectionId());
assertEquals(AVRO_OPTIONS, EXTERNAL_TABLE_DEFINITION_AVRO.getFormatOptions());
assertEquals(CSV_OPTIONS, EXTERNAL_TABLE_DEFINITION.getFormatOptions());
assertEquals(IGNORE_UNKNOWN_VALUES, EXTERNAL_TABLE_DEFINITION.ignoreUnknownValues());
assertEquals(MAX_BAD_RECORDS, EXTERNAL_TABLE_DEFINITION.getMaxBadRecords());
@@ -56,7 +56,7 @@ public class LoadJobConfigurationTest {
ImmutableList.of(SchemaUpdateOption.ALLOW_FIELD_ADDITION);
private static final Schema TABLE_SCHEMA = Schema.of(FIELD_SCHEMA);
private static final Boolean AUTODETECT = true;
private static final Boolean USERAVROLOGICALTYPES = true;
private static final Boolean USE_AVRO_LOGICAL_TYPES = true;
private static final EncryptionConfiguration JOB_ENCRYPTION_CONFIGURATION =
EncryptionConfiguration.newBuilder().setKmsKeyName("KMS_KEY_1").build();
private static final TimePartitioning TIME_PARTITIONING = TimePartitioning.of(Type.DAY);
@@ -128,7 +128,7 @@ public class LoadJobConfigurationTest {
.setDestinationEncryptionConfiguration(JOB_ENCRYPTION_CONFIGURATION)
.setTimePartitioning(TIME_PARTITIONING)
.setClustering(CLUSTERING)
.setUseAvroLogicalTypes(USERAVROLOGICALTYPES)
.setUseAvroLogicalTypes(USE_AVRO_LOGICAL_TYPES)
.setLabels(LABELS)
.setJobTimeoutMs(TIMEOUT)
.setRangePartitioning(RANGE_PARTITIONING)

0 comments on commit 10c1961

Please sign in to comment.