Skip to content
Permalink
Browse files
feat: expose hivepartitioningoptions (#233)
* feat: expose hivepartitioningoptions

* feat: add javadocs
  • Loading branch information
Praful Makani committed Mar 19, 2020
1 parent 7e568dc commit 6213ea98c7b58634c11ffd13e925482a3627c5dc
@@ -0,0 +1,130 @@
/*
* Copyright 2020 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.google.cloud.bigquery;

import com.google.common.base.MoreObjects;
import java.util.Objects;

/**
 * Options that control how hive-partitioned source data is interpreted when it is read.
 *
 * <p>Not all storage formats support hive partitioning; requesting it on an unsupported format
 * leads to an error. Currently supported types include: AVRO, CSV, JSON, ORC and Parquet.
 *
 * <p>Instances are immutable value objects: two instances are equal when both their mode and
 * their source URI prefix are equal.
 */
public final class HivePartitioningOptions {

  private final String mode;
  private final String sourceUriPrefix;

  /** A builder for {@link HivePartitioningOptions} objects. */
  public static final class Builder {

    private String mode;
    private String sourceUriPrefix;

    private Builder() {}

    private Builder(HivePartitioningOptions options) {
      this.mode = options.mode;
      this.sourceUriPrefix = options.sourceUriPrefix;
    }

    /**
     * [Optional] When set, what mode of hive partitioning to use when reading data. Two modes are
     * supported. (1) AUTO: automatically infer partition key name(s) and type(s). (2) STRINGS:
     * automatically infer partition key name(s). All types are interpreted as strings. Not all
     * storage formats support hive partitioning. Requesting hive partitioning on an unsupported
     * format will lead to an error. Currently supported types include: AVRO, CSV, JSON, ORC and
     * Parquet.
     *
     * @param mode the hive partitioning mode; stored as given, without validation
     * @return this builder
     */
    public Builder setMode(String mode) {
      this.mode = mode;
      return this;
    }

    /**
     * [Optional] When hive partition detection is requested, a common prefix for all source uris
     * should be supplied. The prefix must end immediately before the partition key encoding
     * begins. For example, consider files following this data layout:
     *
     * <pre>
     * gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
     * gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro
     * </pre>
     *
     * <p>When hive partitioning is requested with either AUTO or STRINGS detection, the common
     * prefix can be either of gs://bucket/path_to_table or gs://bucket/path_to_table/ (trailing
     * slash does not matter).
     *
     * @param sourceUriPrefix the common source URI prefix; stored as given, without validation
     * @return this builder
     */
    public Builder setSourceUriPrefix(String sourceUriPrefix) {
      this.sourceUriPrefix = sourceUriPrefix;
      return this;
    }

    /** Creates a {@link HivePartitioningOptions} object. */
    public HivePartitioningOptions build() {
      return new HivePartitioningOptions(this);
    }
  }

  private HivePartitioningOptions(Builder builder) {
    this.mode = builder.mode;
    this.sourceUriPrefix = builder.sourceUriPrefix;
  }

  /** Returns the mode of hive partitioning, or {@code null} if it was not set. */
  public String getMode() {
    return mode;
  }

  /** Returns the source URI prefix of hive partitioning, or {@code null} if it was not set. */
  public String getSourceUriPrefix() {
    return sourceUriPrefix;
  }

  /** Returns a builder pre-populated with the values of this {@link HivePartitioningOptions}. */
  public Builder toBuilder() {
    return new Builder(this);
  }

  /** Returns an empty builder for the {@link HivePartitioningOptions} object. */
  public static Builder newBuilder() {
    return new Builder();
  }

  @Override
  public String toString() {
    return MoreObjects.toStringHelper(this)
        .add("mode", mode)
        .add("sourceUriPrefix", sourceUriPrefix)
        .toString();
  }

  @Override
  public int hashCode() {
    return Objects.hash(mode, sourceUriPrefix);
  }

  /**
   * Compares this object with {@code obj} by value.
   *
   * <p>Defined over exactly the fields used by {@link #hashCode()} so that equal objects always
   * have equal hash codes.
   *
   * @param obj the object to compare with, may be {@code null}
   * @return {@code true} if {@code obj} is a {@link HivePartitioningOptions} with the same mode
   *     and source URI prefix
   */
  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    // The class is final, so instanceof cannot admit a subclass with different semantics.
    if (!(obj instanceof HivePartitioningOptions)) {
      return false;
    }
    HivePartitioningOptions other = (HivePartitioningOptions) obj;
    return Objects.equals(mode, other.mode)
        && Objects.equals(sourceUriPrefix, other.sourceUriPrefix);
  }

  /** Converts this object to its API model representation, copying both fields as-is. */
  com.google.api.services.bigquery.model.HivePartitioningOptions toPb() {
    com.google.api.services.bigquery.model.HivePartitioningOptions options =
        new com.google.api.services.bigquery.model.HivePartitioningOptions();
    options.setMode(mode);
    options.setSourceUriPrefix(sourceUriPrefix);
    return options;
  }

  /**
   * Creates a {@link HivePartitioningOptions} from its API model representation. Fields that are
   * {@code null} in the model are left unset on the result.
   */
  static HivePartitioningOptions fromPb(
      com.google.api.services.bigquery.model.HivePartitioningOptions options) {
    Builder builder = newBuilder();
    if (options.getMode() != null) {
      builder.setMode(options.getMode());
    }
    if (options.getSourceUriPrefix() != null) {
      builder.setSourceUriPrefix(options.getSourceUriPrefix());
    }
    return builder.build();
  }
}
@@ -54,6 +54,7 @@ public final class LoadJobConfiguration extends JobConfiguration implements Load
private final Map<String, String> labels;
private final Long jobTimeoutMs;
private final RangePartitioning rangePartitioning;
private final HivePartitioningOptions hivePartitioningOptions;

public static final class Builder extends JobConfiguration.Builder<LoadJobConfiguration, Builder>
implements LoadConfiguration.Builder {
@@ -77,6 +78,7 @@ public static final class Builder extends JobConfiguration.Builder<LoadJobConfig
private Map<String, String> labels;
private Long jobTimeoutMs;
private RangePartitioning rangePartitioning;
private HivePartitioningOptions hivePartitioningOptions;

private Builder() {
super(Type.LOAD);
@@ -103,6 +105,7 @@ private Builder(LoadJobConfiguration loadConfiguration) {
this.labels = loadConfiguration.labels;
this.jobTimeoutMs = loadConfiguration.jobTimeoutMs;
this.rangePartitioning = loadConfiguration.rangePartitioning;
this.hivePartitioningOptions = loadConfiguration.hivePartitioningOptions;
}

private Builder(com.google.api.services.bigquery.model.JobConfiguration configurationPb) {
@@ -186,6 +189,10 @@ private Builder(com.google.api.services.bigquery.model.JobConfiguration configur
this.rangePartitioning =
RangePartitioning.fromPb(loadConfigurationPb.getRangePartitioning());
}
if (loadConfigurationPb.getHivePartitioningOptions() != null) {
this.hivePartitioningOptions =
HivePartitioningOptions.fromPb(loadConfigurationPb.getHivePartitioningOptions());
}
}

@Override
@@ -319,6 +326,11 @@ public Builder setRangePartitioning(RangePartitioning rangePartitioning) {
return this;
}

/**
 * Sets the hive partitioning options to use for this load job configuration.
 *
 * @param hivePartitioningOptions the options to store on this builder; stored as given
 * @return this builder, so that calls can be chained
 */
public Builder setHivePartitioningOptions(HivePartitioningOptions hivePartitioningOptions) {
this.hivePartitioningOptions = hivePartitioningOptions;
return this;
}

@Override
public LoadJobConfiguration build() {
return new LoadJobConfiguration(this);
@@ -345,6 +357,7 @@ private LoadJobConfiguration(Builder builder) {
this.labels = builder.labels;
this.jobTimeoutMs = builder.jobTimeoutMs;
this.rangePartitioning = builder.rangePartitioning;
this.hivePartitioningOptions = builder.hivePartitioningOptions;
}

@Override
@@ -452,6 +465,10 @@ public RangePartitioning getRangePartitioning() {
return rangePartitioning;
}

/**
 * Returns the hive partitioning options held by this load job configuration.
 *
 * <p>NOTE(review): presumably {@code null} when no options were supplied to the builder —
 * confirm against callers.
 */
public HivePartitioningOptions getHivePartitioningOptions() {
return hivePartitioningOptions;
}

@Override
public Builder toBuilder() {
return new Builder(this);
@@ -477,7 +494,8 @@ ToStringHelper toStringHelper() {
.add("useAvroLogicalTypes", useAvroLogicalTypes)
.add("labels", labels)
.add("jobTimeoutMs", jobTimeoutMs)
.add("rangePartitioning", rangePartitioning);
.add("rangePartitioning", rangePartitioning)
.add("hivePartitioningOptions", hivePartitioningOptions);
}

@Override
@@ -570,6 +588,9 @@ com.google.api.services.bigquery.model.JobConfiguration toPb() {
if (rangePartitioning != null) {
loadConfigurationPb.setRangePartitioning(rangePartitioning.toPb());
}
if (hivePartitioningOptions != null) {
loadConfigurationPb.setHivePartitioningOptions(hivePartitioningOptions.toPb());
}
jobConfiguration.setLoad(loadConfigurationPb);
return jobConfiguration;
}
@@ -0,0 +1,69 @@
/*
* Copyright 2020 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.google.cloud.bigquery;

import static com.google.common.truth.Truth.assertThat;

import org.junit.Test;

/** Unit tests for {@link HivePartitioningOptions}: builder round-trips and model conversion. */
public class HivePartitioningOptionsTest {

  // "STRINGS" is one of the two modes documented on HivePartitioningOptions.Builder#setMode
  // (the other being "AUTO"); the fixture should model a valid configuration.
  private static final String MODE = "STRINGS";
  private static final String SOURCE_URI_PREFIX = "gs://bucket/path_to_table";
  private static final HivePartitioningOptions HIVE_PARTITIONING_OPTIONS =
      HivePartitioningOptions.newBuilder()
          .setMode(MODE)
          .setSourceUriPrefix(SOURCE_URI_PREFIX)
          .build();

  @Test
  public void testToBuilder() {
    compareHivePartitioningOptions(
        HIVE_PARTITIONING_OPTIONS, HIVE_PARTITIONING_OPTIONS.toBuilder().build());
    HivePartitioningOptions options =
        HIVE_PARTITIONING_OPTIONS.toBuilder().setMode("AUTO").build();
    assertThat(options.getMode()).isEqualTo("AUTO");
    // Restoring the original mode must yield an object matching the original fixture.
    options = HIVE_PARTITIONING_OPTIONS.toBuilder().setMode(MODE).build();
    compareHivePartitioningOptions(HIVE_PARTITIONING_OPTIONS, options);
  }

  @Test
  public void testToBuilderIncomplete() {
    // A builder with nothing set must still round-trip through toBuilder().
    HivePartitioningOptions options = HivePartitioningOptions.newBuilder().build();
    compareHivePartitioningOptions(options, options.toBuilder().build());
  }

  @Test
  public void testBuilder() {
    assertThat(HIVE_PARTITIONING_OPTIONS.getMode()).isEqualTo(MODE);
    assertThat(HIVE_PARTITIONING_OPTIONS.getSourceUriPrefix()).isEqualTo(SOURCE_URI_PREFIX);
  }

  @Test
  public void testToAndFromPb() {
    compareHivePartitioningOptions(
        HIVE_PARTITIONING_OPTIONS,
        HivePartitioningOptions.fromPb(HIVE_PARTITIONING_OPTIONS.toPb()));
  }

  /**
   * Asserts that {@code value} matches {@code expected} field by field, and that both produce the
   * same string representation and hash code.
   */
  private void compareHivePartitioningOptions(
      HivePartitioningOptions expected, HivePartitioningOptions value) {
    assertThat(value.getMode()).isEqualTo(expected.getMode());
    assertThat(value.getSourceUriPrefix()).isEqualTo(expected.getSourceUriPrefix());
    assertThat(value.toString()).isEqualTo(expected.toString());
    assertThat(value.hashCode()).isEqualTo(expected.hashCode());
  }
}

0 comments on commit 6213ea9

Please sign in to comment.