Skip to content

Commit

Permalink
docs(samples): jsonstreamwriter samples (#756)
Browse files Browse the repository at this point in the history
* docs(samples): jsonstreamwriter samples

* Add copyright notice

* Remove allowUnknownFields parameter

* Added retry with exponential backoff

* Revert "Added retry with exponential backoff"

Remove the backoff logic to keep the sample code simpler.

* Addressed PR review feedback.

- Simplify code, remove duplicate-record example.
- Split all snippets and tests into separate classes.
- Add comments and javadocs links.
- Clean up imports.
- Add region tags.
- Catch only specific exceptions.
- Run linter and fmt-maven-plugin.

* docs(samples): Fix mismatched region tag

* Update samples/snippets/src/main/java/com/example/bigquerystorage/WriteCommittedStream.java

* Update samples/snippets/src/main/java/com/example/bigquerystorage/WritePendingStream.java

* Update samples/snippets/src/main/java/com/example/bigquerystorage/WritePendingStream.java

* Update samples/snippets/src/main/java/com/example/bigquerystorage/WritePendingStream.java

* Update samples/snippets/src/main/java/com/example/bigquerystorage/WriteToDefaultStream.java

* Update samples/snippets/src/main/java/com/example/bigquerystorage/WriteToDefaultStream.java

* Update samples/snippets/src/main/java/com/example/bigquerystorage/WriteToDefaultStream.java

* Update samples/snippets/src/main/java/com/example/bigquerystorage/WriteToDefaultStream.java

* Update samples/snippets/src/main/java/com/example/bigquerystorage/WritePendingStream.java

* Update samples/snippets/src/main/java/com/example/bigquerystorage/WriteCommittedStream.java

* Update samples/snippets/src/main/java/com/example/bigquerystorage/WriteCommittedStream.java

* Update samples/snippets/src/main/java/com/example/bigquerystorage/WriteCommittedStream.java

* Update samples/snippets/src/main/java/com/example/bigquerystorage/WritePendingStream.java

* docs(samples): Create test resources

Create temporary dataset and table for sample integration tests

Co-authored-by: Stephanie Wang <stephaniewang526@users.noreply.github.com>
  • Loading branch information
VeronicaWasson and stephaniewang526 committed Jan 13, 2021
1 parent 1c2b5c1 commit 929b2ce
Show file tree
Hide file tree
Showing 6 changed files with 558 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/*
* Copyright 2020 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.example.bigquerystorage;

// [START bigquerystorage_jsonstreamwriter_committed]
import com.google.api.core.ApiFuture;
import com.google.cloud.bigquery.storage.v1beta2.AppendRowsResponse;
import com.google.cloud.bigquery.storage.v1beta2.BigQueryWriteClient;
import com.google.cloud.bigquery.storage.v1beta2.CreateWriteStreamRequest;
import com.google.cloud.bigquery.storage.v1beta2.JsonStreamWriter;
import com.google.cloud.bigquery.storage.v1beta2.TableName;
import com.google.cloud.bigquery.storage.v1beta2.WriteStream;
import com.google.protobuf.Descriptors.DescriptorValidationException;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
import org.json.JSONArray;
import org.json.JSONObject;

/**
 * Sample showing how to append JSON records to a BigQuery table through a write stream of type
 * {@code COMMITTED}, using {@link JsonStreamWriter}.
 */
public class WriteCommittedStream {

  /** Entry point with placeholder identifiers; edit the values before running the sample. */
  public static void runWriteCommittedStream()
      throws DescriptorValidationException, InterruptedException, IOException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "MY_PROJECT_ID";
    String datasetName = "MY_DATASET_NAME";
    String tableName = "MY_TABLE_NAME";

    writeCommittedStream(projectId, datasetName, tableName);
  }

  /**
   * Creates a {@code COMMITTED} write stream on the given table and appends ten single-row
   * batches to it, waiting for each append to complete before sending the next one.
   *
   * <p>An {@link ExecutionException} raised while waiting on an append is caught and reported on
   * standard output instead of being propagated.
   *
   * @param projectId project that owns the destination table
   * @param datasetName dataset that contains the destination table
   * @param tableName destination table; the sample writes a string value to its {@code col1} field
   */
  public static void writeCommittedStream(String projectId, String datasetName, String tableName)
      throws DescriptorValidationException, InterruptedException, IOException {

    try (BigQueryWriteClient client = BigQueryWriteClient.create()) {
      // Describe the kind of stream we want and ask the service to create it on the table.
      // The available stream types are documented at:
      // https://googleapis.dev/java/google-cloud-bigquerystorage/latest/com/google/cloud/bigquery/storage/v1beta2/WriteStream.Type.html
      TableName parentTable = TableName.of(projectId, datasetName, tableName);
      WriteStream streamTemplate =
          WriteStream.newBuilder().setType(WriteStream.Type.COMMITTED).build();
      CreateWriteStreamRequest request =
          CreateWriteStreamRequest.newBuilder()
              .setParent(parentTable.toString())
              .setWriteStream(streamTemplate)
              .build();
      WriteStream createdStream = client.createWriteStream(request);
      String streamName = createdStream.getName();

      // JsonStreamWriter accepts rows as JSON. Reference documentation:
      // https://googleapis.dev/java/google-cloud-bigquerystorage/latest/com/google/cloud/bigquery/storage/v1beta2/JsonStreamWriter.html
      try (JsonStreamWriter writer =
          JsonStreamWriter.newBuilder(streamName, createdStream.getTableSchema(), client)
              .build()) {
        // Send 10 batches of one row each.
        for (int offset = 0; offset < 10; offset++) {
          // The loop index doubles as the record offset so duplicate appends can be detected.
          // Omit the offset, or use WriteStream.Type.DEFAULT, to disable deduplication.
          ApiFuture<AppendRowsResponse> pending = writer.append(singleRowBatch(offset), offset);
          // Block until this append has completed before sending the next one.
          pending.get();
        }
      }
      System.out.println("Appended records successfully.");
    } catch (ExecutionException e) {
      // When the cause is a StatusRuntimeException, inspect its status: INTERNAL, CANCELLED and
      // ABORTED indicate the operation can be retried. See:
      // https://grpc.github.io/grpc-java/javadoc/io/grpc/StatusRuntimeException.html
      System.out.println("Failed to append records. \n" + e.toString());
    }
  }

  /** Builds a one-element JSON array whose only row sets {@code col1} from the given index. */
  private static JSONArray singleRowBatch(int index) {
    JSONObject row = new JSONObject();
    row.put("col1", String.format("record %03d", index));
    JSONArray batch = new JSONArray();
    batch.put(row);
    return batch;
  }
}
// [END bigquerystorage_jsonstreamwriter_committed]
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
/*
* Copyright 2020 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.example.bigquerystorage;

// [START bigquerystorage_jsonstreamwriter_pending]
import com.google.api.core.ApiFuture;
import com.google.cloud.bigquery.storage.v1beta2.AppendRowsResponse;
import com.google.cloud.bigquery.storage.v1beta2.BatchCommitWriteStreamsRequest;
import com.google.cloud.bigquery.storage.v1beta2.BatchCommitWriteStreamsResponse;
import com.google.cloud.bigquery.storage.v1beta2.BigQueryWriteClient;
import com.google.cloud.bigquery.storage.v1beta2.CreateWriteStreamRequest;
import com.google.cloud.bigquery.storage.v1beta2.FinalizeWriteStreamResponse;
import com.google.cloud.bigquery.storage.v1beta2.JsonStreamWriter;
import com.google.cloud.bigquery.storage.v1beta2.TableName;
import com.google.cloud.bigquery.storage.v1beta2.WriteStream;
import com.google.protobuf.Descriptors.DescriptorValidationException;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
import org.json.JSONArray;
import org.json.JSONObject;

/**
 * Sample showing how to append JSON records to a BigQuery table through a write stream of type
 * {@code PENDING}, then finalize and commit that stream, using {@link JsonStreamWriter}.
 */
public class WritePendingStream {

  /** Entry point with placeholder identifiers; edit the values before running the sample. */
  public static void runWritePendingStream()
      throws DescriptorValidationException, InterruptedException, IOException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "MY_PROJECT_ID";
    String datasetName = "MY_DATASET_NAME";
    String tableName = "MY_TABLE_NAME";

    writePendingStream(projectId, datasetName, tableName);
  }

  /**
   * Creates a {@code PENDING} write stream on the given table, appends ten single-row batches,
   * finalizes the stream, and then commits it.
   *
   * <p>The success message is printed only when the commit response carries a commit time;
   * otherwise a failure message is printed. An {@link ExecutionException} raised while waiting on
   * an append is caught and printed instead of being propagated.
   *
   * @param projectId project that owns the destination table
   * @param datasetName dataset that contains the destination table
   * @param tableName destination table; the sample writes a string value to its {@code col1} field
   */
  public static void writePendingStream(String projectId, String datasetName, String tableName)
      throws DescriptorValidationException, InterruptedException, IOException {
    try (BigQueryWriteClient client = BigQueryWriteClient.create()) {
      // Initialize a write stream for the specified table.
      // For more information on WriteStream.Type, see:
      // https://googleapis.dev/java/google-cloud-bigquerystorage/latest/com/google/cloud/bigquery/storage/v1beta2/WriteStream.Type.html
      WriteStream stream = WriteStream.newBuilder().setType(WriteStream.Type.PENDING).build();
      TableName parentTable = TableName.of(projectId, datasetName, tableName);
      CreateWriteStreamRequest createWriteStreamRequest =
          CreateWriteStreamRequest.newBuilder()
              .setParent(parentTable.toString())
              .setWriteStream(stream)
              .build();
      WriteStream writeStream = client.createWriteStream(createWriteStreamRequest);

      // Use the JSON stream writer to send records in JSON format.
      // For more information about JsonStreamWriter, see:
      // https://googleapis.dev/java/google-cloud-bigquerystorage/latest/com/google/cloud/bigquery/storage/v1beta2/JsonStreamWriter.html
      try (JsonStreamWriter writer =
          JsonStreamWriter.newBuilder(writeStream.getName(), writeStream.getTableSchema(), client)
              .build()) {
        // Append 10 JSON objects to the stream.
        for (int i = 0; i < 10; i++) {
          // Create a JSON object that is compatible with the table schema.
          JSONObject record = new JSONObject();
          record.put("col1", String.format("batch-record %03d", i));
          JSONArray jsonArr = new JSONArray();
          jsonArr.put(record);

          ApiFuture<AppendRowsResponse> future = writer.append(jsonArr);
          // Block until this append has completed before sending the next one.
          future.get();
        }
        // Finalize the stream once all rows have been appended, and report the row count the
        // service returns for it.
        FinalizeWriteStreamResponse finalizeResponse =
            client.finalizeWriteStream(writeStream.getName());
        System.out.println("Rows written: " + finalizeResponse.getRowCount());
      }

      // Commit the streams.
      BatchCommitWriteStreamsRequest commitRequest =
          BatchCommitWriteStreamsRequest.newBuilder()
              .setParent(parentTable.toString())
              .addWriteStreams(writeStream.getName())
              .build();
      BatchCommitWriteStreamsResponse commitResponse =
          client.batchCommitWriteStreams(commitRequest);
      // NOTE(review): per the Storage Write API reference, commit_time is only populated when
      // the commit succeeded - confirm against the v1beta2 proto. Without this check the sample
      // would report success even when nothing was committed.
      if (!commitResponse.hasCommitTime()) {
        System.out.println("Failed to commit write stream " + writeStream.getName());
        return;
      }
      System.out.println("Appended and committed records successfully.");
    } catch (ExecutionException e) {
      // If the wrapped exception is a StatusRuntimeException, check the state of the operation.
      // If the state is INTERNAL, CANCELLED, or ABORTED, you can retry. For more information, see:
      // https://grpc.github.io/grpc-java/javadoc/io/grpc/StatusRuntimeException.html
      System.out.println(e);
    }
  }
}
// [END bigquerystorage_jsonstreamwriter_pending]
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* Copyright 2020 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.example.bigquerystorage;

// [START bigquerystorage_jsonstreamwriter_default]
import com.google.api.core.ApiFuture;
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.Table;
import com.google.cloud.bigquery.storage.v1beta2.AppendRowsResponse;
import com.google.cloud.bigquery.storage.v1beta2.JsonStreamWriter;
import com.google.cloud.bigquery.storage.v1beta2.TableName;
import com.google.protobuf.Descriptors.DescriptorValidationException;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
import org.json.JSONArray;
import org.json.JSONObject;

/**
 * Sample showing how to append JSON records to a BigQuery table through the table's default
 * stream, using {@link JsonStreamWriter}.
 */
public class WriteToDefaultStream {

  /** Entry point with placeholder identifiers; edit the values before running the sample. */
  public static void runWriteToDefaultStream()
      throws DescriptorValidationException, InterruptedException, IOException {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "MY_PROJECT_ID";
    String datasetName = "MY_DATASET_NAME";
    String tableName = "MY_TABLE_NAME";

    writeToDefaultStream(projectId, datasetName, tableName);
  }

  /**
   * Reads the schema of the given table and appends ten single-row batches to its default stream,
   * waiting for each append to complete before sending the next one.
   *
   * <p>An {@link ExecutionException} raised while waiting on an append is caught and reported on
   * standard output instead of being propagated.
   *
   * @param projectId project that owns the destination table
   * @param datasetName dataset that contains the destination table
   * @param tableName destination table; the sample writes a string value to its {@code col1} field
   */
  public static void writeToDefaultStream(String projectId, String datasetName, String tableName)
      throws DescriptorValidationException, InterruptedException, IOException {
    // Bind the BigQuery client to the caller-supplied project so that the schema is read from
    // the same table the rows are written to, even when projectId differs from the
    // environment's default project.
    BigQuery bigquery =
        BigQueryOptions.newBuilder().setProjectId(projectId).build().getService();
    Table table = bigquery.getTable(datasetName, tableName);
    TableName parentTable = TableName.of(projectId, datasetName, tableName);
    Schema schema = table.getDefinition().getSchema();

    // Use the JSON stream writer to send records in JSON format.
    // For more information about JsonStreamWriter, see:
    // https://googleapis.dev/java/google-cloud-bigquerystorage/latest/com/google/cloud/bigquery/storage/v1beta2/JsonStreamWriter.html
    try (JsonStreamWriter writer =
        JsonStreamWriter.newBuilder(parentTable.toString(), schema).createDefaultStream().build()) {
      // Append 10 JSON objects to the stream.
      for (int i = 0; i < 10; i++) {
        // Create a JSON object that is compatible with the table schema.
        JSONObject record = new JSONObject();
        record.put("col1", String.format("record %03d", i));
        JSONArray jsonArr = new JSONArray();
        jsonArr.put(record);

        ApiFuture<AppendRowsResponse> future = writer.append(jsonArr);
        // Block until this append has completed before sending the next one.
        future.get();
      }
      System.out.println("Appended records successfully.");
    } catch (ExecutionException e) {
      // If the wrapped exception is a StatusRuntimeException, check the state of the operation.
      // If the state is INTERNAL, CANCELLED, or ABORTED, you can retry. For more information, see:
      // https://grpc.github.io/grpc-java/javadoc/io/grpc/StatusRuntimeException.html
      System.out.println("Failed to append records. \n" + e.toString());
    }
  }
}
// [END bigquerystorage_jsonstreamwriter_default]
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/*
* Copyright 2020 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.example.bigquerystorage;

import static com.google.common.truth.Truth.assertThat;
import static junit.framework.TestCase.assertNotNull;

import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQuery.DatasetDeleteOption;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.DatasetId;
import com.google.cloud.bigquery.DatasetInfo;
import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.StandardSQLTypeName;
import com.google.cloud.bigquery.StandardTableDefinition;
import com.google.cloud.bigquery.TableId;
import com.google.cloud.bigquery.TableInfo;
import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.util.UUID;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

/**
 * Integration test for {@link WriteCommittedStream}. Each test runs against a freshly created
 * dataset and table, with standard output captured so the sample's printed result can be checked.
 */
@RunWith(JUnit4.class)
public class WriteCommittedStreamIT {

  private static final String GOOGLE_CLOUD_PROJECT = System.getenv("GOOGLE_CLOUD_PROJECT");

  private ByteArrayOutputStream bout;
  private PrintStream out;
  // The stream System.out pointed to before the test redirected it; restored in tearDown().
  private PrintStream originalOut;
  private BigQuery bigquery;
  private String datasetName;
  private String tableName;

  /** Fails with a descriptive message when the named environment variable is not set. */
  private static void requireEnvVar(String varName) {
    assertNotNull(
        "Environment variable " + varName + " is required to perform these tests.",
        System.getenv(varName));
  }

  @BeforeClass
  public static void checkRequirements() {
    requireEnvVar("GOOGLE_CLOUD_PROJECT");
  }

  /** Redirects standard output to a buffer and creates the temporary dataset and table. */
  @Before
  public void setUp() {
    // Remember the real stdout so it can be put back after the test.
    originalOut = System.out;
    bout = new ByteArrayOutputStream();
    out = new PrintStream(bout);
    System.setOut(out);

    bigquery = BigQueryOptions.getDefaultInstance().getService();

    // Create a new dataset and table for each test.
    datasetName = "WRITE_STREAM_TEST" + UUID.randomUUID().toString().substring(0, 8);
    tableName = "COMMITTED_STREAM_TEST" + UUID.randomUUID().toString().substring(0, 8);
    Schema schema = Schema.of(Field.of("col1", StandardSQLTypeName.STRING));
    bigquery.create(DatasetInfo.newBuilder(datasetName).build());
    TableInfo tableInfo =
        TableInfo.newBuilder(TableId.of(datasetName, tableName), StandardTableDefinition.of(schema))
            .build();
    bigquery.create(tableInfo);
  }

  /** Deletes the temporary dataset (with its contents) and restores standard output. */
  @After
  public void tearDown() {
    try {
      bigquery.delete(
          DatasetId.of(GOOGLE_CLOUD_PROJECT, datasetName), DatasetDeleteOption.deleteContents());
    } finally {
      // Restore the original stream rather than setting System.out to null: a null System.out
      // makes every later println in the same JVM throw NullPointerException. Doing this in
      // finally keeps stdout usable even when the dataset deletion fails.
      System.out.flush();
      System.setOut(originalOut);
    }
  }

  @Test
  public void testWriteCommittedStream() throws Exception {
    WriteCommittedStream.writeCommittedStream(GOOGLE_CLOUD_PROJECT, datasetName, tableName);
    assertThat(bout.toString()).contains("Appended records successfully.");
  }
}
Loading

0 comments on commit 929b2ce

Please sign in to comment.