Skip to content
This repository was archived by the owner on Feb 24, 2026. It is now read-only.

Commit 973afcc

Browse files
feat: add schema update support in JsonStreamWriter (#1447)
* feat: add schema update support in JsonStreamWriter * fix unit tests * fix lint * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * update concurrency logic and add corresponding testcase * update getUpdatedSchema() in StreamWriter.java to use lock; update concurrent test case * simplify logic * add unit test * update Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent b774f5d commit 973afcc

File tree

4 files changed

+373
-13
lines changed

4 files changed

+373
-13
lines changed

google-cloud-bigquerystorage/src/main/java/com/google/cloud/bigquery/storage/v1/JsonStreamWriter.java

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import com.google.common.base.Preconditions;
2323
import com.google.protobuf.Descriptors;
2424
import com.google.protobuf.Descriptors.Descriptor;
25+
import com.google.protobuf.Descriptors.DescriptorValidationException;
2526
import com.google.protobuf.Message;
2627
import java.io.IOException;
2728
import java.util.logging.Logger;
@@ -34,7 +35,10 @@
3435
/**
3536
* A StreamWriter that can write JSON data (JSONObjects) to BigQuery tables. The JsonStreamWriter is
3637
* built on top of a StreamWriter, and it simply converts all JSON data to protobuf messages then
37-
* calls StreamWriter's append() method to write to BigQuery tables.
38+
* calls StreamWriter's append() method to write to BigQuery tables. It maintains all StreamWriter
39+
* functions, but also provides an additional feature: schema update support, where if the BigQuery
40+
* table schema is updated, users will be able to ingest data on the new schema after some time (in
41+
* order of minutes).
3842
*/
3943
public class JsonStreamWriter implements AutoCloseable {
4044
private static String streamPatternString =
@@ -81,27 +85,49 @@ private JsonStreamWriter(Builder builder)
8185
/**
8286
* Writes a JSONArray that contains JSONObjects to the BigQuery table by first converting the JSON
8387
* data to protobuf messages, then using StreamWriter's append() to write the data at current end
84-
* of stream.
88+
* of stream. If there is a schema update, the current StreamWriter is closed. A new StreamWriter
89+
* is created with the updated TableSchema.
8590
*
8691
* @param jsonArr The JSON array that contains JSONObjects to be written
8792
* @return ApiFuture<AppendRowsResponse> returns an AppendRowsResponse message wrapped in an
8893
* ApiFuture
8994
*/
90-
public ApiFuture<AppendRowsResponse> append(JSONArray jsonArr) {
95+
public ApiFuture<AppendRowsResponse> append(JSONArray jsonArr)
96+
throws IOException, DescriptorValidationException {
9197
return append(jsonArr, -1);
9298
}
9399

94100
/**
95101
* Writes a JSONArray that contains JSONObjects to the BigQuery table by first converting the JSON
96102
* data to protobuf messages, then using StreamWriter's append() to write the data at the
97-
* specified offset.
103+
* specified offset. If there is a schema update, the current StreamWriter is closed. A new
104+
* StreamWriter is created with the updated TableSchema.
98105
*
99106
* @param jsonArr The JSON array that contains JSONObjects to be written
100107
* @param offset Offset for deduplication
101108
* @return ApiFuture<AppendRowsResponse> returns an AppendRowsResponse message wrapped in an
102109
* ApiFuture
103110
*/
104-
public ApiFuture<AppendRowsResponse> append(JSONArray jsonArr, long offset) {
111+
public ApiFuture<AppendRowsResponse> append(JSONArray jsonArr, long offset)
112+
throws IOException, DescriptorValidationException {
113+
// Handle schema updates in a Thread-safe way by locking down the operation
114+
synchronized (this) {
115+
TableSchema updatedSchema = this.streamWriter.getUpdatedSchema();
116+
if (updatedSchema != null) {
117+
// Close the StreamWriter
118+
this.streamWriter.close();
119+
// Update JsonStreamWriter's TableSchema and Descriptor
120+
this.tableSchema = updatedSchema;
121+
this.descriptor =
122+
BQTableSchemaToProtoDescriptor.convertBQTableSchemaToProtoDescriptor(updatedSchema);
123+
// Create a new underlying StreamWriter with the updated TableSchema and Descriptor
124+
this.streamWriter =
125+
streamWriterBuilder
126+
.setWriterSchema(ProtoSchemaConverter.convert(this.descriptor))
127+
.build();
128+
}
129+
}
130+
105131
ProtoRows.Builder rowsBuilder = ProtoRows.newBuilder();
106132
// Any error in convertJsonToProtoMessage will throw an
107133
// IllegalArgumentException/IllegalStateException/NullPointerException and will halt processing
@@ -155,9 +181,9 @@ private void setStreamWriterSettings(
155181
streamWriterBuilder.setEndpoint(endpoint);
156182
}
157183
if (traceId != null) {
158-
streamWriterBuilder.setTraceId("JsonWriterBeta_" + traceId);
184+
streamWriterBuilder.setTraceId("JsonWriter_" + traceId);
159185
} else {
160-
streamWriterBuilder.setTraceId("JsonWriterBeta:null");
186+
streamWriterBuilder.setTraceId("JsonWriter:null");
161187
}
162188
if (flowControlSettings != null) {
163189
if (flowControlSettings.getMaxOutstandingRequestBytes() != null) {

google-cloud-bigquerystorage/src/main/java/com/google/cloud/bigquery/storage/v1/StreamWriter.java

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,6 @@
4444
* A BigQuery Stream Writer that can be used to write data into BigQuery Table.
4545
*
4646
* <p>TODO: Support batching.
47-
*
48-
* <p>TODO: Support schema change.
4947
*/
5048
public class StreamWriter implements AutoCloseable {
5149
private static final Logger log = Logger.getLogger(StreamWriter.class.getName());
@@ -135,6 +133,12 @@ public class StreamWriter implements AutoCloseable {
135133
@GuardedBy("lock")
136134
private final Deque<AppendRequestAndResponse> inflightRequestQueue;
137135

136+
/*
137+
* Contains the updated TableSchema.
138+
*/
139+
@GuardedBy("lock")
140+
private TableSchema updatedSchema;
141+
138142
/*
139143
* A client used to interact with BigQuery.
140144
*/
@@ -528,6 +532,9 @@ private void cleanupInflightRequests() {
528532
private void requestCallback(AppendRowsResponse response) {
529533
AppendRequestAndResponse requestWrapper;
530534
this.lock.lock();
535+
if (response.hasUpdatedSchema()) {
536+
this.updatedSchema = response.getUpdatedSchema();
537+
}
531538
try {
532539
// Had a successful connection with at least one result, reset retries.
533540
// conectionRetryCountWithoutCallback is reset so that only multiple retries, without
@@ -624,7 +631,12 @@ public static StreamWriter.Builder newBuilder(String streamName) {
624631
return new StreamWriter.Builder(streamName);
625632
}
626633

627-
/** A builder of {@link StreamWriterV2}s. */
634+
/** Thread-safe getter of updated TableSchema */
635+
public synchronized TableSchema getUpdatedSchema() {
636+
return this.updatedSchema;
637+
}
638+
639+
/** A builder of {@link StreamWriter}s. */
628640
public static final class Builder {
629641

630642
private static final long DEFAULT_MAX_INFLIGHT_REQUESTS = 1000L;
@@ -651,6 +663,8 @@ public static final class Builder {
651663

652664
private String traceId = null;
653665

666+
private TableSchema updatedTableSchema = null;
667+
654668
private Builder(String streamName) {
655669
this.streamName = Preconditions.checkNotNull(streamName);
656670
this.client = null;

google-cloud-bigquerystorage/src/test/java/com/google/cloud/bigquery/storage/v1/JsonStreamWriterTest.java

Lines changed: 111 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
package com.google.cloud.bigquery.storage.v1;
1717

1818
import static org.junit.Assert.assertEquals;
19+
import static org.junit.Assert.assertTrue;
1920

2021
import com.google.api.core.ApiFuture;
2122
import com.google.api.gax.core.ExecutorProvider;
@@ -26,6 +27,7 @@
2627
import com.google.api.gax.grpc.testing.MockServiceHelper;
2728
import com.google.cloud.bigquery.storage.test.JsonTest;
2829
import com.google.cloud.bigquery.storage.test.Test.FooType;
30+
import com.google.cloud.bigquery.storage.test.Test.UpdatedFooType;
2931
import com.google.protobuf.Descriptors.DescriptorValidationException;
3032
import com.google.protobuf.Int64Value;
3133
import com.google.protobuf.Timestamp;
@@ -193,7 +195,7 @@ public void testSingleAppendSimpleJson() throws Exception {
193195
.getSerializedRows(0),
194196
expectedProto.toByteString());
195197
assertEquals(
196-
testBigQueryWrite.getAppendRequests().get(0).getTraceId(), "JsonWriterBeta_test:empty");
198+
testBigQueryWrite.getAppendRequests().get(0).getTraceId(), "JsonWriter_test:empty");
197199
}
198200
}
199201

@@ -284,8 +286,7 @@ public void testSingleAppendMultipleSimpleJson() throws Exception {
284286
.getProtoRows()
285287
.getRows()
286288
.getSerializedRowsCount());
287-
assertEquals(
288-
testBigQueryWrite.getAppendRequests().get(0).getTraceId(), "JsonWriterBeta:null");
289+
assertEquals(testBigQueryWrite.getAppendRequests().get(0).getTraceId(), "JsonWriter:null");
289290
for (int i = 0; i < 4; i++) {
290291
assertEquals(
291292
testBigQueryWrite
@@ -388,4 +389,111 @@ public void testCreateDefaultStream() throws Exception {
388389
assertEquals("projects/p/datasets/d/tables/t/_default", writer.getStreamName());
389390
}
390391
}
392+
393+
@Test
394+
public void testSimpleSchemaUpdate() throws Exception {
395+
try (JsonStreamWriter writer =
396+
getTestJsonStreamWriterBuilder(TEST_STREAM, TABLE_SCHEMA).build()) {
397+
testBigQueryWrite.addResponse(
398+
AppendRowsResponse.newBuilder()
399+
.setAppendResult(
400+
AppendRowsResponse.AppendResult.newBuilder().setOffset(Int64Value.of(0)).build())
401+
.setUpdatedSchema(UPDATED_TABLE_SCHEMA)
402+
.build());
403+
testBigQueryWrite.addResponse(
404+
AppendRowsResponse.newBuilder()
405+
.setAppendResult(
406+
AppendRowsResponse.AppendResult.newBuilder().setOffset(Int64Value.of(1)).build())
407+
.build());
408+
testBigQueryWrite.addResponse(
409+
AppendRowsResponse.newBuilder()
410+
.setAppendResult(
411+
AppendRowsResponse.AppendResult.newBuilder().setOffset(Int64Value.of(2)).build())
412+
.build());
413+
testBigQueryWrite.addResponse(
414+
AppendRowsResponse.newBuilder()
415+
.setAppendResult(
416+
AppendRowsResponse.AppendResult.newBuilder().setOffset(Int64Value.of(3)).build())
417+
.build());
418+
// First append
419+
JSONObject foo = new JSONObject();
420+
foo.put("foo", "aaa");
421+
JSONArray jsonArr = new JSONArray();
422+
jsonArr.put(foo);
423+
424+
ApiFuture<AppendRowsResponse> appendFuture1 = writer.append(jsonArr);
425+
ApiFuture<AppendRowsResponse> appendFuture2 = writer.append(jsonArr);
426+
ApiFuture<AppendRowsResponse> appendFuture3 = writer.append(jsonArr);
427+
428+
assertEquals(0L, appendFuture1.get().getAppendResult().getOffset().getValue());
429+
assertEquals(1L, appendFuture2.get().getAppendResult().getOffset().getValue());
430+
assertEquals(
431+
1,
432+
testBigQueryWrite
433+
.getAppendRequests()
434+
.get(0)
435+
.getProtoRows()
436+
.getRows()
437+
.getSerializedRowsCount());
438+
assertEquals(
439+
testBigQueryWrite
440+
.getAppendRequests()
441+
.get(0)
442+
.getProtoRows()
443+
.getRows()
444+
.getSerializedRows(0),
445+
FooType.newBuilder().setFoo("aaa").build().toByteString());
446+
447+
assertEquals(2L, appendFuture3.get().getAppendResult().getOffset().getValue());
448+
assertEquals(
449+
1,
450+
testBigQueryWrite
451+
.getAppendRequests()
452+
.get(1)
453+
.getProtoRows()
454+
.getRows()
455+
.getSerializedRowsCount());
456+
assertEquals(
457+
testBigQueryWrite
458+
.getAppendRequests()
459+
.get(1)
460+
.getProtoRows()
461+
.getRows()
462+
.getSerializedRows(0),
463+
FooType.newBuilder().setFoo("aaa").build().toByteString());
464+
465+
// Second append with updated schema.
466+
JSONObject updatedFoo = new JSONObject();
467+
updatedFoo.put("foo", "aaa");
468+
updatedFoo.put("bar", "bbb");
469+
JSONArray updatedJsonArr = new JSONArray();
470+
updatedJsonArr.put(updatedFoo);
471+
472+
ApiFuture<AppendRowsResponse> appendFuture4 = writer.append(updatedJsonArr);
473+
474+
assertEquals(3L, appendFuture4.get().getAppendResult().getOffset().getValue());
475+
assertEquals(4, testBigQueryWrite.getAppendRequests().size());
476+
assertEquals(
477+
1,
478+
testBigQueryWrite
479+
.getAppendRequests()
480+
.get(3)
481+
.getProtoRows()
482+
.getRows()
483+
.getSerializedRowsCount());
484+
assertEquals(
485+
testBigQueryWrite
486+
.getAppendRequests()
487+
.get(3)
488+
.getProtoRows()
489+
.getRows()
490+
.getSerializedRows(0),
491+
UpdatedFooType.newBuilder().setFoo("aaa").setBar("bbb").build().toByteString());
492+
493+
assertTrue(testBigQueryWrite.getAppendRequests().get(0).getProtoRows().hasWriterSchema());
494+
assertTrue(
495+
testBigQueryWrite.getAppendRequests().get(2).getProtoRows().hasWriterSchema()
496+
|| testBigQueryWrite.getAppendRequests().get(3).getProtoRows().hasWriterSchema());
497+
}
498+
}
391499
}

0 commit comments

Comments
 (0)