
Commit 663c899

feat(bigquery/storage/managedwriter): naming and doc improvements (#4508)
Minor changes: WithTracePrefix -> WithTraceID for the option and accompanying downstream usage; exported TableParentFromStreamName to aid users of BatchCommit. The rest of the PR represents docstring improvements. Towards #4366
1 parent 41246e9 commit 663c899

9 files changed: +69 −37 lines changed

Diff for: bigquery/storage/managedwriter/appendresult.go

+3 −2

@@ -47,11 +47,12 @@ func newAppendResult(data []byte) *AppendResult {
 	}
 }

-// Ready blocks until the append request is completed.
+// Ready blocks until the append request has reached a completed state,
+// which may be a successful append or an error.
 func (ar *AppendResult) Ready() <-chan struct{} { return ar.ready }

 // GetResult returns the optional offset of this row, or the associated
-// error.
+// error. It blocks until the result is ready.
 func (ar *AppendResult) GetResult(ctx context.Context) (int64, error) {
 	select {
 	case <-ctx.Done():

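For context, a minimal usage sketch (not part of the diff) of the clarified Ready/GetResult semantics; it assumes results came from a prior AppendRows call on a managed stream:

package example

import (
	"context"
	"log"

	"cloud.google.com/go/bigquery/storage/managedwriter"
)

// waitForAppends blocks on each AppendResult returned by a prior AppendRows
// call and logs the confirmed offset or the append error.
func waitForAppends(ctx context.Context, results []*managedwriter.AppendResult) {
	for _, r := range results {
		// Ready is closed once the append reaches a completed state,
		// whether that is a successful append or an error.
		<-r.Ready()
		// GetResult blocks until the result is ready and returns the
		// optional row offset or the associated error.
		offset, err := r.GetResult(ctx)
		if err != nil {
			log.Printf("append failed: %v", err)
			continue
		}
		log.Printf("append confirmed at offset %d", offset)
	}
}
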
Diff for: bigquery/storage/managedwriter/client.go

+13 −6

@@ -143,7 +143,7 @@ func (c *Client) validateOptions(ctx context.Context, ms *ManagedStream) error {
 		}
 		// update type and destination based on stream metadata
 		ms.streamSettings.streamType = StreamType(info.Type.String())
-		ms.destinationTable = tableParentFromStreamName(ms.streamSettings.streamID)
+		ms.destinationTable = TableParentFromStreamName(ms.streamSettings.streamID)
 	}
 	if ms.destinationTable == "" {
 		return fmt.Errorf("no destination table specified")
@@ -158,7 +158,13 @@ func (c *Client) validateOptions(ctx context.Context, ms *ManagedStream) error {
 // BatchCommit is used to commit one or more PendingStream streams belonging to the same table
 // as a single transaction. Streams must be finalized before committing.
 //
-// TODO: this currently exposes the raw proto response, but a future CL will wrap this with a nicer type.
+// Format of the parentTable is: projects/{project}/datasets/{dataset}/tables/{table} and the utility
+// function TableParentFromStreamName can be used to derive this from a Stream's name.
+//
+// If the returned response contains stream errors, this indicates that the batch commit failed and no data was
+// committed.
+//
+// TODO: currently returns the raw response. Determine how we want to surface StreamErrors.
 func (c *Client) BatchCommit(ctx context.Context, parentTable string, streamNames []string) (*storagepb.BatchCommitWriteStreamsResponse, error) {

 	// determine table from first streamName, as all must share the same table.
@@ -167,7 +173,7 @@ func (c *Client) BatchCommit(ctx context.Context, parentTable string, streamName
 	}

 	req := &storagepb.BatchCommitWriteStreamsRequest{
-		Parent:       tableParentFromStreamName(streamNames[0]),
+		Parent:       TableParentFromStreamName(streamNames[0]),
 		WriteStreams: streamNames,
 	}
 	return c.rawClient.BatchCommitWriteStreams(ctx, req)
@@ -183,9 +189,10 @@ func (c *Client) getWriteStream(ctx context.Context, streamName string) (*storag
 	return c.rawClient.GetWriteStream(ctx, req)
 }

-// tableParentFromStreamName return the corresponding parent table
-// identifier given a fully qualified streamname.
-func tableParentFromStreamName(streamName string) string {
+// TableParentFromStreamName is a utility function for extracting the parent table
+// prefix from a stream name. When an invalid stream ID is passed, this simply returns
+// the original stream name.
+func TableParentFromStreamName(streamName string) string {
 	// Stream IDs have the following prefix:
 	// projects/{project}/datasets/{dataset}/tables/{table}/blah
 	parts := strings.SplitN(streamName, "/", 7)

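For context, a minimal sketch (not part of the diff) of the BatchCommit pattern the new docstring describes: derive the parent table via the newly exported TableParentFromStreamName and treat stream errors in the response as a failed commit. The GetStreamErrors getter on the response is assumed from the generated v1beta2 proto:

package example

import (
	"context"
	"fmt"

	"cloud.google.com/go/bigquery/storage/managedwriter"
)

// commitPendingStreams commits a set of finalized PendingStream streams that
// all belong to the same destination table.
func commitPendingStreams(ctx context.Context, client *managedwriter.Client, streamNames []string) error {
	// Derive the projects/{project}/datasets/{dataset}/tables/{table} parent
	// from the first stream, as the BatchCommit docstring suggests.
	parent := managedwriter.TableParentFromStreamName(streamNames[0])

	resp, err := client.BatchCommit(ctx, parent, streamNames)
	if err != nil {
		return fmt.Errorf("BatchCommit: %w", err)
	}
	// Stream errors in the response mean the commit failed and no data was
	// committed (getter assumed from the generated proto response type).
	if len(resp.GetStreamErrors()) > 0 {
		return fmt.Errorf("BatchCommit: %d stream errors returned", len(resp.GetStreamErrors()))
	}
	return nil
}
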
Diff for: bigquery/storage/managedwriter/client_test.go

+1 −1

@@ -40,7 +40,7 @@ func TestTableParentFromStreamName(t *testing.T) {
 	}

 	for _, tc := range testCases {
-		got := tableParentFromStreamName(tc.in)
+		got := TableParentFromStreamName(tc.in)
 		if got != tc.want {
 			t.Errorf("mismatch on %s: got %s want %s", tc.in, got, tc.want)
 		}

Diff for: bigquery/storage/managedwriter/doc.go

+20 −8

@@ -12,12 +12,24 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-// Package managedwriter will be a thick client around the storage API's BigQueryWriteClient.
-//
-// It is EXPERIMENTAL and subject to change or removal without notice. This library is in a pre-alpha
-// state, and breaking changes are frequent.
-//
-// Currently, the BigQueryWriteClient this library targets is exposed in the storage v1beta2 endpoint, and is
-// a successor to the streaming interface. API method tabledata.insertAll is the primary backend method, and
-// the Inserter abstraction is the equivalent to this in the cloud.google.com/go/bigquery package.
+/*
+Package managedwriter provides an EXPERIMENTAL thick client around the BigQuery storage API's BigQueryWriteClient.
+More information about this new write client may also be found in the public documentation: https://cloud.google.com/bigquery/docs/write-api
+
+It is EXPERIMENTAL and subject to change or removal without notice. This library is in a pre-alpha
+state, and breaking changes are frequent.
+
+Currently, this client targets the BigQueryWriteClient present in the v1beta2 endpoint, and is intended as a more
+feature-rich successor to the classic BigQuery streaming interface, which is presented as the Inserter abstraction
+in cloud.google.com/go/bigquery, and the tabledata.insertAll method if you're more familiar with the BigQuery v2 REST
+methods.
+
+Appending data is accomplished through the use of streams. There are four stream types, each targetting slightly different
+use cases and needs. See the StreamType documentation for more details about each stream type.
+
+This API uses serialized protocol buffer messages for appending data to streams. For users who don't have predefined protocol
+buffer messages for sending data, the cloud.google.com/go/bigquery/storage/managedwriter/adapt subpackage includes functionality
+for defining protocol buffer messages dynamically using table schema information, which enables users to do things like using
+protojson to convert json text into a protocol buffer.
+*/
 package managedwriter

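For context, a minimal sketch (not part of the diff) of the protojson flow the new package docs mention: convert a JSON payload into a protocol buffer message whose schema matches the destination table, then serialize it into the binary row bytes AppendRows expects. How the message type is obtained (precompiled, or built dynamically via the adapt subpackage) is left out here:

package example

import (
	"fmt"

	"google.golang.org/protobuf/encoding/protojson"
	"google.golang.org/protobuf/proto"
)

// jsonToRowBytes converts a JSON payload into the supplied protocol buffer
// message and then serializes it into the binary form AppendRows expects.
func jsonToRowBytes(jsonData []byte, msg proto.Message) ([]byte, error) {
	if err := protojson.Unmarshal(jsonData, msg); err != nil {
		return nil, fmt.Errorf("protojson.Unmarshal: %w", err)
	}
	b, err := proto.Marshal(msg)
	if err != nil {
		return nil, fmt.Errorf("proto.Marshal: %w", err)
	}
	return b, nil
}
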
Diff for: bigquery/storage/managedwriter/integration_test.go

+1 −1

@@ -448,7 +448,7 @@ func testPendingStream(ctx context.Context, t *testing.T, mwClient *Client, bqCl
 	}

 	// Commit stream and validate.
-	resp, err := mwClient.BatchCommit(ctx, tableParentFromStreamName(ms.StreamName()), []string{ms.StreamName()})
+	resp, err := mwClient.BatchCommit(ctx, TableParentFromStreamName(ms.StreamName()), []string{ms.StreamName()})
 	if err != nil {
 		t.Errorf("client.BatchCommit: %v", err)
 	}

Diff for: bigquery/storage/managedwriter/managed_stream.go

+16 −5

@@ -45,6 +45,9 @@ var (

 	// BufferedStream is a form of checkpointed stream, that allows
 	// you to advance the offset of visible rows via Flush operations.
+	//
+	// NOTE: Buffered Streams are currently in limited preview, and as such
+	// methods like FlushRows() may yield errors for non-enrolled projects.
 	BufferedStream StreamType = "BUFFERED"

 	// PendingStream is a stream in which no data is made visible to
@@ -106,17 +109,17 @@ type streamSettings struct {
 	// request bytes can be outstanding into the system.
 	MaxInflightBytes int

-	// TracePrefix sets a suitable prefix for the trace ID set on
-	// append requests. Useful for diagnostic purposes.
-	TracePrefix string
+	// TraceID can be set when appending data on a stream. It's
+	// purpose is to aid in debug and diagnostic scenarios.
+	TraceID string
 }

 func defaultStreamSettings() *streamSettings {
 	return &streamSettings{
 		streamType:          DefaultStream,
 		MaxInflightRequests: 1000,
 		MaxInflightBytes:    0,
-		TracePrefix:         "defaultManagedWriter",
+		TraceID:             "",
 	}
 }

@@ -241,7 +244,9 @@ func (ms *ManagedStream) append(pw *pendingWrite, opts ...gax.CallOption) error
 		reqCopy.GetProtoRows().WriterSchema = &storagepb.ProtoSchema{
 			ProtoDescriptor: ms.schemaDescriptor,
 		}
-		reqCopy.TraceId = ms.streamSettings.TracePrefix
+		if ms.streamSettings.TraceID != "" {
+			reqCopy.TraceId = ms.streamSettings.TraceID
+		}
 		req = &reqCopy
 	})

@@ -290,10 +295,16 @@ func (ms *ManagedStream) Close() error {
 	ms.mu.Lock()
 	ms.err = io.EOF
 	ms.mu.Unlock()
+	// Propagate cancellation.
+	if ms.cancel != nil {
+		ms.cancel()
+	}
 	return err
 }

 // AppendRows sends the append requests to the service, and returns one AppendResult per row.
+// The format of the row data is binary serialized protocol buffer bytes, and and the message
+// must adhere to the format of the schema Descriptor passed in when creating the managed stream.
 func (ms *ManagedStream) AppendRows(ctx context.Context, data [][]byte, offset int64) ([]*AppendResult, error) {
 	pw := newPendingWrite(data, offset)
 	// check flow control

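For context, a minimal sketch (not part of the diff) of the AppendRows contract documented above: each row is a binary-serialized protocol buffer message matching the schema descriptor the stream was created with:

package example

import (
	"context"
	"fmt"

	"cloud.google.com/go/bigquery/storage/managedwriter"
	"google.golang.org/protobuf/proto"
)

// appendMessages marshals each message into the binary proto bytes AppendRows
// expects and issues a single append at the given offset.
func appendMessages(ctx context.Context, ms *managedwriter.ManagedStream, msgs []proto.Message, offset int64) ([]*managedwriter.AppendResult, error) {
	rows := make([][]byte, 0, len(msgs))
	for _, m := range msgs {
		// Each message must match the schema descriptor supplied when the
		// managed stream was created.
		b, err := proto.Marshal(m)
		if err != nil {
			return nil, fmt.Errorf("proto.Marshal: %w", err)
		}
		rows = append(rows, b)
	}
	// Writers that don't manage explicit offsets (e.g. the default stream)
	// would typically pass the package's NoStreamOffset sentinel here.
	return ms.AppendRows(ctx, rows, offset)
}
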
Diff for: bigquery/storage/managedwriter/managed_stream_test.go

+1 −1

@@ -115,7 +115,7 @@ func TestManagedStream_FirstAppendBehavior(t *testing.T) {
 		fc: newFlowController(0, 0),
 	}
 	ms.streamSettings.streamID = "FOO"
-	ms.streamSettings.TracePrefix = "TRACE"
+	ms.streamSettings.TraceID = "TRACE"
 	ms.schemaDescriptor = schema

 	fakeData := [][]byte{

Diff for: bigquery/storage/managedwriter/writer_option.go

+10 −9

@@ -16,10 +16,10 @@ package managedwriter

 import "google.golang.org/protobuf/types/descriptorpb"

-// WriterOption is used to configure a ManagedWriteClient.
+// WriterOption are variadic options used to configure a ManagedStream instance.
 type WriterOption func(*ManagedStream)

-// WithType sets the write type of the new writer.
+// WithType sets the stream type for the managed stream.
 func WithType(st StreamType) WriterOption {
 	return func(ms *ManagedStream) {
 		ms.streamSettings.streamType = st
@@ -28,10 +28,10 @@ func WithType(st StreamType) WriterOption {

 // WithStreamName allows users to set the stream name this writer will
 // append to explicitly. By default, the managed client will create the
-// stream when instantiated.
+// stream when instantiated if necessary.
 //
-// Note: Supplying this option causes other options such as WithStreamType
-// and WithDestinationTable to be ignored.
+// Note: Supplying this option causes other options which affect stream construction
+// such as WithStreamType and WithDestinationTable to be ignored.
 func WithStreamName(name string) WriterOption {
 	return func(ms *ManagedStream) {
 		ms.streamSettings.streamID = name
@@ -62,15 +62,16 @@ func WithMaxInflightBytes(n int) WriterOption {
 	}
 }

-// WithTracePrefix allows instruments requests to the service with a custom trace prefix.
+// WithTraceID allows instruments requests to the service with a custom trace prefix.
 // This is generally for diagnostic purposes only.
-func WithTracePrefix(prefix string) WriterOption {
+func WithTraceID(traceID string) WriterOption {
 	return func(ms *ManagedStream) {
-		ms.streamSettings.TracePrefix = prefix
+		ms.streamSettings.TraceID = traceID
 	}
 }

-// WithSchemaDescriptor describes the format of messages you'll be sending to the service.
+// WithSchemaDescriptor describes the format of the serialized data being sent by
+// AppendRows calls on the stream.
 func WithSchemaDescriptor(dp *descriptorpb.DescriptorProto) WriterOption {
 	return func(ms *ManagedStream) {
 		ms.schemaDescriptor = dp

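For context, a minimal sketch (not part of the diff) of wiring the renamed WithTraceID option into stream construction. NewManagedStream, WithDestinationTable, and the placeholder table path are assumed from the rest of the package and are illustrative only:

package example

import (
	"context"
	"fmt"

	"cloud.google.com/go/bigquery/storage/managedwriter"
	"google.golang.org/protobuf/types/descriptorpb"
)

// newTracedWriter constructs a default-type managed stream that stamps a
// diagnostic trace ID onto its append requests.
func newTracedWriter(ctx context.Context, client *managedwriter.Client, dp *descriptorpb.DescriptorProto) (*managedwriter.ManagedStream, error) {
	ms, err := client.NewManagedStream(ctx,
		// Placeholder destination table; substitute your table's resource name.
		managedwriter.WithDestinationTable("projects/myproject/datasets/mydataset/tables/mytable"),
		managedwriter.WithType(managedwriter.DefaultStream),
		managedwriter.WithSchemaDescriptor(dp),
		// The trace ID is forwarded on append requests for diagnostics only.
		managedwriter.WithTraceID("my-application"),
	)
	if err != nil {
		return nil, fmt.Errorf("NewManagedStream: %w", err)
	}
	return ms, nil
}
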
Diff for: bigquery/storage/managedwriter/writer_option_test.go

+4 −4

@@ -63,12 +63,12 @@ func TestWriterOptions(t *testing.T) {
 		},
 		{
 			desc:    "WithTracePrefix",
-			options: []WriterOption{WithTracePrefix("foo")},
+			options: []WriterOption{WithTraceID("foo")},
 			want: func() *ManagedStream {
 				ms := &ManagedStream{
 					streamSettings: defaultStreamSettings(),
 				}
-				ms.streamSettings.TracePrefix = "foo"
+				ms.streamSettings.TraceID = "foo"
 				return ms
 			}(),
 		},
@@ -88,15 +88,15 @@ func TestWriterOptions(t *testing.T) {
 			options: []WriterOption{
 				WithType(PendingStream),
 				WithMaxInflightBytes(5),
-				WithTracePrefix("pre"),
+				WithTraceID("id"),
 			},
 			want: func() *ManagedStream {
 				ms := &ManagedStream{
 					streamSettings: defaultStreamSettings(),
 				}
 				ms.streamSettings.MaxInflightBytes = 5
 				ms.streamSettings.streamType = PendingStream
-				ms.streamSettings.TracePrefix = "pre"
+				ms.streamSettings.TraceID = "id"
 				return ms
 			}(),
 		},
