// storage.proto
// 566 lines (499 loc) · 21.5 KB
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.bigquery.storage.v1beta2;
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/bigquery/storage/v1beta2/arrow.proto";
import "google/cloud/bigquery/storage/v1beta2/avro.proto";
import "google/cloud/bigquery/storage/v1beta2/protobuf.proto";
import "google/cloud/bigquery/storage/v1beta2/stream.proto";
import "google/cloud/bigquery/storage/v1beta2/table.proto";
import "google/protobuf/timestamp.proto";
import "google/protobuf/wrappers.proto";
import "google/rpc/status.proto";
option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta2;storage";
option java_multiple_files = true;
option java_outer_classname = "StorageProto";
option java_package = "com.google.cloud.bigquery.storage.v1beta2";
// BigQuery Read API.
//
// The Read API can be used to read data from BigQuery.
//
// New code should use the v1 Read API going forward, if it does not also use
// the Write API at the same time.
service BigQueryRead {
option (google.api.default_host) = "bigquerystorage.googleapis.com";
option (google.api.oauth_scopes) =
"https://www.googleapis.com/auth/bigquery,"
"https://www.googleapis.com/auth/bigquery.readonly,"
"https://www.googleapis.com/auth/cloud-platform";
// Creates a new read session. A read session divides the contents of a
// BigQuery table into one or more streams, which can then be used to read
// data from the table. The read session also specifies properties of the
// data to be read, such as a list of columns or a push-down filter describing
// the rows to be returned.
//
// A particular row can be read by at most one stream. When the caller has
// reached the end of each stream in the session, then all the data in the
// table has been read.
//
// Data is assigned to each stream such that roughly the same number of
// rows can be read from each stream. Because the server-side unit for
// assigning data is collections of rows, the API does not guarantee that
// each stream will return the same number of rows. Additionally, the
// limits are enforced based on the number of pre-filtered rows, so some
// filters can lead to lopsided assignments.
//
// Read sessions automatically expire 24 hours after they are created and do
// not require manual clean-up by the caller.
rpc CreateReadSession(CreateReadSessionRequest) returns (ReadSession) {
option (google.api.http) = {
post: "/v1beta2/{read_session.table=projects/*/datasets/*/tables/*}"
body: "*"
};
option (google.api.method_signature) = "parent,read_session,max_stream_count";
}
// Reads rows from the stream in the format prescribed by the ReadSession.
// Each response contains one or more table rows, up to a maximum of 100 MiB
// per response; read requests which attempt to read individual rows larger
// than 100 MiB will fail.
//
// Each request also returns a set of stream statistics reflecting the current
// state of the stream.
rpc ReadRows(ReadRowsRequest) returns (stream ReadRowsResponse) {
option (google.api.http) = {
get: "/v1beta2/{read_stream=projects/*/locations/*/sessions/*/streams/*}"
};
option (google.api.method_signature) = "read_stream,offset";
}
// Splits a given `ReadStream` into two `ReadStream` objects. These
// `ReadStream` objects are referred to as the primary and the residual
// streams of the split. The original `ReadStream` can still be read from in
// the same manner as before. Both of the returned `ReadStream` objects can
// also be read from, and the rows returned by both child streams will be
// the same as the rows read from the original stream.
//
// Moreover, the two child streams will be allocated back-to-back in the
// original `ReadStream`. Concretely, it is guaranteed that for streams
// original, primary, and residual, that original[0-j] = primary[0-j] and
// original[j-n] = residual[0-m] once the streams have been read to
// completion.
rpc SplitReadStream(SplitReadStreamRequest) returns (SplitReadStreamResponse) {
option (google.api.http) = {
get: "/v1beta2/{name=projects/*/locations/*/sessions/*/streams/*}"
};
}
}
// BigQuery Write API.
//
// The Write API can be used to write data to BigQuery.
service BigQueryWrite {
option (google.api.default_host) = "bigquerystorage.googleapis.com";
option (google.api.oauth_scopes) =
"https://www.googleapis.com/auth/bigquery,"
"https://www.googleapis.com/auth/bigquery.insertdata,"
"https://www.googleapis.com/auth/cloud-platform";
// Creates a write stream to the given table.
// Additionally, every table has a special COMMITTED stream named '_default'
// to which data can be written. This stream doesn't need to be created using
// CreateWriteStream. It is a stream that can be used simultaneously by any
// number of clients. Data written to this stream is considered committed as
// soon as an acknowledgement is received.
rpc CreateWriteStream(CreateWriteStreamRequest) returns (WriteStream) {
option (google.api.http) = {
post: "/v1beta2/{parent=projects/*/datasets/*/tables/*}"
body: "write_stream"
};
option (google.api.method_signature) = "parent,write_stream";
}
// Appends data to the given stream.
//
// If `offset` is specified, the `offset` is checked against the end of
// stream. The server returns `OUT_OF_RANGE` in `AppendRowsResponse` if an
// attempt is made to append to an offset beyond the current end of the stream
// or `ALREADY_EXISTS` if user provides an `offset` that has already been
// written to. User can retry with adjusted offset within the same RPC
// stream. If `offset` is not specified, append happens at the end of the
// stream.
//
// The response contains the offset at which the append happened. Responses
// are received in the same order in which requests are sent. There will be
// one response for each successful request. If the `offset` is not set in
// response, it means append didn't happen due to some errors. If one request
// fails, all the subsequent requests will also fail until a success request
// is made again.
//
// If the stream is of `PENDING` type, data will only be available for read
// operations after the stream is committed.
rpc AppendRows(stream AppendRowsRequest) returns (stream AppendRowsResponse) {
option (google.api.http) = {
post: "/v1beta2/{write_stream=projects/*/datasets/*/tables/*/streams/*}"
body: "*"
};
option (google.api.method_signature) = "write_stream";
}
// Gets information about a write stream.
rpc GetWriteStream(GetWriteStreamRequest) returns (WriteStream) {
option (google.api.http) = {
post: "/v1beta2/{name=projects/*/datasets/*/tables/*/streams/*}"
body: "*"
};
option (google.api.method_signature) = "name";
}
// Finalize a write stream so that no new data can be appended to the
// stream. Finalize is not supported on the '_default' stream.
rpc FinalizeWriteStream(FinalizeWriteStreamRequest) returns (FinalizeWriteStreamResponse) {
option (google.api.http) = {
post: "/v1beta2/{name=projects/*/datasets/*/tables/*/streams/*}"
body: "*"
};
option (google.api.method_signature) = "name";
}
// Atomically commits a group of `PENDING` streams that belong to the same
// `parent` table.
// Streams must be finalized before commit and cannot be committed multiple
// times. Once a stream is committed, data in the stream becomes available
// for read operations.
rpc BatchCommitWriteStreams(BatchCommitWriteStreamsRequest) returns (BatchCommitWriteStreamsResponse) {
option (google.api.http) = {
get: "/v1beta2/{parent=projects/*/datasets/*/tables/*}"
};
option (google.api.method_signature) = "parent";
}
// Flushes rows to a BUFFERED stream.
// If users are appending rows to BUFFERED stream, flush operation is
// required in order for the rows to become available for reading. A
// Flush operation flushes up to any previously flushed offset in a BUFFERED
// stream, to the offset specified in the request.
// Flush is not supported on the _default stream, since it is not BUFFERED.
rpc FlushRows(FlushRowsRequest) returns (FlushRowsResponse) {
option (google.api.http) = {
post: "/v1beta2/{write_stream=projects/*/datasets/*/tables/*/streams/*}"
body: "*"
};
option (google.api.method_signature) = "write_stream";
}
}
// Request message for `CreateReadSession`.
message CreateReadSessionRequest {
// Required. The request project that owns the session, in the form of
// `projects/{project_id}`.
string parent = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
type: "cloudresourcemanager.googleapis.com/Project"
}
];
// Required. Session to be created.
ReadSession read_session = 2 [(google.api.field_behavior) = REQUIRED];
// Max initial number of streams. If unset or zero, the server will
// provide a value of streams so as to produce reasonable throughput. Must be
// non-negative. The number of streams may be lower than the requested number,
// depending on the amount of parallelism that is reasonable for the table.
// An error will be returned if the max count is greater than the current
// system max limit of 1,000.
//
// Streams must be read starting from offset 0.
int32 max_stream_count = 3;
}
// Request message for `ReadRows`.
message ReadRowsRequest {
// Required. Stream to read rows from.
string read_stream = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
type: "bigquerystorage.googleapis.com/ReadStream"
}
];
// The offset requested must be less than the last row read from Read.
// Requesting a larger offset is undefined. If not specified, start reading
// from offset zero.
int64 offset = 2;
}
// Information about whether the current connection is being throttled.
message ThrottleState {
// How much this connection is being throttled. Zero means no throttling,
// 100 means fully throttled.
int32 throttle_percent = 1;
}
// Estimated stream statistics for a given Stream.
message StreamStats {
// Estimate of the fraction of the stream's assigned rows processed so far.
message Progress {
// The fraction of rows assigned to the stream that have been processed by
// the server so far, not including the rows in the current response
// message.
//
// This value, along with `at_response_end`, can be used to interpolate
// the progress made as the rows in the message are being processed using
// the following formula: `at_response_start + (at_response_end -
// at_response_start) * rows_processed_from_response / rows_in_response`.
//
// Note that if a filter is provided, the `at_response_end` value of the
// previous response may not necessarily be equal to the
// `at_response_start` value of the current response.
double at_response_start = 1;
// Similar to `at_response_start`, except that this value includes the
// rows in the current response.
double at_response_end = 2;
}
// Represents the progress of the current stream.
Progress progress = 2;
}
// Response from calling `ReadRows` may include row data, progress and
// throttling information.
message ReadRowsResponse {
// Row data is returned in format specified during session creation.
oneof rows {
// Serialized row data in AVRO format.
AvroRows avro_rows = 3;
// Serialized row data in Arrow RecordBatch format.
ArrowRecordBatch arrow_record_batch = 4;
}
// Number of serialized rows in the rows block.
int64 row_count = 6;
// Statistics for the stream.
StreamStats stats = 2;
// Throttling state. If unset, the latest response still describes
// the current throttling status.
ThrottleState throttle_state = 5;
// The schema for the read. If read_options.selected_fields is set, the
// schema may be different from the table schema as it will only contain
// the selected fields. This schema is equivalent to the one returned by
// CreateReadSession. This field is only populated in the first
// ReadRowsResponse RPC.
oneof schema {
// Output only. Avro schema.
AvroSchema avro_schema = 7 [(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. Arrow schema.
ArrowSchema arrow_schema = 8 [(google.api.field_behavior) = OUTPUT_ONLY];
}
}
// Request message for `SplitReadStream`.
message SplitReadStreamRequest {
// Required. Name of the stream to split.
string name = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
type: "bigquerystorage.googleapis.com/ReadStream"
}
];
// A value in the range (0.0, 1.0) that specifies the fractional point at
// which the original stream should be split. The actual split point is
// evaluated on pre-filtered rows, so if a filter is provided, then there is
// no guarantee that the division of the rows between the new child streams
// will be proportional to this fractional value. Additionally, because the
// server-side unit for assigning data is collections of rows, this fraction
// will always map to a data storage boundary on the server side.
double fraction = 2;
}
// Response message for `SplitReadStream`.
message SplitReadStreamResponse {
// Primary stream, which contains the beginning portion of
// |original_stream|. An empty value indicates that the original stream can no
// longer be split.
ReadStream primary_stream = 1;
// Remainder stream, which contains the tail of |original_stream|. An empty
// value indicates that the original stream can no longer be split.
ReadStream remainder_stream = 2;
}
// Request message for `CreateWriteStream`.
message CreateWriteStreamRequest {
// Required. Reference to the table to which the stream belongs, in the format
// of `projects/{project}/datasets/{dataset}/tables/{table}`.
string parent = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
type: "bigquery.googleapis.com/Table"
}
];
// Required. Stream to be created.
WriteStream write_stream = 2 [(google.api.field_behavior) = REQUIRED];
}
// Request message for `AppendRows`.
message AppendRowsRequest {
// Proto schema and data.
message ProtoData {
// Proto schema used to serialize the data.
ProtoSchema writer_schema = 1;
// Serialized row data in protobuf message format.
ProtoRows rows = 2;
}
// Required. The stream that is the target of the append operation. This value must be
// specified for the initial request. If subsequent requests specify the
// stream name, it must equal to the value provided in the first request.
// To write to the _default stream, populate this field with a string in the
// format `projects/{project}/datasets/{dataset}/tables/{table}/_default`.
string write_stream = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
type: "bigquerystorage.googleapis.com/WriteStream"
}
];
// If present, the write is only performed if the next append offset is same
// as the provided value. If not present, the write is performed at the
// current end of stream. Specifying a value for this field is not allowed
// when calling AppendRows for the '_default' stream.
google.protobuf.Int64Value offset = 2;
// Input rows. The `writer_schema` field must be specified at the initial
// request and currently, it will be ignored if specified in following
// requests. Following requests must have data in the same format as the
// initial request.
oneof rows {
// Rows in proto format.
ProtoData proto_rows = 4;
}
// Id set by client to annotate its identity. Only initial request setting is
// respected.
string trace_id = 6;
}
// Response message for `AppendRows`.
message AppendRowsResponse {
// AppendResult is returned for successful append requests.
message AppendResult {
// The row offset at which the last append occurred. The offset will not be
// set if appending using default streams.
google.protobuf.Int64Value offset = 1;
}
// Exactly one of these is set, depending on whether the append succeeded.
oneof response {
// Result if the append is successful.
AppendResult append_result = 1;
// Error returned when problems were encountered. If present,
// it indicates rows were not accepted into the system.
// Users can retry or continue with other append requests within the
// same connection.
//
// Additional information about error signalling:
//
// ALREADY_EXISTS: Happens when an append specified an offset, and the
// backend already has received data at this offset. Typically encountered
// in retry scenarios, and can be ignored.
//
// OUT_OF_RANGE: Returned when the specified offset in the stream is beyond
// the current end of the stream.
//
// INVALID_ARGUMENT: Indicates a malformed request or data.
//
// ABORTED: Request processing is aborted because of prior failures. The
// request can be retried if previous failure is addressed.
//
// INTERNAL: Indicates server side error(s) that can be retried.
google.rpc.Status error = 2;
}
// If backend detects a schema update, pass it to user so that user can
// use it to input new type of message. It will be empty when no schema
// updates have occurred.
TableSchema updated_schema = 3;
}
// Request message for `GetWriteStream`.
message GetWriteStreamRequest {
// Required. Name of the stream to get, in the form of
// `projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}`.
string name = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
type: "bigquerystorage.googleapis.com/WriteStream"
}
];
}
// Request message for `BatchCommitWriteStreams`.
message BatchCommitWriteStreamsRequest {
// Required. Parent table that all the streams should belong to, in the form of
// `projects/{project}/datasets/{dataset}/tables/{table}`.
string parent = 1 [
(google.api.field_behavior) = REQUIRED
];
// Required. The group of streams that will be committed atomically.
repeated string write_streams = 2 [(google.api.field_behavior) = REQUIRED];
}
// Response message for `BatchCommitWriteStreams`.
message BatchCommitWriteStreamsResponse {
// The time at which streams were committed in microseconds granularity.
// This field will only exist when there are no stream errors.
// **Note** if this field is not set, it means the commit was not successful.
google.protobuf.Timestamp commit_time = 1;
// Stream level error if commit failed. Only streams with error will be in
// the list.
// If empty, there is no error and all streams are committed successfully.
// If non-empty, certain streams have errors and ZERO streams are committed
// due to the atomicity guarantee.
repeated StorageError stream_errors = 2;
}
// Request message for invoking `FinalizeWriteStream`.
message FinalizeWriteStreamRequest {
// Required. Name of the stream to finalize, in the form of
// `projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}`.
string name = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
type: "bigquerystorage.googleapis.com/WriteStream"
}
];
}
// Response message for `FinalizeWriteStream`.
message FinalizeWriteStreamResponse {
// Number of rows in the finalized stream.
int64 row_count = 1;
}
// Request message for `FlushRows`.
message FlushRowsRequest {
// Required. The stream that is the target of the flush operation.
string write_stream = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
type: "bigquerystorage.googleapis.com/WriteStream"
}
];
// Ending offset of the flush operation. Rows before this offset (including
// this offset) will be flushed.
google.protobuf.Int64Value offset = 2;
}
// Response message for `FlushRows`.
message FlushRowsResponse {
// The rows before this offset (including this offset) are flushed.
int64 offset = 1;
}
// Structured custom BigQuery Storage error message. The error can be attached
// as error details in the returned rpc Status. In particular, the use of error
// codes allows more structured error handling, and reduces the need to evaluate
// unstructured error text strings.
message StorageError {
// Error code for `StorageError`.
enum StorageErrorCode {
// Default error.
STORAGE_ERROR_CODE_UNSPECIFIED = 0;
// Table is not found in the system.
TABLE_NOT_FOUND = 1;
// Stream is already committed.
STREAM_ALREADY_COMMITTED = 2;
// Stream is not found.
STREAM_NOT_FOUND = 3;
// Invalid Stream type.
// For example, you try to commit a stream that is not pending.
INVALID_STREAM_TYPE = 4;
// Invalid Stream state.
// For example, you try to commit a stream that is not finalized or has
// been garbage collected.
INVALID_STREAM_STATE = 5;
// Stream is finalized.
STREAM_FINALIZED = 6;
}
// BigQuery Storage specific error code.
StorageErrorCode code = 1;
// Name of the failed entity.
string entity = 2;
// Message that describes the error.
string error_message = 3;
}