feat: add estimated number of rows to CreateReadSession response
PiperOrigin-RevId: 495122850
Authored by Google APIs; committed by Copybara-Service on Dec 13, 2022.
1 parent f40f54b commit 83b2baf
Showing 3 changed files with 48 additions and 35 deletions.
google/cloud/bigquery/storage/v1/storage.proto (32 changes: 16 additions & 16 deletions)
@@ -73,7 +73,8 @@ service BigQueryRead {
post: "/v1/{read_session.table=projects/*/datasets/*/tables/*}"
body: "*"
};
-option (google.api.method_signature) = "parent,read_session,max_stream_count";
+option (google.api.method_signature) =
+    "parent,read_session,max_stream_count";
}

// Reads rows from the stream in the format prescribed by the ReadSession.
@@ -102,7 +103,8 @@ service BigQueryRead {
// original, primary, and residual, that original[0-j] = primary[0-j] and
// original[j-n] = residual[0-m] once the streams have been read to
// completion.
-rpc SplitReadStream(SplitReadStreamRequest) returns (SplitReadStreamResponse) {
+rpc SplitReadStream(SplitReadStreamRequest)
+    returns (SplitReadStreamResponse) {
option (google.api.http) = {
get: "/v1/{name=projects/*/locations/*/sessions/*/streams/*}"
};
@@ -186,7 +188,8 @@ service BigQueryWrite {

// Finalize a write stream so that no new data can be appended to the
// stream. Finalize is not supported on the '_default' stream.
-rpc FinalizeWriteStream(FinalizeWriteStreamRequest) returns (FinalizeWriteStreamResponse) {
+rpc FinalizeWriteStream(FinalizeWriteStreamRequest)
+    returns (FinalizeWriteStreamResponse) {
option (google.api.http) = {
post: "/v1/{name=projects/*/datasets/*/tables/*/streams/*}"
body: "*"
@@ -200,7 +203,8 @@ service BigQueryWrite {
// Streams must be finalized before commit and cannot be committed multiple
// times. Once a stream is committed, data in the stream becomes available
// for read operations.
-rpc BatchCommitWriteStreams(BatchCommitWriteStreamsRequest) returns (BatchCommitWriteStreamsResponse) {
+rpc BatchCommitWriteStreams(BatchCommitWriteStreamsRequest)
+    returns (BatchCommitWriteStreamsResponse) {
option (google.api.http) = {
get: "/v1/{parent=projects/*/datasets/*/tables/*}"
};
@@ -384,9 +388,7 @@ message CreateWriteStreamRequest {
// of `projects/{project}/datasets/{dataset}/tables/{table}`.
string parent = 1 [
(google.api.field_behavior) = REQUIRED,
-(google.api.resource_reference) = {
-  type: "bigquery.googleapis.com/Table"
-}
+(google.api.resource_reference) = { type: "bigquery.googleapis.com/Table" }
];

// Required. Stream to be created.
@@ -434,10 +436,10 @@ message AppendRowsRequest {
DEFAULT_VALUE = 2;
}

-// Required. The write_stream identifies the target of the append operation, and only
-// needs to be specified as part of the first request on the gRPC connection.
-// If provided for subsequent requests, it must match the value of the first
-// request.
+// Required. The write_stream identifies the target of the append operation,
+// and only needs to be specified as part of the first request on the gRPC
+// connection. If provided for subsequent requests, it must match the value of
+// the first request.
//
// For explicitly created write streams, the format is:
//
@@ -562,13 +564,11 @@ message GetWriteStreamRequest {

// Request message for `BatchCommitWriteStreams`.
message BatchCommitWriteStreamsRequest {
-// Required. Parent table that all the streams should belong to, in the form of
-// `projects/{project}/datasets/{dataset}/tables/{table}`.
+// Required. Parent table that all the streams should belong to, in the form
+// of `projects/{project}/datasets/{dataset}/tables/{table}`.
string parent = 1 [
(google.api.field_behavior) = REQUIRED,
-(google.api.resource_reference) = {
-  type: "bigquery.googleapis.com/Table"
-}
+(google.api.resource_reference) = { type: "bigquery.googleapis.com/Table" }
];

// Required. The group of streams that will be committed atomically.
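The comments on BatchCommitWriteStreams earlier in this file describe the write-stream lifecycle: finalize first, commit exactly once, and only then does the data become readable. A minimal sketch of that flow, assuming the generated Python client (google-cloud-bigquery-storage) and placeholder project, dataset, table, and stream names:

from google.cloud import bigquery_storage_v1

client = bigquery_storage_v1.BigQueryWriteClient()

# Placeholder resource names.
parent = "projects/my-project/datasets/my_dataset/tables/my_table"
stream_name = f"{parent}/streams/my-pending-stream-id"

# Finalize first: no further appends are accepted afterwards, and
# finalize is not supported on the '_default' stream.
client.finalize_write_stream(name=stream_name)

# Commit atomically. A stream cannot be committed twice; once committed,
# its data becomes available for read operations.
response = client.batch_commit_write_streams(
    request={"parent": parent, "write_streams": [stream_name]}
)
print(response.commit_time)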
google/cloud/bigquery/storage/v1/stream.proto (48 changes: 30 additions & 18 deletions)
@@ -122,23 +122,28 @@ message ReadSession {

oneof output_format_serialization_options {
// Optional. Options specific to the Apache Arrow output format.
-ArrowSerializationOptions arrow_serialization_options = 3 [(google.api.field_behavior) = OPTIONAL];
+ArrowSerializationOptions arrow_serialization_options = 3
+    [(google.api.field_behavior) = OPTIONAL];

// Optional. Options specific to the Apache Avro output format
-AvroSerializationOptions avro_serialization_options = 4 [(google.api.field_behavior) = OPTIONAL];
+AvroSerializationOptions avro_serialization_options = 4
+    [(google.api.field_behavior) = OPTIONAL];
}
}

// Output only. Unique identifier for the session, in the form
// `projects/{project_id}/locations/{location}/sessions/{session_id}`.
string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

-// Output only. Time at which the session becomes invalid. After this time, subsequent
-// requests to read this Session will return errors. The expire_time is
-// automatically assigned and currently cannot be specified or updated.
-google.protobuf.Timestamp expire_time = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
+// Output only. Time at which the session becomes invalid. After this time,
+// subsequent requests to read this Session will return errors. The
+// expire_time is automatically assigned and currently cannot be specified or
+// updated.
+google.protobuf.Timestamp expire_time = 2
+    [(google.api.field_behavior) = OUTPUT_ONLY];

-// Immutable. Data format of the output data. DATA_FORMAT_UNSPECIFIED not supported.
+// Immutable. Data format of the output data. DATA_FORMAT_UNSPECIFIED not
+// supported.
DataFormat data_format = 3 [(google.api.field_behavior) = IMMUTABLE];

// The schema for the read. If read_options.selected_fields is set, the
@@ -156,12 +161,11 @@ message ReadSession {
// `projects/{project_id}/datasets/{dataset_id}/tables/{table_id}`
string table = 6 [
(google.api.field_behavior) = IMMUTABLE,
-(google.api.resource_reference) = {
-  type: "bigquery.googleapis.com/Table"
-}
+(google.api.resource_reference) = { type: "bigquery.googleapis.com/Table" }
];

-// Optional. Any modifiers which are applied when reading from the specified table.
+// Optional. Any modifiers which are applied when reading from the specified
+// table.
TableModifiers table_modifiers = 7 [(google.api.field_behavior) = OPTIONAL];

// Optional. Read options for this session (e.g. column selection, filters).
@@ -178,10 +182,16 @@ message ReadSession {
// Output only. An estimate on the number of bytes this session will scan when
// all streams are completely consumed. This estimate is based on
// metadata from the table which might be incomplete or stale.
-int64 estimated_total_bytes_scanned = 12 [(google.api.field_behavior) = OUTPUT_ONLY];
+int64 estimated_total_bytes_scanned = 12
+    [(google.api.field_behavior) = OUTPUT_ONLY];

+// Output only. An estimate on the number of rows present in this session's
+// streams. This estimate is based on metadata from the table which might be
+// incomplete or stale.
+int64 estimated_row_count = 14 [(google.api.field_behavior) = OUTPUT_ONLY];
+
-// Optional. ID set by client to annotate a session identity. This does not need
-// to be strictly unique, but instead the same ID should be used to group
+// Optional. ID set by client to annotate a session identity. This does not
+// need to be strictly unique, but instead the same ID should be used to group
// logically connected sessions (e.g. All using the same ID for all sessions
// needed to complete a Spark SQL query is reasonable).
//
@@ -260,15 +270,17 @@ message WriteStream {
// Immutable. Type of the stream.
Type type = 2 [(google.api.field_behavior) = IMMUTABLE];

-// Output only. Create time of the stream. For the _default stream, this is the
-// creation_time of the table.
-google.protobuf.Timestamp create_time = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
+// Output only. Create time of the stream. For the _default stream, this is
+// the creation_time of the table.
+google.protobuf.Timestamp create_time = 3
+    [(google.api.field_behavior) = OUTPUT_ONLY];

// Output only. Commit time of the stream.
// If a stream is of `COMMITTED` type, then it will have a commit_time same as
// `create_time`. If the stream is of `PENDING` type, empty commit_time
// means it is not committed.
-google.protobuf.Timestamp commit_time = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
+google.protobuf.Timestamp commit_time = 4
+    [(google.api.field_behavior) = OUTPUT_ONLY];

// Output only. The schema of the destination table. It is only returned in
// `CreateWriteStream` response. Caller should generate data that's
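The estimated_row_count field added above is the substance of this commit: CreateReadSession responses now carry a row estimate alongside the existing byte estimate. A hedged sketch of reading both, assuming a Python client regenerated from these protos and placeholder resource names:

from google.cloud import bigquery_storage_v1
from google.cloud.bigquery_storage_v1 import types

client = bigquery_storage_v1.BigQueryReadClient()

session = client.create_read_session(
    # The flattened arguments mirror the method_signature option in
    # storage.proto: "parent,read_session,max_stream_count".
    parent="projects/my-project",
    read_session=types.ReadSession(
        table="projects/my-project/datasets/my_dataset/tables/my_table",
        data_format=types.DataFormat.AVRO,
    ),
    max_stream_count=1,
)

# Both estimates derive from table metadata, which the field comments warn
# may be incomplete or stale; treat them as hints, not exact counts.
print(session.estimated_row_count)
print(session.estimated_total_bytes_scanned)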
google/cloud/bigquery/storage/v1/table.proto (3 changes: 2 additions & 1 deletion)
@@ -107,7 +107,8 @@ message TableFieldSchema {
// Optional. The field mode. The default value is NULLABLE.
Mode mode = 3 [(google.api.field_behavior) = OPTIONAL];

-// Optional. Describes the nested schema fields if the type property is set to STRUCT.
+// Optional. Describes the nested schema fields if the type property is set to
+// STRUCT.
repeated TableFieldSchema fields = 4 [(google.api.field_behavior) = OPTIONAL];

// Optional. The field description. The maximum length is 1,024 characters.
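The fields property rewrapped above is only meaningful when a field's type is STRUCT. An illustrative construction, assuming the proto-plus types shipped with the Python client (which exposes the proto's type field as type_ to avoid shadowing the builtin):

from google.cloud.bigquery_storage_v1 import types

# A STRUCT field whose nested schema lives in `fields`; for any other
# type, `fields` is left empty.
address = types.TableFieldSchema(
    name="address",
    type_=types.TableFieldSchema.Type.STRUCT,
    mode=types.TableFieldSchema.Mode.NULLABLE,
    fields=[
        types.TableFieldSchema(name="street", type_=types.TableFieldSchema.Type.STRING),
        types.TableFieldSchema(name="zip_code", type_=types.TableFieldSchema.Type.STRING),
    ],
)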
