Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add ability to request compressed ReadRowsResponse rows #728

Merged
merged 2 commits into from
Jan 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
26 changes: 26 additions & 0 deletions google/cloud/bigquery_storage_v1/types/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,27 @@ class ReadRowsResponse(proto.Message):
Output only. Arrow schema.

This field is a member of `oneof`_ ``schema``.
uncompressed_byte_size (int):
Optional. If the row data in this ReadRowsResponse is
compressed, then uncompressed byte size is the original size
of the uncompressed row data. If it is set to a value
greater than 0, then decompress into a buffer of size
uncompressed_byte_size using the compression codec that was
requested during session creation time and which is
specified in TableReadOptions.response_compression_codec in
ReadSession. This value is not set if no
response_compression_codec was requested and it is -1 if
the requested compression would not have reduced the size of
this ReadRowsResponse's row data. This attempts to match
Apache Arrow's behavior described here
https://github.com/apache/arrow/issues/15102 where the
uncompressed length may be set to -1 to indicate that the
data that follows is not compressed, which can be useful for
cases where compression does not yield appreciable savings.
When uncompressed_byte_size is not greater than 0, the
client should skip decompression.

This field is a member of `oneof`_ ``_uncompressed_byte_size``.
"""

avro_rows: avro.AvroRows = proto.Field(
Expand Down Expand Up @@ -273,6 +294,11 @@ class ReadRowsResponse(proto.Message):
oneof="schema",
message=arrow.ArrowSchema,
)
uncompressed_byte_size: int = proto.Field(
proto.INT64,
number=9,
optional=True,
)


class SplitReadStreamRequest(proto.Message):
Expand Down
31 changes: 31 additions & 0 deletions google/cloud/bigquery_storage_v1/types/stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,8 +249,33 @@ class TableReadOptions(proto.Message):
https://cloud.google.com/bigquery/docs/table-sampling)

This field is a member of `oneof`_ ``_sample_percentage``.
response_compression_codec (google.cloud.bigquery_storage_v1.types.ReadSession.TableReadOptions.ResponseCompressionCodec):
Optional. Set response_compression_codec when creating a
read session to enable application-level compression of
ReadRows responses.

This field is a member of `oneof`_ ``_response_compression_codec``.
"""

class ResponseCompressionCodec(proto.Enum):
r"""Specifies which compression codec to attempt on the entire
serialized response payload (either Arrow record batch or Avro
rows). This is not to be confused with the Apache Arrow native
compression codecs specified in ArrowSerializationOptions. For
performance reasons, when creating a read session requesting
Arrow responses, setting both native Arrow compression and
application-level response compression is not allowed; choose
at most one kind of compression.

Values:
RESPONSE_COMPRESSION_CODEC_UNSPECIFIED (0):
Default is no compression.
RESPONSE_COMPRESSION_CODEC_LZ4 (2):
Use raw LZ4 compression.
"""
RESPONSE_COMPRESSION_CODEC_UNSPECIFIED = 0
# NOTE(review): value 1 is not defined in this enum; presumably it is
# reserved or unused in the upstream definition -- confirm before adding
# a new member with that number.
RESPONSE_COMPRESSION_CODEC_LZ4 = 2

selected_fields: MutableSequence[str] = proto.RepeatedField(
proto.STRING,
number=1,
Expand All @@ -276,6 +301,12 @@ class TableReadOptions(proto.Message):
number=5,
optional=True,
)
response_compression_codec: "ReadSession.TableReadOptions.ResponseCompressionCodec" = proto.Field(
proto.ENUM,
number=6,
optional=True,
enum="ReadSession.TableReadOptions.ResponseCompressionCodec",
)

name: str = proto.Field(
proto.STRING,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
],
"language": "PYTHON",
"name": "google-cloud-bigquery-storage",
"version": "2.24.0"
"version": "0.1.0"
},
"snippets": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
],
"language": "PYTHON",
"name": "google-cloud-bigquery-storage",
"version": "2.24.0"
"version": "0.1.0"
},
"snippets": [
{
Expand Down