Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add ZSTD compression as an option for Arrow #197

Merged
merged 1 commit into from May 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions google/cloud/bigquery_storage_v1/proto/arrow.proto
Expand Up @@ -52,6 +52,9 @@ message ArrowSerializationOptions {

// LZ4 Frame (https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md)
LZ4_FRAME = 1;

// Zstandard compression.
ZSTD = 2;
}

// The compression codec to use for Arrow buffers in serialized record
Expand Down
6 changes: 2 additions & 4 deletions google/cloud/bigquery_storage_v1/proto/storage.proto
Expand Up @@ -69,8 +69,7 @@ service BigQueryRead {
post: "/v1/{read_session.table=projects/*/datasets/*/tables/*}"
body: "*"
};
option (google.api.method_signature) =
"parent,read_session,max_stream_count";
option (google.api.method_signature) = "parent,read_session,max_stream_count";
}

// Reads rows from the stream in the format prescribed by the ReadSession.
Expand Down Expand Up @@ -99,8 +98,7 @@ service BigQueryRead {
// original, primary, and residual, that original[0-j] = primary[0-j] and
// original[j-n] = residual[0-m] once the streams have been read to
// completion.
rpc SplitReadStream(SplitReadStreamRequest)
returns (SplitReadStreamResponse) {
rpc SplitReadStream(SplitReadStreamRequest) returns (SplitReadStreamResponse) {
option (google.api.http) = {
get: "/v1/{name=projects/*/locations/*/sessions/*/streams/*}"
};
Expand Down
22 changes: 10 additions & 12 deletions google/cloud/bigquery_storage_v1/proto/stream.proto
Expand Up @@ -75,23 +75,20 @@ message ReadSession {
// Restricted to a maximum length for 1 MB.
string row_restriction = 2;

// Optional. Options specific to the Apache Arrow output format.
oneof output_format_serialization_options {
ArrowSerializationOptions arrow_serialization_options = 3
[(google.api.field_behavior) = OPTIONAL];
// Optional. Options specific to the Apache Arrow output format.
ArrowSerializationOptions arrow_serialization_options = 3 [(google.api.field_behavior) = OPTIONAL];
}
}

// Output only. Unique identifier for the session, in the form
// `projects/{project_id}/locations/{location}/sessions/{session_id}`.
string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

// Output only. Time at which the session becomes invalid. After this time,
// subsequent requests to read this Session will return errors. The
// expire_time is automatically assigned and currently cannot be specified or
// updated.
google.protobuf.Timestamp expire_time = 2
[(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. Time at which the session becomes invalid. After this time, subsequent
// requests to read this Session will return errors. The expire_time is
// automatically assigned and currently cannot be specified or updated.
google.protobuf.Timestamp expire_time = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

// Immutable. Data format of the output data.
DataFormat data_format = 3 [(google.api.field_behavior) = IMMUTABLE];
Expand All @@ -111,11 +108,12 @@ message ReadSession {
// `projects/{project_id}/datasets/{dataset_id}/tables/{table_id}`
string table = 6 [
(google.api.field_behavior) = IMMUTABLE,
(google.api.resource_reference) = { type: "bigquery.googleapis.com/Table" }
(google.api.resource_reference) = {
type: "bigquery.googleapis.com/Table"
}
];

// Optional. Any modifiers which are applied when reading from the specified
// table.
// Optional. Any modifiers which are applied when reading from the specified table.
TableModifiers table_modifiers = 7 [(google.api.field_behavior) = OPTIONAL];

// Optional. Read options for this session (e.g. column selection, filters).
Expand Down
1 change: 1 addition & 0 deletions google/cloud/bigquery_storage_v1/types/arrow.py
Expand Up @@ -63,6 +63,7 @@ class CompressionCodec(proto.Enum):
r"""Compression codec's supported by Arrow."""
COMPRESSION_UNSPECIFIED = 0
LZ4_FRAME = 1
ZSTD = 2

buffer_compression = proto.Field(proto.ENUM, number=2, enum=CompressionCodec,)

Expand Down
3 changes: 2 additions & 1 deletion google/cloud/bigquery_storage_v1/types/stream.py
Expand Up @@ -102,7 +102,8 @@ class TableReadOptions(proto.Message):

Restricted to a maximum length for 1 MB.
arrow_serialization_options (google.cloud.bigquery_storage_v1.types.ArrowSerializationOptions):

Optional. Options specific to the Apache
Arrow output format.
"""

selected_fields = proto.RepeatedField(proto.STRING, number=1,)
Expand Down
6 changes: 3 additions & 3 deletions synth.metadata
Expand Up @@ -4,15 +4,15 @@
"git": {
"name": ".",
"remote": "https://github.com/googleapis/python-bigquery-storage.git",
"sha": "ceae220a9fa5daa03da7907ae001d2689a7fbdcb"
"sha": "0fe648449715c0591c64a2013330ecba9d125fa1"
}
},
{
"git": {
"name": "googleapis",
"remote": "https://github.com/googleapis/googleapis.git",
"sha": "7e1b14e6c7a9ab96d2db7e4a131981f162446d34",
"internalRef": "373649163"
"sha": "23efea9fc7bedfe53b24295ed84b5f873606edcb",
"internalRef": "374220891"
}
}
],
Expand Down