/
storage.py
209 lines (177 loc) · 8.03 KB
/
storage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# -*- coding: utf-8 -*-
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import proto # type: ignore
from google.cloud.bigquery_storage_v1.types import arrow
from google.cloud.bigquery_storage_v1.types import avro
from google.cloud.bigquery_storage_v1.types import stream
# Declare the proto package for this module together with the names of the
# message classes it defines.
__protobuf__ = proto.module(
    package="google.cloud.bigquery.storage.v1",
    manifest={
        "CreateReadSessionRequest",
        "ReadRowsRequest",
        "ThrottleState",
        "StreamStats",
        "ReadRowsResponse",
        "SplitReadStreamRequest",
        "SplitReadStreamResponse",
    },
)
class CreateReadSessionRequest(proto.Message):
    r"""Request message for ``CreateReadSession``.

    Attributes:
        parent (str):
            Required. The project that owns the session, given in the
            form ``projects/{project_id}``.
        read_session (google.cloud.bigquery_storage_v1.types.ReadSession):
            Required. The session to create.
        max_stream_count (int):
            Upper bound on the initial number of streams. Must be
            non-negative. When unset or zero, the server picks a number
            of streams intended to produce reasonable throughput. Fewer
            streams than requested may be returned, depending on the
            amount of parallelism that is reasonable for the table. An
            error is returned if the value is greater than the current
            system max limit of 1,000.

            Streams must be read starting from offset 0.
    """

    parent = proto.Field(
        proto.STRING,
        number=1,
    )
    read_session = proto.Field(
        proto.MESSAGE,
        number=2,
        message=stream.ReadSession,
    )
    max_stream_count = proto.Field(
        proto.INT32,
        number=3,
    )
class ReadRowsRequest(proto.Message):
    r"""Request message for ``ReadRows``.

    Attributes:
        read_stream (str):
            Required. The stream from which rows are read.
        offset (int):
            Offset at which to start reading. Reading starts from
            offset zero when this is not specified. The requested
            offset must be less than the last row read from Read;
            the result of requesting a larger offset is undefined.
    """

    read_stream = proto.Field(
        proto.STRING,
        number=1,
    )
    offset = proto.Field(
        proto.INT64,
        number=2,
    )
class ThrottleState(proto.Message):
    r"""Describes whether the current connection is being throttled.

    Attributes:
        throttle_percent (int):
            Degree to which this connection is throttled: zero
            means no throttling and 100 means fully throttled.
    """

    throttle_percent = proto.Field(
        proto.INT32,
        number=1,
    )
class StreamStats(proto.Message):
    r"""Estimated statistics for a given Stream.

    Attributes:
        progress (google.cloud.bigquery_storage_v1.types.StreamStats.Progress):
            Progress of the current stream.
    """

    class Progress(proto.Message):
        r"""Fractional progress of a stream around one response message.

        Attributes:
            at_response_start (float):
                Fraction of the rows assigned to the stream that the
                server has processed so far, excluding the rows in the
                current response message.

                Together with ``at_response_end``, this value can be
                used to interpolate progress while the rows of the
                message are being processed:
                ``at_response_start + (at_response_end - at_response_start) * rows_processed_from_response / rows_in_response``.

                Note that if a filter is provided, the
                ``at_response_end`` value of the previous response is
                not necessarily equal to the ``at_response_start``
                value of the current response.
            at_response_end (float):
                Like ``at_response_start``, but including the rows in
                the current response.
        """

        at_response_start = proto.Field(
            proto.DOUBLE,
            number=1,
        )
        at_response_end = proto.Field(
            proto.DOUBLE,
            number=2,
        )

    progress = proto.Field(
        proto.MESSAGE,
        number=2,
        message=Progress,
    )
class ReadRowsResponse(proto.Message):
    r"""Response from calling ``ReadRows``; may carry row data, progress
    and throttling information.

    ``avro_rows`` and ``arrow_record_batch`` are members of the ``rows``
    oneof; ``avro_schema`` and ``arrow_schema`` are members of the
    ``schema`` oneof.

    Attributes:
        avro_rows (google.cloud.bigquery_storage_v1.types.AvroRows):
            Serialized row data in AVRO format.
        arrow_record_batch (google.cloud.bigquery_storage_v1.types.ArrowRecordBatch):
            Serialized row data in Arrow RecordBatch format.
        row_count (int):
            Number of serialized rows in the rows block.
        stats (google.cloud.bigquery_storage_v1.types.StreamStats):
            Statistics for the stream.
        throttle_state (google.cloud.bigquery_storage_v1.types.ThrottleState):
            Throttling state. If unset, the latest response still
            describes the current throttling status.
        avro_schema (google.cloud.bigquery_storage_v1.types.AvroSchema):
            Output only. Avro schema.
        arrow_schema (google.cloud.bigquery_storage_v1.types.ArrowSchema):
            Output only. Arrow schema.
    """

    avro_rows = proto.Field(
        proto.MESSAGE,
        number=3,
        oneof="rows",
        message=avro.AvroRows,
    )
    arrow_record_batch = proto.Field(
        proto.MESSAGE,
        number=4,
        oneof="rows",
        message=arrow.ArrowRecordBatch,
    )
    row_count = proto.Field(
        proto.INT64,
        number=6,
    )
    stats = proto.Field(
        proto.MESSAGE,
        number=2,
        message="StreamStats",
    )
    throttle_state = proto.Field(
        proto.MESSAGE,
        number=5,
        message="ThrottleState",
    )
    avro_schema = proto.Field(
        proto.MESSAGE,
        number=7,
        oneof="schema",
        message=avro.AvroSchema,
    )
    arrow_schema = proto.Field(
        proto.MESSAGE,
        number=8,
        oneof="schema",
        message=arrow.ArrowSchema,
    )
class SplitReadStreamRequest(proto.Message):
    r"""Request message for ``SplitReadStream``.

    Attributes:
        name (str):
            Required. Name of the stream to split.
        fraction (float):
            A value in the range (0.0, 1.0) giving the fractional
            point at which the original stream should be split.

            The actual split point is evaluated on pre-filtered rows,
            so when a filter is provided there is no guarantee that
            the rows are divided between the new child streams in
            proportion to this value. Additionally, because the
            server-side unit for assigning data is collections of
            rows, this fraction always maps to a data storage
            boundary on the server side.
    """

    name = proto.Field(
        proto.STRING,
        number=1,
    )
    fraction = proto.Field(
        proto.DOUBLE,
        number=2,
    )
class SplitReadStreamResponse(proto.Message):
    r"""Response message for ``SplitReadStream``.

    Attributes:
        primary_stream (google.cloud.bigquery_storage_v1.types.ReadStream):
            Primary stream, holding the beginning portion of the
            original stream. An empty value indicates that the
            original stream can no longer be split.
        remainder_stream (google.cloud.bigquery_storage_v1.types.ReadStream):
            Remainder stream, holding the tail of the original
            stream. An empty value indicates that the original
            stream can no longer be split.
    """

    primary_stream = proto.Field(
        proto.MESSAGE,
        number=1,
        message=stream.ReadStream,
    )
    remainder_stream = proto.Field(
        proto.MESSAGE,
        number=2,
        message=stream.ReadStream,
    )
# Public names of this module: the manifest entries, sorted, as a tuple.
__all__ = tuple(sorted(__protobuf__.manifest))