-
Notifications
You must be signed in to change notification settings - Fork 2.3k
/
batch_prediction_job.proto
460 lines (415 loc) · 22.2 KB
/
batch_prediction_job.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.aiplatform.v1;
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/completion_stats.proto";
import "google/cloud/aiplatform/v1/encryption_spec.proto";
import "google/cloud/aiplatform/v1/explanation.proto";
import "google/cloud/aiplatform/v1/io.proto";
import "google/cloud/aiplatform/v1/job_state.proto";
import "google/cloud/aiplatform/v1/machine_resources.proto";
import "google/cloud/aiplatform/v1/manual_batch_tuning_parameters.proto";
import "google/cloud/aiplatform/v1/unmanaged_container_model.proto";
import "google/protobuf/struct.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
// Per-language package and namespace settings applied to the code generated
// from this file.
option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "BatchPredictionJobProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";
// A job that uses a
// [Model][google.cloud.aiplatform.v1.BatchPredictionJob.model] to produce
// predictions on multiple [input
// instances][google.cloud.aiplatform.v1.BatchPredictionJob.input_config]. If
// predictions for a significant portion of the instances fail, the job may
// finish without attempting predictions for all remaining instances.
message BatchPredictionJob {
  option (google.api.resource) = {
    type: "aiplatform.googleapis.com/BatchPredictionJob"
    pattern: "projects/{project}/locations/{location}/batchPredictionJobs/{batch_prediction_job}"
  };

  // Configures the input to
  // [BatchPredictionJob][google.cloud.aiplatform.v1.BatchPredictionJob]. See
  // [Model.supported_input_storage_formats][google.cloud.aiplatform.v1.Model.supported_input_storage_formats]
  // for Model's supported input formats, and how instances should be expressed
  // via any of them.
  message InputConfig {
    // Required. The source of the input.
    oneof source {
      // The Cloud Storage location for the input instances.
      GcsSource gcs_source = 2;

      // The BigQuery location of the input table.
      // The schema of the table should be in the format described by the given
      // context OpenAPI Schema, if one is provided. The table may contain
      // additional columns that are not described by the schema, and they will
      // be ignored.
      BigQuerySource bigquery_source = 3;
    }

    // Required. The format in which instances are given, must be one of the
    // [Model's][google.cloud.aiplatform.v1.BatchPredictionJob.model]
    // [supported_input_storage_formats][google.cloud.aiplatform.v1.Model.supported_input_storage_formats].
    string instances_format = 1 [(google.api.field_behavior) = REQUIRED];
  }

  // Configuration defining how to transform batch prediction input instances to
  // the instances that the Model accepts.
  message InstanceConfig {
    // The format of the instance that the Model accepts. Vertex AI will
    // convert compatible
    // [batch prediction input instance
    // formats][google.cloud.aiplatform.v1.BatchPredictionJob.InputConfig.instances_format]
    // to the specified format.
    //
    // Supported values are:
    //
    // * `object`: Each input is converted to JSON object format.
    //     * For `bigquery`, each row is converted to an object.
    //     * For `jsonl`, each line of the JSONL input must be an object.
    //     * Does not apply to `csv`, `file-list`, `tf-record`, or
    //       `tf-record-gzip`.
    //
    // * `array`: Each input is converted to JSON array format.
    //     * For `bigquery`, each row is converted to an array. The order
    //       of columns is determined by the BigQuery column order, unless
    //       [included_fields][google.cloud.aiplatform.v1.BatchPredictionJob.InstanceConfig.included_fields]
    //       is populated.
    //       [included_fields][google.cloud.aiplatform.v1.BatchPredictionJob.InstanceConfig.included_fields]
    //       must be populated for specifying field orders.
    //     * For `jsonl`, if each line of the JSONL input is an object,
    //       [included_fields][google.cloud.aiplatform.v1.BatchPredictionJob.InstanceConfig.included_fields]
    //       must be populated for specifying field orders.
    //     * Does not apply to `csv`, `file-list`, `tf-record`, or
    //       `tf-record-gzip`.
    //
    // If not specified, Vertex AI converts the batch prediction input as
    // follows:
    //
    //  * For `bigquery` and `csv`, the behavior is the same as `array`. The
    //    order of columns is the same as defined in the file or table, unless
    //    [included_fields][google.cloud.aiplatform.v1.BatchPredictionJob.InstanceConfig.included_fields]
    //    is populated.
    //  * For `jsonl`, the prediction instance format is determined by
    //    each line of the input.
    //  * For `tf-record`/`tf-record-gzip`, each record will be converted to
    //    an object in the format of `{"b64": <value>}`, where `<value>` is
    //    the Base64-encoded string of the content of the record.
    //  * For `file-list`, each file in the list will be converted to an
    //    object in the format of `{"b64": <value>}`, where `<value>` is
    //    the Base64-encoded string of the content of the file.
    string instance_type = 1;

    // The name of the field that is considered as a key.
    //
    // The values identified by the key field are not included in the
    // transformed instances that are sent to the Model. This is similar to
    // specifying the name of this field in
    // [excluded_fields][google.cloud.aiplatform.v1.BatchPredictionJob.InstanceConfig.excluded_fields].
    // In addition, the batch prediction output will not include the instances.
    // Instead the output will only include the value of the key field, in a
    // field named `key` in the output:
    //
    //  * For `jsonl` output format, the output will have a `key` field
    //    instead of the `instance` field.
    //  * For `csv`/`bigquery` output format, the output will have a `key`
    //    column instead of the instance feature columns.
    //
    // The input must be JSONL with objects at each line, CSV, BigQuery
    // or TfRecord.
    string key_field = 2;

    // Fields that will be included in the prediction instance that is
    // sent to the Model.
    //
    // If
    // [instance_type][google.cloud.aiplatform.v1.BatchPredictionJob.InstanceConfig.instance_type]
    // is `array`, the order of field names in included_fields also determines
    // the order of the values in the array.
    //
    // When included_fields is populated,
    // [excluded_fields][google.cloud.aiplatform.v1.BatchPredictionJob.InstanceConfig.excluded_fields]
    // must be empty.
    //
    // The input must be JSONL with objects at each line, BigQuery
    // or TfRecord.
    repeated string included_fields = 3;

    // Fields that will be excluded from the prediction instance that is
    // sent to the Model.
    //
    // Excluded fields will be attached to the batch prediction output if
    // [key_field][google.cloud.aiplatform.v1.BatchPredictionJob.InstanceConfig.key_field]
    // is not specified.
    //
    // When excluded_fields is populated,
    // [included_fields][google.cloud.aiplatform.v1.BatchPredictionJob.InstanceConfig.included_fields]
    // must be empty.
    //
    // The input must be JSONL with objects at each line, BigQuery
    // or TfRecord.
    repeated string excluded_fields = 4;
  }

  // Configures the output of
  // [BatchPredictionJob][google.cloud.aiplatform.v1.BatchPredictionJob]. See
  // [Model.supported_output_storage_formats][google.cloud.aiplatform.v1.Model.supported_output_storage_formats]
  // for supported output formats, and how predictions are expressed via any of
  // them.
  message OutputConfig {
    // Required. The destination of the output.
    oneof destination {
      // The Cloud Storage location of the directory where the output is
      // to be written to. In the given directory a new directory is created.
      // Its name is `prediction-<model-display-name>-<job-create-time>`,
      // where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format.
      // Inside of it files `predictions_0001.<extension>`,
      // `predictions_0002.<extension>`, ..., `predictions_N.<extension>`
      // are created where `<extension>` depends on chosen
      // [predictions_format][google.cloud.aiplatform.v1.BatchPredictionJob.OutputConfig.predictions_format],
      // and N may equal 0001 and depends on the total number of successfully
      // predicted instances. If the Model has both
      // [instance][google.cloud.aiplatform.v1.PredictSchemata.instance_schema_uri]
      // and
      // [prediction][google.cloud.aiplatform.v1.PredictSchemata.prediction_schema_uri]
      // schemata defined then each such file contains predictions as per the
      // [predictions_format][google.cloud.aiplatform.v1.BatchPredictionJob.OutputConfig.predictions_format].
      // If prediction for any instance failed (partially or completely), then
      // an additional `errors_0001.<extension>`, `errors_0002.<extension>`,...,
      // `errors_N.<extension>` files are created (N depends on total number
      // of failed predictions). These files contain the failed instances,
      // as per their schema, followed by an additional `error` field whose
      // value is a [google.rpc.Status][google.rpc.Status]
      // containing only `code` and `message` fields.
      GcsDestination gcs_destination = 2;

      // The BigQuery project or dataset location where the output is to be
      // written to. If project is provided, a new dataset is created with name
      // `prediction_<model-display-name>_<job-create-time>`
      // where <model-display-name> is made
      // BigQuery-dataset-name compatible (for example, most special characters
      // become underscores), and timestamp is in
      // YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset
      // two tables will be created, `predictions`, and `errors`.
      // If the Model has both
      // [instance][google.cloud.aiplatform.v1.PredictSchemata.instance_schema_uri]
      // and
      // [prediction][google.cloud.aiplatform.v1.PredictSchemata.prediction_schema_uri]
      // schemata defined then the tables have columns as follows: The
      // `predictions` table contains instances for which the prediction
      // succeeded, it has columns as per a concatenation of the Model's
      // instance and prediction schemata. The `errors` table contains rows for
      // which the prediction has failed, it has instance columns, as per the
      // instance schema, followed by a single "errors" column whose values
      // are [google.rpc.Status][google.rpc.Status]
      // represented as a STRUCT and containing only `code` and `message`.
      BigQueryDestination bigquery_destination = 3;
    }

    // Required. The format in which Vertex AI gives the predictions, must be
    // one of the [Model's][google.cloud.aiplatform.v1.BatchPredictionJob.model]
    // [supported_output_storage_formats][google.cloud.aiplatform.v1.Model.supported_output_storage_formats].
    string predictions_format = 1 [(google.api.field_behavior) = REQUIRED];
  }

  // Further describes this job's output.
  // Supplements
  // [output_config][google.cloud.aiplatform.v1.BatchPredictionJob.output_config].
  message OutputInfo {
    // The output location into which prediction output is written.
    oneof output_location {
      // Output only. The full path of the Cloud Storage directory created, into
      // which the prediction output is written.
      string gcs_output_directory = 1
          [(google.api.field_behavior) = OUTPUT_ONLY];

      // Output only. The path of the BigQuery dataset created, in
      // `bq://projectId.bqDatasetId`
      // format, into which the prediction output is written.
      string bigquery_output_dataset = 2
          [(google.api.field_behavior) = OUTPUT_ONLY];
    }

    // Output only. The name of the BigQuery table created, in
    // `predictions_<timestamp>`
    // format, into which the prediction output is written.
    // Can be used by UI to generate the BigQuery output path, for example.
    string bigquery_output_table = 4
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Output only. Resource name of the BatchPredictionJob.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. The user-defined name of this BatchPredictionJob.
  string display_name = 2 [(google.api.field_behavior) = REQUIRED];

  // The name of the Model resource that produces the predictions via this job,
  // must share the same ancestor Location.
  // Starting this job has no impact on any existing deployments of the Model
  // and their resources.
  // Exactly one of model and unmanaged_container_model must be set.
  //
  // The model resource name may contain version id or version alias to specify
  // the version.
  //  Example: `projects/{project}/locations/{location}/models/{model}@2`
  //              or
  //            `projects/{project}/locations/{location}/models/{model}@golden`
  // If no version is specified, the default version will be deployed.
  //
  // The model resource could also be a publisher model.
  //  Example: `publishers/{publisher}/models/{model}`
  //              or
  //  `projects/{project}/locations/{location}/publishers/{publisher}/models/{model}`
  string model = 3 [(google.api.resource_reference) = {
    type: "aiplatform.googleapis.com/Model"
  }];

  // Output only. The version ID of the Model that produces the predictions via
  // this job.
  string model_version_id = 30 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Contains model information necessary to perform batch prediction without
  // requiring uploading to model registry.
  // Exactly one of model and unmanaged_container_model must be set.
  UnmanagedContainerModel unmanaged_container_model = 28;

  // Required. Input configuration of the instances on which predictions are
  // performed. The schema of any single instance may be specified via the
  // [Model's][google.cloud.aiplatform.v1.BatchPredictionJob.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1.Model.predict_schemata]
  // [instance_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.instance_schema_uri].
  InputConfig input_config = 4 [(google.api.field_behavior) = REQUIRED];

  // Configuration for how to convert batch prediction input instances to the
  // prediction instances that are sent to the Model.
  InstanceConfig instance_config = 27;

  // The parameters that govern the predictions. The schema of the parameters
  // may be specified via the
  // [Model's][google.cloud.aiplatform.v1.BatchPredictionJob.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1.Model.predict_schemata]
  // [parameters_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.parameters_schema_uri].
  google.protobuf.Value model_parameters = 5;

  // Required. The configuration specifying where output predictions should
  // be written.
  // The schema of any single prediction may be specified as a concatenation
  // of [Model's][google.cloud.aiplatform.v1.BatchPredictionJob.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1.Model.predict_schemata]
  // [instance_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.instance_schema_uri]
  // and
  // [prediction_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.prediction_schema_uri].
  OutputConfig output_config = 6 [(google.api.field_behavior) = REQUIRED];

  // The config of resources used by the Model during the batch prediction. If
  // the Model
  // [supports][google.cloud.aiplatform.v1.Model.supported_deployment_resources_types]
  // DEDICATED_RESOURCES this config may be provided (and the job will use these
  // resources), if the Model doesn't support AUTOMATIC_RESOURCES, this config
  // must be provided.
  BatchDedicatedResources dedicated_resources = 7;

  // The service account that the DeployedModel's container runs as. If not
  // specified, a system generated one will be used, which
  // has minimal permissions and the custom container, if used, may not have
  // enough permission to access other Google Cloud resources.
  //
  // Users deploying the Model must have the `iam.serviceAccounts.actAs`
  // permission on this service account.
  string service_account = 29;

  // Immutable. Parameters configuring the batch behavior. Currently only
  // applicable when
  // [dedicated_resources][google.cloud.aiplatform.v1.BatchPredictionJob.dedicated_resources]
  // are used (in other cases Vertex AI does the tuning itself).
  ManualBatchTuningParameters manual_batch_tuning_parameters = 8
      [(google.api.field_behavior) = IMMUTABLE];

  // Generate explanation with the batch prediction results.
  //
  // When set to `true`, the batch prediction output changes based on the
  // `predictions_format` field of the
  // [BatchPredictionJob.output_config][google.cloud.aiplatform.v1.BatchPredictionJob.output_config]
  // object:
  //
  //  * `bigquery`: output includes a column named `explanation`. The value
  //    is a struct that conforms to the
  //    [Explanation][google.cloud.aiplatform.v1.Explanation] object.
  //  * `jsonl`: The JSON objects on each line include an additional entry
  //    keyed `explanation`. The value of the entry is a JSON object that
  //    conforms to the [Explanation][google.cloud.aiplatform.v1.Explanation]
  //    object.
  //  * `csv`: Generating explanations for CSV format is not supported.
  //
  // If this field is set to true, either the
  // [Model.explanation_spec][google.cloud.aiplatform.v1.Model.explanation_spec]
  // or
  // [explanation_spec][google.cloud.aiplatform.v1.BatchPredictionJob.explanation_spec]
  // must be populated.
  bool generate_explanation = 23;

  // Explanation configuration for this BatchPredictionJob. Can be
  // specified only if
  // [generate_explanation][google.cloud.aiplatform.v1.BatchPredictionJob.generate_explanation]
  // is set to `true`.
  //
  // This value overrides the value of
  // [Model.explanation_spec][google.cloud.aiplatform.v1.Model.explanation_spec].
  // All fields of
  // [explanation_spec][google.cloud.aiplatform.v1.BatchPredictionJob.explanation_spec]
  // are optional in the request. If a field of the
  // [explanation_spec][google.cloud.aiplatform.v1.BatchPredictionJob.explanation_spec]
  // object is not populated, the corresponding field of the
  // [Model.explanation_spec][google.cloud.aiplatform.v1.Model.explanation_spec]
  // object is inherited.
  ExplanationSpec explanation_spec = 25;

  // Output only. Information further describing the output of this job.
  OutputInfo output_info = 9 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The detailed state of the job.
  JobState state = 10 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Only populated when the job's state is JOB_STATE_FAILED or
  // JOB_STATE_CANCELLED.
  google.rpc.Status error = 11 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Partial failures encountered.
  // For example, single files that can't be read.
  // This field never exceeds 20 entries.
  // Status details fields contain standard Google Cloud error details.
  repeated google.rpc.Status partial_failures = 12
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Information about resources that had been consumed by this
  // job. Provided in real time on a best-effort basis, as well as a final
  // value once the job completes.
  //
  // Note: This field currently may not be populated for batch predictions
  // that use AutoML Models.
  ResourcesConsumed resources_consumed = 13
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Statistics on completed and failed prediction instances.
  CompletionStats completion_stats = 14
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the BatchPredictionJob was created.
  google.protobuf.Timestamp create_time = 15
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the BatchPredictionJob for the first time entered
  // the `JOB_STATE_RUNNING` state.
  google.protobuf.Timestamp start_time = 16
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the BatchPredictionJob entered any of the following
  // states: `JOB_STATE_SUCCEEDED`, `JOB_STATE_FAILED`, `JOB_STATE_CANCELLED`.
  google.protobuf.Timestamp end_time = 17
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the BatchPredictionJob was most recently updated.
  google.protobuf.Timestamp update_time = 18
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // The labels with user-defined metadata to organize BatchPredictionJobs.
  //
  // Label keys and values can be no longer than 64 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  //
  // See https://goo.gl/xmQnxf for more information and examples of labels.
  map<string, string> labels = 19;

  // Customer-managed encryption key options for a BatchPredictionJob. If this
  // is set, then all resources created by the BatchPredictionJob will be
  // encrypted with the provided encryption key.
  EncryptionSpec encryption_spec = 24;

  // For custom-trained Models and AutoML Tabular Models, the container of the
  // DeployedModel instances will send `stderr` and `stdout` streams to
  // Cloud Logging by default. Please note that the logs incur cost,
  // which are subject to [Cloud Logging
  // pricing](https://cloud.google.com/logging/pricing).
  //
  // Users can disable container logging by setting this flag to true.
  bool disable_container_logging = 34;
}