-
Notifications
You must be signed in to change notification settings - Fork 2.2k
/
automl_time_series_forecasting.proto
284 lines (247 loc) · 10.6 KB
/
automl_time_series_forecasting.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";

package google.cloud.aiplatform.v1beta1.schema.trainingjob.definition;

import "google/api/annotations.proto";
import "google/cloud/aiplatform/v1beta1/schema/trainingjob/definition/export_evaluated_data_items_config.proto";

// Code generation settings for the Go and Java targets.
option go_package = "google.golang.org/genproto/googleapis/cloud/aiplatform/v1beta1/schema/trainingjob/definition;definition";
option java_multiple_files = true;
option java_outer_classname = "AutoMLForecastingProto";
option java_package = "com.google.cloud.aiplatform.v1beta1.schema.trainingjob.definition";

// Definition of a TrainingJob that trains an AutoML Forecasting Model and
// uploads it.
message AutoMlForecasting {
  // Input parameters supplied to this TrainingJob.
  AutoMlForecastingInputs inputs = 1;

  // Metadata information for this TrainingJob.
  AutoMlForecastingMetadata metadata = 2;
}

// The input parameters of an AutoML Forecasting TrainingJob.
message AutoMlForecastingInputs {
  // A transformation that the training pipeline applies to an input column.
  // At most one member of `transformation_detail` is set.
  message Transformation {
    // Training pipeline will infer the proper transformation based on the
    // statistics of the dataset.
    message AutoTransformation {
      // The name of the input column to which this transformation applies.
      string column_name = 1;
    }

    // Training pipeline will perform following transformation functions.
    //
    // * The value converted to float32.
    //
    // * The z_score of the value.
    //
    // * log(value+1) when the value is greater than or equal to 0. Otherwise,
    //   this transformation is not applied and the value is considered a
    //   missing value.
    //
    // * z_score of log(value+1) when the value is greater than or equal to 0.
    //   Otherwise, this transformation is not applied and the value is
    //   considered a missing value.
    //
    // * A boolean value that indicates whether the value is valid.
    message NumericTransformation {
      // The name of the input column to which this transformation applies.
      string column_name = 1;
    }

    // Training pipeline will perform following transformation functions.
    //
    // * The categorical string as is--no change to case, punctuation,
    //   spelling, tense, and so on.
    //
    // * Convert the category name to a dictionary lookup index and generate an
    //   embedding for each index.
    //
    // * Categories that appear less than 5 times in the training dataset are
    //   treated as the "unknown" category. The "unknown" category gets its own
    //   special lookup index and resulting embedding.
    message CategoricalTransformation {
      // The name of the input column to which this transformation applies.
      string column_name = 1;
    }

    // Training pipeline will perform following transformation functions.
    //
    // * Apply the transformation functions for Numerical columns.
    //
    // * Determine the year, month, day, and weekday. Treat each value from the
    //   timestamp as a Categorical column.
    //
    // * Invalid numerical values (for example, values that fall outside of a
    //   typical timestamp range, or are extreme values) receive no special
    //   treatment and are not removed.
    message TimestampTransformation {
      // The name of the input column to which this transformation applies.
      string column_name = 1;

      // The format in which that time field is expressed. The time_format
      // must either be one of:
      //
      // * `unix-seconds`
      //
      // * `unix-milliseconds`
      //
      // * `unix-microseconds`
      //
      // * `unix-nanoseconds`
      //
      // (for respectively number of seconds, milliseconds, microseconds and
      // nanoseconds since start of the Unix epoch);
      //
      // or be written in `strftime` syntax.
      //
      // If time_format is not set, then the default format is RFC 3339
      // `date-time` format, where `time-offset` = `"Z"`
      // (e.g. 1985-04-12T23:20:50.52Z).
      string time_format = 2;
    }

    // Training pipeline will perform following transformation functions.
    //
    // * The text as is--no change to case, punctuation, spelling, tense, and
    //   so on.
    //
    // * Convert the category name to a dictionary lookup index and generate an
    //   embedding for each index.
    message TextTransformation {
      // The name of the input column to which this transformation applies.
      string column_name = 1;
    }

    // The transformation that the training pipeline will apply to the input
    // columns.
    oneof transformation_detail {
      // Let the training pipeline infer the transformation.
      AutoTransformation auto = 1;

      // Apply the numeric transformation functions.
      NumericTransformation numeric = 2;

      // Apply the categorical transformation functions.
      CategoricalTransformation categorical = 3;

      // Apply the timestamp transformation functions.
      TimestampTransformation timestamp = 4;

      // Apply the text transformation functions.
      TextTransformation text = 5;
    }
  }

  // A duration of time expressed in time granularity units.
  message Granularity {
    // The time granularity unit of this time period.
    // The supported units are:
    //
    // * "minute"
    //
    // * "hour"
    //
    // * "day"
    //
    // * "week"
    //
    // * "month"
    //
    // * "year"
    string unit = 1;

    // The number of granularity units between data points in the training
    // data. If `unit` is `minute`, can be 1, 5, 10, 15, or 30. For all other
    // values of `unit`, must be 1.
    int64 quantity = 2;
  }

  // The name of the column that the model is to predict.
  string target_column = 1;

  // The name of the column that identifies the time series.
  string time_series_identifier_column = 2;

  // The name of the column that identifies time order in the time series.
  string time_column = 3;

  // Each transformation will apply transform function to given input column.
  // And the result will be used for training.
  // When creating transformation for BigQuery Struct column, the column
  // should be flattened using "." as the delimiter.
  repeated Transformation transformations = 4;

  // Objective function the model is optimizing towards. The training process
  // creates a model that optimizes the value of the objective
  // function over the validation set.
  //
  // The supported optimization objectives:
  //
  // * "minimize-rmse" (default) - Minimize root-mean-squared error (RMSE).
  //
  // * "minimize-mae" - Minimize mean-absolute error (MAE).
  //
  // * "minimize-rmsle" - Minimize root-mean-squared log error (RMSLE).
  //
  // * "minimize-rmspe" - Minimize root-mean-squared percentage error
  //   (RMSPE).
  //
  // * "minimize-wape-mae" - Minimize the combination of weighted absolute
  //   percentage error (WAPE) and mean-absolute-error (MAE).
  //
  // * "minimize-quantile-loss" - Minimize the quantile loss at the quantiles
  //   defined in `quantiles`.
  string optimization_objective = 5;

  // Required. The train budget of creating this model, expressed in milli
  // node hours, i.e. 1,000 value in this field means 1 node hour.
  //
  // The training cost of the model will not exceed this budget. The final
  // cost will be attempted to be close to the budget, though may end up
  // being (even) noticeably smaller - at the backend's discretion. This
  // especially may happen when further model training ceases to provide
  // any improvements.
  //
  // If the budget is set to a value known to be insufficient to train a
  // model for the given dataset, the training won't be attempted and
  // will error.
  //
  // The train budget must be between 1,000 and 72,000 milli node hours,
  // inclusive.
  int64 train_budget_milli_node_hours = 6;

  // Column name that should be used as the weight column.
  // Higher values in this column give more importance to the row
  // during model training. The column must have numeric values between 0 and
  // 10000 inclusively; 0 means the row is ignored for training. If weight
  // column field is not set, then all rows are assumed to have equal weight
  // of 1.
  string weight_column = 7;

  // Column names that should be used as attribute columns.
  // The value of these columns does not vary as a function of time.
  // For example, store ID or item color.
  repeated string time_series_attribute_columns = 19;

  // Names of columns that are unavailable when a forecast is requested.
  // These columns contain information for the given entity (identified
  // by the time_series_identifier_column) that is unknown before the
  // forecast. For example, actual weather on a given day.
  repeated string unavailable_at_forecast_columns = 20;

  // Names of columns that are available and provided when a forecast
  // is requested. These columns contain information for the given entity
  // (identified by the time_series_identifier_column) that is known at
  // forecast. For example, predicted weather for a specific day.
  repeated string available_at_forecast_columns = 21;

  // Expected difference in time granularity between rows in the data.
  Granularity data_granularity = 22;

  // The amount of time into the future for which forecasted values for the
  // target are returned. Expressed in number of units defined by the
  // `data_granularity` field.
  int64 forecast_horizon = 23;

  // The amount of time into the past training and prediction data is used
  // for model training and prediction respectively. Expressed in number of
  // units defined by the `data_granularity` field.
  int64 context_window = 24;

  // Configuration for exporting test set predictions to a BigQuery table. If
  // this configuration is absent, then the export is not performed.
  ExportEvaluatedDataItemsConfig export_evaluated_data_items_config = 15;

  // Quantiles to use for minimize-quantile-loss `optimization_objective`. Up
  // to 5 quantiles are allowed of values between 0 and 1, exclusive. Required
  // if the value of optimization_objective is minimize-quantile-loss.
  // Represents the percent quantiles to use for that objective. Quantiles
  // must be unique.
  repeated double quantiles = 16;

  // Validation options for the data validation component. The available
  // options are:
  //
  // * "fail-pipeline" - default, will validate against the validation and
  //   fail the pipeline if it fails.
  //
  // * "ignore-validation" - ignore the results of the validation and
  //   continue.
  string validation_options = 17;

  // Additional experiment flags for the time series forecasting training.
  repeated string additional_experiments = 25;
}

// AutoML Forecasting-specific model metadata.
message AutoMlForecastingMetadata {
  // Output only. Actual cost of training the model, in milli node hours
  // (a value of 1,000 in this field means 1 node hour). Guaranteed to not
  // exceed the train budget.
  int64 train_cost_milli_node_hours = 1;
}