This repository has been archived by the owner on Aug 16, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 14
/
bigquery_dataset_tables.go
503 lines (491 loc) · 21.8 KB
/
bigquery_dataset_tables.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
package bigquery
import (
"context"
"github.com/cloudquery/cq-provider-gcp/client"
"github.com/cloudquery/cq-provider-sdk/provider/diag"
"github.com/cloudquery/cq-provider-sdk/provider/schema"
"golang.org/x/sync/errgroup"
"golang.org/x/sync/semaphore"
"google.golang.org/api/bigquery/v2"
)
// MAX_GOROUTINES is the weight of the semaphore that listBigqueryDatasetTables
// creates to cap how many per-table fetch goroutines run at the same time.
const MAX_GOROUTINES = 10
// BigqueryDatasetTables returns the schema definition of the
// gcp_bigquery_dataset_tables table. The table is populated by
// listBigqueryDatasetTables and declares two child relations:
// gcp_bigquery_dataset_table_dataset_model_training_runs and
// gcp_bigquery_dataset_table_user_defined_functions.
//
// Columns that declare no Resolver rely on whatever default resolution the
// schema package applies (presumably matching the column name to a field of the
// fetched item — TODO confirm against the SDK).
func BigqueryDatasetTables() *schema.Table {
return &schema.Table{
Name: "gcp_bigquery_dataset_tables",
// NOTE(review): this description talks about model training options rather
// than dataset tables; it looks like it was copied from another field — confirm.
Description: "Model options used for the first training run These options are immutable for subsequent training runs Default values are used for any options not specified in the input query",
IgnoreError: client.IgnoreErrorHandler,
Resolver: listBigqueryDatasetTables,
Columns: []schema.Column{
// Link columns back to the parent dataset resource.
{
Name: "dataset_cq_id",
Type: schema.TypeUUID,
Resolver: schema.ParentIdResolver,
},
{
Name: "dataset_id",
Type: schema.TypeString,
Resolver: schema.ParentResourceFieldResolver("id"),
},
{
Name: "clustering_fields",
Description: "One or more fields on which data should be clustered Only top-level, non-repeated, simple-type fields are supported When you cluster a table using multiple columns, the order of columns you specify is important The order of the specified columns determines the sort order of the data",
Type: schema.TypeStringArray,
Resolver: schema.PathResolver("Clustering.Fields"),
},
{
Name: "creation_time",
Description: "The time when this table was created, in milliseconds since the epoch",
Type: schema.TypeBigInt,
},
{
Name: "description",
Description: "A user-friendly description of this table",
Type: schema.TypeString,
},
{
Name: "encryption_configuration_kms_key_name",
Description: "Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table The BigQuery Service Account associated with your project requires access to this encryption key",
Type: schema.TypeString,
Resolver: schema.PathResolver("EncryptionConfiguration.KmsKeyName"),
},
{
Name: "etag",
Description: "A hash of the table metadata Used to ensure there were no concurrent modifications to the resource when attempting an update Not guaranteed to change when the table contents or the fields numRows, numBytes, numLongTermBytes or lastModifiedTime change",
Type: schema.TypeString,
},
{
Name: "expiration_time",
Description: "The time when this table expires, in milliseconds since the epoch If not present, the table will persist indefinitely Expired tables will be deleted and their storage reclaimed The defaultTableExpirationMs property of the encapsulating dataset can be used to set a default expirationTime on newly created tables",
Type: schema.TypeBigInt,
},
{
Name: "external_data_configuration_autodetect",
Description: "Try to detect schema and format options automatically Any option specified explicitly will be honored",
Type: schema.TypeBool,
Resolver: schema.PathResolver("ExternalDataConfiguration.Autodetect"),
},
{
Name: "external_data_configuration_compression",
Description: "The compression type of the data source Possible values include GZIP and NONE The default value is NONE This setting is ignored for Google Cloud Bigtable, Google Cloud Datastore backups and Avro formats",
Type: schema.TypeString,
Resolver: schema.PathResolver("ExternalDataConfiguration.Compression"),
},
{
Name: "external_data_configuration_connection_id",
Description: "Connection for external data source",
Type: schema.TypeString,
Resolver: schema.PathResolver("ExternalDataConfiguration.ConnectionId"),
},
{
Name: "external_data_configuration_ignore_unknown_values",
Description: "Indicates if BigQuery should allow extra values that are not represented in the table schema",
Type: schema.TypeBool,
Resolver: schema.PathResolver("ExternalDataConfiguration.IgnoreUnknownValues"),
},
{
Name: "external_data_configuration_max_bad_records",
Description: "The maximum number of bad records that BigQuery can ignore when reading data If the number of bad records exceeds this value, an invalid error is returned in the job result This is only valid for CSV, JSON, and Google Sheets The default value is 0, which requires that all records are valid This setting is ignored for Google Cloud Bigtable, Google Cloud Datastore backups and Avro formats",
Type: schema.TypeBigInt,
Resolver: schema.PathResolver("ExternalDataConfiguration.MaxBadRecords"),
},
{
Name: "external_data_configuration_schema",
Description: "The schema for the data Schema is required for CSV and JSON formats Schema is disallowed for Google Cloud Bigtable, Cloud Datastore backups, and Avro formats",
Type: schema.TypeJSON,
IgnoreInTests: true,
Resolver: resolveBigqueryDatasetTableExternalDataConfigurationSchema,
},
{
Name: "external_data_configuration_source_format",
Description: "The data format For CSV files, specify \"CSV\" For Google sheets, specify \"GOOGLE_SHEETS\" For newline-delimited JSON, specify \"NEWLINE_DELIMITED_JSON\" For Avro files, specify \"AVRO\" For Google Cloud Datastore backups, specify \"DATASTORE_BACKUP\" [Beta] For Google Cloud Bigtable, specify \"BIGTABLE\"",
Type: schema.TypeString,
Resolver: schema.PathResolver("ExternalDataConfiguration.SourceFormat"),
},
{
Name: "external_data_configuration_source_uris",
Description: "The fully-qualified URIs that point to your data in Google Cloud For Google Cloud Storage URIs: Each URI can contain one '*' wildcard character and it must come after the 'bucket' name Size limits related to load jobs apply to external data sources For Google Cloud Bigtable URIs: Exactly one URI can be specified and it has be a fully specified and valid HTTPS URL for a Google Cloud Bigtable table For Google Cloud Datastore backups, exactly one URI can be specified Also, the '*' wildcard character is not allowed",
Type: schema.TypeStringArray,
IgnoreInTests: true,
Resolver: schema.PathResolver("ExternalDataConfiguration.SourceUris"),
},
{
Name: "friendly_name",
Description: "A descriptive name for this table",
Type: schema.TypeString,
},
{
Name: "id",
Description: "An opaque ID uniquely identifying the table",
Type: schema.TypeString,
},
{
Name: "kind",
Description: "The type of the resource",
Type: schema.TypeString,
},
{
Name: "labels",
Description: "The labels associated with this table You can use these to organize and group your tables Label keys and values can be no longer than 63 characters, can only contain lowercase letters, numeric characters, underscores and dashes International characters are allowed Label values are optional Label keys must start with a letter and each label in the list must have a different key",
Type: schema.TypeJSON,
},
{
Name: "last_modified_time",
Description: "The time when this table was last modified, in milliseconds since the epoch",
Type: schema.TypeBigInt,
},
{
Name: "location",
Description: "The geographic location where the table resides This value is inherited from the dataset",
Type: schema.TypeString,
},
{
Name: "materialized_view_enable_refresh",
Description: "Enable automatic refresh of the materialized view when the base table is updated The default value is \"true\"",
Type: schema.TypeBool,
Resolver: schema.PathResolver("MaterializedView.EnableRefresh"),
},
{
Name: "materialized_view_last_refresh_time",
Description: "The time when this materialized view was last modified, in milliseconds since the epoch",
Type: schema.TypeBigInt,
Resolver: schema.PathResolver("MaterializedView.LastRefreshTime"),
},
{
Name: "materialized_view_query",
Description: "A query whose result is persisted",
Type: schema.TypeString,
Resolver: schema.PathResolver("MaterializedView.Query"),
},
{
Name: "materialized_view_refresh_interval_ms",
Description: "The maximum frequency at which this materialized view will be refreshed The default value is \"1800000\" (30 minutes)",
Type: schema.TypeBigInt,
Resolver: schema.PathResolver("MaterializedView.RefreshIntervalMs"),
},
{
Name: "model_options_labels",
Type: schema.TypeStringArray,
IgnoreInTests: true,
Resolver: schema.PathResolver("Model.ModelOptions.Labels"),
},
{
Name: "model_options_loss_type",
Type: schema.TypeString,
Resolver: schema.PathResolver("Model.ModelOptions.LossType"),
},
{
Name: "model_options_model_type",
Type: schema.TypeString,
Resolver: schema.PathResolver("Model.ModelOptions.ModelType"),
},
{
Name: "num_bytes",
Description: "The size of this table in bytes, excluding any data in the streaming buffer",
Type: schema.TypeBigInt,
},
{
Name: "num_long_term_bytes",
Description: "The number of bytes in the table that are considered \"long-term storage\"",
Type: schema.TypeBigInt,
},
{
Name: "num_physical_bytes",
Description: "The physical size of this table in bytes, excluding any data in the streaming buffer This includes compression and storage used for time travel",
Type: schema.TypeBigInt,
},
{
Name: "num_rows",
Description: "The number of rows of data in this table, excluding any data in the streaming buffer",
Type: schema.TypeBigInt,
},
{
Name: "range_partitioning_field",
Description: "The table is partitioned by this field The field must be a top-level NULLABLE/REQUIRED field The only supported type is INTEGER/INT64",
Type: schema.TypeString,
Resolver: schema.PathResolver("RangePartitioning.Field"),
},
{
Name: "range_partitioning_range_end",
Description: "The end of range partitioning, exclusive",
Type: schema.TypeBigInt,
Resolver: schema.PathResolver("RangePartitioning.Range.End"),
},
{
Name: "range_partitioning_range_interval",
Description: "The width of each interval",
Type: schema.TypeBigInt,
Resolver: schema.PathResolver("RangePartitioning.Range.Interval"),
},
{
Name: "range_partitioning_range_start",
Description: "The start of range partitioning, inclusive",
Type: schema.TypeBigInt,
Resolver: schema.PathResolver("RangePartitioning.Range.Start"),
},
{
Name: "require_partition_filter",
Description: "If set to true, queries over this table require a partition filter that can be used for partition elimination to be specified",
Type: schema.TypeBool,
},
{
Name: "schema",
Description: "Describes the schema of this table",
Type: schema.TypeJSON,
Resolver: resolveBigqueryDatasetTableSchema,
},
{
Name: "self_link",
Description: "A URL that can be used to access this resource again",
Type: schema.TypeString,
},
{
Name: "streaming_buffer_estimated_bytes",
Description: "A lower-bound estimate of the number of bytes currently in the streaming buffer",
Type: schema.TypeBigInt,
Resolver: schema.PathResolver("StreamingBuffer.EstimatedBytes"),
},
{
Name: "streaming_buffer_estimated_rows",
Description: "A lower-bound estimate of the number of rows currently in the streaming buffer",
Type: schema.TypeBigInt,
Resolver: schema.PathResolver("StreamingBuffer.EstimatedRows"),
},
{
Name: "streaming_buffer_oldest_entry_time",
Description: "Contains the timestamp of the oldest entry in the streaming buffer, in milliseconds since the epoch, if the streaming buffer is available",
Type: schema.TypeBigInt,
Resolver: schema.PathResolver("StreamingBuffer.OldestEntryTime"),
},
{
Name: "time_partitioning_expiration_ms",
Description: "Number of milliseconds for which to keep the storage for partitions in the table The storage in a partition will have an expiration time of its partition time plus this value",
Type: schema.TypeBigInt,
Resolver: schema.PathResolver("TimePartitioning.ExpirationMs"),
},
{
Name: "time_partitioning_field",
Description: "If not set, the table is partitioned by pseudo column, referenced via either '_PARTITIONTIME' as TIMESTAMP type, or '_PARTITIONDATE' as DATE type If field is specified, the table is instead partitioned by this field The field must be a top-level TIMESTAMP or DATE field Its mode must be NULLABLE or REQUIRED",
Type: schema.TypeString,
Resolver: schema.PathResolver("TimePartitioning.Field"),
},
{
Name: "time_partitioning_require_partition_filter",
Type: schema.TypeBool,
Resolver: schema.PathResolver("TimePartitioning.RequirePartitionFilter"),
},
{
Name: "time_partitioning_type",
Description: "The supported types are DAY, HOUR, MONTH, and YEAR, which will generate one partition per day, hour, month, and year, respectively When the type is not specified, the default behavior is DAY",
Type: schema.TypeString,
Resolver: schema.PathResolver("TimePartitioning.Type"),
},
{
Name: "type",
Description: "Describes the table type The following values are supported: TABLE: A normal BigQuery table VIEW: A virtual table defined by a SQL query SNAPSHOT: An immutable, read-only table that is a copy of another table MATERIALIZED_VIEW: SQL query whose result is persisted EXTERNAL: A table that references data stored in an external storage system, such as Google Cloud Storage The default value is TABLE",
Type: schema.TypeString,
},
{
Name: "view_query",
Description: "A query that BigQuery executes when the view is referenced",
Type: schema.TypeString,
Resolver: schema.PathResolver("View.Query"),
},
{
Name: "view_use_legacy_sql",
// NOTE(review): the URL in this description appears to have lost its dots
// ("cloudgooglecom") — confirm before correcting the string.
Description: "Specifies whether to use BigQuery's legacy SQL for this view The default value is true If set to false, the view will use BigQuery's standard SQL: https://cloudgooglecom/bigquery/sql-reference/ Queries and views that reference this view must use the same flag value",
Type: schema.TypeBool,
Resolver: schema.PathResolver("View.UseLegacySql"),
},
},
// Child tables whose rows are derived from each fetched table resource.
Relations: []*schema.Table{
{
Name: "gcp_bigquery_dataset_table_dataset_model_training_runs",
Description: "Training options used by this training run These options are mutable for subsequent training runs Default values are explicitly stored for options not specified in the input query of the first training run For subsequent training runs, any option not explicitly specified in the input query will be copied from the previous training run",
Resolver: fetchBigqueryDatasetTableDatasetModelTrainingRuns,
IgnoreInTests: true,
Columns: []schema.Column{
{
Name: "dataset_table_cq_id",
Description: "Unique ID of gcp_bigquery_dataset_tables table (FK)",
Type: schema.TypeUUID,
Resolver: schema.ParentIdResolver,
},
{
Name: "dataset_table_id",
Type: schema.TypeString,
Resolver: schema.ParentResourceFieldResolver("id"),
},
{
Name: "start_time",
Description: "Training run start time in milliseconds since the epoch",
Type: schema.TypeString,
},
{
Name: "state",
Description: "Different state applicable for a training run IN PROGRESS: Training run is in progress FAILED: Training run ended due to a non-retryable failure SUCCEEDED: Training run successfully completed CANCELLED: Training run cancelled by the user",
Type: schema.TypeString,
},
{
Name: "training_options_early_stop",
Type: schema.TypeBool,
Resolver: schema.PathResolver("TrainingOptions.EarlyStop"),
},
{
Name: "training_options_l1_reg",
Type: schema.TypeFloat,
Resolver: schema.PathResolver("TrainingOptions.L1Reg"),
},
{
Name: "training_options_l2_reg",
Type: schema.TypeFloat,
Resolver: schema.PathResolver("TrainingOptions.L2Reg"),
},
{
Name: "training_options_learn_rate",
Type: schema.TypeFloat,
Resolver: schema.PathResolver("TrainingOptions.LearnRate"),
},
{
Name: "training_options_learn_rate_strategy",
Type: schema.TypeString,
Resolver: schema.PathResolver("TrainingOptions.LearnRateStrategy"),
},
{
Name: "training_options_line_search_init_learn_rate",
Type: schema.TypeFloat,
Resolver: schema.PathResolver("TrainingOptions.LineSearchInitLearnRate"),
},
{
Name: "training_options_max_iteration",
Type: schema.TypeBigInt,
Resolver: schema.PathResolver("TrainingOptions.MaxIteration"),
},
{
Name: "training_options_min_rel_progress",
Type: schema.TypeFloat,
Resolver: schema.PathResolver("TrainingOptions.MinRelProgress"),
},
{
Name: "training_options_warm_start",
Type: schema.TypeBool,
Resolver: schema.PathResolver("TrainingOptions.WarmStart"),
},
},
},
{
Name: "gcp_bigquery_dataset_table_user_defined_functions",
Description: "This is used for defining User Defined Function (UDF) resources only when using legacy SQL",
Resolver: fetchBigqueryDatasetTableUserDefinedFunctions,
IgnoreInTests: true,
Columns: []schema.Column{
{
Name: "dataset_table_cq_id",
Description: "Unique ID of gcp_bigquery_dataset_tables table (FK)",
Type: schema.TypeUUID,
Resolver: schema.ParentIdResolver,
},
{
Name: "dataset_table_id",
Type: schema.TypeString,
Resolver: schema.ParentResourceFieldResolver("id"),
},
{
Name: "inline_code",
Description: "An inline resource that contains code for a user-defined function (UDF) Providing a inline code resource is equivalent to providing a URI for a file containing the same code",
Type: schema.TypeString,
},
{
Name: "resource_uri",
Description: "A code resource to load from a Google Cloud Storage URI (gs://bucket/path)",
Type: schema.TypeString,
},
},
},
},
}
}
// ====================================================================================================================
// Table Resolver Functions
// ====================================================================================================================
// listBigqueryDatasetTables is the table resolver for gcp_bigquery_dataset_tables.
//
// It pages through the tables of the parent dataset and, for every listed
// table, starts a goroutine that fetches the full table resource and sends it
// on res. At most MAX_GOROUTINES fetches run at once (enforced by a weighted
// semaphore). Each page is processed by its own errgroup and fully drained
// before the next page is requested.
//
// parent.Item must be a *bigquery.Dataset and meta must be a *client.Client;
// both are asserted without a check. The returned error is wrapped with
// diag.WrapError.
func listBigqueryDatasetTables(ctx context.Context, meta schema.ClientMeta, parent *schema.Resource, res chan<- interface{}) error {
	sem := semaphore.NewWeighted(int64(MAX_GOROUTINES))
	p := parent.Item.(*bigquery.Dataset)
	c := meta.(*client.Client)
	nextPageToken := ""
	for {
		call := c.Services.BigQuery.Tables.List(c.ProjectId, p.DatasetReference.DatasetId).Context(ctx).PageToken(nextPageToken)
		list, err := c.RetryingDo(ctx, call)
		if err != nil {
			return diag.WrapError(err)
		}
		output := list.(*bigquery.TableList)

		// A distinct name for the group context keeps the caller's ctx visible
		// and makes it obvious which context the workers observe.
		errs, groupCtx := errgroup.WithContext(ctx)
		var acquireErr error
		for _, t := range output.Tables {
			// Acquire fails once groupCtx is done, i.e. when a worker has
			// failed or the caller cancelled. Stop scheduling, but do NOT
			// return yet: goroutines already started must be waited for so
			// they cannot keep sending on res after this resolver returns.
			if acquireErr = sem.Acquire(groupCtx, 1); acquireErr != nil {
				break
			}
			t := t // per-iteration copy for the closure (needed before Go 1.22)
			errs.Go(func() error {
				defer sem.Release(1)
				return fetchBigqueryDatasetTables(groupCtx, c, p, t, res)
			})
		}
		// Prefer the group's error: it is the root cause, whereas the Acquire
		// error would usually just be the resulting "context canceled".
		if err := errs.Wait(); err != nil {
			return diag.WrapError(err)
		}
		if acquireErr != nil {
			return diag.WrapError(acquireErr)
		}
		if output.NextPageToken == "" {
			break
		}
		nextPageToken = output.NextPageToken
	}
	return nil
}
// fetchBigqueryDatasetTables retrieves the full resource for one table listed
// in dataset p and sends it on res as a *bigquery.Table.
//
// The Get call is bound to ctx, matching the List call in
// listBigqueryDatasetTables, so that cancelling ctx also applies to this
// request. Any error from the call is returned wrapped with diag.WrapError;
// nothing is sent on res in that case.
func fetchBigqueryDatasetTables(ctx context.Context, c *client.Client, p *bigquery.Dataset, t *bigquery.TableListTables, res chan<- interface{}) error {
	call := c.Services.BigQuery.Tables.Get(c.ProjectId, p.DatasetReference.DatasetId, t.TableReference.TableId).Context(ctx)
	item, err := c.RetryingDo(ctx, call)
	if err != nil {
		return diag.WrapError(err)
	}
	res <- item.(*bigquery.Table)
	return nil
}
// resolveBigqueryDatasetTableExternalDataConfigurationSchema fills the column
// named c.Name with a map from field name to field type, built from the
// top-level fields of the table's external data configuration schema.
//
// The column is left unset when the table has no external data configuration
// or that configuration carries no schema. resource.Item must be a
// *bigquery.Table.
func resolveBigqueryDatasetTableExternalDataConfigurationSchema(ctx context.Context, meta schema.ClientMeta, resource *schema.Resource, c schema.Column) error {
	table := resource.Item.(*bigquery.Table)
	external := table.ExternalDataConfiguration
	if external == nil {
		return nil
	}
	if external.Schema == nil {
		return nil
	}

	fieldTypes := map[string]interface{}{}
	for _, field := range external.Schema.Fields {
		fieldTypes[field.Name] = field.Type
	}

	err := resource.Set(c.Name, fieldTypes)
	return diag.WrapError(err)
}
// resolveBigqueryDatasetTableSchema fills the column named c.Name with a map
// from field name to field type, built from the top-level fields of the
// table's schema.
//
// The column is left unset when the table has no schema. resource.Item must be
// a *bigquery.Table.
func resolveBigqueryDatasetTableSchema(ctx context.Context, meta schema.ClientMeta, resource *schema.Resource, c schema.Column) error {
	table := resource.Item.(*bigquery.Table)
	tableSchema := table.Schema
	if tableSchema == nil {
		return nil
	}

	fieldTypes := map[string]interface{}{}
	for _, field := range tableSchema.Fields {
		fieldTypes[field.Name] = field.Type
	}

	err := resource.Set(c.Name, fieldTypes)
	return diag.WrapError(err)
}
// fetchBigqueryDatasetTableDatasetModelTrainingRuns sends the training runs of
// the parent table's model on res as a single slice value. Nothing is sent
// when the table has no model. parent.Item must be a *bigquery.Table. The
// function always returns nil.
func fetchBigqueryDatasetTableDatasetModelTrainingRuns(ctx context.Context, meta schema.ClientMeta, parent *schema.Resource, res chan<- interface{}) error {
	table := parent.Item.(*bigquery.Table)
	if model := table.Model; model != nil {
		res <- model.TrainingRuns
	}
	return nil
}
// fetchBigqueryDatasetTableUserDefinedFunctions sends the user-defined
// function resources of the parent table's view on res as a single slice
// value. Nothing is sent when the table has no view definition. parent.Item
// must be a *bigquery.Table. The function always returns nil.
func fetchBigqueryDatasetTableUserDefinedFunctions(ctx context.Context, meta schema.ClientMeta, parent *schema.Resource, res chan<- interface{}) error {
	table := parent.Item.(*bigquery.Table)
	if view := table.View; view != nil {
		res <- view.UserDefinedFunctionResources
	}
	return nil
}