cloudquery · amanenk · Aug 3, 2022 · Aug 2, 2022 · Aug 3, 2022 · Aug 3, 2022
@@ -758,6 +758,8 @@ type GlueClient interface {
 	GetJobRuns(ctx context.Context, params *glue.GetJobRunsInput, optFns ...func(*glue.Options)) (*glue.GetJobRunsOutput, error)
 	GetDatabases(ctx context.Context, params *glue.GetDatabasesInput, optFns ...func(*glue.Options)) (*glue.GetDatabasesOutput, error)
 	GetTables(ctx context.Context, params *glue.GetTablesInput, optFns ...func(*glue.Options)) (*glue.GetTablesOutput, error)
+	GetMLTransforms(ctx context.Context, params *glue.GetMLTransformsInput, optFns ...func(*glue.Options)) (*glue.GetMLTransformsOutput, error)
+	GetMLTaskRuns(ctx context.Context, params *glue.GetMLTaskRunsInput, optFns ...func(*glue.Options)) (*glue.GetMLTaskRunsOutput, error)
 }
 
 //go:generate mockgen -package=mocks -destination=./mocks/kinesis.go . KinesisClient

@@ -0,0 +1,11 @@
+
+# Table: aws_glue_ml_transform_input_record_tables
+The database and table in the Glue Data Catalog that are used for input or output data
+## Columns
+| Name        | Type           | Description  |
+| ------------- | ------------- | -----  |
+|ml_transform_cq_id|uuid|Unique CloudQuery ID of aws_glue_ml_transforms table (FK)|
+|database_name|text|A database name in the Glue Data Catalog|
+|table_name|text|A table name in the Glue Data Catalog|
+|catalog_id|text|A unique identifier for the Glue Data Catalog|
+|connection_name|text|The name of the connection to the Glue Data Catalog|
@@ -0,0 +1,24 @@
+
+# Table: aws_glue_ml_transform_task_runs
+Task runs associated with a machine learning transform
+## Columns
+| Name        | Type           | Description  |
+| ------------- | ------------- | -----  |
+|ml_transform_cq_id|uuid|Unique CloudQuery ID of aws_glue_ml_transforms table (FK)|
+|completed_on|timestamp without time zone|The last point in time that the requested task run was completed|
+|error_string|text|The list of error strings associated with this task run|
+|execution_time|bigint|The amount of time (in seconds) that the task run consumed resources|
+|last_modified_on|timestamp without time zone|The last point in time that the requested task run was updated|
+|log_group_name|text|The name of the log group for secure logging, associated with this task run|
+|export_labels_task_run_properties_output_s3_path|text|The Amazon Simple Storage Service (Amazon S3) path where you will export the labels|
+|find_matches_task_run_properties_job_id|text|The job ID for the Find Matches task run|
+|find_matches_task_run_properties_job_name|text|The name assigned to the job for the Find Matches task run|
+|find_matches_task_run_properties_job_run_id|text|The job run ID for the Find Matches task run|
+|import_labels_task_run_properties_input_s3_path|text|The Amazon Simple Storage Service (Amazon S3) path from where you will import the labels|
+|import_labels_task_run_properties_replace|boolean|Indicates whether to overwrite your existing labels|
+|labeling_set_generation_task_run_properties_output_s3_path|text|The Amazon Simple Storage Service (Amazon S3) path where you will generate the labeling set|
+|task_type|text|The type of task run|
+|started_on|timestamp without time zone|The date and time that this task run started|
+|status|text|The current status of the requested task run|
+|id|text|The unique identifier for this task run|
+|transform_id|text|The unique identifier for the transform|
@@ -0,0 +1,40 @@
+
+# Table: aws_glue_ml_transforms
+A structure for a machine learning transform
+## Columns
+| Name        | Type           | Description  |
+| ------------- | ------------- | -----  |
+|account_id|text|The AWS Account ID of the resource.|
+|region|text|The AWS Region of the resource.|
+|arn|text|The Amazon Resource Name (ARN) of the machine learning transform.|
+|tags|jsonb|Resource tags|
+|created_on|timestamp without time zone|The time and date that this machine learning transform was created|
+|description|text|A user-defined, long-form description text for the machine learning transform. Descriptions are not guaranteed to be unique and can be changed at any time|
+|evaluation_metrics_transform_type|text|The type of machine learning transform|
+|evaluation_metrics_find_matches_metrics_area_under_pr_curve|float|The area under the precision/recall curve (AUPRC) is a single number measuring the overall quality of the transform, that is independent of the choice made for precision vs. recall|
+|evaluation_metrics_find_matches_metrics_column_importances|jsonb|A list of ColumnImportance structures containing column importance metrics, sorted in order of descending importance|
+|evaluation_metrics_find_matches_metrics_confusion_matrix|jsonb|The confusion matrix shows you what your transform is predicting accurately and what types of errors it is making|
+|evaluation_metrics_find_matches_metrics_f1|float|The maximum F1 metric indicates the transform's accuracy between 0 and 1, where 1 is the best accuracy|
+|evaluation_metrics_find_matches_metrics_precision|float|The precision metric indicates how often your transform is correct when it predicts a match|
+|evaluation_metrics_find_matches_metrics_recall|float|The recall metric indicates that for an actual match, how often your transform predicts the match|
+|glue_version|text|This value determines which version of Glue this machine learning transform is compatible with|
+|label_count|bigint|A count identifier for the labeling files generated by Glue for this transform. As you create a better transform, you can iteratively download, label, and upload the labeling file|
+|last_modified_on|timestamp without time zone|The last point in time when this machine learning transform was modified|
+|max_capacity|float|The number of Glue data processing units (DPUs) that are allocated to task runs for this transform|
+|max_retries|bigint|The maximum number of times to retry after an MLTaskRun of the machine learning transform fails|
+|name|text|A user-defined name for the machine learning transform|
+|number_of_workers|bigint|The number of workers of a defined workerType that are allocated when a task of the transform runs|
+|parameters_transform_type|text|The type of machine learning transform|
+|parameters_find_matches_parameters_accuracy_cost_tradeoff|float|The value that is selected when tuning your transform for a balance between accuracy and cost|
+|parameters_find_matches_parameters_enforce_provided_labels|boolean|The value to switch on or off to force the output to match the provided labels from users|
+|parameters_find_matches_parameters_precision_recall_tradeoff|float|The value selected when tuning your transform for a balance between precision and recall|
+|parameters_find_matches_parameters_primary_key_column_name|text|The name of a column that uniquely identifies rows in the source table|
+|role|text|The name or Amazon Resource Name (ARN) of the IAM role with the required permissions|
+|schema|jsonb|A map of key-value pairs representing the columns and data types that this transform can run against|
+|status|text|The current status of the machine learning transform|
+|timeout|bigint|The timeout in minutes of the machine learning transform|
+|transform_encryption_user_data_encryption_mode|text|The encryption mode applied to user data|
+|transform_encryption_ml_user_data_encryption_kms_key_id|text|The ID for the customer-provided KMS key|
+|transform_encryption_task_run_security_configuration_name|text|The name of the security configuration|
+|id|text|The unique transform ID that is generated for the machine learning transform. The ID is guaranteed to be unique and does not change|
+|worker_type|text|The type of predefined worker that is allocated when a task of this transform runs|
@@ -170,6 +170,7 @@ func Provider() *provider.Provider {
 			"fsx.backups":                             fsx.FsxBackups(),
 			"glue.databases":                          glue.Databases(),
 			"glue.jobs":                               glue.Jobs(),
+			"glue.ml_transforms":                      glue.MlTransforms(),
 			"glue.workflows":                          glue.Workflows(),
 			"guardduty.detectors":                     guardduty.GuarddutyDetectors(),
 			"iam.accounts":                            iam.IamAccounts(),