diff --git a/.changelog/32528.txt b/.changelog/32528.txt new file mode 100644 index 000000000000..69ec249f2a13 --- /dev/null +++ b/.changelog/32528.txt @@ -0,0 +1,3 @@ +```release-note:enhancement +resource/aws_glue_job: Add `command.runtime` attribute +``` \ No newline at end of file diff --git a/internal/service/glue/job.go b/internal/service/glue/job.go index b9d3776fc820..068f692f38ac 100644 --- a/internal/service/glue/job.go +++ b/internal/service/glue/job.go @@ -54,16 +54,22 @@ func ResourceJob() *schema.Resource { Optional: true, Default: "glueetl", }, - "script_location": { - Type: schema.TypeString, - Required: true, - }, "python_version": { Type: schema.TypeString, Optional: true, Computed: true, ValidateFunc: validation.StringInSlice([]string{"2", "3", "3.9"}, true), }, + "runtime": { + Type: schema.TypeString, + Optional: true, + Computed: true, + ValidateFunc: validation.StringInSlice([]string{"Ray2.4"}, true), + }, + "script_location": { + Type: schema.TypeString, + Required: true, + }, }, }, }, @@ -81,11 +87,6 @@ func ResourceJob() *schema.Resource { Type: schema.TypeString, Optional: true, }, - "glue_version": { - Type: schema.TypeString, - Optional: true, - Computed: true, - }, "execution_class": { Type: schema.TypeString, Optional: true, @@ -107,6 +108,11 @@ func ResourceJob() *schema.Resource { }, }, }, + "glue_version": { + Type: schema.TypeString, + Optional: true, + Computed: true, + }, "max_capacity": { Type: schema.TypeFloat, Optional: true, @@ -247,7 +253,6 @@ func resourceJobCreate(ctx context.Context, d *schema.ResourceData, meta interfa input.WorkerType = aws.String(v.(string)) } - log.Printf("[DEBUG] Creating Glue Job: %s", input) output, err := conn.CreateJobWithContext(ctx, input) if err != nil { @@ -385,7 +390,6 @@ func resourceJobUpdate(ctx context.Context, d *schema.ResourceData, meta interfa JobUpdate: jobUpdate, } - log.Printf("[DEBUG] Updating Glue Job: %s", input) _, err := conn.UpdateJobWithContext(ctx, input) if err != nil { 
@@ -438,6 +442,10 @@ func expandJobCommand(l []interface{}) *glue.JobCommand { jobCommand.PythonVersion = aws.String(v) } + if v, ok := m["runtime"].(string); ok && v != "" { + jobCommand.Runtime = aws.String(v) + } + return jobCommand } @@ -480,6 +488,7 @@ func flattenJobCommand(jobCommand *glue.JobCommand) []map[string]interface{} { "name": aws.StringValue(jobCommand.Name), "script_location": aws.StringValue(jobCommand.ScriptLocation), "python_version": aws.StringValue(jobCommand.PythonVersion), + "runtime": aws.StringValue(jobCommand.Runtime), } return []map[string]interface{}{m} diff --git a/internal/service/glue/job_test.go b/internal/service/glue/job_test.go index 72c49fc7bb6f..e0c46457f747 100644 --- a/internal/service/glue/job_test.go +++ b/internal/service/glue/job_test.go @@ -726,6 +726,34 @@ func TestAccGlueJob_pythonShell(t *testing.T) { }) } +func TestAccGlueJob_rayJob(t *testing.T) { + ctx := acctest.Context(t) + var job glue.Job + rName := sdkacctest.RandomWithPrefix(acctest.ResourcePrefix) + resourceName := "aws_glue_job.test" + + resource.ParallelTest(t, resource.TestCase{ + PreCheck: func() { acctest.PreCheck(ctx, t) }, + ErrorCheck: acctest.ErrorCheck(t, glue.EndpointsID), + ProtoV5ProviderFactories: acctest.ProtoV5ProviderFactories, + CheckDestroy: testAccCheckJobDestroy(ctx), + Steps: []resource.TestStep{ + { + Config: testAccJobConfig_rayJob(rName), + Check: resource.ComposeTestCheckFunc( + testAccCheckJobExists(ctx, resourceName, &job), + resource.TestCheckResourceAttr(resourceName, "command.#", "1"), + resource.TestCheckResourceAttr(resourceName, "command.0.script_location", "testscriptlocation"), + resource.TestCheckResourceAttr(resourceName, "command.0.name", "glueray"), + resource.TestCheckResourceAttr(resourceName, "command.0.python_version", "3.9"), + resource.TestCheckResourceAttr(resourceName, "command.0.runtime", "Ray2.4"), + resource.TestCheckResourceAttr(resourceName, "worker_type", "Z.2X"), + ), + }, + }, + }) +} + func 
TestAccGlueJob_maxCapacity(t *testing.T) { ctx := acctest.Context(t) var job glue.Job @@ -1198,6 +1226,27 @@ resource "aws_glue_job" "test" { `, rName, pythonVersion)) } +func testAccJobConfig_rayJob(rName string) string { + return acctest.ConfigCompose(testAccJobConfig_base(rName), fmt.Sprintf(` +resource "aws_glue_job" "test" { + glue_version = "4.0" + name = %[1]q + role_arn = aws_iam_role.test.arn + worker_type = "Z.2X" + number_of_workers = 10 + + command { + name = "glueray" + python_version = "3.9" + runtime = "Ray2.4" + script_location = "testscriptlocation" + } + + depends_on = [aws_iam_role_policy_attachment.test] +} +`, rName)) +} + func testAccJobConfig_maxCapacity(rName string, maxCapacity float64) string { return acctest.ConfigCompose(testAccJobConfig_base(rName), fmt.Sprintf(` resource "aws_glue_job" "test" { diff --git a/website/docs/r/glue_job.html.markdown b/website/docs/r/glue_job.html.markdown index b5004509ff32..bbf9f352dde9 100644 --- a/website/docs/r/glue_job.html.markdown +++ b/website/docs/r/glue_job.html.markdown @@ -27,6 +27,24 @@ resource "aws_glue_job" "example" { } ``` +### Ray Job + +```terraform +resource "aws_glue_job" "example" { + name = "example" + role_arn = aws_iam_role.example.arn + glue_version = "4.0" + worker_type = "Z.2X" + + command { + name = "glueray" + python_version = "3.9" + runtime = "Ray2.4" + script_location = "s3://${aws_s3_bucket.example.bucket}/example.py" + } +} +``` + ### Scala Job ```terraform @@ -89,7 +107,7 @@ The following arguments are supported: * `non_overridable_arguments` – (Optional) Non-overridable arguments for this job, specified as name-value pairs. * `description` – (Optional) Description of the job. * `execution_property` – (Optional) Execution property of the job. Defined below. -* `glue_version` - (Optional) The version of glue to use, for example "1.0". For information about available versions, see the [AWS Glue Release Notes](https://docs.aws.amazon.com/glue/latest/dg/release-notes.html). 
+* `glue_version` - (Optional) The version of glue to use, for example "1.0". Ray jobs should set this to 4.0 or greater. For information about available versions, see the [AWS Glue Release Notes](https://docs.aws.amazon.com/glue/latest/dg/release-notes.html). * `execution_class` - (Optional) Indicates whether the job is run with a standard or flexible execution class. The standard execution class is ideal for time-sensitive workloads that require fast job startup and dedicated resources. Valid value: `FLEX`, `STANDARD`. * `max_capacity` – (Optional) The maximum number of AWS Glue data processing units (DPUs) that can be allocated when this job runs. `Required` when `pythonshell` is set, accept either `0.0625` or `1.0`. Use `number_of_workers` and `worker_type` arguments instead with `glue_version` `2.0` and above. * `max_retries` – (Optional) The maximum number of times to retry this job if it fails. @@ -99,14 +117,20 @@ The following arguments are supported: * `tags` - (Optional) Key-value map of resource tags. If configured with a provider [`default_tags` configuration block](https://registry.terraform.io/providers/hashicorp/aws/latest/docs#default_tags-configuration-block) present, tags with matching keys will overwrite those defined at the provider-level. * `timeout` – (Optional) The job timeout in minutes. The default is 2880 minutes (48 hours) for `glueetl` and `pythonshell` jobs, and null (unlimited) for `gluestreaming` jobs. * `security_configuration` - (Optional) The name of the Security Configuration to be associated with the job. -* `worker_type` - (Optional) The type of predefined worker that is allocated when a job runs. Accepts a value of Standard, G.1X, or G.2X. +* `worker_type` - (Optional) The type of predefined worker that is allocated when a job runs. Accepts a value of Standard, G.1X, G.2X, or G.025X for Spark jobs. Accepts the value Z.2X for Ray jobs. 
+ * For the Standard worker type, each worker provides 4 vCPU, 16 GB of memory and a 50 GB disk, and 2 executors per worker. + * For the G.1X worker type, each worker maps to 1 DPU (4 vCPU, 16 GB of memory, 64 GB disk), and provides 1 executor per worker. Recommended for memory-intensive jobs. + * For the G.2X worker type, each worker maps to 2 DPU (8 vCPU, 32 GB of memory, 128 GB disk), and provides 1 executor per worker. Recommended for memory-intensive jobs. + * For the G.025X worker type, each worker maps to 0.25 DPU (2 vCPU, 4 GB of memory, 64 GB disk), and provides 1 executor per worker. Recommended for low volume streaming jobs. Only available for Glue version 3.0. + * For the Z.2X worker type, each worker maps to 2 M-DPU (8 vCPU, 64 GB of memory, 128 GB disk), and provides up to 8 Ray workers based on the autoscaler. * `number_of_workers` - (Optional) The number of workers of a defined workerType that are allocated when a job runs. ### command Argument Reference -* `name` - (Optional) The name of the job command. Defaults to `glueetl`. Use `pythonshell` for Python Shell Job Type, or `gluestreaming` for Streaming Job Type. `max_capacity` needs to be set if `pythonshell` is chosen. +* `name` - (Optional) The name of the job command. Defaults to `glueetl`. Use `pythonshell` for Python Shell Job Type, `glueray` for Ray Job Type, or `gluestreaming` for Streaming Job Type. `max_capacity` needs to be set if `pythonshell` is chosen. * `script_location` - (Required) Specifies the S3 path to a script that executes a job. * `python_version` - (Optional) The Python version being used to execute a Python shell job. Allowed values are 2, 3 or 3.9. Version 3 refers to Python 3.6. +* `runtime` - (Optional) In Ray jobs, runtime is used to specify the versions of Ray, Python and additional libraries available in your environment. This field is not used in other job types. 
For supported runtime environment values, see [Working with Ray jobs](https://docs.aws.amazon.com/glue/latest/dg/ray-jobs-section.html#author-job-ray-runtimes) in the Glue Developer Guide. ### execution_property Argument Reference