Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add runtime to Glue Ray job command #32528

Merged
merged 7 commits into from
Jul 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/32528.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:enhancement
resource/aws_glue_job: Add `command.runtime` attribute
```
31 changes: 20 additions & 11 deletions internal/service/glue/job.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,16 +54,22 @@ func ResourceJob() *schema.Resource {
Optional: true,
Default: "glueetl",
},
"script_location": {
Type: schema.TypeString,
Required: true,
},
"python_version": {
Type: schema.TypeString,
Optional: true,
Computed: true,
ValidateFunc: validation.StringInSlice([]string{"2", "3", "3.9"}, true),
},
"runtime": {
Type: schema.TypeString,
Optional: true,
Computed: true,
ValidateFunc: validation.StringInSlice([]string{"Ray2.4"}, true),
},
"script_location": {
Type: schema.TypeString,
Required: true,
},
},
},
},
Expand All @@ -81,11 +87,6 @@ func ResourceJob() *schema.Resource {
Type: schema.TypeString,
Optional: true,
},
"glue_version": {
Type: schema.TypeString,
Optional: true,
Computed: true,
},
"execution_class": {
Type: schema.TypeString,
Optional: true,
Expand All @@ -107,6 +108,11 @@ func ResourceJob() *schema.Resource {
},
},
},
"glue_version": {
Type: schema.TypeString,
Optional: true,
Computed: true,
},
"max_capacity": {
Type: schema.TypeFloat,
Optional: true,
Expand Down Expand Up @@ -247,7 +253,6 @@ func resourceJobCreate(ctx context.Context, d *schema.ResourceData, meta interfa
input.WorkerType = aws.String(v.(string))
}

log.Printf("[DEBUG] Creating Glue Job: %s", input)
output, err := conn.CreateJobWithContext(ctx, input)

if err != nil {
Expand Down Expand Up @@ -385,7 +390,6 @@ func resourceJobUpdate(ctx context.Context, d *schema.ResourceData, meta interfa
JobUpdate: jobUpdate,
}

log.Printf("[DEBUG] Updating Glue Job: %s", input)
_, err := conn.UpdateJobWithContext(ctx, input)

if err != nil {
Expand Down Expand Up @@ -438,6 +442,10 @@ func expandJobCommand(l []interface{}) *glue.JobCommand {
jobCommand.PythonVersion = aws.String(v)
}

if v, ok := m["runtime"].(string); ok && v != "" {
jobCommand.Runtime = aws.String(v)
}

return jobCommand
}

Expand Down Expand Up @@ -480,6 +488,7 @@ func flattenJobCommand(jobCommand *glue.JobCommand) []map[string]interface{} {
"name": aws.StringValue(jobCommand.Name),
"script_location": aws.StringValue(jobCommand.ScriptLocation),
"python_version": aws.StringValue(jobCommand.PythonVersion),
"runtime": aws.StringValue(jobCommand.Runtime),
}

return []map[string]interface{}{m}
Expand Down
49 changes: 49 additions & 0 deletions internal/service/glue/job_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,34 @@ func TestAccGlueJob_pythonShell(t *testing.T) {
})
}

func TestAccGlueJob_rayJob(t *testing.T) {
ctx := acctest.Context(t)
var job glue.Job
rName := sdkacctest.RandomWithPrefix(acctest.ResourcePrefix)
resourceName := "aws_glue_job.test"

resource.ParallelTest(t, resource.TestCase{
PreCheck: func() { acctest.PreCheck(ctx, t) },
ErrorCheck: acctest.ErrorCheck(t, glue.EndpointsID),
ProtoV5ProviderFactories: acctest.ProtoV5ProviderFactories,
CheckDestroy: testAccCheckJobDestroy(ctx),
Steps: []resource.TestStep{
{
Config: testAccJobConfig_rayJob(rName),
Check: resource.ComposeTestCheckFunc(
testAccCheckJobExists(ctx, resourceName, &job),
resource.TestCheckResourceAttr(resourceName, "command.#", "1"),
resource.TestCheckResourceAttr(resourceName, "command.0.script_location", "testscriptlocation"),
resource.TestCheckResourceAttr(resourceName, "command.0.name", "glueray"),
resource.TestCheckResourceAttr(resourceName, "command.0.python_version", "3.9"),
resource.TestCheckResourceAttr(resourceName, "command.0.runtime", "Ray2.4"),
resource.TestCheckResourceAttr(resourceName, "worker_type", "Z.2X"),
),
},
},
})
}

func TestAccGlueJob_maxCapacity(t *testing.T) {
ctx := acctest.Context(t)
var job glue.Job
Expand Down Expand Up @@ -1198,6 +1226,27 @@ resource "aws_glue_job" "test" {
`, rName, pythonVersion))
}

func testAccJobConfig_rayJob(rName string) string {
return acctest.ConfigCompose(testAccJobConfig_base(rName), fmt.Sprintf(`
resource "aws_glue_job" "test" {
glue_version = "4.0"
name = %[1]q
role_arn = aws_iam_role.test.arn
worker_type = "Z.2X"
number_of_workers = 10

command {
name = "glueray"
python_version = "3.9"
runtime = "Ray2.4"
script_location = "testscriptlocation"
}

depends_on = [aws_iam_role_policy_attachment.test]
}
`, rName))
}

func testAccJobConfig_maxCapacity(rName string, maxCapacity float64) string {
return acctest.ConfigCompose(testAccJobConfig_base(rName), fmt.Sprintf(`
resource "aws_glue_job" "test" {
Expand Down
30 changes: 27 additions & 3 deletions website/docs/r/glue_job.html.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,24 @@ resource "aws_glue_job" "example" {
}
```

### Ray Job

```terraform
# Example Glue job that runs a Ray script.
resource "aws_glue_job" "example" {
name = "example"
role_arn = aws_iam_role.example.arn
# Ray jobs should set glue_version to 4.0 or greater.
glue_version = "4.0"
# Z.2X is the worker type accepted for Ray jobs.
worker_type = "Z.2X"

command {
# "glueray" selects the Ray job type.
name = "glueray"
python_version = "3.9"
# runtime is only used by Ray jobs.
runtime = "Ray2.4"
script_location = "s3://${aws_s3_bucket.example.bucket}/example.py"
}
}
```

### Scala Job

```terraform
Expand Down Expand Up @@ -89,7 +107,7 @@ The following arguments are supported:
* `non_overridable_arguments` – (Optional) Non-overridable arguments for this job, specified as name-value pairs.
* `description` – (Optional) Description of the job.
* `execution_property` – (Optional) Execution property of the job. Defined below.
* `glue_version` - (Optional) The version of glue to use, for example "1.0". For information about available versions, see the [AWS Glue Release Notes](https://docs.aws.amazon.com/glue/latest/dg/release-notes.html).
* `glue_version` - (Optional) The version of Glue to use, for example "1.0". Ray jobs should set this to "4.0" or greater. For information about available versions, see the [AWS Glue Release Notes](https://docs.aws.amazon.com/glue/latest/dg/release-notes.html).
* `execution_class` - (Optional) Indicates whether the job is run with a standard or flexible execution class. The standard execution class is ideal for time-sensitive workloads that require fast job startup and dedicated resources. Valid value: `FLEX`, `STANDARD`.
* `max_capacity` – (Optional) The maximum number of AWS Glue data processing units (DPUs) that can be allocated when this job runs. `Required` when `pythonshell` is set, accept either `0.0625` or `1.0`. Use `number_of_workers` and `worker_type` arguments instead with `glue_version` `2.0` and above.
* `max_retries` – (Optional) The maximum number of times to retry this job if it fails.
Expand All @@ -99,14 +117,20 @@ The following arguments are supported:
* `tags` - (Optional) Key-value map of resource tags. If configured with a provider [`default_tags` configuration block](https://registry.terraform.io/providers/hashicorp/aws/latest/docs#default_tags-configuration-block) present, tags with matching keys will overwrite those defined at the provider-level.
* `timeout` – (Optional) The job timeout in minutes. The default is 2880 minutes (48 hours) for `glueetl` and `pythonshell` jobs, and null (unlimited) for `gluestreaming` jobs.
* `security_configuration` - (Optional) The name of the Security Configuration to be associated with the job.
* `worker_type` - (Optional) The type of predefined worker that is allocated when a job runs. Accepts a value of Standard, G.1X, or G.2X.
* `worker_type` - (Optional) The type of predefined worker that is allocated when a job runs. Accepts a value of Standard, G.1X, G.2X, or G.025X for Spark jobs. Accepts the value Z.2X for Ray jobs.
* For the Standard worker type, each worker provides 4 vCPU, 16 GB of memory and a 50GB disk, and 2 executors per worker.
* For the G.1X worker type, each worker maps to 1 DPU (4 vCPU, 16 GB of memory, 64 GB disk), and provides 1 executor per worker. Recommended for memory-intensive jobs.
* For the G.2X worker type, each worker maps to 2 DPU (8 vCPU, 32 GB of memory, 128 GB disk), and provides 1 executor per worker. Recommended for memory-intensive jobs.
* For the G.025X worker type, each worker maps to 0.25 DPU (2 vCPU, 4GB of memory, 64 GB disk), and provides 1 executor per worker. Recommended for low volume streaming jobs. Only available for Glue version 3.0.
* For the Z.2X worker type, each worker maps to 2 M-DPU (8 vCPU, 64 GB of memory, 128 GB disk), and provides up to 8 Ray workers based on the autoscaler.
* `number_of_workers` - (Optional) The number of workers of a defined workerType that are allocated when a job runs.

### command Argument Reference

* `name` - (Optional) The name of the job command. Defaults to `glueetl`. Use `pythonshell` for Python Shell Job Type, or `gluestreaming` for Streaming Job Type. `max_capacity` needs to be set if `pythonshell` is chosen.
* `name` - (Optional) The name of the job command. Defaults to `glueetl`. Use `pythonshell` for Python Shell Job Type, `glueray` for Ray Job Type, or `gluestreaming` for Streaming Job Type. `max_capacity` needs to be set if `pythonshell` is chosen.
* `script_location` - (Required) Specifies the S3 path to a script that executes a job.
* `python_version` - (Optional) The Python version being used to execute a Python shell job. Allowed values are 2, 3 or 3.9. Version 3 refers to Python 3.6.
* `runtime` - (Optional) In Ray jobs, runtime is used to specify the versions of Ray, Python and additional libraries available in your environment. This field is not used in other job types. For supported runtime environment values, see [Working with Ray jobs](https://docs.aws.amazon.com/glue/latest/dg/ray-jobs-section.html#author-job-ray-runtimes) in the Glue Developer Guide.

### execution_property Argument Reference

Expand Down
Loading