Skip to content

Commit

Permalink
resource/aws_emr_instance_group: Wait for RUNNING status on creation …
Browse files Browse the repository at this point in the history
…and various fixes to be region/partition agnostic

Previously in the acceptance testing:

```
--- FAIL: TestAccAWSEMRInstanceGroup_basic (74.42s)
    testing.go:640: Step 0 error: errors during apply:

        Error: Error waiting for EMR Cluster state to be "WAITING" or "RUNNING": TERMINATED_WITH_ERRORS: VALIDATION_ERROR: The requested instance type c4.large is not supported in the requested availability zone. Learn more at https://docs.aws.amazon.com/console/elasticmapreduce/ERROR_noinstancetype

--- FAIL: TestAccAWSEMRInstanceGroup_AutoScalingPolicy (560.19s)
    testing.go:635: Step 1 error: ImportStateVerify attributes not equivalent. Difference is shown below. Top is actual, bottom is expected.

        (map[string]string) (len=1) {
         (string) (len=6) "status": (string) (len=8) "RESIZING"
        }

        (map[string]string) (len=1) {
         (string) (len=6) "status": (string) (len=12) "PROVISIONING"
        }
```

In the resource logic, we ensure the EMR Instance Group enters the RUNNING state on creation to satisfy Terraform's usual stabilization guarantees and account for EMR Instance Groups that may have failed on startup.

In the testing, we apply the same Availability Zone blacklisting as the `aws_emr_cluster` resource testing since many instance types are not available in `usw2-az4`. We also switch to using the `aws_partition` data source and remove bootstrap actions since the functionality is not required for testing the resource and breaks outside AWS Commercial, e.g. in GovCloud:

```
--- FAIL: TestAccAWSEMRInstanceGroup_basic (182.35s)
    testing.go:640: Step 0 error: errors during apply:

        Error: Error waiting for EMR Cluster state to be "WAITING" or "RUNNING": TERMINATING: BOOTSTRAP_FAILURE: Master instance (i-0f179d71c1ed4d08e) failed attempting to download bootstrap action 1 file from S3
```

Output from acceptance testing in AWS Commercial:

```
--- PASS: TestAccAWSEMRInstanceGroup_EmrClusterDisappears (635.92s)
--- PASS: TestAccAWSEMRInstanceGroup_basic (839.34s)
--- PASS: TestAccAWSEMRInstanceGroup_AutoScalingPolicy (856.98s)
--- PASS: TestAccAWSEMRInstanceGroup_InstanceCount (860.39s)
--- PASS: TestAccAWSEMRInstanceGroup_ConfigurationsJson (924.79s)
--- PASS: TestAccAWSEMRInstanceGroup_EbsConfig_EbsOptimized (1138.23s)
--- PASS: TestAccAWSEMRInstanceGroup_BidPrice (1355.93s)
```

Output from acceptance testing in AWS GovCloud (US) (remaining test failure will require a `PreCheck`):

```
--- PASS: TestAccAWSEMRInstanceGroup_EmrClusterDisappears (617.03s)
--- PASS: TestAccAWSEMRInstanceGroup_basic (757.87s)
--- FAIL: TestAccAWSEMRInstanceGroup_BidPrice (796.55s)
    testing.go:640: Step 2 error: errors during apply:

        Error: ValidationException: Attempted to launch spot instance in an unsupported region.
        	status code: 400, request id: 537adc73-095f-4212-af9e-8f49f996d60c

          on /var/folders/w8/05f3x02n27x72g0mc2jy6_180000gp/T/tf-test595681403/main.tf line 283:
          (source code not available)

--- PASS: TestAccAWSEMRInstanceGroup_InstanceCount (824.20s)
--- PASS: TestAccAWSEMRInstanceGroup_AutoScalingPolicy (826.08s)
--- PASS: TestAccAWSEMRInstanceGroup_ConfigurationsJson (843.25s)
--- PASS: TestAccAWSEMRInstanceGroup_EbsConfig_EbsOptimized (1055.92s)
```
  • Loading branch information
bflad committed Jan 21, 2020
1 parent 96840ca commit ce68dc5
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 42 deletions.
44 changes: 30 additions & 14 deletions aws/resource_aws_emr_instance_group.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ import (
"github.com/hashicorp/terraform-plugin-sdk/helper/validation"
)

// Upper bounds on how long the resource waits for an EMR Instance Group to
// reach the RUNNING state.
const (
	// emrInstanceGroupCreateTimeout is passed to the RUNNING-state wait that
	// follows instance group creation.
	emrInstanceGroupCreateTimeout time.Duration = 10 * time.Minute

	// emrInstanceGroupUpdateTimeout is passed to the RUNNING-state wait that
	// follows instance group modification.
	emrInstanceGroupUpdateTimeout time.Duration = 10 * time.Minute
)

func resourceAwsEMRInstanceGroup() *schema.Resource {
return &schema.Resource{
Create: resourceAwsEMRInstanceGroupCreate,
Expand Down Expand Up @@ -182,6 +187,10 @@ func resourceAwsEMRInstanceGroupCreate(d *schema.ResourceData, meta interface{})
}
d.SetId(*resp.InstanceGroupIds[0])

if err := waitForEmrInstanceGroupStateRunning(conn, d.Get("cluster_id").(string), d.Id(), emrInstanceGroupCreateTimeout); err != nil {
return fmt.Errorf("error waiting for EMR Instance Group (%s) creation: %s", d.Id(), err)
}

return resourceAwsEMRInstanceGroupRead(d, meta)
}

Expand Down Expand Up @@ -309,20 +318,7 @@ func resourceAwsEMRInstanceGroupUpdate(d *schema.ResourceData, meta interface{})
return fmt.Errorf("error modifying EMR Instance Group (%s): %s", d.Id(), err)
}

stateConf := &resource.StateChangeConf{
Pending: []string{
emr.InstanceGroupStateBootstrapping,
emr.InstanceGroupStateProvisioning,
emr.InstanceGroupStateResizing,
},
Target: []string{emr.InstanceGroupStateRunning},
Refresh: instanceGroupStateRefresh(conn, d.Get("cluster_id").(string), d.Id()),
Timeout: 10 * time.Minute,
Delay: 10 * time.Second,
MinTimeout: 3 * time.Second,
}

if _, err := stateConf.WaitForState(); err != nil {
if err := waitForEmrInstanceGroupStateRunning(conn, d.Get("cluster_id").(string), d.Id(), emrInstanceGroupUpdateTimeout); err != nil {
return fmt.Errorf("error waiting for EMR Instance Group (%s) modification: %s", d.Id(), err)
}
}
Expand Down Expand Up @@ -486,3 +482,23 @@ func marshalWithoutNil(v interface{}) ([]byte, error) {

return json.Marshal(cleanRules)
}

// waitForEmrInstanceGroupStateRunning waits for the given EMR Instance Group
// to report the RUNNING state, giving up after timeout.
//
// The group's state is obtained through instanceGroupStateRefresh for the
// supplied cluster and instance group IDs. BOOTSTRAPPING, PROVISIONING,
// RECONFIGURING and RESIZING are treated as in-progress states; RUNNING is the
// only target state. Any error produced by the wait is returned unchanged so
// that callers can add their own context.
func waitForEmrInstanceGroupStateRunning(conn *emr.EMR, clusterID string, instanceGroupID string, timeout time.Duration) error {
	// States the instance group may pass through before it is RUNNING.
	inProgress := []string{
		emr.InstanceGroupStateBootstrapping,
		emr.InstanceGroupStateProvisioning,
		emr.InstanceGroupStateReconfiguring,
		emr.InstanceGroupStateResizing,
	}

	waiter := &resource.StateChangeConf{
		Pending:    inProgress,
		Target:     []string{emr.InstanceGroupStateRunning},
		Refresh:    instanceGroupStateRefresh(conn, clusterID, instanceGroupID),
		Timeout:    timeout,
		Delay:      10 * time.Second,
		MinTimeout: 3 * time.Second,
	}

	// The refreshed result itself is not needed, only whether the wait succeeded.
	if _, err := waiter.WaitForState(); err != nil {
		return err
	}

	return nil
}
59 changes: 31 additions & 28 deletions aws/resource_aws_emr_instance_group_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,14 @@ func testAccAWSEMRInstanceGroupRecreated(t *testing.T, before, after *emr.Instan
}

const testAccAWSEmrInstanceGroupBase = `
data "aws_availability_zones" "available" {
# Many instance types are not available in this availability zone
blacklisted_zone_ids = ["usw2-az4"]
state = "available"
}
data "aws_partition" "current" {}
resource "aws_security_group" "allow_all" {
name = "allow_all"
description = "Allow all inbound traffic"
Expand Down Expand Up @@ -374,8 +382,9 @@ resource "aws_vpc" "main" {
}
resource "aws_subnet" "main" {
vpc_id = "${aws_vpc.main.id}"
cidr_block = "168.31.0.0/20"
availability_zone = data.aws_availability_zones.available.names[0]
cidr_block = "168.31.0.0/20"
vpc_id = aws_vpc.main.id
}
resource "aws_internet_gateway" "gw" {
Expand All @@ -398,37 +407,31 @@ resource "aws_main_route_table_association" "a" {
## EMR Cluster Configuration
resource "aws_emr_cluster" "tf-test-cluster" {
name = "tf-test-emr-%[1]d"
release_label = "emr-5.26.0"
applications = ["Spark"]
applications = ["Spark"]
autoscaling_role = aws_iam_role.emr-autoscaling-role.arn
configurations = "test-fixtures/emr_configurations.json"
keep_job_flow_alive_when_no_steps = true
name = "tf-test-emr-%[1]d"
release_label = "emr-5.26.0"
service_role = aws_iam_role.iam_emr_default_role.arn
ec2_attributes {
subnet_id = "${aws_subnet.main.id}"
emr_managed_master_security_group = "${aws_security_group.allow_all.id}"
emr_managed_slave_security_group = "${aws_security_group.allow_all.id}"
instance_profile = "${aws_iam_instance_profile.emr_profile.arn}"
subnet_id = aws_subnet.main.id
emr_managed_master_security_group = aws_security_group.allow_all.id
emr_managed_slave_security_group = aws_security_group.allow_all.id
instance_profile = aws_iam_instance_profile.emr_profile.arn
}
master_instance_group {
instance_type = "c4.large"
}
instance_type = "c4.large"
}
core_instance_group {
instance_type = "c4.large"
instance_count = 2
}
bootstrap_action {
path = "s3://elasticmapreduce/bootstrap-actions/run-if"
name = "runif"
args = ["instance.isMaster=true", "echo running on master node"]
instance_type = "c4.large"
instance_count = 2
}
configurations = "test-fixtures/emr_configurations.json"
service_role = "${aws_iam_role.iam_emr_default_role.arn}"
autoscaling_role = "${aws_iam_role.emr-autoscaling-role.arn}"
depends_on = ["aws_internet_gateway.gw"]
depends_on = [aws_internet_gateway.gw]
}
Expand All @@ -446,7 +449,7 @@ resource "aws_iam_role" "iam_emr_default_role" {
"Sid": "",
"Effect": "Allow",
"Principal": {
"Service": "elasticmapreduce.amazonaws.com"
"Service": "elasticmapreduce.${data.aws_partition.current.dns_suffix}"
},
"Action": "sts:AssumeRole"
}
Expand Down Expand Up @@ -541,7 +544,7 @@ resource "aws_iam_role" "iam_emr_profile_role" {
"Sid": "",
"Effect": "Allow",
"Principal": {
"Service": "ec2.amazonaws.com"
"Service": "ec2.${data.aws_partition.current.dns_suffix}"
},
"Action": "sts:AssumeRole"
}
Expand Down Expand Up @@ -610,14 +613,14 @@ data "aws_iam_policy_document" "emr-autoscaling-role-policy" {
actions = ["sts:AssumeRole"]
principals {
type = "Service"
identifiers = ["elasticmapreduce.amazonaws.com","application-autoscaling.amazonaws.com"]
identifiers = ["elasticmapreduce.${data.aws_partition.current.dns_suffix}","application-autoscaling.${data.aws_partition.current.dns_suffix}"]
}
}
}
resource "aws_iam_role_policy_attachment" "emr-autoscaling-role" {
role = "${aws_iam_role.emr-autoscaling-role.name}"
policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceforAutoScalingRole"
policy_arn = "arn:${data.aws_partition.current.partition}:iam::aws:policy/service-role/AmazonElasticMapReduceforAutoScalingRole"
}
`

Expand Down

0 comments on commit ce68dc5

Please sign in to comment.