From ce68dc54154800888474452e34c178c04e326211 Mon Sep 17 00:00:00 2001
From: Brian Flad <bflad417@gmail.com>
Date: Tue, 21 Jan 2020 09:10:47 -0500
Subject: [PATCH] resource/aws_emr_instance_group: Wait for RUNNING status on
 creation and various fixes to be region/partition agnostic

Previously, acceptance testing failed with:

```
--- FAIL: TestAccAWSEMRInstanceGroup_basic (74.42s)
    testing.go:640: Step 0 error: errors during apply:

        Error: Error waiting for EMR Cluster state to be "WAITING" or "RUNNING": TERMINATED_WITH_ERRORS: VALIDATION_ERROR: The requested instance type c4.large is not supported in the requested availability zone. Learn more at https://docs.aws.amazon.com/console/elasticmapreduce/ERROR_noinstancetype

--- FAIL: TestAccAWSEMRInstanceGroup_AutoScalingPolicy (560.19s)
    testing.go:635: Step 1 error: ImportStateVerify attributes not equivalent. Difference is shown below. Top is actual, bottom is expected.

        (map[string]string) (len=1) {
         (string) (len=6) "status": (string) (len=8) "RESIZING"
        }

        (map[string]string) (len=1) {
         (string) (len=6) "status": (string) (len=12) "PROVISIONING"
        }
```

In the resource logic, we ensure the EMR Instance Group enters the RUNNING state on creation to satisfy Terraform's usual stabilization guarantees and account for EMR Instance Groups that may have failed on startup.

In the testing, we apply the same Availability Zone blacklisting as the `aws_emr_cluster` resource testing, since many instance types are not available in usw2-az4. We also switch to the `aws_partition` data source so that IAM service principals and policy ARNs are no longer hard-coded to AWS Commercial, and remove the bootstrap action, since it is not required for testing the resource and fails outside AWS Commercial, e.g. in GovCloud:

```
--- FAIL: TestAccAWSEMRInstanceGroup_basic (182.35s)
    testing.go:640: Step 0 error: errors during apply:

        Error: Error waiting for EMR Cluster state to be "WAITING" or "RUNNING": TERMINATING: BOOTSTRAP_FAILURE: Master instance (i-0f179d71c1ed4d08e) failed attempting to download bootstrap action 1 file from S3
```

Output from acceptance testing in AWS Commercial:

```
--- PASS: TestAccAWSEMRInstanceGroup_EmrClusterDisappears (635.92s)
--- PASS: TestAccAWSEMRInstanceGroup_basic (839.34s)
--- PASS: TestAccAWSEMRInstanceGroup_AutoScalingPolicy (856.98s)
--- PASS: TestAccAWSEMRInstanceGroup_InstanceCount (860.39s)
--- PASS: TestAccAWSEMRInstanceGroup_ConfigurationsJson (924.79s)
--- PASS: TestAccAWSEMRInstanceGroup_EbsConfig_EbsOptimized (1138.23s)
--- PASS: TestAccAWSEMRInstanceGroup_BidPrice (1355.93s)
```

Output from acceptance testing in AWS GovCloud (US) (the remaining `TestAccAWSEMRInstanceGroup_BidPrice` failure will require a `PreCheck`, since spot instances are not supported in that region):

```
--- PASS: TestAccAWSEMRInstanceGroup_EmrClusterDisappears (617.03s)
--- PASS: TestAccAWSEMRInstanceGroup_basic (757.87s)
--- FAIL: TestAccAWSEMRInstanceGroup_BidPrice (796.55s)
    testing.go:640: Step 2 error: errors during apply:

        Error: ValidationException: Attempted to launch spot instance in an unsupported region.
        	status code: 400, request id: 537adc73-095f-4212-af9e-8f49f996d60c

          on /var/folders/w8/05f3x02n27x72g0mc2jy6_180000gp/T/tf-test595681403/main.tf line 283:
          (source code not available)

--- PASS: TestAccAWSEMRInstanceGroup_InstanceCount (824.20s)
--- PASS: TestAccAWSEMRInstanceGroup_AutoScalingPolicy (826.08s)
--- PASS: TestAccAWSEMRInstanceGroup_ConfigurationsJson (843.25s)
--- PASS: TestAccAWSEMRInstanceGroup_EbsConfig_EbsOptimized (1055.92s)
```
---
 aws/resource_aws_emr_instance_group.go      | 44 ++++++++++-----
 aws/resource_aws_emr_instance_group_test.go | 59 +++++++++++----------
 2 files changed, 61 insertions(+), 42 deletions(-)

diff --git a/aws/resource_aws_emr_instance_group.go b/aws/resource_aws_emr_instance_group.go
index d91b097c194e..172c3fc2b4c5 100644
--- a/aws/resource_aws_emr_instance_group.go
+++ b/aws/resource_aws_emr_instance_group.go
@@ -15,6 +15,11 @@ import (
 	"github.com/hashicorp/terraform-plugin-sdk/helper/validation"
 )
 
+const (
+	emrInstanceGroupCreateTimeout = 10 * time.Minute
+	emrInstanceGroupUpdateTimeout = 10 * time.Minute
+)
+
 func resourceAwsEMRInstanceGroup() *schema.Resource {
 	return &schema.Resource{
 		Create: resourceAwsEMRInstanceGroupCreate,
@@ -182,6 +187,10 @@ func resourceAwsEMRInstanceGroupCreate(d *schema.ResourceData, meta interface{})
 	}
 	d.SetId(*resp.InstanceGroupIds[0])
 
+	if err := waitForEmrInstanceGroupStateRunning(conn, d.Get("cluster_id").(string), d.Id(), emrInstanceGroupCreateTimeout); err != nil {
+		return fmt.Errorf("error waiting for EMR Instance Group (%s) creation: %s", d.Id(), err)
+	}
+
 	return resourceAwsEMRInstanceGroupRead(d, meta)
 }
 
@@ -309,20 +318,7 @@ func resourceAwsEMRInstanceGroupUpdate(d *schema.ResourceData, meta interface{})
 			return fmt.Errorf("error modifying EMR Instance Group (%s): %s", d.Id(), err)
 		}
 
-		stateConf := &resource.StateChangeConf{
-			Pending: []string{
-				emr.InstanceGroupStateBootstrapping,
-				emr.InstanceGroupStateProvisioning,
-				emr.InstanceGroupStateResizing,
-			},
-			Target:     []string{emr.InstanceGroupStateRunning},
-			Refresh:    instanceGroupStateRefresh(conn, d.Get("cluster_id").(string), d.Id()),
-			Timeout:    10 * time.Minute,
-			Delay:      10 * time.Second,
-			MinTimeout: 3 * time.Second,
-		}
-
-		if _, err := stateConf.WaitForState(); err != nil {
+		if err := waitForEmrInstanceGroupStateRunning(conn, d.Get("cluster_id").(string), d.Id(), emrInstanceGroupUpdateTimeout); err != nil {
 			return fmt.Errorf("error waiting for EMR Instance Group (%s) modification: %s", d.Id(), err)
 		}
 	}
@@ -486,3 +482,23 @@ func marshalWithoutNil(v interface{}) ([]byte, error) {
 
 	return json.Marshal(cleanRules)
 }
+
+func waitForEmrInstanceGroupStateRunning(conn *emr.EMR, clusterID string, instanceGroupID string, timeout time.Duration) error {
+	stateConf := &resource.StateChangeConf{
+		Pending: []string{
+			emr.InstanceGroupStateBootstrapping,
+			emr.InstanceGroupStateProvisioning,
+			emr.InstanceGroupStateReconfiguring,
+			emr.InstanceGroupStateResizing,
+		},
+		Target:     []string{emr.InstanceGroupStateRunning},
+		Refresh:    instanceGroupStateRefresh(conn, clusterID, instanceGroupID),
+		Timeout:    timeout,
+		Delay:      10 * time.Second,
+		MinTimeout: 3 * time.Second,
+	}
+
+	_, err := stateConf.WaitForState()
+
+	return err
+}
diff --git a/aws/resource_aws_emr_instance_group_test.go b/aws/resource_aws_emr_instance_group_test.go
index 16a6101328c2..7de2a8a33716 100644
--- a/aws/resource_aws_emr_instance_group_test.go
+++ b/aws/resource_aws_emr_instance_group_test.go
@@ -342,6 +342,14 @@ func testAccAWSEMRInstanceGroupRecreated(t *testing.T, before, after *emr.Instan
 }
 
 const testAccAWSEmrInstanceGroupBase = `
+data "aws_availability_zones" "available" {
+  # Many instance types are not available in this availability zone
+  blacklisted_zone_ids = ["usw2-az4"]
+  state                = "available"
+}
+
+data "aws_partition" "current" {}
+
 resource "aws_security_group" "allow_all" {
   name        = "allow_all"
   description = "Allow all inbound traffic"
@@ -374,8 +382,9 @@ resource "aws_vpc" "main" {
 }
 
 resource "aws_subnet" "main" {
-  vpc_id     = "${aws_vpc.main.id}"
-  cidr_block = "168.31.0.0/20"
+  availability_zone = data.aws_availability_zones.available.names[0]
+  cidr_block        = "168.31.0.0/20"
+  vpc_id            = aws_vpc.main.id
 }
 
 resource "aws_internet_gateway" "gw" {
@@ -398,37 +407,31 @@ resource "aws_main_route_table_association" "a" {
 
 ## EMR Cluster Configuration
 resource "aws_emr_cluster" "tf-test-cluster" {
-  name          = "tf-test-emr-%[1]d"
-  release_label = "emr-5.26.0"
-  applications  = ["Spark"]
+  applications                      = ["Spark"]
+  autoscaling_role                  = aws_iam_role.emr-autoscaling-role.arn
+  configurations                    = "test-fixtures/emr_configurations.json"
+  keep_job_flow_alive_when_no_steps = true
+  name                              = "tf-test-emr-%[1]d"
+  release_label                     = "emr-5.26.0"
+  service_role                      = aws_iam_role.iam_emr_default_role.arn
 
   ec2_attributes {
-    subnet_id                         = "${aws_subnet.main.id}"
-    emr_managed_master_security_group = "${aws_security_group.allow_all.id}"
-    emr_managed_slave_security_group  = "${aws_security_group.allow_all.id}"
-    instance_profile                  = "${aws_iam_instance_profile.emr_profile.arn}"
+    subnet_id                         = aws_subnet.main.id
+    emr_managed_master_security_group = aws_security_group.allow_all.id
+    emr_managed_slave_security_group  = aws_security_group.allow_all.id
+    instance_profile                  = aws_iam_instance_profile.emr_profile.arn
   }
 
   master_instance_group {
-		instance_type = "c4.large"
-	}
+    instance_type = "c4.large"
+  }
 
   core_instance_group {
-		instance_type = "c4.large"
-		instance_count = 2
-	}
-
-  bootstrap_action {
-    path = "s3://elasticmapreduce/bootstrap-actions/run-if"
-    name = "runif"
-    args = ["instance.isMaster=true", "echo running on master node"]
+    instance_type = "c4.large"
+    instance_count = 2
   }
 
-  configurations = "test-fixtures/emr_configurations.json"
-  service_role = "${aws_iam_role.iam_emr_default_role.arn}"
-  autoscaling_role = "${aws_iam_role.emr-autoscaling-role.arn}"
-
-  depends_on = ["aws_internet_gateway.gw"]
+  depends_on = [aws_internet_gateway.gw]
 }
 
 
@@ -446,7 +449,7 @@ resource "aws_iam_role" "iam_emr_default_role" {
       "Sid": "",
       "Effect": "Allow",
       "Principal": {
-        "Service": "elasticmapreduce.amazonaws.com"
+        "Service": "elasticmapreduce.${data.aws_partition.current.dns_suffix}"
       },
       "Action": "sts:AssumeRole"
     }
@@ -541,7 +544,7 @@ resource "aws_iam_role" "iam_emr_profile_role" {
       "Sid": "",
       "Effect": "Allow",
       "Principal": {
-        "Service": "ec2.amazonaws.com"
+        "Service": "ec2.${data.aws_partition.current.dns_suffix}"
       },
       "Action": "sts:AssumeRole"
     }
@@ -610,14 +613,14 @@ data "aws_iam_policy_document" "emr-autoscaling-role-policy" {
     actions = ["sts:AssumeRole"]
     principals {
       type        = "Service"
-      identifiers = ["elasticmapreduce.amazonaws.com","application-autoscaling.amazonaws.com"]
+      identifiers = ["elasticmapreduce.${data.aws_partition.current.dns_suffix}","application-autoscaling.${data.aws_partition.current.dns_suffix}"]
     }
   }
 }
 
 resource "aws_iam_role_policy_attachment" "emr-autoscaling-role" {
   role       = "${aws_iam_role.emr-autoscaling-role.name}"
-  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceforAutoScalingRole"
+  policy_arn = "arn:${data.aws_partition.current.partition}:iam::aws:policy/service-role/AmazonElasticMapReduceforAutoScalingRole"
 }
 `