Attempt to create long-running stacks for rollback tests (#625)

* Attempt to create long-running stacks for rollback tests

  Not sure if this will work, but hopeful. If successful, it'll make our flakier functional tests work.

* Fix output value
* Fix duplicate output key
* Fix depends on logic
* Ugh, get it together!
* Throw verbose flag in to debug

  Not sure why we see such a massive delay in outputs: https://circleci.com/gh/cloudtools/stacker/2404?utm_campaign=vcs-integration-link&utm_medium=referral&utm_source=github-build-link

  If you look it shows:

  ```
  $ stacker build /dev/fd/63
  [2018-07-10T17:00:01] Using default AWS provider mode
  [2018-07-10T17:00:07] recreate-failed-interactive: submitted (creating new stack)
  [2018-07-10T17:01:41] recreate-failed-interactive: failed (creating new stack)
  [2018-07-10T17:01:41] The following steps failed: recreate-failed-interactive
  ```

  That's 1:34 between updates, and it misses the rolled back message:

  ```
  `assert_has_line "recreate-failed-interactive: failed (rolled back new stack)"' failed
  ```

  When I look at the events:

  ```
  $ aws cloudformation describe-stack-events --query 'StackEvents[*].{TS:Timestamp,Type:ResourceType,Status:ResourceStatus,Resource:LogicalResourceId}' --stack-name arn:aws:cloudformation:us-east-1:459170252436:stack/cloudtools-functional-tests-2404-recreate-failed-interactive/b26b5480-8462-11e8-aac9-503aca4a5899 --output text | grep ROLL
  cloudtools-functional-tests-2404-recreate-failed-interactive ROLLBACK_COMPLETE 2018-07-10T17:00:49.724Z AWS::CloudFormation::Stack
  cloudtools-functional-tests-2404-recreate-failed-interactive ROLLBACK_IN_PROGRESS 2018-07-10T17:00:33.061Z AWS::CloudFormation::Stack
  ```

  It shows that there were at least 16 seconds between when it went into rollback, and when it finished. I'm shocked we don't catch any of those events. Hopefully setting this to verbose will help narrow down what is happening here.

* Verbose on all builds attempts
cloudtools · Aug 4, 2018 · 47d7525 · 47d7525
1 parent 342e04a
commit 47d7525
Show file tree

Hide file tree

Showing 5 changed files with 93 additions and 13 deletions.
diff --git a/stacker/tests/fixtures/mock_blueprints.py b/stacker/tests/fixtures/mock_blueprints.py
@@ -203,6 +203,62 @@ def create_template(self):
         self.template.add_resource(WaitConditionHandle("Dummy2"))
 
 
+class LongRunningDummy(Blueprint):
+    """
+    Meant to be an attempt to create a cheap blueprint that takes a little bit
+    of time to create/rollback/destroy to avoid some of the race conditions
+    we've seen in some of our functional tests.
+    """
+    VARIABLES = {
+        "Count": {
+            "type": int,
+            "description": "The # of WaitConditonHandles to create.",
+            "default": 1,
+        },
+        "BreakLast": {
+            "type": bool,
+            "description": "Whether or not to break the last WaitConditon "
+                           "by creating an invalid WaitConditionHandle.",
+            "default": True,
+        },
+        "OutputValue": {
+            "type": str,
+            "description": "The value to put in an output to allow for "
+                           "updates.",
+            "default": "DefaultOutput",
+        },
+    }
+
+    def create_template(self):
+        v = self.get_variables()
+        t = self.template
+        base_name = "Dummy"
+
+        for i in range(v["Count"]):
+            name = "%s%s" % (base_name, i)
+            last_name = None
+            if i:
+                last_name = "%s%s" % (base_name, i - 1)
+            wch = WaitConditionHandle(name)
+            if last_name is not None:
+                wch.DependsOn = last_name
+            t.add_resource(wch)
+
+        self.add_output("OutputValue", str(v["OutputValue"]))
+        self.add_output("WCHCount", str(v["Count"]))
+
+        if v["BreakLast"]:
+            t.add_resource(
+                WaitCondition(
+                    "BrokenWaitCondition",
+                    Handle=wch.Ref(),
+                    # Timeout is made deliberately large so CF rejects it
+                    Timeout=2 ** 32,
+                    Count=0
+                )
+            )
+
+
 class Broken(Blueprint):
     """
     This blueprint deliberately fails validation, so that it can be used to

diff --git a/tests/test_suite/22_stacker_build-recreate_failed_stack_non-interactive_mode.bats b/tests/test_suite/22_stacker_build-recreate_failed_stack_non-interactive_mode.bats
@@ -10,7 +10,10 @@ load ../test_helper
 namespace: ${STACKER_NAMESPACE}
 stacks:
   - name: recreate-failed
-    class_path: stacker.tests.fixtures.mock_blueprints.Broken
+    class_path: stacker.tests.fixtures.mock_blueprints.LongRunningDummy
+    variables:
+      Count: 10
+      BreakLast: true
 
 EOF
   }
@@ -20,7 +23,11 @@ EOF
 namespace: ${STACKER_NAMESPACE}
 stacks:
   - name: recreate-failed
-    class_path: stacker.tests.fixtures.mock_blueprints.Dummy
+    class_path: stacker.tests.fixtures.mock_blueprints.LongRunningDummy
+    variables:
+      Count: 10
+      BreakLast: false
+      OutputValue: GoodOutput
 
 EOF
   }
@@ -32,7 +39,7 @@ EOF
   stacker destroy --force <(good_config)
 
   # Create the initial stack. This must fail.
-  stacker build <(bad_config)
+  stacker build -v <(bad_config)
   assert "$status" -eq 1
   assert_has_line "Using default AWS provider mode"
   assert_has_line "recreate-failed: submitted (creating new stack)"

diff --git a/tests/test_suite/23_stacker_build-recreate_failed_stack_interactive_mode.bats b/tests/test_suite/23_stacker_build-recreate_failed_stack_interactive_mode.bats
@@ -10,7 +10,10 @@ load ../test_helper
 namespace: ${STACKER_NAMESPACE}
 stacks:
   - name: recreate-failed-interactive
-    class_path: stacker.tests.fixtures.mock_blueprints.Broken
+    class_path: stacker.tests.fixtures.mock_blueprints.LongRunningDummy
+    variables:
+      Count: 10
+      BreakLast: true
 
 EOF
   }
@@ -20,7 +23,11 @@ EOF
 namespace: ${STACKER_NAMESPACE}
 stacks:
   - name: recreate-failed-interactive
-    class_path: stacker.tests.fixtures.mock_blueprints.Dummy
+    class_path: stacker.tests.fixtures.mock_blueprints.LongRunningDummy
+    variables:
+      Count: 10
+      BreakLast: false
+      OutputValue: GoodOutput
 
 EOF
   }
@@ -32,7 +39,7 @@ EOF
   stacker destroy --force <(good_config)
 
   # Create the initial stack. This must fail.
-  stacker build <(bad_config)
+  stacker build -v <(bad_config)
   assert "$status" -eq 1
   assert_has_line "Using default AWS provider mode"
   assert_has_line "recreate-failed-interactive: submitted (creating new stack)"

diff --git a/tests/test_suite/24_stacker_build-handle_rollbacks_during_updates.bats b/tests/test_suite/24_stacker_build-handle_rollbacks_during_updates.bats
@@ -10,7 +10,10 @@ load ../test_helper
 namespace: ${STACKER_NAMESPACE}
 stacks:
   - name: update-rollback
-    class_path: stacker.tests.fixtures.mock_blueprints.Broken
+    class_path: stacker.tests.fixtures.mock_blueprints.LongRunningDummy
+    variables:
+      Count: 10
+      BreakLast: true
 
 EOF
   }
@@ -20,7 +23,10 @@ EOF
 namespace: ${STACKER_NAMESPACE}
 stacks:
   - name: update-rollback
-    class_path: stacker.tests.fixtures.mock_blueprints.Dummy
+    class_path: stacker.tests.fixtures.mock_blueprints.LongRunningDummy
+    variables:
+      Count: 10
+      BreakLast: false
 
 EOF
   }
@@ -30,7 +36,11 @@ EOF
 namespace: ${STACKER_NAMESPACE}
 stacks:
   - name: update-rollback
-    class_path: stacker.tests.fixtures.mock_blueprints.Dummy2
+    class_path: stacker.tests.fixtures.mock_blueprints.LongRunningDummy
+    variables:
+      Count: 10
+      BreakLast: false
+      OutputValue: UpdateFoo
 
 EOF
   }
@@ -42,21 +52,21 @@ EOF
   stacker destroy --force <(good_config)
 
   # Create the initial stack
-  stacker build <(good_config)
+  stacker build -v <(good_config)
   assert "$status" -eq 0
   assert_has_line "Using default AWS provider mode"
   assert_has_line "update-rollback: submitted (creating new stack)"
   assert_has_line "update-rollback: complete (creating new stack)"
 
   # Do a bad update and watch the rollback
-  stacker build <(bad_config)
+  stacker build -v <(bad_config)
   assert "$status" -eq 1
   assert_has_line "Using default AWS provider mode"
   assert_has_line "update-rollback: submitted (updating existing stack)"
   assert_has_line "update-rollback: failed (rolled back update)"
 
   # Do a good update so we know we've correctly waited for rollback
-  stacker build <(good_config2)
+  stacker build -v <(good_config2)
   assert "$status" -eq 0
   assert_has_line "Using default AWS provider mode"
   assert_has_line "update-rollback: submitted (updating existing stack)"

diff --git a/tests/test_suite/25_stacker_build-handle_rollbacks_in_dependent_stacks.bats b/tests/test_suite/25_stacker_build-handle_rollbacks_in_dependent_stacks.bats
@@ -26,7 +26,7 @@ EOF
   stacker destroy --force <(config)
 
   # Verify both stacks fail during creation
-  stacker build <(config)
+  stacker build -v <(config)
   assert "$status" -eq 1
   assert_has_line "Using default AWS provider mode"
   assert_has_line "dependent-rollback-parent: submitted (creating new stack)"