From 1d8052ec0dc4358b8b29831aff2eb13f78f647c1 Mon Sep 17 00:00:00 2001
From: harikrongali <81331774+harikrongali@users.noreply.github.com>
Date: Thu, 23 Sep 2021 14:02:46 -0700
Subject: [PATCH] Fix: Failed analysis to degrade rollout when multiple metrics
 are analyzed (#1535)

* fix: analysis fail for inline multi metric analysis

Signed-off-by: hari rongali <hari_rongali@intuit.com>

* fix: cleanup

Signed-off-by: hari rongali <hari_rongali@intuit.com>
---
 analysis/analysis.go                          | 19 ++++----
 test/e2e/analysis_test.go                     | 24 ++++++++++
 .../analysistemplate-fail-multiple-job.yaml   | 45 +++++++++++++++++++
 ...out-degraded-inline-multiple-analysis.yaml | 28 ++++++++++++
 4 files changed, 106 insertions(+), 10 deletions(-)
 create mode 100644 test/e2e/functional/analysistemplate-fail-multiple-job.yaml
 create mode 100644 test/e2e/functional/rollout-degraded-inline-multiple-analysis.yaml

diff --git a/analysis/analysis.go b/analysis/analysis.go
index 73aadb5eca..046e3899b8 100644
--- a/analysis/analysis.go
+++ b/analysis/analysis.go
@@ -434,20 +434,19 @@ func (c *Controller) assessRunStatus(run *v1alpha1.AnalysisRun, metrics []v1alph
 		} else {
 			// metric hasn't started running. possible cases where some of the metrics starts with delay
 			everythingCompleted = false
-			if terminating {
-				// we have yet to take a single measurement, but have already been instructed to stop
-				log.Infof("metric assessed %s: run terminated", v1alpha1.AnalysisPhaseSuccessful)
-				return v1alpha1.AnalysisPhaseSuccessful, worstMessage
-			}
 		}
 	}
-	if !everythingCompleted {
-		return v1alpha1.AnalysisPhaseRunning, ""
-	}
-	if worstStatus == "" {
-		if terminating {
+
+	if terminating {
+		if worstStatus == "" {
+			// we have yet to take a single measurement, but have already been instructed to stop
+			log.Infof("metric assessed %s: run terminated", v1alpha1.AnalysisPhaseSuccessful)
 			return v1alpha1.AnalysisPhaseSuccessful, worstMessage
 		}
+		log.Infof("metric assessed %s: run terminated", worstStatus)
+		return worstStatus, worstMessage
+	}
+	if !everythingCompleted || worstStatus == "" {
 		return v1alpha1.AnalysisPhaseRunning, ""
 	}
 	return worstStatus, worstMessage
diff --git a/test/e2e/analysis_test.go b/test/e2e/analysis_test.go
index 8eb4f44585..c4e6ef60b5 100644
--- a/test/e2e/analysis_test.go
+++ b/test/e2e/analysis_test.go
@@ -28,6 +28,7 @@ func (s *AnalysisSuite) SetupSuite() {
 	s.ApplyManifests("@functional/analysistemplate-web-background.yaml")
 	s.ApplyManifests("@functional/analysistemplate-sleep-job.yaml")
 	s.ApplyManifests("@functional/analysistemplate-multiple-job.yaml")
+	s.ApplyManifests("@functional/analysistemplate-fail-multiple-job.yaml")
 }
 
 // convenience to generate a new service with a given name
@@ -109,6 +110,29 @@ func (s *AnalysisSuite) TestCanaryInlineMultipleAnalysis() {
 		Then().
 		ExpectAnalysisRunCount(1)
 }
+
+func (s *AnalysisSuite) TestCanaryFailInlineMultipleAnalysis() {
+	s.Given().
+		RolloutObjects("@functional/rollout-degraded-inline-multiple-analysis.yaml").
+		When().
+		ApplyManifests().
+		WaitForRolloutStatus("Healthy").
+		Then().
+		ExpectAnalysisRunCount(0).
+		When().
+		UpdateSpec().
+		WaitForRolloutStatus("Paused"). // the rollout manifest has an indefinite `pause: {}` step ahead of the analysis step
+		PromoteRollout().
+		Sleep(1*time.Second). // promoting too fast causes test to flake
+		Then().
+		ExpectRolloutStatus("Progressing").
+		When().
+		WaitForInlineAnalysisRunPhase("Failed"). // multiple-job-fail: sleep-job-rep runs `exit 1` while sleep-job is still waiting out its 10000s initialDelay
+		WaitForRolloutStatus("Degraded").
+		Then().
+		ExpectRolloutStatus("Degraded")
+}
+
 // TestBlueGreenAnalysis tests blue-green with pre/post analysis and then fast-tracked rollback
 func (s *AnalysisSuite) TestBlueGreenAnalysis() {
 	original := `
diff --git a/test/e2e/functional/analysistemplate-fail-multiple-job.yaml b/test/e2e/functional/analysistemplate-fail-multiple-job.yaml
new file mode 100644
index 0000000000..94209d5915
--- /dev/null
+++ b/test/e2e/functional/analysistemplate-fail-multiple-job.yaml
@@ -0,0 +1,45 @@
+# AnalysisTemplate with two job metrics: sleep-job is held back by a 10000s initialDelay, while sleep-job-rep always exits 1, so the analysis is expected to fail
+kind: AnalysisTemplate
+apiVersion: argoproj.io/v1alpha1
+metadata:
+  name: multiple-job-fail
+spec:
+  args:
+  - name: duration
+    value: 0s
+  - name: exit-code
+    value: "0"
+  - name: count
+    value: "1"
+  metrics:
+  - name: sleep-job
+    initialDelay: 10000s
+    count: 1
+    provider:
+      job:
+        spec:
+          template:
+            spec:
+              containers:
+              - name: sleep-job
+                image: nginx:1.19-alpine
+                command: [sh, -c, -x]
+                args: ["sleep {{args.duration}} && exit {{args.exit-code}}"]
+              restartPolicy: Never
+          backoffLimit: 0
+  - name: sleep-job-rep
+    count: 2
+    interval: 1s
+    failureLimit: 1
+    provider:
+      job:
+        spec:
+          template:
+            spec:
+              containers:
+              - name: sleep-job
+                image: nginx:1.19-alpine
+                command: [sh, -c, -x]
+                args: ["sleep {{args.duration}} && exit 1"]
+              restartPolicy: Never
+          backoffLimit: 0
\ No newline at end of file
diff --git a/test/e2e/functional/rollout-degraded-inline-multiple-analysis.yaml b/test/e2e/functional/rollout-degraded-inline-multiple-analysis.yaml
new file mode 100644
index 0000000000..f25f7c9d3d
--- /dev/null
+++ b/test/e2e/functional/rollout-degraded-inline-multiple-analysis.yaml
@@ -0,0 +1,28 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Rollout
+metadata:
+  name: rollout-inline-analysis
+spec:
+  strategy:
+    canary:
+      steps:
+      - setWeight: 10
+      - pause: {}
+      - analysis:
+          templates:
+          - templateName: multiple-job-fail
+  selector:
+    matchLabels:
+      app: rollout-inline-analysis
+  template:
+    metadata:
+      labels:
+        app: rollout-inline-analysis
+    spec:
+      containers:
+      - name: rollouts-demo
+        image: nginx:1.19-alpine
+        resources:
+          requests:
+            memory: 16Mi
+            cpu: 5m