From 159b41981a53e0a72da298b5a6c33320c78bc480 Mon Sep 17 00:00:00 2001
From: Jesse Suen <jesse_suen@intuit.com>
Date: Tue, 3 Nov 2020 03:16:20 -0800
Subject: [PATCH] fix: background analysis refs were not verified. requeue
 InvalidSpec rollouts

Signed-off-by: Jesse Suen <jesse_suen@intuit.com>
---
 pkg/apis/rollouts/validation/validation.go    | 14 ++--
 .../validation/validation_references.go       | 23 +++---
 .../validation/validation_references_test.go  | 15 +++-
 rollout/analysis_test.go                      | 81 +++++--------------
 rollout/context.go                            |  5 ++
 rollout/controller.go                         | 22 ++---
 rollout/controller_test.go                    |  5 +-
 test/e2e/functional_test.go                   | 56 +++++++++++++
 test/fixtures/then.go                         |  6 ++
 9 files changed, 136 insertions(+), 91 deletions(-)

diff --git a/pkg/apis/rollouts/validation/validation.go b/pkg/apis/rollouts/validation/validation.go
index ffd1b3f8f9..d4265a7800 100644
--- a/pkg/apis/rollouts/validation/validation.go
+++ b/pkg/apis/rollouts/validation/validation.go
@@ -224,14 +224,12 @@ func ValidateRolloutStrategyCanary(rollout *v1alpha1.Rollout, fldPath *field.Pat
 			}
 		}
 
-		if analysisRunArgs != nil {
-			for _, arg := range analysisRunArgs {
-				if arg.ValueFrom != nil {
-					if arg.ValueFrom.FieldRef != nil {
-						_, err := fieldpath.ExtractFieldPathAsString(rollout, arg.ValueFrom.FieldRef.FieldPath)
-						if err != nil {
-							allErrs = append(allErrs, field.Invalid(stepFldPath.Child("analyses"), analysisRunArgs, InvalidAnalysisArgsMessage))
-						}
+		for _, arg := range analysisRunArgs {
+			if arg.ValueFrom != nil {
+				if arg.ValueFrom.FieldRef != nil {
+					_, err := fieldpath.ExtractFieldPathAsString(rollout, arg.ValueFrom.FieldRef.FieldPath)
+					if err != nil {
+						allErrs = append(allErrs, field.Invalid(stepFldPath.Child("analyses"), analysisRunArgs, InvalidAnalysisArgsMessage))
 					}
 				}
 			}
diff --git a/pkg/apis/rollouts/validation/validation_references.go b/pkg/apis/rollouts/validation/validation_references.go
index 56d6afac64..4020d47d17 100644
--- a/pkg/apis/rollouts/validation/validation_references.go
+++ b/pkg/apis/rollouts/validation/validation_references.go
@@ -24,7 +24,8 @@ type AnalysisTemplateType string
 const (
 	PrePromotionAnalysis  AnalysisTemplateType = "PrePromotionAnalysis"
 	PostPromotionAnalysis AnalysisTemplateType = "PostPromotionAnalysis"
-	CanaryStep            AnalysisTemplateType = "CanaryStep"
+	InlineAnalysis        AnalysisTemplateType = "InlineAnalysis"
+	BackgroundAnalysis    AnalysisTemplateType = "BackgroundAnalysis"
 )
 
 type AnalysisTemplateWithType struct {
@@ -32,7 +33,7 @@ type AnalysisTemplateWithType struct {
 	ClusterAnalysisTemplate *v1alpha1.ClusterAnalysisTemplate
 	TemplateType            AnalysisTemplateType
 	AnalysisIndex           int
-	// Used only for CanaryStep
+	// Used only for InlineAnalysis
 	CanaryStepIndex int
 }
 
@@ -57,7 +58,7 @@ type ReferencedResources struct {
 	VirtualServices          []unstructured.Unstructured
 }
 
-func ValidateRolloutReferencedResources(rollout *v1alpha1.Rollout, referencedResources ReferencedResources) field.ErrorList { //field.ErrorList {
+func ValidateRolloutReferencedResources(rollout *v1alpha1.Rollout, referencedResources ReferencedResources) field.ErrorList {
 	allErrs := field.ErrorList{}
 	for _, service := range referencedResources.ServiceWithType {
 		allErrs = append(allErrs, ValidateService(service, rollout)...)
@@ -104,11 +105,13 @@ func ValidateAnalysisTemplateWithType(template AnalysisTemplateWithType) field.E
 	} else if template.AnalysisTemplate != nil {
 		templateName, templateSpec = template.AnalysisTemplate.Name, template.AnalysisTemplate.Spec
 	}
-	for _, metric := range templateSpec.Metrics {
-		effectiveCount := metric.EffectiveCount()
-		if effectiveCount == nil {
-			msg := fmt.Sprintf("AnalysisTemplate %s has metric %s which runs indefinitely", templateName, metric.Name)
-			allErrs = append(allErrs, field.Invalid(fldPath, templateName, msg))
+	if template.TemplateType != BackgroundAnalysis {
+		for _, metric := range templateSpec.Metrics {
+			effectiveCount := metric.EffectiveCount()
+			if effectiveCount == nil {
+				msg := fmt.Sprintf("AnalysisTemplate %s has metric %s which runs indefinitely", templateName, metric.Name)
+				allErrs = append(allErrs, field.Invalid(fldPath, templateName, msg))
+			}
 		}
 	}
 	return allErrs
@@ -188,8 +191,10 @@ func GetAnalysisTemplateWithTypeFieldPath(templateType AnalysisTemplateType, ana
 		fldPath = fldPath.Child("blueGreen", "prePromotionAnalysis", "templates")
 	case PostPromotionAnalysis:
 		fldPath = fldPath.Child("blueGreen", "postPromotionAnalysis", "templates")
-	case CanaryStep:
+	case InlineAnalysis:
 		fldPath = fldPath.Child("canary", "steps").Index(canaryStepIndex).Child("analysis", "templates")
+	case BackgroundAnalysis:
+		fldPath = fldPath.Child("canary", "analysis", "templates")
 	default:
 		// No path specified
 		return nil
diff --git a/pkg/apis/rollouts/validation/validation_references_test.go b/pkg/apis/rollouts/validation/validation_references_test.go
index 4f85bc752e..c546706235 100644
--- a/pkg/apis/rollouts/validation/validation_references_test.go
+++ b/pkg/apis/rollouts/validation/validation_references_test.go
@@ -84,7 +84,7 @@ func getAnalysisTemplateWithType() AnalysisTemplateWithType {
 				}},
 			},
 		},
-		TemplateType:    CanaryStep,
+		TemplateType:    InlineAnalysis,
 		AnalysisIndex:   0,
 		CanaryStepIndex: 0,
 	}
@@ -160,7 +160,7 @@ func TestValidateAnalysisTemplateWithType(t *testing.T) {
 		assert.Empty(t, allErrs)
 	})
 
-	t.Run("validate analysisTemplate - failure", func(t *testing.T) {
+	t.Run("validate inline analysisTemplate - failure", func(t *testing.T) {
 		template := getAnalysisTemplateWithType()
 		template.AnalysisTemplate.Spec.Metrics[0].Count = 0
 		allErrs := ValidateAnalysisTemplateWithType(template)
@@ -168,6 +168,15 @@ func TestValidateAnalysisTemplateWithType(t *testing.T) {
 		expectedError := field.Invalid(GetAnalysisTemplateWithTypeFieldPath(template.TemplateType, template.AnalysisIndex, template.CanaryStepIndex), template.AnalysisTemplate.Name, "AnalysisTemplate analysis-template-name has metric metric-name which runs indefinitely")
 		assert.Equal(t, expectedError.Error(), allErrs[0].Error())
 	})
+
+	// verify background analysis does not care about a metric that runs indefinitely
+	t.Run("validate background analysisTemplate - success", func(t *testing.T) {
+		template := getAnalysisTemplateWithType()
+		template.TemplateType = BackgroundAnalysis
+		template.AnalysisTemplate.Spec.Metrics[0].Count = 0
+		allErrs := ValidateAnalysisTemplateWithType(template)
+		assert.Empty(t, allErrs)
+	})
 }
 
 func TestValidateIngress(t *testing.T) {
@@ -256,7 +265,7 @@ func TestGetAnalysisTemplateWithTypeFieldPath(t *testing.T) {
 	})
 
 	t.Run("get fieldPath for analysisTemplateType CanaryStep", func(t *testing.T) {
-		fldPath := GetAnalysisTemplateWithTypeFieldPath(CanaryStep, 0, 0)
+		fldPath := GetAnalysisTemplateWithTypeFieldPath(InlineAnalysis, 0, 0)
 		expectedFldPath := field.NewPath("spec", "strategy", "canary", "steps").Index(0).Child("analysis", "templates").Index(0).Child("templateName")
 		assert.Equal(t, expectedFldPath.String(), fldPath.String())
 	})
diff --git a/rollout/analysis_test.go b/rollout/analysis_test.go
index 334f3c6438..fc8e62a622 100644
--- a/rollout/analysis_test.go
+++ b/rollout/analysis_test.go
@@ -1,12 +1,14 @@
 package rollout
 
 import (
+	"encoding/json"
 	"fmt"
 	"testing"
 	"time"
 
 	"github.com/stretchr/testify/assert"
 
+	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/intstr"
 	"k8s.io/kubernetes/pkg/controller"
@@ -285,17 +287,12 @@ func TestCreateBackgroundAnalysisRunWithClusterTemplates(t *testing.T) {
 	assert.Equal(t, calculatePatch(r2, fmt.Sprintf(expectedPatch, expectedArName, expectedArName)), patch)
 }
 
-func TestCreateBackgroundAnalysisRunErrorWithMissingClusterTemplates(t *testing.T) {
+func TestInvalidSpecMissingClusterTemplatesBackgroundAnalysis(t *testing.T) {
 	f := newFixture(t)
 	defer f.Close()
 
-	steps := []v1alpha1.CanaryStep{{
-		SetWeight: int32Ptr(10),
-	}}
-	cat := clusterAnalysisTemplate("bar")
-	r1 := newCanaryRollout("foo", 10, nil, steps, pointer.Int32Ptr(0), intstr.FromInt(0), intstr.FromInt(1))
-	r2 := bumpVersion(r1)
-	r2.Spec.Strategy.Canary.Analysis = &v1alpha1.RolloutAnalysisBackground{
+	r := newCanaryRollout("foo", 10, nil, nil, pointer.Int32Ptr(0), intstr.FromInt(0), intstr.FromInt(1))
+	r.Spec.Strategy.Canary.Analysis = &v1alpha1.RolloutAnalysisBackground{
 		RolloutAnalysis: v1alpha1.RolloutAnalysis{
 			Templates: []v1alpha1.RolloutAnalysisTemplate{{
 				TemplateName: "missing",
@@ -303,25 +300,24 @@ func TestCreateBackgroundAnalysisRunErrorWithMissingClusterTemplates(t *testing.
 			}},
 		},
 	}
+	f.rolloutLister = append(f.rolloutLister, r)
+	f.objects = append(f.objects, r)
 
-	rs1 := newReplicaSetWithStatus(r1, 10, 10)
-	rs2 := newReplicaSetWithStatus(r2, 0, 0)
-	f.kubeobjects = append(f.kubeobjects, rs1, rs2)
-	f.replicaSetLister = append(f.replicaSetLister, rs1, rs2)
-	rs1PodHash := rs1.Labels[v1alpha1.DefaultRolloutUniqueLabelKey]
-
-	r2 = updateCanaryRolloutStatus(r2, rs1PodHash, 10, 0, 10, false)
-	progressingCondition, _ := newProgressingCondition(conditions.ReplicaSetUpdatedReason, rs2, "")
-	conditions.SetRolloutCondition(&r2.Status, progressingCondition)
-	availableCondition, _ := newAvailableCondition(true)
-	conditions.SetRolloutCondition(&r2.Status, availableCondition)
+	patchIndex := f.expectPatchRolloutAction(r)
+	f.run(getKey(r, t))
 
-	f.rolloutLister = append(f.rolloutLister, r2)
-	f.clusterAnalysisTemplateLister = append(f.clusterAnalysisTemplateLister, cat)
-	f.objects = append(f.objects, r2, cat)
+	expectedPatchWithoutSub := `{
+		"status": {
+			"conditions": [%s,%s]
+		}
+	}`
+	_, progressingCond := newProgressingCondition(conditions.ReplicaSetUpdatedReason, r, "")
+	invalidSpecCond := conditions.NewRolloutCondition(v1alpha1.InvalidSpec, corev1.ConditionTrue, conditions.InvalidSpecReason, "The Rollout \"foo\" is invalid: spec.strategy.canary.analysis.templates[0].templateName: Invalid value: \"missing\": clusteranalysistemplate.argoproj.io \"missing\" not found")
+	invalidSpecBytes, _ := json.Marshal(invalidSpecCond)
+	expectedPatch := fmt.Sprintf(expectedPatchWithoutSub, progressingCond, string(invalidSpecBytes))
 
-	c, i, k8sI := f.newController(noResyncPeriodFunc)
-	f.runController(getKey(r2, t), true, true, c, i, k8sI)
+	patch := f.getPatchedRollout(patchIndex)
+	assert.Equal(t, calculatePatch(r, expectedPatch), patch)
 }
 
 func TestCreateBackgroundAnalysisRunWithClusterTemplatesAndTemplate(t *testing.T) {
@@ -646,43 +642,6 @@ func TestFailCreateBackgroundAnalysisRunIfInvalidTemplateRef(t *testing.T) {
 	f.runExpectError(getKey(r2, t), true)
 }
 
-func TestFailCreateBackgroundAnalysisRunIfInvalidTemplateRefWithTemplates(t *testing.T) {
-	f := newFixture(t)
-	defer f.Close()
-
-	steps := []v1alpha1.CanaryStep{{
-		SetWeight: pointer.Int32Ptr(10),
-	}}
-
-	r1 := newCanaryRollout("foo", 1, nil, steps, pointer.Int32Ptr(0), intstr.FromInt(0), intstr.FromInt(1))
-	r2 := bumpVersion(r1)
-	r2.Spec.Strategy.Canary.Analysis = &v1alpha1.RolloutAnalysisBackground{
-		RolloutAnalysis: v1alpha1.RolloutAnalysis{
-			Templates: []v1alpha1.RolloutAnalysisTemplate{{
-				TemplateName: "invalid-template-ref",
-			}},
-		},
-	}
-
-	rs1 := newReplicaSetWithStatus(r1, 1, 1)
-	rs2 := newReplicaSetWithStatus(r2, 0, 0)
-	f.kubeobjects = append(f.kubeobjects, rs1, rs2)
-	f.replicaSetLister = append(f.replicaSetLister, rs1, rs2)
-	rs1PodHash := rs1.Labels[v1alpha1.DefaultRolloutUniqueLabelKey]
-
-	r2 = updateCanaryRolloutStatus(r2, rs1PodHash, 1, 0, 1, false)
-
-	progressingCondition, _ := newProgressingCondition(conditions.ReplicaSetUpdatedReason, rs2, "")
-	conditions.SetRolloutCondition(&r2.Status, progressingCondition)
-	availableCondition, _ := newAvailableCondition(true)
-	conditions.SetRolloutCondition(&r2.Status, availableCondition)
-
-	f.rolloutLister = append(f.rolloutLister, r2)
-	f.objects = append(f.objects, r2)
-
-	f.runExpectError(getKey(r2, t), true)
-}
-
 func TestFailCreateBackgroundAnalysisRunIfMetricRepeated(t *testing.T) {
 	f := newFixture(t)
 	defer f.Close()
diff --git a/rollout/context.go b/rollout/context.go
index dad137c649..3c6ddcc20b 100644
--- a/rollout/context.go
+++ b/rollout/context.go
@@ -1,6 +1,8 @@
 package rollout
 
 import (
+	"time"
+
 	log "github.com/sirupsen/logrus"
 	appsv1 "k8s.io/api/apps/v1"
 	"k8s.io/apimachinery/pkg/util/validation/field"
@@ -45,6 +47,9 @@ func (c *rolloutContext) reconcile() error {
 	err := c.getRolloutValidationErrors()
 	if err != nil {
 		if vErr, ok := err.(*field.Error); ok {
+			// We want to frequently requeue rollouts with InvalidSpec errors, because the error
+			// condition might be timing related (e.g. the Rollout was applied before the Service).
+			c.enqueueRolloutAfter(c.rollout, 20*time.Second)
 			return c.createInvalidRolloutCondition(vErr, c.rollout)
 		}
 		return err
diff --git a/rollout/controller.go b/rollout/controller.go
index 11313b6805..31a0efa36a 100644
--- a/rollout/controller.go
+++ b/rollout/controller.go
@@ -640,20 +640,24 @@ func (c *rolloutContext) getReferencedRolloutAnalyses() (*[]validation.AnalysisT
 			}
 			analysisTemplates = append(analysisTemplates, templates...)
 		}
-		// Don't need to check for background AnalysisRuns since RO controls and can terminate them
 	} else if c.rollout.Spec.Strategy.Canary != nil {
 		canary := c.rollout.Spec.Strategy.Canary
-		if canary.Steps != nil {
-			for i, step := range canary.Steps {
-				if step.Analysis != nil {
-					templates, err := c.getReferencedAnalysisTemplates(c.rollout, step.Analysis, validation.CanaryStep, i)
-					if err != nil {
-						return nil, err
-					}
-					analysisTemplates = append(analysisTemplates, templates...)
+		for i, step := range canary.Steps {
+			if step.Analysis != nil {
+				templates, err := c.getReferencedAnalysisTemplates(c.rollout, step.Analysis, validation.InlineAnalysis, i)
+				if err != nil {
+					return nil, err
 				}
+				analysisTemplates = append(analysisTemplates, templates...)
 			}
 		}
+		if canary.Analysis != nil {
+			templates, err := c.getReferencedAnalysisTemplates(c.rollout, &canary.Analysis.RolloutAnalysis, validation.BackgroundAnalysis, 0)
+			if err != nil {
+				return nil, err
+			}
+			analysisTemplates = append(analysisTemplates, templates...)
+		}
 	}
 	return &analysisTemplates, nil
 }
diff --git a/rollout/controller_test.go b/rollout/controller_test.go
index a8ed7a518b..af67bdc3e2 100644
--- a/rollout/controller_test.go
+++ b/rollout/controller_test.go
@@ -356,7 +356,10 @@ func calculatePatch(ro *v1alpha1.Rollout, patch string) string {
 	newObservedGen := conditions.ComputeGenerationHash(newRO.Spec)
 
 	newPatch := make(map[string]interface{})
-	json.Unmarshal([]byte(patch), &newPatch)
+	err = json.Unmarshal([]byte(patch), &newPatch)
+	if err != nil {
+		panic(err)
+	}
 	newStatus := newPatch["status"].(map[string]interface{})
 	newStatus["observedGeneration"] = newObservedGen
 	newPatch["status"] = newStatus
diff --git a/test/e2e/functional_test.go b/test/e2e/functional_test.go
index 2a0a0baf9b..26764cbceb 100644
--- a/test/e2e/functional_test.go
+++ b/test/e2e/functional_test.go
@@ -435,3 +435,59 @@ spec:
 		ExpectReplicaCounts(2, 2, 1, 2, 2). // desired, current, updated, ready, available
 		ExpectServiceSelector("bluegreen-to-canary", map[string]string{"app": "bluegreen-to-canary"})
 }
+
+// TestFixInvalidSpec verifies we recover from an InvalidSpec after applying
+func (s *FunctionalSuite) TestFixInvalidSpec() {
+	s.Given().
+		RolloutObjects(`
+apiVersion: argoproj.io/v1alpha1
+kind: Rollout
+metadata:
+  name: fix-invalid-spec
+spec:
+  replicas: 0
+  strategy:
+    canary:
+      analysis:
+        templates:
+        - templateName: doesnt-exist-yet
+  selector:
+    matchLabels:
+      app: fix-invalid-spec
+  template:
+    metadata:
+      labels:
+        app: fix-invalid-spec
+    spec:
+      containers:
+      - name: fix-invalid-spec
+        image: nginx:1.19-alpine
+        resources:
+          requests:
+            memory: 16Mi
+            cpu: 1m
+`).
+		When().
+		ApplyManifests().
+		WaitForRolloutStatus("Degraded").
+		Then().
+		Given().
+		RolloutObjects(`
+apiVersion: argoproj.io/v1alpha1
+kind: AnalysisTemplate
+metadata:
+  name: doesnt-exist-yet
+spec:
+  metrics:
+  - name: web
+    interval: 5s
+    successCondition: result.major == '1'
+    provider:
+      web:
+        url: https://kubernetes.default.svc/version
+        insecure: true
+`).
+		When().
+		ApplyManifests().
+		WaitForRolloutStatus("Healthy")
+}
diff --git a/test/fixtures/then.go b/test/fixtures/then.go
index edfb9585d9..b1d1789b9b 100644
--- a/test/fixtures/then.go
+++ b/test/fixtures/then.go
@@ -255,3 +255,9 @@ func (t *Then) When() *When {
 		Common: t.Common,
 	}
 }
+
+func (t *Then) Given() *Given {
+	return &Given{
+		Common: t.Common,
+	}
+}