Skip to content

Commit

Permalink
feat(operator): introduce retry logic for KeptnTasks (keptn#1088)
Browse files Browse the repository at this point in the history
Signed-off-by: odubajDT <ondrej.dubaj@dynatrace.com>
  • Loading branch information
odubajDT authored and aepfli committed Mar 30, 2023
1 parent b1c24d8 commit 59e9514
Show file tree
Hide file tree
Showing 21 changed files with 313 additions and 25 deletions.
11 changes: 11 additions & 0 deletions operator/apis/lifecycle/v1alpha3/keptnappversion_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,10 +178,17 @@ func TestKeptnAppVersion(t *testing.T) {

require.Equal(t, "trace1.appname.version.phase", app.GetSpanKey("phase"))

retries := int32(5)
task := app.GenerateTask(KeptnTaskDefinition{
ObjectMeta: v1.ObjectMeta{
Name: "task-def",
},
Spec: KeptnTaskDefinitionSpec{
Timeout: v1.Duration{
Duration: 5 * time.Second,
},
Retries: &retries,
},
}, common.PostDeploymentCheckType)
require.Equal(t, KeptnTaskSpec{
AppVersion: app.GetVersion(),
Expand All @@ -190,6 +197,10 @@ func TestKeptnAppVersion(t *testing.T) {
Parameters: TaskParameters{},
SecureParameters: SecureParameters{},
Type: common.PostDeploymentCheckType,
Timeout: v1.Duration{
Duration: 5 * time.Second,
},
Retries: &retries,
}, task.Spec)

evaluation := app.GenerateEvaluation(KeptnEvaluationDefinition{
Expand Down
9 changes: 9 additions & 0 deletions operator/apis/lifecycle/v1alpha3/keptntask_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package v1alpha3

import (
"testing"
"time"

"github.com/keptn/lifecycle-toolkit/operator/apis/lifecycle/v1alpha3/common"
"github.com/stretchr/testify/require"
Expand All @@ -19,6 +20,9 @@ func TestKeptnTask(t *testing.T) {
AppVersion: "appversion",
Type: common.PostDeploymentCheckType,
TaskDefinition: "def",
Timeout: metav1.Duration{
Duration: time.Duration(5 * time.Minute),
},
},
Status: KeptnTaskStatus{
Status: common.StateFailed,
Expand All @@ -35,6 +39,9 @@ func TestKeptnTask(t *testing.T) {
AppVersion: "appversion",
Type: common.PostDeploymentCheckType,
TaskDefinition: "def",
Timeout: metav1.Duration{
Duration: time.Duration(5 * time.Minute),
},
},
Status: KeptnTaskStatus{
Status: common.StateFailed,
Expand Down Expand Up @@ -106,6 +113,8 @@ func TestKeptnTask(t *testing.T) {
"taskDefinitionName": "def",
}, task.GetEventAnnotations())

require.Equal(t, int64(300), *task.GetActiveDeadlineSeconds())

}

func TestKeptnTaskList(t *testing.T) {
Expand Down
20 changes: 15 additions & 5 deletions operator/apis/lifecycle/v1alpha3/keptntask_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,14 @@ type KeptnTaskSpec struct {
Parameters TaskParameters `json:"parameters,omitempty"`
SecureParameters SecureParameters `json:"secureParameters,omitempty"`
Type common.CheckType `json:"checkType,omitempty"`
Retries int `json:"retries,omitempty"`
Timeout metav1.Duration `json:"timeout,omitempty"`
// +kubebuilder:default:=10
Retries *int32 `json:"retries,omitempty"`
// +optional
// +kubebuilder:default:="5m"
// +kubebuilder:validation:Pattern="^0|([0-9]+(\\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$"
// +kubebuilder:validation:Type:=string
// +optional
Timeout metav1.Duration `json:"timeout,omitempty"`
}

type TaskContext struct {
Expand Down Expand Up @@ -70,8 +76,7 @@ type KeptnTaskStatus struct {
Message string `json:"message,omitempty"`
StartTime metav1.Time `json:"startTime,omitempty"`
EndTime metav1.Time `json:"endTime,omitempty"`
// +kubebuilder:default:=0
RetryCount int `json:"retryCount"`
Reason string `json:"reason,omitempty"`
}

// +kubebuilder:object:root=true
Expand All @@ -82,7 +87,6 @@ type KeptnTaskStatus struct {
// +kubebuilder:printcolumn:name="WorkloadName",type=string,JSONPath=`.spec.workload`
// +kubebuilder:printcolumn:name="WorkloadVersion",type=string,JSONPath=`.spec.workloadVersion`
// +kubebuilder:printcolumn:name="Job Name",type=string,JSONPath=`.status.jobName`
// +kubebuilder:printcolumn:name="RetryCount",type=string,JSONPath=`.status.retryCount`
// +kubebuilder:printcolumn:name="Status",type=string,JSONPath=`.status.status`

// KeptnTask is the Schema for the keptntasks API
Expand Down Expand Up @@ -211,3 +215,9 @@ func (t KeptnTask) GetEventAnnotations() map[string]string {
"taskDefinitionName": t.Spec.TaskDefinition,
}
}

func (t KeptnTask) GetActiveDeadlineSeconds() *int64 {
deadline, _ := time.ParseDuration(t.Spec.Timeout.Duration.String())
seconds := int64(deadline.Seconds())
return &seconds
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import (
type KeptnTaskDefinitionSpec struct {
Function FunctionSpec `json:"function,omitempty"`
// +kubebuilder:default:=10
Retries int `json:"retries,omitempty"`
Retries *int32 `json:"retries,omitempty"`
// +optional
// +kubebuilder:default:="5m"
// +kubebuilder:validation:Pattern="^0|([0-9]+(\\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$"
Expand Down
11 changes: 11 additions & 0 deletions operator/apis/lifecycle/v1alpha3/keptnworkloadinstance_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,10 +182,17 @@ func TestKeptnWorkloadInstance(t *testing.T) {

require.Equal(t, "trace1.workloadname.version.phase", workload.GetSpanKey("phase"))

retries := int32(5)
task := workload.GenerateTask(KeptnTaskDefinition{
ObjectMeta: v1.ObjectMeta{
Name: "task-def",
},
Spec: KeptnTaskDefinitionSpec{
Timeout: v1.Duration{
Duration: 5 * time.Second,
},
Retries: &retries,
},
}, common.PostDeploymentCheckType)
require.Equal(t, KeptnTaskSpec{
AppName: workload.GetAppName(),
Expand All @@ -195,6 +202,10 @@ func TestKeptnWorkloadInstance(t *testing.T) {
Parameters: TaskParameters{},
SecureParameters: SecureParameters{},
Type: common.PostDeploymentCheckType,
Timeout: v1.Duration{
Duration: 5 * time.Second,
},
Retries: &retries,
}, task.Spec)

evaluation := workload.GenerateEvaluation(KeptnEvaluationDefinition{
Expand Down
10 changes: 10 additions & 0 deletions operator/apis/lifecycle/v1alpha3/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ spec:
type: object
retries:
default: 10
format: int32
type: integer
timeout:
default: 5m
Expand Down
14 changes: 6 additions & 8 deletions operator/config/crd/bases/lifecycle.keptn.sh_keptntasks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -261,9 +261,6 @@ spec:
- jsonPath: .status.jobName
name: Job Name
type: string
- jsonPath: .status.retryCount
name: RetryCount
type: string
- jsonPath: .status.status
name: Status
type: string
Expand Down Expand Up @@ -323,6 +320,8 @@ spec:
type: object
type: object
retries:
default: 10
format: int32
type: integer
secureParameters:
properties:
Expand All @@ -332,6 +331,8 @@ spec:
taskDefinition:
type: string
timeout:
default: 5m
pattern: ^0|([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
type: string
workload:
type: string
Expand All @@ -355,17 +356,14 @@ spec:
type: string
message:
type: string
retryCount:
default: 0
type: integer
reason:
type: string
startTime:
format: date-time
type: string
status:
default: Pending
type: string
required:
- retryCount
type: object
type: object
served: true
Expand Down
10 changes: 8 additions & 2 deletions operator/controllers/common/taskhandler.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,14 @@ func (r TaskHandler) setupTasks(taskCreateAttributes CreateTaskAttributes, piWra
}

func (r TaskHandler) handleTaskNotExists(ctx context.Context, phaseCtx context.Context, taskCreateAttributes CreateTaskAttributes, taskName string, piWrapper *interfaces.PhaseItemWrapper, reconcileObject client.Object, task *klcv1alpha3.KeptnTask, taskStatus *klcv1alpha3.ItemStatus) error {
taskCreateAttributes.Definition.Name = taskName
taskName, err := r.CreateKeptnTask(ctx, piWrapper.GetNamespace(), reconcileObject, taskCreateAttributes)
definition := &klcv1alpha3.KeptnTaskDefinition{}
err := r.Client.Get(ctx, types.NamespacedName{Name: taskName, Namespace: piWrapper.GetNamespace()}, definition)
if err != nil {
r.Log.Error(err, "could not find KeptnTaskDefinition")
return controllererrors.ErrCannotGetKeptnTaskDefinition
}
taskCreateAttributes.Definition = *definition
taskName, err = r.CreateKeptnTask(ctx, piWrapper.GetNamespace(), reconcileObject, taskCreateAttributes)
if err != nil {
return err
}
Expand Down
44 changes: 43 additions & 1 deletion operator/controllers/common/taskhandler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ func TestTaskHandler(t *testing.T) {
wantStatus []v1alpha3.ItemStatus
wantSummary apicommon.StatusSummary
taskObj v1alpha3.KeptnTask
taskDef *v1alpha3.KeptnTaskDefinition
wantErr error
getSpanCalls int
unbindSpanCalls int
Expand Down Expand Up @@ -58,15 +59,52 @@ func TestTaskHandler(t *testing.T) {
getSpanCalls: 0,
unbindSpanCalls: 0,
},
{
name: "task not started - could not find taskDefinition",
object: &v1alpha3.KeptnAppVersion{
ObjectMeta: v1.ObjectMeta{
Namespace: "namespace",
},
Spec: v1alpha3.KeptnAppVersionSpec{
KeptnAppSpec: v1alpha3.KeptnAppSpec{
PreDeploymentTasks: []string{"task-def"},
},
},
},
taskObj: v1alpha3.KeptnTask{},
createAttr: CreateTaskAttributes{
SpanName: "",
Definition: v1alpha3.KeptnTaskDefinition{
ObjectMeta: v1.ObjectMeta{
Name: "task-def",
},
},
CheckType: apicommon.PreDeploymentCheckType,
},
wantStatus: nil,
wantSummary: apicommon.StatusSummary{Total: 1, Pending: 0},
wantErr: controllererrors.ErrCannotGetKeptnTaskDefinition,
getSpanCalls: 0,
unbindSpanCalls: 0,
},
{
name: "task not started",
object: &v1alpha3.KeptnAppVersion{
ObjectMeta: v1.ObjectMeta{
Namespace: "namespace",
},
Spec: v1alpha3.KeptnAppVersionSpec{
KeptnAppSpec: v1alpha3.KeptnAppSpec{
PreDeploymentTasks: []string{"task-def"},
},
},
},
taskDef: &v1alpha3.KeptnTaskDefinition{
ObjectMeta: v1.ObjectMeta{
Namespace: "namespace",
Name: "task-def",
},
},
taskObj: v1alpha3.KeptnTask{},
createAttr: CreateTaskAttributes{
SpanName: "",
Expand Down Expand Up @@ -248,11 +286,15 @@ func TestTaskHandler(t *testing.T) {
return nil
},
}
initObjs := []client.Object{&tt.taskObj}
if tt.taskDef != nil {
initObjs = append(initObjs, tt.taskDef)
}
handler := TaskHandler{
SpanHandler: &spanHandlerMock,
Log: ctrl.Log.WithName("controller"),
Recorder: record.NewFakeRecorder(100),
Client: fake.NewClientBuilder().WithObjects(&tt.taskObj).Build(),
Client: fake.NewClientBuilder().WithObjects(initObjs...).Build(),
Tracer: trace.NewNoopTracerProvider().Tracer("tracer"),
Scheme: scheme.Scheme,
}
Expand Down
1 change: 1 addition & 0 deletions operator/controllers/errors/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ var ErrNoValues = fmt.Errorf("no values")
var ErrInvalidOperator = fmt.Errorf("invalid operator")
var ErrCannotMarshalParams = fmt.Errorf("could not marshal parameters")
var ErrUnsupportedWorkloadInstanceResourceReference = fmt.Errorf("unsupported Resource Reference")
var ErrCannotGetKeptnTaskDefinition = fmt.Errorf("cannot retrieve KeptnTaskDefinition")

var ErrCannotRetrieveConfigMsg = "could not retrieve KeptnConfig: %w"
var ErrCannotRetrieveInstancesMsg = "could not retrieve instances: %w"
Expand Down
2 changes: 2 additions & 0 deletions operator/controllers/lifecycle/keptntask/function_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ func (r *KeptnTaskReconciler) generateFunctionJob(task *klcv1alpha3.KeptnTask, p
RestartPolicy: "OnFailure",
},
},
BackoffLimit: task.Spec.Retries,
ActiveDeadlineSeconds: task.GetActiveDeadlineSeconds(),
},
}
err := controllerutil.SetControllerReference(task, job, r.Scheme)
Expand Down
12 changes: 8 additions & 4 deletions operator/controllers/lifecycle/keptntask/job_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,14 @@ func (r *KeptnTaskReconciler) updateJob(ctx context.Context, req ctrl.Request, t
}
return err
}
if job.Status.Succeeded > 0 {
task.Status.Status = apicommon.StateSucceeded
} else if job.Status.Failed > 0 {
task.Status.Status = apicommon.StateFailed
if len(job.Status.Conditions) > 0 {
if job.Status.Conditions[0].Type == batchv1.JobComplete {
task.Status.Status = apicommon.StateSucceeded
} else if job.Status.Conditions[0].Type == batchv1.JobFailed {
task.Status.Status = apicommon.StateFailed
task.Status.Message = job.Status.Conditions[0].Message
task.Status.Reason = job.Status.Conditions[0].Reason
}
}
return nil
}
Expand Down
13 changes: 10 additions & 3 deletions operator/controllers/lifecycle/keptntask/job_utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,11 @@ func TestKeptnTaskReconciler_updateJob(t *testing.T) {
err := klcv1alpha3.AddToScheme(fakeClient.Scheme())
require.Nil(t, err)

job.Status.Failed = 1
job.Status.Conditions = []batchv1.JobCondition{
{
Type: batchv1.JobFailed,
},
}

err = fakeClient.Status().Update(context.TODO(), job)
require.Nil(t, err)
Expand Down Expand Up @@ -124,8 +128,11 @@ func TestKeptnTaskReconciler_updateJob(t *testing.T) {
require.Equal(t, apicommon.StateFailed, task.Status.Status)

// now, set the job to succeeded
job.Status.Succeeded = 1
job.Status.Failed = 0
job.Status.Conditions = []batchv1.JobCondition{
{
Type: batchv1.JobComplete,
},
}

err = fakeClient.Status().Update(context.TODO(), job)
require.Nil(t, err)
Expand Down
6 changes: 5 additions & 1 deletion operator/test/component/task/task_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,11 @@ var _ = Describe("Task", Ordered, func() {
Expect(err).To(BeNil())

By("Setting the Job Status to failed")
createdJob.Status.Failed = 1
createdJob.Status.Conditions = []batchv1.JobCondition{
{
Type: batchv1.JobFailed,
},
}

err = k8sClient.Status().Update(context.TODO(), createdJob)
Expect(err).To(BeNil())
Expand Down
Loading

0 comments on commit 59e9514

Please sign in to comment.