diff --git a/docs/variables.md b/docs/variables.md index 15254d20f2ff..a3e750626b28 100644 --- a/docs/variables.md +++ b/docs/variables.md @@ -55,6 +55,7 @@ step. | `inputs.parameters.` | Input parameter of the metric-emitting template | | `outputs.parameters.` | Output parameter of the metric-emitting template | | `outputs.result` | Output result of the metric-emitting template | +| `resourcesDuration` | Resources duration as a string. Can also be indexed for a selected resource, if available (may be one of `resourcesDuration.cpu` or `resourcesDuration.memory`. For more info, see the [Resource Duration](resource-duration.md) doc.| ### Realtime Metrics diff --git a/workflow/common/common.go b/workflow/common/common.go index 6ce060163449..f98076b24748 100644 --- a/workflow/common/common.go +++ b/workflow/common/common.go @@ -136,6 +136,12 @@ const ( LocalVarPodName = "pod.name" // LocalVarRetries is a step level variable that references the retries number if retryStrategy is specified LocalVarRetries = "retries" + // LocalVarDuration is a step level variable (currently only available in metric emission) that tracks the duration of the step + LocalVarDuration = "duration" + // LocalVarStatus is a step level variable (currently only available in metric emission) that tracks the duration of the step + LocalVarStatus = "status" + // LocalVarResourcesDuration is a step level variable (currently only available in metric emission) that tracks the resources duration of the step + LocalVarResourcesDuration = "resourcesDuration" KubeConfigDefaultMountPath = "/kube/config" KubeConfigDefaultVolumeName = "kubeconfig" diff --git a/workflow/controller/operator_test.go b/workflow/controller/operator_test.go index 5d5ed66fbe92..2b9ca848c96e 100644 --- a/workflow/controller/operator_test.go +++ b/workflow/controller/operator_test.go @@ -2671,7 +2671,6 @@ func TestStepsOnExitFailures(t *testing.T) { woc.operate() woc.operate() - fmt.Println(woc.globalParams) assert.Contains(t, woc.globalParams[common.GlobalVarWorkflowFailures], `[{\"displayName\":\"exit-handlers\",\"message\":\"Unexpected pod phase for exit-handlers: \",\"templateName\":\"intentional-fail\",\"phase\":\"Error\",\"podName\":\"exit-handlers\"`) } diff --git a/workflow/controller/steps.go b/workflow/controller/steps.go index 7752b4dd9c0a..520c208fcbc7 100644 --- a/workflow/controller/steps.go +++ b/workflow/controller/steps.go @@ -469,19 +469,19 @@ func (woc *wfOperationCtx) prepareMetricScope(node *wfv1.NodeStatus) (map[string localScope := woc.globalParams.DeepCopy() if node.Fulfilled() { - localScope["duration"] = fmt.Sprintf("%f", node.FinishedAt.Sub(node.StartedAt.Time).Seconds()) - realTimeScope["duration"] = func() float64 { + localScope[common.LocalVarDuration] = fmt.Sprintf("%f", node.FinishedAt.Sub(node.StartedAt.Time).Seconds()) + realTimeScope[common.LocalVarDuration] = func() float64 { return node.FinishedAt.Sub(node.StartedAt.Time).Seconds() } } else { - localScope["duration"] = fmt.Sprintf("%f", time.Since(node.StartedAt.Time).Seconds()) - realTimeScope["duration"] = func() float64 { + localScope[common.LocalVarDuration] = fmt.Sprintf("%f", time.Since(node.StartedAt.Time).Seconds()) + realTimeScope[common.LocalVarDuration] = func() float64 { return time.Since(node.StartedAt.Time).Seconds() } } if node.Phase != "" { - localScope["status"] = string(node.Phase) + localScope[common.LocalVarStatus] = string(node.Phase) } if node.Inputs != nil { @@ -501,5 +501,12 @@ func (woc *wfOperationCtx) prepareMetricScope(node *wfv1.NodeStatus) (map[string } } + if node.ResourcesDuration != nil { + localScope[common.LocalVarResourcesDuration] = node.ResourcesDuration.String() + for name, duration := range node.ResourcesDuration { + localScope[fmt.Sprintf("%s.%s", common.LocalVarResourcesDuration, name)] = duration.String() + } + } + return localScope, realTimeScope } diff --git a/workflow/controller/steps_test.go b/workflow/controller/steps_test.go index c9af094f8922..001d30bf39d4 100644 --- a/workflow/controller/steps_test.go +++ b/workflow/controller/steps_test.go @@ -3,10 +3,12 @@ package controller import ( "testing" + "github.com/ghodss/yaml" "github.com/stretchr/testify/assert" wfv1 "github.com/argoproj/argo/pkg/apis/workflow/v1alpha1" "github.com/argoproj/argo/test" + "github.com/argoproj/argo/workflow/common" ) // TestStepsFailedRetries ensures a steps template will recognize exhausted retries @@ -119,3 +121,49 @@ func TestStepsWithParamAndGlobalParam(t *testing.T) { woc.operate() assert.Equal(t, wfv1.NodeRunning, woc.wf.Status.Phase) } + +func TestResourceDurationMetric(t *testing.T) { + var nodeStatus = ` + boundaryID: many-items-z26lj + displayName: sleep(4:four) + finishedAt: "2020-06-02T16:04:50Z" + hostNodeName: minikube + id: many-items-z26lj-3491220632 + name: many-items-z26lj[0].sleep(4:four) + outputs: + artifacts: + - archiveLogs: true + name: main-logs + s3: + accessKeySecret: + key: accesskey + name: my-minio-cred + bucket: my-bucket + endpoint: minio:9000 + insecure: true + key: many-items-z26lj/many-items-z26lj-3491220632/main.log + secretKeySecret: + key: secretkey + name: my-minio-cred + exitCode: "0" + phase: Succeeded + resourcesDuration: + cpu: 33 + memory: 24 + startedAt: "2020-06-02T16:04:21Z" + templateName: sleep + templateScope: local/many-items-z26lj + type: Pod +` + + woc := wfOperationCtx{globalParams: make(common.Parameters)} + var node wfv1.NodeStatus + err := yaml.Unmarshal([]byte(nodeStatus), &node) + if assert.NoError(t, err) { + localScope, _ := woc.prepareMetricScope(&node) + assert.Contains(t, localScope["resourcesDuration"], "24s*(100Mi memory)") + assert.Contains(t, localScope["resourcesDuration"], "33s*(1 cpu)") + assert.Equal(t, "33s", localScope["resourcesDuration.cpu"]) + assert.Equal(t, "24s", localScope["resourcesDuration.memory"]) + } +}