Skip to content

Commit

Permalink
feat: add allowFailure option for workflow steps (#6114)
Browse files Browse the repository at this point in the history
* feat: add an allowFailure field to WorkflowStepSpec

* do not fail the entire workflow if a step labeled with
  allowFailure throws an error

Signed-off-by: Alex Johnson <hello@alex-johnson.net>

* improvement: improved error logging and standardised output on failure

Co-authored-by: Anna Mager <anna@garden.io>

---------

Signed-off-by: Alex Johnson <hello@alex-johnson.net>
Co-authored-by: Steffen Neubauer <steffen@garden.io>
Co-authored-by: Anna Mager <anna@garden.io>
  • Loading branch information
3 people committed Jun 18, 2024
1 parent 7e8aefd commit 70b8ca8
Show file tree
Hide file tree
Showing 9 changed files with 189 additions and 49 deletions.
107 changes: 75 additions & 32 deletions core/src/commands/workflow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,6 @@ export class WorkflowCommand extends Command<Args, {}> {
bodyLog: stepBodyLog,
}

let stepResult: CommandResult

garden.events.emit("workflowStepProcessing", { index })
const stepTemplateContext = new WorkflowStepConfigContext({
allStepNames,
Expand All @@ -161,6 +159,17 @@ export class WorkflowCommand extends Command<Args, {}> {

const initSaveLogState = stepBodyLog.root.storeEntries
stepBodyLog.root.storeEntries = true

if ((!step.command && !step.script) || (step.command && step.script)) {
garden.events.emit("workflowStepError", getStepEndEvent(index, stepStartedAt))
// This should be caught by the validation layer
throw new InternalError({
message: `Workflow steps must specify either a command or a script. Got: ${JSON.stringify(step)}`,
})
}

let stepResult: CommandResult | undefined

try {
if (step.command) {
step.command = resolveTemplateStrings({
Expand All @@ -174,25 +183,31 @@ export class WorkflowCommand extends Command<Args, {}> {
step.script = resolveTemplateString({ string: step.script, context: stepTemplateContext })
stepResult = await runStepScript(stepParams)
} else {
garden.events.emit("workflowStepError", getStepEndEvent(index, stepStartedAt))
// This should be caught by the validation layer
throw new InternalError({
message: `Workflow steps must specify either a command or a script. Got: ${JSON.stringify(step)}`,
})
stepResult = undefined
}
} catch (rawErr) {
const err = toGardenError(rawErr)

garden.events.emit("workflowStepError", getStepEndEvent(index, stepStartedAt))
stepErrors[index] = [err]
printStepDuration({ ...stepParams, success: false })
logErrors(stepBodyLog, [err], index, steps.length, step.description)
// There may be succeeding steps with `when: onError` or `when: always`, so we continue.
continue
// runStepCommand and runStepScript should not throw. If that happens it's either a bug (e.g. InternalError) or a user-error (e.g. TemplateError)
// In these cases we do not continue workflow execution, even when continueOnError is true and even when the following steps declared to run `when: onError` or `when: always`.
const continueOnError = false
logErrors(stepBodyLog, [err], index, steps.length, continueOnError, step.description)
break
}

if (stepResult === undefined) {
throw new InternalError({
message: `Workflow step did not return stepResult. Step: ${JSON.stringify(step)}`,
})
}

// Extract the text from the body log entry, info-level and higher
const stepLog = stepBodyLog.toString((entry) => entry.level <= LogLevel.info)

// TODO: add step conclusion, so following steps can be aware of the error if step.continueOnError is true.
result.steps[stepName] = {
number: index + 1,
outputs: stepResult.result || {},
Expand All @@ -201,9 +216,15 @@ export class WorkflowCommand extends Command<Args, {}> {
stepBodyLog.root.storeEntries = initSaveLogState

if (stepResult.errors && stepResult.errors.length > 0) {
logErrors(outerLog, stepResult.errors, index, steps.length, step.continueOnError || false, step.description)

// If we ignore errors
garden.events.emit("workflowStepError", getStepEndEvent(index, stepStartedAt))
logErrors(outerLog, stepResult.errors, index, steps.length, step.description)
stepErrors[index] = stepResult.errors

if (!step.continueOnError) {
stepErrors[index] = stepResult.errors
}

// There may be succeeding steps with `when: onError` or `when: always`, so we continue.
continue
}
Expand Down Expand Up @@ -383,10 +404,22 @@ export async function runStepCommand(params: RunStepCommandParams): Promise<Comm
return await command.action(commandParams)
}

export async function runStepScript({ garden, bodyLog, step }: RunStepParams): Promise<CommandResult<any>> {
export async function runStepScript({
garden,
bodyLog,
step,
}: RunStepParams): Promise<
CommandResult<{ exitCode: number | undefined; stdout: string | undefined; stderr: string | undefined }>
> {
try {
await runScript({ log: bodyLog, cwd: garden.projectRoot, script: step.script!, envVars: step.envVars })
return { result: {} }
const res = await runScript({ log: bodyLog, cwd: garden.projectRoot, script: step.script!, envVars: step.envVars })
return {
result: {
exitCode: res.exitCode,
stdout: res.stdout,
stderr: res.stderr,
},
}
} catch (err) {
// Unexpected error (failed to execute script, as opposed to script returning an error code)
if (!(err instanceof ChildProcessError)) {
Expand All @@ -400,12 +433,15 @@ export async function runStepScript({ garden, bodyLog, step }: RunStepParams): P
stderr: err.details.stderr,
})

bodyLog.error("")
bodyLog.error({ msg: `Script failed with the following error:`, error: scriptError })
bodyLog.error("")
bodyLog.error(err.details.stderr)

throw scriptError
// We return the error here because we want to separately handle unexpected internal errors (like syntax errors) and user error (like script failure).
return {
result: {
exitCode: err.details.code,
stdout: err.details.stdout,
stderr: err.details.stderr,
},
errors: [scriptError],
}
}
}

Expand Down Expand Up @@ -459,24 +495,31 @@ export function logErrors(
errors: GardenError[],
stepIndex: number,
stepCount: number,
continueOnError: boolean,
stepDescription?: string
) {
const description = formattedStepDescription(stepIndex, stepCount, stepDescription)
const errMsg = `An error occurred while running step ${styles.accent(description)}.\n`
log.error(errMsg)
log.debug("")
const allowedToFailMessage = `Because ${styles.bold("continueOnError")} is ${styles.bold(true)}, the workflow will continue as if the step succeeded.`
const errMsg = `\nAn error occurred while running step ${styles.accent(description)}.${continueOnError ? ` ${allowedToFailMessage}` : ``}\n`

const logFn: typeof log.warn | typeof log.error = (...args) =>
continueOnError ? log.warn(...args) : log.error(...args)

logFn(errMsg)
for (const error of errors) {
if (error instanceof WorkflowScriptError) {
const scriptErrMsg = renderMessageWithDivider({
prefix: `Script exited with code ${error.details.exitCode} ${renderDuration(log.getDuration())}`,
msg: error.explain(),
isError: true,
})
log.error(scriptErrMsg)
logFn(
renderMessageWithDivider({
prefix: `Script exited with code ${error.details.exitCode} ${renderDuration(log.getDuration())}. This is the stderr output:`,
msg: error.details.stderr || error.details.output,
isError: !continueOnError,
})
)
logFn("")
} else {
const taskDetailErrMsg = error.toString(true)
const taskDetailErrMsg = error.toString(false)
log.debug(taskDetailErrMsg)
log.error(error.explain() + "\n")
logFn(error.message + "\n\n")
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion core/src/config/template-contexts/workflow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class WorkflowStepContext extends ConfigContext {
// TODO: populate and then link to command docs
.description(
dedent`
The outputs returned by the step, as a mapping. Script steps will always have \`stdout\` and \`stderr\` keys.
The outputs returned by the step, as a mapping. Script steps will always have \`stdout\`, \`stderr\` and \`exitCode\` keys.
Command steps return different keys, including potentially nested maps and arrays. Please refer to each command
for its output schema.
`
Expand Down
4 changes: 3 additions & 1 deletion core/src/config/workflow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,9 @@ export interface WorkflowStepSpec {
description?: string
envVars?: PrimitiveMap
script?: string
skip?: boolean
when?: workflowStepModifier
skip?: boolean
continueOnError?: boolean
}

export const workflowStepSchema = createSchema({
Expand Down Expand Up @@ -265,6 +266,7 @@ export const workflowStepSchema = createSchema({
See the [workflows guide](${DOCS_BASE_URL}/using-garden/workflows#the-skip-and-when-options) for details
and examples.
`),
continueOnError: joi.boolean().description(`Set to true to continue if the step errors.`).default(false),
}),
xor: [["command", "script"]],
})
Expand Down
2 changes: 1 addition & 1 deletion core/src/exceptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ export class WorkflowScriptError extends GardenError {
constructor(details: WorkflowScriptErrorDetails) {
super({
message: dedent`
Script exited with code ${details.exitCode}. This is the output:
Script exited with code ${details.exitCode}. This is the stderr output:
${details.stderr || details.output}`,
})
Expand Down
48 changes: 47 additions & 1 deletion core/test/unit/src/commands/workflow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,49 @@ describe("RunWorkflowCommand", () => {
expect(result.errors || []).to.eql([])
})

it("should continue on error if continueOnError = true", async () => {
garden.setWorkflowConfigs([
{
apiVersion: GardenApiVersion.v0,
name: "workflow-a",
kind: "Workflow",
internal: {
basePath: garden.projectRoot,
},
files: [],
envVars: {},
resources: defaultWorkflowResources,
steps: [
{
script: dedent`
echo stdout
echo stderr 1>&2
exit 1
`, // <-- error thrown here
continueOnError: true,
},
{ command: ["echo", "success!"] },
],
},
])

const { result, errors } = await cmd.action({ ...defaultParams, args: { workflow: "workflow-a" } })

expect(result).to.exist
expect(errors).to.not.exist

// step 1 is a script command
expect(result?.steps).to.have.property("step-1")
expect(result?.steps["step-1"].log).to.equal("stdout\nstderr")
expect(result?.steps["step-1"].outputs["stderr"]).to.equal("stderr")
expect(result?.steps["step-1"].outputs["stdout"]).to.equal("stdout")
expect(result?.steps["step-1"].outputs["exitCode"]).to.equal(1)

// we should have executed step 2, which is a Garden command, because continueOnError is true in step-1.
expect(result?.steps).to.have.property("step-2")
expect(result?.steps["step-2"].outputs).to.not.have.property("stderr")
})

it("should add workflowStep metadata to log entries provided to steps", async () => {
const _garden = await makeTestGardenA(undefined)
// Ensure log entries are empty
Expand Down Expand Up @@ -796,6 +839,9 @@ describe("RunWorkflowCommand", () => {
expect(result).to.exist
expect(errors).to.not.exist
expect(result?.steps["step-1"].log).to.equal("stdout\nstderr")
expect(result?.steps["step-1"].outputs["stderr"]).to.equal("stderr")
expect(result?.steps["step-1"].outputs["stdout"]).to.equal("stdout")
expect(result?.steps["step-1"].outputs["exitCode"]).to.equal(0)
})

it("should throw if a script step fails", async () => {
Expand Down Expand Up @@ -845,7 +891,7 @@ describe("RunWorkflowCommand", () => {
if (!(error instanceof WorkflowScriptError)) {
expect.fail("Expected error to be a WorkflowScriptError")
}
expect(error.message).to.equal("Script exited with code 1. This is the output:\n\nboo!")
expect(error.message).to.equal("Script exited with code 1. This is the stderr output:\n\nboo!")
expect(error.details.stdout).to.equal("boo!")
})

Expand Down
57 changes: 45 additions & 12 deletions core/test/unit/src/config/workflow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import { expect } from "chai"
import type { TestGarden } from "../../../helpers.js"
import { expectError, getDataDir, makeTestGarden, makeTestGardenA } from "../../../helpers.js"
import type { WorkflowConfig, TriggerSpec } from "../../../../src/config/workflow.js"
import type { WorkflowConfig, WorkflowStepSpec, TriggerSpec } from "../../../../src/config/workflow.js"
import {
resolveWorkflowConfig,
populateNamespaceForTriggers,
Expand Down Expand Up @@ -39,6 +39,12 @@ describe("resolveWorkflowConfig", () => {
keepAliveHours: 48,
}

const defaultWorkflowStep: WorkflowStepSpec = {
skip: false,
when: "onSuccess",
continueOnError: false,
}

before(async () => {
garden = await makeTestGardenA()
garden["secrets"] = { foo: "bar", bar: "baz", baz: "banana" }
Expand All @@ -55,8 +61,15 @@ describe("resolveWorkflowConfig", () => {
description: "Sample workflow",
envVars: {},
steps: [
{ description: "Deploy the stack", command: ["deploy"], skip: false, when: "onSuccess", envVars: {} },
{ command: ["test"], skip: false, when: "onSuccess", envVars: {} },
{
...defaultWorkflowStep,
description: "Deploy the stack",
command: ["deploy"],
skip: false,
when: "onSuccess",
envVars: {},
},
{ ...defaultWorkflowStep, command: ["test"], skip: false, when: "onSuccess", envVars: {} },
],
triggers: [
{
Expand Down Expand Up @@ -85,8 +98,15 @@ describe("resolveWorkflowConfig", () => {
envVars: {},
limits: minimumWorkflowLimits, // <----
steps: [
{ description: "Deploy the stack", command: ["deploy"], skip: false, when: "onSuccess", envVars: {} },
{ command: ["test"], skip: false, when: "onSuccess", envVars: {} },
{
...defaultWorkflowStep,
description: "Deploy the stack",
command: ["deploy"],
skip: false,
when: "onSuccess",
envVars: {},
},
{ ...defaultWorkflowStep, command: ["test"], skip: false, when: "onSuccess", envVars: {} },
],
triggers: [
{
Expand Down Expand Up @@ -145,12 +165,13 @@ describe("resolveWorkflowConfig", () => {
envVars: {},
steps: [
{
...defaultWorkflowStep,
description: "Deploy the stack",
command: ["deploy", "${var.foo}"],
skip: false,
when: "onSuccess",
},
{ script: "echo ${var.foo}", skip: false, when: "onSuccess" },
{ ...defaultWorkflowStep, script: "echo ${var.foo}", skip: false, when: "onSuccess" },
],
}

Expand All @@ -169,8 +190,14 @@ describe("resolveWorkflowConfig", () => {

envVars: {},
steps: [
{ description: "Deploy the stack", command: ["deploy"], skip: false, when: "onSuccess" },
{ command: ["test"], skip: false, when: "onSuccess" },
{
...defaultWorkflowStep,
description: "Deploy the stack",
command: ["deploy"],
skip: false,
when: "onSuccess",
},
{ ...defaultWorkflowStep, command: ["test"], skip: false, when: "onSuccess" },
],
}

Expand All @@ -186,8 +213,14 @@ describe("resolveWorkflowConfig", () => {

envVars: {},
steps: [
{ description: "Deploy the stack", command: ["deploy"], skip: false, when: "onSuccess" },
{ command: ["test"], skip: false, when: "onSuccess" },
{
...defaultWorkflowStep,
description: "Deploy the stack",
command: ["deploy"],
skip: false,
when: "onSuccess",
},
{ ...defaultWorkflowStep, command: ["test"], skip: false, when: "onSuccess" },
],
triggers: [
{
Expand Down Expand Up @@ -221,8 +254,8 @@ describe("resolveWorkflowConfig", () => {
...defaults,
...config,
steps: [
{ description: "Deploy the stack", command: ["deploy"], skip: false, when: "onSuccess", envVars: {} },
{ command: ["test"], skip: false, when: "onSuccess", envVars: {} },
{ ...defaultWorkflowStep, description: "Deploy the stack", command: ["deploy"], envVars: {} },
{ ...defaultWorkflowStep, command: ["test"], envVars: {} },
],
})
})
Expand Down
Loading

0 comments on commit 70b8ca8

Please sign in to comment.