Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 103 additions & 0 deletions actions/setup/js/check_circuit_breaker.cjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// @ts-check
/// <reference types="@actions/github-script" />

const fs = require("fs");
const path = require("path");
const { writeDenialSummary } = require("./pre_activation_summary.cjs");
const { getErrorMessage } = require("./error_helpers.cjs");

/**
 * Circuit breaker check for agentic workflows.
 *
 * Reads the circuit-breaker state from <stateDir>/circuit-breaker-state.json
 * (downloaded by the preceding actions/download-artifact step) and implements
 * the standard closed → open → half-open state machine pattern:
 *
 *   CLOSED    → normal execution (consecutive_failures < max, OR the last
 *               failure is missing / older than the time window)
 *   OPEN      → execution blocked (consecutive_failures >= max, last failure
 *               inside the time window, cooldown not elapsed)
 *   HALF-OPEN → one retry allowed (consecutive_failures >= max, last failure
 *               inside the time window, cooldown elapsed)
 *
 * Environment variables:
 *   GH_AW_CB_MAX_FAILURES         failure threshold (default 5)
 *   GH_AW_CB_TIME_WINDOW_MINUTES  how long a failure counts as recent (default 1440)
 *   GH_AW_CB_COOLDOWN_MINUTES     wait before a retry is allowed (default 60)
 *   GH_AW_CB_NOTIFY               "true" to emit an error annotation when OPEN (default "true")
 *   GH_AW_WORKFLOW_NAME           name used in log messages
 *   GH_AW_CB_STATE_DIR            overrides the default state directory (/tmp/gh-aw) for tests
 *
 * Outputs (via core.setOutput):
 *   circuit_breaker_ok    "true" when execution is allowed, "false" when blocked
 *   consecutive_failures  the failure count read from the state file
 *
 * Any unreadable, malformed or non-numeric input fails open: the circuit is
 * treated as CLOSED so a broken state file can never block a workflow.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const maxFailures = readNonNegativeIntEnv("GH_AW_CB_MAX_FAILURES", 5);
  const timeWindowMinutes = readNonNegativeIntEnv("GH_AW_CB_TIME_WINDOW_MINUTES", 1440);
  const cooldownMinutes = readNonNegativeIntEnv("GH_AW_CB_COOLDOWN_MINUTES", 60);
  const notify = (process.env.GH_AW_CB_NOTIFY?.trim() || "true") === "true";
  const workflowName = process.env.GH_AW_WORKFLOW_NAME || "Unknown Workflow";
  const stateDir = process.env.GH_AW_CB_STATE_DIR || "/tmp/gh-aw";

  core.info(`🔌 Circuit breaker check for workflow '${workflowName}'`);
  core.info(` Configuration: max=${maxFailures} failures, window=${timeWindowMinutes}m, cooldown=${cooldownMinutes}m`);

  // Read the circuit breaker state downloaded by the preceding download-artifact step.
  const stateFile = path.join(stateDir, "circuit-breaker-state.json");
  const state = loadCircuitBreakerState(stateFile);

  // Coerce the counter to a number; anything non-numeric or negative counts as zero (fail-open).
  const rawFailures = Number(state.consecutive_failures ?? 0);
  const consecutiveFailures = Number.isFinite(rawFailures) && rawFailures >= 0 ? rawFailures : 0;

  // An absent or unparseable timestamp yields NaN, which is treated as "no recorded failure".
  const lastFailureMs = state.last_failure ? new Date(state.last_failure).getTime() : NaN;

  core.info(` Consecutive failures: ${consecutiveFailures} / ${maxFailures}`);

  // Clamp at zero so a timestamp slightly in the future (clock skew) reads as "just now".
  // With no usable timestamp the elapsed time is infinite, so the failure is never recent.
  const elapsedMs = Number.isNaN(lastFailureMs) ? Infinity : Math.max(0, Date.now() - lastFailureMs);
  const windowMs = timeWindowMinutes * 60 * 1000;
  const failureIsRecent = elapsedMs <= windowMs;

  // CLOSED state: fewer failures than threshold, or failures are outside the time window
  if (consecutiveFailures < maxFailures || !failureIsRecent) {
    core.info(`✅ Circuit breaker is CLOSED — workflow execution allowed`);
    core.setOutput("circuit_breaker_ok", "true");
    core.setOutput("consecutive_failures", String(consecutiveFailures));
    return;
  }

  // Circuit is OPEN — check if cooldown has elapsed (HALF-OPEN state)
  const cooldownMs = cooldownMinutes * 60 * 1000;
  if (elapsedMs >= cooldownMs) {
    core.info(`🔄 Circuit breaker is HALF-OPEN — cooldown elapsed, allowing one retry`);
    core.setOutput("circuit_breaker_ok", "true");
    core.setOutput("consecutive_failures", String(consecutiveFailures));
    return;
  }

  // OPEN state: block execution
  const minutesSinceFail = Math.floor(elapsedMs / 60000);
  const minutesUntilRetry = Math.max(0, cooldownMinutes - minutesSinceFail);

  core.warning(`🔴 Circuit breaker is OPEN — workflow execution blocked`);
  core.warning(` ${consecutiveFailures} consecutive failures in the last ${timeWindowMinutes} minutes`);
  core.warning(` Retry allowed in approximately ${minutesUntilRetry} minute(s)`);

  core.setOutput("circuit_breaker_ok", "false");
  core.setOutput("consecutive_failures", String(consecutiveFailures));

  if (notify) {
    core.error(
      `Circuit breaker OPEN for '${workflowName}': ${consecutiveFailures} consecutive failures detected. ` +
        `Workflow execution is blocked until the cooldown period expires (≈${minutesUntilRetry} min remaining). ` +
        `Fix the underlying issue and wait for the cooldown, or manually reset by deleting the 'circuit-breaker-state' artifact.`
    );
  }

  await writeDenialSummary(
    `Circuit breaker OPEN for workflow '${workflowName}': ${consecutiveFailures} consecutive failures detected within the ${timeWindowMinutes}-minute window.`,
    `The circuit breaker will allow a retry after the cooldown period (≈${minutesUntilRetry} min remaining). Fix the underlying issue and wait, or delete the \`circuit-breaker-state\` artifact to reset manually.`
  );
}

/**
 * Read a non-negative integer from an environment variable.
 *
 * @param {string} name - environment variable name
 * @param {number} fallback - value used when the variable is unset, blank, non-numeric or negative
 * @returns {number} the parsed value, or `fallback`
 */
function readNonNegativeIntEnv(name, fallback) {
  const raw = process.env[name]?.trim();
  if (!raw) {
    return fallback;
  }
  const value = parseInt(raw, 10);
  if (Number.isNaN(value) || value < 0) {
    // A NaN threshold would make every comparison false and silently change the circuit state.
    core.warning(`Invalid value '${raw}' for ${name}; using default ${fallback}.`);
    return fallback;
  }
  return value;
}

/**
 * Load the persisted circuit-breaker state, failing open on any problem.
 *
 * @param {string} stateFile - absolute path of the JSON state file
 * @returns {{consecutive_failures?: any, last_failure?: any}} the parsed state object, or a
 *   fresh `{ consecutive_failures: 0 }` when the file is missing, unreadable, not valid JSON,
 *   or does not contain a JSON object
 */
function loadCircuitBreakerState(stateFile) {
  let problem;
  try {
    if (!fs.existsSync(stateFile)) {
      core.info(` No previous state found — starting fresh (circuit CLOSED)`);
      return { consecutive_failures: 0 };
    }
    const parsed = JSON.parse(fs.readFileSync(stateFile, "utf8"));
    // JSON.parse happily returns null, numbers, strings or arrays; only a plain object is usable.
    if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
      core.info(` Loaded state: consecutive_failures=${parsed.consecutive_failures}`);
      return parsed;
    }
    problem = "state file does not contain a JSON object";
  } catch (error) {
    problem = getErrorMessage(error);
  }
  // If we can't load the previous state, assume circuit is closed (fail-open for availability)
  core.warning(`Could not read previous circuit breaker state from ${stateFile}: ${problem}. Assuming circuit is closed.`);
  return { consecutive_failures: 0 };
}

module.exports = { main };
189 changes: 189 additions & 0 deletions actions/setup/js/check_circuit_breaker.test.cjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
// @ts-check
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
import fs from "fs";
import os from "os";
import path from "path";

// ---- Globals ----
// Stand-in for the `core` object injected by actions/github-script. Every
// function is a fresh vitest mock so individual tests can assert on calls.
const stub = () => vi.fn();

const mockCore = {
  // Logging / annotations
  debug: stub(),
  info: stub(),
  notice: stub(),
  warning: stub(),
  error: stub(),
  setFailed: stub(),
  // Outputs, variables and secrets
  setOutput: stub(),
  exportVariable: stub(),
  setSecret: stub(),
  getCancelled: stub(),
  setCancelled: stub(),
  setError: stub(),
  // Inputs and saved state
  getInput: stub(),
  getBooleanInput: stub(),
  getMultilineInput: stub(),
  getState: stub(),
  saveState: stub(),
  // Log grouping
  startGroup: stub(),
  endGroup: stub(),
  group: stub(),
  // Miscellaneous
  addPath: stub(),
  setCommandEcho: stub(),
  isDebug: vi.fn().mockReturnValue(false),
  getIDToken: stub(),
  toPlatformPath: stub(),
  toPosixPath: stub(),
  toWin32Path: stub(),
  // Job summary builder: addRaw chains, write resolves.
  summary: {
    addRaw: vi.fn().mockReturnThis(),
    write: vi.fn().mockResolvedValue(undefined),
  },
};

// Expose the same globals that actions/github-script provides to the scripts.
Object.assign(global, {
  core: mockCore,
  github: {},
  context: {
    repo: { owner: "test-owner", repo: "test-repo" },
    runId: 123456,
  },
});

// Helper: ISO-8601 timestamp for the instant `m` minutes before now.
const minutesAgoISO = m => {
  const msPerMinute = 60 * 1000;
  const then = new Date(Date.now() - m * msPerMinute);
  return then.toISOString();
};

describe("check_circuit_breaker.cjs", () => {
  const STATE_FILE_NAME = "circuit-breaker-state.json";

  // Baseline configuration every test starts from; GH_AW_CB_STATE_DIR is added
  // per test because it points at a freshly created temp directory.
  const baselineEnv = {
    GH_AW_CB_MAX_FAILURES: "5",
    GH_AW_CB_TIME_WINDOW_MINUTES: "1440",
    GH_AW_CB_COOLDOWN_MINUTES: "60",
    GH_AW_CB_NOTIFY: "true",
    GH_AW_WORKFLOW_NAME: "Test Workflow",
  };
  const managedEnvKeys = [...Object.keys(baselineEnv), "GH_AW_CB_STATE_DIR"];

  let tmpDir;
  let originalEnv;

  beforeEach(() => {
    vi.clearAllMocks();
    // Restore summary stubs after clearAllMocks
    mockCore.summary.addRaw.mockReturnThis();
    mockCore.summary.write.mockResolvedValue(undefined);

    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "cb-check-test-"));

    // Snapshot whatever the host environment had so afterEach can put it back.
    originalEnv = {};
    for (const key of managedEnvKeys) {
      originalEnv[key] = process.env[key];
    }
    Object.assign(process.env, baselineEnv, { GH_AW_CB_STATE_DIR: tmpDir });
  });

  afterEach(() => {
    Object.entries(originalEnv).forEach(([key, val]) => {
      if (val !== undefined) {
        process.env[key] = val;
        return;
      }
      delete process.env[key];
    });
    fs.rmSync(tmpDir, { recursive: true, force: true });
  });

  /** Absolute path of the state file inside the per-test temp dir. */
  const statePath = () => path.join(tmpDir, STATE_FILE_NAME);

  /** Write a state file to the temp dir. */
  const writeState = state => {
    fs.writeFileSync(statePath(), JSON.stringify(state), "utf8");
  };

  /** Load a fresh copy of the module under test and run its entry point. */
  const runCheck = async () => {
    vi.resetModules();
    const { main } = await import("./check_circuit_breaker.cjs");
    await main();
  };

  /** Assert that the script set the given output to the given value. */
  const expectOutput = (name, value) => {
    expect(mockCore.setOutput).toHaveBeenCalledWith(name, value);
  };

  /** Assert that the given core logger was called with a message containing `text`. */
  const expectLogged = (logger, text) => {
    expect(logger).toHaveBeenCalledWith(expect.stringContaining(text));
  };

  it("CLOSED — no previous state file: allows execution", async () => {
    await runCheck();

    expectOutput("circuit_breaker_ok", "true");
    expectOutput("consecutive_failures", "0");
    expectLogged(mockCore.info, "CLOSED");
  });

  it("CLOSED — failures below threshold: allows execution", async () => {
    writeState({ consecutive_failures: 3, last_failure: minutesAgoISO(10) });

    await runCheck();

    expectOutput("circuit_breaker_ok", "true");
    expectLogged(mockCore.info, "CLOSED");
  });

  it("CLOSED — failures at threshold but outside time window: allows execution", async () => {
    // 2 days ago — outside the 24h (1440 min) window
    const twoDaysInMinutes = 2 * 24 * 60;
    writeState({ consecutive_failures: 5, last_failure: minutesAgoISO(twoDaysInMinutes) });

    await runCheck();

    expectOutput("circuit_breaker_ok", "true");
    expectLogged(mockCore.info, "CLOSED");
  });

  it("OPEN — failures at threshold within window: blocks execution", async () => {
    writeState({ consecutive_failures: 5, last_failure: minutesAgoISO(5) });

    await runCheck();

    expectOutput("circuit_breaker_ok", "false");
    expectLogged(mockCore.warning, "OPEN");
  });

  it("OPEN — notify=true posts an error annotation", async () => {
    process.env.GH_AW_CB_NOTIFY = "true";
    writeState({ consecutive_failures: 5, last_failure: minutesAgoISO(5) });

    await runCheck();

    expectLogged(mockCore.error, "Circuit breaker OPEN");
  });

  it("OPEN — notify=false skips the error annotation", async () => {
    process.env.GH_AW_CB_NOTIFY = "false";
    writeState({ consecutive_failures: 5, last_failure: minutesAgoISO(5) });

    await runCheck();

    expectOutput("circuit_breaker_ok", "false");
    expect(mockCore.error).not.toHaveBeenCalled();
  });

  it("HALF-OPEN — cooldown elapsed: allows one retry", async () => {
    process.env.GH_AW_CB_COOLDOWN_MINUTES = "60";
    // 90 min ago — cooldown (60 min) elapsed, still within 24h window
    writeState({ consecutive_failures: 5, last_failure: minutesAgoISO(90) });

    await runCheck();

    expectOutput("circuit_breaker_ok", "true");
    expectLogged(mockCore.info, "HALF-OPEN");
  });

  it("OPEN — cooldown not yet elapsed: blocks execution", async () => {
    process.env.GH_AW_CB_COOLDOWN_MINUTES = "60";
    // 30 min ago — cooldown (60 min) not yet elapsed
    writeState({ consecutive_failures: 5, last_failure: minutesAgoISO(30) });

    await runCheck();

    expectOutput("circuit_breaker_ok", "false");
  });

  it("CLOSED — custom lower threshold respected", async () => {
    process.env.GH_AW_CB_MAX_FAILURES = "3";
    writeState({ consecutive_failures: 2, last_failure: minutesAgoISO(5) });

    await runCheck();

    expectOutput("circuit_breaker_ok", "true");
  });

  it("handles corrupt state file gracefully — circuit CLOSED (fail-open)", async () => {
    fs.writeFileSync(statePath(), "NOT VALID JSON", "utf8");

    await runCheck();

    expectLogged(mockCore.warning, "Could not read");
    // Fail-open: allow execution
    expectOutput("circuit_breaker_ok", "true");
  });
});
71 changes: 71 additions & 0 deletions actions/setup/js/find_circuit_breaker_artifact.cjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// @ts-check
/// <reference types="@actions/github-script" />

const { getErrorMessage } = require("./error_helpers.cjs");

/**
 * Find the most recent completed workflow run (other than the current one)
 * that has a non-expired 'circuit-breaker-state' artifact, and output its run ID.
 *
 * Only the 20 most recent completed runs of the current workflow are inspected,
 * in the order the API returns them. Runs whose artifacts cannot be listed are
 * skipped; any other API failure is logged as a warning and treated as
 * "not found" so the caller starts with a fresh (closed) circuit.
 *
 * Output: previous_run_id — the run ID string, or empty string if not found.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const artifactName = "circuit-breaker-state";
  const {
    repo: { owner, repo },
    runId,
  } = context;

  core.info(`🔌 Looking for previous circuit-breaker-state artifact`);

  try {
    // Get the workflow ID of the current run
    const { data: runData } = await github.rest.actions.getWorkflowRun({
      owner,
      repo,
      run_id: runId,
    });
    const workflowId = runData.workflow_id;
    core.info(` Workflow ID: ${workflowId}`);

    // List recent completed runs for this workflow (the current one is skipped below)
    const { data: runsData } = await github.rest.actions.listWorkflowRuns({
      owner,
      repo,
      workflow_id: workflowId,
      status: "completed",
      per_page: 20,
    });

    core.info(` Found ${runsData.workflow_runs.length} recent completed runs`);

    // Checked one run at a time on purpose: the first hit wins, so later runs need no API call.
    for (const run of runsData.workflow_runs) {
      if (run.id === runId) continue;

      try {
        // Filter by name server-side and raise the page size so the artifact cannot be
        // pushed off the first page when a run produced many artifacts.
        const { data: artifactsData } = await github.rest.actions.listWorkflowRunArtifacts({
          owner,
          repo,
          run_id: run.id,
          name: artifactName,
          per_page: 100,
        });

        // Re-check the name locally in case the server ignores the filter.
        const artifact = artifactsData.artifacts.find(a => a.name === artifactName && !a.expired);
        if (artifact) {
          core.info(` Found circuit-breaker-state artifact in run #${run.id}`);
          core.setOutput("previous_run_id", String(run.id));
          return;
        }
      } catch (error) {
        // Best effort: one run failing to list must not abort the whole search.
        core.debug(` Could not list artifacts for run #${run.id}: ${getErrorMessage(error)}`);
        continue;
      }
    }

    core.info(` No previous circuit-breaker-state artifact found`);
    core.setOutput("previous_run_id", "");
  } catch (error) {
    core.warning(`Could not search for previous circuit breaker state: ${getErrorMessage(error)}`);
    core.setOutput("previous_run_id", "");
  }
}

module.exports = { main };
Loading