Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions src/node/runtime/sshConnectionPool.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,26 @@ describe("SSHConnectionPool", () => {
expect(health?.backoffUntil).toBeDefined();
});

test("backoff caps at ~10s with jitter", () => {
  const sshPool = new SSHConnectionPool();
  const target: SSHRuntimeConfig = { host: "test.example.com", srcBaseDir: "/work" };

  // Ten consecutive failures are recorded so the backoff is expected to sit at
  // its maximum step by the time the health entry is inspected.
  let remaining = 10;
  while (remaining > 0) {
    sshPool.reportFailure(target, "Connection refused");
    remaining -= 1;
  }

  const entry = sshPool.getConnectionHealth(target)!;
  const deadlineMs = entry.backoffUntil!.getTime();
  const remainingBackoffMs = deadlineMs - Date.now();

  // A 10s cap with ±20% jitter gives a nominal window of 8s..12s; both bounds
  // are widened by 0.5s to absorb the time elapsed while the test runs.
  expect(remainingBackoffMs).toBeGreaterThan(7_500);
  expect(remainingBackoffMs).toBeLessThanOrEqual(12_500);
});

test("resetBackoff clears backoff state after failed probe", async () => {
const pool = new SSHConnectionPool();
const config: SSHRuntimeConfig = {
Expand Down Expand Up @@ -317,5 +337,52 @@ describe("SSHConnectionPool", () => {
// Only 1 failure should be recorded (not 3) - proves singleflighting worked
expect(pool.getConnectionHealth(config)?.consecutiveFailures).toBe(1);
});

// Scenario: the pool is already in backoff when three callers ask for a
// connection at once. Each caller is handed a controllable `sleep` so the test
// decides when the backoff wait ends. The first wake-up marks the connection
// healthy, and all three acquisitions are then expected to resolve.
test("callers waking from backoff share single probe (herd only released on success)", async () => {
const pool = new SSHConnectionPool();
const config: SSHRuntimeConfig = {
host: "test.example.com",
srcBaseDir: "/work",
};

// Put connection in backoff
pool.reportFailure(config, "Initial failure");
expect(pool.getConnectionHealth(config)?.consecutiveFailures).toBe(1);

// probeCount counts how many times the simulated recovery (markHealthy) ran.
// sleepResolvers collects one "wake up" callback per pending sleep() call.
let probeCount = 0;
const sleepResolvers: Array<() => void> = [];

// Start 3 waiters - they'll all sleep through backoff
// NOTE(review): presumably acquireConnection calls the injected `sleep` in
// place of a real timer while the connection is in backoff - confirm against
// the pool implementation.
const waiters = [1, 2, 3].map(() =>
pool.acquireConnection(config, {
sleep: () =>
new Promise<void>((resolve) => {
sleepResolvers.push(() => {
// When sleep resolves, simulate recovery (mark healthy).
// Only the first resolver to run performs the recovery; the guard below
// makes the later resolvers skip it and just resolve their own sleep.
if (probeCount === 0) {
probeCount++;
pool.markHealthy(config);
}
resolve();
});
}),
})
);

// Yield one microtask turn so that every acquireConnection call has had the
// chance to invoke sleep() and register its resolver; the length check below
// fails if any caller has not reached its sleep by then.
await Promise.resolve(); // Let all acquireConnection calls reach sleep
expect(sleepResolvers.length).toBe(3);

// Wake them all up "simultaneously"
sleepResolvers.forEach((resolve) => resolve());

// All should succeed
await Promise.all(waiters);

// Only one "probe" (markHealthy) should have happened.
// Because of the `probeCount === 0` guard above, the counter can never exceed
// 1, so this assertion chiefly confirms that at least one resolver ran; the
// status check is what shows the pool ended up healthy.
expect(probeCount).toBe(1);
expect(pool.getConnectionHealth(config)?.status).toBe("healthy");
});
});
});
17 changes: 13 additions & 4 deletions src/node/runtime/sshConnectionPool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,18 @@ export interface ConnectionHealth {
}

/**
* Backoff schedule in seconds: 1s → 5s → 10s → 20s → 40s → 60s (cap)
* Backoff schedule in seconds: 1s → 2s → 4s → 7s → 10s (cap)
* Kept short to avoid blocking user actions; thundering herd is mitigated by jitter.
*/
const BACKOFF_SCHEDULE = [1, 5, 10, 20, 40, 60];
const BACKOFF_SCHEDULE = [1, 2, 4, 7, 10];

/**
 * Scale a backoff duration by a random factor so that multiple clients
 * recovering at the same moment do not all retry in lockstep (thundering herd).
 *
 * The factor is drawn uniformly from `[1 - jitterRatio, 1 + jitterRatio)`;
 * with the default ratio of 0.2 that is the original ±20% (0.8 to 1.2).
 *
 * @param seconds - Base backoff duration, in seconds.
 * @param jitterRatio - Fraction of `seconds` to vary by in each direction.
 *   Expected to be within `[0, 1]`; `0` disables jitter. Defaults to `0.2`.
 * @param random - Source of uniform values in `[0, 1)`. Defaults to
 *   `Math.random`; inject a fixed function to get deterministic results in tests.
 * @returns The jittered duration, in seconds.
 */
function withJitter(
  seconds: number,
  jitterRatio = 0.2,
  random: () => number = Math.random
): number {
  // Map random() from [0, 1) onto [1 - ratio, 1 + ratio). With the default
  // ratio this evaluates to exactly 0.8 + random() * 0.4.
  const jitterFactor = 1 - jitterRatio + random() * 2 * jitterRatio;
  return seconds * jitterFactor;
}

/**
* Time after which a "healthy" connection should be re-probed.
Expand Down Expand Up @@ -315,7 +324,7 @@ export class SSHConnectionPool {
const current = this.health.get(key);
const failures = (current?.consecutiveFailures ?? 0) + 1;
const backoffIndex = Math.min(failures - 1, BACKOFF_SCHEDULE.length - 1);
const backoffSecs = BACKOFF_SCHEDULE[backoffIndex];
const backoffSecs = withJitter(BACKOFF_SCHEDULE[backoffIndex]);

this.health.set(key, {
status: "unhealthy",
Expand All @@ -326,7 +335,7 @@ export class SSHConnectionPool {
});

log.warn(
`SSH connection failed (${failures} consecutive). Backoff for ${backoffSecs}s. Error: ${error}`
`SSH connection failed (${failures} consecutive). Backoff for ${backoffSecs.toFixed(1)}s. Error: ${error}`
);
}

Expand Down