diff --git a/docs/lab-notes/2026-04-20-coding-cli-session-contract.md b/docs/lab-notes/2026-04-20-coding-cli-session-contract.md index 34123ed2b..bef4830d8 100644 --- a/docs/lab-notes/2026-04-20-coding-cli-session-contract.md +++ b/docs/lab-notes/2026-04-20-coding-cli-session-contract.md @@ -1,6 +1,6 @@ # Coding CLI Session Contract Lab Note -This note records the real-binary provider probes rerun on `2026-04-26` inside `/home/user/code/freshell/.worktrees/trycycle-codex-session-resilience`. Binary version facts were refreshed on `2026-05-03` inside `/home/user/code/freshell/.worktrees/land-local-main-codex-sidecar-lifecycle`. +This note records the real-binary provider probes rerun on `2026-04-26` inside `/home/user/code/freshell/.worktrees/trycycle-codex-session-resilience`. Binary version facts were refreshed on `2026-05-14` inside `/home/user/code/freshell/.worktrees/codex-stability-implementation-20260514`. The implementation plan file is dated `2026-04-19` because the design work was written the day before. This note is dated `2026-04-26` because the real-provider contracts were re-proved on the implementation machine on that date, and that verification date is the one Freshell is allowed to build on. @@ -9,7 +9,7 @@ The implementation plan file is dated `2026-04-19` because the design work was w { "capturedOn": "2026-04-26", "planCreatedOn": "2026-04-19", - "dateReason": "The plan was drafted on 2026-04-19, but the checked-in note is dated 2026-04-26 because that is when the durable behavior contract was re-proved on the implementation machine and the earlier 2026-04-23 contract capture was superseded by the newer provider behavior. 
Binary version facts were refreshed on 2026-05-03 after the installed provider versions changed.", + "dateReason": "The plan was drafted on 2026-04-19, but the checked-in note is dated 2026-04-26 because that is when the durable behavior contract was re-proved on the implementation machine and the earlier 2026-04-23 contract capture was superseded by the newer provider behavior. Binary version facts were refreshed on 2026-05-14 after the installed provider versions changed.", "cleanup": { "liveProcessAuditCommand": "ps -eo pid,ppid,stat,cmd --sort=pid | rg \"codex|claude|opencode\"", "ownershipReportFields": [ @@ -37,7 +37,7 @@ The implementation plan file is dated `2026-04-19` because the design work was w "codex": { "executable": "codex", "resolvedPath": "/home/user/.npm-global/bin/codex", - "version": "codex-cli 0.128.0", + "version": "codex-cli 0.130.0", "freshRemoteBootstrapCommand": "codex --remote <ws>", "freshRemoteBootstrapEventsBeforeUserTurn": [ "connection", @@ -60,8 +60,11 @@ The implementation plan file is dated `2026-04-19` because the design work was w ], "remoteResumeBootstrapFollowupMethods": [ "account/rateLimits/read", + "command/exec", + "hooks/list", "skills/list", - "skills/list" + "skills/list", + "thread/goal/get" ], "freshRemoteAllocatesThreadBeforeUserTurn": true, "shellSnapshotGlob": ".codex/shell_snapshots/*.sh", @@ -81,7 +84,7 @@ The implementation plan file is dated `2026-04-19` because the design work was w "executable": "claude", "resolvedPath": "/home/user/bin/claude", "isolatedBinaryPath": "/home/user/.local/bin/claude", - "version": "2.1.126 (Claude Code)", + "version": "2.1.140 (Claude Code)", "exactIdCommandTemplate": "HOME=<temp-home> /home/user/.local/bin/claude --bare --dangerously-skip-permissions -p --session-id <uuid> <prompt>", "namedResumeCommandTemplate": "HOME=<temp-home> /home/user/.local/bin/claude --bare --dangerously-skip-permissions -p --resume <title-or-uuid> [--name <title>] <prompt>", "transcriptGlob": ".claude/projects/*/<uuid>.jsonl", @@ -94,7 +97,7 @@ The implementation 
plan file is dated `2026-04-19` because the design work was w "opencode": { "executable": "opencode", "resolvedPath": "/home/user/.opencode/bin/opencode", - "version": "1.14.33", + "version": "1.14.50", "runCommandTemplate": "opencode run <prompt> --format json --dangerously-skip-permissions", "serveCommandTemplate": "opencode serve --hostname 127.0.0.1 --port <port>", "globalHealthPath": "/global/health", @@ -102,6 +105,7 @@ The implementation plan file is dated `2026-04-19` because the design work was w "canonicalIdentity": "session-id", "runEventSessionIdMatchesDbId": true, "busyStatusUsesAuthoritativeSessionId": true, + "attachFormatJsonEmitsEvents": false, "titleOnResumeMutatesStoredTitle": false, "sessionSubcommands": [ "list", @@ -138,10 +142,10 @@ command -v codex # /home/user/.npm-global/bin/codex codex --version -# codex-cli 0.128.0 +# codex-cli 0.130.0 ``` -This 2026-05-03 version refresh supersedes the older `codex-cli 0.125.0` capture. The current version of record on this machine is `codex-cli 0.128.0`. +This 2026-05-14 version refresh supersedes the older `codex-cli 0.128.0` capture. The current version of record on this machine is `codex-cli 0.130.0`. Fresh remote bootstrap was probed with a loopback websocket stub and: @@ -160,7 +164,7 @@ Before any user turn, the CLI opened a connection and issued: That proves fresh `codex --remote` allocates a thread during bootstrap, before the first user turn, but that thread allocation is not yet the durable contract Freshell may persist. -The remote resume form was re-proved through a websocket proxy in front of the real app-server. Before any user turn, `codex --remote <ws> --no-alt-screen resume <sessionId>` issued the stable prefix through `thread/resume`, and then the follow-up `skills/list` and `account/rateLimits/read` calls. 
The trailing post-resume follow-up order was observed to vary between reruns on the same binary, so only the stable prefix plus the required follow-up method set is treated as contract. +The remote resume form was re-proved through a websocket proxy in front of the real app-server. Before any user turn, `codex --remote <ws> --no-alt-screen resume <sessionId>` issued the stable prefix through `thread/resume`, and then the follow-up `account/rateLimits/read`, `command/exec`, `hooks/list`, `skills/list`, and `thread/goal/get` calls. The trailing post-resume follow-up order was observed to vary between reruns on the same binary, so only the stable prefix plus the required follow-up method set is treated as contract. Real provider-owned durability was re-proved against the app-server websocket with: @@ -229,6 +233,19 @@ Allowed Freshell behavior: - Freshell may only persist canonical Codex identity after the durable `.jsonl` artifact exists at the provider-reported `thread.path`. - Freshell must not treat the bootstrap `thread/start` id as durable restore identity. +### 2026-05-14 Codex restore decision addendum + +The `da2e0076` refactor added a design constraint that belongs with the provider contract: deterministic Codex restore needs one typed create/restore decision path, not only a correct rollout proof reader. Restore-like entry points must make the same decision about canonical `sessionRef`, captured candidate proof, live attach after proof failure, fresh create, and legacy raw resume. Keeping those choices local to each caller risks separate restore semantics. 
+ +Design-level change recorded from `/home/user/code/freshell/.worktrees/codex-stability-implementation-20260514`: `/home/user/code/freshell/.worktrees/codex-stability-implementation-20260514/server/coding-cli/codex-app-server/restore-decision.ts` now owns `planCodexCreateRestoreDecision` and `resolveCodexCreateRestoreDecision`, and `/home/user/code/freshell/.worktrees/codex-stability-implementation-20260514/server/ws-handler.ts` routes Codex `terminal.create` and reopen handling through it. This is a narrow centralization, not a claim that every surface is done. + +Follow-up constraints: + +- Move exact live-candidate matching into the central module or make its typed input contract require enough live candidate identity for the module to verify `candidateThreadId` and `rolloutPath`. +- Remove or replace `legacy_raw_resume_passthrough`; raw resume should not remain a durable restore identity path. +- Extend the same decision path to REST, MCP, CLI, and any future restore-like surface instead of maintaining parallel semantics. +- Add surface matrix tests so coverage proves all entry points use the same restore decisions, not just the decision module and the current websocket route. + ## Claude Version and binaries: @@ -238,7 +255,7 @@ command -v claude # /home/user/bin/claude claude --version -# 2.1.126 (Claude Code) +# 2.1.140 (Claude Code) ``` The wrapper at `/home/user/bin/claude` shells out to `/home/user/.local/bin/claude`. The isolated probes used the actual binary and overrode `HOME` to keep persistence inside the probe temp root. @@ -287,7 +304,7 @@ command -v opencode # /home/user/.opencode/bin/opencode opencode --version -# 1.14.33 +# 1.14.50 ``` Fresh isolated runs were probed with: @@ -312,9 +329,10 @@ curl http://127.0.0.1:<port>/session/status Observed control behavior: -- `/global/health` returned a healthy payload with version `1.14.33`. +- `/global/health` returned a healthy payload with version `1.14.50`. 
- `/session/status` returned `{}` while idle. -- During an attached `opencode run ... --attach http://127.0.0.1:<port>`, `/session/status` returned the same authoritative `sessionID` with `{ "type": "busy" }`. +- During an attached `opencode run ... --attach http://127.0.0.1:<port>`, `/session/status` returned an authoritative `sessionID` with `{ "type": "busy" }`, and the same id was persisted as a `session.id` row in the isolated OpenCode database. +- On OpenCode `1.14.50`, attached `opencode run ... --attach ... --format json` exited successfully but emitted no JSON event lines on stdout, so attached-mode identity must come from `/session/status` plus the persisted database row rather than attached-run stdout. Title semantics were probed with: diff --git a/docs/lab-notes/2026-05-13-coding-cli-session-restore-research.md b/docs/lab-notes/2026-05-13-coding-cli-session-restore-research.md new file mode 100644 index 000000000..1275f5bca --- /dev/null +++ b/docs/lab-notes/2026-05-13-coding-cli-session-restore-research.md @@ -0,0 +1,739 @@ +# Coding CLI Session Restore Research + +This is the primary research record for how Freshell should identify, persist, and restore sessions for Codex, Claude Code, and OpenCode. Consult this file before changing session identity, restore, resume, sidebar, or terminal recovery behavior. + +## What matters + +| Provider | Deterministic restore identity | What works | What fails or must not be used | Not fully studied | +| --- | --- | --- | --- | --- | +| Codex | The rollout-backed root TUI `ThreadId` after the exact provider-reported `.codex/sessions/YYYY/MM/DD/rollout-*.jsonl` file exists and starts with matching `session_meta`. | Fresh `codex --remote` creates a thread before user work; Freshell can capture that pre-durable candidate after installing listeners, then promote only after the exact rollout file proves the same root TUI `ThreadId`. `turn/completed` is the required proof-check boundary, not proof itself. 
| Pre-creating an app-server thread and launching the TUI with `codex resume <threadId>` before the rollout file exists fails with `no rollout found for thread id`. Cwd, time, title, shell snapshot, and bare pre-durable thread id are not durable restore identity. If proof fails after `turn/completed`, Freshell must show a degraded/error state and use only deterministic one-shot repair triggers. | Full long-idle and restart behavior still needs product-level coverage, but the identity contract is known. | +| Claude Code | The UUID-backed transcript file under `.claude/projects/*/<uuid>.jsonl`. | `--session-id <uuid>` creates a durable transcript, and `--resume <uuid>` restores it. | Titles and names are mutable metadata only. The old title stops resolving after rename. | The proof covers print-mode session creation/resume/rename; broader interactive TUI edge cases are not the source of truth here. | +| OpenCode | The authoritative `sessionID` from JSON events, the DB row, and `/session/status`. | JSON `step_start` session id matches the DB session id; `/session/status` reports the same busy id while attached. | Titles are metadata and do not replace session identity. No rename subcommand was present in the tested mode. | Full interactive TUI restart and long-idle behavior still needs product-level coverage. | + +## Freshell rules + +- Never infer a coding-agent restore identity from cwd, launch time, tab title, pane title, or proximity. +- For Codex, capture the pre-durable root TUI `ThreadId` candidate before allowing user input, but persist it as a candidate only; promote it to canonical durable identity only after the exact rollout path returned by Codex exists and starts with parseable `session_meta` whose `payload.id` matches the candidate `ThreadId`. +- For Codex, `turn/completed` is the mandatory proof-check boundary. It is not itself proof of durable restore. 
On that event, Freshell must run one exact proof read and either promote to durable or mark `durability_unproven_after_completion`. +- For Codex, a post-completion proof failure is not a normal grey/live-only steady state. Later Codex events, `fs/changed`, PTY exit, app-server websocket close/error, and user restore/list/open actions may each trigger one exact repair proof read, but Freshell must not start periodic or backoff read loops. +- For Codex, do not try to prevent restore loss by pre-creating an app-server thread and TUI-resuming it before rollout materialization; the real binary rejected that path. +- For Claude Code, persist the UUID transcript identity, not the visible title or `--name` value. +- For OpenCode, promote only from authoritative provider surfaces: JSON events, the DB/session row, or `/session/status`. +- Cleanup for probes must never stop real user sessions; only processes tagged with the current temp root and sentinel are safe to stop. + +## Scope and provenance + +The real-binary provider probes were rerun on `2026-04-26` inside `/home/user/code/freshell/.worktrees/trycycle-codex-session-resilience`. Binary version facts were refreshed on `2026-05-03` inside `/home/user/code/freshell/.worktrees/land-local-main-codex-sidecar-lifecycle`; the Claude Code binary version fact was refreshed again on `2026-05-06` inside `/home/user/code/freshell/.worktrees/codex-sidebar-reopen-corner-origin-pr-20260505` after the installed binary changed. A targeted Codex pre-durable resume and identity-capture experiment was run on `2026-05-13` inside `/home/user/code/freshell/.worktrees/dev` using isolated temp roots. + +The later version-only refreshes did not re-prove the full behavior contract, so `capturedOn` remains `2026-04-26`; the `2026-05-13` experiment is recorded as a narrow Codex addendum. 
A Codex source-code study was added on `2026-05-13` against the locally installed `@openai/codex` package and the official upstream `openai/codex` tag `rust-v0.130.0`. + +The implementation plan file is dated `2026-04-19` because the design work was written the day before. This research record is dated `2026-05-13` because it now includes the targeted Codex pre-durable resume experiment. The durable behavior contract date remains `2026-04-26`, because that is when the full real-provider contract was re-proved on the implementation machine and that verification date is the one Freshell is allowed to build on. + +The real-provider harness parses the next section. Keep the `## Machine-readable contract` heading and the fenced JSON block intact when editing this file. + +## Machine-readable contract +```json +{ + "capturedOn": "2026-04-26", + "planCreatedOn": "2026-04-19", + "binaryVersionFactsRefreshedOn": "2026-05-06", + "dateReason": "The plan was drafted on 2026-04-19, but the checked-in note is dated 2026-04-26 because that is when the durable behavior contract was re-proved on the implementation machine and the earlier 2026-04-23 contract capture was superseded by the newer provider behavior. Binary version facts were refreshed on 2026-05-03 after installed provider versions changed, and the Claude Code binary version fact was refreshed on 2026-05-06 after the local installed binary changed to 2.1.132. These later version-only refreshes did not re-prove the behavior contract.", + "cleanup": { + "liveProcessAuditCommand": "ps -eo pid,ppid,stat,cmd --sort=pid | rg \"codex|claude|opencode\"", + "ownershipReportFields": [ + "pid", + "ppid", + "cwd", + "tempHome", + "sentinelPath", + "safeToStop", + "command" + ], + "safeToStopRequires": [ + "FRESHELL_PROBE_HOME must match the current temp root.", + "FRESHELL_PROBE_SENTINEL must match the current sentinel path." 
+ ], + "safeExamples": [ + "Probe-owned temp-home root processes and their descendants tagged by the current harness sentinel." + ], + "unsafeExamples": [ + "Real user codex, claude, or opencode sessions under the user home.", + "Any process that lacks the current harness sentinel metadata." + ] + }, + "providers": { + "codex": { + "executable": "codex", + "resolvedPath": "/home/user/.npm-global/bin/codex", + "version": "codex-cli 0.130.0", + "freshRemoteBootstrapCommand": "codex --remote <ws>", + "freshRemoteBootstrapEventsBeforeUserTurn": [ + "connection", + "initialize", + "initialized", + "account/read", + "account/read", + "model/list", + "thread/start" + ], + "remoteResumeBootstrapStablePrefix": [ + "connection", + "initialize", + "initialized", + "account/read", + "thread/read", + "account/read", + "model/list", + "thread/resume" + ], + "remoteResumeBootstrapFollowupMethods": [ + "account/rateLimits/read", + "command/exec", + "hooks/list", + "skills/list", + "skills/list", + "thread/goal/get" + ], + "freshRemoteAllocatesThreadBeforeUserTurn": true, + "shellSnapshotGlob": ".codex/shell_snapshots/*.sh", + "durableArtifactGlob": ".codex/sessions/YYYY/MM/DD/rollout-*.jsonl", + "freshInteractiveCreatesShellSnapshotBeforeTurn": true, + "freshInteractiveCreatesDurableSessionBeforeTurn": false, + "appServerThreadPathAvailableBeforeArtifact": true, + "appServerMissingPathWatchAccepted": true, + "appServerMissingParentWatchAccepted": true, + "appServerWatchEchoesCallerWatchId": false, + "appServerArtifactMaterializesAtReportedPath": true, + "appServerChangedPathsMentionRolloutPath": false, + "resumeCommandTemplate": "codex --remote <ws> --no-alt-screen resume <threadId>", + "preDurableResumeExperimentCapturedOn": "2026-05-13", + "preDurableResumeCommandTemplate": "codex --remote <ws> --no-alt-screen resume <threadId>", + "preDurableResumeBeforeRolloutWorks": false, + "preDurableResumeFailureFragment": "no rollout found for thread id", + 
"freshRemoteThreadStartedDelayMs": 641, + "preDurableIdentityCaptureStrategy": "Launch fresh remote TUI only after listener installation, then block user input until thread/started is persisted.", + "codexIdentityNames": { + "rootTuiThreadId": "Provider ThreadId observed from thread/start or thread/started for the root TUI thread.", + "rolloutProofId": "The payload.id value from the first rollout JSONL record when type is session_meta.", + "resumeId": "The same root TUI ThreadId passed to codex --remote <ws> --no-alt-screen resume <threadId>.", + "ambiguousTermsToAvoid": [ + "generic session id", + "provider session_id" + ] + }, + "turnCompletedIsDurabilityProof": false, + "noPollingPromotionSupported": "yes_with_required_completion_proof_check_and_event_driven_repair", + "noPollingCanonicalPromotionStrategy": "Use turn/completed for the candidate root TUI ThreadId as the normal proof-check boundary. On that event, immediately do one exact proof read of the stored provider-reported rollout path and promote only if the first JSONL record is matching session_meta. fs/changed, later Codex events, PTY exit, app-server websocket close/error, and user-initiated restore/list/open actions are repair opportunities, not the normal success path.", + "noPollingPromotionGuarantee": "No periodic or backoff existence/read loop. Durable restore is allowed to be unproven before a Codex turn completes. 
After turn/completed, a missing, unreadable, empty, malformed, or mismatched rollout proof is durability_unproven_after_completion and must be visible as degraded/error state.", + "proofReadContract": { + "trigger": "turn/completed", + "path": "stored provider-reported rolloutPath", + "read": "one exact read of the rollout path", + "success": "regular readable JSONL file whose first record has type session_meta and payload.id equal to candidateThreadId", + "failureStateAfterTurnCompleted": "durability_unproven_after_completion", + "timerLoopAllowed": false + }, + "durabilityStateModel": { + "identity_pending": { + "canonical": false, + "userInput": "blocked", + "sidebar": "Starting Codex; restore identity not captured.", + "userCan": "wait, close, or start a fresh pane" + }, + "captured_pre_turn": { + "canonical": false, + "userInput": "allowed after the candidate write succeeds", + "sidebar": "Codex identity captured; restore proof pending before first turn.", + "userCan": "work in the live terminal" + }, + "turn_in_progress_unproven": { + "canonical": false, + "userInput": "allowed while live terminal is healthy", + "sidebar": "Codex turn running; restore proof pending.", + "userCan": "continue live work, with restore not yet guaranteed" + }, + "proof_checking": { + "canonical": false, + "userInput": "allowed if the live terminal remains attachable", + "sidebar": "Checking Codex restore proof.", + "userCan": "keep using the live terminal while the exact proof read is in flight" + }, + "durable": { + "canonical": true, + "userInput": "allowed", + "sidebar": "Codex session restorable.", + "userCan": "restore or reopen using the durable root TUI ThreadId" + }, + "durability_unproven_after_completion": { + "canonical": false, + "userInput": "allowed only through an attachable live terminal", + "sidebar": "Codex restore proof failed after turn completion.", + "userCan": "attach live if available, trigger one-shot repair by restore/list/open, or start fresh" + }, + 
"non_restorable": { + "canonical": false, + "userInput": "fresh terminal only", + "sidebar": "Codex session not restorable.", + "userCan": "open a fresh Codex terminal" + } + }, + "repairTriggers": [ + { + "name": "later_codex_event", + "semantics": "On a later Codex notification/response that is deterministically tied to the candidate root TUI ThreadId, run one exact proof read. Promote on success; remain degraded on failure." + }, + { + "name": "fs_changed", + "semantics": "On fs/changed for the exact rollout path or watched parent, run one exact proof read. Promote on success; remain degraded on failure." + }, + { + "name": "pty_exit", + "semantics": "On PTY exit, run one exact proof read before deciding whether the captured session is durable, still pre-completion lenient, or non_restorable." + }, + { + "name": "app_server_websocket_close_or_error", + "semantics": "On app-server websocket close/error, run one exact proof read for the captured candidate. Promote on success; otherwise keep or enter degraded/non-restorable state according to live terminal availability." + }, + { + "name": "user_restore_list_open", + "semantics": "On user restore, list, or open for a captured-but-unproven Codex session, run one exact proof read first. This is a repair path, not the normal success path." 
+ } + ], + "capturedUnprovenReopenPolicy": { + "firstStep": "Run one exact proof read of the stored rolloutPath.", + "onProofSuccess": "Promote to durable and resume with the proven root TUI ThreadId.", + "onProofFailureLiveAttachable": "Attach the existing live terminal and keep the degraded/unproven state visible.", + "onProofFailureLiveMissing": "Create a fresh Codex terminal and show a clear message that the captured session could not be proven restorable.", + "forbidden": [ + "cwd_time_title_matching", + "shell_snapshot_identity", + "hidden_hook_configuration", + "fake_or_mutating_provider_writes" + ] + }, + "inputGatePurpose": "Block user-originating PTY input only until Freshell has captured and durably saved Codex's candidate root TUI ThreadId and provider-reported rollout path. The gate is not waiting for the rollout file to exist.", + "turnCompletionDurabilityContract": "Before a Codex turn completes, canonical restore may be unproven. When turn/completed arrives for the candidate root TUI ThreadId, Freshell must immediately proof-read the exact rollout path. 
Completion is the required proof-check boundary, not proof itself, because Codex can warn on rollout flush failure and still complete the turn.", + "mutableNameSurface": "absent" + }, + "claude": { + "executable": "claude", + "resolvedPath": "/home/user/bin/claude", + "isolatedBinaryPath": "/home/user/.local/bin/claude", + "version": "2.1.132 (Claude Code)", + "exactIdCommandTemplate": "HOME=<temp-home> /home/user/.local/bin/claude --bare --dangerously-skip-permissions -p --session-id <uuid> <prompt>", + "namedResumeCommandTemplate": "HOME=<temp-home> /home/user/.local/bin/claude --bare --dangerously-skip-permissions -p --resume <title-or-uuid> [--name <title>] <prompt>", + "transcriptGlob": ".claude/projects/*/<uuid>.jsonl", + "canonicalIdentity": "uuid-transcript", + "namedResumeWorksInPrintMode": true, + "renameMutatesMetadataOnly": true, + "oldTitleStopsResolvingAfterRename": true, + "oldTitleErrorFragment": "does not match any session title" + }, + "opencode": { + "executable": "opencode", + "resolvedPath": "/home/user/.opencode/bin/opencode", + "version": "1.14.41", + "runCommandTemplate": "opencode run <prompt> --format json --dangerously-skip-permissions", + "serveCommandTemplate": "opencode serve --hostname 127.0.0.1 --port <port>", + "globalHealthPath": "/global/health", + "sessionStatusPath": "/session/status", + "canonicalIdentity": "session-id", + "runEventSessionIdMatchesDbId": true, + "busyStatusUsesAuthoritativeSessionId": true, + "titleOnResumeMutatesStoredTitle": false, + "sessionSubcommands": [ + "list", + "delete" + ] + } + } +} +``` + +## Process audit and cleanup + +The live process audit was run with: + +```bash +ps -eo pid,ppid,stat,cmd --sort=pid | rg "codex|claude|opencode" +``` + +That audit showed live user sessions for all three providers outside the temp homes used for the probes. Those processes must never be stopped by cleanup. + +The checked-in harness therefore only stops processes when both provenance checks succeed: + +1. 
`FRESHELL_PROBE_HOME` matches the current temp root. +2. `FRESHELL_PROBE_SENTINEL` matches the current sentinel file. + +Before cleanup runs, the harness emits a dry-run ownership report containing `pid`, `ppid`, `cwd`, `tempHome`, `sentinelPath`, `safeToStop`, and `command` for every candidate PID in the probe-owned process tree. Cleanup aborts if any candidate lacks the expected temp-home or sentinel metadata. + +## Codex evidence + +### Version + +```bash +command -v codex +# /home/user/.npm-global/bin/codex + +codex --version +# codex-cli 0.130.0 +``` + +This `2026-05-13` version refresh supersedes the older `codex-cli 0.129.0` capture. The current version of record on this machine is `codex-cli 0.130.0`. + +### Fresh remote startup + +Fresh remote bootstrap was probed with a loopback websocket stub and: + +```bash +CODEX_HOME=<temp-root>/.codex codex --remote <ws> --no-alt-screen +``` + +Before any user turn, the CLI opened a connection and issued: + +1. `initialize` +2. `initialized` +3. `account/read` +4. `account/read` +5. `model/list` +6. `thread/start` + +That proves fresh `codex --remote` allocates a thread during bootstrap, before the first user turn. This thread allocation is useful for preventing untracked user work, but it is not yet the durable restore identity. + +### Remote resume + +The remote resume form was re-proved through a websocket proxy in front of the real app-server. Before any user turn, `codex --remote <ws> --no-alt-screen resume <threadId>` issued the stable prefix through `thread/resume`, followed by `skills/list`, `account/rateLimits/read`, `command/exec`, `hooks/list`, and `thread/goal/get` calls. + +The trailing post-resume follow-up order varied between reruns on the same binary, so only the stable prefix plus the required follow-up method set is treated as contract. 
+ +### Durable artifact creation + +Real provider-owned durability was re-proved against the app-server websocket with: + +```bash +CODEX_HOME=<temp-root>/.codex codex app-server --listen <ws> +# JSON-RPC: +# initialize +# thread/start +# turn/start +# thread/resume +``` + +Observed provider-owned artifacts: + +- After `thread/start` and before `turn/start`: a shell snapshot under `.codex/shell_snapshots/*.sh`. +- After `thread/start` and before `turn/start`: no `.codex/sessions/**.jsonl` durable artifact. +- `thread/start` already returned `thread.ephemeral: false` and a concrete `thread.path` under `.codex/sessions/YYYY/MM/DD/rollout-*.jsonl`. +- Immediately after `thread/start`, neither the rollout file nor its date directory existed yet. +- `fs/watch` accepted caller-supplied `watchId` values for both the missing rollout path and the missing parent directory and returned only the canonicalized watched `path`. +- After the first real `turn/start`: a durable artifact under `.codex/sessions/YYYY/MM/DD/rollout-*.jsonl`. +- After the first real `turn/start`: the durable artifact appeared at the exact `thread.path`. +- In the `2026-04-26` rerun, no `fs/changed` notification was observed for the newly materialized rollout path within the historical timeout, so durable detection must not depend on that notification. 
+ +Short JSON-ish transcript from the `2026-04-26` rerun: + +```json +{ + "thread/start": { + "thread": { + "id": "<uuid>", + "ephemeral": false, + "path": "<temp-root>/.codex/sessions/2026/04/23/rollout-...jsonl" + } + }, + "preTurn": { + "rolloutExists": false, + "parentExists": false + }, + "fs/watch": [ + { + "watchId": "probe-rollout-path", + "result": { "path": "<same rollout path>" } + }, + { + "watchId": "probe-rollout-parent", + "result": { "path": "<same parent directory>" } + } + ], + "fs/changed": null +} +``` + +The durable restore path that worked after restarting the app-server runtime was: + +```bash +thread/resume <threadId> +turn/start <threadId> +``` + +### Pre-durable resume and input-gating experiment + +This targeted `2026-05-13` experiment tested whether Freshell can prevent un-restorable fresh Codex work by pre-creating the Codex app-server thread, persisting that `thread.id`, and then launching the user-facing TUI against the pre-created thread before any rollout artifact exists. + +The isolated setup used: + +```bash +CODEX_HOME=<temp-root>/.codex /home/user/.npm-global/bin/codex app-server --listen ws://127.0.0.1:<port> + +# JSON-RPC over the app-server websocket: +# initialize +# thread/start { cwd: <temp-cwd>, persistExtendedHistory: true } + +CODEX_HOME=<same-temp-root>/.codex /home/user/.npm-global/bin/codex --remote ws://127.0.0.1:<port> --no-alt-screen resume <threadId> +``` + +Result: + +- `thread/start` returned a persistable `thread.id` and exact future `thread.path`. +- The rollout artifact and parent date directory did not exist immediately after `thread/start`. +- The pre-created-thread TUI resume read that exact in-memory thread successfully with `thread/read`. +- The same TUI then failed `thread/resume` with `no rollout found for thread id <threadId>`. +- No real model prompt was sent during the failed pre-durable resume experiment. 
+ +Measured timings from the isolated run: + +| Phase | Elapsed | +| --- | ---: | +| app-server spawn to websocket accepting | 316.9 ms | +| `initialize` request to response | 33.9 ms | +| `thread/start` request to response | 559.7 ms | +| first `thread/started` notification from probe start | 1006.9 ms | +| pre-durable resume TUI spawn to proxy connection | 450.3 ms | +| pre-durable resume TUI `thread/read` success | 2.9 ms | +| pre-durable resume TUI `thread/resume` failure | 1.9 ms | +| fresh remote TUI spawn to `thread/start` response | 638.0 ms | +| fresh remote TUI spawn to `thread/started` notification | 640.9 ms | + +Conclusion: + +- Pre-creating a thread via app-server and then attaching the user-facing TUI with `codex resume <threadId>` before rollout materialization is not a viable prevention strategy. +- Fresh remote TUI launch after listener installation is viable for identity capture: in this run the thread identity was available about 641 ms after TUI spawn. +- To prevent untracked user work, Freshell must block terminal input until `thread/started` has been observed and the pre-durable candidate identity has been persisted. +- The pre-durable `thread.id` is useful as a captured candidate identity, but it is not a durable restore identity until the exact rollout artifact exists at the provider-reported `thread.path`. + +### Codex source-code study + +This study used the installed launcher at `/home/user/.npm-global/lib/node_modules/@openai/codex` and the official upstream source `openai/codex` tag `rust-v0.130.0`, commit `58573da43ab697e8b79f152c53df4b42230395a8`, cloned at `/tmp/codex-rust-v0.130.0`. The installed npm package contains the JavaScript native-binary launcher and package metadata; the Rust TUI, app-server, protocol, thread-store, and rollout code live in the official upstream repository. 
+ +Source locations studied: + +- `/home/user/.npm-global/lib/node_modules/@openai/codex/package.json`: version `0.130.0`, upstream repository `https://github.com/openai/codex.git`, package directory `codex-cli`, and platform-native optional dependencies. +- `/home/user/.npm-global/lib/node_modules/@openai/codex/bin/codex.js`: locates the platform binary and execs it with inherited stdio; it does not implement session identity. +- `/tmp/codex-rust-v0.130.0/codex-rs/tui/src/lib.rs`: remote app-server connection and resume lookup. +- `/tmp/codex-rust-v0.130.0/codex-rs/tui/src/app.rs`: fresh/resume startup ordering and TUI input event loop. +- `/tmp/codex-rust-v0.130.0/codex-rs/tui/src/app_server_session.rs`: TUI JSON-RPC calls for `thread/start`, `thread/resume`, `thread/read`, and `turn/start`. +- `/tmp/codex-rust-v0.130.0/codex-rs/tui/src/session_state.rs`: TUI stores `thread_id` and optional `rollout_path`. +- `/tmp/codex-rust-v0.130.0/codex-rs/app-server/src/request_processors/thread_processor.rs`: `thread/start`, `thread/read`, and `thread/resume` behavior. +- `/tmp/codex-rust-v0.130.0/codex-rs/app-server/src/request_processors/turn_processor.rs`: `turn/start` converts app-server input into core `Op::UserInput`. +- `/tmp/codex-rust-v0.130.0/codex-rs/app-server/src/fs_watch.rs` and `/tmp/codex-rust-v0.130.0/codex-rs/app-server-protocol/src/protocol/v2/fs.rs`: `fs/watch` and `fs/changed`. +- `/tmp/codex-rust-v0.130.0/codex-rs/app-server-protocol/src/protocol/common.rs`: public JSON-RPC method set, including `fs/watch`, `turn/start`, and no public `thread/persist`-style method. +- `/tmp/codex-rust-v0.130.0/codex-rs/app-server-protocol/src/protocol/v2/thread_data.rs`: `thread.path` is explicitly marked `[UNSTABLE]`. 
+- `/tmp/codex-rust-v0.130.0/codex-rs/core/src/thread_manager.rs`, `/tmp/codex-rust-v0.130.0/codex-rs/core/src/session/session.rs`, and `/tmp/codex-rust-v0.130.0/codex-rs/core/src/session/mod.rs`: session startup, `SessionConfigured`, rollout path propagation, and materialization hooks. +- `/tmp/codex-rust-v0.130.0/codex-rs/core/src/session/turn.rs` and `/tmp/codex-rust-v0.130.0/codex-rs/core/src/session/handlers.rs`: first user input is recorded and then forces rollout materialization. +- `/tmp/codex-rust-v0.130.0/codex-rs/rollout/src/recorder.rs`: fresh rollout path precomputation, deferred writer open, `persist()`, `flush()`, and `session_meta` write ordering. +- `/tmp/codex-rust-v0.130.0/codex-rs/thread-store/src/local/read_thread.rs`: stored-thread lookup, rollout existence checks, and the `no rollout found for thread id` path. +- `/tmp/codex-rust-v0.130.0/codex-rs/core/src/hook_runtime.rs`: `SessionStart` hook transcript path obtains a materialized rollout internally. +- `/tmp/codex-rust-v0.130.0/codex-rs/core/src/shell_snapshot.rs`: shell snapshot lifecycle. + +#### Remote TUI startup and candidate identity + +The remote TUI connects with `client_name: "codex-tui"`, `experimental_api: true`, and `opt_out_notification_methods: Vec::new()` in `/tmp/codex-rust-v0.130.0/codex-rs/tui/src/lib.rs:378`. That means Freshell can observe normal app-server responses and notifications when it owns the remote websocket proxy. + +For fresh sessions, `/tmp/codex-rust-v0.130.0/codex-rs/tui/src/app.rs:734` awaits `app_server.start_thread(&config)` before it constructs the chat widget; `/tmp/codex-rust-v0.130.0/codex-rs/tui/src/app_server_session.rs:328` sends `ClientRequest::ThreadStart`; `/tmp/codex-rust-v0.130.0/codex-rs/tui/src/app_server_session.rs:1329` copies `response.thread.id` and `response.thread.path` into `ThreadSessionState`; and `/tmp/codex-rust-v0.130.0/codex-rs/tui/src/session_state.rs:27` stores `thread_id` plus optional `rollout_path`. 
+ +On the app-server side, `/tmp/codex-rust-v0.130.0/codex-rs/app-server/src/request_processors/thread_processor.rs:1102` builds the API thread from the `SessionConfigured` event, including `session_configured.rollout_path`; `/tmp/codex-rust-v0.130.0/codex-rs/app-server/src/request_processors/thread_processor.rs:1156` builds the `ThreadStartResponse`; `/tmp/codex-rust-v0.130.0/codex-rs/app-server/src/request_processors/thread_processor.rs:1170` creates the `thread/started` notification; and `/tmp/codex-rust-v0.130.0/codex-rs/app-server/src/request_processors/thread_processor.rs:1171` sends the response before `/tmp/codex-rust-v0.130.0/codex-rs/app-server/src/request_processors/thread_processor.rs:1180` sends the notification. Therefore a Freshell websocket proxy can capture the same candidate from either the `thread/start` response or the later `thread/started` notification. The notification is useful as a provider event surface, but the response is the earlier source-supported surface. + +The TUI itself does not start its main terminal event loop until after it enqueues the started thread in `/tmp/codex-rust-v0.130.0/codex-rs/tui/src/app.rs:913`. Once the loop is running, `/tmp/codex-rust-v0.130.0/codex-rs/tui/src/app.rs:1018` reads terminal events and `/tmp/codex-rust-v0.130.0/codex-rs/tui/src/app.rs:1082` dispatches keys and paste. Freshell still needs a PTY-side input gate because terminal bytes can be queued outside Codex before Freshell has atomically persisted the observed candidate. + +#### Rollout path is announced before the rollout exists + +The app-server integration test at `/tmp/codex-rust-v0.130.0/codex-rs/app-server/tests/suite/v2/thread_start.rs:147` asserts the fresh `thread.path` is absolute and `/tmp/codex-rust-v0.130.0/codex-rs/app-server/tests/suite/v2/thread_start.rs:149` asserts it does not yet exist. 
The same test waits for the `thread/started` notification at `/tmp/codex-rust-v0.130.0/codex-rs/app-server/tests/suite/v2/thread_start.rs:186` and asserts no preceding `thread/status/changed` for the new thread at `/tmp/codex-rust-v0.130.0/codex-rs/app-server/tests/suite/v2/thread_start.rs:194`. + +The rollout recorder explains why. In `/tmp/codex-rust-v0.130.0/codex-rs/rollout/src/recorder.rs:680`, the create path calls `precompute_log_file_info`, captures `path`, and constructs `SessionMeta`, but returns `None` for the writer and `Some(log_file_info)` at `/tmp/codex-rust-v0.130.0/codex-rs/rollout/src/recorder.rs:718`. A fresh thread therefore has an in-memory rollout path and session metadata before the file is opened. + +Materialization happens only when persistence is forced or pending items require a write. `/tmp/codex-rust-v0.130.0/codex-rs/rollout/src/recorder.rs:1494` makes `add_items` a no-op for the filesystem while the writer is deferred; `/tmp/codex-rust-v0.130.0/codex-rs/rollout/src/recorder.rs:1503` makes `persist()` write even when there are no pending items; `/tmp/codex-rust-v0.130.0/codex-rs/rollout/src/recorder.rs:1507` makes `flush()` return without creating a file when the writer is deferred and there are no pending items; `/tmp/codex-rust-v0.130.0/codex-rs/rollout/src/recorder.rs:1576` opens the deferred writer; `/tmp/codex-rust-v0.130.0/codex-rs/rollout/src/recorder.rs:1614` opens the writer, writes session metadata, writes pending items, and flushes; and `/tmp/codex-rust-v0.130.0/codex-rs/rollout/src/recorder.rs:1714` writes `RolloutItem::SessionMeta`. + +The metadata line has the durable root TUI `ThreadId` Freshell needs to validate. `/tmp/codex-rust-v0.130.0/codex-rs/protocol/src/protocol.rs:2703` defines `SessionMeta { id: ThreadId, ... 
}`; `/tmp/codex-rust-v0.130.0/codex-rs/protocol/src/protocol.rs:2759` defines the JSONL `SessionMetaLine`; and `/tmp/codex-rust-v0.130.0/codex-rs/protocol/src/protocol.rs:2767` wraps it as the `session_meta` rollout item. Because the writer opens before the first line is written, a plain `exists()` check can observe a transient empty file. The deterministic promotion proof should require the exact provider-reported path to exist, be readable as JSONL, and begin with `payload.id == candidateThreadId` on a `session_meta` record. + +`thread.path` is useful but not a stable protocol guarantee by itself: `/tmp/codex-rust-v0.130.0/codex-rs/app-server-protocol/src/protocol/v2/thread_data.rs:125` marks the path field `[UNSTABLE]`. Freshell should version/probe this provider surface and keep the direct rollout proof as the durable promotion gate. + +#### First user input is the materialization trigger + +`turn/start` is the first app-server request that accepts user work. `/tmp/codex-rust-v0.130.0/codex-rs/tui/src/app_server_session.rs:520` sends `ClientRequest::TurnStart`; `/tmp/codex-rust-v0.130.0/codex-rs/app-server/src/request_processors/turn_processor.rs:348` maps app-server input to core input items; `/tmp/codex-rust-v0.130.0/codex-rs/app-server/src/request_processors/turn_processor.rs:449` starts the turn by submitting `Op::UserInput` or `Op::UserInputWithTurnContext`; and `/tmp/codex-rust-v0.130.0/codex-rs/core/src/session/handlers.rs:233` creates the turn context before `/tmp/codex-rust-v0.130.0/codex-rs/core/src/session/handlers.rs:239` steers user input into the active turn. + +After the prompt is accepted, `/tmp/codex-rust-v0.130.0/codex-rs/core/src/session/turn.rs:328` records the user prompt, and `/tmp/codex-rust-v0.130.0/codex-rs/core/src/session/mod.rs:2976` persists the prompt to history, emits the UI item, then calls `ensure_rollout_materialized()` at `/tmp/codex-rust-v0.130.0/codex-rs/core/src/session/mod.rs:2990`. 
That method calls `live_thread.persist()` through `/tmp/codex-rust-v0.130.0/codex-rs/core/src/session/mod.rs:1072`. + +Before Codex emits turn completion, `/tmp/codex-rust-v0.130.0/codex-rs/core/src/tasks/mod.rs:396` calls `sess.flush_rollout().await`. The important caveat is the error path: `/tmp/codex-rust-v0.130.0/codex-rs/core/src/tasks/mod.rs:397` logs the flush failure, `/tmp/codex-rust-v0.130.0/codex-rs/core/src/tasks/mod.rs:398` through `/tmp/codex-rust-v0.130.0/codex-rs/core/src/tasks/mod.rs:406` sends a warning that the transcript failed to save and Codex will retry, and `/tmp/codex-rust-v0.130.0/codex-rs/core/src/tasks/mod.rs:410` still finishes the task when the turn was not cancelled. The app-server exposes that task finish as `turn/completed`: `/tmp/codex-rust-v0.130.0/codex-rs/app-server-protocol/src/protocol/common.rs:1429` defines the notification method, and `/tmp/codex-rust-v0.130.0/codex-rs/app-server/src/bespoke_event_handling.rs:1278` through `/tmp/codex-rust-v0.130.0/codex-rs/app-server/src/bespoke_event_handling.rs:1299` emits it with the thread id and turn id. + +This gives Freshell a practical no-polling contract, but only if the boundary is named precisely: `turn/completed` is the required proof-check boundary, not proof itself. Durable restore does not need to be proven before the first Codex turn completes. When the turn-completed event arrives for the captured root TUI `ThreadId`, Freshell must immediately do one proof read of the exact provider-reported rollout path. If that proof read fails, the session enters `durability_unproven_after_completion`, a visible restore-durability failure state. It is not acceptable to leave it green, grey, live-only, or captured-not-canonical as a normal steady state past turn completion. + +The reason to block typing is therefore narrow. The gate is not waiting for durable restore. 
The gate only prevents the user's first prompt from reaching Codex before Freshell has captured and saved the Codex candidate root TUI `ThreadId` and provider-reported rollout path. Once that candidate is durably saved by Freshell, user input can be released even though the rollout file may not exist yet. + +The source path above proves the normal first-turn path should materialize the rollout promptly after the user prompt is accepted and should flush it before turn completion. It does not prove that there is no crash window between forwarding the first `turn/start` and observing a parseable rollout file. No public app-server method with a name like `thread/persist` or `thread/materialize` appears in the public request set around `/tmp/codex-rust-v0.130.0/codex-rs/app-server-protocol/src/protocol/common.rs:699` through `/tmp/codex-rust-v0.130.0/codex-rs/app-server-protocol/src/protocol/common.rs:777`. Therefore strict prevention of all un-restorable first-turn bytes has no source-supported public pre-turn materialization RPC to rely on in this version. Under the accepted product leniency, that is tolerable only until the first turn completes. + +#### Why pre-create plus TUI resume is not viable + +The TUI resume lookup path explains the mixed result from the `2026-05-13` experiment. `/tmp/codex-rust-v0.130.0/codex-rs/tui/src/lib.rs:579` parses a UUID and calls `thread/read(... include_turns=false)`. The app-server allows metadata-only reads from live in-memory state before persistence: `/tmp/codex-rust-v0.130.0/codex-rs/app-server/src/request_processors/thread_processor.rs:1950` falls back to a live thread snapshot when persisted metadata is missing. + +`thread/resume` is different. 
`/tmp/codex-rust-v0.130.0/codex-rs/app-server/src/request_processors/thread_processor.rs:2290` handles resume; `/tmp/codex-rust-v0.130.0/codex-rs/app-server/src/request_processors/thread_processor.rs:2336` first tries to resume a running thread; but `/tmp/codex-rust-v0.130.0/codex-rs/app-server/src/request_processors/thread_processor.rs:2637` still calls `read_stored_thread_for_resume(... include_history=true)` for a running thread id before it attaches. The local thread store requires an existing rollout: `/tmp/codex-rust-v0.130.0/codex-rs/thread-store/src/local/read_thread.rs:66` resolves the rollout path, `/tmp/codex-rust-v0.130.0/codex-rs/thread-store/src/local/read_thread.rs:68` returns `no rollout found for thread id` if it cannot, and `/tmp/codex-rust-v0.130.0/codex-rs/thread-store/src/local/read_thread.rs:168` only accepts the live writer path when `try_exists(path)` is true. The app-server maps thread-store misses to the same `no rollout found for thread id` error at `/tmp/codex-rust-v0.130.0/codex-rs/app-server/src/request_processors/thread_processor.rs:3574`. + +That source path proves the pre-created id can be readable as live metadata and still be non-resumable. + +#### fs/watch, shell snapshots, hooks, and provider events + +`fs/watch` is a wake-up source, not a proof. The protocol accepts an absolute path with a connection-scoped `watch_id` in `/tmp/codex-rust-v0.130.0/codex-rs/app-server-protocol/src/protocol/v2/fs.rs:160`, and `fs/changed` echoes the `watch_id` plus changed paths at `/tmp/codex-rust-v0.130.0/codex-rs/app-server-protocol/src/protocol/v2/fs.rs:195`. The implementation registers the requested path without an existence check in `/tmp/codex-rust-v0.130.0/codex-rs/app-server/src/fs_watch.rs:118`, emits sorted changed paths joined under the watch root at `/tmp/codex-rust-v0.130.0/codex-rs/app-server/src/fs_watch.rs:165`, and returns only `FsWatchResponse { path }` at `/tmp/codex-rust-v0.130.0/codex-rs/app-server/src/fs_watch.rs:185`. 
The underlying watcher is `notify::recommended_watcher` in `/tmp/codex-rust-v0.130.0/codex-rs/core/src/file_watcher.rs:327`; missing targets are watched through the nearest existing ancestor in `/tmp/codex-rust-v0.130.0/codex-rs/core/src/file_watcher.rs:736`; the OS watch is skipped if the actual path does not exist in `/tmp/codex-rust-v0.130.0/codex-rs/core/src/file_watcher.rs:556`; and matching reports the requested path only when an event plus current existence state reaches the requested target in `/tmp/codex-rust-v0.130.0/codex-rs/core/src/file_watcher.rs:811`. Codex's own fs-watch tests explicitly avoid failing when no OS event arrives in `/tmp/codex-rust-v0.130.0/codex-rs/app-server/tests/suite/v2/fs.rs:684`, and the real probe observed no `fs/changed` before timeout. Therefore an event-driven Freshell implementation may subscribe to the exact rollout path and parent, but an `fs/changed` event cannot be the only event source and cannot replace a direct proof read. + +Shell snapshots are not identity. `/tmp/codex-rust-v0.130.0/codex-rs/core/src/session/session.rs:699` starts shell snapshotting during session startup when the feature is enabled; `/tmp/codex-rust-v0.130.0/codex-rs/core/src/shell_snapshot.rs:39` keys the snapshot by session id and cwd; and `/tmp/codex-rust-v0.130.0/codex-rs/core/src/shell_snapshot.rs:153` writes and validates a temporary shell environment file before renaming it. The snapshot can appear before the rollout and can help diagnose startup, but it is deleted on drop and is not used by `thread/resume` as durable session history. + +Hooks expose an internal materialization path but not a Freshell startup contract. 
`/tmp/codex-rust-v0.130.0/codex-rs/core/src/hook_runtime.rs:104` runs pending `SessionStart` hooks on the first turn context and includes `transcript_path: sess.hook_transcript_path().await` at `/tmp/codex-rust-v0.130.0/codex-rs/core/src/hook_runtime.rs:115`; `hook_transcript_path()` calls `ensure_rollout_materialized()` at `/tmp/codex-rust-v0.130.0/codex-rs/core/src/session/mod.rs:3284`. That could force materialization for Codex-owned hook execution, but Freshell should not rely on configuring hidden provider hooks to create identity; it is not a public app-server session-start barrier, and it changes provider configuration semantics. + +Provider events split into candidate, proof-check, and repair surfaces. `thread/start` response and `thread/started` notification carry the candidate before user work. `turn/start` proves user work has already been accepted, so it is too late to protect candidate capture. `turn/completed` is the normal no-polling proof-check trigger: by then Codex should have materialized and flushed the rollout, but source shows flush failure can still warn and continue to completion. `fs/changed`, later Codex events, PTY exit, app-server websocket close/error, and user restore/list/open actions are deterministic repair opportunities, not the main path. + +#### Codex identity names + +For Codex, Freshell should be explicit about identity terms: + +| Name | Meaning | +| --- | --- | +| `rootTuiThreadId` | The `ThreadId` for the user-facing root TUI thread, observed from `thread/start` or `thread/started`. | +| `candidateThreadId` | A persisted non-canonical copy of `rootTuiThreadId` before rollout proof succeeds. | +| `rolloutPath` | The provider-reported `thread.path` for that candidate. It is useful but marked `[UNSTABLE]` in `/tmp/codex-rust-v0.130.0/codex-rs/app-server-protocol/src/protocol/v2/thread_data.rs:125`. | +| `rolloutProofId` | The `payload.id` from the first JSONL rollout record when `type == "session_meta"`. 
`/tmp/codex-rust-v0.130.0/codex-rs/protocol/src/protocol.rs:2703` through `/tmp/codex-rust-v0.130.0/codex-rs/protocol/src/protocol.rs:2705` define that id as a `ThreadId`. | +| `durableThreadId` | The canonical identity after `rolloutProofId == candidateThreadId` at the exact `rolloutPath`. This is also the id passed to `codex --remote <ws> --no-alt-screen resume <threadId>`. | + +Avoid generic "session id" in Codex restore design because it can be confused with provider fields named `sessionId` or `session_id`. The durable Codex identity in this contract is the root TUI `ThreadId`; the rollout proof is the first JSONL line shaped like `{"type":"session_meta","payload":{"id":"<ThreadId>", ...}}`. That shape follows the tagged `RolloutItem` wrapper in `/tmp/codex-rust-v0.130.0/codex-rs/protocol/src/protocol.rs:2767` through `/tmp/codex-rust-v0.130.0/codex-rs/protocol/src/protocol.rs:2770`, and the recorder writes that `RolloutItem::SessionMeta` at `/tmp/codex-rust-v0.130.0/codex-rs/rollout/src/recorder.rs:1738` through `/tmp/codex-rust-v0.130.0/codex-rs/rollout/src/recorder.rs:1740`. + +#### State model + +| State | Meaning | What the user can do | Sidebar/state surface | +| --- | --- | --- | --- | +| `identity_pending` | Fresh Codex is starting, but Freshell has not persisted a `candidateThreadId` plus `rolloutPath`. | Wait, close the pane, or start a fresh pane. User-originating PTY input is blocked. | "Starting Codex; restore identity not captured." No restorable indicator. | +| `captured_pre_turn` | Freshell has persisted the candidate before a user turn is accepted, but the rollout proof is not expected yet. | Use the live terminal after the candidate write succeeds. | "Codex identity captured; restore proof pending." Neutral pending state, not green. | +| `turn_in_progress_unproven` | A Codex turn is running for the captured candidate and durable proof has not succeeded. | Continue live work while the terminal is attachable. Restore is not guaranteed yet. 
| "Codex turn running; restore proof pending." Not an error before completion. | +| `proof_checking` | `turn/completed` arrived or a repair trigger fired, and Freshell is doing one exact proof read. | Keep using the live terminal if it remains attachable. | "Checking Codex restore proof." Short-lived pending state. | +| `durable` | The exact rollout proof succeeded, so `durableThreadId` is canonical. | Reopen, resume, split, or restore using the durable root TUI `ThreadId`. | Normal restorable Codex session. | +| `durability_unproven_after_completion` | A proof read failed after `turn/completed` for the candidate. | Attach the live terminal if available, trigger user repair by restore/list/open, or start fresh. | Visible degraded/error state: "Codex restore proof failed after turn completion." | +| `non_restorable` | There is no captured candidate, or the captured candidate cannot be proven and no live terminal can be attached. | Open a fresh Codex terminal. | Clear non-restorable error. No fake resume affordance. | + +The state model intentionally accepts leniency before a Codex turn completes. After `turn/completed`, proof failure is not a normal grey state. It is `durability_unproven_after_completion` until a deterministic repair trigger succeeds or the live terminal is gone and the pane becomes `non_restorable`. + +#### Failure handling at `turn/completed` + +When `turn/completed` arrives for the captured root TUI `ThreadId`, Freshell must transition to `proof_checking` and immediately run exactly one proof read of the stored `rolloutPath`. The proof succeeds only if the path is a regular readable JSONL file and the first record is parseable `session_meta` with `payload.id == candidateThreadId`. 
A mere path existence check is too weak because `/tmp/codex-rust-v0.130.0/codex-rs/rollout/src/recorder.rs:1576` through `/tmp/codex-rust-v0.130.0/codex-rs/rollout/src/recorder.rs:1622` opens the deferred writer first and only then writes session metadata, writes pending items, and flushes, so the file can exist before its `session_meta` line has been written. + +If the proof read succeeds, Freshell promotes to `durable`, persists the canonical `sessionRef`, and may display normal restore affordances. If it fails after `turn/completed`, Freshell must immediately surface `durability_unproven_after_completion`. The live PTY may remain usable if it is still attachable, but the sidebar and pane state must not silently present it as durable, green, or harmlessly pending. + +There is no periodic, delayed, or backoff read loop. If a proof read is already in flight and another deterministic trigger arrives, Freshell may coalesce the trigger into at most one additional exact read after the current read resolves. It must not keep retrying because time passed. + +#### Proof-to-layout bridge + +A successful Codex durability proof is not complete at the registry binding step. The implementation invariant is: + +`rollout proof succeeds -> server binds/rebinds the terminal to the proven Codex thread id -> the browser receives terminal.session.associated with sessionRef { provider: "codex", sessionId: durableThreadId } -> TerminalView writes that canonical sessionRef into the terminal pane, and into the tab when the tab is a single-pane terminal tab -> TerminalView dispatches flushPersistedLayoutNow`. + +`terminal.session.bound` is a server-local registry event. It may be useful for ownership, activity tracking, lifecycle logs, and metadata, but it is not by itself a persisted browser layout update. Likewise, `terminal.codex.durability.updated` can persist candidate and durability state, but it does not replace `terminal.session.associated` as the canonical terminal durable-promotion event. 
+ +The current `/home/user/code/freshell/.worktrees/dev` implementation has two bridge surfaces, one on the server and one on the client, that should stay covered by tests. On the server, `/home/user/code/freshell/.worktrees/dev/server/terminal-registry.ts:2017` binds the proven Codex id, `/home/user/code/freshell/.worktrees/dev/server/terminal-registry.ts:2051` through `/home/user/code/freshell/.worktrees/dev/server/terminal-registry.ts:2052` broadcasts `terminal.session.associated` to attached clients, and `/home/user/code/freshell/.worktrees/dev/server/index.ts:442` through `/home/user/code/freshell/.worktrees/dev/server/index.ts:459` converts Codex `terminal.session.bound` into the shared association publisher path with `source: "codex_durability"`. On the client, `/home/user/code/freshell/.worktrees/dev/src/components/TerminalView.tsx:2189` through `/home/user/code/freshell/.worktrees/dev/src/components/TerminalView.tsx:2234` is the persistence boundary: it accepts `terminal.session.associated`, builds the canonical sessionRef update, updates pane/tab state, and flushes the persisted layout. + +Focused verification should prove both halves. A server test should start from a matching Codex rollout proof and assert a client-visible `terminal.session.associated` publication, not only `terminal.session.bound` or `codex_durable_session_observed`. A client test should show that `terminal.created` stays live-only and that pane/tab `sessionRef` plus the immediate layout flush happen only after `terminal.session.associated`. + +#### Deterministic repair triggers + +Each repair trigger below performs one exact proof read of the stored `rolloutPath`. Success promotes to `durable`. Failure keeps `durability_unproven_after_completion` after a completed turn, or keeps the pre-completion unproven state before a completed turn. User actions are repair paths, not the normal success path. 
+ +| Trigger | Semantics | +| --- | --- | +| Later Codex event | A later Codex notification/response deterministically tied to the candidate root TUI `ThreadId` may trigger one proof read. Generic app-server noise that cannot be tied to the candidate is ignored. | +| `fs/changed` | A notification for the exact rollout path or watched parent may trigger one proof read. The notification is only a wake-up source and does not prove durability. | +| PTY exit | Before marking the session gone, Freshell runs one proof read. If it fails after completion and no live terminal remains, the state becomes `non_restorable`; if it fails before completion, it stays within the accepted pre-completion leniency but still is not durable. | +| App-server websocket close/error | Close/error from the app-server observer or TUI connection triggers one proof read for the captured candidate. Success promotes; failure stays degraded or becomes non-restorable depending on live attachability. | +| User restore/list/open | A user attempt to restore, list, or open a captured-but-unproven Codex session runs one proof read first. This can repair a missed provider/filesystem event, but it is not the normal success path. | + +#### Re-open/resume policy for captured-but-unproven sessions + +If the user attempts to re-open or resume a captured-but-unproven Codex session, Freshell must proof-read first. If proof succeeds, it promotes to `durable` and resumes with the proven root TUI `ThreadId`. If proof fails and the live terminal is attachable, Freshell attaches the live terminal and keeps the degraded/unproven state visible. If proof fails and no live terminal is attachable, Freshell creates a fresh Codex terminal with a clear local message/state explaining that the captured Codex session could not be proven restorable. + +This path must not use cwd, launch time, title, pane title, shell snapshots, hidden hook configuration, or fake/mutating provider writes. 
It also must not try `codex resume <candidateThreadId>` before proof succeeds; the real-binary experiment and the thread-store source prove that a live-readable pre-durable id can still fail resume with `no rollout found for thread id`. + +#### Approach evaluation + +| Approach | Source proof | Failure mode | Use in Freshell | +| --- | --- | --- | --- | +| Pre-create app-server thread, then TUI `resume <threadId>` | `thread/read include_turns=false` can return live metadata before persistence, but `thread/resume` requires stored rollout history and path existence. | Fails before rollout with `no rollout found for thread id`; this matches the real-binary experiment. | Do not use. | +| Fresh remote TUI after listener/proxy install | TUI awaits `thread/start` before its main input loop, app-server sends `thread/start` response then `thread/started`, both with `thread.id` and `thread.path`. | If Freshell starts Codex before installing the proxy/listener or before its own persistence transaction is ready, early terminal bytes can race identity capture. | Use. Install proxy/listeners first. | +| PTY input blocking | TUI reads keys/paste after startup; Freshell controls the PTY input boundary. | Without a Freshell gate, queued bytes can enter Codex before the candidate is durably recorded by Freshell. | Use. Block user-originating stdin until the candidate is atomically persisted. | +| App-server-side `turn/start` interception | `turn/start` is the app-server request that submits `Op::UserInput`. | Intercepting it as the primary guard is late: the user already typed. Forwarding it before candidate persistence creates untracked work. | Use only as a secondary safety net in the websocket proxy: reject or hold `turn/start` if the candidate is missing. | +| Exact rollout-path watch plus proof | `fs/watch` accepts the path and can notify, while rollout writer writes `session_meta` first when materialized. 
| `fs/changed` is not guaranteed by source tests or the probe; `exists()` alone can observe an empty file between open and first write. | Use watch only as one explicit event source; do a one-shot proof read on each event and promote only after parseable `payload.id` on `session_meta` matches the candidate. | +| Turn-completed proof check | Codex normally materializes after recording the first prompt and attempts to flush before completing the turn, but the flush error path can warn and still finish the task. | If the proof read fails after `turn/completed`, Freshell has evidence of a restore-durability failure, not proof that durability exists. | Use as the required proof-check boundary. Promote only after the exact rollout proof succeeds. | +| Shell snapshot identity | Shell snapshots are startup environment files keyed by session id and cwd, separate from `thread/resume` history. | Snapshot may exist before rollout, is deleted on drop, and is not consulted by resume. | Do not use as identity or promotion proof. | +| Provider event promotion | `thread/start` response and `thread/started` are pre-user-work candidate surfaces; `turn/start` and turn notifications are post-acceptance surfaces. | Promoting on `thread/started` alone treats an unmaterialized future path as durable. Waiting for turn events cannot prevent first-turn loss. | Use start response/notification for candidate only; promote on rollout proof only. | +| Hidden hook-based materialization | `SessionStart` hooks call `hook_transcript_path()`, which materializes internally. | Requires provider hook configuration and only runs in the first-turn hook path; not a stable external session-start API. | Do not use. | +| Mutating API calls to force persistence | Methods like injecting items can write history, but they mutate provider-visible state. No public no-op materialize method was found in this source pass. | Creates fake history or hidden behavior. 
| Do not use; under the current constraints there is no source-supported no-op materialization path. | + +#### Practical Freshell contract + +1. Fresh Codex launch starts in `identity_pending`. Freshell installs the remote websocket proxy/listeners and prepares its own atomic persistence before spawning `codex --remote`. +2. While `identity_pending`, Freshell forwards provider output, resize signals, and narrow terminal-control replies required for TUI startup, but not user-originating PTY input. The UI should show a clear starting state rather than silently accepting untracked work. +3. Freshell captures the first valid candidate from either the `thread/start` response or the `thread/started` notification. The candidate must have `ephemeral == false`, a non-empty root TUI `ThreadId`, and a provider-reported absolute `rolloutPath`. +4. Freshell atomically persists the candidate as non-canonical state: provider `codex`, candidate root TUI `ThreadId`, `rolloutPath`, source event/response, CLI version, capture timestamp, and durability state. +5. After that write succeeds, Freshell transitions to `captured_pre_turn` and may unblock user-originating PTY input. This prevents unknown-thread work, but it does not claim the first prompt is restorable. +6. During `turn_in_progress_unproven`, live use may continue. Canonical restore remains unproven and the sidebar must not show durable/green restore state. +7. On `turn/completed` for the captured root TUI `ThreadId`, Freshell transitions to `proof_checking` and performs one exact proof read of the stored `rolloutPath`. +8. Freshell promotes to `durable` only after the proof read finds a regular readable JSONL file whose first record is `type == "session_meta"` and whose `payload.id == candidateThreadId`. +9. 
If the proof read fails after `turn/completed`, Freshell transitions to `durability_unproven_after_completion`, shows a degraded/error state immediately, and keeps the live terminal attachable only as live terminal access. +10. Freshell registers deterministic repair triggers but never starts a periodic or backoff existence/read loop. Each trigger is one exact proof read. +11. If the process exits before candidate capture, report `non_restorable` and never infer identity from cwd, time, title, or shell snapshot. If it exits after candidate capture but before a turn completes, do one final proof read; a failed proof is still pre-completion leniency, not durability. +12. The residual unproven gap is strict first-turn crash safety. Source shows the normal first user prompt forces rollout materialization, but this version does not expose a public pre-turn materialize RPC. Under the stated constraints, the enforceable boundary is "captured before input, proof check required at turn completion." + +Implementation note from live Freshell validation: the fresh-candidate wait should be a bounded startup deadline, not a polling loop. A 10 second deadline killed a valid cold Codex launch before the app-server exposed candidate identity; use a longer bounded deadline such as 45 seconds, while still keeping PTY input blocked until the candidate is persisted. + +#### Current Freshell implementation gap + +The current `/home/user/code/freshell/.worktrees/dev` implementation does not yet match this contract. 
`/home/user/code/freshell/.worktrees/dev/server/coding-cli/codex-app-server/durable-rollout-tracker.ts:6` through `/home/user/code/freshell/.worktrees/dev/server/coding-cli/codex-app-server/durable-rollout-tracker.ts:8` define delayed probe intervals, `/home/user/code/freshell/.worktrees/dev/server/coding-cli/codex-app-server/durable-rollout-tracker.ts:164` through `/home/user/code/freshell/.worktrees/dev/server/coding-cli/codex-app-server/durable-rollout-tracker.ts:205` schedule repeated probes, and `/home/user/code/freshell/.worktrees/dev/server/coding-cli/codex-app-server/durable-rollout-tracker.ts:183` promotes on `pathExists()` rather than a first-record `session_meta` proof. Those lines are incompatible with the no-polling proof contract. + +The current app-server client schema handles thread lifecycle notifications and `fs/changed` in `/home/user/code/freshell/.worktrees/dev/server/coding-cli/codex-app-server/protocol.ts:355` through `/home/user/code/freshell/.worktrees/dev/server/coding-cli/codex-app-server/protocol.ts:367`, and dispatches them in `/home/user/code/freshell/.worktrees/dev/server/coding-cli/codex-app-server/client.ts:376` through `/home/user/code/freshell/.worktrees/dev/server/coding-cli/codex-app-server/client.ts:399` and `/home/user/code/freshell/.worktrees/dev/server/coding-cli/codex-app-server/client.ts:447` through `/home/user/code/freshell/.worktrees/dev/server/coding-cli/codex-app-server/client.ts:482`. This source pass found no `turn/completed` parser under `/home/user/code/freshell/.worktrees/dev/server/coding-cli/codex-app-server`, so the implementation must add a deterministic completion proof-check surface before it can satisfy this contract. 
+ +The current recovery path also has a timer-based live-only success surface: `/home/user/code/freshell/.worktrees/dev/server/terminal-registry.ts:1979` through `/home/user/code/freshell/.worktrees/dev/server/terminal-registry.ts:1997` starts a pre-durable stability timer and marks `running_live_only`. Under the revised contract, a live-only state is acceptable only before a completed turn or while visibly degraded after proof failure; it must not be a silent green/grey steady state after `turn/completed`. + +#### 2026-05-14 central restore decision lesson + +Commit `da2e0076` (`Centralize Codex restore create decisions`) turned one implementation lesson into part of the design contract: deterministic restore is not only rollout-proof logic. Every restore-like create path must enter one typed decision contract before it can spawn, resume, attach, or fall back to a fresh Codex terminal. Otherwise separate entry points can drift into different restore semantics even if the proof reader is correct. + +At the design level, `da2e0076` added `/home/user/code/freshell/.worktrees/codex-stability-implementation-20260514/server/coding-cli/codex-app-server/restore-decision.ts` with `planCodexCreateRestoreDecision` and `resolveCodexCreateRestoreDecision`, then rewired `/home/user/code/freshell/.worktrees/codex-stability-implementation-20260514/server/ws-handler.ts` so Codex `terminal.create` and reopen handling route through that module. The module separates canonical durable resume through `sessionRef { provider: 'codex', sessionId: candidateThreadId }`, proof-first handling for captured candidates, attach-live-on-proof-failure, fresh create, and the remaining legacy raw-resume passthrough case. 
Focused coverage passed for `/home/user/code/freshell/.worktrees/codex-stability-implementation-20260514/test/unit/server/coding-cli/codex-app-server/restore-decision.test.ts` and `/home/user/code/freshell/.worktrees/codex-stability-implementation-20260514/test/server/ws-terminal-create-reuse-running-codex.test.ts`. + +The review verdict was pass with concerns. The refactor is a useful boundary, but it does not mean every surface is complete: + +- `legacy_raw_resume_passthrough` still exists for non-restore creates. It should be removed or replaced once callers can provide canonical `sessionRef` or captured candidate state. +- The central module currently trusts the caller's exact-live-terminal lookup to enforce that a live handle matches both `candidateThreadId` and `rolloutPath`. A follow-up should either enforce that match inside the module or make the typed input contract carry enough candidate identity for the module to verify the match itself. +- Some side-effect branching remains in `/home/user/code/freshell/.worktrees/codex-stability-implementation-20260514/server/ws-handler.ts` because spawn, attach, broadcast, and error response effects still live there. That is acceptable for the narrow refactor, but the decision surface should stay pure and explicit as more effects move behind it. +- REST, MCP, CLI, and any other future restore-like surfaces must route through the same decision path or an equivalent shared contract. They should not grow parallel semantics for raw resume, candidate proof, live attach, or fresh fallback. +- Tests should include a surface matrix that proves each external entry point reaches the same decision semantics, not only unit tests for the decision module and the current websocket path. 
+ +### Codex allowed behavior + +- Fresh Codex panes may be captured-but-unproven before a turn completes, but user input should not be accepted until the pre-durable candidate root TUI `ThreadId` and provider-reported `rolloutPath` have been captured and persisted. +- Freshell may use `fs/watch` as a wake-up source for Codex durability, but it still needs direct proof at the exact rollout path before promotion. Without polling, a missed filesystem event is repairable only through later deterministic provider/process/user events that each trigger one exact proof read. +- Freshell may only persist canonical Codex identity after the durable `.jsonl` artifact exists at the provider-reported `thread.path` and the first rollout record proves `payload.id == candidateThreadId` on a `session_meta` record. +- Freshell must not treat the bootstrap `thread/start` id as durable restore identity, and must not try to TUI-resume a pre-artifact thread as if it were durable. +- After `turn/completed`, failed proof is `durability_unproven_after_completion`. The user can still attach a live terminal if one exists, but the sidebar/pane state must be visibly degraded until proof succeeds or the session becomes non-restorable. + +`codex --help` in the tested mode did not expose a rename or title mutation flag such as `--name`, so no mutable-name surface was confirmed for Codex in this contract. + +## Claude Code evidence + +### Version + +```bash +command -v claude +# /home/user/bin/claude + +claude --version +# 2.1.132 (Claude Code) +``` + +This Claude Code version line was refreshed on `2026-05-06`; the behavior observations below remain from the `2026-04-26` real-provider proof. + +The wrapper at `/home/user/bin/claude` shells out to `/home/user/.local/bin/claude`. The isolated probes used the actual binary and overrode `HOME` to keep persistence inside the probe temp root. 
+ +### Exact-id durability + +Fresh exact-id durability was probed with: + +```bash +HOME=<temp-home> /home/user/.local/bin/claude --bare --dangerously-skip-permissions -p --session-id <uuid> "Reply with exactly: claude-home-probe-ok" +``` + +Observed provider-owned artifacts: + +- `.claude/.credentials.json` +- `.claude/policy-limits.json` +- `.claude/projects/*/<uuid>.jsonl` + +The UUID-backed transcript file is the canonical durable identity. + +### Named resume and rename + +Named resume and rename/title mutation were probed with: + +```bash +HOME=<temp-home> /home/user/.local/bin/claude --bare --dangerously-skip-permissions -p --session-id <uuid> --name probe-name-one "Reply with exactly: named-create-ok" +HOME=<temp-home> /home/user/.local/bin/claude --bare --dangerously-skip-permissions -p --resume probe-name-one "Reply with exactly: named-resume-ok" +HOME=<temp-home> /home/user/.local/bin/claude --bare --dangerously-skip-permissions -p --resume <uuid> --name probe-name-two "Reply with exactly: renamed-ok" +``` + +Observed rename semantics: + +- The transcript filename and UUID-backed `sessionId` remained stable. +- Claude appended new `custom-title` and `agent-name` metadata lines for the renamed title. +- After rename, the old title no longer resolved in `--resume`. +- The new title resolved, but only as mutable metadata pointing back to the same UUID transcript identity. + +### Claude Code allowed behavior + +- UUID-backed Claude transcript identity is canonical durable identity. +- Named resume values and titles are mutable metadata only. +- Freshell must not persist a mutable title as Claude durable identity. 
+ +## OpenCode evidence + +### Version + +```bash +command -v opencode +# /home/user/.opencode/bin/opencode + +opencode --version +# 1.14.41 +``` + +### Run-event identity + +Fresh isolated runs were probed with: + +```bash +XDG_DATA_HOME=<temp-home>/.local/share XDG_CONFIG_HOME=<temp-home>/.config opencode run "Reply with exactly: opencode-probe-ok" --format json --dangerously-skip-permissions +``` + +Observed durable identity rule: + +- The `2026-04-26` rerun used isolated empty OpenCode data/config roots for the session-identity probes so stale user-local provider configuration could not affect the contract. +- The first JSON `step_start` event carried a `sessionID`. +- That exact `sessionID` matched the `session.id` row written into the isolated OpenCode database. + +### Control surface identity + +The authoritative control surface was probed with: + +```bash +XDG_DATA_HOME=<temp-home>/.local/share XDG_CONFIG_HOME=<temp-home>/.config opencode serve --hostname 127.0.0.1 --port <port> +curl http://127.0.0.1:<port>/global/health +curl http://127.0.0.1:<port>/session/status +``` + +Observed control behavior: + +- `/global/health` returned a healthy payload with version `1.14.41`. +- `/session/status` returned `{}` while idle. +- During an attached `opencode run ... --attach http://127.0.0.1:<port>`, `/session/status` returned the same authoritative `sessionID` with `{ "type": "busy" }`. + +### Title behavior + +Title semantics were probed with: + +```bash +opencode run "Reply with exactly: opencode-title-one" --format json --dangerously-skip-permissions --title probe-title-one +opencode run "Reply with exactly: opencode-title-two" --format json --dangerously-skip-permissions --session <sessionId> --title probe-title-two +opencode session --help +``` + +Observed title behavior: + +- The resumed run kept the same `sessionID`. +- The stored database title remained `probe-title-one`. 
+- `opencode session --help` only exposed `list` and `delete`; no rename subcommand was present in the tested mode. + +### OpenCode allowed behavior + +- Canonical OpenCode identity is the authoritative `sessionID`. +- Busy or restore state may only be promoted from the control surface or the canonical DB/session events. +- Titles are metadata and do not replace session identity. diff --git a/docs/superpowers/plans/2026-05-14-codex-turn-completion-durability.md b/docs/superpowers/plans/2026-05-14-codex-turn-completion-durability.md new file mode 100644 index 000000000..05428a2f4 --- /dev/null +++ b/docs/superpowers/plans/2026-05-14-codex-turn-completion-durability.md @@ -0,0 +1,497 @@ +# Codex Turn-Completion Durability Implementation Plan + +> **For Claude:** REQUIRED: Use superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Make Codex terminal restore identity mandatory, observable, and promoted only by deterministic evidence. A fresh Codex pane must not accept user input until Freshell has persisted the provider-reported candidate thread id and rollout path, must not treat that candidate as durable, and must promote to `sessionRef` only after the exact rollout file proves the same Codex root TUI `ThreadId`. + +**Architecture:** Add a Freshell-owned websocket proxy between the visible Codex TUI and the Codex app-server sidecar. The proxy observes `thread/start` responses and `thread/started` notifications for candidate capture, observes `turn/completed` for the mandatory proof-check boundary, and forwards traffic normally. Terminal input is gated only until the candidate is atomically written to the Freshell server-side durability store. Durable promotion is an event-driven one-shot proof read of the exact rollout path, not a polling loop. 
+ +**Tech Stack:** Node.js/TypeScript ESM, `ws`, `node-pty`, Express WebSocket protocol, React 18, Redux Toolkit, Zod, Vitest, Testing Library, superwstest, Freshell orchestration. + +--- + +## Research Contract + +- Codex durable restore identity is not a title, cwd, launch time, shell snapshot, or the bare bootstrap id. It is the root TUI `ThreadId` after the exact provider-reported `.codex/sessions/YYYY/MM/DD/rollout-*.jsonl` begins with matching `session_meta` (`docs/lab-notes/2026-05-13-coding-cli-session-restore-research.md:9`, `:15-19`, `:492-504`). +- Fresh `codex --remote <ws>` creates a thread before user work. Freshell must capture that candidate before letting user input through, persist it as candidate-only state, and promote only after rollout proof (`docs/lab-notes/2026-05-13-coding-cli-session-restore-research.md:16`, `:99-127`, `:561-570`). +- Pre-creating a thread through the app-server and launching the TUI with `codex resume <threadId>` before the rollout exists fails with `no rollout found for thread id`; this implementation must remove that launch pattern for fresh Codex panes (`docs/lab-notes/2026-05-13-coding-cli-session-restore-research.md:19`, `:369-412`, `:474-480`, `:550`). +- The Codex source proves the TUI receives `thread/start` response before the `thread/started` notification, both with the candidate id/path, and it does not read terminal input until after the thread is started. Freshell still needs a PTY-side input gate because terminal bytes can queue outside Codex before Freshell persists the candidate (`docs/lab-notes/2026-05-13-coding-cli-session-restore-research.md:440-446`, `:551-553`). +- `turn/completed` is the required proof-check boundary, not proof. On that event for the candidate thread, Freshell must run exactly one direct proof read of the stored rollout path. 
It promotes only if the file is regular, readable JSONL whose first record is `type == "session_meta"` and `payload.id == candidateThreadId` (`docs/lab-notes/2026-05-13-coding-cli-session-restore-research.md:17`, `:124-134`, `:456`, `:466-468`, `:520-526`, `:555`). +- `fs/watch` is only a wake-up source. A missed filesystem event was observed in the probe, so it cannot be the only promotion path. It also cannot replace the direct proof read (`docs/lab-notes/2026-05-13-coding-cli-session-restore-research.md:325-331`, `:482-490`, `:554`, `:586-588`). +- After `turn/completed`, proof failure is not an acceptable green, grey, or silently live-only steady state. It is `durability_unproven_after_completion` until a deterministic one-shot repair trigger succeeds or the pane becomes non-restorable (`docs/lab-notes/2026-05-13-coding-cli-session-restore-research.md:506-518`, `:520-538`, `:584-590`). +- Reopen of captured-but-unproven Codex state proof-reads first. If proof succeeds, promote and resume. If proof fails and a live terminal is attachable, attach live while keeping the degraded state visible. If no live terminal is attachable, fresh-create a Codex pane and show that the old captured Codex state could not be proven restorable. Do not try `codex resume <candidateThreadId>` before proof (`docs/lab-notes/2026-05-13-coding-cli-session-restore-research.md:540-544`). + +## Current Gap In This Worktree + +- `server/coding-cli/codex-app-server/launch-planner.ts` currently calls `runtime.startThread()` for fresh Codex launches and returns that id as the launch `sessionId`; `server/ws-handler.ts` then passes it as `resumeSessionId`, so `buildSpawnSpec()` launches the visible TUI with `codex --remote <ws> resume <threadId>`. This is exactly the pre-durable resume pattern the research rejects. +- `server/coding-cli/codex-app-server/protocol.ts`, `client.ts`, and `runtime.ts` handle `thread/started`, lifecycle loss, and `fs/changed`, but do not expose `turn/completed`. 
+- `server/terminal-registry.ts` writes PTY input immediately once the terminal exists. It has no state that can block input until candidate persistence is complete. +- `src/components/TerminalView.tsx` persists canonical identity only after `terminal.session.associated`; it has no candidate-only Codex durability state and no acknowledgement path back to the server. +- The sidebar and persisted tab state can represent `sessionRef` or legacy `resumeSessionId`, but not a non-canonical Codex candidate. This is why an unpromoted live Codex pane can appear as a generic grey terminal and then split into a second entry when canonical metadata appears later. +- The research document also mentions `durable-rollout-tracker.ts` and a pre-durable stability timer that promotes to `running_live_only` in `/home/user/code/freshell/.worktrees/dev` (`docs/lab-notes/2026-05-13-coding-cli-session-restore-research.md:576-582`). That timer-based promotion is not present in this `origin/main`-based implementation worktree. A `running_live_only` type string still exists in recovery policy code, so this plan avoids removing the enum value unless implementation proves it is dead. + +## State Model To Implement + +- `identity_pending`: fresh Codex TUI has been spawned through the proxy, but no candidate has been durably saved by Freshell. PTY output, resize, and narrow terminal-control replies required for TUI startup pass through. User-originating input is dropped, not buffered or replayed, and the server emits `terminal.input.blocked` for observability. +- `captured_pre_turn`: Freshell has atomically persisted `{ provider: "codex", candidateThreadId, rolloutPath, source, capturedAt }` in the server-side durability store. Input is allowed. This is not restorable/durable. Client localStorage acknowledgement may arrive later and is idempotent. +- `turn_in_progress_unproven`: the proxy observed `turn/start` or equivalent user-turn activity for the candidate. Live use continues. 
This is not restorable/durable. +- `proof_checking`: `turn/completed` or a deterministic repair trigger fired and one exact proof read is running. +- `durable`: the proof succeeded. Freshell sends the existing `terminal.session.associated` message with `sessionRef.provider == "codex"` and `sessionRef.sessionId == candidateThreadId`; normal resume uses that id. +- `durable_resuming`: a terminal launched from an existing canonical Codex `sessionRef`. It starts from already-proven durable identity and does not return to candidate capture. Normal launch trusts the saved canonical `sessionRef`; if durable proof metadata with `rolloutPath` is also available, repair/list/open paths may proof-read it before resume. If no proof metadata exists, Freshell must not invent one from cwd/time/title. +- `durability_unproven_after_completion`: proof failed after completion. Live terminal access remains possible if the PTY is alive, but sidebar/pane state must be degraded, not green/normal. +- `non_restorable`: no durable proof exists and no live terminal is attachable. Reopening fresh-creates Codex and keeps a local restore-error explanation. + +## Implementation Tasks + +### 1. Add Codex Durability Types And Proof Reader + +- [ ] Create `shared/codex-durability.ts`. + - [ ] Export `CodexDurabilityStateName` with the exact state names above. + - [ ] Export `CodexCandidateIdentity` with `provider: "codex"`, `candidateThreadId`, `rolloutPath`, `source`, `capturedAt`, and optional `cliVersion`. + - [ ] Export `CodexDurabilityRef` with `schemaVersion: 1`, `state`, `candidate`, optional `turnCompletedAt`, optional `lastProofFailure`, optional `durableThreadId`, and optional `nonRestorableReason`. + - [ ] Add Zod schemas so persisted client state and websocket payloads are validated instead of using ad hoc objects. 
+ - [ ] Keep names explicit: use `candidateThreadId`, `rolloutProofId`, and `durableThreadId`, matching the research terminology (`docs/lab-notes/2026-05-13-coding-cli-session-restore-research.md:492-504`). +- [ ] Create `server/coding-cli/codex-app-server/durability-proof.ts`. + - [ ] Export `proofCodexRollout({ rolloutPath, candidateThreadId, fsImpl? })`. + - [ ] Require `rolloutPath` to be absolute and non-empty. + - [ ] `stat()` the exact path and require a regular file. + - [ ] Read only enough data to parse the first JSONL record; do not scan globs or nearby files. + - [ ] Require first record JSON to have `type === "session_meta"` and `payload.id === candidateThreadId`. + - [ ] Return a typed success/failure result with a machine-readable reason: `missing`, `not_regular_file`, `empty`, `malformed_json`, `wrong_record_type`, `missing_payload_id`, `mismatched_thread_id`, `read_error`. + - [ ] Do not check cwd, date directories, shell snapshots, or filename proximity. The proof is the exact path plus first-record identity (`docs/lab-notes/2026-05-13-coding-cli-session-restore-research.md:456`, `:520-526`). +- [ ] Create `server/coding-cli/codex-app-server/durability-store.ts`. + - [ ] Atomically persist candidate and proof-state records under a Freshell-owned directory, defaulting to `~/.freshell/codex-durability/`. + - [ ] Key records by `terminalId`, and include `tabId`, `paneId`, `candidateThreadId`, `rolloutPath`, `state`, `capturedAt`, and `serverInstanceId`. + - [ ] Treat this server-side write as the authoritative gate-release persistence. Client localStorage persistence is still required for refresh/reopen UX, but it is not what releases PTY input. + - [ ] Make duplicate writes idempotent when `candidateThreadId` and `rolloutPath` match; reject mismatched rewrites for the same terminal. + - [ ] Delete records when the terminal is killed and either durable `sessionRef` was promoted or the candidate is intentionally abandoned. 
+- [ ] Add `test/unit/server/coding-cli/codex-app-server/durability-proof.test.ts`. + - [ ] Success: first line is matching `session_meta`. + - [ ] Failure: missing path, directory, empty file, malformed first line, first line not `session_meta`, missing `payload.id`, mismatched id. + - [ ] Regression: a later matching line must not succeed if the first record is wrong. +- [ ] Add `test/unit/server/coding-cli/codex-app-server/durability-store.test.ts`. + - [ ] Atomic write/read round trip. + - [ ] Duplicate matching candidate is idempotent. + - [ ] Mismatched candidate for the same terminal is rejected. + - [ ] Missing or older persisted layouts with no Codex durability data read cleanly and never synthesize a candidate from `resumeSessionId`. + +Run: + +```bash +npm run test:vitest -- test/unit/server/coding-cli/codex-app-server/durability-proof.test.ts test/unit/server/coding-cli/codex-app-server/durability-store.test.ts --run +``` + +Commit: + +```bash +git add shared/codex-durability.ts server/coding-cli/codex-app-server/durability-proof.ts server/coding-cli/codex-app-server/durability-store.ts test/unit/server/coding-cli/codex-app-server/durability-proof.test.ts test/unit/server/coding-cli/codex-app-server/durability-store.test.ts +git commit -m "Add Codex rollout durability proof reader" +``` + +### 2. Add App-Server Event Schemas For Turns + +- [ ] Update `server/coding-cli/codex-app-server/protocol.ts`. + - [ ] Add `CodexTurnStartedNotificationSchema` if the protocol surface is present in the observed app-server traffic or fake server tests need it. + - [ ] Add `CodexTurnCompletedNotificationSchema` for `method: "turn/completed"` with `params.threadId` and pass-through turn fields. + - [ ] Export inferred types. + - [ ] Keep lifecycle parsing separate from turn parsing so lifecycle loss recovery behavior remains unchanged. +- [ ] Update `server/coding-cli/codex-app-server/client.ts`. + - [ ] Add `onTurnStarted` and `onTurnCompleted` handlers. 
+ - [ ] Dispatch turn events from notification parsing before generic handling. + - [ ] Preserve existing `thread/started`, lifecycle loss, disconnect, and `fs/changed` behavior. +- [ ] Update `server/coding-cli/codex-app-server/runtime.ts`. + - [ ] Re-emit client turn events with `onTurnStarted` and `onTurnCompleted`. +- [ ] Add/update unit tests in: + - [ ] `test/unit/server/coding-cli/codex-app-server/client.test.ts` + - [ ] `test/unit/server/coding-cli/codex-app-server/runtime.test.ts` + +Run: + +```bash +npm run test:vitest -- test/unit/server/coding-cli/codex-app-server/client.test.ts test/unit/server/coding-cli/codex-app-server/runtime.test.ts --run +``` + +Commit: + +```bash +git add server/coding-cli/codex-app-server/protocol.ts server/coding-cli/codex-app-server/client.ts server/coding-cli/codex-app-server/runtime.ts test/unit/server/coding-cli/codex-app-server/client.test.ts test/unit/server/coding-cli/codex-app-server/runtime.test.ts +git commit -m "Observe Codex turn lifecycle notifications" +``` + +### 3. Add A Freshell-Owned Codex Remote Websocket Proxy + +- [ ] Create `server/coding-cli/codex-app-server/remote-proxy.ts`. + - [ ] Allocate a loopback websocket endpoint for the visible TUI. + - [ ] Forward TUI websocket traffic to the real app-server sidecar endpoint. + - [ ] Observe client-to-server JSON-RPC requests and remember request id to method for `thread/start`, `thread/resume`, `turn/start`, and any turn methods present in fixtures. + - [ ] Parse client-to-server `turn/start` as a generic JSON-RPC envelope by method name; do not require a full request-params Zod schema unless the implementation needs fields beyond the method and id. + - [ ] Observe server-to-client JSON-RPC responses. For a `thread/start` response, parse the `thread` payload and emit candidate `{ threadId, rolloutPath, source: "thread_start_response" }`. + - [ ] Observe server-to-client notifications. 
Emit candidate from `thread/started` if no response candidate has been persisted yet; emit `turn_started`, `turn_completed`, `fs_changed`, lifecycle loss, and connection loss events. + - [ ] If a `turn/start` request arrives before the server-side candidate persistence write completes, hold that request until the write completes. If the write fails, the terminal is shutting down, or `5_000ms` elapse without a persisted candidate, fail the held request with JSON-RPC error code `-32000` and message `Freshell could not persist Codex restore identity before accepting user input.` Transition the terminal to `non_restorable`, stop that fresh TUI, and fresh-create only if the user explicitly retries. + - [ ] Also start a candidate-capture deadline when the visible TUI is spawned, independent of user input. If no candidate has been persisted within `45_000ms`, transition the terminal to `non_restorable`, emit `terminal.codex.durability.updated`, send `terminal.input.blocked` with terminal reason `codex_identity_capture_timeout` for any later input, and stop the fresh TUI/sidecar. This is a bounded startup deadline, not polling; live production validation showed a 10 second deadline can kill a valid cold Codex launch before identity capture completes. + - [ ] Apply the candidate-capture deadline and `turn/start` hold only to fresh Codex launches that do not yet have a canonical durable `sessionRef`. Durable resume launches still pass through the proxy for turn/lifecycle observation, but the proxy must start with candidate persistence disabled, must not arm the fresh-candidate timeout, and must not hold `turn/start`. + - [ ] On held `turn/start` failure or candidate-capture timeout, return the JSON-RPC error if a request is pending, then close the proxy websocket and kill the PTY process for that failed fresh TUI. Do not leave Codex running against a dead or untrusted proxy, and do not replay held user bytes into a replacement session. 
+ - [ ] Do not periodically query the app-server or filesystem from the proxy. + - [ ] Include structured logs for proxy start, candidate observed, held turn request, released turn request, turn completed, proof trigger, and proxy close/error. +- [ ] Ensure readiness ordering is explicit. + - [ ] `CodexRemoteProxy.start()` must resolve only after the local proxy websocket server is listening and all local event handlers are installed. + - [ ] `launch-planner.ts` must await proxy readiness before returning the plan that will spawn the visible TUI. + - [ ] Freshell-owned upstream observer/listener setup must complete before `buildSpawnSpec()` can hand the proxy URL to Codex. +- [ ] Add `test/unit/server/coding-cli/codex-app-server/remote-proxy.test.ts`. + - [ ] Fresh TUI traffic through the proxy captures candidate from `thread/start` response. + - [ ] Candidate can also be captured from `thread/started` notification. + - [ ] `turn/start` before server-side candidate persistence is held, then forwarded after the store write completes. + - [ ] `turn/start` times out and fails cleanly if candidate persistence never completes. + - [ ] Candidate-capture timeout fires even when the user never types and no `turn/start` request arrives. + - [ ] Durable resume proxy traffic forwards `turn/start` immediately and does not emit candidate-capture timeout when no fresh candidate is expected. + - [ ] Timeout/failure closes the proxy websocket and terminates the failed TUI rather than leaving it running. + - [ ] `turn/completed` is emitted with the matching thread id. + - [ ] Proxy close/error emits a deterministic repair trigger and shuts down without leaking sockets. 
+ +Run: + +```bash +npm run test:vitest -- test/unit/server/coding-cli/codex-app-server/remote-proxy.test.ts --run +``` + +Commit: + +```bash +git add server/coding-cli/codex-app-server/remote-proxy.ts test/unit/server/coding-cli/codex-app-server/remote-proxy.test.ts +git commit -m "Proxy Codex remote traffic for deterministic identity capture" +``` + +### 4. Replace Fresh Codex Pre-Create/Resume With Fresh Remote Launch + +- [ ] Update `server/coding-cli/codex-app-server/launch-planner.ts`. + - [ ] For fresh Codex launch, call `runtime.ensureReady()` instead of `runtime.startThread()`. + - [ ] Start and await a `CodexRemoteProxy` before returning the plan. + - [ ] Return a launch plan whose `sessionId` is undefined for fresh launches. The fresh visible command must be `codex --remote <proxyWsUrl>` with no `resume <threadId>`. + - [ ] For durable resume launches, keep `sessionId == resumeSessionId`, route the TUI through the proxy, and keep readiness behavior for the durable id. + - [ ] Durable resume launches start in `durable_resuming`/`durable`; they construct the proxy with fresh-candidate persistence disabled, do not arm candidate-capture timeout, and do not re-promote on `thread/started`. + - [ ] Sidecar shutdown must close the proxy and the runtime sidecar. + - [ ] Sidecar adoption must still update sidecar ownership metadata with terminal id and generation. + - [ ] Expose proxy events on the sidecar: `onCandidate`, `markCandidatePersisted`, `onTurnStarted`, `onTurnCompleted`, `onRepairTrigger`, `onLifecycleLoss`, and `onFsChanged`. +- [ ] Update `server/ws-handler.ts`. + - [ ] Do not overwrite `effectiveResumeSessionId` with a fresh Codex plan id when the launch is fresh. + - [ ] Continue passing `effectiveResumeSessionId` only for proven durable resumes. + - [ ] Record lifecycle events distinguishing `codex_candidate_pending`, `codex_candidate_captured`, and `codex_durable_session_observed`. 
+ - [ ] Remove the existing adoption-time `codex_durable_session_observed` emission for fresh Codex. That event must be emitted only after rollout proof success; durable resume can log `codex_durable_resume_started`. +- [ ] Update `server/terminal-registry.ts` recovery spawning. + - [ ] Durable recovery still spawns `resume <durableThreadId>`. + - [ ] Fresh launch never spawns `resume <candidateThreadId>`. +- [ ] Update `test/unit/server/coding-cli/codex-app-server/launch-planner.test.ts`. + - [ ] Fresh `planCreate({ cwd })` must not call `startThread`. + - [ ] Fresh launch plan remote wsUrl is the proxy wsUrl. + - [ ] Fresh plan has no durable `sessionId`. + - [ ] Durable `planCreate({ resumeSessionId })` uses resume id and readiness as before. +- [ ] Update `test/unit/server/ws-handler-sdk.test.ts` or add a focused server WS test. + - [ ] Fresh `terminal.create` for Codex does not return `effectiveResumeSessionId`. + - [ ] Durable `terminal.create` for Codex still returns the durable resume id. + +Run: + +```bash +npm run test:vitest -- test/unit/server/coding-cli/codex-app-server/launch-planner.test.ts test/unit/server/ws-handler-sdk.test.ts --run +``` + +Commit: + +```bash +git add server/coding-cli/codex-app-server/launch-planner.ts server/ws-handler.ts server/terminal-registry.ts test/unit/server/coding-cli/codex-app-server/launch-planner.test.ts test/unit/server/ws-handler-sdk.test.ts +git commit -m "Launch fresh Codex without pre-durable resume" +``` + +### 5. Persist Candidate State Before Releasing Input + +- [ ] Update `shared/ws-protocol.ts`. + - [ ] Add server-to-client `terminal.codex.durability.updated` payload carrying `terminalId` and `CodexDurabilityRef`. + - [ ] Add client-to-server `terminal.codex.candidate.persisted` with `terminalId`, `candidateThreadId`, `rolloutPath`, and `capturedAt`. 
+ - [ ] Register `terminal.codex.candidate.persisted` in every server-side websocket validator, including the dynamic schema built by `server/ws-handler.ts`, so browser acknowledgements cannot be rejected as `INVALID_MESSAGE`. + - [ ] Add optional `codexDurability` to `terminal.create` so persisted captured-but-unproven panes can be repaired or fresh-created deterministically on reopen. + - [ ] Add server-to-client `terminal.input.blocked` with `reason: "codex_identity_pending"` for diagnostic UI/logging when PTY input arrives during the narrow gate. Do not send `INVALID_TERMINAL_ID` for gated input. +- [ ] Update `src/store/paneTypes.ts`, `src/store/types.ts`, `src/store/persistedState.ts`, `src/store/storage-migration.ts`, `src/store/panesSlice.ts`, and `src/store/tabsSlice.ts`. + - [ ] Add optional `codexDurability?: CodexDurabilityRef` to terminal pane content and tab metadata. + - [ ] Persist it in localStorage. + - [ ] Preserve it across tab/pane merge logic. + - [ ] When canonical `sessionRef.provider == "codex"` is set for the same thread id, retain durable proof metadata if available with `state: "durable"` and clear only degraded/pending warnings. Do not keep a stale non-canonical pending state next to a matching canonical `sessionRef`. + - [ ] Do not backfill it from `resumeSessionId`, cwd, title, or time. + - [ ] Add a named migration test: older persisted layouts with no `codexDurability` field must load cleanly and must not synthesize candidate state from `resumeSessionId`. +- [ ] Update `src/components/TerminalView.tsx`. + - [ ] On `terminal.codex.durability.updated`, update pane content with the candidate/degraded state and flush persisted layout immediately. + - [ ] After flush succeeds, send `terminal.codex.candidate.persisted` for candidate states. This acknowledgement is idempotent and observational; it must not be required for server-side gate release because the server-side durability store is authoritative. 
+ - [ ] On `terminal.session.associated`, clear matching `codexDurability` and set the canonical `sessionRef` through the existing durable path. + - [ ] Include persisted `codexDurability` in `terminal.create` when there is no canonical Codex `sessionRef`. + - [ ] Do not send a candidate thread id as `resumeSessionId`. + - [ ] On `terminal.input.blocked`, log the blocked reason and show a throttled terminal-local message so browser-originated input is not silently dropped during the identity gate. +- [ ] Update `server/terminal-registry.ts`. + - [ ] Extend `TerminalRecord` with `codexDurability` and `codexInputGate`. + - [ ] When proxy emits a candidate, write it to the server-side durability store first. After that atomic write succeeds, transition to `captured_pre_turn`, emit `terminal.codex.durability.updated`, call sidecar/proxy `markCandidatePersisted()`, and release held `turn/start` requests. + - [ ] If the candidate store write fails, do not release PTY input or held `turn/start`. Mark the terminal `non_restorable`, log the failure, and keep user work from entering an untracked Codex session. + - [ ] Change `input()` to return `TerminalInputResult`: + - [ ] `{ status: "written" }` + - [ ] `{ status: "blocked_codex_identity_pending"; terminalId: string }` + - [ ] `{ status: "blocked_codex_identity_capture_timeout"; terminalId: string }` + - [ ] `{ status: "blocked_codex_identity_unavailable"; terminalId: string; reason?: string }` + - [ ] `{ status: "blocked_codex_recovery_pending"; terminalId: string }` + - [ ] `{ status: "no_terminal" }` + - [ ] `{ status: "not_running" }` + - [ ] Update all callers of `TerminalRegistry.input()` to handle the new result shape. + - [ ] Keep terminal resize and output flowing while input is gated. + - [ ] Duplicate or replayed client `terminal.codex.candidate.persisted` acknowledgements succeed only when they match the stored candidate; mismatched acks are logged and ignored. +- [ ] Update automation prompt-seeding surfaces. 
+ - [ ] For `freshell new-tab --codex --prompt ...` and MCP `new-tab` with `mode: "codex"` and `prompt`, opt into an event-driven `send-keys` wait. + - [ ] The server retries the prompt when a matching `terminal.codex.durability.updated` or terminal exit event arrives. Do not poll, and do not send the prompt while the terminal is still `identity_pending`. +- [ ] Update `server/ws-handler.ts`. + - [ ] Handle `terminal.codex.candidate.persisted` and call the registry acknowledgement method. + - [ ] For blocked input, log at debug/info with terminal id and bytes, send `terminal.input.blocked`, and do not misreport it as `INVALID_TERMINAL_ID`. + - [ ] Blocked input is dropped, not buffered and not replayed. The user can type again after the gate opens; Freshell must not silently submit stale pre-capture bytes. +- [ ] Add/update tests: + - [ ] `test/unit/server/terminal-registry.codex-sidecar.test.ts`: input is blocked before server-side candidate persistence and written after the store write completes. + - [ ] `test/unit/client/components/TerminalView.test.tsx` or nearest focused test: candidate update persists and sends ack; canonical association clears candidate state. + - [ ] `test/unit/client/store/*`: persisted state keeps `codexDurability`. + +Run: + +```bash +npm run test:vitest -- test/unit/server/terminal-registry.codex-sidecar.test.ts test/unit/client/components/TerminalView.test.tsx test/unit/client/store --run +``` + +Commit: + +```bash +git add shared/ws-protocol.ts shared/codex-durability.ts src/store src/components/TerminalView.tsx server/terminal-registry.ts server/ws-handler.ts test/unit/server/terminal-registry.codex-sidecar.test.ts test/unit/client +git commit -m "Persist Codex candidate identity before accepting input" +``` + +### 6. Promote Durable Codex Identity At Turn Completion + +- [ ] Update `server/terminal-registry.ts`. 
+ - [ ] Subscribe each Codex sidecar/proxy to `turn_started`, `turn_completed`, `fs_changed`, proxy close/error, and lifecycle loss events. + - [ ] On `turn_started` for the candidate, transition to `turn_in_progress_unproven`. + - [ ] On `turn_completed` for the candidate, transition to `proof_checking`, run one `proofCodexRollout()` call, and then: + - [ ] Success: transition to `durable`, bind `resumeSessionId` through the existing `bindSessionToTerminal()` path, emit `terminal.session.associated`, record `codex_durable_session_observed`, and clear candidate state in the client. + - [ ] Failure: transition to `durability_unproven_after_completion`, emit `terminal.codex.durability.updated`, keep the live PTY attachable if running, and log structured proof failure data. + - [ ] Coalesce overlapping deterministic proof triggers into at most one extra immediate proof read after the current one finishes. Do not use `setInterval`, delayed backoff loops, or path-existence polling (`docs/lab-notes/2026-05-13-coding-cli-session-restore-research.md:526`). + - [ ] Watch the exact rollout path and parent through the Freshell-owned runtime/client connection, not by injecting requests into the TUI proxy socket. This avoids JSON-RPC request-id collisions with the visible TUI. Treat `fs/changed` only as a repair trigger that calls the same proof reader (`docs/lab-notes/2026-05-13-coding-cli-session-restore-research.md:482-490`, `:528-538`). + - [ ] On PTY exit or app-server/proxy close/error, run one proof read before finalizing state. +- [ ] Update `server/ws-handler.ts`. + - [ ] Ensure `terminal.session.associated` is sent only after proof success for fresh Codex. + - [ ] Ensure `sendError` logs server-side structured errors for `RESTORE_UNAVAILABLE` and Codex proof failures. This closes the silent logging gap from the original observation. 
+- [ ] Add/update tests: + - [ ] `test/unit/server/terminal-registry.codex-sidecar.test.ts`: `turn_completed` plus matching rollout emits canonical `terminal.session.associated`. + - [ ] `test/unit/server/terminal-registry.codex-sidecar.test.ts`: `turn_completed` plus missing/malformed/mismatched rollout emits degraded state and does not bind `resumeSessionId`. + - [ ] Test trigger coalescing: two repair events during a proof read cause one extra read, not an unbounded loop. + - [ ] Test PTY exit before/after turn completion. + +Run: + +```bash +npm run test:vitest -- test/unit/server/terminal-registry.codex-sidecar.test.ts test/unit/server/ws-handler-sdk.test.ts --run +``` + +Commit: + +```bash +git add server/terminal-registry.ts server/ws-handler.ts test/unit/server/terminal-registry.codex-sidecar.test.ts test/unit/server/ws-handler-sdk.test.ts +git commit -m "Promote Codex sessions only after rollout proof" +``` + +### 7. Reopen Captured-But-Unproven State Deterministically + +- [ ] Update `server/ws-handler.ts` create/reuse flow. + - [ ] Ensure all user restore/list/open surfaces funnel through this create/reuse decision: sidebar row click, tab restore, background terminal restore, MCP/new-tab restore, and any history/session open path that creates a Codex terminal. + - [ ] When `terminal.create` includes `codexDurability` and no canonical `sessionRef`, ask the registry to run one proof read before deciding how to open. + - [ ] Permit `restore: true` for Codex candidate-only requests when `codexDurability.candidate` is present, even without `sessionRef`, so the proof-first path runs instead of rejecting the request before repair. + - [ ] Reopen of `durability_unproven_after_completion` follows the same proof-first path as captured-but-unproven. Success promotes; failure with an exact live candidate attaches live and remains degraded; failure with no live attachable terminal becomes `non_restorable` and fresh-creates only for a new Codex session. 
+ - [ ] If proof succeeds, set `effectiveResumeSessionId` to the proven `durableThreadId` and launch a durable resume. + - [ ] If proof fails and a live terminal on this server matches the exact candidate thread id and rollout path, attach that live terminal and keep degraded/unproven state visible. + - [ ] If proof fails and no live terminal is attachable, fresh-create a new Codex terminal. Do not pass the candidate as `resumeSessionId`; attach a clear local restore-error/non-restorable state to the pane. +- [ ] Add `TerminalRegistry.findRunningCodexTerminalByCandidate({ candidateThreadId, rolloutPath })`. + - [ ] Match only exact candidate thread id and exact rollout path stored in the live record. + - [ ] Do not match by cwd, time, title, or shell snapshot (`docs/lab-notes/2026-05-13-coding-cli-session-restore-research.md:540-544`). +- [ ] Update client sidebar/state rendering. + - [ ] A live open terminal can show a live/attached indicator only from terminal inventory. + - [ ] A Codex pane/session must not show normal restorable/durable state until canonical `sessionRef` exists. + - [ ] `durability_unproven_after_completion` shows degraded/restoration-not-proven state even if live terminal attach is available. + - [ ] Newly created Codex panes appear in the sidebar immediately as live pending/captured rather than generic grey entries. + - [ ] Own the state-to-sidebar mapping in `src/store/selectors/sidebarSelectors.ts` and render it in `src/components/Sidebar.tsx` or the row component it delegates to: + - [ ] `identity_pending`: "Starting Codex; restore identity not captured." + - [ ] `captured_pre_turn`: "Codex identity captured; restore proof pending." + - [ ] `turn_in_progress_unproven`: "Codex turn running; restore proof pending." + - [ ] `proof_checking`: "Checking Codex restore proof." + - [ ] `durability_unproven_after_completion`: "Codex restore proof failed after turn completion." 
+ - [ ] `non_restorable`: "Codex session could not be proven restorable." + - [ ] `durable` / `durable_resuming`: normal Codex restorable display. +- [ ] Add tests: + - [ ] Server: captured unproven reopen proof success resumes durable id. + - [ ] Server: captured unproven reopen proof fail plus live exact candidate attaches live and stays degraded. + - [ ] Server: captured unproven reopen proof fail plus no live exact candidate fresh-creates without passing candidate to resume. + - [ ] Server websocket tests must exercise the real client shape with `restore: true` and candidate-only `codexDurability`, not only raw `terminal.create` messages without restore semantics. + - [ ] Client/sidebar: live pending Codex appears as Codex, not a generic grey terminal; durable promotion updates the same entry rather than adding a duplicate. + - [ ] Each restore/list/open surface above uses the same proof-first path and has no independent cwd/time/title matching. + +Run: + +```bash +npm run test:vitest -- test/unit/server/ws-handler-sdk.test.ts test/unit/server/terminal-registry.findRunningTerminal.test.ts test/unit/client --run +``` + +Commit: + +```bash +git add server/ws-handler.ts server/terminal-registry.ts src test +git commit -m "Repair captured Codex reopen without nondeterministic matching" +``` + +### 8. Extend Fake Codex Fixtures For Realistic Tests + +- [ ] Update `test/fixtures/coding-cli/codex-app-server/fake-app-server.mjs`. + - [ ] Keep current app-server fixture mode for `app-server --listen`. + - [ ] Add fake TUI mode for `--remote <ws>` that connects to the proxy, sends `thread/start` for fresh launch, writes a visible PTY banner, reads stdin, sends `turn/start`, optionally writes rollout JSONL, then sends `turn/completed`. + - [ ] Add fixture controls for delayed candidate, missing rollout, malformed rollout, mismatched rollout id, delayed `turn/completed`, proxy close, and app-server close. 
+ - [ ] Ensure fixture processes are tagged with temp env vars so cleanup cannot kill real user sessions. +- [ ] Add or update e2e/integration tests: + - [ ] Fresh Codex launch: candidate captured, input initially gated, server-side candidate persistence releases input, `turn/completed` promotes to canonical `sessionRef`. + - [ ] Missing rollout after `turn/completed`: state becomes degraded and no canonical `sessionRef` is persisted. + - [ ] Reopen degraded with later rollout proof: proof-read repairs and resumes durable id. + - [ ] Reopen degraded without proof after server restart: fresh-creates Codex and does not call `resume <candidateThreadId>`. + - [ ] Duplicate sidebar regression: a new live Codex terminal stays one sidebar item before and after durable promotion. + +Run: + +```bash +npm run test:vitest -- test/e2e/codex-session-resilience-flow.test.tsx test/e2e/codex-refresh-rehydrate-flow.test.tsx --run +``` + +Commit: + +```bash +git add test/fixtures/coding-cli/codex-app-server/fake-app-server.mjs test/e2e +git commit -m "Cover Codex durability flow end to end" +``` + +### 9. Observability And Logging + +- [ ] Update `server/session-lifecycle-logger.ts` or the nearest lifecycle telemetry module. + - [ ] Add lifecycle event kinds for `codex_candidate_observed`, `codex_candidate_persist_requested`, `codex_candidate_persisted`, `codex_input_gate_blocked`, `codex_turn_completed`, `codex_rollout_proof_success`, `codex_rollout_proof_failure`, `codex_repair_triggered`, `codex_reopen_fresh_created`. +- [ ] Update `server/ws-handler.ts` `sendError`. + - [ ] Log every server-sent error with code, message, requestId, terminalId/session id when present, and connection id. + - [ ] Avoid relying on stdout/stderr-only messages from child processes; structured server logs should show the reason Freshell chose degraded/fresh-create/restore-unavailable. 
+- [ ] Add log assertions in focused unit tests where practical, especially for proof failure and restore-unavailable paths. + +Run: + +```bash +npm run test:vitest -- test/unit/server/ws-handler-sdk.test.ts test/unit/server/terminal-registry.codex-sidecar.test.ts --run +``` + +Commit: + +```bash +git add server test +git commit -m "Log Codex durability transitions and server errors" +``` + +### 10. Broad Verification + +- [ ] Run typecheck and focused tests: + +```bash +npm run typecheck +npm run test:vitest -- test/unit/server/coding-cli/codex-app-server test/unit/server/terminal-registry.codex-sidecar.test.ts test/unit/server/ws-handler-sdk.test.ts test/unit/client test/e2e/codex-session-resilience-flow.test.tsx test/e2e/codex-refresh-rehydrate-flow.test.tsx --run +``` + +- [ ] Run coordinated full check when focused tests are green: + +```bash +FRESHELL_TEST_SUMMARY="codex turn-completion durability implementation" npm run check +``` + +- [ ] Commit any fixes: + +```bash +git status --short +git add <changed-files> +git commit -m "Stabilize Codex durability verification" +``` + +### 11. Review Hardening Items + +These checks come from the implementation reviews and are part of the same one-shot delivery, not follow-up work. + +- [ ] Arm fresh Codex candidate-capture timeout when the proxy is ready, even if the visible Codex TUI never connects. This closes the stuck `identity_pending` state described in the research evidence that input must not be accepted until Freshell has server-persisted Codex's reported restore identity (`/home/user/code/freshell/.worktrees/codex-stability-implementation-20260514/docs/lab-notes/2026-04-20-coding-cli-session-contract.md`, "Working Codex contract"). +- [ ] Initialize durable Codex resume records as durable in `TerminalRegistry.create()` when the caller supplies a canonical `sessionRef`. 
The research says `sessionRef` is the only durable restore identity; a terminal created from one must advertise that same identity through inventory and sidebar state (`/home/user/code/freshell/.worktrees/codex-stability-implementation-20260514/docs/lab-notes/2026-04-20-coding-cli-session-contract.md`, "Recommendation"). +- [ ] On final Codex process loss, run exactly one rollout proof if a candidate exists, even if the `turn/completed` notification was lost. Ordinary repair events still wait for `turn/completed`; final loss is the last chance to avoid falsely discarding a restorable session (`/home/user/code/freshell/.worktrees/codex-stability-implementation-20260514/docs/lab-notes/2026-04-20-coding-cli-session-contract.md`, "What remains unproven"). +- [ ] Preserve captured candidate state across browser refresh and use it for the recreate request after the old live terminal id is gone. This is the client-side half of "prefer terminal, then proof-read candidate, then fresh-create if proof fails" (`/home/user/code/freshell/.worktrees/codex-stability-implementation-20260514/docs/lab-notes/2026-04-20-coding-cli-session-contract.md`, "Failure handling without polling"). +- [ ] Extend the fake app-server/fake TUI integration path so tests exercise actual proxy candidate capture, input, `turn/completed`, rollout proof, durable promotion, and sidebar/inventory exposure instead of only direct sidecar callbacks. +- [ ] Delete transient Codex durability store records when the owning terminal is killed, removed, or reaped. The server-side store is a crash bridge for an active terminal, not a durable session database. +- [ ] If rollout proof succeeds but canonical session binding fails, do not broadcast `terminal.session.associated`; mark the terminal non-restorable instead so the client cannot persist a session the server does not own. 
+- [ ] After an async candidate-store write completes, re-check that the same terminal is still running and still accepting a candidate before mutating in-memory state or calling `markCandidatePersisted()`. +- [ ] Report input after a candidate-capture timeout as `terminal.input.blocked` with `codex_identity_capture_timeout`, not as a generic invalid/dead terminal. +- [ ] Treat candidate-only Codex ids as pane/tab locators only. They may focus an existing pane so the sidebar does not duplicate entries, but they must not become `sessionRef`, `resumeSessionId`, or tab session metadata until rollout proof succeeds. This follows the research distinction between candidate identity and durable restore identity (`/home/user/code/freshell/.worktrees/codex-stability-implementation-20260514/docs/lab-notes/2026-04-20-coding-cli-session-contract.md`, "Working Codex contract"; `/home/user/code/freshell/.worktrees/codex-stability-implementation-20260514/docs/lab-notes/2026-05-13-coding-cli-session-restore-research.md:492-504`). +- [ ] When a non-restorable Codex row has no live terminal to attach, open a fresh Codex pane without carrying the old candidate id. This preserves user ergonomics without pretending restore succeeded (`/home/user/code/freshell/.worktrees/codex-stability-implementation-20260514/docs/lab-notes/2026-04-20-coding-cli-session-contract.md`, "Failure handling without polling"; `/home/user/code/freshell/.worktrees/codex-stability-implementation-20260514/docs/lab-notes/2026-05-13-coding-cli-session-restore-research.md:540-544`). +- [ ] Reject raw Codex resume ids from generic automation surfaces (`/api/tabs`, pane split/respawn, and MCP `new-tab`) unless they are already flowing through the proven canonical `sessionRef` path. 
These surfaces do not carry rollout proof, so they must fresh-create instead of invoking `codex resume <candidateThreadId>` (`/home/user/code/freshell/.worktrees/codex-stability-implementation-20260514/docs/lab-notes/2026-05-13-coding-cli-session-restore-research.md:369-412`, `:474-480`, `:550`). +- [ ] Expose Codex durability state in sidebar rows, not just a boolean. Pending, checking, degraded, and non-restorable Codex rows must be distinguishable while durable rows remain normal. The research explicitly rejects green/grey/live-only ambiguity after proof failure (`/home/user/code/freshell/.worktrees/codex-stability-implementation-20260514/docs/lab-notes/2026-05-13-coding-cli-session-restore-research.md:506-518`, `:584-590`). +- [ ] Arm an exact app-server `fs/watch` on the captured rollout path as a wake-up source after candidate persistence, and unwatch it after durable promotion or sidecar teardown. The research says `fs/watch` cannot be the only proof path, but it remains the deterministic repair trigger when the rollout appears after the first proof attempt (`/home/user/code/freshell/.worktrees/codex-stability-implementation-20260514/docs/lab-notes/2026-05-13-coding-cli-session-restore-research.md:325-331`, `:482-490`, `:554`, `:586-588`). +- [ ] When attaching an existing live non-restorable Codex terminal by `terminalId`, carry its full `codexDurability` into the reopened tab and pane. A degraded live session is still a failure, but Freshell must retain the evidence needed for proof-first repair instead of collapsing it into a generic grey terminal (`/home/user/code/freshell/.worktrees/codex-stability-implementation-20260514/docs/lab-notes/2026-04-20-coding-cli-session-contract.md`, "Failure handling without polling"). +- [ ] Allow canonical Codex `sessionRef` through automation surfaces while continuing to ignore legacy raw Codex `resumeSessionId`. 
The durable path is the canonical session contract; the unsafe path is treating an unproven candidate or legacy resume token as durable (`/home/user/code/freshell/.worktrees/codex-stability-implementation-20260514/docs/lab-notes/2026-04-20-coding-cli-session-contract.md`, "Working Codex contract"; `/home/user/code/freshell/.worktrees/codex-stability-implementation-20260514/docs/lab-notes/2026-05-13-coding-cli-session-restore-research.md:492-504`). +- [ ] Make rollout proof read only the first JSONL record, not the full rollout file. The proof contract is first-record `session_meta.payload.id`, so proof should be O(first line) rather than O(full transcript) (`/home/user/code/freshell/.worktrees/codex-stability-implementation-20260514/docs/lab-notes/2026-05-13-coding-cli-session-restore-research.md:456`, `:520-526`). + +## Temporary Server Validation + +Use a port that does not interfere with dev, for example `3477`. Do not restart `/home/user/code/freshell/.worktrees/dev`. + +- [ ] Build from the implementation worktree: + +```bash +npm run build +``` + +- [ ] Start a temporary server from this worktree only: + +```bash +PORT=3477 npm start > /tmp/freshell-codex-durability-3477.log 2>&1 & echo $! > /tmp/freshell-codex-durability-3477.pid +``` + +- [ ] Verify the PID belongs to this worktree before stopping it later: + +```bash +ps -fp "$(cat /tmp/freshell-codex-durability-3477.pid)" +``` + +- [ ] Use Freshell orchestration against `http://127.0.0.1:3477` and run each fixture-backed scenario at least three times: + - [ ] Fresh Codex pane: before candidate capture, user input is not accepted but terminal-control replies needed for TUI startup are allowed; after server-side candidate persistence, user input works. + - [ ] Fresh Codex pane with fake TUI: send a test prompt, wait for fake `turn/completed`, verify canonical `sessionRef` is persisted and sidebar entry remains a single Codex item. 
+ - [ ] Fresh Codex pane: close/reopen after durable promotion, verify it resumes with `codex --remote <proxy> resume <durableThreadId>`. + - [ ] Fresh Codex pane: reload browser before first turn completes, verify candidate state is preserved and no candidate id is used as `resumeSessionId`. + - [ ] Fresh Codex pane: restart only the temporary server after durable promotion, verify reopen resumes durable id. + - [ ] Fresh Codex pane: simulate/mutate missing rollout after `turn/completed`, verify degraded state and no fake green/normal state. + - [ ] Captured-but-unproven pane after temporary server restart: verify Freshell proof-reads once and fresh-creates if proof fails, without trying `codex resume <candidateThreadId>`. + - [ ] Existing durable Codex pane: sidecar lifecycle loss triggers durable recovery and preserves the same sidebar item. + - [ ] Shell and Claude panes: create, type, close/reopen, and server restart to verify Codex changes did not regress non-Codex terminal state. +- [ ] Run a real Codex smoke only if the machine has working Codex auth and model access: + - [ ] Fresh real Codex pane: send a short harmless prompt, wait for completion, verify canonical `sessionRef` is persisted and restore works. + - [ ] If real Codex auth/model access is unavailable, record the skipped reason and rely on fixture-backed scenarios plus unit/integration coverage. + +- [ ] Inspect `/tmp/freshell-codex-durability-3477.log`. + - [ ] Confirm structured events exist for candidate observed, candidate persisted, input gate release, turn completed, proof success/failure, and reopen decisions. + - [ ] Confirm no proof-read polling loop is visible. + - [ ] Confirm no server-sent errors are silent. 
+ +- [ ] Stop only the temporary server: + +```bash +kill "$(cat /tmp/freshell-codex-durability-3477.pid)" && rm -f /tmp/freshell-codex-durability-3477.pid +``` + +## Done Criteria + +- Fresh Codex launch no longer pre-creates an app-server thread and no longer TUI-resumes a pre-durable id. +- User-originated Codex input is blocked only until Freshell has persisted the candidate thread id and rollout path. +- `turn/completed` triggers one exact proof read of the provider-reported rollout path. +- Canonical Codex `sessionRef` is persisted only after first-record `session_meta.payload.id` matches the candidate thread id. +- Captured-but-unproven Codex reopen never matches by cwd/time/title and never tries `codex resume <candidateThreadId>` before proof. +- New live Codex panes appear in the sidebar immediately as Codex entries, do not stay generic grey, and do not duplicate on promotion. +- Post-completion proof failure is a visible degraded state, not a normal grey/green state. +- Unit, integration/e2e, coordinated check, and repeated temporary-server scenarios pass. 
diff --git a/server/agent-api/router.ts b/server/agent-api/router.ts index b2fb02db4..82a4d784f 100644 --- a/server/agent-api/router.ts +++ b/server/agent-api/router.ts @@ -4,24 +4,39 @@ import { randomUUID } from 'node:crypto' import { nanoid } from 'nanoid' import { allocateLocalhostPort } from '../local-port.js' import type { CodexLaunchPlan, CodexLaunchPlanner } from '../coding-cli/codex-app-server/launch-planner.js' +import { + planCodexLaunchWithRetry, +} from '../coding-cli/codex-app-server/launch-retry.js' import { CodexLaunchConfigError, getCodexSessionBindingReason, normalizeCodexSandboxSetting, } from '../coding-cli/codex-launch-config.js' +import { INVALID_RAW_CODEX_RESUME_MESSAGE } from '../coding-cli/codex-app-server/restore-decision.js' import { makeSessionKey } from '../coding-cli/types.js' -import { terminalIdFromCreateError, type ProviderSettings } from '../terminal-registry.js' +import { terminalIdFromCreateError, type ProviderSettings, type TerminalInputResult } from '../terminal-registry.js' import { MAX_TERMINAL_TITLE_OVERRIDE_LENGTH } from '../terminals-router.js' +import { logger } from '../logger.js' import { ok, approx, fail } from './response.js' import { renderCapture } from './capture.js' import { waitForMatch } from './wait-for.js' import { resolveScreenshotOutputPath } from './screenshot-path.js' +import { sanitizeSessionRef } from '../../shared/session-contract.js' const truthy = (value: unknown) => value === true || value === 'true' || value === '1' || value === 'yes' const SYNCABLE_TERMINAL_MODES = new Set(['claude', 'codex', 'opencode', 'gemini', 'kimi']) +const log = logger.child({ component: 'agent-api' }) +const CODEX_INPUT_READY_WAIT_TIMEOUT_MS = 60_000 + +class AgentRouteInputError extends Error { + constructor(message: string) { + super(message) + this.name = 'AgentRouteInputError' + } +} function agentRouteErrorStatus(error: unknown): number { - return error instanceof CodexLaunchConfigError ? 
/**
 * Work out which session id a create/split/respawn request is asking to resume.
 *
 * Precedence:
 *  1. A sanitized `sessionRef` whose provider equals `mode` wins and supplies the id.
 *  2. Otherwise the legacy `resumeSessionId` body field is considered, but only when it
 *     is a non-empty string. An empty string is treated the same as "not provided" for
 *     every mode, so callers never receive `""` as a session id.
 *
 * @param sessionRef            Result of `sanitizeSessionRef` for the request body.
 * @param mode                  Terminal mode being launched (e.g. `'codex'`, `'claude'`, `'shell'`).
 * @param legacyResumeSessionId Raw `resumeSessionId` value taken from the request body.
 * @returns The session id to resume, or `undefined` when nothing usable was requested.
 * @throws AgentRouteInputError when `mode` is `'codex'` and a raw (non-sessionRef)
 *         resume id was supplied; codex resumes must arrive as a `sessionRef`.
 */
function requestedResumeSessionIdForMode(
  sessionRef: ReturnType<typeof sanitizeSessionRef>,
  mode: string,
  legacyResumeSessionId: unknown,
): string | undefined {
  const acceptedSessionRef = acceptedSessionRefForMode(sessionRef, mode)
  if (acceptedSessionRef) return acceptedSessionRef.sessionId

  // Nothing usable in the legacy field: non-strings and "" both mean "no resume requested".
  if (!isNonEmptyString(legacyResumeSessionId)) return undefined

  // A raw id for codex is rejected outright rather than silently ignored.
  if (mode === 'codex') {
    throw new AgentRouteInputError(INVALID_RAW_CODEX_RESUME_MESSAGE)
  }
  return legacyResumeSessionId
}

/**
 * Return `sessionRef` only when it exists and its provider equals `mode`;
 * otherwise return `undefined`.
 */
function acceptedSessionRefForMode(
  sessionRef: ReturnType<typeof sanitizeSessionRef>,
  mode: string,
): ReturnType<typeof sanitizeSessionRef> {
  if (!sessionRef || sessionRef.provider !== mode) return undefined
  return sessionRef
}

/** Type guard: true for strings with at least one character. */
function isNonEmptyString(value: unknown): value is string {
  return typeof value === 'string' && value.length > 0
}
/** True when the request payload opts in to waiting for the Codex identity (`waitForCodexIdentity` is truthy). */
function shouldWaitForCodexIdentity(payload: Record<string, unknown>): boolean {
  const flag = payload.waitForCodexIdentity
  return truthy(flag)
}

/** Type guard: the registry exposes both `on` and `off`, so it can be subscribed to and unsubscribed from. */
function registrySupportsEvents(registry: any): registry is {
  input: (terminalId: string, data: string) => TerminalInputResult
  on: (event: string, handler: (...args: any[]) => void) => void
  off: (event: string, handler: (...args: any[]) => void) => void
} {
  if (typeof registry?.on !== 'function') return false
  return typeof registry?.off === 'function'
}

/**
 * Write `data` to a terminal, optionally waiting for a pending Codex identity.
 *
 * The first attempt is always made synchronously. Its result is returned as-is unless
 * all of the following hold: the result is `blocked_codex_identity_pending`, the caller
 * set `options.waitForCodexIdentity`, and the registry supports `on`/`off`.
 * In that case the returned promise resolves with the first of:
 *  - a retry (triggered by a queued microtask or by a matching
 *    `terminal.codex.durability.updated` event) that is no longer pending,
 *  - `{ status: 'not_running' }` when a matching `terminal.exit` event arrives,
 *  - `{ status: 'blocked_codex_identity_capture_timeout', terminalId }` once
 *    `CODEX_INPUT_READY_WAIT_TIMEOUT_MS` elapses.
 * Listeners and the timer are removed as soon as the promise settles.
 */
async function sendTerminalInput(
  registry: any,
  terminalId: string,
  data: string,
  options: { waitForCodexIdentity?: boolean } = {},
): Promise<TerminalInputResult> {
  const PENDING = 'blocked_codex_identity_pending'
  const DURABILITY_EVENT = 'terminal.codex.durability.updated'
  const EXIT_EVENT = 'terminal.exit'
  const attempt = (): TerminalInputResult => registry.input(terminalId, data) as TerminalInputResult

  const initial = attempt()
  const callerWantsToWait = initial.status === PENDING && Boolean(options.waitForCodexIdentity)
  if (!callerWantsToWait || !registrySupportsEvents(registry)) {
    return initial
  }

  return new Promise<TerminalInputResult>((resolve) => {
    let done = false
    let timer: ReturnType<typeof setTimeout> | undefined

    // Resolve exactly once, tearing down the timer and both subscriptions first.
    function settle(result: TerminalInputResult): void {
      if (done) return
      done = true
      if (timer) clearTimeout(timer)
      registry.off(DURABILITY_EVENT, handleDurabilityUpdated)
      registry.off(EXIT_EVENT, handleTerminalExit)
      resolve(result)
    }

    // Re-send the input; keep waiting while the identity is still pending.
    function tryAgain(): void {
      if (done) return
      const outcome = attempt()
      if (outcome.status !== PENDING) settle(outcome)
    }

    function handleDurabilityUpdated(event: { terminalId?: string }): void {
      if (event?.terminalId === terminalId) tryAgain()
    }

    function handleTerminalExit(event: { terminalId?: string }): void {
      if (event?.terminalId === terminalId) settle({ status: 'not_running' })
    }

    registry.on(DURABILITY_EVENT, handleDurabilityUpdated)
    registry.on(EXIT_EVENT, handleTerminalExit)
    timer = setTimeout(() => {
      settle({ status: 'blocked_codex_identity_capture_timeout', terminalId })
    }, CODEX_INPUT_READY_WAIT_TIMEOUT_MS)
    // Retry once right after subscribing, in case the update fired before the listeners existed.
    queueMicrotask(tryAgain)
  })
}
terminal = registry.create({ mode: effectiveMode, @@ -397,8 +524,6 @@ export function createAgentApiRouter({ const launchResumeSessionId = launch.resumeSessionId assertTerminalAdmission() await adoptCodexLaunch(launch, terminal.terminalId) - assertTerminalAdmission() - await waitForCodexResumeReadiness(launch, resumeSessionId) assertCodexCreateTerminalRunning(terminal) assertTerminalAdmission() publishCodexLaunch(registry, launch, terminal.terminalId) @@ -410,7 +535,8 @@ export function createAgentApiRouter({ status: 'running', mode: mode || 'shell', shell: shell || 'system', - resumeSessionId: launchResumeSessionId, + ...(acceptedSessionRef ? { sessionRef: acceptedSessionRef } : {}), + ...(launchResumeSessionId && !acceptedSessionRef ? { resumeSessionId: launchResumeSessionId } : {}), initialCwd: cwd, } @@ -425,7 +551,8 @@ export function createAgentApiRouter({ shell, terminalId, initialCwd: cwd, - resumeSessionId: paneContent?.resumeSessionId, + ...(paneContent?.resumeSessionId ? { resumeSessionId: paneContent.resumeSessionId } : {}), + ...(paneContent?.sessionRef ? { sessionRef: paneContent.sessionRef } : {}), paneId, paneContent, }, @@ -437,6 +564,7 @@ export function createAgentApiRouter({ } const { tabId, paneId } = layoutStore.createTab({ title: name, browser, editor }) + createdTabId = tabId layoutStore.attachPaneContent(tabId, paneId, paneContent) wsHandler?.broadcastUiCommand({ @@ -449,6 +577,7 @@ export function createAgentApiRouter({ terminalId, initialCwd: cwd, resumeSessionId: paneContent?.resumeSessionId, + sessionRef: paneContent?.sessionRef, paneId, paneContent, }, @@ -460,6 +589,13 @@ export function createAgentApiRouter({ await cleanupFailedCodexCreate(registry, createdTerminalId ?? 
terminalIdFromCreateError(err), launch).catch((cleanupError) => { responseError = combineWithCleanupError(err, cleanupError) }) + if (createdTabId && typeof layoutStore.closeTab === 'function') { + try { + layoutStore.closeTab(createdTabId) + } catch { + // best-effort cleanup; terminal/sidecar cleanup errors above remain authoritative + } + } const status = agentRouteErrorStatus(responseError) res.status(status).json(fail(responseError?.message || 'Failed to create tab')) } @@ -746,6 +882,7 @@ export function createAgentApiRouter({ const timeoutMs = Number.isFinite(timeoutSeconds) ? timeoutSeconds * 1000 : 30000 let launch: ResolvedSpawnProviderSettings | undefined let createdTerminalId: string | undefined + let createdTabId: string | undefined try { assertTerminalAdmission() launch = await resolveSpawnProviderSettings(mode, configStore, {}, { @@ -757,6 +894,7 @@ export function createAgentApiRouter({ const created = layoutStore.createTab?.({ title }) const tabId = created?.tabId || nanoid() const paneId = created?.paneId || nanoid() + createdTabId = created?.tabId const sessionBindingReason = getCodexSessionBindingReason(mode) assertTerminalAdmission() const terminal = registry.create({ @@ -782,7 +920,12 @@ export function createAgentApiRouter({ const sentinel = `__FRESHELL_DONE_${nanoid()}__` const input = capture ? `${command}; echo ${sentinel}\r` : `${command}\r` - registry.input(terminal.terminalId, input) + const inputResult = await sendTerminalInput(registry, terminal.terminalId, input, { + waitForCodexIdentity: mode === 'codex', + }) + if (inputResult.status !== 'written') { + throw new Error(terminalInputFailureMessage(inputResult)) + } if (!capture || detached) { const message = detached ? 'command started (detached)' : 'command sent' @@ -807,6 +950,13 @@ export function createAgentApiRouter({ await cleanupFailedCodexCreate(registry, createdTerminalId ?? 
terminalIdFromCreateError(err), launch).catch((cleanupError) => { responseError = combineWithCleanupError(err, cleanupError) }) + if (createdTabId && typeof layoutStore.closeTab === 'function') { + try { + layoutStore.closeTab(createdTabId) + } catch { + // best-effort cleanup; terminal/sidecar cleanup errors above remain authoritative + } + } const status = agentRouteErrorStatus(responseError) return res.status(status).json(fail(responseError?.message || 'Failed to run command')) } @@ -823,6 +973,18 @@ export function createAgentApiRouter({ const direction = req.body?.direction || 'vertical' const wantsBrowser = !!req.body?.browser const wantsEditor = !!req.body?.editor + const splitMode = !wantsBrowser && !wantsEditor ? req.body?.mode || 'shell' : undefined + const requestedSessionRef = splitMode ? sanitizeSessionRef(req.body?.sessionRef) : undefined + const acceptedSessionRef = splitMode + ? acceptedSessionRefForMode(requestedSessionRef, splitMode) + : undefined + const requestedResumeSessionId = splitMode + ? requestedResumeSessionIdForMode( + requestedSessionRef, + splitMode, + req.body?.resumeSessionId, + ) + : undefined if (!wantsBrowser && !wantsEditor) { assertTerminalAdmission() } @@ -849,23 +1011,23 @@ export function createAgentApiRouter({ } else if (wantsEditor) { content = { kind: 'editor', filePath: req.body.editor, language: null, readOnly: false, content: '', viewMode: 'source' } } else { - const splitMode = req.body?.mode || 'shell' + const terminalMode = splitMode ?? 
'shell' launch = await resolveSpawnProviderSettings( - splitMode, + terminalMode, configStore, {}, { cwd: req.body?.cwd, - resumeSessionId: req.body?.resumeSessionId, + resumeSessionId: requestedResumeSessionId, codexLaunchPlanner, assertTerminalCreateAccepted: assertTerminalAdmission, }, ) assertTerminalAdmission() - const sessionBindingReason = getCodexSessionBindingReason(splitMode, req.body?.resumeSessionId) + const sessionBindingReason = getCodexSessionBindingReason(terminalMode, requestedResumeSessionId) assertTerminalAdmission() const terminal = registry.create({ - mode: splitMode, + mode: terminalMode, shell: req.body?.shell, cwd: req.body?.cwd, resumeSessionId: launch.resumeSessionId, @@ -877,8 +1039,6 @@ export function createAgentApiRouter({ const launchResumeSessionId = launch.resumeSessionId assertTerminalAdmission() await adoptCodexLaunch(launch, terminal.terminalId) - assertTerminalAdmission() - await waitForCodexResumeReadiness(launch, req.body?.resumeSessionId) assertCodexCreateTerminalRunning(terminal) assertTerminalAdmission() publishCodexLaunch(registry, launch, terminal.terminalId) @@ -890,7 +1050,8 @@ export function createAgentApiRouter({ status: 'running', mode: req.body?.mode || 'shell', shell: req.body?.shell || 'system', - ...(launchResumeSessionId ? { resumeSessionId: launchResumeSessionId } : {}), + ...(acceptedSessionRef ? { sessionRef: acceptedSessionRef } : {}), + ...(launchResumeSessionId && !acceptedSessionRef ? 
{ resumeSessionId: launchResumeSessionId } : {}), } } @@ -1080,6 +1241,13 @@ export function createAgentApiRouter({ const tabId = target?.tabId if (!tabId) return res.status(404).json(fail('pane not found')) const effectiveMode = req.body?.mode || 'shell' + const requestedSessionRef = sanitizeSessionRef(req.body?.sessionRef) + const acceptedSessionRef = acceptedSessionRefForMode(requestedSessionRef, effectiveMode) + const requestedResumeSessionId = requestedResumeSessionIdForMode( + requestedSessionRef, + effectiveMode, + req.body?.resumeSessionId, + ) assertTerminalAdmission() launch = await resolveSpawnProviderSettings( effectiveMode, @@ -1087,13 +1255,13 @@ export function createAgentApiRouter({ {}, { cwd: req.body?.cwd, - resumeSessionId: req.body?.resumeSessionId, + resumeSessionId: requestedResumeSessionId, codexLaunchPlanner, assertTerminalCreateAccepted: assertTerminalAdmission, }, ) assertTerminalAdmission() - const sessionBindingReason = getCodexSessionBindingReason(effectiveMode, req.body?.resumeSessionId) + const sessionBindingReason = getCodexSessionBindingReason(effectiveMode, requestedResumeSessionId) assertTerminalAdmission() const terminal = registry.create({ mode: effectiveMode, @@ -1108,8 +1276,6 @@ export function createAgentApiRouter({ const launchResumeSessionId = launch.resumeSessionId assertTerminalAdmission() await adoptCodexLaunch(launch, terminal.terminalId) - assertTerminalAdmission() - await waitForCodexResumeReadiness(launch, req.body?.resumeSessionId) assertCodexCreateTerminalRunning(terminal) assertTerminalAdmission() publishCodexLaunch(registry, launch, terminal.terminalId) @@ -1121,7 +1287,8 @@ export function createAgentApiRouter({ mode: req.body?.mode || 'shell', shell: req.body?.shell || 'system', createRequestId: nanoid(), - ...(launchResumeSessionId ? { resumeSessionId: launchResumeSessionId } : {}), + ...(acceptedSessionRef ? { sessionRef: acceptedSessionRef } : {}), + ...(launchResumeSessionId && !acceptedSessionRef ? 
{ resumeSessionId: launchResumeSessionId } : {}), } layoutStore.attachPaneContent(tabId, paneId, content) wsHandler?.broadcastUiCommand({ command: 'pane.attach', payload: { tabId, paneId, content } }) @@ -1174,7 +1341,7 @@ export function createAgentApiRouter({ res.json(ok(undefined, 'navigate requested')) }) - router.post('/panes/:id/send-keys', (req, res) => { + router.post('/panes/:id/send-keys', async (req, res) => { const resolved = resolvePaneTarget(req.params.id) if (rejectPaneTargetError(res, resolved)) return const paneId = resolved.paneId || req.params.id @@ -1186,8 +1353,13 @@ export function createAgentApiRouter({ if (target?.paneId) terminalId = layoutStore.resolvePaneToTerminal?.(target.paneId) } if (!terminalId) return res.status(404).json(fail('terminal not found')) - const okInput = registry.input(terminalId, data) - res.json(ok({ terminalId }, okInput ? 'input sent' : 'terminal not running')) + const inputResult = await sendTerminalInput(registry, terminalId, data, { + waitForCodexIdentity: shouldWaitForCodexIdentity(payload), + }) + if (inputResult.status !== 'written') { + return res.status(409).json(fail(terminalInputFailureMessage(inputResult))) + } + res.json(ok({ terminalId }, 'input sent')) }) return router diff --git a/server/cli/index.ts b/server/cli/index.ts index 6a0a8b346..248caf75a 100644 --- a/server/cli/index.ts +++ b/server/cli/index.ts @@ -7,6 +7,7 @@ import { resolveConfig } from './config.js' import { resolveTarget } from './targets.js' import { runCommand as sendKeysCommand } from './commands/sendKeys.js' import { partitionSendKeysArgs } from './send-keys-args.js' +import { INVALID_RAW_CODEX_RESUME_MESSAGE } from '../coding-cli/codex-app-server/restore-decision.js' type Flags = Record<string, string | boolean> @@ -106,6 +107,45 @@ const isTruthy = (value: unknown) => value === true || value === 'true' || value const unwrap = (response: any) => (response && typeof response === 'object' && 'data' in response ? 
response.data : response) +function rejectRawCodexResume(mode: unknown, resumeSessionId: unknown): boolean { + if (mode === 'codex' && typeof resumeSessionId === 'string' && resumeSessionId.length > 0) { + writeError(INVALID_RAW_CODEX_RESUME_MESSAGE) + process.exitCode = 1 + return true + } + return false +} + +type CliSessionRef = { provider: string; sessionId: string } + +function resolveSessionRefFlag(mode: unknown, raw: unknown): { rejected: boolean; sessionRef?: CliSessionRef } { + if (raw === undefined) return { rejected: false } + if (typeof raw !== 'string' || raw.trim().length === 0) { + writeError('--session-ref must use provider:sessionId syntax.') + process.exitCode = 1 + return { rejected: true } + } + const separator = raw.indexOf(':') + if (separator <= 0 || separator === raw.length - 1) { + writeError('--session-ref must use provider:sessionId syntax.') + process.exitCode = 1 + return { rejected: true } + } + const provider = raw.slice(0, separator).trim() + const sessionId = raw.slice(separator + 1).trim() + if (!provider || !sessionId) { + writeError('--session-ref must use provider:sessionId syntax.') + process.exitCode = 1 + return { rejected: true } + } + if (typeof mode !== 'string' || mode !== provider) { + writeError('--session-ref provider must match --mode.') + process.exitCode = 1 + return { rejected: true } + } + return { rejected: false, sessionRef: { provider, sessionId } } +} + async function fetchTabs(client: ReturnType<typeof createHttpClient>): Promise<{ tabs: TabSummary[]; activeTabId?: string | null }> { const res = await client.get('/api/tabs') const data = unwrap(res) @@ -311,12 +351,27 @@ async function main() { const browser = getFlag(flags, 'browser') as string | undefined const editor = getFlag(flags, 'editor') as string | undefined const resumeSessionId = getFlag(flags, 'resume') as string | undefined + const sessionRefResult = resolveSessionRefFlag(mode, getFlag(flags, 'session-ref')) const prompt = getFlag(flags, 
'prompt') as string | undefined + if (rejectRawCodexResume(mode, resumeSessionId)) return + if (sessionRefResult.rejected) return - const res = await client.post('/api/tabs', { name, mode, shell, cwd, browser, editor, resumeSessionId }) + const res = await client.post('/api/tabs', { + name, + mode, + shell, + cwd, + browser, + editor, + resumeSessionId, + ...(sessionRefResult.sessionRef ? { sessionRef: sessionRefResult.sessionRef } : {}), + }) const data = unwrap(res) if (prompt && data?.paneId) { - await client.post(`/api/panes/${encodeURIComponent(data.paneId)}/send-keys`, { data: `${prompt}\r` }) + await client.post(`/api/panes/${encodeURIComponent(data.paneId)}/send-keys`, { + data: `${prompt}\r`, + ...(mode === 'codex' ? { waitForCodexIdentity: true } : {}), + }) } writeJson(res) return @@ -405,6 +460,10 @@ async function main() { const mode = getFlag(flags, 'mode') as string | undefined const shell = getFlag(flags, 'shell') as string | undefined const cwd = getFlag(flags, 'cwd') as string | undefined + const resumeSessionId = getFlag(flags, 'resume') as string | undefined + const sessionRefResult = resolveSessionRefFlag(mode, getFlag(flags, 'session-ref')) + if (rejectRawCodexResume(mode, resumeSessionId)) return + if (sessionRefResult.rejected) return const resolved = await resolvePaneTarget(client, target) if (!resolved.pane?.id) { @@ -420,6 +479,8 @@ async function main() { mode, shell, cwd, + resumeSessionId, + ...(sessionRefResult.sessionRef ? 
{ sessionRef: sessionRefResult.sessionRef } : {}), }) writeJson(res) return @@ -536,13 +597,23 @@ async function main() { const mode = getFlag(flags, 'mode') as string | undefined const shell = getFlag(flags, 'shell') as string | undefined const cwd = getFlag(flags, 'cwd') as string | undefined + const resumeSessionId = getFlag(flags, 'resume') as string | undefined + const sessionRefResult = resolveSessionRefFlag(mode, getFlag(flags, 'session-ref')) + if (rejectRawCodexResume(mode, resumeSessionId)) return + if (sessionRefResult.rejected) return const resolved = await resolvePaneTarget(client, target) if (!resolved.pane?.id) { writeError(resolved.message || 'pane not found') process.exitCode = 1 return } - const res = await client.post(`/api/panes/${encodeURIComponent(resolved.pane.id)}/respawn`, { mode, shell, cwd }) + const res = await client.post(`/api/panes/${encodeURIComponent(resolved.pane.id)}/respawn`, { + mode, + shell, + cwd, + resumeSessionId, + ...(sessionRefResult.sessionRef ? 
{ sessionRef: sessionRefResult.sessionRef } : {}), + }) writeJson(res) return } diff --git a/server/coding-cli/codex-app-server/client.ts b/server/coding-cli/codex-app-server/client.ts index c0154263d..23a19b937 100644 --- a/server/coding-cli/codex-app-server/client.ts +++ b/server/coding-cli/codex-app-server/client.ts @@ -13,6 +13,8 @@ import { CodexThreadLifecycleNotificationSchema, CodexThreadStartedNotificationSchema, CodexThreadOperationResultSchema, + CodexTurnCompletedNotificationSchema, + CodexTurnStartedNotificationSchema, type CodexInitializeResult, type CodexRpcError, type CodexThreadHandle, @@ -60,6 +62,12 @@ export type CodexAppServerDisconnectEvent = { error?: Error } +export type CodexTurnEvent = { + threadId: string + turnId?: string + params: Record<string, unknown> +} + function normalizeThread(thread: CodexThreadHandle): CodexThreadHandle { return { ...thread, @@ -79,6 +87,8 @@ export class CodexAppServerClient { private readonly threadLifecycleHandlers = new Set<(event: CodexThreadLifecycleEvent) => void>() private readonly disconnectHandlers = new Set<(event: CodexAppServerDisconnectEvent) => void>() private readonly fsChangedHandlers = new Set<(event: { watchId: string; changedPaths: string[] }) => void>() + private readonly turnStartedHandlers = new Set<(event: CodexTurnEvent) => void>() + private readonly turnCompletedHandlers = new Set<(event: CodexTurnEvent) => void>() private lifecycleLossHandlers = new Set<(event: CodexThreadLifecycleLossEvent) => void>() constructor( @@ -125,7 +135,9 @@ export class CodexAppServerClient { if (!parsed.success) { throw new Error('Codex app-server returned an invalid thread/start payload.') } - return { threadId: parsed.data.thread.id } + const thread = normalizeThread(parsed.data.thread) + this.emitThreadStartedEvidence(thread) + return { threadId: thread.id } } async resumeThread( @@ -140,7 +152,9 @@ export class CodexAppServerClient { if (!parsed.success) { throw new Error('Codex app-server returned an 
/**
 * Build a `CodexTurnEvent` from notification params and hand it to every handler in
 * `handlers`. `turnId` is copied onto the event only when it is a string; the full
 * `params` object is always attached.
 */
private emitTurnEvent(handlers: Set<(event: CodexTurnEvent) => void>, params: { threadId: string; turnId?: string } & Record<string, unknown>): void {
  const { threadId, turnId } = params
  const event: CodexTurnEvent = typeof turnId === 'string'
    ? { threadId, turnId, params }
    : { threadId, params }
  handlers.forEach((handler) => {
    handler(event)
  })
}
/** Subset of `node:fs/promises` the proof needs; injectable for tests. */
type ProofFs = Pick<typeof fsp, 'open' | 'stat'>

/** Number of bytes requested per read while scanning for the first newline. */
const FIRST_RECORD_CHUNK_BYTES = 8192
/** Upper bound on the bytes read while looking for the end of the first record. */
const MAX_FIRST_RECORD_BYTES = 1024 * 1024

export type CodexRolloutProofSuccess = {
  ok: true
  candidateThreadId: string
  rolloutPath: string
  rolloutProofId: string
}

export type CodexRolloutProofFailure = {
  ok: false
  reason: CodexRolloutProofFailureReason
  message: string
  candidateThreadId: string
  rolloutPath: string
}

export type CodexRolloutProofResult = CodexRolloutProofSuccess | CodexRolloutProofFailure

/**
 * Check that the file at `rolloutPath` starts with a `session_meta` JSONL record whose
 * `payload.id` equals `candidateThreadId`.
 *
 * The function never throws for file-system or parsing problems; each is reported as a
 * `CodexRolloutProofFailure` with a specific `reason`:
 *  - `invalid_path`: `rolloutPath` is not absolute
 *  - `mismatched_thread_id`: `candidateThreadId` is empty, or differs from `payload.id`
 *  - `missing`: the file does not exist (`ENOENT` from `stat`, or from `open` when the
 *    file disappears between the two calls)
 *  - `read_error`: any other `stat`/`open`/`read` failure
 *  - `not_regular_file`, `empty`, `malformed_json`, `wrong_record_type`,
 *    `missing_payload_id`: the first record is absent or not shaped as required
 *
 * @param input.rolloutPath       Absolute path of the rollout file to inspect.
 * @param input.candidateThreadId Thread id the rollout is expected to belong to.
 * @param input.fsImpl            Optional `open`/`stat` implementation; defaults to `node:fs/promises`.
 */
export async function proofCodexRollout(input: {
  rolloutPath: string
  candidateThreadId: string
  fsImpl?: ProofFs
}): Promise<CodexRolloutProofResult> {
  const fsImpl = input.fsImpl ?? fsp
  const rolloutPath = input.rolloutPath
  const candidateThreadId = input.candidateThreadId

  const fail = (reason: CodexRolloutProofFailureReason, message: string): CodexRolloutProofFailure => ({
    ok: false,
    reason,
    message,
    candidateThreadId,
    rolloutPath,
  })

  if (!path.isAbsolute(rolloutPath)) {
    return fail('invalid_path', 'Codex rollout proof path must be absolute.')
  }
  if (!candidateThreadId) {
    return fail('mismatched_thread_id', 'Codex candidate thread id is empty.')
  }

  let stat: Awaited<ReturnType<ProofFs['stat']>>
  try {
    stat = await fsImpl.stat(rolloutPath)
  } catch (error) {
    if (isMissingFileError(error)) {
      return fail('missing', 'Codex rollout proof file does not exist.')
    }
    return fail('read_error', `Could not stat Codex rollout proof file: ${errorMessage(error)}`)
  }

  if (!stat.isFile()) {
    return fail('not_regular_file', 'Codex rollout proof path is not a regular file.')
  }

  let firstLine: string
  try {
    firstLine = (await readFirstLine(fsImpl, rolloutPath)).trim()
  } catch (error) {
    // The file can vanish between stat() and open(); report that as "missing", the same
    // way a stat() ENOENT is reported, rather than as a generic read error.
    if (isMissingFileError(error)) {
      return fail('missing', 'Codex rollout proof file does not exist.')
    }
    return fail('read_error', `Could not read Codex rollout proof file: ${errorMessage(error)}`)
  }

  if (!firstLine) {
    return fail('empty', 'Codex rollout proof file does not start with a JSONL record.')
  }

  let firstRecord: unknown
  try {
    firstRecord = JSON.parse(firstLine)
  } catch {
    return fail('malformed_json', 'Codex rollout proof first JSONL record is malformed.')
  }

  if (!firstRecord || typeof firstRecord !== 'object') {
    return fail('malformed_json', 'Codex rollout proof first JSONL record is not an object.')
  }

  const record = firstRecord as Record<string, unknown>
  if (record.type !== 'session_meta') {
    return fail('wrong_record_type', 'Codex rollout proof first JSONL record is not session_meta.')
  }

  const payload = record.payload
  const rolloutProofId = payload && typeof payload === 'object'
    ? (payload as Record<string, unknown>).id
    : undefined
  if (typeof rolloutProofId !== 'string' || rolloutProofId.length === 0) {
    return fail('missing_payload_id', 'Codex rollout proof session_meta payload.id is missing.')
  }

  if (rolloutProofId !== candidateThreadId) {
    return fail('mismatched_thread_id', 'Codex rollout proof id does not match candidate thread id.')
  }

  return {
    ok: true,
    candidateThreadId,
    rolloutPath,
    rolloutProofId,
  }
}

/**
 * Read the text before the first `\n` of `filePath`, without loading the whole file.
 *
 * Reads at most `MAX_FIRST_RECORD_BYTES` in `FIRST_RECORD_CHUNK_BYTES` chunks. If no
 * newline is found within that budget (or before end of file), everything read so far is
 * returned. Bytes are decoded as UTF-8 only after all chunks are joined, and one trailing
 * `\r` is removed. The file handle is always closed.
 */
async function readFirstLine(fsImpl: ProofFs, filePath: string): Promise<string> {
  const handle = await fsImpl.open(filePath, 'r')
  const chunks: Buffer[] = []
  let bytesSeen = 0

  try {
    while (bytesSeen < MAX_FIRST_RECORD_BYTES) {
      const buffer = Buffer.alloc(Math.min(FIRST_RECORD_CHUNK_BYTES, MAX_FIRST_RECORD_BYTES - bytesSeen))
      const { bytesRead } = await handle.read(buffer, 0, buffer.length, bytesSeen)
      if (bytesRead === 0) break

      const slice = buffer.subarray(0, bytesRead)
      const newlineIndex = slice.indexOf(10) // 10 === '\n'
      if (newlineIndex >= 0) {
        chunks.push(slice.subarray(0, newlineIndex))
        return Buffer.concat(chunks).toString('utf8').replace(/\r$/, '')
      }

      chunks.push(slice)
      bytesSeen += bytesRead
    }
  } finally {
    await handle.close()
  }

  return Buffer.concat(chunks).toString('utf8').replace(/\r$/, '')
}

/** True when `error` is an object carrying the `ENOENT` error code. */
function isMissingFileError(error: unknown): boolean {
  return typeof error === 'object'
    && error !== null
    && (error as NodeJS.ErrnoException).code === 'ENOENT'
}

/** Best-effort human-readable text for an unknown thrown value. */
function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error)
}
fsp + } + + async read(terminalId: string): Promise<CodexDurabilityStoreRecord | undefined> { + const filePath = this.recordPath(terminalId) + let raw: string + try { + raw = await this.fsImpl.readFile(filePath, 'utf8') + } catch (error) { + if ((error as NodeJS.ErrnoException).code === 'ENOENT') return undefined + throw error + } + const parsed = CodexDurabilityStoreRecordSchema.safeParse(JSON.parse(raw)) + if (!parsed.success) { + throw new Error(`Codex durability store record is invalid for terminal ${terminalId}.`) + } + return parsed.data + } + + async readForRestoreLocator(locator: CodexDurabilityRestoreLocator): Promise<CodexDurabilityStoreRecord | undefined> { + if (locator.terminalId) { + return this.read(locator.terminalId) + } + if (!locator.tabId || !locator.paneId) return undefined + + let entries: string[] + try { + entries = await this.fsImpl.readdir(this.dir) + } catch (error) { + if ((error as NodeJS.ErrnoException).code === 'ENOENT') return undefined + throw error + } + + const matches: CodexDurabilityStoreRecord[] = [] + for (const entry of entries) { + if (!entry.endsWith('.json')) continue + let terminalId: string + let record: CodexDurabilityStoreRecord | undefined + try { + terminalId = decodeURIComponent(entry.slice(0, -'.json'.length)) + record = await this.read(terminalId) + } catch { + continue + } + if (!record) continue + if (record.tabId !== locator.tabId || record.paneId !== locator.paneId) continue + if (locator.serverInstanceId && record.serverInstanceId !== locator.serverInstanceId) continue + matches.push(record) + } + + if (matches.length > 1) { + throw new CodexDurabilityRestoreAmbiguousError(locator, matches.map((match) => match.terminalId)) + } + return matches[0] + } + + async write(record: CodexDurabilityStoreRecord): Promise<CodexDurabilityStoreRecord> { + const parsed = CodexDurabilityStoreRecordSchema.parse(record) + const existing = await this.read(parsed.terminalId) + if (existing?.candidate && parsed.candidate) { + if 
( + existing.candidate.candidateThreadId !== parsed.candidate.candidateThreadId + || existing.candidate.rolloutPath !== parsed.candidate.rolloutPath + ) { + throw new Error(`Codex durability candidate mismatch for terminal ${parsed.terminalId}.`) + } + } + + await this.fsImpl.mkdir(this.dir, { recursive: true }) + const filePath = this.recordPath(parsed.terminalId) + const tmpPath = `${filePath}.tmp-${process.pid}-${Date.now()}-${Math.random().toString(16).slice(2)}` + await this.fsImpl.writeFile(tmpPath, `${JSON.stringify(parsed, null, 2)}\n`, { mode: 0o600 }) + await this.fsImpl.rename(tmpPath, filePath) + return parsed + } + + async delete(terminalId: string): Promise<void> { + try { + await this.fsImpl.unlink(this.recordPath(terminalId)) + } catch (error) { + if ((error as NodeJS.ErrnoException).code !== 'ENOENT') throw error + } + } + + recordPath(terminalId: string): string { + return path.join(this.dir, `${encodeURIComponent(terminalId)}.json`) + } +} diff --git a/server/coding-cli/codex-app-server/launch-planner.ts b/server/coding-cli/codex-app-server/launch-planner.ts index 020bf3278..e8eb913d9 100644 --- a/server/coding-cli/codex-app-server/launch-planner.ts +++ b/server/coding-cli/codex-app-server/launch-planner.ts @@ -1,22 +1,40 @@ import type { CodexAppServerRuntime } from './runtime.js' -import type { CodexThreadLifecycleLossEvent } from './client.js' +import type { CodexThreadLifecycleEvent, CodexThreadLifecycleLossEvent, CodexTurnEvent } from './client.js' import { waitForAllSettledOrThrow } from '../../shutdown-join.js' +import { + CodexRemoteProxy, + type CodexRemoteProxyCandidate, + type CodexRemoteProxyRepairTrigger, +} from './remote-proxy.js' type CodexRuntimeLike = Pick< CodexAppServerRuntime, - 'ensureReady' | 'startThread' | 'listLoadedThreads' | 'shutdown' | 'updateOwnershipMetadata' | 'onThreadLifecycleLoss' + | 'ensureReady' + | 'shutdown' + | 'updateOwnershipMetadata' + | 'onThreadLifecycleLoss' + | 'onFsChanged' + | 'watchPath' + | 
'unwatchPath' > export type CodexLaunchSidecar = { adopt(input: { terminalId: string; generation: number }): Promise<void> - listLoadedThreads(): Promise<string[]> + markCandidatePersisted?(): void + onCandidate?(handler: (candidate: CodexRemoteProxyCandidate) => void): () => void + onTurnStarted?(handler: (event: CodexTurnEvent) => void): () => void + onTurnCompleted?(handler: (event: CodexTurnEvent) => void): () => void + onRepairTrigger?(handler: (event: CodexRemoteProxyRepairTrigger) => void): () => void + onFsChanged?(handler: (event: { watchId: string; changedPaths: string[] }) => void): () => void + onThreadLifecycle?(handler: (event: CodexThreadLifecycleEvent) => void): () => void onLifecycleLoss?(handler: (event: CodexThreadLifecycleLossEvent) => void): () => void + watchPath?(targetPath: string, watchId: string): Promise<{ path: string }> + unwatchPath?(watchId: string): Promise<void> shutdown(): Promise<void> - waitForLoadedThread(threadId: string, options?: { timeoutMs?: number; pollMs?: number }): Promise<void> } export type CodexLaunchPlan = { - sessionId: string + sessionId?: string remote: { wsUrl: string } @@ -69,34 +87,36 @@ export class CodexLaunchPlanner { this.assertAcceptingPlans() const runtime = this.runtimeFactory() - const sidecar = this.createSidecar(runtime) + let proxy: CodexRemoteProxy | undefined + const sidecar = this.createSidecar(runtime, () => proxy) this.activeSidecars.add(sidecar) try { if (input.resumeSessionId) { const ready = await runtime.ensureReady() + proxy = new CodexRemoteProxy({ + upstreamWsUrl: ready.wsUrl, + requireCandidatePersistence: false, + }) + const proxyReady = await proxy.start() this.assertAcceptingPlans() return { sessionId: input.resumeSessionId, remote: { - wsUrl: ready.wsUrl, + wsUrl: proxyReady.wsUrl, }, sidecar, } } - const planResult = await runtime.startThread({ - cwd: input.cwd, - model: input.model, - sandbox: input.sandbox, - approvalPolicy: input.approvalPolicy, - }) + const ready = await 
runtime.ensureReady() + proxy = new CodexRemoteProxy({ upstreamWsUrl: ready.wsUrl }) + const proxyReady = await proxy.start() this.assertAcceptingPlans() return { - sessionId: planResult.threadId, remote: { - wsUrl: planResult.wsUrl, + wsUrl: proxyReady.wsUrl, }, sidecar, } @@ -157,7 +177,7 @@ export class CodexLaunchPlanner { } } - private createSidecar(runtime: CodexRuntimeLike): CodexLaunchSidecar { + private createSidecar(runtime: CodexRuntimeLike, getProxy: () => CodexRemoteProxy | undefined): CodexLaunchSidecar { let shutdownPromise: Promise<void> | null = null let shutdownAttemptStarted = false let shutdownSucceeded = false @@ -170,18 +190,11 @@ export class CodexLaunchPlanner { throw new Error('Codex launch sidecar is shutting down; it cannot be adopted.') } } - const assertReadable = () => { + const assertActive = () => { if (this.shutdownStarted || shutdownAttemptStarted) { - throw new Error('Codex launch sidecar is shutting down; loaded-thread readiness polling stopped.') + throw new Error('Codex launch sidecar is shutting down; remote operations stopped.') } } - const waitForNextPoll = async (pollMs: number) => { - await Promise.race([ - new Promise((resolve) => setTimeout(resolve, pollMs)), - shutdownStarted, - ]) - assertReadable() - } const sidecar: CodexLaunchSidecar = { adopt: async ({ terminalId, generation }) => { assertAdoptable() @@ -190,13 +203,32 @@ export class CodexLaunchPlanner { this.activeSidecars.delete(sidecar) this.failedSidecarShutdowns.delete(sidecar) }, - listLoadedThreads: async () => { - assertReadable() - const loaded = await runtime.listLoadedThreads() - assertReadable() - return loaded + markCandidatePersisted: () => getProxy()?.markCandidatePersisted(), + onCandidate: (handler) => getProxy()?.onCandidate(handler) ?? (() => undefined), + onTurnStarted: (handler) => getProxy()?.onTurnStarted(handler) ?? (() => undefined), + onTurnCompleted: (handler) => getProxy()?.onTurnCompleted(handler) ?? 
(() => undefined), + onRepairTrigger: (handler) => getProxy()?.onRepairTrigger(handler) ?? (() => undefined), + onFsChanged: (handler) => runtime.onFsChanged(handler), + onThreadLifecycle: (handler) => getProxy()?.onThreadLifecycle(handler) ?? (() => undefined), + onLifecycleLoss: (handler) => { + const unsubRuntime = runtime.onThreadLifecycleLoss(handler) + const unsubProxy = getProxy()?.onLifecycleLoss(handler) + return () => { + unsubRuntime() + unsubProxy?.() + } + }, + watchPath: async (targetPath, watchId) => { + assertActive() + const result = await runtime.watchPath(targetPath, watchId) + assertActive() + return result + }, + unwatchPath: async (watchId) => { + assertActive() + await runtime.unwatchPath(watchId) + assertActive() }, - onLifecycleLoss: (handler) => runtime.onThreadLifecycleLoss(handler), shutdown: async () => { if (shutdownSucceeded) return if (shutdownPromise) { @@ -208,7 +240,10 @@ export class CodexLaunchPlanner { notifyShutdownStarted() } const attempt = Promise.resolve() - .then(() => runtime.shutdown()) + .then(async () => { + await getProxy()?.close() + await runtime.shutdown() + }) .then(() => { shutdownSucceeded = true this.activeSidecars.delete(sidecar) @@ -227,19 +262,6 @@ export class CodexLaunchPlanner { } } }, - waitForLoadedThread: async (threadId, options = {}) => { - const timeoutMs = options.timeoutMs ?? 10_000 - const pollMs = options.pollMs ?? 
/** Default number of launch-planning attempts before the last error is surfaced. */
export const CODEX_INITIAL_LAUNCH_ATTEMPTS = 5
/** Base delay between attempts; the wait before retry `n + 1` is this value multiplied by `n`. */
const CODEX_INITIAL_LAUNCH_RETRY_DELAY_MS = 100

/** Minimal structured logger used to report retries. */
type CodexLaunchRetryLogger = {
  warn: (fields: Record<string, unknown>, message: string) => void
}

/** Extracts a printable message from an arbitrary thrown value. */
function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error)
}

/**
 * Calls `planner.planCreate(input)` until it succeeds, retrying with a linearly
 * growing delay (`retryDelayMs * attempt`).
 *
 * A `CodexLaunchConfigError` is never retried. `attempts` is normalised to at
 * least one whole attempt, so a zero, negative, fractional or `NaN` value can no
 * longer skip planning entirely and fail with a meaningless `Error('undefined')`.
 *
 * @param planner - Planner whose `planCreate` is invoked.
 * @param input - Argument forwarded unchanged to `planCreate`.
 * @param attempts - Maximum number of attempts (default {@link CODEX_INITIAL_LAUNCH_ATTEMPTS}).
 * @param retryDelayMs - Base delay in milliseconds between attempts.
 * @param logger - Optional logger that receives one warning per retry.
 * @returns The first successful launch plan.
 * @throws The last planning error; non-`Error` throwables are wrapped in an `Error`.
 */
export async function planCodexLaunchWithRetry({
  planner,
  input,
  attempts = CODEX_INITIAL_LAUNCH_ATTEMPTS,
  retryDelayMs = CODEX_INITIAL_LAUNCH_RETRY_DELAY_MS,
  logger,
}: {
  planner: CodexLaunchPlanner
  input: Parameters<CodexLaunchPlanner['planCreate']>[0]
  attempts?: number
  retryDelayMs?: number
  logger?: CodexLaunchRetryLogger
}): Promise<CodexLaunchPlan> {
  // Always plan at least once; Math.floor keeps Infinity intact for "retry forever" callers.
  const totalAttempts = Number.isNaN(attempts) ? 1 : Math.max(1, Math.floor(attempts))

  let lastError: unknown
  for (let attempt = 1; attempt <= totalAttempts; attempt += 1) {
    try {
      return await planner.planCreate(input)
    } catch (error) {
      lastError = error
      // Configuration errors cannot heal by waiting, and the final attempt has nothing left to retry.
      if (error instanceof CodexLaunchConfigError || attempt >= totalAttempts) break

      const delayMs = retryDelayMs * attempt
      logger?.warn({
        err: error,
        attempt,
        attempts: totalAttempts,
        delayMs,
        cwd: input.cwd,
        hasResumeSessionId: Boolean(input.resumeSessionId),
      }, 'Codex launch planning failed; retrying')
      await delay(delayMs)
    }
  }
  throw lastError instanceof Error ? lastError : new Error(errorMessage(lastError))
}
const log = logger.child({ component: 'codex-remote-proxy' })

/** A thread identity observed on the wire, together with the message kind it was read from. */
export type CodexRemoteProxyCandidate = {
  thread: CodexThreadHandle
  source: 'thread_start_response' | 'thread_started_notification'
}

/** Events emitted to repair-trigger subscribers. */
export type CodexRemoteProxyRepairTrigger =
  | { kind: 'proxy_close' | 'proxy_error' | 'candidate_capture_timeout'; error?: Error }
  | { kind: 'fs_changed'; watchId: string; changedPaths: string[] }

type JsonRpcId = string | number

/** A client `turn/start` frame that is being withheld from upstream. */
type PendingTurnStart = {
  raw: WebSocket.RawData | string
  client: WebSocket
  upstream: WebSocket
  id?: JsonRpcId
  timer: NodeJS.Timeout
}

/** One client socket paired with its dedicated upstream socket. */
type ProxyConnection = {
  client: WebSocket
  upstream: WebSocket
  /** Methods of client requests still awaiting an upstream response, keyed by JSON-RPC id. */
  pendingMethods: Map<JsonRpcId, string>
}

type CodexRemoteProxyOptions = {
  upstreamWsUrl: string
  portAllocator?: () => Promise<LoopbackServerEndpoint>
  requestHoldTimeoutMs?: number
  candidateCaptureTimeoutMs?: number
  requireCandidatePersistence?: boolean
}

const DEFAULT_REQUEST_HOLD_TIMEOUT_MS = 5_000
const DEFAULT_CANDIDATE_CAPTURE_TIMEOUT_MS = 45_000

/**
 * WebSocket proxy placed between a client and an upstream app-server endpoint.
 *
 * Every client connection gets its own upstream connection and frames are relayed in
 * both directions. While relaying, the proxy:
 * - emits a candidate when it sees a `thread/start` response or `thread/started`
 *   notification carrying a thread with a non-empty id;
 * - emits turn, thread-lifecycle, lifecycle-loss and repair-trigger events derived from
 *   upstream notifications and socket failures;
 * - when `requireCandidatePersistence` is enabled (the default), withholds client
 *   `turn/start` requests until {@link markCandidatePersisted} is called, failing them
 *   after `requestHoldTimeoutMs`, and fails the whole capture after `candidateCaptureTimeoutMs`.
 */
export class CodexRemoteProxy {
  private readonly upstreamWsUrl: string
  private readonly portAllocator: () => Promise<LoopbackServerEndpoint>
  private readonly requestHoldTimeoutMs: number
  private readonly candidateCaptureTimeoutMs: number
  private readonly requireCandidatePersistence: boolean
  private server: WebSocketServer | null = null
  private endpoint: LoopbackServerEndpoint | null = null
  private candidatePersisted = false
  private candidateCaptureFailed = false
  private candidateCaptureTimer: NodeJS.Timeout | null = null
  private readonly pendingTurnStarts = new Set<PendingTurnStart>()
  private readonly connections = new Set<ProxyConnection>()
  private readonly candidateHandlers = new Set<(candidate: CodexRemoteProxyCandidate) => void>()
  private readonly turnStartedHandlers = new Set<(event: CodexTurnEvent) => void>()
  private readonly turnCompletedHandlers = new Set<(event: CodexTurnEvent) => void>()
  private readonly repairTriggerHandlers = new Set<(event: CodexRemoteProxyRepairTrigger) => void>()
  private readonly lifecycleHandlers = new Set<(event: CodexThreadLifecycleEvent) => void>()
  private readonly lifecycleLossHandlers = new Set<(event: CodexThreadLifecycleLossEvent) => void>()

  constructor(options: CodexRemoteProxyOptions) {
    this.upstreamWsUrl = options.upstreamWsUrl
    this.portAllocator = options.portAllocator ?? allocateLocalhostPort
    this.requestHoldTimeoutMs = options.requestHoldTimeoutMs ?? DEFAULT_REQUEST_HOLD_TIMEOUT_MS
    this.candidateCaptureTimeoutMs = options.candidateCaptureTimeoutMs ?? DEFAULT_CANDIDATE_CAPTURE_TIMEOUT_MS
    this.requireCandidatePersistence = options.requireCandidatePersistence ?? true
    // Without a persistence requirement nothing is ever held, so start in the "persisted" state.
    this.candidatePersisted = !this.requireCandidatePersistence
  }

  /**
   * URL clients should connect to.
   * @throws Error when the proxy has not been started (or has been closed).
   */
  get wsUrl(): string {
    if (!this.endpoint) {
      throw new Error('Codex remote proxy has not been started.')
    }
    return `ws://${this.endpoint.hostname}:${this.endpoint.port}`
  }

  /**
   * Starts listening on an endpoint obtained from the port allocator.
   * Calling it again while a server exists returns the existing URL.
   *
   * If the server fails to bind, the proxy is reset to the not-started state so that
   * `wsUrl` does not advertise a dead endpoint and a later `start()` can retry.
   */
  async start(): Promise<{ wsUrl: string }> {
    if (this.server) return { wsUrl: this.wsUrl }
    const endpoint = await this.portAllocator()
    await new Promise<void>((resolve, reject) => {
      const server = new WebSocketServer({ host: endpoint.hostname, port: endpoint.port })
      const onStartupError = (error: Error) => {
        if (this.server === server) {
          this.server = null
          this.endpoint = null
        }
        reject(error)
      }
      server.once('error', onStartupError)
      server.once('listening', () => {
        // Swap the one-shot startup handler for a persistent one so a later server
        // error is logged instead of becoming an unhandled 'error' event.
        server.off('error', onStartupError)
        server.on('error', (error) => {
          log.warn({
            err: error,
            upstreamWsUrl: this.upstreamWsUrl,
          }, 'Codex remote proxy server error')
        })
        resolve()
      })
      server.on('connection', (client) => this.handleClientConnection(client))
      this.server = server
      this.endpoint = endpoint
    })
    if (this.requireCandidatePersistence) {
      this.ensureCandidateCaptureTimer()
    }
    log.info({
      wsUrl: this.wsUrl,
      upstreamWsUrl: this.upstreamWsUrl,
      requireCandidatePersistence: this.requireCandidatePersistence,
    }, 'Codex remote proxy listening')
    return { wsUrl: this.wsUrl }
  }

  /**
   * Stops the proxy: cancels the capture timer, fails every held `turn/start`,
   * closes all client/upstream sockets and then the listening server.
   */
  async close(): Promise<void> {
    this.clearCandidateCaptureTimer()
    for (const pending of [...this.pendingTurnStarts]) {
      this.failHeldTurnStart(pending, 'Codex remote proxy is closing before restore identity persistence completed.')
    }
    for (const connection of [...this.connections]) {
      connection.client.close()
      connection.upstream.close()
    }
    const server = this.server
    this.server = null
    this.endpoint = null
    if (!server) return
    await new Promise<void>((resolve) => server.close(() => resolve()))
  }

  /**
   * Signals that the candidate has been persisted: stops the capture timer and forwards
   * every held `turn/start` upstream. No-op once persisted or after capture has failed.
   */
  markCandidatePersisted(): void {
    if (this.candidatePersisted) return
    if (this.candidateCaptureFailed) return
    this.candidatePersisted = true
    this.clearCandidateCaptureTimer()
    for (const pending of [...this.pendingTurnStarts]) {
      this.releaseHeldTurnStart(pending)
    }
  }

  /**
   * Marks candidate capture as failed: emits a `candidate_capture_timeout` repair trigger,
   * fails held `turn/start` requests and closes every connection after sending `message`
   * as a JSON-RPC error. No-op when persistence is not required, already failed, or already persisted.
   */
  failCandidateCapture(message = 'Freshell could not persist Codex restore identity before accepting user input.'): void {
    if (!this.requireCandidatePersistence) return
    if (this.candidateCaptureFailed || this.candidatePersisted) return
    this.candidateCaptureFailed = true
    this.clearCandidateCaptureTimer()
    this.emitRepairTrigger({ kind: 'candidate_capture_timeout' })
    for (const pending of [...this.pendingTurnStarts]) {
      this.failHeldTurnStart(pending, message)
    }
    for (const connection of [...this.connections]) {
      this.sendJsonRpcError(connection.client, undefined, message)
      connection.client.close()
      connection.upstream.close()
    }
  }

  /** Subscribes to candidate observations. @returns An unsubscribe function. */
  onCandidate(handler: (candidate: CodexRemoteProxyCandidate) => void): () => void {
    this.candidateHandlers.add(handler)
    return () => this.candidateHandlers.delete(handler)
  }

  /** Subscribes to upstream `turn/started` notifications. @returns An unsubscribe function. */
  onTurnStarted(handler: (event: CodexTurnEvent) => void): () => void {
    this.turnStartedHandlers.add(handler)
    return () => this.turnStartedHandlers.delete(handler)
  }

  /** Subscribes to upstream `turn/completed` notifications. @returns An unsubscribe function. */
  onTurnCompleted(handler: (event: CodexTurnEvent) => void): () => void {
    this.turnCompletedHandlers.add(handler)
    return () => this.turnCompletedHandlers.delete(handler)
  }

  /** Subscribes to repair triggers. @returns An unsubscribe function. */
  onRepairTrigger(handler: (event: CodexRemoteProxyRepairTrigger) => void): () => void {
    this.repairTriggerHandlers.add(handler)
    return () => this.repairTriggerHandlers.delete(handler)
  }

  /** Subscribes to thread lifecycle events. @returns An unsubscribe function. */
  onThreadLifecycle(handler: (event: CodexThreadLifecycleEvent) => void): () => void {
    this.lifecycleHandlers.add(handler)
    return () => this.lifecycleHandlers.delete(handler)
  }

  /** Subscribes to lifecycle-loss events. @returns An unsubscribe function. */
  onLifecycleLoss(handler: (event: CodexThreadLifecycleLossEvent) => void): () => void {
    this.lifecycleLossHandlers.add(handler)
    return () => this.lifecycleLossHandlers.delete(handler)
  }

  /** Pairs a new client socket with a fresh upstream socket and wires relaying, logging and teardown. */
  private handleClientConnection(client: WebSocket): void {
    if (this.candidateCaptureFailed) {
      this.sendJsonRpcError(client, undefined, 'Freshell timed out before Codex restore identity was captured.')
      client.close()
      return
    }
    const upstream = new WebSocket(this.upstreamWsUrl)
    const connection: ProxyConnection = {
      client,
      upstream,
      pendingMethods: new Map(),
    }
    this.connections.add(connection)
    if (this.requireCandidatePersistence) {
      this.ensureCandidateCaptureTimer()
    }
    log.info({
      proxyWsUrl: this.wsUrl,
      upstreamWsUrl: this.upstreamWsUrl,
      requireCandidatePersistence: this.requireCandidatePersistence,
      activeConnections: this.connections.size,
    }, 'Codex remote proxy client connected')

    client.on('message', (raw, isBinary) => this.handleClientMessage(connection, raw, isBinary))
    upstream.on('message', (raw, isBinary) => this.handleUpstreamMessage(connection, raw, isBinary))
    upstream.on('open', () => {
      log.info({
        proxyWsUrl: this.wsUrl,
        upstreamWsUrl: this.upstreamWsUrl,
      }, 'Codex remote proxy upstream connected')
    })

    // Connections that will remain once this one is gone. The peer's close handler may have
    // removed this connection already, so only subtract it while it is still registered.
    const remainingConnections = () => this.connections.size - (this.connections.has(connection) ? 1 : 0)
    const closeBoth = () => {
      this.connections.delete(connection)
      client.close()
      upstream.close()
    }
    client.on('close', (code, reason) => {
      log.info({
        proxyWsUrl: this.endpoint ? this.wsUrl : undefined,
        upstreamWsUrl: this.upstreamWsUrl,
        code,
        reason: reason.toString(),
        activeConnections: remainingConnections(),
      }, 'Codex remote proxy client closed')
      closeBoth()
    })
    upstream.on('close', (code, reason) => {
      log.warn({
        proxyWsUrl: this.endpoint ? this.wsUrl : undefined,
        upstreamWsUrl: this.upstreamWsUrl,
        code,
        reason: reason.toString(),
        activeConnections: remainingConnections(),
      }, 'Codex remote proxy upstream closed')
      this.emitRepairTrigger({ kind: 'proxy_close' })
      closeBoth()
    })
    client.on('error', (error) => {
      log.warn({
        err: error,
        proxyWsUrl: this.endpoint ? this.wsUrl : undefined,
        upstreamWsUrl: this.upstreamWsUrl,
      }, 'Codex remote proxy client error')
      this.emitRepairTrigger({ kind: 'proxy_error', error })
      closeBoth()
    })
    upstream.on('error', (error) => {
      log.warn({
        err: error,
        proxyWsUrl: this.endpoint ? this.wsUrl : undefined,
        upstreamWsUrl: this.upstreamWsUrl,
      }, 'Codex remote proxy upstream error')
      this.emitRepairTrigger({ kind: 'proxy_error', error })
      closeBoth()
    })
  }

  /** Relays a client frame upstream, remembering request methods and holding `turn/start` when required. */
  private handleClientMessage(connection: ProxyConnection, raw: WebSocket.RawData, isBinary: boolean): void {
    const forward = framePayload(raw, isBinary)
    const parsed = parseJson(raw)
    const method = jsonRpcMethod(parsed)
    const id = jsonRpcId(parsed)
    if (id !== undefined && method !== undefined) {
      connection.pendingMethods.set(id, method)
    }
    if (method !== undefined) {
      log.debug({
        proxyWsUrl: this.endpoint ? this.wsUrl : undefined,
        upstreamWsUrl: this.upstreamWsUrl,
        method,
        id,
      }, 'Codex remote proxy forwarding client request')
    }

    if (this.requireCandidatePersistence && method === 'turn/start' && !this.candidatePersisted) {
      this.holdTurnStart(connection, forward, id)
      return
    }

    sendIfOpen(connection.upstream, forward)
  }

  /**
   * Relays an upstream frame to the client after inspecting it.
   *
   * Only a message that has an `id` and no `method` is a JSON-RPC response. A message
   * carrying a `method` is a notification or a server-initiated request and must not
   * consume an entry from `pendingMethods`, otherwise an upstream request whose id
   * happens to equal a pending client id would hide the real `thread/start` response.
   */
  private handleUpstreamMessage(connection: ProxyConnection, raw: WebSocket.RawData, isBinary: boolean): void {
    const forward = framePayload(raw, isBinary)
    const parsed = parseJson(raw)
    const id = jsonRpcId(parsed)
    const upstreamMethod = jsonRpcMethod(parsed)
    if (id !== undefined && upstreamMethod === undefined) {
      const method = connection.pendingMethods.get(id)
      connection.pendingMethods.delete(id)
      log.debug({
        proxyWsUrl: this.endpoint ? this.wsUrl : undefined,
        upstreamWsUrl: this.upstreamWsUrl,
        method,
        id,
      }, 'Codex remote proxy forwarding upstream response')
      if (method === 'thread/start') {
        this.maybeEmitThreadStartResponseCandidate(parsed)
      }
    } else {
      if (upstreamMethod !== undefined) {
        log.debug({
          proxyWsUrl: this.endpoint ? this.wsUrl : undefined,
          upstreamWsUrl: this.upstreamWsUrl,
          method: upstreamMethod,
          ...(id !== undefined ? { id } : {}),
        }, 'Codex remote proxy forwarding upstream notification')
      }
      this.handleUpstreamNotification(parsed)
    }
    sendIfOpen(connection.client, forward)
  }

  /** Emits a candidate when a `thread/start` response carries `result.thread` with a usable id. */
  private maybeEmitThreadStartResponseCandidate(parsed: unknown): void {
    if (!parsed || typeof parsed !== 'object') return
    const result = (parsed as Record<string, unknown>).result
    const thread = result && typeof result === 'object'
      ? normalizeCandidateThread((result as Record<string, unknown>).thread)
      : undefined
    if (!thread) return
    this.emitCandidate({
      thread,
      source: 'thread_start_response',
    })
  }

  /** Translates recognised upstream notifications into candidate, turn, repair and lifecycle events. */
  private handleUpstreamNotification(parsed: unknown): void {
    const method = parsed && typeof parsed === 'object'
      ? (parsed as Record<string, unknown>).method
      : undefined
    if (method === 'thread/started') {
      const params = (parsed as Record<string, unknown>).params
      const thread = params && typeof params === 'object'
        ? normalizeCandidateThread((params as Record<string, unknown>).thread)
        : undefined
      if (!thread) return
      this.emitCandidate({
        thread,
        source: 'thread_started_notification',
      })
      this.emitThreadLifecycle({
        kind: 'thread_started',
        thread,
      })
      return
    }

    const turnStarted = CodexTurnStartedNotificationSchema.safeParse(parsed)
    if (turnStarted.success) {
      this.emitTurnEvent(this.turnStartedHandlers, turnStarted.data.params)
      return
    }

    const turnCompleted = CodexTurnCompletedNotificationSchema.safeParse(parsed)
    if (turnCompleted.success) {
      this.emitTurnEvent(this.turnCompletedHandlers, turnCompleted.data.params)
      return
    }

    const fsChanged = CodexFsChangedNotificationSchema.safeParse(parsed)
    if (fsChanged.success) {
      this.emitRepairTrigger({ kind: 'fs_changed', ...fsChanged.data.params })
      return
    }

    const lifecycle = CodexThreadLifecycleNotificationSchema.safeParse(parsed)
    if (lifecycle.success) {
      if (lifecycle.data.method === 'thread/closed') {
        this.emitThreadLifecycle({ kind: 'thread_closed', threadId: lifecycle.data.params.threadId })
        this.emitLifecycleLoss({ method: 'thread/closed', threadId: lifecycle.data.params.threadId })
      } else if (lifecycle.data.method === 'thread/status/changed') {
        this.emitThreadLifecycle({
          kind: 'thread_status_changed',
          threadId: lifecycle.data.params.threadId,
          status: lifecycle.data.params.status,
        })
        // Only these two status types are reported as a loss.
        const status = lifecycle.data.params.status.type
        if (status === 'notLoaded' || status === 'systemError') {
          this.emitLifecycleLoss({
            method: 'thread/status/changed',
            threadId: lifecycle.data.params.threadId,
            status,
          })
        }
      }
    }
  }

  /** Parks a `turn/start` frame and arms a timer that fails it after `requestHoldTimeoutMs`. */
  private holdTurnStart(connection: ProxyConnection, raw: WebSocket.RawData | string, id?: JsonRpcId): void {
    const pending: PendingTurnStart = {
      raw,
      client: connection.client,
      upstream: connection.upstream,
      id,
      timer: setTimeout(() => {
        this.failHeldTurnStart(
          pending,
          'Freshell could not persist Codex restore identity before accepting user input.',
        )
      }, this.requestHoldTimeoutMs),
    }
    // The hold timer alone must not keep the process alive.
    pending.timer.unref?.()
    this.pendingTurnStarts.add(pending)
  }

  /** Forwards a held `turn/start` upstream; no-op when it was already released or failed. */
  private releaseHeldTurnStart(pending: PendingTurnStart): void {
    if (!this.pendingTurnStarts.delete(pending)) return
    clearTimeout(pending.timer)
    sendIfOpen(pending.upstream, pending.raw)
  }

  /**
   * Rejects a held `turn/start`: emits a `candidate_capture_timeout` repair trigger, answers
   * the client with a JSON-RPC error and closes both sockets. No-op when already settled.
   */
  private failHeldTurnStart(pending: PendingTurnStart, message: string): void {
    if (!this.pendingTurnStarts.delete(pending)) return
    clearTimeout(pending.timer)
    this.emitRepairTrigger({ kind: 'candidate_capture_timeout' })
    this.sendJsonRpcError(pending.client, pending.id, message)
    pending.client.close()
    pending.upstream.close()
  }

  /** Sends a JSON-RPC error object (code -32000); `id` is included only when defined. */
  private sendJsonRpcError(client: WebSocket, id: JsonRpcId | undefined, message: string): void {
    sendIfOpen(client, JSON.stringify({
      jsonrpc: '2.0',
      ...(id !== undefined ? { id } : {}),
      error: {
        code: -32000,
        message,
      },
    }))
  }

  /** Arms the capture timeout once; skipped when not required, already persisted, or already armed. */
  private ensureCandidateCaptureTimer(): void {
    if (!this.requireCandidatePersistence) return
    if (this.candidatePersisted || this.candidateCaptureTimer) return
    this.candidateCaptureTimer = setTimeout(() => {
      this.failCandidateCapture('Freshell timed out before Codex restore identity was captured.')
    }, this.candidateCaptureTimeoutMs)
    this.candidateCaptureTimer.unref?.()
  }

  private clearCandidateCaptureTimer(): void {
    if (!this.candidateCaptureTimer) return
    clearTimeout(this.candidateCaptureTimer)
    this.candidateCaptureTimer = null
  }

  private emitCandidate(candidate: CodexRemoteProxyCandidate): void {
    log.info({
      threadId: candidate.thread.id,
      rolloutPath: candidate.thread.path,
      source: candidate.source,
    }, 'Codex remote proxy observed candidate restore identity')
    for (const handler of this.candidateHandlers) {
      handler(candidate)
    }
  }

  /** Builds a turn event from notification params (keeping the raw params) and dispatches it. */
  private emitTurnEvent(handlers: Set<(event: CodexTurnEvent) => void>, params: { threadId: string; turnId?: string } & Record<string, unknown>): void {
    const event: CodexTurnEvent = {
      threadId: params.threadId,
      ...(typeof params.turnId === 'string' ? { turnId: params.turnId } : {}),
      params,
    }
    for (const handler of handlers) {
      handler(event)
    }
  }

  private emitRepairTrigger(event: CodexRemoteProxyRepairTrigger): void {
    for (const handler of this.repairTriggerHandlers) {
      handler(event)
    }
  }

  private emitThreadLifecycle(event: CodexThreadLifecycleEvent): void {
    for (const handler of this.lifecycleHandlers) {
      handler(event)
    }
  }

  private emitLifecycleLoss(event: CodexThreadLifecycleLossEvent): void {
    for (const handler of this.lifecycleLossHandlers) {
      handler(event)
    }
  }
}

/** Parses a frame as JSON; returns `undefined` when it is not valid JSON. */
function parseJson(raw: WebSocket.RawData): unknown {
  try {
    return JSON.parse(raw.toString())
  } catch {
    return undefined
  }
}

/** Returns the message's `id` when it is a string or number, otherwise `undefined`. */
function jsonRpcId(parsed: unknown): JsonRpcId | undefined {
  if (!parsed || typeof parsed !== 'object') return undefined
  const id = (parsed as Record<string, unknown>).id
  return typeof id === 'string' || typeof id === 'number' ? id : undefined
}

/** Returns the message's `method` when it is a string, otherwise `undefined`. */
function jsonRpcMethod(parsed: unknown): string | undefined {
  if (!parsed || typeof parsed !== 'object') return undefined
  const method = (parsed as Record<string, unknown>).method
  return typeof method === 'string' ? method : undefined
}

/** Keeps binary frames as received and converts text frames to a string for re-sending. */
function framePayload(raw: WebSocket.RawData, isBinary: boolean): WebSocket.RawData | string {
  return isBinary ? raw : raw.toString()
}

/**
 * Sends immediately on an open socket; on a connecting socket, defers until it opens.
 * Data for a closing or closed socket is dropped.
 */
function sendIfOpen(socket: WebSocket, data: WebSocket.RawData | string): void {
  if (socket.readyState === WebSocket.OPEN) {
    socket.send(data)
  } else if (socket.readyState === WebSocket.CONNECTING) {
    socket.once('open', () => {
      if (socket.readyState === WebSocket.OPEN) socket.send(data)
    })
  }
}

/**
 * Converts an untrusted `thread` value into a thread handle.
 * Requires a non-empty string `id`; a non-string `path` becomes `null` and a
 * non-boolean `ephemeral` becomes `false`.
 */
function normalizeCandidateThread(thread: unknown): CodexThreadHandle | undefined {
  if (!thread || typeof thread !== 'object') return undefined
  const candidate = thread as Record<string, unknown>
  if (typeof candidate.id !== 'string' || candidate.id.length === 0) return undefined
  return {
    id: candidate.id,
    path: typeof candidate.path === 'string' ? candidate.path : null,
    ephemeral: typeof candidate.ephemeral === 'boolean' ? candidate.ephemeral : false,
  }
}
CodexCandidateIdentity + proof: Extract<CodexRolloutProofResult, { ok: false }> + liveTerminal: TLiveTerminal + } + | { + kind: 'proof_failed_fresh_create' + candidate: CodexCandidateIdentity + proof: Extract<CodexRolloutProofResult, { ok: false }> + clearCodexDurability: true + restoreError: RestoreError + } + +export const INVALID_RAW_CODEX_RESUME_MESSAGE = + 'Restore requires sessionRef; resumeSessionId is a legacy field and cannot be used as restore identity.' + +export const MISSING_CODEX_SESSION_REF_MESSAGE = 'Restore requires a canonical session reference.' + +export function planCodexCreateRestoreDecision(input: { + restoreRequested?: boolean + legacyResumeSessionId?: string + sessionRef?: SessionRef + codexDurability?: CodexDurabilityRef +}): CodexCreateRestorePlan { + const codexSessionRef = isCodexSessionRef(input.sessionRef) ? input.sessionRef : undefined + + if (hasRawLegacyResume(input.legacyResumeSessionId) && !codexSessionRef) { + return { + kind: 'reject_invalid_raw_codex_resume_request', + code: 'INVALID_MESSAGE', + message: INVALID_RAW_CODEX_RESUME_MESSAGE, + } + } + + if (codexSessionRef) { + return { + kind: 'durable_session_ref_resume', + sessionRef: codexSessionRef, + sessionId: codexSessionRef.sessionId, + } + } + + const durableSessionId = input.restoreRequested ? 
getDurableCodexSessionId(input.codexDurability) : undefined + if (durableSessionId) { + return { + kind: 'durable_session_ref_resume', + sessionRef: { provider: 'codex', sessionId: durableSessionId }, + sessionId: durableSessionId, + } + } + + const candidate = input.codexDurability?.candidate + if (input.restoreRequested && candidate && !input.legacyResumeSessionId) { + return { + kind: 'proof_existing_candidate_first', + candidate, + } + } + + if (input.restoreRequested) { + return { + kind: 'reject_missing_codex_session_ref', + code: 'RESTORE_UNAVAILABLE', + message: MISSING_CODEX_SESSION_REF_MESSAGE, + } + } + + return { kind: 'fresh_codex_launch' } +} + +export async function resolveCodexCreateRestoreDecision<TLiveTerminal extends CodexLiveRestoreTerminal>( + input: { + restoreRequested?: boolean + legacyResumeSessionId?: string + sessionRef?: SessionRef + codexDurability?: CodexDurabilityRef + proofRollout?: (input: { rolloutPath: string; candidateThreadId: string }) => Promise<CodexRolloutProofResult> + findLiveTerminalByCandidate?: (candidate: CodexCandidateIdentity) => MaybePromise<TLiveTerminal | undefined> + }, +): Promise<CodexCreateRestoreDecision<TLiveTerminal>> { + const plan = planCodexCreateRestoreDecision(input) + if (plan.kind !== 'proof_existing_candidate_first') { + return plan + } + + const candidate = plan.candidate + const proof = await (input.proofRollout ?? proofCodexRollout)({ + rolloutPath: candidate.rolloutPath, + candidateThreadId: candidate.candidateThreadId, + }) + const returnedLiveTerminal = await input.findLiveTerminalByCandidate?.(candidate) + const liveTerminal = returnedLiveTerminal && isExactLiveCodexCandidate(returnedLiveTerminal, candidate) + ? returnedLiveTerminal + : undefined + + if (proof.ok) { + return { + kind: 'proof_succeeded_resume_durable', + candidate, + proof, + sessionId: proof.rolloutProofId, + ...(liveTerminal ? 
{ liveTerminal } : {}), + } + } + + if (liveTerminal) { + return { + kind: 'proof_failed_attach_live_candidate', + candidate, + proof, + liveTerminal, + } + } + + return { + kind: 'proof_failed_fresh_create', + candidate, + proof, + clearCodexDurability: true, + restoreError: buildRestoreError('durable_artifact_missing'), + } +} + +function isCodexSessionRef(value: SessionRef | undefined): value is SessionRef & { provider: 'codex' } { + return value?.provider === 'codex' +} + +function hasRawLegacyResume(value: string | undefined): boolean { + return typeof value === 'string' && value.length > 0 +} + +function getDurableCodexSessionId(value: CodexDurabilityRef | undefined): string | undefined { + return value?.state === 'durable' ? value.durableThreadId : undefined +} + +export function isExactLiveCodexCandidate( + terminal: CodexLiveRestoreTerminal, + candidate: Pick<CodexCandidateIdentity, 'candidateThreadId' | 'rolloutPath'>, +): boolean { + const liveCandidate = terminal.codexDurability?.candidate + return liveCandidate?.candidateThreadId === candidate.candidateThreadId + && liveCandidate.rolloutPath === candidate.rolloutPath +} diff --git a/server/coding-cli/codex-app-server/runtime.ts b/server/coding-cli/codex-app-server/runtime.ts index 96471533c..88a5ea164 100644 --- a/server/coding-cli/codex-app-server/runtime.ts +++ b/server/coding-cli/codex-app-server/runtime.ts @@ -3,10 +3,12 @@ import { spawn } from 'node:child_process' import fsp from 'node:fs/promises' import os from 'node:os' import path from 'node:path' +import { CODEX_MANAGED_REMOTE_CONFIG_ARGS } from '../codex-managed-config.js' import { allocateLocalhostPort, type LoopbackServerEndpoint } from '../../local-port.js' import { logger } from '../../logger.js' import { CodexAppServerClient, + type CodexTurnEvent, type CodexThreadLifecycleEvent, type CodexThreadLifecycleLossEvent, } from './client.js' @@ -95,6 +97,7 @@ const DEFAULT_STARTUP_ATTEMPT_TIMEOUT_MS = 3_000 const STARTUP_POLL_MS = 50 const 
DEFAULT_TERMINATE_GRACE_MS = 1_000 const OWNERSHIP_SCHEMA_VERSION = 1 +export const DEFAULT_CODEX_SIDECAR_METADATA_DIR = path.join(os.homedir(), '.freshell', 'codex-sidecars') function sleep(ms: number): Promise<void> { return new Promise((resolve) => setTimeout(resolve, ms)) @@ -102,7 +105,7 @@ function sleep(ms: number): Promise<void> { function defaultMetadataDir(): string { return process.env.FRESHELL_CODEX_SIDECAR_DIR - || path.join(os.homedir(), '.freshell', 'codex-sidecars') + || DEFAULT_CODEX_SIDECAR_METADATA_DIR } function assertUnixSidecarSupport(): void { @@ -478,16 +481,25 @@ export async function runCodexStartupReaper( export const reapOrphanedCodexAppServerSidecarsOnStartup = runCodexStartupReaper export function assertCodexStartupReaperSucceeded(result: ReapOrphanedSidecarsResult): void { - const unreapedOwnershipIds = [ - ...result.failedOwnershipIds, - ...result.skippedActiveOwnershipIds, - ] - if (unreapedOwnershipIds.length === 0) return + const failedOwnershipIds = [...new Set(result.failedOwnershipIds)] + const activeOwnershipIds = [...new Set(result.skippedActiveOwnershipIds)] + if (failedOwnershipIds.length === 0 && activeOwnershipIds.length === 0) return + + const reasons: string[] = [] + if (failedOwnershipIds.length > 0) { + reasons.push( + `failed to reap ${failedOwnershipIds.length} ownership record(s): ${failedOwnershipIds.join(', ')}`, + ) + } + if (activeOwnershipIds.length > 0) { + reasons.push( + `${activeOwnershipIds.length} ownership record(s) still owned by a live Freshell server/process: ${activeOwnershipIds.join(', ')}`, + ) + } - const blockedOwnershipIds = [...new Set(unreapedOwnershipIds)] throw new Error( - `Codex app-server startup reaper failed to reap ${blockedOwnershipIds.length} ownership record(s): ${blockedOwnershipIds.join(', ')}. ` - + 'Refusing to continue until the unreaped Codex sidecar ownership is verified gone or handled explicitly.', + `Codex app-server startup reaper blocked startup: ${reasons.join('; ')}. 
` + + 'Refusing to continue until failed ownership records are handled and active owners have shut down or been verified gone.', ) } @@ -506,6 +518,8 @@ export class CodexAppServerRuntime { private readonly threadStartedHandlers = new Set<(thread: CodexThreadHandle) => void>() private readonly threadLifecycleHandlers = new Set<(event: CodexThreadLifecycleEvent) => void>() private readonly fsChangedHandlers = new Set<(event: { watchId: string; changedPaths: string[] }) => void>() + private readonly turnStartedHandlers = new Set<(event: CodexTurnEvent) => void>() + private readonly turnCompletedHandlers = new Set<(event: CodexTurnEvent) => void>() private readonly command: string private readonly commandArgs: string[] @@ -635,6 +649,20 @@ export class CodexAppServerRuntime { } } + onTurnStarted(handler: (event: CodexTurnEvent) => void): () => void { + this.turnStartedHandlers.add(handler) + return () => { + this.turnStartedHandlers.delete(handler) + } + } + + onTurnCompleted(handler: (event: CodexTurnEvent) => void): () => void { + this.turnCompletedHandlers.add(handler) + return () => { + this.turnCompletedHandlers.delete(handler) + } + } + async watchPath(targetPath: string, watchId: string): Promise<CodexFsWatchResult> { await this.ensureReady() return this.client!.watchPath(targetPath, watchId) @@ -690,6 +718,7 @@ export class CodexAppServerRuntime { const ownershipId = this.ownershipIdFactory() const child = spawn(this.command, [ ...this.commandArgs, + ...CODEX_MANAGED_REMOTE_CONFIG_ARGS, 'app-server', '--listen', wsUrl, @@ -760,6 +789,16 @@ export class CodexAppServerRuntime { handler(event) } }) + client.onTurnStarted((event) => { + for (const handler of this.turnStartedHandlers) { + handler(event) + } + }) + client.onTurnCompleted((event) => { + for (const handler of this.turnCompletedHandlers) { + handler(event) + } + }) client.onDisconnect((event) => { if (this.shutdownRequested) return for (const handler of this.exitHandlers) { @@ -842,17 +881,25 @@ export 
class CodexAppServerRuntime { } private async readWrapperIdentityInto(ownership: ActiveOwnership): Promise<void> { - const wrapperIdentity = await this.processIdentityReader(ownership.metadata.wrapperPid) - if (!isCompleteWrapperIdentity(wrapperIdentity)) { - throw new Error( - `Codex app-server wrapper identity could not be completely read for PID ${ownership.metadata.wrapperPid}.`, - ) - } - ownership.metadata = { - ...ownership.metadata, - wrapperIdentity, - updatedAt: new Date().toISOString(), + const timeoutMs = Math.min(this.startupAttemptTimeoutMs, 1_000) + const deadline = Date.now() + timeoutMs + + while (Date.now() < deadline) { + const wrapperIdentity = await this.processIdentityReader(ownership.metadata.wrapperPid) + if (isCompleteWrapperIdentity(wrapperIdentity)) { + ownership.metadata = { + ...ownership.metadata, + wrapperIdentity, + updatedAt: new Date().toISOString(), + } + return + } + await sleep(25) } + + throw new Error( + `Codex app-server wrapper identity could not be completely read for PID ${ownership.metadata.wrapperPid}.`, + ) } private async writeOwnershipRecord(ownership: ActiveOwnership): Promise<void> { diff --git a/server/coding-cli/codex-managed-config.ts b/server/coding-cli/codex-managed-config.ts new file mode 100644 index 000000000..d3a6371a9 --- /dev/null +++ b/server/coding-cli/codex-managed-config.ts @@ -0,0 +1 @@ +export const CODEX_MANAGED_REMOTE_CONFIG_ARGS = ['-c', 'features.apps=false'] as const diff --git a/server/index.ts b/server/index.ts index d443fae17..e34099985 100644 --- a/server/index.ts +++ b/server/index.ts @@ -439,6 +439,29 @@ async function main() { } }) + registry.on('terminal.session.bound', (payload) => { + const event = payload as { + terminalId?: string + provider?: CodingCliProviderName + sessionId?: string + } + if (event.provider !== 'codex') return + if (!event.terminalId || !event.sessionId) return + try { + broadcastTerminalSessionAssociation({ + wsHandler, + terminalMetadata, + 
broadcastTerminalMetaUpserts, + provider: 'codex', + terminalId: event.terminalId, + sessionId: event.sessionId, + source: 'codex_durability', + }) + } catch (err) { + log.warn({ err, terminalId: event.terminalId, sessionId: event.sessionId }, 'Failed to broadcast Codex session association') + } + }) + const applyDebugLogging = (enabled: boolean, source: string) => { const nextEnabled = !!enabled setLogLevel(resolveRuntimeLogLevel(nextEnabled)) diff --git a/server/mcp/freshell-tool.ts b/server/mcp/freshell-tool.ts index 085dfad00..4e174c4b7 100644 --- a/server/mcp/freshell-tool.ts +++ b/server/mcp/freshell-tool.ts @@ -8,6 +8,7 @@ import { z } from 'zod' import { createApiClient, resolveConfig, type ApiClient } from './http-client.js' import { translateKeys } from '../cli/keys.js' +import { INVALID_RAW_CODEX_RESUME_MESSAGE } from '../coding-cli/codex-app-server/restore-decision.js' // Lazy-initialized client -- created on first use so env vars are read at call time. let _client: ApiClient | undefined @@ -70,7 +71,7 @@ FRESHELL_URL and FRESHELL_TOKEN are already set in your environment. ## Key gotchas - **Tab and pane IDs are ephemeral.** IDs from open-browser, new-tab, and split-pane are valid only within the current session. If the Freshell server restarts or the agent conversation resumes after a disconnect, previously returned IDs may no longer exist. Always call open-browser or list-tabs fresh rather than reusing stale IDs. -- **Always screenshot with `screenshot({ scope: "tab", target: tabId })` after open-browser.** Network errors, CORS issues, or server problems can cause blank pages. open-browser returns a tabId — use it immediately to screenshot and confirm the page rendered before proceeding. +- **Always screenshot with screenshot({ scope: "tab", target: tabId }) after open-browser.** Network errors, CORS issues, or server problems can cause blank pages. 
open-browser returns a tabId — use it immediately to screenshot and confirm the page rendered before proceeding. - send-keys: use literal mode (literal: true + keys as a string) for natural-language prompts or multi-word text. Do NOT append "ENTER" as literal text -- send the command with literal:true, then send ["ENTER"] as a separate call in token mode. - wait-for with stable (seconds of no output) is more reliable than pattern matching across different CLI providers. - Editor panes show "Loading..." until the tab is visited in the browser. When screenshotting multiple tabs, visit each tab first (select-tab), then loop back for screenshots. @@ -246,7 +247,7 @@ async function handleDisplay(format: string, target?: string): Promise<string> { // --------------------------------------------------------------------------- const ACTION_PARAMS: Record<string, { required: string[]; optional: string[] }> = { - 'new-tab': { required: [], optional: ['name', 'mode', 'shell', 'cwd', 'browser', 'editor', 'resume', 'prompt'] }, + 'new-tab': { required: [], optional: ['name', 'mode', 'shell', 'cwd', 'browser', 'editor', 'resume', 'sessionRef', 'prompt'] }, 'list-tabs': { required: [], optional: [] }, 'select-tab': { required: ['target'], optional: [] }, 'kill-tab': { required: ['target'], optional: [] }, @@ -254,14 +255,14 @@ const ACTION_PARAMS: Record<string, { required: string[]; optional: string[] }> 'has-tab': { required: ['target'], optional: [] }, 'next-tab': { required: [], optional: [] }, 'prev-tab': { required: [], optional: [] }, - 'split-pane': { required: [], optional: ['target', 'direction', 'mode', 'shell', 'cwd', 'browser', 'editor'] }, + 'split-pane': { required: [], optional: ['target', 'direction', 'mode', 'shell', 'cwd', 'browser', 'editor', 'resume', 'sessionRef'] }, 'list-panes': { required: [], optional: ['target'] }, 'select-pane': { required: ['target'], optional: [] }, 'rename-pane': { required: ['name'], optional: ['target'] }, 'kill-pane': { required: 
['target'], optional: [] }, 'resize-pane': { required: ['target'], optional: ['x', 'y', 'sizes'] }, 'swap-pane': { required: ['target', 'with'], optional: [] }, - 'respawn-pane': { required: ['target'], optional: ['mode', 'shell', 'cwd'] }, + 'respawn-pane': { required: ['target'], optional: ['mode', 'shell', 'cwd', 'resume', 'sessionRef'] }, 'send-keys': { required: [], optional: ['target', 'keys', 'literal'] }, 'capture-pane': { required: [], optional: ['target', 'S', 'J', 'e'] }, 'wait-for': { required: [], optional: ['target', 'pattern', 'stable', 'exit', 'prompt', 'timeout'] }, @@ -280,6 +281,8 @@ const ACTION_PARAMS: Record<string, { required: string[]; optional: string[] }> 'help': { required: [], optional: [] }, } +const RAW_CODEX_RESUME_HINT = 'Use sessionRef: { provider: "codex", sessionId } after Codex identity is durable.' + const COMMON_CONFUSIONS: Record<string, Record<string, string>> = { 'new-tab': { url: "Unknown parameter 'url' for action 'new-tab'. Did you mean to use 'open-browser' to open a URL? 
Or pass the URL as 'browser' to create a browser pane in a new tab.", @@ -309,6 +312,29 @@ function validateParams(action: string, params: Record<string, unknown> | undefi } } +function isCodexSessionRef(value: unknown): boolean { + return !!value + && typeof value === 'object' + && !Array.isArray(value) + && (value as { provider?: unknown }).provider === 'codex' + && typeof (value as { sessionId?: unknown }).sessionId === 'string' + && (value as { sessionId: string }).sessionId.length > 0 +} + +function rejectRawCodexResume( + mode: unknown, + resume: unknown, + sessionRef: unknown, +): { error: string; hint: string } | undefined { + if (mode === 'codex' && typeof resume === 'string' && resume.length > 0 && !isCodexSessionRef(sessionRef)) { + return { + error: INVALID_RAW_CODEX_RESUME_MESSAGE, + hint: RAW_CODEX_RESUME_HINT, + } + } + return undefined +} + // --------------------------------------------------------------------------- // Action router // --------------------------------------------------------------------------- @@ -335,7 +361,7 @@ Rules: ## Command reference Tab commands: - new-tab Create a tab with a terminal pane (default). Params: name?, mode?, shell?, cwd?, browser?, editor?, resume?, prompt? + new-tab Create a tab with a terminal pane (default). Params: name?, mode?, shell?, cwd?, browser?, editor?, resume?, sessionRef?, prompt? mode values: shell (default), claude, codex, kimi, opencode, or any supported CLI. prompt: text to send to the terminal after creation (via send-keys with literal mode). To open a URL in a browser pane, use 'open-browser' instead. @@ -349,7 +375,7 @@ Tab commands: prev-tab Switch to the previous tab. Pane commands: - split-pane Split a pane. Params: target?, direction (horizontal|vertical, default vertical), mode?, shell?, cwd?, browser?, editor? + split-pane Split a pane. Params: target?, direction (horizontal|vertical, default vertical), mode?, shell?, cwd?, browser?, editor?, resume?, sessionRef? 
Omit target to split your own pane (the pane where this MCP server was spawned). Returns { paneId, tabId }. list-panes List panes. Params: target? (tab ID or title to filter by). Returns { panes: [...] }. select-pane Activate a pane. Params: target (pane ID or index) @@ -358,7 +384,7 @@ Pane commands: Omit target to rename the caller pane (or the tab's active pane as fallback). resize-pane Resize a pane. Params: target, x? (1-99), y? (1-99) swap-pane Swap two panes. Params: target, with (other pane ID) - respawn-pane Restart a pane's terminal. Params: target, mode?, shell?, cwd? + respawn-pane Restart a pane's terminal. Params: target, mode?, shell?, cwd?, resume?, sessionRef? Terminal I/O: send-keys Send input to a pane. Params: target, keys, literal? @@ -469,7 +495,7 @@ Meta: ## Screenshot guidance -- **Always screenshot with `screenshot({ scope: "tab", target: tabId })` after open-browser.** Network errors, blank pages, and CORS failures are silent unless you look. open-browser returns a tabId — use it immediately to confirm the page rendered before acting on it. +- **Always screenshot with screenshot({ scope: "tab", target: tabId }) after open-browser.** Network errors, blank pages, and CORS failures are silent unless you look. open-browser returns a tabId — use it immediately to confirm the page rendered before acting on it. - Tab and pane IDs from earlier in a session may become stale after reconnections or server restarts. If screenshot fails to find a tab/pane, call list-tabs or list-panes to get fresh IDs rather than reusing old ones. - Use a dedicated canary tab when validating screenshot behavior so live project panes are not contaminated. - Close temporary tabs/panes after verification unless user asked to keep them open. 
@@ -540,10 +566,12 @@ async function routeAction( switch (action) { // -- Tab actions -- case 'new-tab': { - const { name, mode, shell, cwd, browser, editor, resume, prompt, ...rest } = params || {} - const sessionRef = typeof mode === 'string' && typeof resume === 'string' + const { name, mode, shell, cwd, browser, editor, resume, sessionRef: explicitSessionRef, prompt, ...rest } = params || {} + const codexResumeError = rejectRawCodexResume(mode, resume, explicitSessionRef) + if (codexResumeError) return codexResumeError + const sessionRef = explicitSessionRef ?? (typeof mode === 'string' && mode !== 'codex' && typeof resume === 'string' ? { provider: mode, sessionId: resume } - : undefined + : undefined) const tabResult = await c.post('/api/tabs', { name, mode, @@ -559,7 +587,10 @@ async function routeAction( const data = unwrapData(tabResult) const paneId = data?.paneId if (paneId) { - await c.post(`/api/panes/${encodeURIComponent(paneId)}/send-keys`, { data: `${prompt}\r` }) + await c.post(`/api/panes/${encodeURIComponent(paneId)}/send-keys`, { + data: `${prompt}\r`, + ...(mode === 'codex' ? { waitForCodexIdentity: true } : {}), + }) } } return tabResult @@ -602,9 +633,14 @@ async function routeAction( const resolved = await resolvePaneTarget(rawTarget) if (!resolved.pane) return { error: resolved.message || 'No pane found', hint: "Run action 'list-panes' to see available panes." } const paneId = resolved.pane.id - const { direction, browser, editor, mode, shell, cwd, target: _t, ...rest } = params || {} + const { direction, browser, editor, mode, shell, cwd, target: _t, resume, sessionRef, ...rest } = params || {} + const codexResumeError = rejectRawCodexResume(mode, resume, sessionRef) + if (codexResumeError) return codexResumeError + const effectiveSessionRef = sessionRef ?? (typeof mode === 'string' && mode !== 'codex' && typeof resume === 'string' + ? 
{ provider: mode, sessionId: resume } + : undefined) return c.post(`/api/panes/${encodeURIComponent(paneId)}/split`, { - direction, browser, editor, mode, shell, cwd, ...rest, + direction, browser, editor, mode, shell, cwd, ...(effectiveSessionRef ? { sessionRef: effectiveSessionRef } : {}), ...rest, }) } case 'list-panes': { @@ -646,8 +682,13 @@ async function routeAction( } case 'respawn-pane': { const target = requireParam(params, 'target') - const { mode, shell, cwd } = params || {} - return c.post(`/api/panes/${encodeURIComponent(target)}/respawn`, { mode, shell, cwd }) + const { mode, shell, cwd, resume, sessionRef } = params || {} + const codexResumeError = rejectRawCodexResume(mode, resume, sessionRef) + if (codexResumeError) return codexResumeError + const effectiveSessionRef = sessionRef ?? (typeof mode === 'string' && mode !== 'codex' && typeof resume === 'string' + ? { provider: mode, sessionId: resume } + : undefined) + return c.post(`/api/panes/${encodeURIComponent(target)}/respawn`, { mode, shell, cwd, sessionRef: effectiveSessionRef }) } // -- Terminal I/O -- diff --git a/server/session-association-broadcast.ts b/server/session-association-broadcast.ts index 71cec2d6d..1743ba5f1 100644 --- a/server/session-association-broadcast.ts +++ b/server/session-association-broadcast.ts @@ -2,7 +2,7 @@ import { recordSessionLifecycleEvent } from './session-observability.js' import type { CodingCliProviderName } from './coding-cli/types.js' import type { TerminalMetadataService } from './terminal-metadata-service.js' -type AssociationBroadcastSource = 'indexer_update' | 'claude_new_session' | 'opencode_controller' +type AssociationBroadcastSource = 'indexer_update' | 'claude_new_session' | 'opencode_controller' | 'codex_durability' export function broadcastTerminalSessionAssociation(opts: { wsHandler: { broadcast: (message: unknown) => void } diff --git a/server/session-observability.ts b/server/session-observability.ts index de493ba7d..2c2317a20 100644 --- 
a/server/session-observability.ts +++ b/server/session-observability.ts @@ -29,6 +29,24 @@ export type SessionLifecycleEvent = reused: boolean hasSessionRef: boolean }) + | { + kind: 'codex_candidate_pending' + provider: 'codex' + terminalId: string + generation: number + tabId?: string + paneId?: string + cwd?: string + } + | { + kind: 'codex_candidate_captured' + provider: 'codex' + terminalId: string + candidateThreadId: string + rolloutPath: string + source: string + generation: number + } | { kind: 'codex_durable_session_observed' provider: 'codex' @@ -38,12 +56,20 @@ export type SessionLifecycleEvent = attemptId?: string source: 'sidecar' } + | { + kind: 'codex_durable_resume_started' + provider: 'codex' + terminalId: string + sessionId: string + generation: number + source: 'sidecar' + } | { kind: 'session_association_broadcast' provider: CodingCliProviderName terminalId: string sessionId: string - source: 'indexer_update' | 'claude_new_session' | 'opencode_controller' + source: 'indexer_update' | 'claude_new_session' | 'opencode_controller' | 'codex_durability' } | { kind: 'terminal_session_bound' @@ -135,6 +161,26 @@ function buildPayload(event: SessionLifecycleEvent): Record<string, unknown> { reused: event.reused, hasSessionRef: event.hasSessionRef, } + case 'codex_candidate_pending': + return { + ...base, + provider: event.provider, + terminalId: event.terminalId, + generation: event.generation, + tabId: event.tabId, + paneId: event.paneId, + cwd: event.cwd, + } + case 'codex_candidate_captured': + return { + ...base, + provider: event.provider, + terminalId: event.terminalId, + candidateThreadId: event.candidateThreadId, + rolloutPath: event.rolloutPath, + source: event.source, + generation: event.generation, + } case 'codex_durable_session_observed': return { ...base, @@ -145,6 +191,15 @@ function buildPayload(event: SessionLifecycleEvent): Record<string, unknown> { attemptId: event.attemptId, source: event.source, } + case 
'codex_durable_resume_started': + return { + ...base, + provider: event.provider, + terminalId: event.terminalId, + sessionId: event.sessionId, + generation: event.generation, + source: event.source, + } case 'session_association_broadcast': return { ...base, diff --git a/server/terminal-registry.ts b/server/terminal-registry.ts index 8e0395bda..69e0b4f06 100644 --- a/server/terminal-registry.ts +++ b/server/terminal-registry.ts @@ -9,6 +9,12 @@ import { EventEmitter } from 'events' import { logger } from './logger.js' import { getPerfConfig, logPerfEvent, shouldLog, startPerfTimer } from './perf-logger.js' import type { ServerSettings } from '../shared/settings.js' +import { + CODEX_DURABILITY_SCHEMA_VERSION, + type CodexCandidateSource, + type CodexDurabilityRef, + type CodexDurabilityStoreRecord, +} from '../shared/codex-durability.js' import { convertWindowsPathToWslPath, isReachableDirectorySync } from './path-utils.js' import { isValidClaudeSessionId } from './claude-session-id.js' import type { LoopbackServerEndpoint } from './local-port.js' @@ -24,8 +30,16 @@ import type { } from './terminal-stream/registry-events.js' import { getOpencodeEnvOverrides, resolveOpencodeLaunchModel } from './opencode-launch.js' import { generateMcpInjection, cleanupMcpConfig } from './mcp/config-writer.js' +import { CODEX_MANAGED_REMOTE_CONFIG_ARGS } from './coding-cli/codex-managed-config.js' import type { CodexLaunchPlan, CodexLaunchSidecar } from './coding-cli/codex-app-server/launch-planner.js' import { isCodexSidecarTeardownError } from './coding-cli/codex-app-server/launch-planner.js' +import { + CodexDurabilityStore, + type CodexDurabilityRestoreLocator, +} from './coding-cli/codex-app-server/durability-store.js' +import { proofCodexRollout } from './coding-cli/codex-app-server/durability-proof.js' +import type { CodexRemoteProxyCandidate } from './coding-cli/codex-app-server/remote-proxy.js' +import type { CodexTurnEvent } from './coding-cli/codex-app-server/client.js' 
import { collectShutdownFailures, throwShutdownFailures } from './shutdown-join.js' import { recordSessionLifecycleEvent } from './session-observability.js' @@ -193,8 +207,11 @@ export type CodexRecoveryLaunchInput = { export type CodexRecoveryOptions = { planCreate(input: CodexRecoveryLaunchInput): Promise<CodexLaunchPlan> retryDelayMs?: number - readinessTimeoutMs?: number - readinessPollMs?: number +} + +export type CodexDurabilityRestoreRecord = { + terminalId: string + durability: CodexDurabilityRef } function resolveCodingCliCommand( @@ -228,7 +245,7 @@ function resolveCodingCliCommand( if (parsed.protocol !== 'ws:' || parsed.hostname !== '127.0.0.1') { throw new Error('Codex launch requires a loopback app-server websocket URL.') } - remoteArgs.push('--remote', wsUrl) + remoteArgs.push('--remote', wsUrl, ...CODEX_MANAGED_REMOTE_CONFIG_ARGS) } let resumeArgs: string[] = [] if (resumeSessionId) { @@ -451,11 +468,30 @@ export type TerminalRecord = { lastInputToOutputMs?: number maxInputToOutputMs: number } - codexSidecar?: Pick<CodexLaunchSidecar, 'shutdown' | 'onLifecycleLoss'> + codexSidecar?: Pick< + CodexLaunchSidecar, + | 'shutdown' + | 'onLifecycleLoss' + | 'onCandidate' + | 'onTurnStarted' + | 'onTurnCompleted' + | 'onRepairTrigger' + | 'onFsChanged' + | 'watchPath' + | 'unwatchPath' + | 'markCandidatePersisted' + > codexSidecarLifecycleUnsubscribe?: () => void codexSidecarLifecyclePublished?: boolean codexSidecarPrePublicationLoss?: unknown codexSidecarGeneration?: number + codexRolloutWatch?: { watchId: string; rolloutPath: string } + codexDurability?: CodexDurabilityRef + codexDurabilityProof?: { + inFlight?: Promise<void> + rerunRequested?: boolean + } + codexInputGate?: { state: 'identity_pending' } codexRecovery?: CodexRecoveryOptions codexRecoveryAttempt?: Promise<void> codexRecoveryRetry?: { timer: NodeJS.Timeout; resolve: () => void } @@ -464,10 +500,27 @@ export type TerminalRecord = { codexRecoveryRetiringPty?: pty.IPty } +export type 
TerminalInputResult = + | { status: 'written' } + | { status: 'blocked_codex_identity_pending'; terminalId: string } + | { status: 'blocked_codex_identity_capture_timeout'; terminalId: string } + | { status: 'blocked_codex_identity_unavailable'; terminalId: string; reason?: string } + | { status: 'blocked_codex_recovery_pending'; terminalId: string } + | { status: 'no_terminal' } + | { status: 'not_running' } + +function isCodexStartupTerminalControlInput(data: string): boolean { + if (data.length === 0 || data.length > 128) return false + if (data === '\x1b[I' || data === '\x1b[O') return true + if (/^\x1b\[\d{1,4};\d{1,4}R$/.test(data)) return true + if (/^\x1b\[(?:\?|\>)?[\d;]{0,32}c$/.test(data)) return true + return /^\x1b\](?:10|11|12|4;\d{1,3});rgb:[0-9a-fA-F]{1,4}\/[0-9a-fA-F]{1,4}\/[0-9a-fA-F]{1,4}(?:\x07|\x1b\\)$/.test(data) +} + export type BindSessionResult = | { ok: true; terminalId: string; sessionId: string } | { ok: false; reason: 'terminal_missing' | 'mode_mismatch' | 'invalid_session_id' | 'terminal_not_running' } - | BindResult + | Extract<BindResult, { ok: false }> export type RepairLegacySessionOwnersResult = { repaired: boolean @@ -475,6 +528,11 @@ export type RepairLegacySessionOwnersResult = { clearedTerminalIds: string[] } +type TerminalRegistryOptions = { + codexDurabilityStore?: CodexDurabilityStore + serverInstanceId?: string +} + export class ChunkRingBuffer { private chunks: string[] = [] private size = 0 @@ -1006,11 +1064,19 @@ export class TerminalRegistry extends EventEmitter { private scrollbackMaxChars: number private maxPendingSnapshotChars: number private sidecarShutdowns = new Map<string, SidecarShutdownEntry>() + private codexDurabilityStore: CodexDurabilityStore + private codexCandidatePersistenceQueues = new Map<string, Promise<void>>() + private serverInstanceId: string // Legacy transport batching path. Broker cutover destination: // - outputBuffers/flush timers/mobile batching -> broker client-output queue. 
private outputBuffers = new Map<WebSocket, PendingOutput>() - constructor(settings?: ServerSettings, maxTerminals?: number, maxExitedTerminals?: number) { + constructor( + settings?: ServerSettings, + maxTerminals?: number, + maxExitedTerminals?: number, + options: TerminalRegistryOptions = {}, + ) { super() // Permanent terminal.exit listeners: index, ws-handler, broker, codex-wiring, // terminal-view. Shutdown uses a single shared listener (no per-terminal scaling). @@ -1018,6 +1084,8 @@ export class TerminalRegistry extends EventEmitter { this.settings = settings this.maxTerminals = maxTerminals ?? MAX_TERMINALS this.maxExitedTerminals = maxExitedTerminals ?? Number(process.env.MAX_EXITED_TERMINALS || 200) + this.codexDurabilityStore = options.codexDurabilityStore ?? new CodexDurabilityStore() + this.serverInstanceId = options.serverInstanceId?.trim() || process.env.FRESHELL_SERVER_INSTANCE_ID || `srv-${process.pid}` this.scrollbackMaxChars = this.computeScrollbackMaxChars(settings) { const raw = Number(process.env.MAX_PENDING_SNAPSHOT_CHARS || DEFAULT_MAX_PENDING_SNAPSHOT_CHARS) @@ -1027,6 +1095,12 @@ export class TerminalRegistry extends EventEmitter { this.startPerfMonitor() } + setServerInstanceId(serverInstanceId: string): void { + const normalized = serverInstanceId.trim() + if (!normalized) return + this.serverInstanceId = normalized + } + setSettings(settings: ServerSettings) { this.settings = settings this.scrollbackMaxChars = this.computeScrollbackMaxChars(settings) @@ -1156,7 +1230,11 @@ export class TerminalRegistry extends EventEmitter { exitCode: number | undefined, reason: 'pty_exit' | 'user_final_close', ): void { - if (record.mode === 'shell' || record.resumeSessionId) { + if ( + record.mode === 'shell' + || record.resumeSessionId + || (record.mode === 'codex' && record.codexDurability?.state === 'durable' && record.codexDurability.durableThreadId) + ) { return } const ptyPid = record.pty.pid @@ -1171,6 +1249,40 @@ export class TerminalRegistry 
extends EventEmitter { }) } + private forgetCodexDurabilityStoreRecord(record: TerminalRecord, reason: string): void { + if (record.mode !== 'codex') return + if (!record.codexDurability) return + void this.codexDurabilityStore.delete(record.terminalId).catch((err) => { + logger.warn({ err, terminalId: record.terminalId, reason }, 'Failed to delete Codex durability store record') + }) + } + + private finishTerminalPtyExit( + record: TerminalRecord, + event: { exitCode: number; signal?: number }, + ): void { + this.markCodexRecoveryFinalClose(record) + record.status = 'exited' + record.exitCode = event.exitCode + const now = Date.now() + record.lastActivityAt = now + record.exitedAt = now + cleanupMcpConfig(record.terminalId, record.mode, record.mcpCwd) + for (const client of record.clients) { + this.flushOutputBuffer(client) + this.safeSend(client, { type: 'terminal.exit', terminalId: record.terminalId, exitCode: event.exitCode }, { terminalId: record.terminalId, perf: record.perf }) + } + record.clients.clear() + record.suppressedOutputClients.clear() + record.pendingSnapshotClients.clear() + this.releaseBinding(record.terminalId, 'exit') + this.emit('terminal.exit', { terminalId: record.terminalId, exitCode: event.exitCode }) + this.recordTerminalExitWithoutDurableSession(record, event.exitCode, 'pty_exit') + this.forgetCodexDurabilityStoreRecord(record, 'pty_exit') + void this.releaseCodexSidecar(record).catch(() => undefined) + this.reapExitedTerminals() + } + private reapExitedTerminals(): void { const max = this.maxExitedTerminals if (!max || max <= 0) return @@ -1182,7 +1294,9 @@ export class TerminalRegistry extends EventEmitter { const excess = exited.length - max if (excess <= 0) return for (let i = 0; i < excess; i += 1) { - this.terminals.delete(exited[i].terminalId) + const terminal = exited[i] + this.terminals.delete(terminal.terminalId) + this.forgetCodexDurabilityStoreRecord(terminal, 'reap_exited') } } @@ -1270,6 +1384,14 @@ export class 
TerminalRegistry extends EventEmitter { const title = getModeLabel(opts.mode) + const initialCodexDurability: CodexDurabilityRef | undefined = opts.mode === 'codex' && resumeForBinding + ? { + schemaVersion: CODEX_DURABILITY_SCHEMA_VERSION, + state: 'durable', + durableThreadId: resumeForBinding, + } + : undefined + const record: TerminalRecord = { terminalId, title, @@ -1297,6 +1419,10 @@ export class TerminalRegistry extends EventEmitter { ? !opts.providerSettings?.codexAppServer?.deferLifecycleUntilPublished : undefined, codexSidecarGeneration: opts.mode === 'codex' ? 0 : undefined, + codexDurability: initialCodexDurability, + codexInputGate: opts.mode === 'codex' && !resumeForBinding + ? { state: 'identity_pending' } + : undefined, codexRecovery: opts.mode === 'codex' ? opts.providerSettings?.codexAppServer?.recovery : undefined, perf: perfConfig.enabled ? { @@ -1396,28 +1522,39 @@ export class TerminalRegistry extends EventEmitter { if (record.status === 'exited') { return } - this.markCodexRecoveryFinalClose(record) - record.status = 'exited' - record.exitCode = e.exitCode - const now = Date.now() - record.lastActivityAt = now - record.exitedAt = now - cleanupMcpConfig(terminalId, opts.mode, record.mcpCwd) - for (const client of record.clients) { - this.flushOutputBuffer(client) - this.safeSend(client, { type: 'terminal.exit', terminalId, exitCode: e.exitCode }, { terminalId, perf: record.perf }) + const finishExit = () => { + if (this.startCodexDurableRecovery(record, { + source: 'pty_exit', + exitCode: e.exitCode, + signal: e.signal, + })) { + return + } + this.finishTerminalPtyExit(record, e) + } + if (this.needsCodexFinalDurabilityProof(record)) { + void (async () => { + await this.proveCodexBeforeFinalLoss(record, 'pty_exit') + if (record.pty !== ptyProc || record.status === 'exited') return + finishExit() + })() + return } - record.clients.clear() - record.suppressedOutputClients.clear() - record.pendingSnapshotClients.clear() - 
this.releaseBinding(terminalId, 'exit') - this.emit('terminal.exit', { terminalId, exitCode: e.exitCode }) - this.recordTerminalExitWithoutDurableSession(record, e.exitCode, 'pty_exit') - void this.releaseCodexSidecar(record).catch(() => undefined) - this.reapExitedTerminals() + finishExit() }) this.terminals.set(terminalId, record) + if (opts.mode === 'codex' && record.codexInputGate?.state === 'identity_pending') { + recordSessionLifecycleEvent({ + kind: 'codex_candidate_pending', + provider: 'codex', + terminalId, + generation: record.codexSidecarGeneration ?? 0, + ...(record.envContext?.tabId ? { tabId: record.envContext.tabId } : {}), + ...(record.envContext?.paneId ? { paneId: record.envContext.paneId } : {}), + ...(record.cwd ? { cwd: record.cwd } : {}), + }) + } const exactSessionId = resumeForBinding if (modeSupportsResume(opts.mode) && exactSessionId) { const bound = this.bindSession( @@ -1450,9 +1587,598 @@ export class TerminalRegistry extends EventEmitter { private registerCodexSidecarLifecycle(record: TerminalRecord): void { record.codexSidecarLifecycleUnsubscribe?.() - record.codexSidecarLifecycleUnsubscribe = record.codexSidecar?.onLifecycleLoss?.((event) => { + const sidecar = record.codexSidecar + if (!sidecar) { + record.codexSidecarLifecycleUnsubscribe = undefined + return + } + + const unsubscribers: Array<() => void> = [] + const lifecycleUnsubscribe = sidecar.onLifecycleLoss?.((event) => { this.handleCodexLifecycleLoss(record.terminalId, event) }) + if (lifecycleUnsubscribe) unsubscribers.push(lifecycleUnsubscribe) + + const candidateUnsubscribe = sidecar.onCandidate?.((candidate) => { + void this.persistCodexCandidate(record.terminalId, candidate).catch((err) => { + logger.error({ err, terminalId: record.terminalId }, 'Failed to persist Codex restore identity') + void this.failCodexFreshIdentity(record.terminalId, 'candidate_persist_failed').catch((failErr) => { + logger.error({ err: failErr, terminalId: record.terminalId }, 'Failed to mark 
Codex terminal non-restorable after candidate persistence failure') + }) + }) + }) + if (candidateUnsubscribe) unsubscribers.push(candidateUnsubscribe) + + const turnStartedUnsubscribe = sidecar.onTurnStarted?.((event) => { + void this.handleCodexTurnStarted(record.terminalId, event).catch((err) => { + logger.error({ err, terminalId: record.terminalId }, 'Failed to update Codex turn-start durability state') + }) + }) + if (turnStartedUnsubscribe) unsubscribers.push(turnStartedUnsubscribe) + + const turnCompletedUnsubscribe = sidecar.onTurnCompleted?.((event) => { + void this.handleCodexTurnCompleted(record.terminalId, event).catch((err) => { + logger.error({ err, terminalId: record.terminalId }, 'Failed to proof Codex rollout after turn completion') + }) + }) + if (turnCompletedUnsubscribe) unsubscribers.push(turnCompletedUnsubscribe) + + const repairUnsubscribe = sidecar.onRepairTrigger?.((event) => { + if (event.kind === 'candidate_capture_timeout') { + void this.failCodexFreshIdentity(record.terminalId, 'candidate_capture_timeout').catch((err) => { + logger.error({ err, terminalId: record.terminalId }, 'Failed to mark Codex terminal non-restorable after candidate capture timeout') + }) + return + } + this.requestCodexDurabilityProof(record.terminalId, `repair:${event.kind}`) + }) + if (repairUnsubscribe) unsubscribers.push(repairUnsubscribe) + + const fsChangedUnsubscribe = sidecar.onFsChanged?.((event) => { + this.handleCodexRolloutFsChanged(record.terminalId, event) + }) + if (fsChangedUnsubscribe) unsubscribers.push(fsChangedUnsubscribe) + + record.codexSidecarLifecycleUnsubscribe = () => { + for (const unsubscribe of unsubscribers.splice(0)) { + unsubscribe() + } + } + } + + private armCodexRolloutWatch(record: TerminalRecord): void { + const candidate = record.codexDurability?.candidate + const sidecar = record.codexSidecar + if (!candidate || !sidecar?.watchPath) return + if (record.codexRolloutWatch?.rolloutPath === candidate.rolloutPath) return + + 
this.unwatchCodexRollout(record, 'replace') + const watchId = `codex-rollout-${record.terminalId}-${Date.now()}` + record.codexRolloutWatch = { watchId, rolloutPath: candidate.rolloutPath } + sidecar.watchPath(candidate.rolloutPath, watchId) + .then(() => { + logger.debug({ + terminalId: record.terminalId, + watchId, + rolloutPath: candidate.rolloutPath, + }, 'Watching Codex rollout proof path') + }) + .catch((err) => { + if (record.codexRolloutWatch?.watchId === watchId) { + record.codexRolloutWatch = undefined + } + logger.warn({ + err, + terminalId: record.terminalId, + watchId, + rolloutPath: candidate.rolloutPath, + }, 'Failed to watch Codex rollout proof path') + }) + } + + private unwatchCodexRollout(record: TerminalRecord, reason: string): void { + const watch = record.codexRolloutWatch + if (!watch) return + record.codexRolloutWatch = undefined + record.codexSidecar?.unwatchPath?.(watch.watchId).catch((err) => { + logger.warn({ + err, + terminalId: record.terminalId, + watchId: watch.watchId, + rolloutPath: watch.rolloutPath, + reason, + }, 'Failed to unwatch Codex rollout proof path') + }) + } + + private handleCodexRolloutFsChanged( + terminalId: string, + event: { watchId: string; changedPaths: string[] }, + ): void { + const record = this.terminals.get(terminalId) + if (!record?.codexRolloutWatch) return + const watch = record.codexRolloutWatch + if (event.watchId !== watch.watchId) return + if (event.changedPaths.length > 0 && !event.changedPaths.includes(watch.rolloutPath)) return + this.requestCodexDurabilityProof(terminalId, 'fs_changed') + } + + private codexCandidateMatches(record: TerminalRecord, threadId: string | undefined): boolean { + const candidateThreadId = record.codexDurability?.candidate?.candidateThreadId + return !!candidateThreadId && candidateThreadId === threadId + } + + private buildCodexDurabilityRef(candidate: CodexRemoteProxyCandidate, capturedAt: number): CodexDurabilityRef | undefined { + const candidateThreadId = 
candidate.thread.id + const rolloutPath = typeof candidate.thread.path === 'string' ? candidate.thread.path : undefined + if (!candidateThreadId || !rolloutPath || candidate.thread.ephemeral === true || !path.isAbsolute(rolloutPath)) { + return undefined + } + return { + schemaVersion: CODEX_DURABILITY_SCHEMA_VERSION, + state: 'captured_pre_turn', + candidate: { + provider: 'codex', + candidateThreadId, + rolloutPath, + source: candidate.source as CodexCandidateSource, + capturedAt, + }, + } + } + + private codexDurabilityRecordToRef(record: CodexDurabilityStoreRecord): CodexDurabilityRef { + return { + schemaVersion: record.schemaVersion, + state: record.state, + ...(record.candidate ? { candidate: record.candidate } : {}), + ...(record.turnCompletedAt !== undefined ? { turnCompletedAt: record.turnCompletedAt } : {}), + ...(record.lastProofFailure ? { lastProofFailure: record.lastProofFailure } : {}), + ...(record.durableThreadId ? { durableThreadId: record.durableThreadId } : {}), + ...(record.nonRestorableReason ? { nonRestorableReason: record.nonRestorableReason } : {}), + } + } + + async readCodexDurabilityForRestoreLocator(locator: CodexDurabilityRestoreLocator): Promise<CodexDurabilityRef | undefined> { + return (await this.readCodexDurabilityRecordForRestoreLocator(locator))?.durability + } + + async readCodexDurabilityRecordForRestoreLocator(locator: CodexDurabilityRestoreLocator): Promise<CodexDurabilityRestoreRecord | undefined> { + const record = await this.codexDurabilityStore.readForRestoreLocator(locator) + return record + ? 
{ + terminalId: record.terminalId, + durability: this.codexDurabilityRecordToRef(record), + } + : undefined + } + + async deleteCodexDurabilityStoreRecord(terminalId: string, reason: string): Promise<void> { + await this.codexDurabilityStore.delete(terminalId) + logger.info({ terminalId, reason }, 'Deleted Codex durability store record') + } + + private async writeCodexDurability(record: TerminalRecord, durability: CodexDurabilityRef, updatedAt = Date.now()): Promise<CodexDurabilityRef> { + const stored = await this.codexDurabilityStore.write({ + ...durability, + terminalId: record.terminalId, + ...(record.envContext?.tabId ? { tabId: record.envContext.tabId } : {}), + ...(record.envContext?.paneId ? { paneId: record.envContext.paneId } : {}), + serverInstanceId: this.serverInstanceId, + updatedAt, + }) + const storedDurability = this.codexDurabilityRecordToRef(stored) + record.codexDurability = storedDurability + return storedDurability + } + + private async replaceCodexDurabilityStoreRecord(record: TerminalRecord, durability: CodexDurabilityRef, updatedAt = Date.now()): Promise<CodexDurabilityRef> { + await this.codexDurabilityStore.delete(record.terminalId) + return this.writeCodexDurability(record, durability, updatedAt) + } + + private async persistCodexCandidate(terminalId: string, candidate: CodexRemoteProxyCandidate): Promise<void> { + const previous = this.codexCandidatePersistenceQueues.get(terminalId) ?? 
Promise.resolve() + const next = previous + .catch(() => undefined) + .then(() => this.persistCodexCandidateSerial(terminalId, candidate)) + this.codexCandidatePersistenceQueues.set(terminalId, next) + void next.finally(() => { + if (this.codexCandidatePersistenceQueues.get(terminalId) === next) { + this.codexCandidatePersistenceQueues.delete(terminalId) + } + }).catch(() => undefined) + return next + } + + private async persistCodexCandidateSerial(terminalId: string, candidate: CodexRemoteProxyCandidate): Promise<void> { + const record = this.terminals.get(terminalId) + if (!record || record.status !== 'running') return + if (record.mode !== 'codex') return + if (record.resumeSessionId) return + + const capturedAt = Date.now() + const durability = this.buildCodexDurabilityRef(candidate, capturedAt) + if (!durability?.candidate) { + logger.warn({ + terminalId, + threadId: candidate.thread.id, + rolloutPath: candidate.thread.path, + ephemeral: candidate.thread.ephemeral, + source: candidate.source, + }, 'Ignoring Codex restore identity candidate without deterministic rollout path') + return + } + + if (record.codexDurability?.candidate) { + const existing = record.codexDurability.candidate + if ( + existing.candidateThreadId === durability.candidate.candidateThreadId + && existing.rolloutPath === durability.candidate.rolloutPath + ) { + record.codexSidecar?.markCandidatePersisted?.() + return + } + logger.warn({ + terminalId, + existingThreadId: existing.candidateThreadId, + candidateThreadId: durability.candidate.candidateThreadId, + }, 'Ignoring mismatched Codex restore identity candidate after one was already persisted') + return + } + + const stored = await this.codexDurabilityStore.write({ + ...durability, + terminalId: record.terminalId, + ...(record.envContext?.tabId ? { tabId: record.envContext.tabId } : {}), + ...(record.envContext?.paneId ? 
{ paneId: record.envContext.paneId } : {}), + serverInstanceId: this.serverInstanceId, + updatedAt: capturedAt, + }) + const latest = this.terminals.get(terminalId) + if ( + latest !== record + || record.status !== 'running' + || record.resumeSessionId + || record.codexDurability?.state === 'non_restorable' + ) { + if (record.status === 'running' && record.resumeSessionId && record.codexDurability?.state === 'durable') { + await this.replaceCodexDurabilityStoreRecord(record, record.codexDurability) + } else { + await this.codexDurabilityStore.delete(terminalId) + } + logger.warn({ + terminalId, + threadId: durability.candidate.candidateThreadId, + rolloutPath: durability.candidate.rolloutPath, + }, 'Discarded late Codex restore identity candidate after terminal stopped accepting candidates') + return + } + if (record.codexDurability?.candidate) { + const existing = record.codexDurability.candidate + if ( + existing.candidateThreadId === durability.candidate.candidateThreadId + && existing.rolloutPath === durability.candidate.rolloutPath + ) { + record.codexSidecar?.markCandidatePersisted?.() + } else if (record.codexDurability) { + await this.replaceCodexDurabilityStoreRecord(record, record.codexDurability) + } + return + } + const storedDurability = this.codexDurabilityRecordToRef(stored) + record.codexDurability = storedDurability + record.codexInputGate = undefined + record.codexSidecar?.markCandidatePersisted?.() + this.armCodexRolloutWatch(record) + logger.info({ + terminalId, + candidateThreadId: storedDurability.candidate?.candidateThreadId, + rolloutPath: storedDurability.candidate?.rolloutPath, + source: storedDurability.candidate?.source, + }, 'Persisted Codex restore identity before user input') + if (storedDurability.candidate) { + recordSessionLifecycleEvent({ + kind: 'codex_candidate_captured', + provider: 'codex', + terminalId, + candidateThreadId: storedDurability.candidate.candidateThreadId, + rolloutPath: storedDurability.candidate.rolloutPath, + 
source: storedDurability.candidate.source, + generation: record.codexSidecarGeneration ?? 0, + }) + } + this.broadcastCodexDurability(record, storedDurability) + } + + private async failCodexFreshIdentity(terminalId: string, reason: string): Promise<void> { + const record = this.terminals.get(terminalId) + if (!record || record.mode !== 'codex' || record.status !== 'running') return + if (record.codexDurability?.candidate || record.resumeSessionId) return + + const durability: CodexDurabilityRef = { + schemaVersion: CODEX_DURABILITY_SCHEMA_VERSION, + state: 'non_restorable', + nonRestorableReason: reason, + } + try { + const stored = await this.writeCodexDurability(record, durability) + record.codexInputGate = undefined + this.broadcastCodexDurability(record, stored) + } catch (err) { + logger.error({ err, terminalId, reason }, 'Failed to persist non-restorable Codex identity state') + } + logger.warn({ terminalId, reason }, 'Closing Codex terminal before user input because restore identity was not captured') + await this.killAndWait(terminalId) + } + + private async handleCodexTurnStarted(terminalId: string, event: CodexTurnEvent): Promise<void> { + const record = this.terminals.get(terminalId) + if (!record || record.status !== 'running') return + if (!this.codexCandidateMatches(record, event.threadId)) return + if (!record.codexDurability?.candidate || record.codexDurability.state === 'durable') return + + const durability: CodexDurabilityRef = { + ...record.codexDurability, + state: 'turn_in_progress_unproven', + } + const stored = await this.writeCodexDurability(record, durability) + logger.info({ + terminalId, + candidateThreadId: stored.candidate?.candidateThreadId, + turnId: event.turnId, + }, 'Codex turn started before restore proof') + this.broadcastCodexDurability(record, stored) + } + + private async handleCodexTurnCompleted(terminalId: string, event: CodexTurnEvent): Promise<void> { + const record = this.terminals.get(terminalId) + if (!record || 
record.status !== 'running') return + if (!this.codexCandidateMatches(record, event.threadId)) return + if (!record.codexDurability?.candidate || record.codexDurability.state === 'durable') return + + const completedAt = Date.now() + const durability: CodexDurabilityRef = { + ...record.codexDurability, + state: 'proof_checking', + turnCompletedAt: completedAt, + } + const stored = await this.writeCodexDurability(record, durability, completedAt) + logger.info({ + terminalId, + candidateThreadId: stored.candidate?.candidateThreadId, + rolloutPath: stored.candidate?.rolloutPath, + turnId: event.turnId, + }, 'Codex turn completed; checking rollout proof') + this.broadcastCodexDurability(record, stored) + this.requestCodexDurabilityProof(terminalId, 'turn_completed') + } + + private requestCodexDurabilityProof(terminalId: string, trigger: string): void { + const record = this.terminals.get(terminalId) + if ( + !record + || !record.codexDurability?.candidate + || record.codexDurability.state === 'durable' + || record.codexDurability.state === 'non_restorable' + ) return + if (record.codexDurability.turnCompletedAt === undefined) { + logger.debug({ terminalId, trigger }, 'Skipping Codex rollout proof before turn completion') + return + } + const proofState = record.codexDurabilityProof ?? 
{} + record.codexDurabilityProof = proofState + if (proofState.inFlight) { + proofState.rerunRequested = true + return + } + + const run = async (): Promise<void> => { + do { + proofState.rerunRequested = false + await this.runCodexDurabilityProof(terminalId, trigger) + } while (proofState.rerunRequested) + } + proofState.inFlight = run() + .catch((err) => { + logger.error({ err, terminalId, trigger }, 'Codex rollout proof execution failed') + }) + .finally(() => { + const current = this.terminals.get(terminalId) + if (current?.codexDurabilityProof === proofState) { + proofState.inFlight = undefined + proofState.rerunRequested = false + } + }) + } + + private async runCodexDurabilityProof(terminalId: string, trigger: string): Promise<void> { + const record = this.terminals.get(terminalId) + if ( + !record + || !record.codexDurability?.candidate + || record.codexDurability.state === 'durable' + || record.codexDurability.state === 'non_restorable' + ) return + const candidate = record.codexDurability.candidate + const preProofDurability = record.codexDurability + + const checking: CodexDurabilityRef = { + ...record.codexDurability, + state: 'proof_checking', + } + const checkingStored = await this.writeCodexDurability(record, checking) + this.broadcastCodexDurability(record, checkingStored) + + const proof = await proofCodexRollout({ + rolloutPath: candidate.rolloutPath, + candidateThreadId: candidate.candidateThreadId, + }) + const checkedAt = Date.now() + if (proof.ok) { + const bound = this.bindSession(terminalId, 'codex', proof.rolloutProofId, 'association') + if (!bound.ok) { + const failed: CodexDurabilityRef = { + ...checkingStored, + state: 'non_restorable', + lastProofFailure: undefined, + nonRestorableReason: `session_binding_failed:${bound.reason}`, + } + const stored = await this.writeCodexDurability(record, failed, checkedAt) + record.codexDurabilityProof = undefined + this.unwatchCodexRollout(record, 'session_binding_failed') + logger.warn({ terminalId, 
proof, reason: bound.reason }, 'Codex rollout proof succeeded but session binding failed') + this.broadcastCodexDurability(record, stored) + await this.killAndWait(terminalId).catch((err) => { + logger.warn({ err, terminalId }, 'Failed to close Codex terminal after session binding failure') + }) + return + } + const durable: CodexDurabilityRef = { + ...checkingStored, + state: 'durable', + durableThreadId: proof.rolloutProofId, + lastProofFailure: undefined, + } + const stored = await this.writeCodexDurability(record, durable, checkedAt) + record.codexDurabilityProof = undefined + this.unwatchCodexRollout(record, 'durable') + logger.info({ + terminalId, + candidateThreadId: candidate.candidateThreadId, + durableThreadId: proof.rolloutProofId, + rolloutPath: candidate.rolloutPath, + trigger, + }, 'Codex rollout proof succeeded') + this.broadcastCodexDurability(record, stored) + this.broadcastCodexSessionAssociated(record, proof.rolloutProofId) + recordSessionLifecycleEvent({ + kind: 'codex_durable_session_observed', + provider: 'codex', + terminalId, + sessionId: proof.rolloutProofId, + generation: record.codexSidecarGeneration ?? 0, + source: 'sidecar', + }) + return + } + + const failed: CodexDurabilityRef = { + ...checkingStored, + state: checkingStored.turnCompletedAt !== undefined + ? 
'durability_unproven_after_completion' + : preProofDurability.state, + lastProofFailure: { + reason: proof.reason, + message: proof.message, + checkedAt, + }, + } + const stored = await this.writeCodexDurability(record, failed, checkedAt) + logger.warn({ + terminalId, + candidateThreadId: candidate.candidateThreadId, + rolloutPath: candidate.rolloutPath, + trigger, + reason: proof.reason, + message: proof.message, + }, 'Codex rollout proof failed') + this.broadcastCodexDurability(record, stored) + } + + async promoteCodexDurabilityFromCreateProof( + terminalId: string, + durableThreadId: string, + checkedAt = Date.now(), + ): Promise<BindSessionResult> { + const record = this.terminals.get(terminalId) + if (!record) return { ok: false, reason: 'terminal_missing' } + if (record.mode !== 'codex') return { ok: false, reason: 'mode_mismatch' } + if (record.status !== 'running') return { ok: false, reason: 'terminal_not_running' } + + const bound = this.bindSession(terminalId, 'codex', durableThreadId, 'association') + if (!bound.ok) return bound + const sessionId = bound.sessionId + record.resumeSessionId = sessionId + + const durability: CodexDurabilityRef = { + schemaVersion: CODEX_DURABILITY_SCHEMA_VERSION, + state: 'durable', + ...(record.codexDurability?.candidate ? { candidate: record.codexDurability.candidate } : {}), + ...(record.codexDurability?.turnCompletedAt !== undefined ? 
{ turnCompletedAt: record.codexDurability.turnCompletedAt } : {}), + durableThreadId: sessionId, + } + const stored = await this.writeCodexDurability(record, durability, checkedAt) + record.codexDurabilityProof = undefined + this.unwatchCodexRollout(record, 'durable') + logger.info({ + terminalId, + durableThreadId: sessionId, + }, 'Codex rollout proof promoted captured restore state during terminal.create') + this.broadcastCodexDurability(record, stored) + recordSessionLifecycleEvent({ + kind: 'codex_durable_session_observed', + provider: 'codex', + terminalId, + sessionId, + generation: record.codexSidecarGeneration ?? 0, + source: 'sidecar', + }) + return { ok: true, terminalId, sessionId } + } + + private needsCodexFinalDurabilityProof(record: TerminalRecord): boolean { + return record.mode === 'codex' + && !record.resumeSessionId + && !!record.codexDurability?.candidate + && record.codexDurability.state !== 'durable' + && record.codexDurability.state !== 'non_restorable' + } + + private async proveCodexBeforeFinalLoss(record: TerminalRecord, trigger: string): Promise<void> { + if (!this.needsCodexFinalDurabilityProof(record)) return + try { + await this.runCodexDurabilityProof(record.terminalId, trigger) + } catch (err) { + logger.warn({ err, terminalId: record.terminalId, trigger }, 'Final Codex rollout proof read failed') + } + } + + private closeCodexTerminalAfterBlockedLifecycleLoss(record: TerminalRecord, event: unknown): void { + if (!record.codexRecoveryBlockedError) return + if (this.terminals.get(record.terminalId) !== record || record.status !== 'running') return + logger.error( + { err: record.codexRecoveryBlockedError, terminalId: record.terminalId, event }, + 'Closing Codex terminal because durable recovery is blocked after lifecycle loss', + ) + this.kill(record.terminalId) + } + + private broadcastCodexDurability(record: TerminalRecord, durability: CodexDurabilityRef): void { + for (const client of record.clients) { + this.safeSend(client, { + 
type: 'terminal.codex.durability.updated', + terminalId: record.terminalId, + durability, + }, { terminalId: record.terminalId, perf: record.perf }) + } + this.emit('terminal.codex.durability.updated', { + terminalId: record.terminalId, + durability, + }) + } + + private broadcastCodexSessionAssociated(record: TerminalRecord, sessionId: string): void { + for (const client of record.clients) { + this.safeSend(client, { + type: 'terminal.session.associated', + terminalId: record.terminalId, + sessionRef: { + provider: 'codex', + sessionId, + }, + }, { terminalId: record.terminalId, perf: record.perf }) + } } publishCodexSidecar(terminalId: string): void { @@ -1495,51 +2221,124 @@ export class TerminalRegistry extends EventEmitter { } if (!record.resumeSessionId || !record.codexRecovery) { - logger.warn( - { terminalId, event }, - 'Codex app-server reported terminal lifecycle loss without durable recovery; closing terminal', - ) - void this.killAndWait(terminalId).catch((err) => { - logger.error({ err, terminalId }, 'Failed to close terminal after Codex app-server lifecycle loss') - }) + void (async () => { + await this.proveCodexBeforeFinalLoss(record, 'lifecycle_loss') + if (record.status !== 'running') return + if (record.resumeSessionId && record.codexRecovery) { + if (!this.startCodexDurableRecovery(record, { source: 'lifecycle_loss', event })) { + this.closeCodexTerminalAfterBlockedLifecycleLoss(record, event) + } + return + } + logger.warn( + { terminalId, event }, + 'Codex app-server reported terminal lifecycle loss without durable recovery; closing terminal', + ) + await this.killAndWait(terminalId).catch((err) => { + logger.error({ err, terminalId }, 'Failed to close terminal after Codex app-server lifecycle loss') + }) + })() return } + if (!this.startCodexDurableRecovery(record, { source: 'lifecycle_loss', event })) { + this.closeCodexTerminalAfterBlockedLifecycleLoss(record, event) + } + } + + private startCodexDurableRecovery( + record: TerminalRecord, + 
trigger: { source: 'lifecycle_loss'; event: unknown } | { source: 'pty_exit'; exitCode: number; signal?: number }, + ): boolean { + if ( + record.mode !== 'codex' + || record.status !== 'running' + || record.codexRecoveryFinalClose + || !record.resumeSessionId + || !record.codexRecovery + ) { + return false + } + if (record.codexRecoveryBlockedError) { logger.error( - { err: record.codexRecoveryBlockedError, terminalId, event }, + { err: record.codexRecoveryBlockedError, terminalId: record.terminalId, trigger }, 'Codex durable recovery is blocked by a previous sidecar teardown failure', ) - return + return false } - if (record.codexRecoveryAttempt) return + if (record.codexRecoveryAttempt) return true logger.warn( - { terminalId, event, resumeSessionId: record.resumeSessionId }, - 'Codex app-server reported terminal lifecycle loss; starting durable recovery', + { terminalId: record.terminalId, trigger, resumeSessionId: record.resumeSessionId }, + 'Codex durable terminal lost its live worker; starting durable recovery', ) - const attempt = this.runCodexRecoveryLoop(terminalId) + const attempt = this.runCodexRecoveryLoop(record.terminalId) .catch((err) => { - logger.error({ err, terminalId }, 'Codex durable recovery loop failed') + logger.error({ err, terminalId: record.terminalId }, 'Codex durable recovery loop failed') + if (record.codexRecoveryBlockedError && this.terminals.get(record.terminalId) === record && record.status === 'running') { + if (trigger.source === 'pty_exit') { + this.finishTerminalPtyExit(record, { + exitCode: trigger.exitCode, + signal: trigger.signal, + }) + } else { + this.closeCodexTerminalAfterBlockedLifecycleLoss(record, trigger.event) + } + } }) .finally(() => { - const latest = this.terminals.get(terminalId) + const latest = this.terminals.get(record.terminalId) if (latest?.codexRecoveryAttempt === attempt) { latest.codexRecoveryAttempt = undefined } }) record.codexRecoveryAttempt = attempt + return true } private 
canContinueCodexRecovery(record: TerminalRecord | undefined, resumeSessionId?: string): record is TerminalRecord { - return !!record - && record.status === 'running' - && !record.codexRecoveryFinalClose - && !record.codexRecoveryBlockedError - && !!record.codexRecovery - && !!record.resumeSessionId - && (!resumeSessionId || record.resumeSessionId === resumeSessionId) + const expectedResumeSessionId = resumeSessionId ?? record?.resumeSessionId + if ( + !record + || record.status !== 'running' + || record.codexRecoveryFinalClose + || record.codexRecoveryBlockedError + || !record.codexRecovery + || !expectedResumeSessionId + ) { + return false + } + + return this.ensureCodexRecoverySessionBinding(record, expectedResumeSessionId) + } + + private ensureCodexRecoverySessionBinding(record: TerminalRecord, resumeSessionId: string): boolean { + if ( + record.status !== 'running' + || record.codexRecoveryFinalClose + || record.codexRecoveryBlockedError + || !record.codexRecovery + ) { + return false + } + + const provider = record.mode as CodingCliProviderName + const expectedKey = makeSessionKey(provider, resumeSessionId) + const owner = this.bindingAuthority.ownerForSession(provider, resumeSessionId) + if (owner && owner !== record.terminalId) return false + + const currentBinding = this.bindingAuthority.sessionForTerminal(record.terminalId) + if (currentBinding && currentBinding !== expectedKey) return false + + if (!currentBinding) { + const bound = this.bindSession(record.terminalId, provider, resumeSessionId, 'resume') + if (!bound.ok) return false + } + + record.resumeSessionId = resumeSessionId + return true } private async runCodexRecoveryLoop(terminalId: string): Promise<void> { @@ -1659,10 +2458,6 @@ export class TerminalRegistry extends EventEmitter { candidate = this.spawnCodexRecoveryPty(record, plan, resumeSessionId) await plan.sidecar.adopt({ terminalId: record.terminalId, generation }) - await plan.sidecar.waitForLoadedThread(resumeSessionId, { - 
...(recovery.readinessTimeoutMs !== undefined ? { timeoutMs: recovery.readinessTimeoutMs } : {}), - ...(recovery.readinessPollMs !== undefined ? { pollMs: recovery.readinessPollMs } : {}), - }) if (candidate.exited) { throw new Error(`Codex recovery candidate PTY exited before publication with code ${candidate.exitCode ?? 'unknown'}.`) } @@ -1716,7 +2511,25 @@ export class TerminalRegistry extends EventEmitter { published = true try { + let oldPtyExited = false + let forceRetireTimer: NodeJS.Timeout | undefined + oldPty.onExit(() => { + oldPtyExited = true + if (forceRetireTimer) { + clearTimeout(forceRetireTimer) + forceRetireTimer = undefined + } + }) oldPty.kill('SIGTERM') + forceRetireTimer = setTimeout(() => { + if (oldPtyExited) return + try { + oldPty.kill('SIGKILL') + } catch { + // The old PTY may already be gone; the delayed kill is only a safety net. + } + }, 500) + forceRetireTimer.unref?.() } catch (err) { logger.warn({ err, terminalId: record.terminalId }, 'Failed to retire previous Codex recovery PTY') } @@ -1809,24 +2622,25 @@ export class TerminalRegistry extends EventEmitter { return } if (record.pty !== ptyProc || record.status === 'exited') return - this.markCodexRecoveryFinalClose(record) - record.status = 'exited' - record.exitCode = event.exitCode - const now = Date.now() - record.lastActivityAt = now - record.exitedAt = now - cleanupMcpConfig(record.terminalId, record.mode, record.mcpCwd) - for (const client of record.clients) { - this.flushOutputBuffer(client) - this.safeSend(client, { type: 'terminal.exit', terminalId: record.terminalId, exitCode: event.exitCode }, { terminalId: record.terminalId, perf: record.perf }) + const finishExit = () => { + if (this.startCodexDurableRecovery(record, { + source: 'pty_exit', + exitCode: event.exitCode, + signal: event.signal, + })) { + return + } + this.finishTerminalPtyExit(record, event) + } + if (this.needsCodexFinalDurabilityProof(record)) { + void (async () => { + await 
this.proveCodexBeforeFinalLoss(record, 'pty_exit') + if (record.pty !== ptyProc || record.status === 'exited') return + finishExit() + })() + return } - record.clients.clear() - record.suppressedOutputClients.clear() - record.pendingSnapshotClients.clear() - this.releaseBinding(record.terminalId, 'exit') - this.emit('terminal.exit', { terminalId: record.terminalId, exitCode: event.exitCode }) - void this.releaseCodexSidecar(record).catch(() => undefined) - this.reapExitedTerminals() + finishExit() }) } @@ -1861,9 +2675,33 @@ export class TerminalRegistry extends EventEmitter { return true } - input(terminalId: string, data: string): boolean { + input(terminalId: string, data: string): TerminalInputResult { const term = this.terminals.get(terminalId) - if (!term || term.status !== 'running') return false + if (!term) return { status: 'no_terminal' } + if ( + term.mode === 'codex' + && term.codexDurability?.state === 'non_restorable' + ) { + if (term.codexDurability.nonRestorableReason === 'candidate_capture_timeout') { + return { status: 'blocked_codex_identity_capture_timeout', terminalId } + } + return { + status: 'blocked_codex_identity_unavailable', + terminalId, + reason: term.codexDurability.nonRestorableReason, + } + } + if (term.status !== 'running') return { status: 'not_running' } + if (term.codexInputGate?.state === 'identity_pending') { + if (isCodexStartupTerminalControlInput(data)) { + term.pty.write(data) + return { status: 'written' } + } + return { status: 'blocked_codex_identity_pending', terminalId } + } + if (term.codexRecoveryAttempt) { + return { status: 'blocked_codex_recovery_pending', terminalId } + } const now = Date.now() term.lastActivityAt = now if (term.perf) { @@ -1882,6 +2720,32 @@ export class TerminalRegistry extends EventEmitter { data, at: now, } satisfies TerminalInputRawEvent) + return { status: 'written' } + } + + acknowledgeCodexCandidatePersisted(input: { + terminalId: string + candidateThreadId: string + rolloutPath: string 
+ }): 'accepted' | 'missing_terminal' | 'mismatch' | 'no_candidate' { + const term = this.terminals.get(input.terminalId) + if (!term) return 'missing_terminal' + const candidate = term.codexDurability?.candidate + if (!candidate) return 'no_candidate' + if ( + candidate.candidateThreadId !== input.candidateThreadId + || candidate.rolloutPath !== input.rolloutPath + ) { + return 'mismatch' + } + return 'accepted' + } + + releaseCodexInputGateForTest(terminalId: string): boolean { + const term = this.terminals.get(terminalId) + if (!term) return false + term.codexInputGate = undefined + term.codexSidecar?.markCandidatePersisted?.() return true } @@ -1928,6 +2792,7 @@ export class TerminalRegistry extends EventEmitter { this.releaseBinding(terminalId, 'exit') this.emit('terminal.exit', { terminalId, exitCode: term.exitCode }) this.recordTerminalExitWithoutDurableSession(term, term.exitCode, 'user_final_close') + this.forgetCodexDurabilityStoreRecord(term, 'user_final_close') void this.releaseCodexSidecar(term).catch(() => undefined) this.reapExitedTerminals() return true @@ -1955,6 +2820,7 @@ export class TerminalRegistry extends EventEmitter { if (!term) return false this.kill(terminalId) this.terminals.delete(terminalId) + this.forgetCodexDurabilityStoreRecord(term, 'remove') return true } @@ -1962,6 +2828,7 @@ export class TerminalRegistry extends EventEmitter { const existing = this.sidecarShutdowns.get(this.sidecarShutdownKey(term.terminalId)) if (existing?.status === 'pending') return existing.promise + this.unwatchCodexRollout(term, 'sidecar_release') term.codexSidecarLifecycleUnsubscribe?.() term.codexSidecarLifecycleUnsubscribe = undefined const sidecar = term.codexSidecar @@ -2074,11 +2941,13 @@ export class TerminalRegistry extends EventEmitter { description?: string mode: TerminalMode resumeSessionId?: string + sessionRef?: { provider: CodingCliProviderName; sessionId: string } createdAt: number lastActivityAt: number status: 'running' | 'exited' 
hasClients: boolean cwd?: string + codexDurability?: CodexDurabilityRef }> { return Array.from(this.terminals.values()).map((t) => ({ terminalId: t.terminalId, @@ -2086,11 +2955,20 @@ export class TerminalRegistry extends EventEmitter { description: t.description, mode: t.mode, resumeSessionId: t.resumeSessionId, + sessionRef: modeSupportsResume(t.mode) + && t.resumeSessionId + && (t.mode !== 'codex' || ( + t.codexDurability?.state === 'durable' + && t.codexDurability.durableThreadId === t.resumeSessionId + )) + ? { provider: t.mode as CodingCliProviderName, sessionId: t.resumeSessionId } + : undefined, createdAt: t.createdAt, lastActivityAt: t.lastActivityAt, status: t.status, hasClients: t.clients.size > 0, cwd: t.cwd, + codexDurability: t.codexDurability, })) } @@ -2372,6 +3250,21 @@ export class TerminalRegistry extends EventEmitter { return matches[0] } + findRunningCodexTerminalByCandidate(candidateThreadId: string, rolloutPath: string): TerminalRecord | undefined { + for (const term of this.terminals.values()) { + const candidate = term.codexDurability?.candidate + if ( + term.mode === 'codex' + && term.status === 'running' + && candidate?.candidateThreadId === candidateThreadId + && candidate.rolloutPath === rolloutPath + ) { + return term + } + } + return undefined + } + repairLegacySessionOwners(mode: TerminalMode, sessionId: string, cwd?: string): RepairLegacySessionOwnersResult { if (!modeSupportsResume(mode)) { return { repaired: false, clearedTerminalIds: [] } diff --git a/server/terminal-view/service.ts b/server/terminal-view/service.ts index 969a26a15..2073a320f 100644 --- a/server/terminal-view/service.ts +++ b/server/terminal-view/service.ts @@ -4,6 +4,7 @@ import { type TerminalDirectoryQuery, } from '../../shared/read-models.js' import type { SessionLocator } from '../../shared/ws-protocol.js' +import type { CodexDurabilityRef } from '../../shared/codex-durability.js' import { TerminalViewMirror } from './mirror.js' import type { 
TerminalDirectoryItem, @@ -25,6 +26,8 @@ type TerminalListRecord = { description?: string mode: TerminalMode resumeSessionId?: string + sessionRef?: SessionLocator + codexDurability?: CodexDurabilityRef createdAt: number lastActivityAt: number status: 'running' | 'exited' @@ -116,12 +119,15 @@ function buildSessionRef(mode: TerminalMode, resumeSessionId?: string): SessionL } function buildDirectoryItem(terminal: TerminalListRecord): TerminalDirectoryItem { + const sessionRef = terminal.sessionRef + ?? (terminal.mode === 'codex' ? undefined : buildSessionRef(terminal.mode, terminal.resumeSessionId)) return { terminalId: terminal.terminalId, title: terminal.title, description: terminal.description, mode: terminal.mode, - sessionRef: buildSessionRef(terminal.mode, terminal.resumeSessionId), + sessionRef, + codexDurability: terminal.codexDurability, createdAt: terminal.createdAt, lastActivityAt: terminal.lastActivityAt, status: terminal.status, diff --git a/server/terminal-view/types.ts b/server/terminal-view/types.ts index d5abf8666..71462ed65 100644 --- a/server/terminal-view/types.ts +++ b/server/terminal-view/types.ts @@ -1,6 +1,7 @@ import type { TerminalMode } from '../terminal-registry.js' import type { TerminalDirectoryQuery } from '../../shared/read-models.js' import type { SessionLocator } from '../../shared/ws-protocol.js' +import type { CodexDurabilityRef } from '../../shared/codex-durability.js' export type TerminalDirectoryItem = { terminalId: string @@ -13,6 +14,7 @@ export type TerminalDirectoryItem = { status: 'running' | 'exited' hasClients: boolean cwd?: string + codexDurability?: CodexDurabilityRef } export type TerminalDirectoryPage = { diff --git a/server/ws-handler.ts b/server/ws-handler.ts index e7cee3ca8..7c68591b7 100644 --- a/server/ws-handler.ts +++ b/server/ws-handler.ts @@ -34,6 +34,10 @@ import type { TabsRegistryStore } from './tabs-registry/store.js' import type { ServerSettings } from '../shared/settings.js' import { stripAnsi } from 
'./ai-prompts.js' import type { CodexLaunchPlan, CodexLaunchPlanner } from './coding-cli/codex-app-server/launch-planner.js' +import { + CODEX_INITIAL_LAUNCH_ATTEMPTS, + planCodexLaunchWithRetry, +} from './coding-cli/codex-app-server/launch-retry.js' import { CodexLaunchConfigError, getCodexSessionBindingReason, @@ -43,6 +47,7 @@ import { ErrorCode, ShellSchema, CodingCliProviderSchema, + SessionLocatorSchema, TerminalMetaUpdatedSchema, CodexActivityListResponseSchema, CodexActivityListSchema, @@ -53,6 +58,7 @@ import { HelloSchema, PingSchema, ClientDiagnosticSchema, + TerminalCodexCandidatePersistedSchema, TerminalAttachSchema, TerminalDetachSchema, TerminalInputSchema, @@ -72,8 +78,14 @@ import { UiScreenshotResultSchema, WS_PROTOCOL_VERSION, } from '../shared/ws-protocol.js' +import { LiveTerminalHandleSchema, type RestoreError } from '../shared/session-contract.js' +import { CODEX_DURABILITY_SCHEMA_VERSION, CodexDurabilityRefSchema } from '../shared/codex-durability.js' import { UiLayoutSyncSchema } from './agent-api/layout-schema.js' import type { LayoutStore } from './agent-api/layout-store.js' +import { + planCodexCreateRestoreDecision, + resolveCodexCreateRestoreDecision, +} from './coding-cli/codex-app-server/restore-decision.js' type WsHandlerConfig = { maxConnections: number @@ -222,6 +234,29 @@ function normalizeUiSessionLocator(value: unknown): SidebarSessionLocator | unde } } +function normalizeTerminalInventoryForClient(value: unknown): unknown { + if (!value || typeof value !== 'object') return value + const terminal = value as Record<string, unknown> + const { resumeSessionId: legacyResumeSessionId, ...rest } = terminal + const explicitSessionRef = normalizeUiSessionLocator(terminal.sessionRef) + const provider = typeof terminal.mode === 'string' && modeSupportsResume(terminal.mode as TerminalMode) + ? 
terminal.mode + : undefined + const codexDurability = terminal.codexDurability as { state?: unknown; durableThreadId?: unknown } | undefined + const canMigrateLegacySessionRef = provider !== 'codex' || ( + codexDurability?.state === 'durable' + && codexDurability.durableThreadId === legacyResumeSessionId + ) + const migratedSessionRef = provider && isNonEmptyString(legacyResumeSessionId) && canMigrateLegacySessionRef + ? { provider, sessionId: legacyResumeSessionId } + : undefined + const sessionRef = explicitSessionRef ?? migratedSessionRef + return { + ...rest, + ...(sessionRef ? { sessionRef } : {}), + } +} + function extractSessionLocatorsFromUiContent(content: Record<string, unknown>): SidebarSessionLocator[] { const locators: SidebarSessionLocator[] = [] @@ -403,6 +438,15 @@ export class WsHandler { if (!payload?.terminalId) return this.forgetCreatedRequestIdsForTerminal(payload.terminalId) } + private onCodexDurabilityUpdatedBound = (payload: { terminalId?: string; durability?: unknown }) => { + if (!payload?.terminalId || payload.durability === undefined) return + this.broadcast({ + type: 'terminal.codex.durability.updated', + terminalId: payload.terminalId, + durability: payload.durability, + }) + this.broadcastTerminalsChanged() + } private sessionRepairListeners?: { scanned: (result: SessionScanResult) => void repaired: (result: SessionRepairResult) => void @@ -439,6 +483,7 @@ export class WsHandler { ? 
options.serverInstanceId : `srv-${randomUUID()}` this.bootId = `boot-${randomUUID()}` + this.registry.setServerInstanceId?.(this.serverInstanceId) this.terminalStreamBroker = new TerminalStreamBroker(this.registry) // Build the set of valid CLI provider/mode names from extensions @@ -467,10 +512,13 @@ export class WsHandler { shell: ShellSchema.default('system'), cwd: z.string().optional(), resumeSessionId: z.string().optional(), + sessionRef: SessionLocatorSchema.optional(), + codexDurability: CodexDurabilityRefSchema.optional(), + liveTerminal: LiveTerminalHandleSchema.optional(), restore: z.boolean().optional(), tabId: z.string().min(1).optional(), paneId: z.string().min(1).optional(), - }) + }).strict() const dynamicProviderSchema = CodingCliProviderSchema.superRefine((val, ctx) => { if (!canEnumerateCliExtensions || extensionModes.includes(val)) return @@ -491,13 +539,14 @@ export class WsHandler { maxTurns: z.number().int().positive().optional(), permissionMode: z.enum(['default', 'plan', 'acceptEdits', 'bypassPermissions']).optional(), sandbox: z.enum(['read-only', 'workspace-write', 'danger-full-access']).optional(), - }) + }).strict() this.clientMessageSchema = z.discriminatedUnion('type', [ HelloSchema, PingSchema, ClientDiagnosticSchema, dynamicTerminalCreateSchema, + TerminalCodexCandidatePersistedSchema, TerminalAttachSchema, TerminalDetachSchema, TerminalInputSchema, @@ -526,6 +575,7 @@ export class WsHandler { on?: (event: string, listener: (...args: any[]) => void) => void } registryWithEvents.on?.('terminal.exit', this.onTerminalExitBound) + registryWithEvents.on?.('terminal.codex.durability.updated', this.onCodexDurabilityUpdatedBound) this.wss = new WebSocketServer({ server, path: '/ws', @@ -679,16 +729,23 @@ export class WsHandler { cwd: string | undefined, resumeSessionId: string | undefined, providerSettings: { model?: string; sandbox?: string; permissionMode?: string } | undefined, + attempts = 1, ) { if (!this.codexLaunchPlanner) { throw new 
Error('Codex terminal launch requires the app-server launch planner.') } - return this.codexLaunchPlanner.planCreate({ + const input = { cwd, resumeSessionId, model: providerSettings?.model, sandbox: normalizeCodexSandboxSetting(providerSettings?.sandbox), approvalPolicy: providerSettings?.permissionMode, + } + return planCodexLaunchWithRetry({ + planner: this.codexLaunchPlanner, + input, + attempts, + logger: log, }) } @@ -1231,6 +1288,13 @@ export class WsHandler { ws: LiveWebSocket, params: { code: z.infer<typeof ErrorCode>; message: string; requestId?: string; terminalId?: string } ) { + log.warn({ + connectionId: ws.connectionId || 'unknown', + code: params.code, + message: params.message, + ...(params.requestId ? { requestId: params.requestId } : {}), + ...(params.terminalId ? { terminalId: params.terminalId } : {}), + }, 'Sending WebSocket error') this.send(ws, { type: 'error', code: params.code, @@ -1627,7 +1691,7 @@ export class WsHandler { } // Send terminal inventory so the client knows what's alive - const terminals = this.registry.list() + const terminals = this.registry.list().map(normalizeTerminalInventoryForClient) const terminalMeta = this.terminalMetaListProvider?.() ?? [] this.safeSend(ws, { type: 'terminal.inventory', @@ -1821,8 +1885,8 @@ export class WsHandler { ...(m.cwd ? { cwd: m.cwd } : {}), mode: m.mode as TerminalMode, restoreRequested: m.restore === true, - hasRequestedSessionRef: false, - ...(m.resumeSessionId ? { requestedSessionId: m.resumeSessionId } : {}), + hasRequestedSessionRef: !!m.sessionRef, + ...(m.resumeSessionId || m.sessionRef?.sessionId ? { requestedSessionId: m.resumeSessionId ?? 
m.sessionRef.sessionId } : {}), }) const endCreateTimer = startPerfTimer( 'terminal_create', @@ -1834,7 +1898,101 @@ export class WsHandler { let reused = false let error = false let rateLimited = false - let effectiveResumeSessionId = m.resumeSessionId + const requestedSessionRef = normalizeUiSessionLocator(m.sessionRef) + let codexDurabilityForDecision = m.codexDurability + let codexDurabilityStoreRecordTerminalId: string | undefined + if (m.mode === 'codex' && m.restore === true && !requestedSessionRef && !codexDurabilityForDecision) { + try { + const restoreRecord = await this.registry.readCodexDurabilityRecordForRestoreLocator({ + ...(m.liveTerminal?.terminalId ? { terminalId: m.liveTerminal.terminalId } : {}), + ...(m.tabId ? { tabId: m.tabId } : {}), + ...(m.paneId ? { paneId: m.paneId } : {}), + ...(m.liveTerminal?.serverInstanceId ? { serverInstanceId: m.liveTerminal.serverInstanceId } : {}), + }) + codexDurabilityForDecision = restoreRecord?.durability + codexDurabilityStoreRecordTerminalId = restoreRecord?.terminalId + } catch (err) { + error = true + log.warn({ + err, + requestId: m.requestId, + connectionId: ws.connectionId, + tabId: m.tabId, + paneId: m.paneId, + terminalId: m.liveTerminal?.terminalId, + }, 'Failed to resolve Codex durability record for restore locator') + this.sendError(ws, { + code: 'RESTORE_UNAVAILABLE', + message: 'Codex restore identity is ambiguous or unavailable.', + requestId: m.requestId, + }) + endCreateTimer({ error, rateLimited }) + return + } + } + const codexRestorePlan = m.mode === 'codex' + ? 
planCodexCreateRestoreDecision({ + restoreRequested: m.restore === true, + legacyResumeSessionId: m.resumeSessionId, + sessionRef: requestedSessionRef, + codexDurability: codexDurabilityForDecision, + }) + : undefined + let effectiveResumeSessionId: string | undefined + if (codexRestorePlan?.kind === 'durable_session_ref_resume') { + effectiveResumeSessionId = codexRestorePlan.sessionId + } else if (m.mode !== 'codex') { + effectiveResumeSessionId = m.resumeSessionId + } + if (m.mode !== 'codex' && !effectiveResumeSessionId && requestedSessionRef && requestedSessionRef.provider === m.mode) { + effectiveResumeSessionId = requestedSessionRef.sessionId + } + if (codexRestorePlan?.kind === 'reject_invalid_raw_codex_resume_request') { + error = true + this.sendError(ws, { + code: codexRestorePlan.code, + message: codexRestorePlan.message, + requestId: m.requestId, + }) + endCreateTimer({ error, rateLimited }) + return + } + const hasCodexCapturedRestoreState = codexRestorePlan?.kind === 'proof_existing_candidate_first' + if ( + m.restore === true + && modeSupportsResume(m.mode as TerminalMode) + && m.mode !== 'codex' + && m.resumeSessionId + && !requestedSessionRef + ) { + error = true + this.sendError(ws, { + code: 'INVALID_MESSAGE', + message: 'Restore requires sessionRef; resumeSessionId is a legacy field and cannot be used as restore identity.', + requestId: m.requestId, + }) + endCreateTimer({ error, rateLimited }) + return + } + if ( + m.restore === true + && modeSupportsResume(m.mode as TerminalMode) + && !hasCodexCapturedRestoreState + && ( + !requestedSessionRef + || requestedSessionRef.provider !== m.mode + || (m.mode === 'claude' && !isValidClaudeSessionId(requestedSessionRef.sessionId)) + ) + ) { + error = true + this.sendError(ws, { + code: 'RESTORE_UNAVAILABLE', + message: 'Restore requires a canonical session reference.', + requestId: m.requestId, + }) + endCreateTimer({ error, rateLimited }) + return + } try { await this.withTerminalCreateLock( 
this.terminalCreateLockKey(m.mode as TerminalMode, m.requestId, effectiveResumeSessionId), @@ -1856,6 +2014,8 @@ export class WsHandler { terminalId: string createdAt: number effectiveResumeSessionId?: string + clearCodexDurability?: boolean + restoreError?: RestoreError }): Promise<boolean> => { if (opts.ws.readyState !== WebSocket.OPEN) { return false @@ -1866,7 +2026,8 @@ export class WsHandler { requestId: opts.requestId, terminalId: opts.terminalId, createdAt: opts.createdAt, - ...(opts.effectiveResumeSessionId ? { effectiveResumeSessionId: opts.effectiveResumeSessionId } : {}), + ...(opts.clearCodexDurability ? { clearCodexDurability: true } : {}), + ...(opts.restoreError ? { restoreError: opts.restoreError } : {}), }) return true } @@ -1905,6 +2066,50 @@ export class WsHandler { this.broadcastTerminalsChanged() return true } + const requestedLiveTerminal = (): TerminalRecord | undefined => { + if (m.liveTerminal?.serverInstanceId !== this.serverInstanceId) return undefined + const live = this.registry.get(m.liveTerminal.terminalId) + return live && live.status === 'running' && live.mode === m.mode ? 
live : undefined + } + const requestedLiveCodexCandidate = (candidate: { + candidateThreadId: string + rolloutPath: string + }): TerminalRecord | undefined => { + const live = requestedLiveTerminal() + if (!live) return undefined + const liveCandidate = live.codexDurability?.candidate + if ( + liveCandidate?.candidateThreadId !== candidate.candidateThreadId + || liveCandidate?.rolloutPath !== candidate.rolloutPath + ) { + log.warn({ + requestId: m.requestId, + connectionId: ws.connectionId, + terminalId: live.terminalId, + requestedCandidateThreadId: candidate.candidateThreadId, + liveCandidateThreadId: liveCandidate?.candidateThreadId, + }, 'Ignoring stale Codex live terminal handle with mismatched restore candidate') + return undefined + } + return live + } + const broadcastCodexSessionAssociated = (associatedTerminalId: string, sessionId: string) => { + this.broadcast({ + type: 'terminal.session.associated', + terminalId: associatedTerminalId, + sessionRef: { + provider: 'codex', + sessionId, + }, + }) + } + const broadcastCodexDurabilityUpdated = (associatedTerminalId: string, durability: unknown) => { + this.broadcast({ + type: 'terminal.codex.durability.updated', + terminalId: associatedTerminalId, + durability, + }) + } const existingId = resolveExistingRequestTerminalId(m.requestId) if (existingId) { @@ -1923,6 +2128,138 @@ export class WsHandler { this.forgetCreatedRequestId(m.requestId) } + let clearCodexDurabilityOnCreate = false + let restoreErrorOnCreate: RestoreError | undefined + let codexDurabilityStoreRecordToDeleteOnSuccessfulUse: string | undefined + const deleteCodexDurabilityStoreRecord = async (recordTerminalId: string | undefined, reason: string) => { + if (!recordTerminalId) return + await this.registry.deleteCodexDurabilityStoreRecord(recordTerminalId, reason) + if (codexDurabilityStoreRecordToDeleteOnSuccessfulUse === recordTerminalId) { + codexDurabilityStoreRecordToDeleteOnSuccessfulUse = undefined + } + } + if (m.mode === 'codex') { + 
const decision = await resolveCodexCreateRestoreDecision({ + restoreRequested: m.restore === true, + legacyResumeSessionId: m.resumeSessionId, + sessionRef: requestedSessionRef, + codexDurability: codexDurabilityForDecision, + findLiveTerminalByCandidate: (candidate) => ( + this.registry.findRunningCodexTerminalByCandidate( + candidate.candidateThreadId, + candidate.rolloutPath, + ) ?? requestedLiveCodexCandidate(candidate) + ), + }) + + if ( + decision.kind === 'reject_invalid_raw_codex_resume_request' + || decision.kind === 'reject_missing_codex_session_ref' + ) { + error = true + this.sendError(ws, { + code: decision.code, + message: decision.message, + requestId: m.requestId, + }) + return + } + + if (decision.kind === 'durable_session_ref_resume') { + effectiveResumeSessionId = decision.sessionId + } else if (decision.kind === 'fresh_codex_launch') { + effectiveResumeSessionId = undefined + } else if (decision.kind === 'proof_succeeded_resume_durable') { + const { candidate, liveTerminal: live } = decision + if (live) { + if (codexDurabilityStoreRecordTerminalId && codexDurabilityStoreRecordTerminalId !== live.terminalId) { + await deleteCodexDurabilityStoreRecord( + codexDurabilityStoreRecordTerminalId, + 'restore_proof_succeeded_attached_live', + ) + } + const promoted = typeof this.registry.promoteCodexDurabilityFromCreateProof === 'function' + ? await this.registry.promoteCodexDurabilityFromCreateProof(live.terminalId, decision.sessionId) + : undefined + const bound = promoted ?? this.registry.bindSession?.(live.terminalId, 'codex', decision.sessionId, 'association') + if (!bound || bound.ok) { + if (!promoted) { + live.resumeSessionId = decision.sessionId + live.codexDurability = { + schemaVersion: CODEX_DURABILITY_SCHEMA_VERSION, + state: 'durable', + durableThreadId: decision.sessionId, + } + } + broadcastCodexDurabilityUpdated(live.terminalId, live.codexDurability ?? 
{ + schemaVersion: CODEX_DURABILITY_SCHEMA_VERSION, + state: 'durable', + durableThreadId: decision.sessionId, + }) + await attachReusedTerminal(live.terminalId, live.createdAt, decision.sessionId) + broadcastCodexSessionAssociated(live.terminalId, decision.sessionId) + return + } + log.warn({ + requestId: m.requestId, + connectionId: ws.connectionId, + terminalId: live.terminalId, + sessionId: decision.sessionId, + reason: bound.reason, + }, 'Codex captured restore state proved durable but live terminal binding failed') + } + effectiveResumeSessionId = decision.sessionId + codexDurabilityStoreRecordToDeleteOnSuccessfulUse = codexDurabilityStoreRecordTerminalId + log.info({ + requestId: m.requestId, + connectionId: ws.connectionId, + candidateThreadId: candidate.candidateThreadId, + rolloutPath: candidate.rolloutPath, + }, 'Codex captured restore state proved durable during terminal.create') + } else if (decision.kind === 'proof_failed_attach_live_candidate') { + const { candidate, proof, liveTerminal: live } = decision + log.warn({ + requestId: m.requestId, + connectionId: ws.connectionId, + candidateThreadId: candidate.candidateThreadId, + rolloutPath: candidate.rolloutPath, + reason: proof.reason, + }, 'Codex captured restore state could not be proved during terminal.create') + if (codexDurabilityStoreRecordTerminalId && codexDurabilityStoreRecordTerminalId !== live.terminalId) { + await deleteCodexDurabilityStoreRecord( + codexDurabilityStoreRecordTerminalId, + 'restore_proof_failed_attached_live', + ) + } + await attachReusedTerminal(live.terminalId, live.createdAt, live.resumeSessionId) + return + } else if (decision.kind === 'proof_failed_fresh_create') { + const { candidate, proof } = decision + log.warn({ + requestId: m.requestId, + connectionId: ws.connectionId, + candidateThreadId: candidate.candidateThreadId, + rolloutPath: candidate.rolloutPath, + reason: proof.reason, + }, 'Codex captured restore state could not be proved during terminal.create') + 
await deleteCodexDurabilityStoreRecord( + codexDurabilityStoreRecordTerminalId, + 'restore_proof_failed_fresh_create', + ) + clearCodexDurabilityOnCreate = decision.clearCodexDurability + restoreErrorOnCreate = decision.restoreError + effectiveResumeSessionId = undefined + } + } + + if (!codexDurabilityForDecision?.candidate) { + const live = requestedLiveTerminal() + if (live) { + await attachReusedTerminal(live.terminalId, live.createdAt, live.resumeSessionId) + return + } + } + if (modeSupportsResume(m.mode as TerminalMode) && effectiveResumeSessionId) { let existing = this.registry.getCanonicalRunningTerminalBySession( m.mode as TerminalMode, @@ -1939,6 +2276,10 @@ export class WsHandler { ) } if (existing) { + await deleteCodexDurabilityStoreRecord( + codexDurabilityStoreRecordToDeleteOnSuccessfulUse, + 'restore_proof_succeeded_attached_existing', + ) await attachReusedTerminal(existing.terminalId, existing.createdAt, existing.resumeSessionId) return } @@ -1999,6 +2340,10 @@ export class WsHandler { ) } if (existing) { + await deleteCodexDurabilityStoreRecord( + codexDurabilityStoreRecordToDeleteOnSuccessfulUse, + 'restore_proof_succeeded_attached_existing', + ) await attachReusedTerminal(existing.terminalId, existing.createdAt, existing.resumeSessionId) return } @@ -2057,13 +2402,15 @@ export class WsHandler { : undefined this.assertTerminalCreateAccepted() const codexPlan = m.mode === 'codex' - ? await this.planCodexLaunch(m.cwd, requestedCodexResumeSessionId, providerSettings) + ? 
await this.planCodexLaunch( + m.cwd, + requestedCodexResumeSessionId, + providerSettings, + CODEX_INITIAL_LAUNCH_ATTEMPTS, + ) : undefined pendingCodexPlan = codexPlan - if (codexPlan) { - effectiveResumeSessionId = codexPlan.sessionId - } this.assertTerminalCreateAccepted() const codexRecovery = codexPlan @@ -2126,17 +2473,13 @@ export class WsHandler { if (codexPlan) { await codexPlan.sidecar.adopt({ terminalId: record.terminalId, generation: 0 }) this.assertTerminalCreateAccepted() - if (requestedCodexResumeSessionId) { - await codexPlan.sidecar.waitForLoadedThread(requestedCodexResumeSessionId) - this.assertTerminalCreateAccepted() - } assertCodexCreateTerminalRunning(record) this.assertTerminalCreateAccepted() this.registry.publishCodexSidecar?.(record.terminalId) pendingCodexPlan = undefined if (effectiveResumeSessionId) { recordSessionLifecycleEvent({ - kind: 'codex_durable_session_observed', + kind: 'codex_durable_resume_started', provider: 'codex', terminalId: record.terminalId, sessionId: effectiveResumeSessionId, @@ -2145,6 +2488,10 @@ export class WsHandler { }) } } + await deleteCodexDurabilityStoreRecord( + codexDurabilityStoreRecordToDeleteOnSuccessfulUse, + 'restore_proof_succeeded_created_replacement', + ) this.assertTerminalCreateAccepted() if (m.mode !== 'shell' && typeof m.cwd === 'string' && m.cwd.trim()) { @@ -2163,6 +2510,8 @@ export class WsHandler { terminalId: record.terminalId, createdAt: record.createdAt, effectiveResumeSessionId, + clearCodexDurability: clearCodexDurabilityOnCreate, + restoreError: restoreErrorOnCreate, }) if (!sent) { // Terminal may still exist even if created delivery failed (for @@ -2171,6 +2520,9 @@ export class WsHandler { this.broadcastTerminalsChanged() return } + if (m.mode === 'codex' && effectiveResumeSessionId) { + broadcastCodexSessionAssociated(record.terminalId, effectiveResumeSessionId) + } recordSessionLifecycleEvent({ kind: 'terminal_created', @@ -2315,9 +2667,62 @@ export class WsHandler { } case 
'terminal.input': { - const ok = this.registry.input(m.terminalId, m.data) - if (!ok) { - if (!this.registry.get(m.terminalId)) { + const result = this.registry.input(m.terminalId, m.data) + if (result.status === 'blocked_codex_identity_pending') { + log.debug({ + terminalId: m.terminalId, + connectionId: ws.connectionId, + attemptedInputBytes: Buffer.byteLength(m.data, 'utf8'), + }, 'Codex terminal input blocked until restore identity is captured') + this.send(ws, { + type: 'terminal.input.blocked', + terminalId: m.terminalId, + reason: 'codex_identity_pending', + }) + return + } + if (result.status === 'blocked_codex_identity_capture_timeout') { + log.warn({ + terminalId: m.terminalId, + connectionId: ws.connectionId, + attemptedInputBytes: Buffer.byteLength(m.data, 'utf8'), + }, 'Codex terminal input blocked after restore identity capture timed out') + this.send(ws, { + type: 'terminal.input.blocked', + terminalId: m.terminalId, + reason: 'codex_identity_capture_timeout', + }) + return + } + if (result.status === 'blocked_codex_identity_unavailable') { + log.warn({ + terminalId: m.terminalId, + connectionId: ws.connectionId, + attemptedInputBytes: Buffer.byteLength(m.data, 'utf8'), + reason: result.reason, + }, 'Codex terminal input blocked because restore identity is unavailable') + this.send(ws, { + type: 'terminal.input.blocked', + terminalId: m.terminalId, + reason: 'codex_identity_unavailable', + }) + return + } + if (result.status === 'blocked_codex_recovery_pending') { + log.debug({ + terminalId: m.terminalId, + connectionId: ws.connectionId, + attemptedInputBytes: Buffer.byteLength(m.data, 'utf8'), + }, 'Codex terminal input blocked while durable recovery is in progress') + this.send(ws, { + type: 'terminal.input.blocked', + terminalId: m.terminalId, + reason: 'codex_recovery_pending', + }) + return + } + if (result.status !== 'written') { + if (result.status === 'no_terminal') { recordSessionLifecycleEvent({ kind: 
'invalid_terminal_id_without_session_ref', terminalId: m.terminalId, @@ -2331,6 +2736,20 @@ export class WsHandler { return } + case 'terminal.codex.candidate.persisted': { + const result = this.registry.acknowledgeCodexCandidatePersisted(m) + if (result !== 'accepted') { + log.warn({ + terminalId: m.terminalId, + candidateThreadId: m.candidateThreadId, + rolloutPath: m.rolloutPath, + connectionId: ws.connectionId, + reason: result, + }, 'Received Codex candidate persisted acknowledgement that did not match server state') + } + return + } + case 'terminal.resize': { const ok = this.registry.resize(m.terminalId, m.cols, m.rows) if (!ok) { @@ -3280,6 +3699,7 @@ export class WsHandler { off?: (event: string, listener: (...args: any[]) => void) => void } registryWithEvents.off?.('terminal.exit', this.onTerminalExitBound) + registryWithEvents.off?.('terminal.codex.durability.updated', this.onCodexDurabilityUpdatedBound) if (this.sessionRepairService && this.sessionRepairListeners) { this.sessionRepairService.off('scanned', this.sessionRepairListeners.scanned) diff --git a/shared/codex-durability.ts b/shared/codex-durability.ts new file mode 100644 index 000000000..02f3ef7a0 --- /dev/null +++ b/shared/codex-durability.ts @@ -0,0 +1,85 @@ +import { z } from 'zod' + +export const CODEX_DURABILITY_SCHEMA_VERSION = 1 as const + +export const CodexDurabilityStateNameSchema = z.enum([ + 'identity_pending', + 'captured_pre_turn', + 'turn_in_progress_unproven', + 'proof_checking', + 'durable', + 'durable_resuming', + 'durability_unproven_after_completion', + 'non_restorable', +]) + +export type CodexDurabilityStateName = z.infer<typeof CodexDurabilityStateNameSchema> + +export const CodexCandidateSourceSchema = z.enum([ + 'thread_start_response', + 'thread_started_notification', + 'restored_client_state', + 'durable_resume', +]) + +export type CodexCandidateSource = z.infer<typeof CodexCandidateSourceSchema> + +export const CodexRolloutProofFailureReasonSchema = z.enum([ + 
'invalid_path', + 'missing', + 'not_regular_file', + 'empty', + 'malformed_json', + 'wrong_record_type', + 'missing_payload_id', + 'mismatched_thread_id', + 'read_error', +]) + +export type CodexRolloutProofFailureReason = z.infer<typeof CodexRolloutProofFailureReasonSchema> + +export const CodexCandidateIdentitySchema = z.object({ + provider: z.literal('codex'), + candidateThreadId: z.string().min(1), + rolloutPath: z.string().min(1), + source: CodexCandidateSourceSchema, + capturedAt: z.number().int().nonnegative(), + cliVersion: z.string().min(1).optional(), +}).strict() + +export type CodexCandidateIdentity = z.infer<typeof CodexCandidateIdentitySchema> + +export const CodexProofFailureSchema = z.object({ + reason: CodexRolloutProofFailureReasonSchema, + message: z.string().min(1), + checkedAt: z.number().int().nonnegative(), +}).strict() + +export type CodexProofFailure = z.infer<typeof CodexProofFailureSchema> + +export const CodexDurabilityRefSchema = z.object({ + schemaVersion: z.literal(CODEX_DURABILITY_SCHEMA_VERSION), + state: CodexDurabilityStateNameSchema, + candidate: CodexCandidateIdentitySchema.optional(), + turnCompletedAt: z.number().int().nonnegative().optional(), + lastProofFailure: CodexProofFailureSchema.optional(), + durableThreadId: z.string().min(1).optional(), + nonRestorableReason: z.string().min(1).optional(), +}).strict() + +export type CodexDurabilityRef = z.infer<typeof CodexDurabilityRefSchema> + +export const CodexDurabilityStoreRecordSchema = CodexDurabilityRefSchema.extend({ + terminalId: z.string().min(1), + tabId: z.string().min(1).optional(), + paneId: z.string().min(1).optional(), + serverInstanceId: z.string().min(1), + updatedAt: z.number().int().nonnegative(), +}).strict() + +export type CodexDurabilityStoreRecord = z.infer<typeof CodexDurabilityStoreRecordSchema> + +export function sanitizeCodexDurabilityRef(value: unknown): CodexDurabilityRef | undefined { + const parsed = CodexDurabilityRefSchema.safeParse(value) + 
return parsed.success ? parsed.data : undefined +} diff --git a/shared/ws-protocol.ts b/shared/ws-protocol.ts index bff6630f3..08e201096 100644 --- a/shared/ws-protocol.ts +++ b/shared/ws-protocol.ts @@ -9,7 +9,8 @@ import { z } from 'zod' import type { ClientExtensionEntry } from './extension-types.js' import type { ServerSettings } from './settings.js' -import { LiveTerminalHandleSchema, SessionRefSchema } from './session-contract.js' +import { LiveTerminalHandleSchema, SessionRefSchema, type RestoreError } from './session-contract.js' +import { CodexDurabilityRefSchema, type CodexDurabilityRef } from './codex-durability.js' // ────────────────────────────────────────────────────────────── // Shared enums and helpers @@ -219,12 +220,21 @@ export const TerminalCreateSchema = z.object({ shell: ShellSchema.default('system'), cwd: z.string().optional(), sessionRef: SessionLocatorSchema.optional(), + codexDurability: CodexDurabilityRefSchema.optional(), liveTerminal: LiveTerminalHandleSchema.optional(), restore: z.boolean().optional(), tabId: z.string().min(1).optional(), paneId: z.string().min(1).optional(), }).strict() +export const TerminalCodexCandidatePersistedSchema = z.object({ + type: z.literal('terminal.codex.candidate.persisted'), + terminalId: z.string().min(1), + candidateThreadId: z.string().min(1), + rolloutPath: z.string().min(1), + capturedAt: z.number().int().nonnegative(), +}).strict() + export const TerminalAttachIntentSchema = z.enum([ 'viewport_hydrate', 'keepalive_delta', @@ -416,6 +426,7 @@ export const ClientMessageSchema = z.discriminatedUnion('type', [ PingSchema, ClientDiagnosticSchema, TerminalCreateSchema, + TerminalCodexCandidatePersistedSchema, TerminalAttachSchema, TerminalDetachSchema, TerminalInputSchema, @@ -475,6 +486,8 @@ export type TerminalCreatedMessage = { requestId: string terminalId: string createdAt: number + clearCodexDurability?: boolean + restoreError?: RestoreError } export type TerminalAttachReadyMessage = { @@ -535,6 
+548,18 @@ export type TerminalSessionAssociatedMessage = { sessionRef: SessionLocator } +export type TerminalCodexDurabilityUpdatedMessage = { + type: 'terminal.codex.durability.updated' + terminalId: string + durability: CodexDurabilityRef +} + +export type TerminalInputBlockedMessage = { + type: 'terminal.input.blocked' + terminalId: string + reason: 'codex_identity_pending' | 'codex_identity_capture_timeout' | 'codex_identity_unavailable' | 'codex_recovery_pending' +} + export type TerminalsChangedMessage = { type: 'terminals.changed' revision: number @@ -752,6 +777,7 @@ export type TerminalInventoryMessage = { status: 'running' | 'exited' runtimeStatus?: 'running' | 'recovering' cwd?: string + codexDurability?: CodexDurabilityRef }> terminalMeta: TerminalMetaRecord[] } @@ -771,6 +797,8 @@ export type ServerMessage = | TerminalOutputGapMessage | TerminalTitleUpdatedMessage | TerminalSessionAssociatedMessage + | TerminalCodexDurabilityUpdatedMessage + | TerminalInputBlockedMessage | TerminalsChangedMessage | TerminalMetaUpdatedMessage | TerminalInventoryMessage diff --git a/src/components/Sidebar.tsx b/src/components/Sidebar.tsx index 4a9bb6dd3..d9a036337 100644 --- a/src/components/Sidebar.tsx +++ b/src/components/Sidebar.tsx @@ -19,7 +19,7 @@ import { getInstalledPerfAuditBridge } from '@/lib/perf-audit-bridge' import { fetchSessionWindow } from '@/store/sessionsThunks' import { mergeSessionMetadataByKey } from '@/lib/session-metadata' import { collectBusySessionKeys } from '@/lib/pane-activity' -import { selectPrimaryTerminalIdForTab } from '@/store/selectors/paneTerminalSelectors' +import { selectPrimaryTerminalIdForTab, selectTabIdByTerminalId } from '@/store/selectors/paneTerminalSelectors' import type { ChatSessionState } from '@/store/agentChatTypes' import type { PaneRuntimeActivityRecord } from '@/store/paneRuntimeActivitySlice' @@ -39,6 +39,23 @@ function sameSessionRef( return a.provider === b.provider && a.sessionId === b.sessionId } +function 
sameCodexDurability( + a?: BackgroundTerminal['codexDurability'], + b?: BackgroundTerminal['codexDurability'], +): boolean { + if (a === b) return true + if (!a || !b) return false + return a.state === b.state + && a.durableThreadId === b.durableThreadId + && a.candidate?.candidateThreadId === b.candidate?.candidateThreadId + && a.candidate?.rolloutPath === b.candidate?.rolloutPath + && a.turnCompletedAt === b.turnCompletedAt + && a.nonRestorableReason === b.nonRestorableReason + && a.lastProofFailure?.reason === b.lastProofFailure?.reason + && a.lastProofFailure?.message === b.lastProofFailure?.message + && a.lastProofFailure?.checkedAt === b.lastProofFailure?.checkedAt +} + /** Compare two BackgroundTerminal arrays by sidebar-relevant fields only. * Ignores terminal `lastActivityAt` since it changes frequently but doesn't affect rendering. */ export function areTerminalsEqual(a: BackgroundTerminal[], b: BackgroundTerminal[]): boolean { @@ -53,7 +70,8 @@ export function areTerminalsEqual(a: BackgroundTerminal[], b: BackgroundTerminal ai.status !== bi.status || ai.hasClients !== bi.hasClients || ai.mode !== bi.mode || - !sameSessionRef(ai.sessionRef, bi.sessionRef) + !sameSessionRef(ai.sessionRef, bi.sessionRef) || + !sameCodexDurability(ai.codexDurability, bi.codexDurability) ) return false } return true @@ -84,6 +102,10 @@ export function areSessionItemsEqual(a: SessionItem[], b: SessionItem[]): boolea ai.cwd !== bi.cwd || ai.projectPath !== bi.projectPath || ai.isFallback !== bi.isFallback || + ai.isRestorable !== bi.isRestorable || + !sameCodexDurability(ai.codexDurability, bi.codexDurability) || + ai.codexDurabilityState !== bi.codexDurabilityState || + ai.codexDurabilityReason !== bi.codexDurabilityReason || ai.timestamp !== bi.timestamp ) return false } @@ -140,10 +162,35 @@ function isSessionItemEqual(a: SessionItem, b: SessionItem): boolean { a.hasTitle === b.hasTitle && a.isSubagent === b.isSubagent && a.isNonInteractive === b.isNonInteractive && - 
a.firstUserMessage === b.firstUserMessage + a.firstUserMessage === b.firstUserMessage && + a.isRestorable === b.isRestorable && + sameCodexDurability(a.codexDurability, b.codexDurability) && + a.codexDurabilityState === b.codexDurabilityState && + a.codexDurabilityReason === b.codexDurabilityReason ) } +function getCodexDurabilityStatusLabel(item: SessionItem): string | undefined { + if (item.provider !== 'codex') return undefined + switch (item.codexDurabilityState) { + case 'identity_pending': + return 'Preparing restore' + case 'captured_pre_turn': + case 'turn_in_progress_unproven': + return 'Restore pending' + case 'proof_checking': + return 'Checking restore' + case 'durable_resuming': + return 'Restoring' + case 'durability_unproven_after_completion': + return 'Restore not verified' + case 'non_restorable': + return 'Not restorable' + default: + return undefined + } +} + /** * Determine whether a sidebar session item should be highlighted as active. * Prefers activeSessionKey (derived from the active pane's content) when @@ -334,6 +381,31 @@ export default function Sidebar({ const runningTerminalId = item.isRunning ? item.runningTerminalId : undefined const localServerInstanceId = state.connection.serverInstanceId + if (runningTerminalId && item.isRestorable === false) { + const existingTabId = selectTabIdByTerminalId(state, runningTerminalId) + if (existingTabId) { + dispatch(setActiveTab(existingTabId)) + const activePaneId = state.panes.activePane[existingTabId] + if (activePaneId) { + dispatch(setActivePane({ tabId: existingTabId, paneId: activePaneId })) + } + onNavigate('terminal') + return + } + dispatch(openSessionTab({ + sessionId: item.sessionId, + title: item.title, + cwd: item.cwd, + provider, + sessionType: item.sessionType || provider, + terminalId: runningTerminalId, + isRestorable: false, + codexDurability: item.codexDurability, + })) + onNavigate('terminal') + return + } + // 1. 
Dedup: if session is already open in a pane, focus it const existing = findPaneForSession( state, @@ -367,9 +439,11 @@ export default function Sidebar({ provider, sessionType, terminalId: runningTerminalId, + isRestorable: item.isRestorable, firstUserMessage: item.firstUserMessage, isSubagent: item.isSubagent, isNonInteractive: item.isNonInteractive, + codexDurability: item.codexDurability, })) onNavigate('terminal') return @@ -385,22 +459,32 @@ export default function Sidebar({ provider, sessionType, terminalId: runningTerminalId, + isRestorable: item.isRestorable, firstUserMessage: item.firstUserMessage, isSubagent: item.isSubagent, isNonInteractive: item.isNonInteractive, + codexDurability: item.codexDurability, })) onNavigate('terminal') return } + const newContent = item.isRestorable === false && provider === 'codex' + ? { + kind: 'terminal' as const, + mode: provider, + initialCwd: item.cwd, + codexDurability: item.codexDurability, + } + : buildResumeContent({ + sessionType, + sessionId: item.sessionId, + cwd: item.cwd, + agentChatProviderSettings: providerSettings, + }) dispatch(addPane({ tabId: currentActiveTabId, - newContent: buildResumeContent({ - sessionType, - sessionId: item.sessionId, - cwd: item.cwd, - agentChatProviderSettings: providerSettings, - }), + newContent, })) const activeTab = state.tabs.tabs.find((tab) => tab.id === currentActiveTabId) const sessionMetadataByKey = mergeSessionMetadataByKey( @@ -414,7 +498,7 @@ export default function Sidebar({ isNonInteractive: item.isNonInteractive, }, ) - if (activeTab && sessionMetadataByKey !== activeTab.sessionMetadataByKey) { + if (activeTab && item.isRestorable !== false && sessionMetadataByKey !== activeTab.sessionMetadataByKey) { dispatch(updateTab({ id: currentActiveTabId, updates: { sessionMetadataByKey }, @@ -811,7 +895,11 @@ function areSidebarItemPropsEqual(prev: SidebarItemProps, next: SidebarItemProps a.projectColor === b.projectColor && a.cwd === b.cwd && a.projectPath === b.projectPath 
&& - a.isFallback === b.isFallback + a.isFallback === b.isFallback && + a.isRestorable === b.isRestorable && + sameCodexDurability(a.codexDurability, b.codexDurability) && + a.codexDurabilityState === b.codexDurabilityState && + a.codexDurabilityReason === b.codexDurabilityReason ) } @@ -819,6 +907,7 @@ export const SidebarItem = memo(function SidebarItem(props: SidebarItemProps) { const { item, isActiveTab, isBusy = false, showProjectBadge, onClick } = props const extensionEntries = useAppSelector((s) => s.extensions?.entries) const { icon: SessionIcon, label: sessionLabel } = resolveSessionTypeConfig(item.sessionType, extensionEntries) + const codexStatusLabel = getCodexDurabilityStatusLabel(item) return ( <Tooltip> <TooltipTrigger asChild> @@ -851,7 +940,7 @@ export const SidebarItem = memo(function SidebarItem(props: SidebarItemProps) { {/* Content */} <div className="flex-1 min-w-0"> - <div className="flex items-center gap-2"> + <div className="flex items-center gap-2 min-w-0"> <span className={cn( 'text-sm truncate', @@ -863,6 +952,11 @@ export const SidebarItem = memo(function SidebarItem(props: SidebarItemProps) { {item.archived && ( <Archive className="h-3 w-3 text-muted-foreground/70" aria-label="Archived session" /> )} + {codexStatusLabel && ( + <span className="text-2xs text-muted-foreground/70 flex-shrink-0"> + {codexStatusLabel} + </span> + )} </div> {item.subtitle && showProjectBadge && ( <div className="text-2xs text-muted-foreground truncate"> @@ -880,6 +974,11 @@ export const SidebarItem = memo(function SidebarItem(props: SidebarItemProps) { <TooltipContent> <div>{sessionLabel}: {item.title}</div> <div className="text-muted-foreground">{item.subtitle || item.projectPath || sessionLabel}</div> + {codexStatusLabel && ( + <div className="text-muted-foreground"> + {codexStatusLabel}{item.codexDurabilityReason ? 
`: ${item.codexDurabilityReason}` : ''} + </div> + )} </TooltipContent> </Tooltip> ) diff --git a/src/components/TabsView.tsx b/src/components/TabsView.tsx index 5b036b353..9de45e271 100644 --- a/src/components/TabsView.tsx +++ b/src/components/TabsView.tsx @@ -35,6 +35,7 @@ import { import type { CodingCliProviderName, TabMode } from '@/store/types' import type { AgentChatProviderName } from '@/lib/agent-chat-types' import { migrateLegacyAgentChatDurableState } from '@shared/session-contract' +import { sanitizeCodexDurabilityRef } from '@shared/codex-durability' /* ------------------------------------------------------------------ */ /* Types */ @@ -111,11 +112,15 @@ function sanitizePaneSnapshot( const mode = (payload.mode as TabMode) || 'shell' const sessionRef = resolveSessionRef({ payload }) const liveTerminal = parseLiveTerminalHandle(payload.liveTerminal, record.serverInstanceId) + const codexDurability = mode === 'codex' + ? sanitizeCodexDurabilityRef(payload.codexDurability) + : undefined return { kind: 'terminal', mode, shell: (payload.shell as 'system' | 'cmd' | 'powershell' | 'wsl') || 'system', sessionRef, + ...(codexDurability ? { codexDurability } : {}), terminalId: sameServer ? 
liveTerminal?.terminalId : undefined, serverInstanceId: record.serverInstanceId, initialCwd: payload.initialCwd as string | undefined, diff --git a/src/components/TerminalView.tsx b/src/components/TerminalView.tsx index def853dc9..fdf218a4b 100644 --- a/src/components/TerminalView.tsx +++ b/src/components/TerminalView.tsx @@ -111,6 +111,26 @@ const LIGHT_THEME_MIN_CONTRAST_RATIO = 4.5 const DEFAULT_MIN_CONTRAST_RATIO = 1 const MAX_LAST_SENT_VIEWPORT_CACHE_ENTRIES = 200 const TRUNCATED_REPLAY_BYTES = 128 * 1024 +const INPUT_BLOCKED_NOTICE_THROTTLE_MS = 2000 + +type TerminalInputBlockedReason = + | 'codex_identity_pending' + | 'codex_identity_capture_timeout' + | 'codex_identity_unavailable' + | 'codex_recovery_pending' + +function terminalInputBlockedNotice(reason: TerminalInputBlockedReason): string { + switch (reason) { + case 'codex_identity_pending': + return 'Input not sent: Codex is still saving restore state. Try again in a moment.' + case 'codex_recovery_pending': + return 'Input not sent: Codex is still reconnecting. Try again in a moment.' + case 'codex_identity_capture_timeout': + return 'Input not sent: Codex did not provide restore state before startup timed out. Start a new Codex pane or resume inside Codex.' + case 'codex_identity_unavailable': + return 'Input not sent: Codex did not provide restorable session state. Start a new Codex pane or resume inside Codex.' 
+ } +} type StartupProbeReplayDiscardState = { remainder: string | null @@ -368,6 +388,7 @@ function TerminalView({ tabId, paneId, paneContent, hidden }: TerminalViewProps) const tapCountRef = useRef(0) const terminalFirstOutputMarkedRef = useRef(false) const turnCompletedSinceLastInputRef = useRef(true) + const lastInputBlockedNoticeRef = useRef<{ reason: TerminalInputBlockedReason; at: number } | null>(null) // Extract terminal-specific fields (safe because we check kind later) const isTerminal = paneContent.kind === 'terminal' @@ -1761,6 +1782,7 @@ function TerminalView({ tabId, paneId, paneContent, hidden }: TerminalViewProps) sessionRef: createSessionState.sessionRef, liveTerminal: createSessionState.liveTerminal, contentRefResumeSessionId: contentRef.current?.resumeSessionId, + codexDurability: createSessionState.codexDurability, mode, }) ws.send({ @@ -1770,6 +1792,7 @@ function TerminalView({ tabId, paneId, paneContent, hidden }: TerminalViewProps) shell: shell || 'system', cwd: initialCwd, ...(createSessionState.sessionRef ? { sessionRef: createSessionState.sessionRef } : {}), + ...(createSessionState.codexDurability ? { codexDurability: createSessionState.codexDurability } : {}), ...(createSessionState.liveTerminal ? { liveTerminal: createSessionState.liveTerminal } : {}), tabId, paneId: paneIdRef.current, @@ -2033,11 +2056,22 @@ function TerminalView({ tabId, paneId, paneContent, hidden }: TerminalViewProps) currentResumeSessionId: contentRef.current?.resumeSessionId, }) terminalIdRef.current = newId - updateContent({ terminalId: newId, status: 'running' }) + updateContent({ + terminalId: newId, + status: 'running', + ...(msg.clearCodexDurability ? { codexDurability: undefined } : {}), + ...(msg.restoreError ? 
{ restoreError: msg.restoreError } : {}), + }) // Also update tab status const currentTab = tabRef.current if (currentTab) { - dispatch(updateTab({ id: currentTab.id, updates: { status: 'running' } })) + dispatch(updateTab({ + id: currentTab.id, + updates: { + status: 'running', + ...(msg.clearCodexDurability ? { codexDurability: undefined } : {}), + }, + })) } applySeqState(createAttachSeqState({ lastSeq: 0 })) @@ -2141,17 +2175,72 @@ function TerminalView({ tabId, paneId, paneContent, hidden }: TerminalViewProps) paneResumeSessionId: contentRef.current?.resumeSessionId, tabResumeSessionId: currentTab?.resumeSessionId, }) + const paneCodexDurability = contentRef.current?.codexDurability + const nextPaneCodexDurability = sessionRef.provider === 'codex' + && paneCodexDurability?.state === 'durable' + && ( + paneCodexDurability.durableThreadId === sessionRef.sessionId + || paneCodexDurability.candidate?.candidateThreadId === sessionRef.sessionId + ) + ? paneCodexDurability + : undefined + const tabCodexDurability = currentTab?.codexDurability + const nextTabCodexDurability = sessionRef.provider === 'codex' + && tabCodexDurability?.state === 'durable' + && ( + tabCodexDurability.durableThreadId === sessionRef.sessionId + || tabCodexDurability.candidate?.candidateThreadId === sessionRef.sessionId + ) + ? 
tabCodexDurability + : undefined if (durableIdentityUpdate?.paneUpdates) { - updateContent(durableIdentityUpdate.paneUpdates) + updateContent({ ...durableIdentityUpdate.paneUpdates, codexDurability: nextPaneCodexDurability }) } if (currentTab && durableIdentityUpdate?.tabUpdates) { - dispatch(updateTab({ id: currentTab.id, updates: durableIdentityUpdate.tabUpdates })) + dispatch(updateTab({ id: currentTab.id, updates: { ...durableIdentityUpdate.tabUpdates, codexDurability: nextTabCodexDurability } })) } if (durableIdentityUpdate?.shouldFlush) { dispatch(flushPersistedLayoutNow()) } } + if (msg.type === 'terminal.codex.durability.updated' && msg.terminalId === tid) { + const durability = msg.durability + updateContent({ codexDurability: durability }) + const currentTab = tabHasSinglePaneRef.current ? tabRef.current : undefined + if (currentTab) { + dispatch(updateTab({ id: currentTab.id, updates: { codexDurability: durability } })) + } + dispatch(flushPersistedLayoutNow()) + const candidate = durability?.candidate + if (candidate) { + ws.send({ + type: 'terminal.codex.candidate.persisted', + terminalId: tid, + candidateThreadId: candidate.candidateThreadId, + rolloutPath: candidate.rolloutPath, + capturedAt: candidate.capturedAt, + }) + } + } + + if (msg.type === 'terminal.input.blocked' && msg.terminalId === tid) { + const reason = msg.reason as TerminalInputBlockedReason + log.warn('terminal_input_blocked', { + tabId, + paneId: paneIdRef.current, + terminalId: tid, + reason, + }) + const now = Date.now() + const previous = lastInputBlockedNoticeRef.current + if (!previous || previous.reason !== reason || now - previous.at >= INPUT_BLOCKED_NOTICE_THROTTLE_MS) { + lastInputBlockedNoticeRef.current = { reason, at: now } + term.writeln(`\r\n[${terminalInputBlockedNotice(reason)}]\r\n`) + } + return + } + if (msg.type === 'error' && msg.requestId === reqId) { if (msg.code === 'RATE_LIMITED') { const scheduled = scheduleRateLimitRetry(reqId) @@ -2211,7 +2300,9 @@ 
function TerminalView({ tabId, paneId, paneContent, hidden }: TerminalViewProps) // This prevents an infinite respawn loop when terminals fail immediately // (e.g., due to permission errors on cwd). User must explicitly restart. if (currentTerminalId && current?.status !== 'exited') { - if (!current?.sessionRef) { + const hasCodexCapturedRestoreState = current?.mode === 'codex' && Boolean(current.codexDurability?.candidate) + const canAskServerForCodexRestoreState = current?.mode === 'codex' + if (!current?.sessionRef && !hasCodexCapturedRestoreState && !canAskServerForCodexRestoreState) { const restoreDiagnostic = { event: 'restore_unavailable' as const, reason: 'dead_live_handle' as const, diff --git a/src/components/agent-chat/AgentChatView.tsx b/src/components/agent-chat/AgentChatView.tsx index 5112fea10..469ab3d1b 100644 --- a/src/components/agent-chat/AgentChatView.tsx +++ b/src/components/agent-chat/AgentChatView.tsx @@ -56,6 +56,7 @@ import { } from '@/store/persistControl' import { useMobile } from '@/hooks/useMobile' import { useKeyboardInset } from '@/hooks/useKeyboardInset' +import { buildRestoreError } from '@shared/session-contract' /** Early lifecycle states that should not be re-entered once the session has advanced. 
*/ const EARLY_STATES = new Set(['creating', 'starting']) @@ -86,6 +87,16 @@ function paneMatchesCurrentProviderDefaults( && pane.effort === providerDefaults?.effort } +function getCanonicalPaneResumeSessionId(pane: AgentChatPaneContent): string | undefined { + if (pane.sessionRef?.provider === 'claude' && isValidClaudeSessionId(pane.sessionRef.sessionId)) { + return pane.sessionRef.sessionId + } + if (isValidClaudeSessionId(pane.resumeSessionId)) { + return pane.resumeSessionId + } + return undefined +} + interface AgentChatViewProps { tabId: string paneId: string @@ -177,18 +188,34 @@ export default function AgentChatView({ tabId, paneId, paneContent, hidden }: Ag const surfaceVisibleMarkedRef = useRef(false) const sessionRef = useRef(session) sessionRef.current = session - const persistedTimelineSessionId = isValidClaudeSessionId(paneContent.resumeSessionId) + const paneSessionRefResumeId = paneContent.sessionRef?.provider === 'claude' + ? paneContent.sessionRef.sessionId + : undefined + const canonicalPaneSessionRefResumeId = isValidClaudeSessionId(paneSessionRefResumeId) + ? paneSessionRefResumeId + : undefined + const persistedResumeSessionId = typeof paneContent.resumeSessionId === 'string' + && paneContent.resumeSessionId.trim().length > 0 ? paneContent.resumeSessionId : undefined - const canonicalDurableSessionId = getCanonicalDurableSessionId(session) ?? persistedTimelineSessionId - const timelineSessionId = getPreferredResumeSessionId(session) ?? persistedTimelineSessionId - const restoreHistoryQueryId = timelineSessionId ?? paneContent.sessionId + const persistedCanonicalResumeSessionId = isValidClaudeSessionId(persistedResumeSessionId) + ? persistedResumeSessionId + : undefined + const canonicalDurableSessionId = getCanonicalDurableSessionId(session) + ?? canonicalPaneSessionRefResumeId + ?? 
persistedCanonicalResumeSessionId + const preferredSessionResumeSessionId = getPreferredResumeSessionId(session) + const timelineSessionId = preferredSessionResumeSessionId + ?? paneSessionRefResumeId + ?? persistedResumeSessionId + const restoreHistoryQueryId = preferredSessionResumeSessionId + ?? canonicalPaneSessionRefResumeId + ?? persistedCanonicalResumeSessionId + ?? paneContent.sessionId + ?? persistedResumeSessionId const attachResumeSessionId = getPreferredResumeSessionId(session) - ?? ( - typeof paneContent.resumeSessionId === 'string' && paneContent.resumeSessionId.trim().length > 0 - ? paneContent.resumeSessionId - : undefined - ) + ?? paneSessionRefResumeId + ?? persistedResumeSessionId const attachPayload = useMemo(() => { if (!paneContent.sessionId) return null return { @@ -236,12 +263,29 @@ export default function AgentChatView({ tabId, paneId, paneContent, hidden }: Ag ) const isRestoring = !!paneContent.sessionId && !session?.historyLoaded && !hasRestoreFailure - // Shared recovery logic: clears stale sessionId and resets to 'creating' so a new - // SDK session is spawned. Preserves resumeSessionId for CLI session continuity. + // Shared recovery logic for a lost live SDK handle. Only canonical Claude ids + // can be used for automatic recovery; mutable names are display state, not a + // deterministic restore target. const triggerRecovery = useCallback(() => { + const resumeSessionId = getCanonicalDurableSessionId(sessionRef.current) + ?? getCanonicalPaneResumeSessionId(paneContentRef.current) + if (!resumeSessionId) { + dispatch(updatePaneContent({ + tabId, + paneId, + content: { + ...paneContentRef.current, + sessionId: undefined, + status: 'idle' as const, + restoreError: buildRestoreError('dead_live_handle'), + }, + })) + createSentRef.current = false + attachSentRef.current = false + return + } + const newRequestId = nanoid() - const resumeSessionId = getPreferredResumeSessionId(sessionRef.current) - ?? 
paneContentRef.current.resumeSessionId dispatch(updatePaneContent({ tabId, paneId, @@ -249,8 +293,10 @@ export default function AgentChatView({ tabId, paneId, paneContent, hidden }: Ag ...paneContentRef.current, sessionId: undefined, resumeSessionId, + sessionRef: { provider: 'claude', sessionId: resumeSessionId }, createRequestId: newRequestId, status: 'creating' as const, + restoreError: undefined, }, })) createSentRef.current = false diff --git a/src/components/terminal-view-utils.ts b/src/components/terminal-view-utils.ts index ad96f028e..8abba7525 100644 --- a/src/components/terminal-view-utils.ts +++ b/src/components/terminal-view-utils.ts @@ -8,16 +8,19 @@ export function getResumeSessionIdFromRef(ref: TerminalContentRef): string | und export function getCreateSessionStateFromRef(ref: TerminalContentRef): { sessionRef?: TerminalPaneContent['sessionRef'] + codexDurability?: TerminalPaneContent['codexDurability'] liveTerminal?: { terminalId: string serverInstanceId: string } } { const sessionRef = ref.current?.sessionRef + const codexDurability = ref.current?.codexDurability const terminalId = ref.current?.terminalId const serverInstanceId = ref.current?.serverInstanceId return { ...(sessionRef ? { sessionRef } : {}), + ...(!sessionRef && codexDurability ? { codexDurability } : {}), ...(terminalId && serverInstanceId ? { liveTerminal: { diff --git a/src/lib/session-utils.ts b/src/lib/session-utils.ts index 6d5663f20..9d0863c3f 100644 --- a/src/lib/session-utils.ts +++ b/src/lib/session-utils.ts @@ -83,6 +83,13 @@ function extractExplicitSessionLocator(content: PaneContent): { return sanitizeSessionLocator(explicit) } +function extractCodexDurabilityLocator(content: PaneContent): SessionMatchLocator | undefined { + if (content.kind !== 'terminal' || content.mode !== 'codex') return undefined + const sessionId = content.codexDurability?.durableThreadId + ?? content.codexDurability?.candidate?.candidateThreadId + return sessionId ? 
{ provider: 'codex', sessionId } : undefined +} + function extractSessionLocatorServerInstanceHint(content: PaneContent): string | undefined { return isNonEmptyString((content as { serverInstanceId?: unknown }).serverInstanceId) ? (content as { serverInstanceId: string }).serverInstanceId @@ -122,6 +129,10 @@ function extractSessionLocators(content: PaneContent): Array<{ if (content.kind !== 'terminal') return dedupeBy(locators, locatorIdentity) if (content.mode === 'shell') return dedupeBy(locators, locatorIdentity) if (!isNonShellMode(content.mode)) return dedupeBy(locators, locatorIdentity) + const codexDurabilityLocator = extractCodexDurabilityLocator(content) + if (codexDurabilityLocator) { + locators.push(codexDurabilityLocator) + } const sessionId = content.resumeSessionId if (!sessionId || content.mode !== 'claude' || !isValidClaudeSessionId(sessionId)) { return dedupeBy(locators, locatorIdentity) @@ -136,6 +147,11 @@ function buildTabFallbackLocator(tab: RootState['tabs']['tabs'][number]): Sessio return explicitSessionRef } const provider = tab.codingCliProvider || (tab.mode !== 'shell' ? tab.mode : undefined) + if (provider === 'codex') { + const sessionId = tab.codexDurability?.durableThreadId + ?? tab.codexDurability?.candidate?.candidateThreadId + if (sessionId) return sanitizeSessionLocator({ provider, sessionId }) + } const sessionId = tab.resumeSessionId if (provider !== 'claude' || !sessionId || !isValidClaudeSessionId(sessionId)) return undefined return sanitizeSessionLocator({ provider, sessionId }) diff --git a/src/lib/tab-registry-snapshot.ts b/src/lib/tab-registry-snapshot.ts index 7f77757f4..9d83fc10a 100644 --- a/src/lib/tab-registry-snapshot.ts +++ b/src/lib/tab-registry-snapshot.ts @@ -17,6 +17,7 @@ function stripPanePayload(content: PaneContent, serverInstanceId: string): Recor mode: content.mode, shell: content.shell, sessionRef: content.sessionRef, + codexDurability: content.mode === 'codex' ? 
content.codexDurability : undefined, liveTerminal: content.terminalId ? { terminalId: content.terminalId, diff --git a/src/store/agentChatSlice.ts b/src/store/agentChatSlice.ts index e57caee1a..c4eb8c9b7 100644 --- a/src/store/agentChatSlice.ts +++ b/src/store/agentChatSlice.ts @@ -214,9 +214,11 @@ const agentChatSlice = createSlice({ }>) { const session = ensureSession(state, action.payload.sessionId) const previousRestoreQueryId = getRestoreQueryId(session) - const nextTimelineSessionId = isValidClaudeSessionId(action.payload.timelineSessionId) + const payloadTimelineSessionId = typeof action.payload.timelineSessionId === 'string' + && action.payload.timelineSessionId.trim().length > 0 ? action.payload.timelineSessionId : undefined + const nextTimelineSessionId = payloadTimelineSessionId ?? session.timelineSessionId const nextRestoreQueryId = getRestoreQueryId({ cliSessionId: session.cliSessionId, timelineSessionId: nextTimelineSessionId ?? session.timelineSessionId, diff --git a/src/store/paneTreeValidation.ts b/src/store/paneTreeValidation.ts index 2055ae2ab..e55aeb1a7 100644 --- a/src/store/paneTreeValidation.ts +++ b/src/store/paneTreeValidation.ts @@ -1,4 +1,5 @@ import { isAgentChatModelSelection, normalizeAgentChatEffortOverride, type PaneNode } from './paneTypes' +import { CodexDurabilityRefSchema } from '@shared/codex-durability' function isRecord(value: unknown): value is Record<string, unknown> { return !!value && typeof value === 'object' @@ -25,6 +26,10 @@ function isRestoreErrorShape(value: unknown): boolean { && typeof (value as any).reason === 'string' } +function isCodexDurabilityShape(value: unknown): boolean { + return value === undefined || CodexDurabilityRefSchema.safeParse(value).success +} + function isPaneContentShape(content: unknown): boolean { if (!isRecord(content) || typeof content.kind !== 'string') { return false @@ -39,6 +44,7 @@ function isPaneContentShape(content: unknown): boolean { && isOptionalString(content.shell) && 
isOptionalString(content.resumeSessionId) && isSessionRefShape(content.sessionRef) + && isCodexDurabilityShape(content.codexDurability) && isRestoreErrorShape(content.restoreError) && isOptionalString(content.initialCwd) case 'browser': diff --git a/src/store/paneTypes.ts b/src/store/paneTypes.ts index 122aff019..fb074ed29 100644 --- a/src/store/paneTypes.ts +++ b/src/store/paneTypes.ts @@ -6,6 +6,7 @@ import { } from '@shared/agent-chat-capabilities' import type { SessionLocator as SharedSessionLocator } from '@shared/ws-protocol' import type { RestoreError } from '@shared/session-contract' +import type { CodexDurabilityRef } from '@shared/codex-durability' export type SessionLocator = SharedSessionLocator @@ -60,6 +61,8 @@ export type TerminalPaneContent = { resumeSessionId?: string /** Portable session reference for cross-device tab snapshots */ sessionRef?: SessionLocator + /** Non-canonical Codex restore durability state and proof metadata. */ + codexDurability?: CodexDurabilityRef /** Runtime-only server locality for same-server matching; never part of canonical durable identity. */ serverInstanceId?: string /** Explicit restore failure when no canonical durable target exists. 
*/ diff --git a/src/store/panesSlice.ts b/src/store/panesSlice.ts index 09329ac67..10b0360fa 100644 --- a/src/store/panesSlice.ts +++ b/src/store/panesSlice.ts @@ -19,6 +19,7 @@ import { createLogger } from '@/lib/client-logger' import { patchBrowserPreferencesRecord } from '@/lib/browser-preferences' import { shouldPreserveLocalCanonicalResumeSessionId } from './persistControl' import { RestoreErrorSchema, sanitizeSessionRef } from '@shared/session-contract' +import { sanitizeCodexDurabilityRef } from '@shared/codex-durability' const log = createLogger('PanesSlice') @@ -49,6 +50,7 @@ function normalizePaneContent( : undefined const resumeSessionId = inputResumeSessionId const sessionRef = sanitizeSessionRef(input.sessionRef) + const codexDurability = sanitizeCodexDurabilityRef(input.codexDurability) const restoreError = RestoreErrorSchema.safeParse((input as { restoreError?: unknown }).restoreError) return { kind: 'terminal', @@ -61,6 +63,7 @@ function normalizePaneContent( shell: typeof input.shell === 'string' ? input.shell : 'system', resumeSessionId, ...(sessionRef ? { sessionRef } : {}), + ...(codexDurability ? { codexDurability } : {}), serverInstanceId: typeof input.serverInstanceId === 'string' ? input.serverInstanceId : undefined, ...(restoreError.success ? { restoreError: restoreError.data } : {}), initialCwd: typeof input.initialCwd === 'string' ? 
input.initialCwd : undefined, diff --git a/src/store/persistedState.ts b/src/store/persistedState.ts index 90eabefc6..d59d1d35e 100644 --- a/src/store/persistedState.ts +++ b/src/store/persistedState.ts @@ -6,6 +6,7 @@ import { migrateLegacyTerminalDurableState, sanitizeSessionRef, } from '@shared/session-contract' +import { sanitizeCodexDurabilityRef } from '@shared/codex-durability' export { LAYOUT_STORAGE_KEY, TABS_STORAGE_KEY, PANES_STORAGE_KEY } @@ -95,11 +96,13 @@ function normalizePersistedTab(tab: Record<string, unknown>): PersistedTab { sessionRef: tab.sessionRef, resumeSessionId: typeof tab.resumeSessionId === 'string' ? tab.resumeSessionId : undefined, }) + const codexDurability = sanitizeCodexDurabilityRef(tab.codexDurability) const { resumeSessionId: _resumeSessionId, sessionRef: _legacySessionRef, ...rest } = tab return { ...rest, ...(durableState.sessionRef ? { sessionRef: durableState.sessionRef } : {}), + ...(codexDurability ? { codexDurability } : {}), } as PersistedTab } @@ -164,6 +167,7 @@ function normalizeTerminalContent(content: Record<string, unknown>): Record<stri ? content.restoreError : undefined const { resumeSessionId: _resumeSessionId, sessionRef: _legacySessionRef, restoreError: _legacyRestoreError, ...rest } = content + const codexDurability = sanitizeCodexDurabilityRef(content.codexDurability) const isLegacyRecoveryFailed = ( rest.kind === 'terminal' && rest.mode === 'codex' @@ -180,6 +184,7 @@ function normalizeTerminalContent(content: Record<string, unknown>): Record<stri return { ...normalizedRuntime, ...(normalizedSessionRef ? { sessionRef: normalizedSessionRef } : {}), + ...(codexDurability ? { codexDurability } : {}), ...(normalizedRestoreError ? 
{ restoreError: normalizedRestoreError } : {}), diff --git a/src/store/selectors/sidebarSelectors.ts b/src/store/selectors/sidebarSelectors.ts index 364d3aea6..2a1a4a430 100644 --- a/src/store/selectors/sidebarSelectors.ts +++ b/src/store/selectors/sidebarSelectors.ts @@ -7,6 +7,7 @@ import { getAgentChatProviderConfig } from '@/lib/agent-chat-utils' import { getSessionMetadata } from '@/lib/session-metadata' import type { SessionListMetadata } from '../types' import { getLeafDirectoryName, matchTitleTierMetadata } from '../../../shared/session-title-search.js' +import type { CodexDurabilityRef, CodexDurabilityStateName } from '../../../shared/codex-durability.js' export interface SidebarSessionItem { id: string @@ -30,6 +31,10 @@ export interface SidebarSessionItem { firstUserMessage?: string hasTitle: boolean isFallback?: true + isRestorable?: boolean + codexDurability?: CodexDurabilityRef + codexDurabilityState?: CodexDurabilityStateName + codexDurabilityReason?: string } const EMPTY_ACTIVITY: Record<string, number> = {} @@ -60,6 +65,28 @@ function getProjectName(projectPath: string): string { return getLeafDirectoryName(projectPath) ?? projectPath } +function getCodexDurabilitySessionId(durability?: CodexDurabilityRef): string | undefined { + return durability?.durableThreadId ?? durability?.candidate?.candidateThreadId +} + +function isCodexDurabilityRestorable(durability?: CodexDurabilityRef): boolean { + return Boolean(durability?.state === 'durable' && durability.durableThreadId) +} + +function getCodexDurabilityReason(durability?: CodexDurabilityRef): string | undefined { + return durability?.nonRestorableReason ?? durability?.lastProofFailure?.message ?? 
durability?.lastProofFailure?.reason +} + +type RunningSessionInfo = { + terminalId: string + createdAt: number + allTerminalIds: string[] + isRestorable?: boolean + codexDurability?: CodexDurabilityRef + codexDurabilityState?: CodexDurabilityStateName + codexDurabilityReason?: string +} + export function buildSessionItems( projects: RootState['sessions']['projects'], tabs: RootState['tabs']['tabs'], @@ -70,21 +97,57 @@ export function buildSessionItems( ): SidebarSessionItem[] { const items: SidebarSessionItem[] = [] const itemsByKey = new Map<string, SidebarSessionItem>() - const runningSessionMap = new Map<string, { terminalId: string; createdAt: number; allTerminalIds: string[] }>() + const runningSessionMap = new Map<string, RunningSessionInfo>() const tabSessionMap = new Map<string, { hasTab: boolean }>() for (const terminal of terminals || []) { - if (terminal.status === 'running' && terminal.sessionRef) { - const sessionKey = `${terminal.sessionRef.provider}:${terminal.sessionRef.sessionId}` + if (terminal.status === 'running') { + const codexDurabilitySessionId = terminal.mode === 'codex' + ? getCodexDurabilitySessionId(terminal.codexDurability) + : undefined + const sessionRef = terminal.sessionRef ?? ( + codexDurabilitySessionId + ? { provider: 'codex' as const, sessionId: codexDurabilitySessionId } + : undefined + ) + if (!sessionRef) continue + + const sessionKey = `${sessionRef.provider}:${sessionRef.sessionId}` + const isRestorable = sessionRef === terminal.sessionRef + ? true + : isCodexDurabilityRestorable(terminal.codexDurability) + const codexDurability = terminal.mode === 'codex' + ? terminal.codexDurability + : undefined + const codexDurabilityState = terminal.mode === 'codex' + ? terminal.codexDurability?.state + : undefined + const codexDurabilityReason = terminal.mode === 'codex' + ? 
getCodexDurabilityReason(terminal.codexDurability) + : undefined const existing = runningSessionMap.get(sessionKey) if (existing) { existing.allTerminalIds.push(terminal.terminalId) + existing.isRestorable = existing.isRestorable || isRestorable + existing.codexDurability = existing.codexDurability ?? codexDurability + if (!existing.codexDurabilityState || codexDurabilityState === 'durable') { + existing.codexDurabilityState = codexDurabilityState + } + existing.codexDurabilityReason = existing.codexDurabilityReason ?? codexDurabilityReason if (terminal.createdAt < existing.createdAt) { existing.terminalId = terminal.terminalId existing.createdAt = terminal.createdAt } } else { - runningSessionMap.set(sessionKey, { terminalId: terminal.terminalId, createdAt: terminal.createdAt, allTerminalIds: [terminal.terminalId] }) + runningSessionMap.set(sessionKey, { + terminalId: terminal.terminalId, + createdAt: terminal.createdAt, + allTerminalIds: [terminal.terminalId], + isRestorable, + codexDurability, + codexDurabilityState, + codexDurabilityReason, + }) } } } @@ -131,6 +194,10 @@ export function buildSessionItems( isNonInteractive: session.isNonInteractive, firstUserMessage: session.firstUserMessage, isFallback: undefined, + isRestorable: runningTerminal?.isRestorable, + codexDurability: runningTerminal?.codexDurability, + codexDurabilityState: runningTerminal?.codexDurabilityState, + codexDurabilityReason: runningTerminal?.codexDurabilityReason, } items.push(item) itemsByKey.set(key, item) @@ -147,11 +214,15 @@ export function buildSessionItems( cwd?: string timestamp?: number metadata?: SessionListMetadata + hasTab?: boolean + isRestorable?: boolean + codexDurability?: CodexDurabilityRef + codexDurabilityState?: CodexDurabilityStateName + codexDurabilityReason?: string }) => { const key = `${input.provider}:${input.sessionId}` const existing = itemsByKey.get(key) if (existing) { - existing.hasTab = true existing.timestamp = Math.max(existing.timestamp, 
input.timestamp ?? 0) const fallbackTitle = input.title?.trim() if (!existing.hasTitle && fallbackTitle) { @@ -168,6 +239,17 @@ export function buildSessionItems( if (!existing.firstUserMessage && input.metadata?.firstUserMessage) { existing.firstUserMessage = input.metadata.firstUserMessage } + existing.hasTab = existing.hasTab || (input.hasTab ?? true) + existing.isRestorable = existing.isRestorable || input.isRestorable + existing.codexDurability = existing.codexDurability + ?? input.codexDurability + ?? runningSessionMap.get(key)?.codexDurability + existing.codexDurabilityState = existing.codexDurabilityState + ?? input.codexDurabilityState + ?? runningSessionMap.get(key)?.codexDurabilityState + existing.codexDurabilityReason = existing.codexDurabilityReason + ?? input.codexDurabilityReason + ?? runningSessionMap.get(key)?.codexDurabilityReason if (existing.isSubagent === undefined && input.metadata?.isSubagent !== undefined) { existing.isSubagent = input.metadata.isSubagent } @@ -181,6 +263,7 @@ export function buildSessionItems( const runningTerminal = runningSessionMap.get(key) const runningTerminalId = runningTerminal?.terminalId const runningTerminalIds = runningTerminal?.allTerminalIds + const hasTab = input.hasTab ?? true const item: SidebarSessionItem = { id: `session-${input.provider}-${input.sessionId}`, sessionId: input.sessionId, @@ -192,7 +275,7 @@ export function buildSessionItems( projectPath: input.cwd, timestamp: input.timestamp ?? 0, cwd: input.cwd, - hasTab: true, + hasTab, ratchetedActivity: sessionActivity[key], isRunning: !!runningTerminalId, runningTerminalId, @@ -201,6 +284,10 @@ export function buildSessionItems( isNonInteractive: input.metadata?.isNonInteractive, firstUserMessage: input.metadata?.firstUserMessage, isFallback: true, + isRestorable: input.isRestorable ?? runningTerminal?.isRestorable, + codexDurability: input.codexDurability ?? runningTerminal?.codexDurability, + codexDurabilityState: input.codexDurabilityState ?? 
runningTerminal?.codexDurabilityState, + codexDurabilityReason: input.codexDurabilityReason ?? runningTerminal?.codexDurabilityReason, } items.push(item) itemsByKey.set(key, item) @@ -238,7 +325,26 @@ export function buildSessionItems( if (node.content.kind !== 'terminal') return if (node.content.mode === 'shell') return const sessionRef = node.content.sessionRef - if (!sessionRef) return + if (!sessionRef) { + const codexDurability = node.content.mode === 'codex' + ? node.content.codexDurability + : undefined + const codexSessionId = getCodexDurabilitySessionId(codexDurability) + if (!codexSessionId) return + pushFallbackItem({ + provider: 'codex', + sessionId: codexSessionId, + sessionType: 'codex', + title: paneTitle || tab.title, + cwd: node.content.initialCwd, + timestamp: fallbackTimestamp, + isRestorable: isCodexDurabilityRestorable(codexDurability), + codexDurability, + codexDurabilityState: codexDurability?.state, + codexDurabilityReason: getCodexDurabilityReason(codexDurability), + }) + return + } const metadata = getSessionMetadata(tab, sessionRef.provider, sessionRef.sessionId) pushFallbackItem({ @@ -275,6 +381,25 @@ export function buildSessionItems( }) } + for (const terminal of terminals || []) { + if (terminal.status !== 'running' || terminal.mode !== 'codex' || terminal.sessionRef) continue + const codexSessionId = getCodexDurabilitySessionId(terminal.codexDurability) + if (!codexSessionId) continue + pushFallbackItem({ + provider: 'codex', + sessionId: codexSessionId, + sessionType: 'codex', + title: terminal.title, + cwd: terminal.cwd, + timestamp: terminal.lastActivityAt, + hasTab: false, + isRestorable: isCodexDurabilityRestorable(terminal.codexDurability), + codexDurability: terminal.codexDurability, + codexDurabilityState: terminal.codexDurability?.state, + codexDurabilityReason: getCodexDurabilityReason(terminal.codexDurability), + }) + } + return items } diff --git a/src/store/storage-migration.ts b/src/store/storage-migration.ts index 
d2b6da6e0..9b939a0e0 100644 --- a/src/store/storage-migration.ts +++ b/src/store/storage-migration.ts @@ -22,6 +22,7 @@ import { migrateLegacyTerminalDurableState, sanitizeSessionRef, } from '@shared/session-contract' +import { sanitizeCodexDurabilityRef } from '@shared/codex-durability' const log = createLogger('StorageMigration') @@ -57,10 +58,12 @@ function normalizeLayoutTab(tab: Record<string, unknown>): Record<string, unknow sessionRef: tab.sessionRef, resumeSessionId: typeof tab.resumeSessionId === 'string' ? tab.resumeSessionId : undefined, }) + const codexDurability = sanitizeCodexDurabilityRef(tab.codexDurability) const { resumeSessionId: _resumeSessionId, sessionRef: _legacySessionRef, ...rest } = tab return { ...rest, ...(durableState.sessionRef ? { sessionRef: durableState.sessionRef } : {}), + ...(codexDurability ? { codexDurability } : {}), } } @@ -113,6 +116,7 @@ function normalizeLayoutNode(node: unknown): unknown { resumeSessionId: typeof content.resumeSessionId === 'string' ? content.resumeSessionId : undefined, }) const { resumeSessionId: _resumeSessionId, sessionRef: _legacySessionRef, restoreError: _legacyRestoreError, ...rest } = content + const codexDurability = sanitizeCodexDurabilityRef(content.codexDurability) const normalizedRuntime = normalizeLegacyRecoveryFailedTerminal(rest, durableState) const isLegacyRecoveryFailed = ( rest.kind === 'terminal' @@ -127,6 +131,7 @@ function normalizeLayoutNode(node: unknown): unknown { content: { ...normalizedRuntime, ...(normalizedSessionRef ? { sessionRef: normalizedSessionRef } : {}), + ...(codexDurability ? { codexDurability } : {}), ...(!isLegacyRecoveryFailed && durableState.restoreError ? 
{ restoreError: durableState.restoreError } : {}), }, } diff --git a/src/store/tabsSlice.ts b/src/store/tabsSlice.ts index 48360b70f..96315f4a3 100644 --- a/src/store/tabsSlice.ts +++ b/src/store/tabsSlice.ts @@ -23,6 +23,8 @@ import { createLogger } from '@/lib/client-logger' import { mergeSessionMetadataByKey, sessionMetadataKey } from '@/lib/session-metadata' import { mergeSessionMetadataForPreferredResumeId } from './persistControl' import { migrateLegacyTerminalDurableState, sanitizeSessionRef } from '@shared/session-contract' +import type { CodexDurabilityRef } from '@shared/codex-durability' +import { sanitizeCodexDurabilityRef } from '@shared/codex-durability' import { sanitizeTabsAgainstLayouts } from '@/lib/tab-fallback-identity' @@ -69,6 +71,7 @@ function migrateTabFields(t: Tab): Tab { sessionRef: (t as any).sessionRef, resumeSessionId: t.resumeSessionId, }) + const codexDurability = sanitizeCodexDurabilityRef((t as any).codexDurability) return { ...rest, codingCliSessionId: t.codingCliSessionId || legacyClaudeSessionId, @@ -79,6 +82,7 @@ function migrateTabFields(t: Tab): Tab { mode: t.mode || 'shell', shell: t.shell || 'system', sessionRef: durableState.sessionRef, + codexDurability, resumeSessionId: undefined, lastInputAt: t.lastInputAt, } @@ -232,6 +236,7 @@ type AddTabPayload = { shell?: ShellType initialCwd?: string sessionRef?: Tab['sessionRef'] + codexDurability?: Tab['codexDurability'] serverInstanceId?: string resumeSessionId?: string sessionMetadataByKey?: Tab['sessionMetadataByKey'] @@ -254,6 +259,7 @@ export const tabsSlice = createSlice({ const codingCliProvider = payload.codingCliProvider || (legacyClaudeSessionId ? 
'claude' : undefined) const sessionRef = sanitizeSessionRef(payload.sessionRef) + const codexDurability = sanitizeCodexDurabilityRef(payload.codexDurability) const tab: Tab = { id, createRequestId: payload.createRequestId || id, @@ -267,6 +273,7 @@ export const tabsSlice = createSlice({ shell: payload.shell || 'system', initialCwd: payload.initialCwd, sessionRef, + codexDurability, serverInstanceId: payload.serverInstanceId, resumeSessionId: undefined, sessionMetadataByKey: payload.sessionMetadataByKey, @@ -524,7 +531,7 @@ export const reopenClosedTab = createAsyncThunk( export const openSessionTab = createAsyncThunk( 'tabs/openSessionTab', async ( - { sessionId, title, cwd, provider, sessionType, terminalId, forceNew, firstUserMessage, isSubagent, isNonInteractive }: { + { sessionId, title, cwd, provider, sessionType, terminalId, forceNew, firstUserMessage, isSubagent, isNonInteractive, isRestorable, codexDurability }: { sessionId: string title?: string cwd?: string @@ -535,6 +542,8 @@ export const openSessionTab = createAsyncThunk( firstUserMessage?: string isSubagent?: boolean isNonInteractive?: boolean + isRestorable?: boolean + codexDurability?: CodexDurabilityRef }, { dispatch, getState } ) => { @@ -556,6 +565,7 @@ export const openSessionTab = createAsyncThunk( const buildSessionMetadataByKey = (existing?: Tab['sessionMetadataByKey']) => mergeSessionMetadataByKey(existing, resolvedProvider, sessionId, sessionMetadataInput) + const shouldPersistSessionRef = isRestorable !== false const desiredResumeContent = buildResumeContent({ sessionType: resolvedSessionType, @@ -563,6 +573,19 @@ export const openSessionTab = createAsyncThunk( cwd, agentChatProviderSettings: providerSettings, }) + const terminalCodexDurability = resolvedProvider === 'codex' + && !shouldPersistSessionRef + && codexDurability?.candidate + ? codexDurability + : undefined + const desiredOpenContent = shouldPersistSessionRef || desiredResumeContent.kind !== 'terminal' + ? 
desiredResumeContent + : ({ + kind: 'terminal' as const, + mode: resolvedProvider, + initialCwd: cwd, + codexDurability: terminalCodexDurability, + }) const updateExistingTabMetadata = (tab: Tab | undefined) => { if (!tab) return @@ -598,7 +621,16 @@ export const openSessionTab = createAsyncThunk( && resolvedProvider === 'claude' && content.resumeSessionId === sessionId ) - if (matchesExplicitSessionRef || matchesImplicitSessionRef) { + const matchesCodexDurability = ( + resolvedProvider === 'codex' + && content.kind === 'terminal' + && content.mode === 'codex' + && ( + content.codexDurability?.durableThreadId === sessionId + || content.codexDurability?.candidate?.candidateThreadId === sessionId + ) + ) + if (matchesExplicitSessionRef || matchesImplicitSessionRef || matchesCodexDurability) { matchingLeaves.push({ id: node.id, content }) } return @@ -647,10 +679,11 @@ export const openSessionTab = createAsyncThunk( mode: resolvedProvider, codingCliProvider: resolvedProvider, initialCwd: cwd, - sessionRef: desiredResumeContent.kind === 'terminal' || desiredResumeContent.kind === 'agent-chat' + sessionRef: shouldPersistSessionRef && (desiredResumeContent.kind === 'terminal' || desiredResumeContent.kind === 'agent-chat') ? desiredResumeContent.sessionRef : undefined, - sessionMetadataByKey: buildSessionMetadataByKey(), + codexDurability: terminalCodexDurability, + sessionMetadataByKey: shouldPersistSessionRef ? buildSessionMetadataByKey() : undefined, })) dispatch(initLayout({ tabId, @@ -658,7 +691,8 @@ export const openSessionTab = createAsyncThunk( kind: 'terminal', mode: resolvedProvider, terminalId, - sessionRef: desiredResumeContent.kind === 'terminal' ? desiredResumeContent.sessionRef : undefined, + sessionRef: shouldPersistSessionRef && desiredResumeContent.kind === 'terminal' ? 
desiredResumeContent.sessionRef : undefined, + codexDurability: terminalCodexDurability, initialCwd: cwd, status: 'running', }, @@ -708,12 +742,13 @@ export const openSessionTab = createAsyncThunk( mode: resolvedProvider, codingCliProvider: resolvedProvider, initialCwd: cwd, - sessionRef: desiredResumeContent.kind === 'terminal' ? desiredResumeContent.sessionRef : undefined, - sessionMetadataByKey: buildSessionMetadataByKey(), + sessionRef: shouldPersistSessionRef && desiredResumeContent.kind === 'terminal' ? desiredResumeContent.sessionRef : undefined, + codexDurability: terminalCodexDurability, + sessionMetadataByKey: shouldPersistSessionRef ? buildSessionMetadataByKey() : undefined, })) dispatch(initLayout({ tabId, - content: desiredResumeContent, + content: desiredOpenContent, })) } ) diff --git a/src/store/types.ts b/src/store/types.ts index 828985bd4..f8a020248 100644 --- a/src/store/types.ts +++ b/src/store/types.ts @@ -20,6 +20,7 @@ import type { WorktreeGrouping, } from '@shared/settings' import type { CodingCliProviderName, TokenSummary, SessionLocator } from '@shared/ws-protocol' +import type { CodexDurabilityRef } from '@shared/codex-durability' export type { CodingCliProviderName } // TabMode includes 'shell' for regular terminals, plus all coding CLI providers @@ -57,6 +58,7 @@ export interface Tab { shell?: ShellType initialCwd?: string sessionRef?: SessionLocator + codexDurability?: CodexDurabilityRef serverInstanceId?: string resumeSessionId?: string // Legacy migration field; canonical durable identity lives in sessionRef sessionMetadataByKey?: Record<string, SessionListMetadata> @@ -77,6 +79,7 @@ export interface BackgroundTerminal { hasClients: boolean mode?: TabMode sessionRef?: SessionLocator + codexDurability?: CodexDurabilityRef } export interface CodingCliSession { diff --git a/test/e2e/agent-chat-restore-flow.test.tsx b/test/e2e/agent-chat-restore-flow.test.tsx index 18fb21ea8..a1d48cb46 100644 --- 
a/test/e2e/agent-chat-restore-flow.test.tsx +++ b/test/e2e/agent-chat-restore-flow.test.tsx @@ -159,12 +159,14 @@ describe('agent chat restore flow', () => { content: pane, })) + const durableSessionId = '00000000-0000-4000-8000-000000000111' + getAgentTimelinePage.mockResolvedValue({ - sessionId: 'cli-session-1', + sessionId: durableSessionId, items: [ { turnId: 'turn-2', - sessionId: 'cli-session-1', + sessionId: durableSessionId, role: 'assistant', summary: 'Recent summary', timestamp: '2026-03-10T10:01:00.000Z', @@ -174,7 +176,7 @@ describe('agent chat restore flow', () => { revision: 2, bodies: { 'turn-2': { - sessionId: 'cli-session-1', + sessionId: durableSessionId, turnId: 'turn-2', message: { role: 'assistant', @@ -197,7 +199,7 @@ describe('agent chat restore flow', () => { sessionId: 'sdk-sess-1', latestTurnId: 'turn-2', status: 'running', - timelineSessionId: 'cli-session-1', + timelineSessionId: durableSessionId, revision: 2, streamingActive: true, streamingText: 'partial reply', @@ -209,7 +211,7 @@ describe('agent chat restore flow', () => { await waitFor(() => { expect(getAgentTimelinePage).toHaveBeenCalledWith( - 'cli-session-1', + durableSessionId, expect.objectContaining({ priority: 'visible', includeBodies: true }), expect.anything(), ) @@ -222,11 +224,18 @@ describe('agent chat restore flow', () => { await waitFor(() => { const root = store.getState().panes.layouts.t1 const leaf = root && findLeaf(root, 'p1') - expect(leaf?.content.kind === 'agent-chat' ? leaf.content.resumeSessionId : undefined).toBe('cli-session-1') + expect(leaf?.content.kind === 'agent-chat' ? 
leaf.content.sessionRef : undefined).toEqual({ + provider: 'claude', + sessionId: durableSessionId, + }) const tab = store.getState().tabs.tabs.find((entry) => entry.id === 't1') - expect(tab?.resumeSessionId).toBe('cli-session-1') - expect(tab?.sessionMetadataByKey?.['claude:cli-session-1']).toEqual(expect.objectContaining({ + expect(tab?.sessionRef).toEqual({ + provider: 'claude', + sessionId: durableSessionId, + }) + expect(tab?.resumeSessionId).toBeUndefined() + expect(tab?.sessionMetadataByKey?.[`claude:${durableSessionId}`]).toEqual(expect.objectContaining({ sessionType: 'freshclaude', firstUserMessage: 'Continue from the old tab', })) diff --git a/test/e2e/agent-cli-flow.test.ts b/test/e2e/agent-cli-flow.test.ts index 65a4c528f..eea80af56 100644 --- a/test/e2e/agent-cli-flow.test.ts +++ b/test/e2e/agent-cli-flow.test.ts @@ -138,6 +138,15 @@ async function waitForExpect(assertions: () => void, timeoutMs = 2000, intervalM throw lastError ?? new Error('Timed out waiting for expectations to pass') } +function findPaneContent(node: any, paneId: string): any | undefined { + if (!node) return undefined + if (node.type === 'leaf') return node.id === paneId ? node.content : undefined + if (node.type === 'split') { + return findPaneContent(node.children?.[0], paneId) ?? 
findPaneContent(node.children?.[1], paneId) + } + return undefined +} + describe('cli e2e flow', () => { it('runs list-tabs end-to-end', async () => { const { url, close } = await startTestServer() @@ -427,6 +436,87 @@ describe('cli e2e flow', () => { } }) + it('passes canonical Codex session refs through new-tab, split-pane, and respawn-pane', async () => { + const server = await startTestServerWithRealLayoutStore() + try { + const created = await runCliJson<{ data: { tabId: string; paneId: string } }>(server.url, [ + 'new-tab', + '--mode', + 'codex', + '--session-ref', + 'codex:thread-cli-new', + ]) + const tabId = created.data.tabId + const firstPaneId = created.data.paneId + + await waitForExpect(() => { + const snapshot = (server.layoutStore as any).snapshot + expect(findPaneContent(snapshot.layouts[tabId], firstPaneId)).toEqual(expect.objectContaining({ + mode: 'codex', + sessionRef: { provider: 'codex', sessionId: 'thread-cli-new' }, + })) + }) + + const split = await runCliJson<{ data: { paneId: string } }>(server.url, [ + 'split-pane', + '-t', + firstPaneId, + '--mode', + 'codex', + '--session-ref=codex:thread-cli-split', + ]) + + await runCliJson<{ data: { terminalId: string } }>(server.url, [ + 'respawn-pane', + '-t', + firstPaneId, + '--mode', + 'codex', + '--session-ref', + 'codex:thread-cli-respawn', + ]) + + await waitForExpect(() => { + const snapshot = (server.layoutStore as any).snapshot + expect(findPaneContent(snapshot.layouts[tabId], firstPaneId)).toEqual(expect.objectContaining({ + mode: 'codex', + sessionRef: { provider: 'codex', sessionId: 'thread-cli-respawn' }, + })) + expect(findPaneContent(snapshot.layouts[tabId], split.data.paneId)).toEqual(expect.objectContaining({ + mode: 'codex', + sessionRef: { provider: 'codex', sessionId: 'thread-cli-split' }, + })) + }) + } finally { + await server.close() + } + }) + + it('rejects raw Codex resume ids in new-tab, split-pane, and respawn-pane', async () => { + const server = await 
startTestServerWithRealLayoutStore() + try { + const created = await runCliJson<{ data: { paneId: string } }>(server.url, [ + 'new-tab', + '--mode', + 'codex', + ]) + + const commands = [ + ['new-tab', '--mode', 'codex', '--resume', 'thread-raw-new'], + ['split-pane', '-t', created.data.paneId, '--mode', 'codex', '--resume', 'thread-raw-split'], + ['respawn-pane', '-t', created.data.paneId, '--mode', 'codex', '--resume', 'thread-raw-respawn'], + ] + + for (const args of commands) { + const output = await runCliResult(server.url, args) + expect(output.code).toBe(1) + expect(output.stderr).toContain('Restore requires sessionRef; resumeSessionId is a legacy field and cannot be used as restore identity.') + } + } finally { + await server.close() + } + }) + it('lists and resolves derived pane titles without an explicit rename', async () => { const server = await startTestServerWithRealLayoutStore() try { diff --git a/test/e2e/agent-cli-screenshot-smoke.test.ts b/test/e2e/agent-cli-screenshot-smoke.test.ts index 858d81587..851af8edb 100644 --- a/test/e2e/agent-cli-screenshot-smoke.test.ts +++ b/test/e2e/agent-cli-screenshot-smoke.test.ts @@ -45,7 +45,7 @@ function createFakeRegistry() { const input = vi.fn((terminalId: string, data: unknown) => { const record = records.get(terminalId) - if (!record || record.status !== 'running') return false + if (!record || record.status !== 'running') return { status: 'not_running' } const text = String(data ?? 
'') for (const ch of text) { @@ -61,7 +61,7 @@ function createFakeRegistry() { } record._pendingInput += ch } - return true + return { status: 'written' } }) const get = (terminalId: string) => records.get(terminalId) diff --git a/test/e2e/codex-refresh-rehydrate-flow.test.tsx b/test/e2e/codex-refresh-rehydrate-flow.test.tsx index 7585400ef..d5dcd213d 100644 --- a/test/e2e/codex-refresh-rehydrate-flow.test.tsx +++ b/test/e2e/codex-refresh-rehydrate-flow.test.tsx @@ -378,6 +378,168 @@ describe('codex refresh rehydrate flow (e2e)', () => { }) }) + it('recreates from captured Codex restore state after refresh when the live terminal id is gone', async () => { + const tabId = 'tab-codex-candidate-refresh' + const paneId = 'pane-codex-candidate-refresh' + const initialPaneContent: TerminalPaneContent = { + kind: 'terminal', + createRequestId: 'req-codex-candidate-refresh', + status: 'creating', + mode: 'codex', + shell: 'system', + } + const candidateDurability = { + schemaVersion: 1, + state: 'captured_pre_turn', + candidate: { + provider: 'codex', + candidateThreadId: 'thread-candidate-refresh', + rolloutPath: '/home/user/.codex/sessions/2026/05/14/rollout-candidate-refresh.jsonl', + source: 'thread_start_response', + capturedAt: 1715720000000, + }, + } + + const initialStore = createStore({ + tabs: { + tabs: [{ + id: tabId, + mode: 'codex', + status: 'creating', + title: 'Codex', + titleSetByUser: false, + createRequestId: 'req-codex-candidate-refresh', + }], + activeTabId: tabId, + }, + panes: { + layouts: { [tabId]: { type: 'leaf', id: paneId, content: initialPaneContent } }, + activePane: { [tabId]: paneId }, + paneTitles: {}, + }, + }) + + const firstRender = render( + <Provider store={initialStore}> + <TerminalViewFromStore tabId={tabId} paneId={paneId} /> + </Provider>, + ) + + act(() => { + wsHarness.emit({ + type: 'terminal.created', + requestId: 'req-codex-candidate-refresh', + terminalId: 'term-codex-candidate-old', + createdAt: 1, + }) + wsHarness.emit({ + 
type: 'terminal.codex.durability.updated', + terminalId: 'term-codex-candidate-old', + durability: candidateDurability, + }) + }) + + await waitFor(() => { + expect(sentMessages()).toContainEqual({ + type: 'terminal.codex.candidate.persisted', + terminalId: 'term-codex-candidate-old', + candidateThreadId: 'thread-candidate-refresh', + rolloutPath: '/home/user/.codex/sessions/2026/05/14/rollout-candidate-refresh.jsonl', + capturedAt: 1715720000000, + }) + const persisted = readPersistedLayoutSnapshotForTest() + expect(persisted?.tabs.tabs.find((tab) => tab.id === tabId)?.sessionRef).toBeUndefined() + expect((persisted?.panes.layouts[tabId] as any)?.content?.sessionRef).toBeUndefined() + expect((persisted?.panes.layouts[tabId] as any)?.content?.codexDurability).toEqual(candidateDurability) + }) + + const persisted = readPersistedLayoutSnapshotForTest() + expect(persisted).toBeTruthy() + + firstRender.unmount() + cleanup() + wsHarness.reset() + wsHarness.send.mockClear() + wsHarness.send.mockImplementation((msg: any) => { + wsHarness.rememberAttach(msg) + }) + resetPersistedLayoutCacheForTests() + + const restoredStore = createStore({ + tabs: { + tabs: persisted!.tabs.tabs, + activeTabId: persisted!.tabs.activeTabId, + }, + panes: { + layouts: persisted!.panes.layouts, + activePane: persisted!.panes.activePane, + paneTitles: persisted!.panes.paneTitles, + paneTitleSetByUser: persisted!.panes.paneTitleSetByUser, + }, + }) + + render( + <Provider store={restoredStore}> + <TerminalViewFromStore tabId={tabId} paneId={paneId} /> + </Provider>, + ) + + act(() => { + wsHarness.emit({ + type: 'error', + code: 'INVALID_TERMINAL_ID', + message: 'Unknown terminalId', + terminalId: 'term-codex-candidate-old', + }) + }) + + await waitFor(() => { + const recreated = sentMessages().find((msg) => ( + msg?.type === 'terminal.create' + && msg?.requestId !== 'req-codex-candidate-refresh' + )) + expect(recreated).toMatchObject({ + type: 'terminal.create', + mode: 'codex', + 
codexDurability: candidateDurability, + restore: true, + }) + expect(recreated?.sessionRef).toBeUndefined() + expect(recreated?.resumeSessionId).toBeUndefined() + }) + + const recreated = sentMessages().find((msg) => ( + msg?.type === 'terminal.create' + && msg?.requestId !== 'req-codex-candidate-refresh' + )) + expect(recreated?.requestId).toBeTruthy() + + act(() => { + wsHarness.emit({ + type: 'terminal.created', + requestId: recreated!.requestId, + terminalId: 'term-codex-candidate-fresh', + createdAt: 2, + clearCodexDurability: true, + restoreError: { + code: 'RESTORE_UNAVAILABLE', + reason: 'durable_artifact_missing', + }, + }) + }) + + await waitFor(() => { + const afterFreshCreate = readPersistedLayoutSnapshotForTest() + expect((afterFreshCreate?.panes.layouts[tabId] as any)?.content?.terminalId).toBe('term-codex-candidate-fresh') + expect((afterFreshCreate?.panes.layouts[tabId] as any)?.content?.codexDurability).toBeUndefined() + expect((afterFreshCreate?.panes.layouts[tabId] as any)?.content?.restoreError).toEqual({ + code: 'RESTORE_UNAVAILABLE', + reason: 'durable_artifact_missing', + }) + expect(afterFreshCreate?.tabs.tabs.find((tab) => tab.id === tabId)?.codexDurability).toBeUndefined() + }) + }) + it('reattaches a same-server live Codex terminal before any durable identity exists', async () => { const tabId = 'tab-codex-live' const paneId = 'pane-codex-live' @@ -429,7 +591,7 @@ describe('codex refresh rehydrate flow (e2e)', () => { expect(sentMessages().some((msg) => msg?.type === 'terminal.create')).toBe(false) }) - it('surfaces restore-unavailable instead of starting a fresh Codex session when a live-only terminal is gone', async () => { + it('asks the server to recover live-only Codex panes when the old terminal is gone', async () => { const tabId = 'tab-codex-live-only' const paneId = 'pane-codex-live-only' const store = createStore({ @@ -489,11 +651,14 @@ describe('codex refresh rehydrate flow (e2e)', () => { }) await waitFor(() => { - 
expect(sentMessages().slice(baselineMessages).some((msg) => msg?.type === 'terminal.create')).toBe(false) - expect((getTerminalPaneContent(store, tabId) as any)?.restoreError).toEqual({ - code: 'RESTORE_UNAVAILABLE', - reason: 'dead_live_handle', + const recreated = sentMessages().slice(baselineMessages).find((msg) => msg?.type === 'terminal.create') + expect(recreated).toMatchObject({ + type: 'terminal.create', + mode: 'codex', + restore: true, }) + expect(recreated?.sessionRef).toBeUndefined() + expect(recreated?.codexDurability).toBeUndefined() }) }) }) diff --git a/test/e2e/tabs-view-flow.test.tsx b/test/e2e/tabs-view-flow.test.tsx index a51ade9f4..6b34b13f4 100644 --- a/test/e2e/tabs-view-flow.test.tsx +++ b/test/e2e/tabs-view-flow.test.tsx @@ -153,6 +153,77 @@ describe('tabs view flow', () => { expect(copiedLayout?.content?.terminalId).toBeUndefined() }) + it('preserves candidate-only Codex durability state when pulling a registry tab', () => { + const store = configureStore({ + reducer: { + tabs: tabsReducer, + panes: panesReducer, + tabRegistry: tabRegistryReducer, + connection: connectionReducer, + }, + }) + store.dispatch(setServerInstanceId('srv-local')) + const codexDurability = { + schemaVersion: 1, + state: 'captured_pre_turn', + candidate: { + provider: 'codex', + candidateThreadId: '019e2413-b8d0-7a98-b5fb-2f4af05baf58', + rolloutPath: '/home/user/.codex/sessions/2026/05/14/rollout.jsonl', + source: 'thread_start_response', + capturedAt: 1778764200000, + }, + } as const + + store.dispatch(setTabRegistrySnapshot({ + localOpen: [], + remoteOpen: [{ + tabKey: 'remote:tab-codex-candidate', + tabId: 'tab-codex-candidate', + serverInstanceId: 'srv-remote', + deviceId: 'remote', + deviceLabel: 'remote-device', + tabName: 'codex candidate', + status: 'open', + revision: 2, + createdAt: 10, + updatedAt: 20, + paneCount: 1, + titleSetByUser: false, + panes: [{ + paneId: 'pane-codex-candidate', + kind: 'terminal', + payload: { + mode: 'codex', + 
codexDurability, + liveTerminal: { + terminalId: 'term-remote-candidate', + serverInstanceId: 'srv-remote', + }, + }, + }], + }], + closed: [], + })) + + render( + <Provider store={store}> + <TabsView /> + </Provider>, + ) + + const remoteCard = screen.getByLabelText('remote-device: codex candidate') + expect(remoteCard).toBeTruthy() + fireEvent.click(remoteCard) + + const copiedTab = store.getState().tabs.tabs[0] + expect(copiedTab?.title).toBe('codex candidate') + const copiedLayout = copiedTab ? (store.getState().panes.layouts[copiedTab.id] as any) : undefined + expect(copiedLayout?.content?.sessionRef).toBeUndefined() + expect(copiedLayout?.content?.codexDurability).toEqual(codexDurability) + expect(copiedLayout?.content?.terminalId).toBeUndefined() + }) + it('opens same-server tab copies with an explicit live terminal handle', () => { const store = configureStore({ reducer: { diff --git a/test/fixtures/coding-cli/codex-app-server/fake-app-server.mjs b/test/fixtures/coding-cli/codex-app-server/fake-app-server.mjs index 0c741f60b..fb1c478e4 100644 --- a/test/fixtures/coding-cli/codex-app-server/fake-app-server.mjs +++ b/test/fixtures/coding-cli/codex-app-server/fake-app-server.mjs @@ -3,6 +3,7 @@ import { WebSocketServer } from 'ws' import { spawn } from 'node:child_process' import fs from 'node:fs' +import path from 'node:path' if (process.argv[2] === 'fake-native-child') { process.on('SIGTERM', () => { @@ -69,9 +70,13 @@ function successResult(method, params) { } } if (method === 'thread/start') { + const threadId = behavior.threadStartThreadId || 'thread-new-1' + const rolloutPath = behavior.threadStartRolloutPath || behavior.rolloutPath return { thread: { - id: 'thread-new-1', + id: threadId, + ...(rolloutPath ? { path: rolloutPath } : {}), + ...(typeof behavior.threadStartEphemeral === 'boolean' ? { ephemeral: behavior.threadStartEphemeral } : {}), }, cwd: params?.cwd ?? 
process.cwd(), model: 'fixture-model', @@ -85,9 +90,12 @@ function successResult(method, params) { } } if (method === 'thread/resume') { + const rolloutPath = behavior.threadResumeRolloutPath || behavior.rolloutPath return { thread: { id: params?.threadId, + ...(rolloutPath ? { path: rolloutPath } : {}), + ...(typeof behavior.threadResumeEphemeral === 'boolean' ? { ephemeral: behavior.threadResumeEphemeral } : {}), }, cwd: params?.cwd ?? process.cwd(), model: 'fixture-model', @@ -108,8 +116,37 @@ function successResult(method, params) { return {} } +function maybeWriteRolloutForMethod(method, params) { + const spec = behavior.writeRolloutOnMethods?.[method] + if (!spec?.path) return + const threadId = spec.threadId || params?.threadId || behavior.threadStartThreadId || 'thread-new-1' + fs.mkdirSync(path.dirname(spec.path), { recursive: true }) + const line = JSON.stringify(spec.record || { + type: 'session_meta', + payload: { id: threadId }, + }) + '\n' + if (spec.append) { + fs.appendFileSync(spec.path, line, 'utf8') + } else { + fs.writeFileSync(spec.path, line, 'utf8') + } +} + const listenUrl = parseListenUrl(process.argv.slice(2)) const behavior = loadBehavior() +if (process.env.FAKE_CODEX_APP_SERVER_ARG_LOG) { + fs.writeFileSync(process.env.FAKE_CODEX_APP_SERVER_ARG_LOG, JSON.stringify({ + argv: process.argv.slice(2), + env: { + FRESHELL: process.env.FRESHELL, + FRESHELL_URL: process.env.FRESHELL_URL, + FRESHELL_TOKEN: process.env.FRESHELL_TOKEN, + FRESHELL_TERMINAL_ID: process.env.FRESHELL_TERMINAL_ID, + FRESHELL_TAB_ID: process.env.FRESHELL_TAB_ID, + FRESHELL_PANE_ID: process.env.FRESHELL_PANE_ID, + }, + }), 'utf8') +} const closeSocketAfterMethodsOnce = new Set(behavior.closeSocketAfterMethodsOnce || []) const url = new URL(listenUrl) const host = url.hostname @@ -196,6 +233,7 @@ wss.on('connection', (socket) => { id: message.id, result: override?.result ?? 
successResult(method, message.params), })) + maybeWriteRolloutForMethod(method, message.params) for (const notification of behavior.notificationsAfterMethods?.[method] || []) { socket.send(JSON.stringify(notification)) } diff --git a/test/helpers/coding-cli/fake-codex-launch-planner.ts b/test/helpers/coding-cli/fake-codex-launch-planner.ts index f669d025f..f1f32712c 100644 --- a/test/helpers/coding-cli/fake-codex-launch-planner.ts +++ b/test/helpers/coding-cli/fake-codex-launch-planner.ts @@ -3,8 +3,6 @@ export const DEFAULT_CODEX_REMOTE_WS_URL = 'ws://127.0.0.1:43123' export class FakeCodexLaunchSidecar { adoptCalls: Array<{ terminalId: string; generation: number }> = [] shutdownCalls = 0 - waitForLoadedThreadCalls: Array<{ threadId: string; options?: { timeoutMs?: number; pollMs?: number } }> = [] - waitForLoadedThreadError: Error | null = null shutdownError: Error | null = null shutdownStarted = false private lifecycleLossHandlers = new Set<(event: unknown) => void>() @@ -13,10 +11,6 @@ export class FakeCodexLaunchSidecar { this.adoptCalls.push(input) } - async listLoadedThreads() { - return ['thread-new-1'] - } - async shutdown() { if (this.shutdownStarted) return this.shutdownStarted = true @@ -24,11 +18,6 @@ export class FakeCodexLaunchSidecar { if (this.shutdownError) throw this.shutdownError } - async waitForLoadedThread(threadId: string, options?: { timeoutMs?: number; pollMs?: number }) { - this.waitForLoadedThreadCalls.push({ threadId, options }) - if (this.waitForLoadedThreadError) throw this.waitForLoadedThreadError - } - onLifecycleLoss(handler: (event: unknown) => void) { this.lifecycleLossHandlers.add(handler) return () => this.lifecycleLossHandlers.delete(handler) @@ -44,6 +33,7 @@ export class FakeCodexLaunchSidecar { export class FakeCodexLaunchPlanner { planCreateCalls: any[] = [] sidecar = new FakeCodexLaunchSidecar() + private failuresRemaining = 0 constructor( private readonly plan: { @@ -56,8 +46,16 @@ export class FakeCodexLaunchPlanner { 
}, ) {} + failNext(count: number) { + this.failuresRemaining = Math.max(0, count) + } + async planCreate(input: any) { this.planCreateCalls.push(input) + if (this.failuresRemaining > 0) { + this.failuresRemaining -= 1 + throw new Error('fake Codex launch failed') + } return { ...this.plan, sidecar: this.plan.sidecar ?? this.sidecar, diff --git a/test/helpers/coding-cli/real-session-contract-harness.ts b/test/helpers/coding-cli/real-session-contract-harness.ts index 1e5644896..e77dd8b55 100644 --- a/test/helpers/coding-cli/real-session-contract-harness.ts +++ b/test/helpers/coding-cli/real-session-contract-harness.ts @@ -74,6 +74,7 @@ type OpencodeFacts = { canonicalIdentity: 'session-id' runEventSessionIdMatchesDbId: boolean busyStatusUsesAuthoritativeSessionId: boolean + attachFormatJsonEmitsEvents: boolean titleOnResumeMutatesStoredTitle: boolean sessionSubcommands: string[] } @@ -514,23 +515,32 @@ export class ProbeWorkspace { stderr += chunk }) + const exitPromise = new Promise<ExitSummary>((resolve, reject) => { + child.once('error', reject) + child.once('close', (code, signal) => { + resolve({ + code, + signal, + }) + }) + }) + const waitForExit = (timeoutMs = 30_000) => new Promise<ExitSummary>((resolve, reject) => { const timeout = setTimeout(() => { reject(new Error(`Timed out waiting for process ${command} (${child.pid}) to exit.`)) }, timeoutMs) - child.once('error', (error) => { - clearTimeout(timeout) - reject(error) - }) - child.once('close', (code, signal) => { - clearTimeout(timeout) - resolve({ - code, - signal, - }) - }) + exitPromise.then( + (summary) => { + clearTimeout(timeout) + resolve(summary) + }, + (error) => { + clearTimeout(timeout) + reject(error) + }, + ) }) const stop = async () => { diff --git a/test/integration/real/codex-app-server-readiness-contract.test.ts b/test/integration/real/codex-app-server-readiness-contract.test.ts index baa93e84e..8c222a6f0 100644 --- a/test/integration/real/codex-app-server-readiness-contract.test.ts 
+++ b/test/integration/real/codex-app-server-readiness-contract.test.ts @@ -242,7 +242,7 @@ describe('real Codex app-server durable readiness contract', () => { await actor.initialize() const resumed = await actor.resumeThread({ threadId: durableThreadId, cwd: process.cwd() }) - expect(resumed.thread.id).toBe(durableThreadId) + expect(resumed.threadId).toBe(durableThreadId) const readiness = await waitForLifecycle( lifecycle, diff --git a/test/integration/real/coding-cli-session-contract.test.ts b/test/integration/real/coding-cli-session-contract.test.ts index 699242022..3242dbf49 100644 --- a/test/integration/real/coding-cli-session-contract.test.ts +++ b/test/integration/real/coding-cli-session-contract.test.ts @@ -457,7 +457,7 @@ describe.sequential('coding cli real provider session contract', () => { opencodePath, [ 'run', - 'Explain the purpose of this repository in one sentence.', + 'Write ten short sentences about terminal multiplexers. Do not use bullets.', '--format', 'json', '--dangerously-skip-permissions', @@ -470,12 +470,13 @@ describe.sequential('coding cli real provider session contract', () => { ) const busyStatusPromise = waitForAnyHttpBusyStatus(statusUrl) - const attachedStepStart = await waitForJsonLine(attachedRun, (value) => value?.type === 'step_start', 60_000) - const attachedSessionId = attachedStepStart.sessionID as string const busyStatus = await busyStatusPromise - expect(busyStatus.sessionId).toBe(attachedSessionId) - expect(busyStatus.payload[attachedSessionId]).toEqual({ type: 'busy' }) + expect(busyStatus.payload[busyStatus.sessionId]).toEqual({ type: 'busy' }) + const attachedDbRow = await waitForOpencodeDbSession(homes.dbPath, busyStatus.sessionId) + expect(attachedDbRow.id).toBe(busyStatus.sessionId) expect((await attachedRun.waitForExit(120_000)).code).toBe(0) + expect(note.providers.opencode.attachFormatJsonEmitsEvents).toBe(false) + expect(attachedRun.stdout().trim()).toBe('') const titledRun = await workspace.spawnProcess( 
opencodePath, diff --git a/test/integration/server/codex-real-provider-smoke.test.ts b/test/integration/server/codex-real-provider-smoke.test.ts index 8fb0e4295..975210abe 100644 --- a/test/integration/server/codex-real-provider-smoke.test.ts +++ b/test/integration/server/codex-real-provider-smoke.test.ts @@ -171,10 +171,6 @@ async function prepareRealProviderCodexHome(targetCodexHome: string): Promise<{ return { sessionId: selected.id } } -function stripAnsi(value: string): string { - return value.replace(/\x1b\[[0-9;?]*[ -/]*[@-~]/g, '') -} - afterEach(async () => { await Promise.all([...registries].map(async (registry) => { registries.delete(registry) @@ -197,13 +193,6 @@ describe('Codex real-provider smoke', () => { const { sessionId } = await prepareRealProviderCodexHome(codexHome) const registry = new TerminalRegistry() registries.add(registry) - const outputChunks: string[] = [] - const outputHandler = (event: { data?: unknown }) => { - if (typeof event.data === 'string') { - outputChunks.push(stripAnsi(event.data)) - } - } - registry.on('terminal.output.raw', outputHandler) const previousCodexHome = process.env.CODEX_HOME process.env.CODEX_HOME = codexHome const planner = new CodexLaunchPlanner(() => new CodexAppServerRuntime({ @@ -235,14 +224,6 @@ describe('Codex real-provider smoke', () => { }, }) await resumePlan.sidecar.adopt({ terminalId: term.terminalId, generation: 0 }) - try { - await resumePlan.sidecar.waitForLoadedThread(sessionId, { timeoutMs: 20_000, pollMs: 250 }) - } catch (error) { - const outputTail = outputChunks.join('').slice(-1_000) - throw new Error( - `${error instanceof Error ? 
error.message : String(error)}\nCodex TUI output before failure:\n${outputTail}`, - ) - } const ownershipId = await readOwnershipId(metadataDir) await registry.killAndWait(term.terminalId) @@ -255,7 +236,6 @@ describe('Codex real-provider smoke', () => { } else { process.env.CODEX_HOME = previousCodexHome } - registry.off('terminal.output.raw', outputHandler) } }, 60_000) }) diff --git a/test/integration/server/codex-session-flow.test.ts b/test/integration/server/codex-session-flow.test.ts index e18eeb89e..1a276fc88 100644 --- a/test/integration/server/codex-session-flow.test.ts +++ b/test/integration/server/codex-session-flow.test.ts @@ -1,10 +1,11 @@ -import { afterAll, beforeAll, beforeEach, describe, expect, it, vi } from 'vitest' +import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, it, vi } from 'vitest' import fsp from 'fs/promises' import http from 'http' import os from 'os' import path from 'path' import express from 'express' import WebSocket from 'ws' +import { createRequire } from 'module' import { WsHandler } from '../../../server/ws-handler.js' import { TerminalRegistry } from '../../../server/terminal-registry.js' import { CodexAppServerRuntime } from '../../../server/coding-cli/codex-app-server/runtime.js' @@ -30,7 +31,7 @@ vi.mock('../../../server/logger', () => { child: vi.fn(), } logger.child.mockReturnValue(logger) - return { logger } + return { logger, sessionLifecycleLogger: logger } }) process.env.AUTH_TOKEN = 'test-token' @@ -40,16 +41,25 @@ const FAKE_APP_SERVER_PATH = path.resolve( process.cwd(), 'test/fixtures/coding-cli/codex-app-server/fake-app-server.mjs', ) +const requireForFixture = createRequire(import.meta.url) +const WS_MODULE_PATH = requireForFixture.resolve('ws') async function writeFakeCodexExecutable(binaryPath: string) { const script = `#!/usr/bin/env node const fs = require('fs') +let WebSocket function appendJsonLine(filePath, value) { if (!filePath) return fs.appendFileSync(filePath, JSON.stringify(value) 
+ '\\n', 'utf8') } +function remoteUrlFromArgs(args) { + const index = args.indexOf('--remote') + if (index === -1 || index === args.length - 1) return undefined + return args[index + 1] +} + const argLogPath = process.env.FAKE_CODEX_ARG_LOG if (argLogPath) { fs.writeFileSync(argLogPath, JSON.stringify(process.argv.slice(2)), 'utf8') @@ -70,14 +80,64 @@ if (process.env.FAKE_CODEX_FIRST_LAUNCH_CLAIM_PATH) { } } +const remoteUrl = remoteUrlFromArgs(process.argv.slice(2)) +let remoteSocket +let remoteMessageId = 1 +let remoteThreadId +let remoteReady = false +if (process.env.FAKE_CODEX_CONNECT_REMOTE === '1' && remoteUrl) { + WebSocket = require(${JSON.stringify(WS_MODULE_PATH)}) + setTimeout(() => { + remoteSocket = new WebSocket(remoteUrl) + remoteSocket.on('open', () => { + remoteSocket.send(JSON.stringify({ + id: remoteMessageId++, + method: 'thread/start', + params: { cwd: process.cwd() }, + })) + }) + remoteSocket.on('message', (raw) => { + appendJsonLine(process.env.FAKE_CODEX_REMOTE_LOG, { + pid: process.pid, + message: JSON.parse(raw.toString('utf8')), + }) + const message = JSON.parse(raw.toString('utf8')) + const threadId = message && message.result && message.result.thread && message.result.thread.id + if (threadId) { + remoteThreadId = threadId + remoteReady = true + } + }) + remoteSocket.on('error', (error) => { + appendJsonLine(process.env.FAKE_CODEX_REMOTE_LOG, { + pid: process.pid, + error: error.message, + }) + }) + }, Number(process.env.FAKE_CODEX_REMOTE_CONNECT_DELAY_MS || 0)) +} + process.stdin.on('data', (chunk) => { appendJsonLine(process.env.FAKE_CODEX_INPUT_LOG, { pid: process.pid, data: chunk.toString('utf8'), }) + if (remoteSocket && remoteSocket.readyState === WebSocket.OPEN && remoteReady && remoteThreadId) { + remoteSocket.send(JSON.stringify({ + id: remoteMessageId++, + method: 'turn/start', + params: { + threadId: remoteThreadId, + input: chunk.toString('utf8'), + }, + })) + } }) -process.on('SIGTERM', () => process.exit(0)) 
+process.on('SIGTERM', () => { + if (remoteSocket) remoteSocket.close() + process.exit(0) +}) process.stdout.write('codex remote attached\\n') if (process.env.FAKE_CODEX_STAY_ALIVE === '1') { if ( @@ -142,6 +202,12 @@ function waitForMessage( }) } +async function killAllTerminals(registry: TerminalRegistry): Promise<void> { + await Promise.all( + registry.list().map((term) => registry.killAndWait(term.terminalId).catch(() => false)), + ) +} + async function waitForFile(filePath: string, timeoutMs = 3_000): Promise<void> { const deadline = Date.now() + timeoutMs while (Date.now() < deadline) { @@ -176,15 +242,6 @@ async function isProcessAlive(pid: number): Promise<boolean> { } } -async function waitForProcessExit(pid: number, timeoutMs = 5_000): Promise<void> { - const deadline = Date.now() + timeoutMs - while (Date.now() < deadline) { - if (!(await isProcessAlive(pid))) return - await new Promise((resolve) => setTimeout(resolve, 25)) - } - throw new Error(`Timed out waiting for process ${pid} to exit`) -} - async function readJsonLines(filePath: string): Promise<any[]> { const raw = await fsp.readFile(filePath, 'utf8').catch(() => '') return raw @@ -309,6 +366,7 @@ describe('Codex Session Flow Integration', () => { }) beforeEach(async () => { + await killAllTerminals(registry) delete process.env.FAKE_CODEX_APP_SERVER_BEHAVIOR await planner?.shutdown() await Promise.all([...runtimes].map((runtime) => runtime.shutdown())) @@ -330,6 +388,10 @@ describe('Codex Session Flow Integration', () => { await fsp.rm(argLogPath, { force: true }) }) + afterEach(async () => { + await killAllTerminals(registry) + }) + afterAll(async () => { if (previousCodexCmd === undefined) { delete process.env.CODEX_CMD @@ -351,7 +413,11 @@ describe('Codex Session Flow Integration', () => { await fsp.rm(tempDir, { recursive: true, force: true }) }) - it('starts the exact codex thread before PTY spawn and launches the TUI in remote mode', async () => { + it('launches fresh Codex in remote mode 
without treating the bootstrap id as durable', async () => { + const launchLogPath = path.join(tempDir, 'fresh-codex-launches.jsonl') + const previousLaunchLog = process.env.FAKE_CODEX_LAUNCH_LOG + process.env.FAKE_CODEX_LAUNCH_LOG = launchLogPath + await fsp.rm(launchLogPath, { force: true }) const ws = await createAuthenticatedWs(port) try { @@ -373,28 +439,143 @@ describe('Codex Session Flow Integration', () => { throw new Error(`terminal.create failed: ${created.message}`) } - expect(created.effectiveResumeSessionId).toBe('thread-new-1') + expect(created.effectiveResumeSessionId).toBeUndefined() const record = registry.get(created.terminalId) - expect(record?.resumeSessionId).toBe('thread-new-1') + expect(record?.resumeSessionId).toBeUndefined() - await waitForFile(argLogPath) - const recordedArgs = JSON.parse(await fsp.readFile(argLogPath, 'utf8')) + const launch = await waitForJsonLine(launchLogPath, (line) => line.pid === record?.pty.pid) + const recordedArgs = launch.args expect(recordedArgs.slice(0, 2)).toEqual([ '--remote', expect.stringMatching(/^ws:\/\/127\.0\.0\.1:\d+$/), ]) - expect(recordedArgs).toContain('resume') - expect(recordedArgs).toContain('thread-new-1') + const appsFlagIndex = recordedArgs.indexOf('features.apps=false') + expect(appsFlagIndex).toBeGreaterThan(0) + expect(recordedArgs[appsFlagIndex - 1]).toBe('-c') + expect(recordedArgs).not.toContain('resume') + expect(recordedArgs).not.toContain('thread-new-1') expect(recordedArgs).toContain('tui.notification_method=bel') expect(recordedArgs).not.toContain('--model') expect(recordedArgs).not.toContain('--sandbox') } finally { await closeWebSocket(ws) + if (previousLaunchLog === undefined) delete process.env.FAKE_CODEX_LAUNCH_LOG + else process.env.FAKE_CODEX_LAUNCH_LOG = previousLaunchLog } }) - it('restores a persisted Codex session without calling thread/resume on the app-server', async () => { + it('captures a fresh Codex restore identity from the fake TUI and promotes it after turn 
completion', async () => { + const testDir = await fsp.mkdtemp(path.join(tempDir, 'fresh-durable-flow-')) + const rolloutPath = path.join(testDir, 'rollout.jsonl') + const remoteLogPath = path.join(testDir, 'remote.jsonl') + const launchLogPath = path.join(testDir, 'codex-launches.jsonl') + const previousConnectRemote = process.env.FAKE_CODEX_CONNECT_REMOTE + const previousRemoteDelay = process.env.FAKE_CODEX_REMOTE_CONNECT_DELAY_MS + const previousRemoteLog = process.env.FAKE_CODEX_REMOTE_LOG + const previousStayAlive = process.env.FAKE_CODEX_STAY_ALIVE + const previousLaunchLog = process.env.FAKE_CODEX_LAUNCH_LOG + process.env.FAKE_CODEX_CONNECT_REMOTE = '1' + process.env.FAKE_CODEX_REMOTE_CONNECT_DELAY_MS = '25' + process.env.FAKE_CODEX_REMOTE_LOG = remoteLogPath + process.env.FAKE_CODEX_STAY_ALIVE = '1' + process.env.FAKE_CODEX_LAUNCH_LOG = launchLogPath + process.env.FAKE_CODEX_APP_SERVER_BEHAVIOR = JSON.stringify({ + threadStartThreadId: 'thread-fake-tui-durable', + threadStartRolloutPath: rolloutPath, + writeRolloutOnMethods: { + 'turn/start': { + path: rolloutPath, + threadId: 'thread-fake-tui-durable', + }, + }, + notificationsAfterMethods: { + 'turn/start': [{ + method: 'turn/completed', + params: { + threadId: 'thread-fake-tui-durable', + turnId: 'turn-1', + status: 'completed', + }, + }], + }, + }) + + const ws = await createAuthenticatedWs(port) + + try { + ws.send(JSON.stringify({ + type: 'terminal.create', + requestId: 'test-req-codex-fake-tui', + mode: 'codex', + cwd: testDir, + })) + + const created = await waitForMessage( + ws, + (msg) => ( + msg.requestId === 'test-req-codex-fake-tui' + && (msg.type === 'terminal.created' || msg.type === 'error') + ), + ) + if (created.type === 'error') { + throw new Error(`terminal.create failed: ${created.message}`) + } + + await vi.waitFor(() => { + expect(registry.get(created.terminalId)?.codexDurability).toMatchObject({ + state: 'captured_pre_turn', + candidate: { + candidateThreadId: 
'thread-fake-tui-durable', + rolloutPath, + }, + }) + }) + await waitForJsonLine(remoteLogPath, (line) => ( + line.pid === registry.get(created.terminalId)?.pty.pid + && line.message?.result?.thread?.id === 'thread-fake-tui-durable' + )) + + ws.send(JSON.stringify({ + type: 'terminal.input', + terminalId: created.terminalId, + data: 'hello from fake TUI\r', + })) + + await vi.waitFor(() => { + expect(registry.get(created.terminalId)?.resumeSessionId).toBe('thread-fake-tui-durable') + }) + expect(registry.get(created.terminalId)?.codexDurability).toMatchObject({ + state: 'durable', + durableThreadId: 'thread-fake-tui-durable', + }) + expect(registry.list().find((term) => term.terminalId === created.terminalId)?.sessionRef).toEqual({ + provider: 'codex', + sessionId: 'thread-fake-tui-durable', + }) + expect(await fsp.readFile(rolloutPath, 'utf8')).toContain('"thread-fake-tui-durable"') + } finally { + await closeWebSocket(ws) + if (previousConnectRemote === undefined) delete process.env.FAKE_CODEX_CONNECT_REMOTE + else process.env.FAKE_CODEX_CONNECT_REMOTE = previousConnectRemote + if (previousRemoteDelay === undefined) delete process.env.FAKE_CODEX_REMOTE_CONNECT_DELAY_MS + else process.env.FAKE_CODEX_REMOTE_CONNECT_DELAY_MS = previousRemoteDelay + if (previousRemoteLog === undefined) delete process.env.FAKE_CODEX_REMOTE_LOG + else process.env.FAKE_CODEX_REMOTE_LOG = previousRemoteLog + if (previousStayAlive === undefined) delete process.env.FAKE_CODEX_STAY_ALIVE + else process.env.FAKE_CODEX_STAY_ALIVE = previousStayAlive + if (previousLaunchLog === undefined) delete process.env.FAKE_CODEX_LAUNCH_LOG + else process.env.FAKE_CODEX_LAUNCH_LOG = previousLaunchLog + delete process.env.FAKE_CODEX_APP_SERVER_BEHAVIOR + await fsp.rm(testDir, { recursive: true, force: true }) + } + }) + + it('restores a persisted Codex session from canonical sessionRef', async () => { + const launchLogPath = path.join(tempDir, 'restore-codex-launches.jsonl') + const previousLaunchLog = 
process.env.FAKE_CODEX_LAUNCH_LOG + process.env.FAKE_CODEX_LAUNCH_LOG = launchLogPath + await fsp.rm(launchLogPath, { force: true }) process.env.FAKE_CODEX_APP_SERVER_BEHAVIOR = JSON.stringify({ loadedThreadIds: ['thread-existing-1'], overrides: { @@ -415,7 +596,11 @@ describe('Codex Session Flow Integration', () => { requestId: 'test-req-codex-restore', mode: 'codex', cwd: tempDir, - resumeSessionId: 'thread-existing-1', + restore: true, + sessionRef: { + provider: 'codex', + sessionId: 'thread-existing-1', + }, })) const created = await waitForMessage( @@ -429,13 +614,13 @@ describe('Codex Session Flow Integration', () => { throw new Error(`terminal.create failed: ${created.message}`) } - expect(created.effectiveResumeSessionId).toBe('thread-existing-1') + expect(created.effectiveResumeSessionId).toBeUndefined() const record = registry.get(created.terminalId) expect(record?.resumeSessionId).toBe('thread-existing-1') - await waitForFile(argLogPath) - const recordedArgs = JSON.parse(await fsp.readFile(argLogPath, 'utf8')) + const launch = await waitForJsonLine(launchLogPath, (line) => line.pid === record?.pty.pid) + const recordedArgs = launch.args expect(recordedArgs.slice(0, 2)).toEqual([ '--remote', expect.stringMatching(/^ws:\/\/127\.0\.0\.1:\d+$/), @@ -445,6 +630,8 @@ describe('Codex Session Flow Integration', () => { } finally { await closeWebSocket(ws) delete process.env.FAKE_CODEX_APP_SERVER_BEHAVIOR + if (previousLaunchLog === undefined) delete process.env.FAKE_CODEX_LAUNCH_LOG + else process.env.FAKE_CODEX_LAUNCH_LOG = previousLaunchLog } }) @@ -452,23 +639,19 @@ describe('Codex Session Flow Integration', () => { const testDir = await fsp.mkdtemp(path.join(tempDir, 'recovery-retire-')) const metadataDir = path.join(testDir, 'metadata') const oldNativePidFile = path.join(testDir, 'old-native.pid') - const replacementNativePidFile = path.join(testDir, 'replacement-native.pid') const launchLogPath = path.join(testDir, 'codex-launches.jsonl') const 
inputLogPath = path.join(testDir, 'codex-input.jsonl') - const oldSidecarShutdownSignalPath = path.join(testDir, 'old-sidecar-shutdown.signal') const firstLaunchClaimPath = path.join(testDir, 'first-tui.claim') await fsp.mkdir(metadataDir, { recursive: true }) const previousStayAlive = process.env.FAKE_CODEX_STAY_ALIVE const previousLaunchLog = process.env.FAKE_CODEX_LAUNCH_LOG const previousInputLog = process.env.FAKE_CODEX_INPUT_LOG - const previousExitWhenFileExists = process.env.FAKE_CODEX_EXIT_WHEN_FILE_EXISTS const previousFirstLaunchOnly = process.env.FAKE_CODEX_EXIT_WATCH_FIRST_LAUNCH_ONLY const previousFirstLaunchClaim = process.env.FAKE_CODEX_FIRST_LAUNCH_CLAIM_PATH process.env.FAKE_CODEX_STAY_ALIVE = '1' process.env.FAKE_CODEX_LAUNCH_LOG = launchLogPath process.env.FAKE_CODEX_INPUT_LOG = inputLogPath - process.env.FAKE_CODEX_EXIT_WHEN_FILE_EXISTS = oldSidecarShutdownSignalPath process.env.FAKE_CODEX_EXIT_WATCH_FIRST_LAUNCH_ONLY = '1' process.env.FAKE_CODEX_FIRST_LAUNCH_CLAIM_PATH = firstLaunchClaimPath @@ -481,8 +664,6 @@ describe('Codex Session Flow Integration', () => { FAKE_CODEX_APP_SERVER_BEHAVIOR: JSON.stringify({ spawnNativeChild: true, nativePidFile: oldNativePidFile, - wrapperLeavesNativeOnSigterm: true, - signalFileOnSigterm: oldSidecarShutdownSignalPath, delayExitOnSigtermMs: 200, loadedThreadIds: ['thread-existing-1'], }), @@ -496,7 +677,6 @@ describe('Codex Session Flow Integration', () => { env: { FAKE_CODEX_APP_SERVER_BEHAVIOR: JSON.stringify({ spawnNativeChild: true, - nativePidFile: replacementNativePidFile, wrapperLeavesNativeOnSigterm: true, loadedThreadIds: ['thread-existing-1'], }), @@ -507,15 +687,15 @@ describe('Codex Session Flow Integration', () => { const oldPlanner = new CodexLaunchPlanner(oldRuntime) const replacementPlanner = new CodexLaunchPlanner(replacementRuntime) let terminalId: string | undefined + let oldPtyPid: number | undefined try { const oldPlan = await oldPlanner.planCreate({ resumeSessionId: 'thread-existing-1' 
}) const oldNativePid = await waitForPidFile(oldNativePidFile) + expect(oldNativePid).toEqual(expect.any(Number)) const recovery = { planCreate: vi.fn(() => replacementPlanner.planCreate({ resumeSessionId: 'thread-existing-1' })), retryDelayMs: 0, - readinessTimeoutMs: 1_000, - readinessPollMs: 25, } const term = registry.create({ mode: 'codex', @@ -530,25 +710,28 @@ describe('Codex Session Flow Integration', () => { } as any, }) terminalId = term.terminalId - const oldPtyPid = term.pty.pid + await oldPlan.sidecar.adopt({ terminalId: term.terminalId, generation: 0 }) + oldPtyPid = term.pty.pid await waitForJsonLine(launchLogPath, (line) => line.pid === oldPtyPid) await (registry as any).runCodexRecoveryAttempt( registry.get(term.terminalId), 'thread-existing-1', ) - - const replacementNativePid = await waitForPidFile(replacementNativePidFile) - await waitForProcessExit(oldNativePid) - await waitForProcessExit(oldPtyPid) - expect(await isProcessAlive(replacementNativePid)).toBe(true) + const replacementLaunch = await waitForJsonLine( + launchLogPath, + (line) => line.pid !== oldPtyPid && Array.isArray(line.args) && line.args.includes('thread-existing-1'), + ) const latest = registry.get(term.terminalId) + expect(latest?.status).toBe('running') + expect(latest?.resumeSessionId).toBe('thread-existing-1') + expect(registry.findRunningTerminalBySession('codex', 'thread-existing-1')?.terminalId).toBe(term.terminalId) const replacementPtyPid = latest?.pty.pid expect(replacementPtyPid).toEqual(expect.any(Number)) - expect(replacementPtyPid).not.toBe(oldPtyPid) + expect(replacementPtyPid).toBe(replacementLaunch.pid) - expect(registry.input(term.terminalId, 'after recovery replacement\n')).toBe(true) + expect(registry.input(term.terminalId, 'after recovery replacement\n')).toEqual({ status: 'written' }) await waitForJsonLine( inputLogPath, (line) => line.pid === replacementPtyPid && line.data.includes('after recovery replacement'), @@ -556,6 +739,13 @@ describe('Codex 
Session Flow Integration', () => { const inputLines = await readJsonLines(inputLogPath) expect(inputLines.some((line) => line.pid === oldPtyPid && line.data.includes('after recovery replacement'))).toBe(false) } finally { + if (oldPtyPid && await isProcessAlive(oldPtyPid)) { + try { + process.kill(oldPtyPid, 'SIGKILL') + } catch { + // Best-effort cleanup for a fake PTY process that can outlive the assertion window under parallel load. + } + } if (terminalId) { await registry.killAndWait(terminalId).catch(() => undefined) } @@ -571,8 +761,6 @@ describe('Codex Session Flow Integration', () => { else process.env.FAKE_CODEX_LAUNCH_LOG = previousLaunchLog if (previousInputLog === undefined) delete process.env.FAKE_CODEX_INPUT_LOG else process.env.FAKE_CODEX_INPUT_LOG = previousInputLog - if (previousExitWhenFileExists === undefined) delete process.env.FAKE_CODEX_EXIT_WHEN_FILE_EXISTS - else process.env.FAKE_CODEX_EXIT_WHEN_FILE_EXISTS = previousExitWhenFileExists if (previousFirstLaunchOnly === undefined) delete process.env.FAKE_CODEX_EXIT_WATCH_FIRST_LAUNCH_ONLY else process.env.FAKE_CODEX_EXIT_WATCH_FIRST_LAUNCH_ONLY = previousFirstLaunchOnly if (previousFirstLaunchClaim === undefined) delete process.env.FAKE_CODEX_FIRST_LAUNCH_CLAIM_PATH diff --git a/test/server/agent-panes-write.test.ts b/test/server/agent-panes-write.test.ts index c05f0ac45..f54990fbf 100644 --- a/test/server/agent-panes-write.test.ts +++ b/test/server/agent-panes-write.test.ts @@ -3,6 +3,7 @@ import express from 'express' import request from 'supertest' import { createAgentApiRouter } from '../../server/agent-api/router' import { FakeCodexLaunchPlanner } from '../helpers/coding-cli/fake-codex-launch-planner.js' +import { INVALID_RAW_CODEX_RESUME_MESSAGE } from '../../server/coding-cli/codex-app-server/restore-decision.js' it('splits a pane horizontally', async () => { const app = express() @@ -127,6 +128,36 @@ it('kills the created Codex terminal when split adoption fails after registry.cr 
expect(attachPaneContent).not.toHaveBeenCalled() }) +it('rejects raw Codex resume ids before splitting a pane', async () => { + const app = express() + app.use(express.json()) + const splitPane = vi.fn(() => ({ newPaneId: 'pane_new', tabId: 'tab_1' })) + const attachPaneContent = vi.fn() + const registryCreate = vi.fn(() => ({ terminalId: 'term_new' })) + const codexLaunchPlanner = new FakeCodexLaunchPlanner() + app.use('/api', createAgentApiRouter({ + layoutStore: { splitPane, attachPaneContent }, + registry: { create: registryCreate }, + codexLaunchPlanner, + })) + + const res = await request(app).post('/api/panes/pane_1/split').send({ + direction: 'horizontal', + mode: 'codex', + resumeSessionId: 'thread-raw-split', + }) + + expect(res.status).toBe(400) + expect(res.body).toEqual({ + status: 'error', + message: INVALID_RAW_CODEX_RESUME_MESSAGE, + }) + expect(codexLaunchPlanner.planCreateCalls).toEqual([]) + expect(splitPane).not.toHaveBeenCalled() + expect(registryCreate).not.toHaveBeenCalled() + expect(attachPaneContent).not.toHaveBeenCalled() +}) + it('kills the created Codex split terminal without waiting for readiness when shutdown admission closes after adoption', async () => { const app = express() app.use(express.json()) @@ -158,13 +189,12 @@ it('kills the created Codex split terminal without waiting for readiness when sh const res = await request(app).post('/api/panes/pane_1/split').send({ direction: 'horizontal', mode: 'codex', - resumeSessionId: 'thread-split-shutdown', + sessionRef: { provider: 'codex', sessionId: 'thread-split-shutdown' }, }) expect(res.status).toBe(500) expect(res.body.message).toContain('Server is shutting down') expect(codexLaunchPlanner.sidecar.adoptCalls).toEqual([{ terminalId: 'term_split_shutdown', generation: 0 }]) - expect(codexLaunchPlanner.sidecar.waitForLoadedThreadCalls).toEqual([]) expect(registry.publishCodexSidecar).not.toHaveBeenCalled() expect(registry.killAndWait).toHaveBeenCalledWith('term_split_shutdown') 
expect(codexLaunchPlanner.sidecar.shutdownCalls).toBe(1) @@ -278,6 +308,38 @@ it('kills the created Codex terminal when respawn adoption fails after registry. expect(attachPaneContent).not.toHaveBeenCalled() }) +it('rejects raw Codex resume ids before respawning a pane', async () => { + const app = express() + app.use(express.json()) + const attachPaneContent = vi.fn() + const registryCreate = vi.fn(() => ({ terminalId: 'term_new' })) + const codexLaunchPlanner = new FakeCodexLaunchPlanner() + const resolveTarget = vi.fn(() => ({ tabId: 'tab_1', paneId: 'pane_1' })) + app.use('/api', createAgentApiRouter({ + layoutStore: { + attachPaneContent, + resolveTarget, + } as any, + registry: { create: registryCreate }, + codexLaunchPlanner, + })) + + const res = await request(app).post('/api/panes/pane_1/respawn').send({ + mode: 'codex', + resumeSessionId: 'thread-raw-respawn', + }) + + expect(res.status).toBe(400) + expect(res.body).toEqual({ + status: 'error', + message: INVALID_RAW_CODEX_RESUME_MESSAGE, + }) + expect(codexLaunchPlanner.planCreateCalls).toEqual([]) + expect(resolveTarget).toHaveBeenCalledWith('pane_1') + expect(registryCreate).not.toHaveBeenCalled() + expect(attachPaneContent).not.toHaveBeenCalled() +}) + it('kills the created Codex respawn terminal without waiting for readiness when shutdown admission closes after adoption', async () => { const app = express() app.use(express.json()) @@ -310,13 +372,12 @@ it('kills the created Codex respawn terminal without waiting for readiness when const res = await request(app).post('/api/panes/pane_1/respawn').send({ mode: 'codex', - resumeSessionId: 'thread-respawn-shutdown', + sessionRef: { provider: 'codex', sessionId: 'thread-respawn-shutdown' }, }) expect(res.status).toBe(500) expect(res.body.message).toContain('Server is shutting down') expect(codexLaunchPlanner.sidecar.adoptCalls).toEqual([{ terminalId: 'term_respawn_shutdown', generation: 0 }]) - 
expect(codexLaunchPlanner.sidecar.waitForLoadedThreadCalls).toEqual([]) expect(registry.publishCodexSidecar).not.toHaveBeenCalled() expect(registry.killAndWait).toHaveBeenCalledWith('term_respawn_shutdown') expect(codexLaunchPlanner.sidecar.shutdownCalls).toBe(1) diff --git a/test/server/agent-run.test.ts b/test/server/agent-run.test.ts index ebd24fbb4..92583da8f 100644 --- a/test/server/agent-run.test.ts +++ b/test/server/agent-run.test.ts @@ -1,12 +1,10 @@ import { it, expect, vi } from 'vitest' import express from 'express' import request from 'supertest' +import { EventEmitter } from 'node:events' import { createAgentApiRouter } from '../../server/agent-api/router' import { FakeCodexLaunchPlanner, DEFAULT_CODEX_REMOTE_WS_URL } from '../helpers/coding-cli/fake-codex-launch-planner.js' -const expectedFreshellToken = process.env.AUTH_TOKEN || '' -const expectedFreshellUrl = process.env.FRESHELL_URL || 'http://localhost:3001' - it('runs a command and returns captured output', async () => { let buffer = '' const registry = { @@ -14,7 +12,7 @@ it('runs a command and returns captured output', async () => { input: (_terminalId: string, data: string) => { const match = data.match(/__FRESHELL_DONE_[A-Za-z0-9_-]+__/) if (match) buffer = `done\n${match[0]}\n` - return true + return { status: 'written' } }, get: () => ({ buffer: { snapshot: () => buffer }, status: 'running' }), } @@ -38,7 +36,7 @@ it('allocates and passes an OpenCode control endpoint for /api/run in opencode m input: (_terminalId: string, data: string) => { const match = data.match(/__FRESHELL_DONE_[A-Za-z0-9_-]+__/) if (match) buffer = `done\n${match[0]}\n` - return true + return { status: 'written' } }, get: () => ({ buffer: { snapshot: () => buffer }, status: 'running' }), } @@ -67,13 +65,10 @@ it('allocates and passes an OpenCode control endpoint for /api/run in opencode m it('uses the Codex planner and marks fresh /api/run sessions as starts', async () => { const registry = { create: vi.fn((opts?: { 
terminalId?: string }) => ({ terminalId: opts?.terminalId ?? 'term1' })), - input: vi.fn(() => true), + input: vi.fn(() => ({ status: 'written' })), } const codexLaunchPlanner = new FakeCodexLaunchPlanner() - const createTab = vi.fn((input: { tabId: string; paneId: string }) => ({ - tabId: input.tabId, - paneId: input.paneId, - })) + const createTab = vi.fn(() => ({ tabId: 't1', paneId: 'p1' })) const app = express() app.use(express.json()) @@ -97,25 +92,14 @@ it('uses the Codex planner and marks fresh /api/run sessions as starts', async ( model: undefined, resumeSessionId: undefined, sandbox: undefined, - terminalId: expect.any(String), - env: expect.objectContaining({ - FRESHELL: '1', - FRESHELL_TERMINAL_ID: expect.any(String), - FRESHELL_TOKEN: expectedFreshellToken, - FRESHELL_URL: expectedFreshellUrl, - }), })) - expect(planCreate.env.FRESHELL_TERMINAL_ID).toBe(planCreate.terminalId) - expect(planCreate.env.FRESHELL_TAB_ID).toBe(createTab.mock.calls[0]?.[0]?.tabId) - expect(planCreate.env.FRESHELL_PANE_ID).toBe(createTab.mock.calls[0]?.[0]?.paneId) expect(registry.create).toHaveBeenCalledWith(expect.objectContaining({ mode: 'codex', - terminalId: planCreate.terminalId, - codexSidecar: codexLaunchPlanner.sidecar, resumeSessionId: undefined, sessionBindingReason: 'start', providerSettings: expect.objectContaining({ codexAppServer: expect.objectContaining({ + sidecar: codexLaunchPlanner.sidecar, wsUrl: DEFAULT_CODEX_REMOTE_WS_URL, }), }), @@ -123,12 +107,151 @@ it('uses the Codex planner and marks fresh /api/run sessions as starts', async ( expect(codexLaunchPlanner.sidecar.adoptCalls).toEqual([{ terminalId: 'term1', generation: 0 }]) }) +it('waits for fresh Codex /api/run restore identity before sending input', async () => { + const emitter = new EventEmitter() + let identityReady = false + const registry = Object.assign(emitter, { + create: vi.fn((opts?: { terminalId?: string }) => ({ terminalId: opts?.terminalId ?? 
'term1' })), + get: vi.fn(() => ({ + status: 'running', + codexInputGate: { state: 'identity_pending' }, + })), + input: vi.fn(() => { + if (identityReady) return { status: 'written' } + queueMicrotask(() => { + identityReady = true + emitter.emit('terminal.codex.durability.updated', { terminalId: 'term1' }) + }) + return { status: 'blocked_codex_identity_pending', terminalId: 'term1' } + }), + killAndWait: vi.fn(async () => true), + }) + const codexLaunchPlanner = new FakeCodexLaunchPlanner() + + const app = express() + app.use(express.json()) + app.use('/api', createAgentApiRouter({ + layoutStore: { + createTab: () => ({ tabId: 't1', paneId: 'p1' }), + attachPaneContent: () => {}, + }, + registry, + codexLaunchPlanner, + })) + + const res = await request(app).post('/api/run').send({ command: 'echo done', mode: 'codex' }) + + expect(res.status).toBe(200) + expect(res.body.status).toBe('ok') + expect(res.body.message).toBe('command sent') + expect(registry.input).toHaveBeenCalledTimes(2) + expect(registry.killAndWait).not.toHaveBeenCalled() +}) + +it('does not buffer pending Codex /api/run input even if the terminal exits later', async () => { + const emitter = new EventEmitter() + const registry = Object.assign(emitter, { + create: vi.fn((opts?: { terminalId?: string }) => ({ terminalId: opts?.terminalId ?? 
'term1' })), + get: vi.fn(() => ({ + status: 'running', + codexInputGate: { state: 'identity_pending' }, + })), + input: vi.fn(() => { + queueMicrotask(() => { + emitter.emit('terminal.exit', { terminalId: 'term1' }) + }) + return { status: 'blocked_codex_identity_pending', terminalId: 'term1' } + }), + killAndWait: vi.fn(async () => true), + }) + const codexLaunchPlanner = new FakeCodexLaunchPlanner() + + const app = express() + app.use(express.json()) + app.use('/api', createAgentApiRouter({ + layoutStore: { + createTab: () => ({ tabId: 't1', paneId: 'p1' }), + closeTab: vi.fn(), + attachPaneContent: () => {}, + }, + registry, + codexLaunchPlanner, + })) + + const res = await request(app).post('/api/run').send({ command: 'echo done', mode: 'codex' }) + + expect(res.status).toBe(500) + expect(res.body.message).toBe('Terminal is not running.') + expect(registry.input).toHaveBeenCalledTimes(1) + expect(registry.killAndWait).toHaveBeenCalledWith('term1') +}) + +it('fails when Codex restore identity is unavailable before /api/run input', async () => { + const registry = { + create: vi.fn((opts?: { terminalId?: string }) => ({ terminalId: opts?.terminalId ?? 
'term1' })), + get: vi.fn(() => ({ + status: 'running', + codexDurability: { state: 'non_restorable' }, + })), + input: vi.fn(() => ({ status: 'blocked_codex_identity_unavailable', terminalId: 'term1' })), + killAndWait: vi.fn(async () => true), + } + const codexLaunchPlanner = new FakeCodexLaunchPlanner() + + const app = express() + app.use(express.json()) + app.use('/api', createAgentApiRouter({ + layoutStore: { + createTab: () => ({ tabId: 't1', paneId: 'p1' }), + closeTab: vi.fn(), + attachPaneContent: () => {}, + }, + registry, + codexLaunchPlanner, + })) + + const res = await request(app).post('/api/run').send({ command: 'echo done', mode: 'codex' }) + + expect(res.status).toBe(500) + expect(res.body.message).toBe('Codex restore identity could not be captured before input could be accepted.') + expect(registry.input).toHaveBeenCalledTimes(1) + expect(registry.killAndWait).toHaveBeenCalledWith('term1') +}) + +it('reports Codex recovery-pending input rejection for /api/run', async () => { + const registry = { + create: vi.fn((opts?: { terminalId?: string }) => ({ terminalId: opts?.terminalId ?? 
'term1' })), + input: vi.fn(() => ({ status: 'blocked_codex_recovery_pending', terminalId: 'term1' })), + killAndWait: vi.fn(async () => true), + } + const codexLaunchPlanner = new FakeCodexLaunchPlanner() + + const app = express() + app.use(express.json()) + app.use('/api', createAgentApiRouter({ + layoutStore: { + createTab: () => ({ tabId: 't1', paneId: 'p1' }), + closeTab: vi.fn(), + attachPaneContent: () => {}, + }, + registry, + codexLaunchPlanner, + })) + + const res = await request(app).post('/api/run').send({ command: 'echo done', mode: 'codex' }) + + expect(res.status).toBe(500) + expect(res.body.message).toBe('Codex durable recovery is still in progress.') + expect(registry.input).toHaveBeenCalledTimes(1) + expect(registry.killAndWait).toHaveBeenCalledWith('term1') +}) + it('shuts down the pending Codex sidecar when /api/run fails after planning', async () => { const registry = { create: vi.fn(() => { throw new Error('spawn failed after planning') }), - input: vi.fn(() => true), + input: vi.fn(() => ({ status: 'written' })), } const codexLaunchPlanner = new FakeCodexLaunchPlanner() @@ -156,7 +279,7 @@ it('reports pending Codex sidecar shutdown failure when /api/run fails after pla create: vi.fn(() => { throw new Error('spawn failed after planning') }), - input: vi.fn(() => true), + input: vi.fn(() => ({ status: 'written' })), } const codexLaunchPlanner = new FakeCodexLaunchPlanner() codexLaunchPlanner.sidecar.shutdownError = new Error('verified sidecar teardown failed') @@ -184,7 +307,7 @@ it('reports pending Codex sidecar shutdown failure when /api/run fails after pla it('kills the created terminal and sidecar when /api/run fails after registry.create', async () => { const registry = { create: vi.fn(() => ({ terminalId: 'term1' })), - input: vi.fn(() => true), + input: vi.fn(() => ({ status: 'written' })), killAndWait: vi.fn(async () => true), } const codexLaunchPlanner = new FakeCodexLaunchPlanner() @@ -212,7 +335,7 @@ it('kills the created terminal 
and sidecar when /api/run fails after registry.cr it('reports created-terminal cleanup failure when /api/run fails after registry.create', async () => { const registry = { create: vi.fn(() => ({ terminalId: 'term1' })), - input: vi.fn(() => true), + input: vi.fn(() => ({ status: 'written' })), killAndWait: vi.fn(async () => { throw new Error('terminal cleanup failed') }), @@ -243,14 +366,11 @@ it('reports created-terminal cleanup failure when /api/run fails after registry. it('retries initial Codex launch before starting a detached /api/run session', async () => { const registry = { create: vi.fn((opts?: { terminalId?: string }) => ({ terminalId: opts?.terminalId ?? 'term1' })), - input: vi.fn(() => true), + input: vi.fn(() => ({ status: 'written' })), } const codexLaunchPlanner = new FakeCodexLaunchPlanner() codexLaunchPlanner.failNext(2) - const createTab = vi.fn((input: { tabId: string; paneId: string }) => ({ - tabId: input.tabId, - paneId: input.paneId, - })) + const createTab = vi.fn(() => ({ tabId: 't1', paneId: 'p1' })) const app = express() app.use(express.json()) @@ -280,14 +400,11 @@ it('retries initial Codex launch before starting a detached /api/run session', a it('fails detached /api/run without mutating layout when Codex launch retries are exhausted', async () => { const registry = { create: vi.fn((opts?: { terminalId?: string }) => ({ terminalId: opts?.terminalId ?? 
'term1' })), - input: vi.fn(() => true), + input: vi.fn(() => ({ status: 'written' })), } const codexLaunchPlanner = new FakeCodexLaunchPlanner() codexLaunchPlanner.failNext(5) - const createTab = vi.fn((input: { tabId: string; paneId: string }) => ({ - tabId: input.tabId, - paneId: input.paneId, - })) + const createTab = vi.fn(() => ({ tabId: 't1', paneId: 'p1' })) const app = express() app.use(express.json()) @@ -319,13 +436,10 @@ it('shuts down the planned Codex sidecar when /api/run terminal creation fails b create: vi.fn(() => { throw new Error('spawn failed') }), - input: vi.fn(() => true), + input: vi.fn(() => ({ status: 'written' })), } const codexLaunchPlanner = new FakeCodexLaunchPlanner() - const createTab = vi.fn((input: { tabId: string; paneId: string }) => ({ - tabId: input.tabId, - paneId: input.paneId, - })) + const createTab = vi.fn(() => ({ tabId: 't1', paneId: 'p1' })) const closeTab = vi.fn() const app = express() @@ -346,7 +460,7 @@ it('shuts down the planned Codex sidecar when /api/run terminal creation fails b expect(res.body).toEqual({ status: 'error', message: 'spawn failed' }) expect(codexLaunchPlanner.planCreateCalls).toHaveLength(1) expect(codexLaunchPlanner.sidecar.shutdownCalls).toBe(1) - expect(closeTab).toHaveBeenCalledWith(createTab.mock.calls[0]?.[0]?.tabId) + expect(closeTab).toHaveBeenCalledWith('t1') expect(registry.input).not.toHaveBeenCalled() }) @@ -354,7 +468,7 @@ it('rejects invalid Codex settings for /api/run before creating a tab', async () const createTab = vi.fn(() => ({ tabId: 't1', paneId: 'p1' })) const registry = { create: vi.fn(() => ({ terminalId: 'term1' })), - input: vi.fn(() => true), + input: vi.fn(() => ({ status: 'written' })), } const codexLaunchPlanner = new FakeCodexLaunchPlanner() @@ -397,7 +511,7 @@ it('rejects Codex /api/run without planning when shutdown admission closes while const createTab = vi.fn(() => ({ tabId: 't1', paneId: 'p1' })) const registry = { create: vi.fn(() => ({ terminalId: 'term1' 
})), - input: vi.fn(() => true), + input: vi.fn(() => ({ status: 'written' })), killAndWait: vi.fn(async () => true), } const codexLaunchPlanner = new FakeCodexLaunchPlanner() diff --git a/test/server/agent-send-keys.test.ts b/test/server/agent-send-keys.test.ts index 2438fa239..85ba71a99 100644 --- a/test/server/agent-send-keys.test.ts +++ b/test/server/agent-send-keys.test.ts @@ -1,3 +1,4 @@ +import { EventEmitter } from 'node:events' import { it, expect, vi } from 'vitest' import express from 'express' import request from 'supertest' @@ -8,7 +9,7 @@ it('sends input to a pane terminal', async () => { app.use(express.json()) app.use('/api', createAgentApiRouter({ layoutStore: { resolvePaneToTerminal: () => 'term_1' }, - registry: { input: () => true }, + registry: { input: () => ({ status: 'written' }) }, })) const res = await request(app).post('/api/panes/p1/send-keys').send({ data: 'ls\r' }) @@ -16,7 +17,7 @@ it('sends input to a pane terminal', async () => { }) it('resolves tmux-style target to a pane before sending', async () => { - const input = vi.fn(() => true) + const input = vi.fn(() => ({ status: 'written' })) const app = express() app.use(express.json()) app.use('/api', createAgentApiRouter({ @@ -31,3 +32,60 @@ it('resolves tmux-style target to a pane before sending', async () => { expect(res.body.status).toBe('ok') expect(input).toHaveBeenCalledWith('term_2', 'C-c') }) + +it('rejects blocked Codex input instead of reporting success', async () => { + const app = express() + app.use(express.json()) + app.use('/api', createAgentApiRouter({ + layoutStore: { resolvePaneToTerminal: () => 'term_1' }, + registry: { + input: () => ({ + status: 'blocked_codex_identity_unavailable', + terminalId: 'term_1', + reason: 'candidate_persist_failed', + }), + }, + })) + + const res = await request(app).post('/api/panes/p1/send-keys').send({ data: 'ls\r' }) + + expect(res.status).toBe(409) + expect(res.body.status).toBe('error') + expect(res.body.message).toBe('Codex 
restore identity could not be captured before input could be accepted.') +}) + +it('waits for Codex identity capture before sending a seeded prompt when requested', async () => { + const events = new EventEmitter() + let identityReady = false + const input = vi.fn(() => ( + identityReady + ? { status: 'written' } + : { + status: 'blocked_codex_identity_pending', + terminalId: 'term_1', + } + )) + const app = express() + app.use(express.json()) + app.use('/api', createAgentApiRouter({ + layoutStore: { resolvePaneToTerminal: () => 'term_1' }, + registry: Object.assign(events, { input }), + })) + + const response = request(app) + .post('/api/panes/p1/send-keys') + .send({ data: 'build the thing\r', waitForCodexIdentity: true }) + const responsePromise = response.then((res) => res) + + await vi.waitFor(() => expect(input).toHaveBeenCalled()) + identityReady = true + events.emit('terminal.codex.durability.updated', { + terminalId: 'term_1', + durability: { state: 'captured_pre_turn' }, + }) + + const res = await responsePromise + expect(res.status).toBe(200) + expect(res.body.status).toBe('ok') + expect(input).toHaveBeenLastCalledWith('term_1', 'build the thing\r') +}) diff --git a/test/server/agent-tabs-write.test.ts b/test/server/agent-tabs-write.test.ts index 68e455c3f..d34a44685 100644 --- a/test/server/agent-tabs-write.test.ts +++ b/test/server/agent-tabs-write.test.ts @@ -3,9 +3,7 @@ import express from 'express' import request from 'supertest' import { createAgentApiRouter } from '../../server/agent-api/router' import { FakeCodexLaunchPlanner } from '../helpers/coding-cli/fake-codex-launch-planner.js' - -const expectedFreshellToken = process.env.AUTH_TOKEN || '' -const expectedFreshellUrl = process.env.FRESHELL_URL || 'http://localhost:3001' +import { INVALID_RAW_CODEX_RESUME_MESSAGE } from '../../server/coding-cli/codex-app-server/restore-decision.js' class FakeRegistry { create = vi.fn((opts?: { terminalId?: string }) => ({ terminalId: opts?.terminalId ?? 
'term_1' })) @@ -134,10 +132,7 @@ describe('tab endpoints', () => { app.use(express.json()) const registry = new FakeRegistry() const codexLaunchPlanner = new FakeCodexLaunchPlanner() - const createTab = vi.fn((input: { tabId: string; paneId: string }) => ({ - tabId: input.tabId, - paneId: input.paneId, - })) + const createTab = vi.fn(() => ({ tabId: 'tab_1', paneId: 'pane_1' })) const layoutStore = { createTab, attachPaneContent: vi.fn(), @@ -161,23 +156,12 @@ describe('tab endpoints', () => { model: undefined, resumeSessionId: undefined, sandbox: undefined, - terminalId: expect.any(String), - env: expect.objectContaining({ - FRESHELL: '1', - FRESHELL_TERMINAL_ID: expect.any(String), - FRESHELL_TOKEN: expectedFreshellToken, - FRESHELL_URL: expectedFreshellUrl, - }), })) - expect(planCreate.env.FRESHELL_TERMINAL_ID).toBe(planCreate.terminalId) - expect(planCreate.env.FRESHELL_TAB_ID).toBe(createTab.mock.calls[0]?.[0]?.tabId) - expect(planCreate.env.FRESHELL_PANE_ID).toBe(createTab.mock.calls[0]?.[0]?.paneId) expect(registry.create).toHaveBeenCalledWith(expect.objectContaining({ mode: 'codex', - terminalId: planCreate.terminalId, - codexSidecar: codexLaunchPlanner.sidecar, providerSettings: expect.objectContaining({ codexAppServer: expect.objectContaining({ + sidecar: codexLaunchPlanner.sidecar, wsUrl: expect.any(String), }), }), @@ -188,12 +172,12 @@ describe('tab endpoints', () => { const app = express() app.use(express.json()) const registry = new FakeRegistry() - const codexLaunchPlanner = new FakeCodexLaunchPlanner() + const codexLaunchPlanner = new FakeCodexLaunchPlanner({ + sessionId: 'thread-canonical', + remote: { wsUrl: 'ws://127.0.0.1:43123' }, + }) codexLaunchPlanner.failNext(2) - const createTab = vi.fn((input: { tabId: string; paneId: string }) => ({ - tabId: input.tabId, - paneId: input.paneId, - })) + const createTab = vi.fn(() => ({ tabId: 'tab_1', paneId: 'pane_1' })) const layoutStore = { createTab, attachPaneContent: vi.fn(), @@ -253,10 +237,7 @@ 
describe('tab endpoints', () => { throw new Error('spawn failed') }) const codexLaunchPlanner = new FakeCodexLaunchPlanner() - const createTab = vi.fn((input: { tabId: string; paneId: string }) => ({ - tabId: input.tabId, - paneId: input.paneId, - })) + const createTab = vi.fn(() => ({ tabId: 'tab_1', paneId: 'pane_1' })) const closeTab = vi.fn() const layoutStore = { createTab, @@ -276,7 +257,7 @@ describe('tab endpoints', () => { expect(res.body).toEqual({ status: 'error', message: 'spawn failed' }) expect(codexLaunchPlanner.planCreateCalls).toHaveLength(1) expect(codexLaunchPlanner.sidecar.shutdownCalls).toBe(1) - expect(closeTab).toHaveBeenCalledWith(createTab.mock.calls[0]?.[0]?.tabId) + expect(closeTab).toHaveBeenCalledWith('tab_1') }) it('rejects invalid Codex sandbox values with a 400 before spawning', async () => { @@ -428,21 +409,16 @@ describe('tab endpoints', () => { expect(layoutStore.attachPaneContent).not.toHaveBeenCalled() }) - it('kills the created Codex terminal when resume readiness returns after the PTY exited', async () => { + it('rejects raw Codex resume ids instead of fresh-creating tabs', async () => { const app = express() app.use(express.json()) - const terminal = { terminalId: 'term_exited_before_publish', status: 'running' } const registry = { - create: vi.fn(() => terminal), + create: vi.fn(), killAndWait: vi.fn(async () => true), } const codexLaunchPlanner = new FakeCodexLaunchPlanner() - vi.spyOn(codexLaunchPlanner.sidecar, 'waitForLoadedThread').mockImplementation(async (threadId, options) => { - codexLaunchPlanner.sidecar.waitForLoadedThreadCalls.push({ threadId, options }) - terminal.status = 'exited' - }) const layoutStore = { - createTab: () => ({ tabId: 'tab_1', paneId: 'pane_1' }), + createTab: vi.fn(() => ({ tabId: 'tab_1', paneId: 'pane_1' })), attachPaneContent: vi.fn(), selectTab: () => ({}), renameTab: () => ({}), @@ -459,13 +435,63 @@ describe('tab endpoints', () => { resumeSessionId: 'thread-resume-exits', }) - 
expect(res.status).toBe(500) - expect(res.body.message).toContain('Codex terminal PTY exited before create completed') - expect(registry.killAndWait).toHaveBeenCalledWith('term_exited_before_publish') - expect(codexLaunchPlanner.sidecar.shutdownCalls).toBe(1) + expect(res.status).toBe(400) + expect(res.body).toEqual({ + status: 'error', + message: INVALID_RAW_CODEX_RESUME_MESSAGE, + }) + expect(codexLaunchPlanner.planCreateCalls).toEqual([]) + expect(registry.create).not.toHaveBeenCalled() + expect(registry.killAndWait).not.toHaveBeenCalled() + expect(layoutStore.createTab).not.toHaveBeenCalled() expect(layoutStore.attachPaneContent).not.toHaveBeenCalled() }) + it('uses canonical Codex sessionRef as the durable resume path', async () => { + const app = express() + app.use(express.json()) + const terminal = { terminalId: 'term_codex_canonical', status: 'running' } + const registry = { + create: vi.fn(() => terminal), + killAndWait: vi.fn(async () => true), + } + const codexLaunchPlanner = new FakeCodexLaunchPlanner({ + sessionId: 'thread-canonical', + remote: { wsUrl: 'ws://127.0.0.1:43123' }, + }) + const layoutStore = { + createTab: () => ({ tabId: 'tab_1', paneId: 'pane_1' }), + attachPaneContent: vi.fn(), + selectTab: () => ({}), + renameTab: () => ({}), + closeTab: () => ({}), + hasTab: () => true, + selectNextTab: () => ({ tabId: 'tab_1' }), + selectPrevTab: () => ({ tabId: 'tab_1' }), + } + app.use('/api', createAgentApiRouter({ layoutStore, registry, codexLaunchPlanner })) + + const sessionRef = { provider: 'codex', sessionId: 'thread-canonical' } + const res = await request(app).post('/api/tabs').send({ + mode: 'codex', + name: 'resume tab', + sessionRef, + }) + + expect(res.status).toBe(200) + expect(codexLaunchPlanner.planCreateCalls[0]).toEqual(expect.objectContaining({ + resumeSessionId: 'thread-canonical', + })) + expect(registry.create).toHaveBeenCalledWith(expect.objectContaining({ + mode: 'codex', + resumeSessionId: 'thread-canonical', + })) + 
expect(layoutStore.attachPaneContent).toHaveBeenCalledWith('tab_1', 'pane_1', expect.objectContaining({ + sessionRef, + })) + expect(layoutStore.attachPaneContent.mock.calls[0]?.[2]).not.toHaveProperty('resumeSessionId') + }) + it('kills the created Codex terminal without waiting for readiness when shutdown admission closes after adoption', async () => { const app = express() app.use(express.json()) @@ -506,13 +532,12 @@ describe('tab endpoints', () => { const res = await request(app).post('/api/tabs').send({ mode: 'codex', name: 'resume tab', - resumeSessionId: 'thread-resume-shutdown', + sessionRef: { provider: 'codex', sessionId: 'thread-resume-shutdown' }, }) expect(res.status).toBe(500) expect(res.body.message).toContain('Server is shutting down') expect(codexLaunchPlanner.sidecar.adoptCalls).toEqual([{ terminalId: 'term_shutdown_after_adopt', generation: 0 }]) - expect(codexLaunchPlanner.sidecar.waitForLoadedThreadCalls).toEqual([]) expect(registry.publishCodexSidecar).not.toHaveBeenCalled() expect(registry.killAndWait).toHaveBeenCalledWith('term_shutdown_after_adopt') expect(codexLaunchPlanner.sidecar.shutdownCalls).toBe(1) diff --git a/test/server/codex-activity-exact-subset.test.ts b/test/server/codex-activity-exact-subset.test.ts index f8c20c903..757daeb5d 100644 --- a/test/server/codex-activity-exact-subset.test.ts +++ b/test/server/codex-activity-exact-subset.test.ts @@ -292,6 +292,7 @@ describe('Codex activity exact subset wiring', () => { mode: 'codex', cwd: '/repo/project', }) + registry.releaseCodexInputGateForTest(canonical.terminalId) registry.get(canonical.terminalId)!.resumeSessionId = 'codex-session-repair-pending' registry.emit('terminal.session.bound', { terminalId: canonical.terminalId, @@ -353,6 +354,7 @@ describe('Codex activity exact subset wiring', () => { }) const term = registry.create({ mode: 'codex', cwd: '/repo/project' }) + registry.releaseCodexInputGateForTest(term.terminalId) registry.setResumeSessionId(term.terminalId, 
'codex-session-2') vi.setSystemTime(2_000) diff --git a/test/server/session-association.test.ts b/test/server/session-association.test.ts index 1db22992c..e6536f57f 100644 --- a/test/server/session-association.test.ts +++ b/test/server/session-association.test.ts @@ -1,3 +1,6 @@ +import fsp from 'node:fs/promises' +import os from 'node:os' +import path from 'node:path' import { beforeEach, describe, it, expect, vi } from 'vitest' import { TerminalRegistry } from '../../server/terminal-registry' import { CodingCliSessionIndexer } from '../../server/coding-cli/session-indexer' @@ -6,6 +9,8 @@ import { SessionAssociationCoordinator } from '../../server/session-association- import { TerminalMetadataService } from '../../server/terminal-metadata-service' import { collectAppliedSessionAssociations } from '../../server/session-association-updates' import { recordSessionLifecycleEvent } from '../../server/session-observability' +import { CodexDurabilityStore } from '../../server/coding-cli/codex-app-server/durability-store' +import { CODEX_DURABILITY_SCHEMA_VERSION } from '../../shared/codex-durability' vi.mock('node-pty', () => ({ spawn: vi.fn(() => ({ @@ -188,39 +193,60 @@ describe('SessionAssociationCoordinator integration', () => { registry.shutdown() }) - it('records a lifecycle event when the Codex sidecar reports durable identity', () => { - let onDurableSession: ((sessionId: string) => void) | undefined - const sidecar = { - attachTerminal: vi.fn((callbacks: { onDurableSession: (sessionId: string) => void }) => { - onDurableSession = callbacks.onDurableSession - }), - shutdown: vi.fn(async () => undefined), - } - const registry = new TerminalRegistry() - const terminal = registry.create({ - mode: 'codex', - cwd: '/home/user/project', - codexSidecar: sidecar, + it('records a lifecycle event when Codex durable identity is proven from the rollout file', async () => { + const testDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'freshell-codex-proof-')) + const 
durabilityDir = path.join(testDir, 'durability') + const rolloutPath = path.join(testDir, 'rollout.jsonl') + await fsp.writeFile( + rolloutPath, + `${JSON.stringify({ type: 'session_meta', payload: { id: 'codex-thread-1' } })}\n`, + 'utf8', + ) + const registry = new TerminalRegistry(undefined, undefined, undefined, { + codexDurabilityStore: new CodexDurabilityStore({ dir: durabilityDir }), }) - onDurableSession?.('codex-thread-1') - onDurableSession?.('codex-thread-1') - - const durableObservationCalls = vi.mocked(recordSessionLifecycleEvent).mock.calls.filter(([event]) => - event.kind === 'codex_durable_session_observed' - ) - expect(durableObservationCalls).toEqual([[ - { - kind: 'codex_durable_session_observed', - provider: 'codex', - terminalId: terminal.terminalId, - sessionId: 'codex-thread-1', - generation: 1, - source: 'sidecar', - }, - ]]) + try { + const terminal = registry.create({ + mode: 'codex', + cwd: '/home/user/project', + codexSidecar: { + shutdown: vi.fn(async () => undefined), + } as any, + }) + const record = registry.get(terminal.terminalId) + expect(record).toBeTruthy() + record!.codexDurability = { + schemaVersion: CODEX_DURABILITY_SCHEMA_VERSION, + state: 'captured_pre_turn', + candidate: { + provider: 'codex', + candidateThreadId: 'codex-thread-1', + rolloutPath, + source: 'thread_start_response', + capturedAt: 1_000, + }, + } - registry.shutdown() + await (registry as any).runCodexDurabilityProof(terminal.terminalId, 'test') + + const durableObservationCalls = vi.mocked(recordSessionLifecycleEvent).mock.calls.filter(([event]) => + event.kind === 'codex_durable_session_observed' + ) + expect(durableObservationCalls).toEqual([[ + { + kind: 'codex_durable_session_observed', + provider: 'codex', + terminalId: terminal.terminalId, + sessionId: 'codex-thread-1', + generation: 0, + source: 'sidecar', + }, + ]]) + } finally { + registry.shutdown() + await fsp.rm(testDir, { recursive: true, force: true }) + } }) }) diff --git 
a/test/server/ws-handshake-snapshot.test.ts b/test/server/ws-handshake-snapshot.test.ts index 1289420fa..9698428c3 100644 --- a/test/server/ws-handshake-snapshot.test.ts +++ b/test/server/ws-handshake-snapshot.test.ts @@ -360,6 +360,113 @@ describe('ws handshake snapshot', () => { } }) + it('does not synthesize Codex sessionRef from resumeSessionId until durability is proven', async () => { + registry.setTerminals([ + { + terminalId: 'term-codex-unproven', + title: 'Codex CLI', + mode: 'codex', + resumeSessionId: 'thread-unproven', + codexDurability: { + schemaVersion: 1, + state: 'captured_pre_turn', + candidate: { + provider: 'codex', + candidateThreadId: 'thread-unproven', + rolloutPath: '/home/user/.codex/sessions/unproven.jsonl', + source: 'thread_start_response', + capturedAt: 1, + }, + }, + createdAt: 1, + lastActivityAt: 2, + status: 'running', + }, + { + terminalId: 'term-codex-durable', + title: 'Codex CLI', + mode: 'codex', + resumeSessionId: 'thread-durable', + codexDurability: { + schemaVersion: 1, + state: 'durable', + durableThreadId: 'thread-durable', + candidate: { + provider: 'codex', + candidateThreadId: 'thread-durable', + rolloutPath: '/home/user/.codex/sessions/durable.jsonl', + source: 'thread_start_response', + capturedAt: 1, + }, + turnCompletedAt: 2, + }, + createdAt: 3, + lastActivityAt: 4, + status: 'running', + }, + { + terminalId: 'term-codex-mismatch', + title: 'Codex CLI', + mode: 'codex', + resumeSessionId: 'thread-legacy', + codexDurability: { + schemaVersion: 1, + state: 'durable', + durableThreadId: 'thread-proof', + candidate: { + provider: 'codex', + candidateThreadId: 'thread-proof', + rolloutPath: '/home/user/.codex/sessions/mismatch.jsonl', + source: 'thread_start_response', + capturedAt: 1, + }, + turnCompletedAt: 2, + }, + createdAt: 5, + lastActivityAt: 6, + status: 'running', + }, + { + terminalId: 'term-claude-legacy', + title: 'Claude CLI', + mode: 'claude', + resumeSessionId: '550e8400-e29b-41d4-a716-446655440000', + 
createdAt: 7, + lastActivityAt: 8, + status: 'running', + }, + ]) + + const ws = new WebSocket(`ws://127.0.0.1:${port}/ws`) + + try { + await new Promise<void>((resolve) => ws.on('open', () => resolve())) + + const inventoryPromise = waitForMessage(ws, (m) => m.type === 'terminal.inventory', 10_000) + await waitForReady(ws, 10_000) + + const inventory = await inventoryPromise + const byId = new Map(inventory.terminals.map((terminal: any) => [terminal.terminalId, terminal])) + expect(byId.get('term-codex-unproven')).not.toHaveProperty('sessionRef') + expect(byId.get('term-codex-unproven')).not.toHaveProperty('resumeSessionId') + expect(byId.get('term-codex-durable')).toMatchObject({ + sessionRef: { + provider: 'codex', + sessionId: 'thread-durable', + }, + }) + expect(byId.get('term-codex-mismatch')).not.toHaveProperty('sessionRef') + expect(byId.get('term-claude-legacy')).toMatchObject({ + sessionRef: { + provider: 'claude', + sessionId: '550e8400-e29b-41d4-a716-446655440000', + }, + }) + } finally { + await closeWs(ws) + } + }) + it('keeps inventory lifetime status separate from runtime recovery status', async () => { registry.setTerminals([ { diff --git a/test/server/ws-protocol.test.ts b/test/server/ws-protocol.test.ts index 7af2b1e1a..e1941c27a 100644 --- a/test/server/ws-protocol.test.ts +++ b/test/server/ws-protocol.test.ts @@ -171,9 +171,9 @@ class FakeRegistry { input(terminalId: string, data: string) { const rec = this.records.get(terminalId) - if (!rec) return false + if (!rec) return { status: 'no_terminal' } this.inputCalls.push({ terminalId, data }) - return true + return { status: 'written' } } resize(terminalId: string, cols: number, rows: number) { @@ -229,6 +229,16 @@ class FakeRegistry { return undefined } + async readCodexDurabilityRecordForRestoreLocator() { + return null + } + + async readCodexDurabilityForRestoreLocator() { + return null + } + + async deleteCodexDurabilityStoreRecord() {} + repairLegacySessionOwners() { return { repaired: 
false, clearedTerminalIds: [] } } @@ -340,8 +350,6 @@ describe('ws protocol', () => { codexLaunchPlanner.sidecar.shutdownCalls = 0 codexLaunchPlanner.sidecar.shutdownStarted = false codexLaunchPlanner.sidecar.shutdownError = null - codexLaunchPlanner.sidecar.waitForLoadedThreadCalls = [] - codexLaunchPlanner.sidecar.waitForLoadedThreadError = null }) afterAll(async () => { @@ -505,7 +513,7 @@ describe('ws protocol', () => { resumeSessionId: undefined, sandbox: 'workspace-write', }]) - expect(registry.createCalls[0]?.resumeSessionId).toBe('thread-new-1') + expect(registry.createCalls[0]?.resumeSessionId).toBeUndefined() expect(registry.createCalls[0]?.providerSettings).toEqual({ codexAppServer: expect.objectContaining({ wsUrl: DEFAULT_CODEX_REMOTE_WS_URL, @@ -788,56 +796,7 @@ describe('ws protocol', () => { expect(localRegistry.records.size).toBe(0) }) - it('aborts in-flight Codex resume terminal.create when shutdown starts during loaded-list readiness', async () => { - const localServer = http.createServer((_req, res) => { - res.statusCode = 404 - res.end() - }) - const localRegistry = new FakeRegistry() - const sidecar = new FakeCodexLaunchSidecar() - const readiness = deferred() - const originalWaitForLoadedThread = sidecar.waitForLoadedThread.bind(sidecar) - const localPlanner = new FakeCodexLaunchPlanner({ - sessionId: 'thread-during-readiness', - remote: { wsUrl: DEFAULT_CODEX_REMOTE_WS_URL }, - sidecar, - }) - const localHandler = new WsHandler(localServer, localRegistry as any, { codexLaunchPlanner: localPlanner }) - vi.spyOn(sidecar, 'waitForLoadedThread').mockImplementation(async (threadId, options) => { - await originalWaitForLoadedThread(threadId, options) - localHandler.close() - await readiness.promise - }) - const sent: any[] = [] - const ws = createOpenFakeWs('shutdown-during-readiness', sent) - const state = createAuthenticatedState() - - const message = (localHandler as any).onMessage( - ws, - state, - Buffer.from(JSON.stringify({ - type: 
'terminal.create', - requestId: 'shutdown-during-readiness', - mode: 'codex', - resumeSessionId: 'thread-during-readiness', - })), - ) - await vi.waitFor(() => expect(sidecar.waitForLoadedThreadCalls).toHaveLength(1)) - readiness.resolve() - await message - - expect(sent).toContainEqual(expect.objectContaining({ - type: 'error', - requestId: 'shutdown-during-readiness', - })) - expect(sidecar.adoptCalls).toHaveLength(1) - expect(localRegistry.publishCalls).toEqual([]) - expect(localRegistry.killCalls).toHaveLength(1) - expect(sidecar.shutdownCalls).toBe(1) - expect(localRegistry.records.size).toBe(0) - }) - - it('waits for candidate-local loaded-thread readiness before reporting Codex resume create success', async () => { + it('reports Codex resume create success without loaded-thread readiness polling', async () => { const ws = new WebSocket(`ws://127.0.0.1:${port}/ws`) await new Promise<void>((resolve) => ws.on('open', () => resolve())) ws.send(JSON.stringify({ type: 'hello', token: 'testtoken-testtoken', protocolVersion: WS_PROTOCOL_VERSION })) @@ -848,7 +807,7 @@ describe('ws protocol', () => { type: 'terminal.create', requestId, mode: 'codex', - resumeSessionId: 'thread-resume-1', + sessionRef: { provider: 'codex', sessionId: 'thread-resume-1' }, })) const created = await waitForMessage( ws, @@ -859,48 +818,15 @@ describe('ws protocol', () => { expect(codexLaunchPlanner.planCreateCalls[0]).toEqual(expect.objectContaining({ resumeSessionId: 'thread-resume-1', })) - expect(codexLaunchPlanner.sidecar.waitForLoadedThreadCalls).toEqual([{ - threadId: 'thread-resume-1', - options: undefined, - }]) expect(registry.publishCalls).toEqual([created.terminalId]) await closeWebSocket(ws) }) - it('kills the created terminal and sidecar when Codex resume loaded-list readiness fails', async () => { - codexLaunchPlanner.sidecar.waitForLoadedThreadError = new Error('resume thread never loaded') - const ws = new WebSocket(`ws://127.0.0.1:${port}/ws`) - await new 
Promise<void>((resolve) => ws.on('open', () => resolve())) - ws.send(JSON.stringify({ type: 'hello', token: 'testtoken-testtoken', protocolVersion: WS_PROTOCOL_VERSION })) - await waitForMessage(ws, (msg) => msg.type === 'ready', 5000) - - const requestId = 'codex-resume-loaded-list-fails' - ws.send(JSON.stringify({ - type: 'terminal.create', - requestId, - mode: 'codex', - resumeSessionId: 'thread-missing', - })) - const error = await waitForMessage( - ws, - (msg) => msg.type === 'error' && msg.requestId === requestId, - 5000, - ) - - expect(error.message).toContain('resume thread never loaded') - expect(codexLaunchPlanner.sidecar.waitForLoadedThreadCalls).toHaveLength(1) - expect(codexLaunchPlanner.sidecar.shutdownCalls).toBe(1) - expect(registry.killCalls).toHaveLength(1) - expect(registry.records.size).toBe(0) - - await closeWebSocket(ws) - }) - it('kills the created terminal and sidecar when the Codex resume PTY exits before publication', async () => { - const originalWaitForLoadedThread = codexLaunchPlanner.sidecar.waitForLoadedThread.bind(codexLaunchPlanner.sidecar) - const waitSpy = vi.spyOn(codexLaunchPlanner.sidecar, 'waitForLoadedThread').mockImplementation(async (threadId, options) => { - await originalWaitForLoadedThread(threadId, options) + const originalAdopt = codexLaunchPlanner.sidecar.adopt.bind(codexLaunchPlanner.sidecar) + const adoptSpy = vi.spyOn(codexLaunchPlanner.sidecar, 'adopt').mockImplementation(async (input) => { + await originalAdopt(input) const terminalId = codexLaunchPlanner.sidecar.adoptCalls[0]?.terminalId const record = terminalId ? 
registry.get(terminalId) : null if (record) record.status = 'exited' @@ -916,7 +842,7 @@ describe('ws protocol', () => { type: 'terminal.create', requestId, mode: 'codex', - resumeSessionId: 'thread-resume-exits', + sessionRef: { provider: 'codex', sessionId: 'thread-resume-exits' }, })) const error = await waitForMessage( ws, @@ -925,12 +851,11 @@ describe('ws protocol', () => { ) expect(error.message).toContain('Codex terminal PTY exited before create completed') - expect(codexLaunchPlanner.sidecar.waitForLoadedThreadCalls).toHaveLength(1) expect(codexLaunchPlanner.sidecar.shutdownCalls).toBe(1) expect(registry.killCalls).toHaveLength(1) expect(registry.records.size).toBe(0) } finally { - waitSpy.mockRestore() + adoptSpy.mockRestore() await closeWebSocket(ws) } }) @@ -1225,6 +1150,31 @@ describe('ws protocol', () => { await close() }) + it('terminal.input reports Codex identity capture timeout as blocked input', async () => { + const { ws, close } = await createAuthenticatedConnection() + + const terminalId = await createTerminal(ws, 'create-for-codex-timeout-input') + const originalInput = registry.input.bind(registry) + registry.input = vi.fn(() => ({ + status: 'blocked_codex_identity_capture_timeout', + terminalId, + })) as any + + try { + ws.send(JSON.stringify({ type: 'terminal.input', terminalId, data: 'test' })) + + const blocked = await waitForMessage(ws, (msg) => msg.type === 'terminal.input.blocked') + expect(blocked).toEqual({ + type: 'terminal.input.blocked', + terminalId, + reason: 'codex_identity_capture_timeout', + }) + } finally { + registry.input = originalInput as any + await close() + } + }) + it('terminal.resize changes terminal dimensions', async () => { const { ws, close } = await createAuthenticatedConnection() diff --git a/test/server/ws-session-observability.test.ts b/test/server/ws-session-observability.test.ts index f6cde257d..1f1918de5 100644 --- a/test/server/ws-session-observability.test.ts +++ 
b/test/server/ws-session-observability.test.ts @@ -306,7 +306,7 @@ describe('websocket session observability', () => { }) it('records stale terminal input without logging input data', async () => { - registry.input.mockReturnValue(false) + registry.input.mockReturnValue({ status: 'no_terminal' }) const ws = await connectReady(port) try { diff --git a/test/server/ws-terminal-create-reuse-running-codex.test.ts b/test/server/ws-terminal-create-reuse-running-codex.test.ts index 809d26cbc..c351b2e75 100644 --- a/test/server/ws-terminal-create-reuse-running-codex.test.ts +++ b/test/server/ws-terminal-create-reuse-running-codex.test.ts @@ -1,5 +1,9 @@ import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest' import http from 'http' +import fsp from 'node:fs/promises' +import os from 'node:os' +import path from 'node:path' +import { EventEmitter } from 'node:events' import WebSocket from 'ws' import { WS_PROTOCOL_VERSION } from '../../shared/ws-protocol' import { FakeCodexLaunchPlanner, DEFAULT_CODEX_REMOTE_WS_URL } from '../helpers/coding-cli/fake-codex-launch-planner.js' @@ -169,16 +173,28 @@ type FakeTerminal = { cols: number rows: number resumeSessionId?: string + codexDurability?: any clients: Set<WebSocket> } -class FakeRegistry { +class FakeRegistry extends EventEmitter { records: FakeTerminal[] attachCalls: Array<{ terminalId: string; opts?: any }> = [] createCalls: any[] = [] repairCalls: Array<{ mode: string; sessionId: string }> = [] + candidatePersistedAcks: any[] = [] + promoteCalls: Array<{ terminalId: string; durableThreadId: string }> = [] + deletedDurabilityRecords: Array<{ terminalId: string; reason: string }> = [] + durabilityRestoreRecords: Array<{ + terminalId: string + tabId?: string + paneId?: string + serverInstanceId?: string + durability: any + }> = [] constructor(terminalIds: string[]) { + super() const createdAt = Date.now() this.records = terminalIds.map((terminalId, idx) => ({ terminalId, @@ -226,10 +242,78 @@ class FakeRegistry 
{ }) } + bindSession(terminalId: string, mode: string, sessionId: string) { + const record = this.findById(terminalId) + if (!record || mode !== 'codex') return { ok: false, reason: 'terminal_missing' } + record.resumeSessionId = sessionId + return { ok: true, terminalId, sessionId } + } + + async promoteCodexDurabilityFromCreateProof(terminalId: string, durableThreadId: string) { + this.promoteCalls.push({ terminalId, durableThreadId }) + const bound = this.bindSession(terminalId, 'codex', durableThreadId) + if (!bound.ok) return bound + const record = this.findById(terminalId) + if (record) { + record.codexDurability = { + schemaVersion: 1, + state: 'durable', + durableThreadId, + } + this.emit('terminal.codex.durability.updated', { + terminalId, + durability: record.codexDurability, + }) + } + return bound + } + findRunningClaudeTerminalBySession(sessionId: string) { return this.findRunningTerminalBySession('claude', sessionId) } + findRunningCodexTerminalByCandidate(candidateThreadId: string, rolloutPath: string) { + return this.records.find((record) => ( + record.status === 'running' + && record.codexDurability?.candidate?.candidateThreadId === candidateThreadId + && record.codexDurability?.candidate?.rolloutPath === rolloutPath + )) + } + + async readCodexDurabilityRecordForRestoreLocator(locator: { + terminalId?: string + tabId?: string + paneId?: string + serverInstanceId?: string + }) { + if (locator.terminalId) { + const record = this.durabilityRestoreRecords.find((candidate) => candidate.terminalId === locator.terminalId) + return record ? 
{ terminalId: record.terminalId, durability: record.durability } : undefined + } + if (!locator.tabId || !locator.paneId) return undefined + const matches = this.durabilityRestoreRecords.filter((record) => ( + record.tabId === locator.tabId + && record.paneId === locator.paneId + && (!locator.serverInstanceId || record.serverInstanceId === locator.serverInstanceId) + )) + if (matches.length > 1) throw new Error('ambiguous restore locator') + return matches[0] ? { terminalId: matches[0].terminalId, durability: matches[0].durability } : undefined + } + + async readCodexDurabilityForRestoreLocator(locator: { + terminalId?: string + tabId?: string + paneId?: string + serverInstanceId?: string + }) { + return (await this.readCodexDurabilityRecordForRestoreLocator(locator))?.durability + } + + async deleteCodexDurabilityStoreRecord(terminalId: string, reason: string) { + this.deletedDurabilityRecords.push({ terminalId, reason }) + this.durabilityRestoreRecords = this.durabilityRestoreRecords.filter((record) => record.terminalId !== terminalId) + } + attach(terminalId: string, ws: WebSocket, opts?: any) { this.attachCalls.push({ terminalId, opts }) const record = this.findById(terminalId) @@ -259,6 +343,11 @@ class FakeRegistry { } list() { return [] } + + acknowledgeCodexCandidatePersisted(input: any) { + this.candidatePersistedAcks.push(input) + return 'accepted' + } } describe('terminal.create reuse running codex terminal', () => { @@ -289,6 +378,10 @@ describe('terminal.create reuse running codex terminal', () => { registry.attachCalls = [] registry.createCalls = [] registry.repairCalls = [] + registry.candidatePersistedAcks = [] + registry.promoteCalls = [] + registry.deletedDurabilityRecords = [] + registry.durabilityRestoreRecords = [] }, HOOK_TIMEOUT_MS) afterEach(async () => { @@ -400,12 +493,75 @@ describe('terminal.create reuse running codex terminal', () => { } }) - it('existingId branch returns created only and requires explicit attach', async () => { + 
it('rejects raw Codex resume ids on restore instead of creating a fresh terminal', async () => { const ws = new WebSocket(`ws://127.0.0.1:${port}/ws`) try { await new Promise<void>((resolve) => ws.on('open', () => resolve())) await waitForReady(ws) + const requestId = 'codex-raw-resume-restore' + const errorPromise = waitForMessage(ws, (m) => m.type === 'error' && m.requestId === requestId) + ws.send(JSON.stringify({ + type: 'terminal.create', + requestId, + mode: 'codex', + restore: true, + resumeSessionId: 'thread-raw-restore', + })) + + const error = await errorPromise + expect(error).toMatchObject({ + type: 'error', + code: 'INVALID_MESSAGE', + message: 'Restore requires sessionRef; resumeSessionId is a legacy field and cannot be used as restore identity.', + requestId, + }) + expect(codexLaunchPlanner.planCreateCalls).toHaveLength(0) + expect(registry.createCalls).toHaveLength(0) + } finally { + await closeWebSocket(ws) + } + }) + + it.each([ + ['omitted', undefined], + ['false', false], + ] as const)('rejects raw Codex resume ids when restore is %s', async (_label, restore) => { + const ws = new WebSocket(`ws://127.0.0.1:${port}/ws`) + try { + await new Promise<void>((resolve) => ws.on('open', () => resolve())) + await waitForReady(ws) + + const requestId = `codex-raw-resume-create-${_label}` + const errorPromise = waitForMessage(ws, (m) => m.type === 'error' && m.requestId === requestId) + ws.send(JSON.stringify({ + type: 'terminal.create', + requestId, + mode: 'codex', + ...(restore === undefined ? 
{} : { restore }), + resumeSessionId: 'thread-raw-create', + })) + + const error = await errorPromise + expect(error).toMatchObject({ + type: 'error', + code: 'INVALID_MESSAGE', + message: 'Restore requires sessionRef; resumeSessionId is a legacy field and cannot be used as restore identity.', + requestId, + }) + expect(codexLaunchPlanner.planCreateCalls).toHaveLength(0) + expect(registry.createCalls).toHaveLength(0) + } finally { + await closeWebSocket(ws) + } + }) + + it('existingId branch returns created only and requires explicit attach', async () => { + const ws = new WebSocket(`ws://127.0.0.1:${port}/ws`) + try { + await new Promise<void>((resolve) => ws.on('open', () => resolve())) + const helloReady = await waitForReady(ws) + const firstCreatedPromise = waitForMessage( ws, (m) => m.type === 'terminal.created' && m.requestId === 'reuse-existingId-split', @@ -504,18 +660,9 @@ describe('terminal.create reuse running codex terminal', () => { model: undefined, sandbox: undefined, approvalPolicy: undefined, - terminalId: expect.any(String), - env: expect.objectContaining({ - FRESHELL: '1', - FRESHELL_TERMINAL_ID: expect.any(String), - FRESHELL_TOKEN: 'testtoken-testtoken', - FRESHELL_URL: 'http://localhost:3001', - }), })) - expect(planCreate.env.FRESHELL_TERMINAL_ID).toBe(planCreate.terminalId) expect(registry.createCalls).toHaveLength(1) expect(registry.createCalls[0]).toMatchObject({ - terminalId: planCreate.terminalId, mode: 'codex', cwd: '/repo/worktree', resumeSessionId: undefined, @@ -530,6 +677,497 @@ describe('terminal.create reuse running codex terminal', () => { } }) + it('proof-reads captured Codex durability and resumes only after proof succeeds', async () => { + const tempDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'freshell-ws-codex-proof-')) + const ws = new WebSocket(`ws://127.0.0.1:${port}/ws`) + try { + const rolloutPath = path.join(tempDir, 'rollout.jsonl') + await fsp.writeFile( + rolloutPath, + 
'{"type":"session_meta","payload":{"id":"thread-proved"}}\n', + 'utf8', + ) + await new Promise<void>((resolve) => ws.on('open', () => resolve())) + await waitForReady(ws) + + const requestId = 'codex-proved-reopen' + const createdPromise = waitForMessage(ws, (m) => m.type === 'terminal.created' && m.requestId === requestId) + const associatedPromise = waitForMessage(ws, (m) => ( + m.type === 'terminal.session.associated' + && m.sessionRef?.provider === 'codex' + && m.sessionRef?.sessionId === 'thread-proved' + )) + ws.send(JSON.stringify({ + type: 'terminal.create', + requestId, + mode: 'codex', + restore: true, + codexDurability: { + schemaVersion: 1, + state: 'captured_pre_turn', + candidate: { + provider: 'codex', + candidateThreadId: 'thread-proved', + rolloutPath, + source: 'thread_started_notification', + capturedAt: Date.now(), + }, + }, + })) + + const created = await createdPromise + const associated = await associatedPromise + expect(created).not.toHaveProperty('effectiveResumeSessionId') + expect(associated.terminalId).toBe(created.terminalId) + expect(codexLaunchPlanner.planCreateCalls[0]).toMatchObject({ + resumeSessionId: 'thread-proved', + }) + expect(registry.createCalls[0]).toMatchObject({ + mode: 'codex', + resumeSessionId: 'thread-proved', + }) + } finally { + await closeWebSocket(ws) + await fsp.rm(tempDir, { recursive: true, force: true }) + } + }) + + it('proof-reads server-stored Codex durability when the client has not persisted candidate state', async () => { + const tempDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'freshell-ws-codex-store-proof-')) + const ws = new WebSocket(`ws://127.0.0.1:${port}/ws`) + try { + const rolloutPath = path.join(tempDir, 'rollout.jsonl') + await fsp.writeFile( + rolloutPath, + '{"type":"session_meta","payload":{"id":"thread-store-proved"}}\n', + 'utf8', + ) + registry.durabilityRestoreRecords.push({ + terminalId: 'old-store-terminal', + tabId: 'tab-bridge', + paneId: 'pane-bridge', + durability: { + 
schemaVersion: 1, + state: 'captured_pre_turn', + candidate: { + provider: 'codex', + candidateThreadId: 'thread-store-proved', + rolloutPath, + source: 'thread_started_notification', + capturedAt: Date.now(), + }, + }, + }) + await new Promise<void>((resolve) => ws.on('open', () => resolve())) + await waitForReady(ws) + + const requestId = 'codex-store-proved-reopen' + const createdPromise = waitForMessage(ws, (m) => m.type === 'terminal.created' && m.requestId === requestId) + ws.send(JSON.stringify({ + type: 'terminal.create', + requestId, + mode: 'codex', + restore: true, + tabId: 'tab-bridge', + paneId: 'pane-bridge', + })) + + await createdPromise + expect(codexLaunchPlanner.planCreateCalls[0]).toMatchObject({ + resumeSessionId: 'thread-store-proved', + }) + expect(registry.createCalls[0]).toMatchObject({ + mode: 'codex', + resumeSessionId: 'thread-store-proved', + }) + expect(registry.deletedDurabilityRecords).toEqual([{ + terminalId: 'old-store-terminal', + reason: 'restore_proof_succeeded_created_replacement', + }]) + } finally { + await closeWebSocket(ws) + await fsp.rm(tempDir, { recursive: true, force: true }) + } + }) + + it('does not use server-stored Codex durability for non-restore fresh creates', async () => { + const tempDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'freshell-ws-codex-store-fresh-')) + const ws = new WebSocket(`ws://127.0.0.1:${port}/ws`) + try { + const rolloutPath = path.join(tempDir, 'rollout.jsonl') + await fsp.writeFile( + rolloutPath, + '{"type":"session_meta","payload":{"id":"thread-store-stale"}}\n', + 'utf8', + ) + registry.durabilityRestoreRecords.push({ + terminalId: 'old-store-terminal', + tabId: 'tab-fresh', + paneId: 'pane-fresh', + durability: { + schemaVersion: 1, + state: 'captured_pre_turn', + candidate: { + provider: 'codex', + candidateThreadId: 'thread-store-stale', + rolloutPath, + source: 'thread_started_notification', + capturedAt: Date.now(), + }, + }, + }) + await new Promise<void>((resolve) => 
ws.on('open', () => resolve())) + await waitForReady(ws) + + const requestId = 'codex-fresh-ignores-store-record' + const createdPromise = waitForMessage(ws, (m) => m.type === 'terminal.created' && m.requestId === requestId) + ws.send(JSON.stringify({ + type: 'terminal.create', + requestId, + mode: 'codex', + tabId: 'tab-fresh', + paneId: 'pane-fresh', + })) + + await createdPromise + expect(codexLaunchPlanner.planCreateCalls[0]).toMatchObject({ + resumeSessionId: undefined, + }) + expect(registry.createCalls[0]).toMatchObject({ + mode: 'codex', + resumeSessionId: undefined, + }) + expect(registry.deletedDurabilityRecords).toEqual([]) + expect(registry.durabilityRestoreRecords).toHaveLength(1) + } finally { + await closeWebSocket(ws) + await fsp.rm(tempDir, { recursive: true, force: true }) + } + }) + + it('fresh-creates with restore failure when server-stored Codex durability cannot be proved', async () => { + const tempDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'freshell-ws-codex-store-proof-missing-')) + const ws = new WebSocket(`ws://127.0.0.1:${port}/ws`) + try { + const rolloutPath = path.join(tempDir, 'missing.jsonl') + registry.durabilityRestoreRecords.push({ + terminalId: 'old-store-terminal', + tabId: 'tab-bridge', + paneId: 'pane-bridge', + durability: { + schemaVersion: 1, + state: 'captured_pre_turn', + candidate: { + provider: 'codex', + candidateThreadId: 'thread-store-missing', + rolloutPath, + source: 'thread_started_notification', + capturedAt: Date.now(), + }, + }, + }) + await new Promise<void>((resolve) => ws.on('open', () => resolve())) + await waitForReady(ws) + + const requestId = 'codex-store-unproved-reopen' + const createdPromise = waitForMessage(ws, (m) => m.type === 'terminal.created' && m.requestId === requestId) + ws.send(JSON.stringify({ + type: 'terminal.create', + requestId, + mode: 'codex', + restore: true, + tabId: 'tab-bridge', + paneId: 'pane-bridge', + })) + + const created = await createdPromise + 
expect(created).toMatchObject({ + type: 'terminal.created', + requestId, + clearCodexDurability: true, + restoreError: { + code: 'RESTORE_UNAVAILABLE', + reason: 'durable_artifact_missing', + }, + }) + expect(codexLaunchPlanner.planCreateCalls[0]).toMatchObject({ + resumeSessionId: undefined, + }) + expect(registry.createCalls[0]).toMatchObject({ + mode: 'codex', + resumeSessionId: undefined, + }) + expect(registry.deletedDurabilityRecords).toEqual([{ + terminalId: 'old-store-terminal', + reason: 'restore_proof_failed_fresh_create', + }]) + expect(registry.durabilityRestoreRecords).toHaveLength(0) + } finally { + await closeWebSocket(ws) + await fsp.rm(tempDir, { recursive: true, force: true }) + } + }) + + it('proof-reads a same-server live Codex candidate before reattaching it', async () => { + const tempDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'freshell-ws-codex-proof-live-')) + const ws = new WebSocket(`ws://127.0.0.1:${port}/ws`) + try { + const rolloutPath = path.join(tempDir, 'rollout.jsonl') + await fsp.writeFile( + rolloutPath, + '{"type":"session_meta","payload":{"id":"thread-live-proved"}}\n', + 'utf8', + ) + registry.records[0].resumeSessionId = undefined + registry.records[0].codexDurability = { + schemaVersion: 1, + state: 'durability_unproven_after_completion', + candidate: { + provider: 'codex', + candidateThreadId: 'thread-live-proved', + rolloutPath, + source: 'thread_started_notification', + capturedAt: Date.now(), + }, + turnCompletedAt: Date.now(), + } + await new Promise<void>((resolve) => ws.on('open', () => resolve())) + const helloReady = await waitForReady(ws) + + const requestId = 'codex-proved-live-reopen' + const createdPromise = waitForMessage(ws, (m) => m.type === 'terminal.created' && m.requestId === requestId) + const associatedPromise = waitForMessage(ws, (m) => ( + m.type === 'terminal.session.associated' + && m.terminalId === 'term-codex-existing' + && m.sessionRef?.sessionId === 'thread-live-proved' + )) + const 
durabilityPromise = waitForMessage(ws, (m) => ( + m.type === 'terminal.codex.durability.updated' + && m.terminalId === 'term-codex-existing' + && m.durability?.state === 'durable' + && m.durability?.durableThreadId === 'thread-live-proved' + )) + const terminalsChangedPromise = waitForMessage(ws, (m) => m.type === 'terminals.changed') + ws.send(JSON.stringify({ + type: 'terminal.create', + requestId, + mode: 'codex', + restore: true, + liveTerminal: { + terminalId: 'term-codex-existing', + serverInstanceId: helloReady.serverInstanceId, + }, + codexDurability: registry.records[0].codexDurability, + })) + + const created = await createdPromise + await associatedPromise + await durabilityPromise + await terminalsChangedPromise + expect(created.terminalId).toBe('term-codex-existing') + expect(registry.records[0].resumeSessionId).toBe('thread-live-proved') + expect(registry.records[0].codexDurability).toMatchObject({ + state: 'durable', + durableThreadId: 'thread-live-proved', + }) + expect(registry.promoteCalls).toEqual([{ + terminalId: 'term-codex-existing', + durableThreadId: 'thread-live-proved', + }]) + expect(codexLaunchPlanner.planCreateCalls).toHaveLength(0) + expect(registry.createCalls).toHaveLength(0) + } finally { + await closeWebSocket(ws) + await fsp.rm(tempDir, { recursive: true, force: true }) + } + }) + + it('does not promote a stale same-server live Codex handle when its candidate differs', async () => { + const tempDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'freshell-ws-codex-proof-live-mismatch-')) + const ws = new WebSocket(`ws://127.0.0.1:${port}/ws`) + try { + const rolloutPath = path.join(tempDir, 'rollout.jsonl') + await fsp.writeFile( + rolloutPath, + '{"type":"session_meta","payload":{"id":"thread-proved-mismatch"}}\n', + 'utf8', + ) + registry.records[0].resumeSessionId = undefined + registry.records[0].codexDurability = { + schemaVersion: 1, + state: 'durability_unproven_after_completion', + candidate: { + provider: 'codex', + 
candidateThreadId: 'different-live-thread', + rolloutPath, + source: 'thread_started_notification', + capturedAt: Date.now(), + }, + turnCompletedAt: Date.now(), + } + await new Promise<void>((resolve) => ws.on('open', () => resolve())) + const helloReady = await waitForReady(ws) + + const requestId = 'codex-proved-live-mismatch-reopen' + const createdPromise = waitForMessage(ws, (m) => m.type === 'terminal.created' && m.requestId === requestId) + ws.send(JSON.stringify({ + type: 'terminal.create', + requestId, + mode: 'codex', + restore: true, + liveTerminal: { + terminalId: 'term-codex-existing', + serverInstanceId: helloReady.serverInstanceId, + }, + codexDurability: { + schemaVersion: 1, + state: 'durability_unproven_after_completion', + candidate: { + provider: 'codex', + candidateThreadId: 'thread-proved-mismatch', + rolloutPath, + source: 'thread_started_notification', + capturedAt: Date.now(), + }, + turnCompletedAt: Date.now(), + }, + })) + + await createdPromise + expect(registry.promoteCalls).toEqual([]) + expect(codexLaunchPlanner.planCreateCalls[0]).toMatchObject({ + resumeSessionId: 'thread-proved-mismatch', + }) + expect(registry.createCalls[0]).toMatchObject({ + mode: 'codex', + resumeSessionId: 'thread-proved-mismatch', + }) + } finally { + await closeWebSocket(ws) + await fsp.rm(tempDir, { recursive: true, force: true }) + } + }) + + it('does not resume a captured Codex candidate when proof fails', async () => { + const tempDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'freshell-ws-codex-proof-')) + const ws = new WebSocket(`ws://127.0.0.1:${port}/ws`) + try { + const rolloutPath = path.join(tempDir, 'missing.jsonl') + await new Promise<void>((resolve) => ws.on('open', () => resolve())) + await waitForReady(ws) + + const requestId = 'codex-unproved-reopen' + const createdPromise = waitForMessage(ws, (m) => m.type === 'terminal.created' && m.requestId === requestId) + ws.send(JSON.stringify({ + type: 'terminal.create', + requestId, + mode: 
'codex', + restore: true, + codexDurability: { + schemaVersion: 1, + state: 'durability_unproven_after_completion', + candidate: { + provider: 'codex', + candidateThreadId: 'thread-missing', + rolloutPath, + source: 'thread_started_notification', + capturedAt: Date.now(), + }, + turnCompletedAt: Date.now(), + }, + })) + + const created = await createdPromise + expect(created).toMatchObject({ + type: 'terminal.created', + requestId, + clearCodexDurability: true, + restoreError: { + code: 'RESTORE_UNAVAILABLE', + reason: 'durable_artifact_missing', + }, + }) + expect(codexLaunchPlanner.planCreateCalls[0]).toMatchObject({ + resumeSessionId: undefined, + }) + expect(registry.createCalls[0]).toMatchObject({ + mode: 'codex', + resumeSessionId: undefined, + }) + } finally { + await closeWebSocket(ws) + await fsp.rm(tempDir, { recursive: true, force: true }) + } + }) + + it('attaches exact live Codex candidate when captured proof fails and live terminal exists', async () => { + const tempDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'freshell-ws-codex-proof-')) + const ws = new WebSocket(`ws://127.0.0.1:${port}/ws`) + try { + const rolloutPath = path.join(tempDir, 'missing-live.jsonl') + registry.records[0].codexDurability = { + schemaVersion: 1, + state: 'durability_unproven_after_completion', + candidate: { + provider: 'codex', + candidateThreadId: 'thread-live-unproved', + rolloutPath, + source: 'thread_started_notification', + capturedAt: Date.now(), + }, + turnCompletedAt: Date.now(), + } + await new Promise<void>((resolve) => ws.on('open', () => resolve())) + await waitForReady(ws) + + const requestId = 'codex-unproved-live-reopen' + const createdPromise = waitForMessage(ws, (m) => m.type === 'terminal.created' && m.requestId === requestId) + ws.send(JSON.stringify({ + type: 'terminal.create', + requestId, + mode: 'codex', + restore: true, + codexDurability: registry.records[0].codexDurability, + })) + + const created = await createdPromise + 
expect(created.terminalId).toBe('term-codex-existing') + expect(codexLaunchPlanner.planCreateCalls).toHaveLength(0) + expect(registry.createCalls).toHaveLength(0) + } finally { + await closeWebSocket(ws) + await fsp.rm(tempDir, { recursive: true, force: true }) + } + }) + + it('accepts Codex candidate persisted acknowledgements through the dynamic websocket schema', async () => { + const ws = new WebSocket(`ws://127.0.0.1:${port}/ws`) + try { + await new Promise<void>((resolve) => ws.on('open', () => resolve())) + await waitForReady(ws) + + const messagesPromise = collectMessages(ws, 75) + ws.send(JSON.stringify({ + type: 'terminal.codex.candidate.persisted', + terminalId: 'term-codex-existing', + candidateThreadId: 'thread-ack', + rolloutPath: '/tmp/codex/thread-ack.jsonl', + capturedAt: Date.now(), + })) + + const messages = await messagesPromise + expect(registry.candidatePersistedAcks).toHaveLength(1) + expect(registry.candidatePersistedAcks[0]).toMatchObject({ + terminalId: 'term-codex-existing', + candidateThreadId: 'thread-ack', + rolloutPath: '/tmp/codex/thread-ack.jsonl', + }) + expect(messages.some((message) => message.type === 'error' && message.code === 'INVALID_MESSAGE')).toBe(false) + } finally { + await closeWebSocket(ws) + } + }) + it('reuses canonical owner and repairs duplicate session records before reuse', async () => { const { WsHandler } = await import('../../server/ws-handler') const dupeServer = http.createServer((_req, res) => { res.statusCode = 404; res.end() }) diff --git a/test/unit/client/agentChatSlice.test.ts b/test/unit/client/agentChatSlice.test.ts index a87b0d627..98f813c4b 100644 --- a/test/unit/client/agentChatSlice.test.ts +++ b/test/unit/client/agentChatSlice.test.ts @@ -506,7 +506,7 @@ describe('agentChatSlice', () => { expect(state.sessions['sdk-live']).toMatchObject({ historyLoaded: true, - timelineSessionId: undefined, + timelineSessionId: 'named-resume', timelineRevision: 1, }) diff --git 
a/test/unit/client/components/TabsView.test.tsx b/test/unit/client/components/TabsView.test.tsx index cb1e5ef5a..38070d665 100644 --- a/test/unit/client/components/TabsView.test.tsx +++ b/test/unit/client/components/TabsView.test.tsx @@ -169,6 +169,11 @@ describe('TabsView', () => { payload: { provider: 'freshclaude', resumeSessionId: '00000000-0000-4000-8000-000000000444', + sessionRef: { + provider: 'claude', + sessionId: '00000000-0000-4000-8000-000000000444', + serverInstanceId: 'srv-remote', + }, modelSelection: { kind: 'tracked', modelId: 'opus[1m]' }, permissionMode: 'plan', effort: 'turbo', @@ -199,8 +204,8 @@ describe('TabsView', () => { sessionRef: { provider: 'claude', sessionId: '00000000-0000-4000-8000-000000000444', - serverInstanceId: 'srv-remote', }, + serverInstanceId: 'srv-remote', modelSelection: { kind: 'tracked', modelId: 'opus[1m]' }, permissionMode: 'plan', effort: 'turbo', @@ -366,8 +371,8 @@ describe('TabsView', () => { expect(layout?.content?.sessionRef).toEqual({ provider: 'codex', sessionId: 'codex-session-123', - serverInstanceId: 'srv-remote', }) + expect(layout?.content?.serverInstanceId).toBe('srv-remote') }) it('shows pane kind icons with distinct colors', () => { diff --git a/test/unit/client/components/TerminalView.lifecycle.test.tsx b/test/unit/client/components/TerminalView.lifecycle.test.tsx index 572373112..566245f1b 100644 --- a/test/unit/client/components/TerminalView.lifecycle.test.tsx +++ b/test/unit/client/components/TerminalView.lifecycle.test.tsx @@ -2486,6 +2486,38 @@ describe('TerminalView lifecycle updates', () => { expect(writelnCalls.some((s: string) => s.includes('Terminal exited'))).toBe(true) }) + it('shows feedback when Codex input is blocked by the restore identity gate', async () => { + const { store, tabId, paneId, paneContent } = setupThemeTerminal({ + terminalId: 'term-codex', + status: 'running', + mode: 'codex', + }) + + render( + <Provider store={store}> + <TerminalView tabId={tabId} paneId={paneId} 
paneContent={paneContent} /> + </Provider> + ) + + await waitFor(() => { + expect(messageHandler).not.toBeNull() + expect(terminalInstances.length).toBeGreaterThan(0) + }) + + act(() => { + messageHandler!({ + type: 'terminal.input.blocked', + terminalId: 'term-codex', + reason: 'codex_identity_pending', + }) + }) + + const term = terminalInstances[0] + expect(term.writeln).toHaveBeenCalledWith( + expect.stringContaining('Input not sent: Codex is still saving restore state. Try again in a moment.'), + ) + }) + it('mirrors canonical durable identity to pane and tab on terminal.session.associated', async () => { const tabId = 'tab-session-assoc' const paneId = 'pane-session-assoc' diff --git a/test/unit/client/components/agent-chat/AgentChatView.reload.test.tsx b/test/unit/client/components/agent-chat/AgentChatView.reload.test.tsx index 7d7176938..2cb399ba2 100644 --- a/test/unit/client/components/agent-chat/AgentChatView.reload.test.tsx +++ b/test/unit/client/components/agent-chat/AgentChatView.reload.test.tsx @@ -840,7 +840,7 @@ describe('AgentChatView reload/restore behavior', () => { sessionId: 'sess-reload-1', latestTurnId: 'turn-2', status: 'idle', - timelineSessionId: 'cli-sess-1', + timelineSessionId: '00000000-0000-4000-8000-000000000101', revision: 12, })) @@ -856,14 +856,14 @@ describe('AgentChatView reload/restore behavior', () => { expect(attachCalls[1]?.[0]).toEqual({ type: 'sdk.attach', sessionId: 'sess-reload-1', - resumeSessionId: 'cli-sess-1', + resumeSessionId: '00000000-0000-4000-8000-000000000101', }) }) }) it('clears stale hydrated timeline content and waits for a fresh snapshot before rereading after a stale restore retry', async () => { getAgentTimelinePage.mockResolvedValue({ - sessionId: 'cli-sess-1', + sessionId: '00000000-0000-4000-8000-000000000101', items: [], nextCursor: null, revision: 13, @@ -874,14 +874,14 @@ describe('AgentChatView reload/restore behavior', () => { sessionId: 'sess-reload-1', latestTurnId: 'turn-2', status: 'idle', 
- timelineSessionId: 'cli-sess-1', + timelineSessionId: '00000000-0000-4000-8000-000000000101', revision: 12, })) store.dispatch(timelinePageReceived({ sessionId: 'sess-reload-1', items: [ makeTimelineItem('turn-2', 'assistant', 'Old stale summary', { - sessionId: 'cli-sess-1', + sessionId: '00000000-0000-4000-8000-000000000101', ordinal: 2, timestamp: '2026-03-10T10:01:00.000Z', }), @@ -891,7 +891,7 @@ describe('AgentChatView reload/restore behavior', () => { replace: true, bodies: { 'turn-2': makeTimelineTurn('turn-2', 'assistant', 'Old hydrated body', { - sessionId: 'cli-sess-1', + sessionId: '00000000-0000-4000-8000-000000000101', ordinal: 2, timestamp: '2026-03-10T10:01:00.000Z', }), @@ -939,7 +939,7 @@ describe('AgentChatView reload/restore behavior', () => { sessionId: 'sess-reload-1', latestTurnId: 'turn-2', status: 'idle', - timelineSessionId: 'cli-sess-1', + timelineSessionId: '00000000-0000-4000-8000-000000000101', revision: 12, })) @@ -954,7 +954,7 @@ describe('AgentChatView reload/restore behavior', () => { sessionId: 'sess-reload-1', items: [ makeTimelineItem('turn-2', 'user', 'Hydrated summary', { - sessionId: 'cli-sess-1', + sessionId: '00000000-0000-4000-8000-000000000101', ordinal: 2, timestamp: '2026-03-10T10:01:00.000Z', }), @@ -966,7 +966,7 @@ describe('AgentChatView reload/restore behavior', () => { store.dispatch(turnBodyReceived({ sessionId: 'sess-reload-1', turn: makeTimelineTurn('turn-2', 'user', 'Hydrated body', { - sessionId: 'cli-sess-1', + sessionId: '00000000-0000-4000-8000-000000000101', ordinal: 2, timestamp: '2026-03-10T10:01:00.000Z', }), @@ -980,7 +980,7 @@ describe('AgentChatView reload/restore behavior', () => { await act(async () => { await store.dispatch(loadAgentTurnBody({ sessionId: 'sess-reload-1', - timelineSessionId: 'cli-sess-1', + timelineSessionId: '00000000-0000-4000-8000-000000000101', turnId: 'turn-7', })) }) @@ -1077,14 +1077,19 @@ describe('AgentChatView reload/restore behavior', () => { }) it('uses 
timelineSessionId from sdk.session.snapshot for visible restore hydration', async () => { - getAgentTimelinePage.mockResolvedValue({ sessionId: 'cli-sess-1', items: [], nextCursor: null, revision: 1 }) + getAgentTimelinePage.mockResolvedValue({ + sessionId: '00000000-0000-4000-8000-000000000101', + items: [], + nextCursor: null, + revision: 1, + }) const store = makeStore() store.dispatch(sessionSnapshotReceived({ sessionId: 'sess-reload-1', latestTurnId: 'turn-2', status: 'idle', - timelineSessionId: 'cli-sess-1', + timelineSessionId: '00000000-0000-4000-8000-000000000101', revision: 2, })) @@ -1096,7 +1101,7 @@ describe('AgentChatView reload/restore behavior', () => { await waitFor(() => { expect(getAgentTimelinePage).toHaveBeenCalledWith( - 'cli-sess-1', + '00000000-0000-4000-8000-000000000101', expect.objectContaining({ includeBodies: true, revision: 2 }), expect.anything(), ) @@ -1164,15 +1169,22 @@ describe('AgentChatView reload/restore behavior', () => { sessionId: 'sdk-sess-1', latestTurnId: 'turn-2', status: 'idle', - timelineSessionId: 'cli-session-abc-123', + timelineSessionId: '00000000-0000-4000-8000-000000000201', revision: 2, })) }) - expect(getPaneContent(store as unknown as ReturnType<typeof makeStore>, 't1', 'p1')?.resumeSessionId).toBe('cli-session-abc-123') + expect(getPaneContent(store as unknown as ReturnType<typeof makeStore>, 't1', 'p1')?.sessionRef).toEqual({ + provider: 'claude', + sessionId: '00000000-0000-4000-8000-000000000201', + }) const tab = store.getState().tabs.tabs.find((entry) => entry.id === 't1') - expect(tab?.resumeSessionId).toBe('cli-session-abc-123') - expect(tab?.sessionMetadataByKey?.['claude:cli-session-abc-123']).toEqual(expect.objectContaining({ + expect(tab?.sessionRef).toEqual({ + provider: 'claude', + sessionId: '00000000-0000-4000-8000-000000000201', + }) + expect(tab?.resumeSessionId).toBeUndefined() + 
expect(tab?.sessionMetadataByKey?.['claude:00000000-0000-4000-8000-000000000201']).toEqual(expect.objectContaining({ sessionType: 'freshclaude', firstUserMessage: 'Continue from the old tab', })) @@ -1212,16 +1224,23 @@ describe('AgentChatView reload/restore behavior', () => { sessionId: 'sdk-shell-1', latestTurnId: 'turn-2', status: 'idle', - timelineSessionId: 'cli-shell-abc-123', + timelineSessionId: '00000000-0000-4000-8000-000000000202', revision: 2, })) }) - expect(getPaneContent(store as unknown as ReturnType<typeof makeStore>, 't-shell', 'p1')?.resumeSessionId).toBe('cli-shell-abc-123') + expect(getPaneContent(store as unknown as ReturnType<typeof makeStore>, 't-shell', 'p1')?.sessionRef).toEqual({ + provider: 'claude', + sessionId: '00000000-0000-4000-8000-000000000202', + }) const tab = store.getState().tabs.tabs.find((entry) => entry.id === 't-shell') - expect(tab?.resumeSessionId).toBe('cli-shell-abc-123') + expect(tab?.sessionRef).toEqual({ + provider: 'claude', + sessionId: '00000000-0000-4000-8000-000000000202', + }) + expect(tab?.resumeSessionId).toBeUndefined() expect(tab?.codingCliProvider).toBe('claude') - expect(tab?.sessionMetadataByKey?.['claude:cli-shell-abc-123']).toEqual(expect.objectContaining({ + expect(tab?.sessionMetadataByKey?.['claude:00000000-0000-4000-8000-000000000202']).toEqual(expect.objectContaining({ sessionType: 'freshclaude', firstUserMessage: 'Continue from shell fallback', })) @@ -1440,9 +1459,16 @@ describe('AgentChatView reload/restore behavior', () => { expect(screen.queryByText('Live-only full body')).not.toBeInTheDocument() expect(screen.getAllByText('Post-watermark live delta')).toHaveLength(1) - expect(getPaneContent(store as unknown as ReturnType<typeof makeStore>, 't-meta', 'p1')?.resumeSessionId).toBe(canonicalSessionId) + expect(getPaneContent(store as unknown as ReturnType<typeof makeStore>, 't-meta', 'p1')?.sessionRef).toEqual({ + provider: 'claude', + sessionId: canonicalSessionId, + }) const tab = 
store.getState().tabs.tabs.find((entry) => entry.id === 't-meta') - expect(tab?.resumeSessionId).toBe(canonicalSessionId) + expect(tab?.sessionRef).toEqual({ + provider: 'claude', + sessionId: canonicalSessionId, + }) + expect(tab?.resumeSessionId).toBeUndefined() expect(tab?.sessionMetadataByKey?.['claude:00000000-0000-4000-8000-000000000321']).toEqual(expect.objectContaining({ sessionType: 'freshclaude', firstUserMessage: 'Continue from metadata upgrade', @@ -1503,7 +1529,7 @@ describe('AgentChatView reload/restore behavior', () => { sessionId: 'sdk-sess-1', latestTurnId: 'turn-2', status: 'running', - timelineSessionId: 'cli-sess-1', + timelineSessionId: '00000000-0000-4000-8000-000000000101', streamingActive: true, streamingText: 'partial reply', })) @@ -1524,7 +1550,7 @@ describe('AgentChatView reload/restore behavior', () => { sessionId: 'sdk-sess-running', latestTurnId: 'turn-2', status: 'running', - timelineSessionId: 'cli-sess-running', + timelineSessionId: '00000000-0000-4000-8000-000000000301', streamingActive: true, streamingText: 'partial reply', })) @@ -1540,7 +1566,7 @@ describe('AgentChatView reload/restore behavior', () => { act(() => { store.dispatch(sessionInit({ sessionId: 'sdk-sess-running', - cliSessionId: 'cli-sess-running', + cliSessionId: '00000000-0000-4000-8000-000000000301', model: 'claude-opus-4-6', })) }) @@ -1558,7 +1584,7 @@ describe('AgentChatView reload/restore behavior', () => { sessionId: 'sdk-sess-2', latestTurnId: 'turn-3', status: 'running', - timelineSessionId: 'cli-sess-2', + timelineSessionId: '00000000-0000-4000-8000-000000000102', streamingActive: false, streamingText: 'partial reply', })) @@ -1893,14 +1919,18 @@ describe('AgentChatView server-restart recovery', () => { store.dispatch(sessionCreated({ requestId: 'req-1', sessionId: 'sdk-sess-1' })) store.dispatch(sessionInit({ sessionId: 'sdk-sess-1', - cliSessionId: 'cli-session-abc-123', + cliSessionId: '00000000-0000-4000-8000-000000000201', model: 'claude-opus-4-6', 
})) }) - // Pane content should now have resumeSessionId persisted + // Pane content should now have the durable Claude sessionRef persisted. const content = getPaneContent(store, 't1', 'p1') - expect(content?.resumeSessionId).toBe('cli-session-abc-123') + expect(content?.sessionRef).toEqual({ + provider: 'claude', + sessionId: '00000000-0000-4000-8000-000000000201', + }) + expect(content?.resumeSessionId).toBeUndefined() }) it('does not reset the pane or send sdk.create when restore remains pending past the legacy timeout window', () => { diff --git a/test/unit/client/components/agent-chat/AgentChatView.split-pane.test.tsx b/test/unit/client/components/agent-chat/AgentChatView.split-pane.test.tsx index 79751321a..45f1fa3db 100644 --- a/test/unit/client/components/agent-chat/AgentChatView.split-pane.test.tsx +++ b/test/unit/client/components/agent-chat/AgentChatView.split-pane.test.tsx @@ -386,7 +386,7 @@ describe('AgentChatView — split pane (Bug 2)', () => { await waitFor(() => { expect(getAgentTimelinePage).toHaveBeenCalledWith( - 'sess-1', + 'cli-abc', expect.objectContaining({ priority: 'visible', includeBodies: true, revision: 2 }), expect.objectContaining({ signal: expect.any(AbortSignal) }), ) @@ -709,7 +709,7 @@ describe('AgentChatView — split pane (Bug 2)', () => { await waitFor(() => { expect(getAgentTimelinePage).toHaveBeenCalledWith( - 'sess-1', + 'cli-abc', expect.objectContaining({ priority: 'visible', includeBodies: true, revision: 2 }), expect.objectContaining({ signal: expect.any(AbortSignal) }), ) @@ -723,7 +723,7 @@ describe('AgentChatView — split pane (Bug 2)', () => { await waitFor(() => { expect(getAgentTurnBody).toHaveBeenCalledWith( - 'sess-1', + 'cli-abc', 'turn-2', expect.objectContaining({ signal: expect.any(AbortSignal), revision: 2 }), ) diff --git a/test/unit/client/components/terminal-view-utils.test.ts b/test/unit/client/components/terminal-view-utils.test.ts index 8595dc23a..f5be683ca 100644 --- 
a/test/unit/client/components/terminal-view-utils.test.ts +++ b/test/unit/client/components/terminal-view-utils.test.ts @@ -54,4 +54,44 @@ describe('terminal-view-utils', () => { }, }) }) + + it('uses Codex durability state for create only when no durable sessionRef exists', () => { + const codexDurability = { + schemaVersion: 1 as const, + state: 'captured_pre_turn' as const, + candidate: { + provider: 'codex' as const, + candidateThreadId: '019e2a0c-7cef-7281-94df-d0d05d7b9ac3', + rolloutPath: '/home/user/.codex/sessions/2026/05/14/rollout.jsonl', + source: 'thread_started_notification' as const, + capturedAt: 1778743920000, + }, + } + const ref: { current: TerminalPaneContent | null } = { + current: { + kind: 'terminal', + createRequestId: 'req-3', + status: 'creating', + mode: 'codex', + shell: 'system', + codexDurability, + }, + } + + expect(getCreateSessionStateFromRef(ref)).toEqual({ codexDurability }) + + ref.current = { + ...ref.current, + sessionRef: { + provider: 'codex', + sessionId: '019e2a0c-7cef-7281-94df-d0d05d7b9ac3', + }, + } + expect(getCreateSessionStateFromRef(ref)).toEqual({ + sessionRef: { + provider: 'codex', + sessionId: '019e2a0c-7cef-7281-94df-d0d05d7b9ac3', + }, + }) + }) }) diff --git a/test/unit/client/lib/tab-registry-snapshot.test.ts b/test/unit/client/lib/tab-registry-snapshot.test.ts index 276422099..0c40cde96 100644 --- a/test/unit/client/lib/tab-registry-snapshot.test.ts +++ b/test/unit/client/lib/tab-registry-snapshot.test.ts @@ -37,6 +37,54 @@ describe('shouldKeepClosedTab', () => { }) describe('collectPaneSnapshots', () => { + it('serializes candidate-only Codex durability state for registry reopen surfaces', () => { + const codexDurability = { + schemaVersion: 1, + state: 'captured_pre_turn', + candidate: { + provider: 'codex', + candidateThreadId: '019e2413-b8d0-7a98-b5fb-2f4af05baf58', + rolloutPath: '/home/user/.codex/sessions/2026/05/14/rollout.jsonl', + source: 'thread_start_response', + capturedAt: 1778764200000, + }, + 
} as const + const node: PaneNode = { + type: 'leaf', + id: 'pane-codex', + content: { + kind: 'terminal', + createRequestId: 'req-codex', + status: 'running', + mode: 'codex', + shell: 'system', + terminalId: 'term-codex', + serverInstanceId: 'server-1', + codexDurability, + initialCwd: '/home/user/code/freshell', + }, + } + + const snapshots = collectPaneSnapshots(node, 'server-1') + + expect(snapshots).toEqual([{ + paneId: 'pane-codex', + kind: 'terminal', + title: undefined, + payload: { + mode: 'codex', + shell: 'system', + sessionRef: undefined, + codexDurability, + liveTerminal: { + terminalId: 'term-codex', + serverInstanceId: 'server-1', + }, + initialCwd: '/home/user/code/freshell', + }, + }]) + }) + it('serializes agent-chat selection strategies and explicit effort overrides', () => { const node: PaneNode = { type: 'leaf', diff --git a/test/unit/client/store/persistedState.test.ts b/test/unit/client/store/persistedState.test.ts index f85470e16..6bff37372 100644 --- a/test/unit/client/store/persistedState.test.ts +++ b/test/unit/client/store/persistedState.test.ts @@ -11,6 +11,18 @@ import { import { PERSIST_BROADCAST_CHANNEL_NAME } from '../../../../src/store/persistBroadcast' import { STORAGE_KEYS } from '../../../../src/store/storage-keys' +const codexDurability = { + schemaVersion: 1 as const, + state: 'captured_pre_turn' as const, + candidate: { + provider: 'codex' as const, + candidateThreadId: '019e2a0c-7cef-7281-94df-d0d05d7b9ac3', + rolloutPath: '/home/user/.codex/sessions/2026/05/14/rollout.jsonl', + source: 'thread_started_notification' as const, + capturedAt: 1778743920000, + }, +} + describe('persistedState parsers', () => { it('uses v2 namespaced storage and broadcast keys', () => { expect(TABS_STORAGE_KEY).toBe('freshell.tabs.v2') @@ -41,6 +53,26 @@ describe('persistedState parsers', () => { expect(parsed!.version).toBe(0) expect(parsed!.tabs.tabs[0].id).toBe('t1') }) + + it('preserves valid Codex durability state on tabs', () => { + const 
raw = JSON.stringify({ + version: TABS_SCHEMA_VERSION, + tabs: { + activeTabId: 't1', + tabs: [{ + id: 't1', + title: 'Codex', + createdAt: 1, + type: 'terminal', + mode: 'codex', + codexDurability, + }], + }, + }) + + const parsed = parsePersistedTabsRaw(raw) + expect(parsed?.tabs.tabs[0].codexDurability).toEqual(codexDurability) + }) }) describe('parsePersistedPanesRaw', () => { @@ -77,6 +109,33 @@ describe('persistedState parsers', () => { expect(Object.keys(parsed!.layouts)).toEqual(['tab-1']) }) + it('preserves valid Codex durability state on terminal pane content', () => { + const raw = JSON.stringify({ + version: PANES_SCHEMA_VERSION, + layouts: { + 'tab-1': { + type: 'leaf', + id: 'pane-1', + content: { + kind: 'terminal', + createRequestId: 'req-1', + status: 'creating', + mode: 'codex', + shell: 'system', + codexDurability, + }, + }, + }, + activePane: { 'tab-1': 'pane-1' }, + paneTitles: {}, + paneTitleSetByUser: {}, + }) + + const parsed = parsePersistedPanesRaw(raw) + const content = (parsed!.layouts['tab-1'] as any).content + expect(content.codexDurability).toEqual(codexDurability) + }) + it('normalizes legacy Codex recovery_failed panes to creating resume panes', () => { const parsed = parsePersistedPanesRaw(JSON.stringify({ version: 1, diff --git a/test/unit/client/store/selectors/sidebarSelectors.test.ts b/test/unit/client/store/selectors/sidebarSelectors.test.ts index 67ad7fc04..f53568437 100644 --- a/test/unit/client/store/selectors/sidebarSelectors.test.ts +++ b/test/unit/client/store/selectors/sidebarSelectors.test.ts @@ -1,6 +1,6 @@ import { describe, it, expect } from 'vitest' import type { SidebarSessionItem } from '@/store/selectors/sidebarSelectors' -import type { ProjectGroup, CodingCliSession } from '@/store/types' +import type { ProjectGroup, CodingCliSession, BackgroundTerminal } from '@/store/types' import { buildSessionItems, @@ -390,6 +390,163 @@ describe('sidebarSelectors', () => { ]) }) + it('shows running Codex terminals with 
captured identity as non-restorable live rows', () => { + const terminals: BackgroundTerminal[] = [ + { + terminalId: 'term-codex-a', + title: 'Codex CLI', + createdAt: 2_000, + lastActivityAt: 2_100, + status: 'running', + hasClients: true, + cwd: '/repo', + mode: 'codex', + codexDurability: { + schemaVersion: 1, + state: 'captured_pre_turn', + candidate: { + provider: 'codex', + candidateThreadId: 'thread-candidate', + rolloutPath: '/home/user/.codex/sessions/rollout.jsonl', + source: 'thread_start_response', + capturedAt: 2_000, + }, + }, + }, + { + terminalId: 'term-codex-b', + title: 'Codex CLI', + createdAt: 2_050, + lastActivityAt: 2_200, + status: 'running', + hasClients: false, + cwd: '/repo', + mode: 'codex', + codexDurability: { + schemaVersion: 1, + state: 'captured_pre_turn', + candidate: { + provider: 'codex', + candidateThreadId: 'thread-candidate', + rolloutPath: '/home/user/.codex/sessions/rollout.jsonl', + source: 'thread_start_response', + capturedAt: 2_000, + }, + }, + }, + ] + + const items = buildSessionItems([], emptyTabs, emptyPanes, terminals, emptyActivity) + + expect(items).toEqual([ + expect.objectContaining({ + sessionId: 'thread-candidate', + provider: 'codex', + title: 'Codex CLI', + cwd: '/repo', + hasTab: false, + isRunning: true, + runningTerminalId: 'term-codex-a', + runningTerminalIds: ['term-codex-a', 'term-codex-b'], + isRestorable: false, + codexDurabilityState: 'captured_pre_turn', + isFallback: true, + }), + ]) + }) + + it('shows durable Codex terminal identity as restorable even before the server window includes history', () => { + const terminals: BackgroundTerminal[] = [ + { + terminalId: 'term-codex-durable', + title: 'Codex CLI', + createdAt: 2_000, + lastActivityAt: 2_100, + status: 'running', + hasClients: true, + cwd: '/repo', + mode: 'codex', + codexDurability: { + schemaVersion: 1, + state: 'durable', + durableThreadId: 'thread-durable', + candidate: { + provider: 'codex', + candidateThreadId: 'thread-durable', + 
rolloutPath: '/home/user/.codex/sessions/rollout.jsonl', + source: 'thread_start_response', + capturedAt: 2_000, + }, + turnCompletedAt: 2_050, + }, + }, + ] + + const items = buildSessionItems([], emptyTabs, emptyPanes, terminals, emptyActivity) + + expect(items).toEqual([ + expect.objectContaining({ + sessionId: 'thread-durable', + provider: 'codex', + hasTab: false, + isRunning: true, + runningTerminalId: 'term-codex-durable', + isRestorable: true, + codexDurabilityState: 'durable', + }), + ]) + }) + + it('shows persisted Codex pane identity without treating it as a durable resume target', () => { + const tabs = [ + { id: 'tab-codex', title: 'Current Codex', mode: 'codex', createdAt: 2_000 }, + ] as any + const panes = { + layouts: { + 'tab-codex': { + type: 'leaf', + id: 'pane-codex', + content: { + kind: 'terminal', + mode: 'codex', + status: 'running', + createRequestId: 'req-codex', + initialCwd: '/repo', + codexDurability: { + schemaVersion: 1, + state: 'captured_pre_turn', + candidate: { + provider: 'codex', + candidateThreadId: 'thread-pre-durable', + rolloutPath: '/home/user/.codex/sessions/rollout.jsonl', + source: 'restored_client_state', + capturedAt: 2_000, + }, + }, + }, + }, + }, + activePane: { + 'tab-codex': 'pane-codex', + }, + } as any + + const items = buildSessionItems([], tabs, panes, emptyTerminals, emptyActivity) + + expect(items).toEqual([ + expect.objectContaining({ + sessionId: 'thread-pre-durable', + provider: 'codex', + title: 'Current Codex', + cwd: '/repo', + hasTab: true, + isRunning: false, + isRestorable: false, + codexDurabilityState: 'captured_pre_turn', + }), + ]) + }) + it('marks synthesized rows as fallback-only while leaving server-backed rows unmarked', () => { const fallback = createFallbackTab('tab-restored', 'codex-restored', 'Restored Session', '/tmp/restored-project') const items = buildSessionItems( diff --git a/test/unit/server/coding-cli/claude-provider.test.ts b/test/unit/server/coding-cli/claude-provider.test.ts 
index b5a953893..04d343e61 100644 --- a/test/unit/server/coding-cli/claude-provider.test.ts +++ b/test/unit/server/coding-cli/claude-provider.test.ts @@ -16,7 +16,7 @@ import { getClaudeHome } from '../../../../server/claude-home' import { looksLikePath } from '../../../../server/coding-cli/utils' const VALID_CLAUDE_SESSION_ID = '550e8400-e29b-41d4-a716-446655440000' -const SESSION_A = '11111111-1111-1111-1111-111111111111' +const SESSION_A = '11111111-1111-4111-8111-111111111111' const SESSION_B = '22222222-2222-2222-2222-222222222222' const SESSION_C = '33333333-3333-3333-3333-333333333333' const SESSION_D = '44444444-4444-4444-4444-444444444444' diff --git a/test/unit/server/coding-cli/codex-app-server/durability-proof.test.ts b/test/unit/server/coding-cli/codex-app-server/durability-proof.test.ts new file mode 100644 index 000000000..e5985260c --- /dev/null +++ b/test/unit/server/coding-cli/codex-app-server/durability-proof.test.ts @@ -0,0 +1,81 @@ +import fsp from 'node:fs/promises' +import os from 'node:os' +import path from 'node:path' +import { afterEach, beforeEach, describe, expect, it } from 'vitest' +import { proofCodexRollout } from '../../../../../server/coding-cli/codex-app-server/durability-proof.js' + +let tempDir: string + +beforeEach(async () => { + tempDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'freshell-codex-proof-')) +}) + +afterEach(async () => { + await fsp.rm(tempDir, { recursive: true, force: true }) +}) + +async function writeRollout(name: string, content: string): Promise<string> { + const filePath = path.join(tempDir, name) + await fsp.writeFile(filePath, content, 'utf8') + return filePath +} + +describe('proofCodexRollout', () => { + it('succeeds when the first JSONL record is matching session_meta', async () => { + const filePath = await writeRollout( + 'rollout.jsonl', + '{"type":"session_meta","payload":{"id":"thread-1","timestamp":"2026-05-14T00:00:00Z"}}\n{"type":"event_msg"}\n', + ) + + await expect(proofCodexRollout({ + 
rolloutPath: filePath, + candidateThreadId: 'thread-1', + })).resolves.toMatchObject({ + ok: true, + rolloutProofId: 'thread-1', + }) + }) + + it.each([ + ['missing', async () => path.join(tempDir, 'missing.jsonl')], + ['not_regular_file', async () => tempDir], + ['empty', async () => writeRollout('empty.jsonl', '')], + ['malformed_json', async () => writeRollout('malformed.jsonl', '{"type":')], + ['wrong_record_type', async () => writeRollout('wrong-type.jsonl', '{"type":"event_msg","payload":{"id":"thread-1"}}\n')], + ['missing_payload_id', async () => writeRollout('missing-id.jsonl', '{"type":"session_meta","payload":{}}\n')], + ['mismatched_thread_id', async () => writeRollout('mismatch.jsonl', '{"type":"session_meta","payload":{"id":"other"}}\n')], + ] as const)('returns %s for invalid proof files', async (reason, makePath) => { + await expect(proofCodexRollout({ + rolloutPath: await makePath(), + candidateThreadId: 'thread-1', + })).resolves.toMatchObject({ + ok: false, + reason, + }) + }) + + it('requires the first record to match instead of scanning later records', async () => { + const filePath = await writeRollout( + 'later-match.jsonl', + '{"type":"event_msg","payload":{"id":"noise"}}\n{"type":"session_meta","payload":{"id":"thread-1"}}\n', + ) + + await expect(proofCodexRollout({ + rolloutPath: filePath, + candidateThreadId: 'thread-1', + })).resolves.toMatchObject({ + ok: false, + reason: 'wrong_record_type', + }) + }) + + it('rejects relative rollout paths', async () => { + await expect(proofCodexRollout({ + rolloutPath: 'relative/rollout.jsonl', + candidateThreadId: 'thread-1', + })).resolves.toMatchObject({ + ok: false, + reason: 'invalid_path', + }) + }) +}) diff --git a/test/unit/server/coding-cli/codex-app-server/durability-store.test.ts b/test/unit/server/coding-cli/codex-app-server/durability-store.test.ts new file mode 100644 index 000000000..d5e1d2971 --- /dev/null +++ b/test/unit/server/coding-cli/codex-app-server/durability-store.test.ts @@ 
-0,0 +1,178 @@ +import fsp from 'node:fs/promises' +import os from 'node:os' +import path from 'node:path' +import { afterEach, beforeEach, describe, expect, it } from 'vitest' +import { + CodexDurabilityRestoreAmbiguousError, + CodexDurabilityStore, +} from '../../../../../server/coding-cli/codex-app-server/durability-store.js' +import type { CodexDurabilityStoreRecord } from '../../../../../shared/codex-durability.js' + +let tempDir: string + +beforeEach(async () => { + tempDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'freshell-codex-store-')) +}) + +afterEach(async () => { + await fsp.rm(tempDir, { recursive: true, force: true }) +}) + +function record(overrides: Partial<CodexDurabilityStoreRecord> = {}): CodexDurabilityStoreRecord { + const now = Date.now() + return { + schemaVersion: 1, + terminalId: 'term-1', + tabId: 'tab-1', + paneId: 'pane-1', + serverInstanceId: 'srv-1', + state: 'captured_pre_turn', + candidate: { + provider: 'codex', + candidateThreadId: 'thread-1', + rolloutPath: path.join(tempDir, 'rollout.jsonl'), + source: 'thread_start_response', + capturedAt: now, + }, + updatedAt: now, + ...overrides, + } +} + +async function writeRawRecordFile(terminalId: string, content: string): Promise<void> { + await fsp.writeFile(path.join(tempDir, `${encodeURIComponent(terminalId)}.json`), content) +} + +describe('CodexDurabilityStore', () => { + it('atomically writes and reads a record', async () => { + const store = new CodexDurabilityStore({ dir: tempDir }) + const written = await store.write(record()) + + await expect(store.read('term-1')).resolves.toEqual(written) + }) + + it('treats a duplicate matching candidate as idempotent', async () => { + const store = new CodexDurabilityStore({ dir: tempDir }) + const first = record() + await store.write(first) + const second = record({ state: 'turn_in_progress_unproven', updatedAt: first.updatedAt + 1 }) + + await expect(store.write(second)).resolves.toEqual(second) + }) + + it('rejects a mismatched 
candidate for the same terminal', async () => { + const store = new CodexDurabilityStore({ dir: tempDir }) + await store.write(record()) + + await expect(store.write(record({ + candidate: { + provider: 'codex', + candidateThreadId: 'thread-2', + rolloutPath: path.join(tempDir, 'other.jsonl'), + source: 'thread_start_response', + capturedAt: Date.now(), + }, + }))).rejects.toThrow(/candidate mismatch/) + }) + + it('returns undefined for older layouts with no durability store record', async () => { + const store = new CodexDurabilityStore({ dir: tempDir }) + + await expect(store.read('legacy-terminal')).resolves.toBeUndefined() + }) + + it('finds restore records by terminal id', async () => { + const store = new CodexDurabilityStore({ dir: tempDir }) + const stored = await store.write(record()) + + await expect(store.readForRestoreLocator({ terminalId: 'term-1' })).resolves.toEqual(stored) + }) + + it('finds restore records by exact tab and pane identity', async () => { + const store = new CodexDurabilityStore({ dir: tempDir }) + const stored = await store.write(record()) + + await expect(store.readForRestoreLocator({ + tabId: 'tab-1', + paneId: 'pane-1', + serverInstanceId: 'srv-1', + })).resolves.toEqual(stored) + }) + + it('skips bad records during tab and pane restore scans', async () => { + const store = new CodexDurabilityStore({ dir: tempDir }) + const stored = await store.write(record()) + await writeRawRecordFile('malformed-record', '{not-json') + await writeRawRecordFile('schema-invalid-record', JSON.stringify({ + schemaVersion: 1, + terminalId: 'schema-invalid-record', + tabId: 'tab-1', + paneId: 'pane-1', + serverInstanceId: 'srv-1', + state: 'not-a-durability-state', + updatedAt: Date.now(), + })) + await fsp.mkdir(path.join(tempDir, `${encodeURIComponent('directory-record')}.json`)) + + await expect(store.readForRestoreLocator({ + tabId: 'tab-1', + paneId: 'pane-1', + serverInstanceId: 'srv-1', + })).resolves.toEqual(stored) + }) + + it('keeps exact 
terminal id restore lookups strict for bad records', async () => { + const store = new CodexDurabilityStore({ dir: tempDir }) + await writeRawRecordFile('malformed-record', '{not-json') + await writeRawRecordFile('schema-invalid-record', JSON.stringify({ + schemaVersion: 1, + terminalId: 'schema-invalid-record', + tabId: 'tab-1', + paneId: 'pane-1', + serverInstanceId: 'srv-1', + state: 'not-a-durability-state', + updatedAt: Date.now(), + })) + + await expect(store.readForRestoreLocator({ terminalId: 'malformed-record' })).rejects.toThrow(SyntaxError) + await expect(store.readForRestoreLocator({ terminalId: 'schema-invalid-record' })) + .rejects.toThrow(/invalid for terminal schema-invalid-record/) + }) + + it('does not match a wrong pane or server instance', async () => { + const store = new CodexDurabilityStore({ dir: tempDir }) + await store.write(record()) + + await expect(store.readForRestoreLocator({ + tabId: 'tab-1', + paneId: 'pane-other', + serverInstanceId: 'srv-1', + })).resolves.toBeUndefined() + await expect(store.readForRestoreLocator({ + tabId: 'tab-1', + paneId: 'pane-1', + serverInstanceId: 'srv-other', + })).resolves.toBeUndefined() + }) + + it('reports ambiguity instead of choosing by time', async () => { + const store = new CodexDurabilityStore({ dir: tempDir }) + await store.write(record({ terminalId: 'term-1' })) + await store.write(record({ terminalId: 'term-2', updatedAt: Date.now() + 10 })) + await writeRawRecordFile('malformed-record', '{not-json') + + await expect(store.readForRestoreLocator({ + tabId: 'tab-1', + paneId: 'pane-1', + })).rejects.toBeInstanceOf(CodexDurabilityRestoreAmbiguousError) + }) + + it('deletes records idempotently', async () => { + const store = new CodexDurabilityStore({ dir: tempDir }) + await store.write(record()) + + await expect(store.delete('term-1')).resolves.toBeUndefined() + await expect(store.delete('term-1')).resolves.toBeUndefined() + await expect(store.read('term-1')).resolves.toBeUndefined() + }) +}) 
diff --git a/test/unit/server/coding-cli/codex-app-server/launch-planner.test.ts b/test/unit/server/coding-cli/codex-app-server/launch-planner.test.ts index 8e8061986..2fdf0b05b 100644 --- a/test/unit/server/coding-cli/codex-app-server/launch-planner.test.ts +++ b/test/unit/server/coding-cli/codex-app-server/launch-planner.test.ts @@ -13,10 +13,13 @@ function deferred<T = void>() { class FakeRuntime { shutdownCalls = 0 + ensureReadyCalls = 0 startThreadCalls = 0 adopted: Array<{ terminalId: string; generation: number }> = [] loadedThreadListCalls = 0 adoptError?: Error + ensureReadyBlocker?: Promise<void> + ensureReadyError?: Error startThreadBlocker?: Promise<void> shutdownBlocker?: Promise<void> shutdownError?: Error @@ -29,6 +32,9 @@ class FakeRuntime { ) {} async ensureReady() { + this.ensureReadyCalls += 1 + await this.ensureReadyBlocker + if (this.ensureReadyError) throw this.ensureReadyError return { wsUrl: this.wsUrl, processPid: 100, @@ -82,8 +88,14 @@ describe('CodexLaunchPlanner', () => { const second = await planner.planCreate({ cwd: '/repo/two' }) expect(runtimes).toHaveLength(2) - expect(first.remote.wsUrl).toBe('ws://127.0.0.1:43001') - expect(second.remote.wsUrl).toBe('ws://127.0.0.1:43002') + expect(first.remote.wsUrl).toMatch(/^ws:\/\/127\.0\.0\.1:\d+$/) + expect(second.remote.wsUrl).toMatch(/^ws:\/\/127\.0\.0\.1:\d+$/) + expect(first.remote.wsUrl).not.toBe('ws://127.0.0.1:43001') + expect(second.remote.wsUrl).not.toBe('ws://127.0.0.1:43002') + expect(first.sessionId).toBeUndefined() + expect(second.sessionId).toBeUndefined() + expect(runtimes[0].startThreadCalls).toBe(0) + expect(runtimes[1].startThreadCalls).toBe(0) await first.sidecar.adopt({ terminalId: 'term-one', generation: 1 }) await second.sidecar.shutdown() @@ -91,10 +103,12 @@ describe('CodexLaunchPlanner', () => { expect(runtimes[0].adopted).toEqual([{ terminalId: 'term-one', generation: 1 }]) expect(runtimes[0].shutdownCalls).toBe(0) expect(runtimes[1].shutdownCalls).toBe(1) + await 
first.sidecar.shutdown() }) it('shuts down the owned sidecar when planning fails before adoption', async () => { - const runtime = new FakeRuntime('ws://127.0.0.1:43010', 'thread-fail', new Error('start failed')) + const runtime = new FakeRuntime('ws://127.0.0.1:43010', 'thread-fail') + runtime.ensureReadyError = new Error('start failed') const planner = new CodexLaunchPlanner(() => runtime as any) await expect(planner.planCreate({ cwd: '/repo/fail' })).rejects.toThrow('start failed') @@ -103,7 +117,8 @@ describe('CodexLaunchPlanner', () => { }) it('marks planning cleanup teardown failures as sidecar teardown failures', async () => { - const runtime = new FakeRuntime('ws://127.0.0.1:43022', 'thread-fail', new Error('start failed')) + const runtime = new FakeRuntime('ws://127.0.0.1:43022', 'thread-fail') + runtime.ensureReadyError = new Error('start failed') runtime.shutdownError = new Error('verified runtime teardown failed') const planner = new CodexLaunchPlanner(() => runtime as any) @@ -178,19 +193,19 @@ describe('CodexLaunchPlanner', () => { await expect(planner.planCreate({ cwd: '/repo/after-shutdown-complete' })).rejects.toThrow(/shutting down/i) }) - it('rejects and cleans up an in-flight launch plan when shutdown starts before thread creation returns', async () => { + it('rejects and cleans up an in-flight launch plan when shutdown starts before readiness returns', async () => { const runtime = new FakeRuntime('ws://127.0.0.1:43018', 'thread-after-shutdown') - const startThreadGate = deferred() - runtime.startThreadBlocker = startThreadGate.promise + const readinessGate = deferred() + runtime.ensureReadyBlocker = readinessGate.promise const planner = new CodexLaunchPlanner(() => runtime as any) const plan = planner.planCreate({ cwd: '/repo/in-flight' }) - await vi.waitFor(() => expect(runtime.startThreadCalls).toBe(1)) + await vi.waitFor(() => expect(runtime.ensureReadyCalls).toBe(1)) const shutdown = planner.shutdown() await vi.waitFor(() => 
expect(runtime.shutdownCalls).toBe(1)) - startThreadGate.resolve() + readinessGate.resolve() await expect(plan).rejects.toThrow(/shutting down/i) await expect(shutdown).resolves.toBeUndefined() @@ -266,9 +281,10 @@ describe('CodexLaunchPlanner', () => { const second = await planner.planCreate({ cwd: '/repo/two' }) - expect(second.sessionId).toBe('thread-2') + expect(second.sessionId).toBeUndefined() expect(runtimes).toHaveLength(2) expect(runtimes[0].shutdownCalls).toBe(3) + expect(runtimes[1].startThreadCalls).toBe(0) }) it('waits for every planner-owned sidecar shutdown before reporting a teardown failure', async () => { @@ -299,7 +315,7 @@ describe('CodexLaunchPlanner', () => { await expect(shutdown).rejects.toThrow('fast verified runtime teardown failed') }) - it('waits for candidate-local loaded-thread readiness', async () => { + it('does not poll loaded-thread state for resume plans', async () => { const runtime = new FakeRuntime( 'ws://127.0.0.1:43020', 'thread-ready', @@ -310,24 +326,7 @@ describe('CodexLaunchPlanner', () => { const plan = await planner.planCreate({ resumeSessionId: 'thread-ready' }) - await expect(plan.sidecar.waitForLoadedThread('thread-ready', { timeoutMs: 1_000, pollMs: 1 })) - .resolves.toBeUndefined() - expect(runtime.loadedThreadListCalls).toBe(3) - }) - - it('stops loaded-thread readiness polling after sidecar shutdown starts', async () => { - const runtime = new FakeRuntime('ws://127.0.0.1:43021', 'thread-never-loads') - const planner = new CodexLaunchPlanner(() => runtime as any) - - const plan = await planner.planCreate({ resumeSessionId: 'thread-never-loads' }) - const readiness = plan.sidecar.waitForLoadedThread('thread-never-loads', { timeoutMs: 250, pollMs: 20 }) - await vi.waitFor(() => expect(runtime.loadedThreadListCalls).toBeGreaterThan(0)) - - await plan.sidecar.shutdown() - await expect(readiness).rejects.toThrow(/shutting down/i) - - const callsAfterShutdown = runtime.loadedThreadListCalls - await new Promise((resolve) 
=> setTimeout(resolve, 50)) - expect(runtime.loadedThreadListCalls).toBe(callsAfterShutdown) + expect(plan.sessionId).toBe('thread-ready') + expect(runtime.loadedThreadListCalls).toBe(0) }) }) diff --git a/test/unit/server/coding-cli/codex-app-server/launch-retry.test.ts b/test/unit/server/coding-cli/codex-app-server/launch-retry.test.ts new file mode 100644 index 000000000..b42c30f5c --- /dev/null +++ b/test/unit/server/coding-cli/codex-app-server/launch-retry.test.ts @@ -0,0 +1,67 @@ +import { describe, expect, it, vi } from 'vitest' + +import { CodexLaunchConfigError } from '../../../../../server/coding-cli/codex-launch-config.js' +import { planCodexLaunchWithRetry } from '../../../../../server/coding-cli/codex-app-server/launch-retry.js' + +describe('planCodexLaunchWithRetry', () => { + it('retries transient launch-planning failures with linear backoff', async () => { + const plan = { sidecar: { shutdown: vi.fn() } } + const planner = { + planCreate: vi.fn() + .mockRejectedValueOnce(new Error('sidecar not ready')) + .mockRejectedValueOnce(new Error('port not ready')) + .mockResolvedValue(plan), + } + const logger = { warn: vi.fn() } + + await expect(planCodexLaunchWithRetry({ + planner: planner as any, + input: { cwd: '/workspace' } as any, + retryDelayMs: 1, + logger, + })).resolves.toBe(plan) + + expect(planner.planCreate).toHaveBeenCalledTimes(3) + expect(logger.warn).toHaveBeenNthCalledWith(1, expect.objectContaining({ + attempt: 1, + attempts: 5, + delayMs: 1, + cwd: '/workspace', + hasResumeSessionId: false, + }), 'Codex launch planning failed; retrying') + expect(logger.warn).toHaveBeenNthCalledWith(2, expect.objectContaining({ + attempt: 2, + attempts: 5, + delayMs: 2, + }), 'Codex launch planning failed; retrying') + }) + + it('does not retry configuration errors', async () => { + const planner = { + planCreate: vi.fn().mockRejectedValue(new CodexLaunchConfigError('Codex is disabled')), + } + + await expect(planCodexLaunchWithRetry({ + planner: planner 
as any, + input: { cwd: '/workspace' } as any, + retryDelayMs: 1, + })).rejects.toThrow('Codex is disabled') + + expect(planner.planCreate).toHaveBeenCalledTimes(1) + }) + + it('wraps non-Error failures after attempts are exhausted', async () => { + const planner = { + planCreate: vi.fn().mockRejectedValue('temporary failure'), + } + + await expect(planCodexLaunchWithRetry({ + planner: planner as any, + input: { cwd: '/workspace', resumeSessionId: 'thread-1' } as any, + attempts: 2, + retryDelayMs: 1, + })).rejects.toThrow('temporary failure') + + expect(planner.planCreate).toHaveBeenCalledTimes(2) + }) +}) diff --git a/test/unit/server/coding-cli/codex-app-server/legacy-sidecar-dead-code.test.ts b/test/unit/server/coding-cli/codex-app-server/legacy-sidecar-dead-code.test.ts new file mode 100644 index 000000000..c8c642ad6 --- /dev/null +++ b/test/unit/server/coding-cli/codex-app-server/legacy-sidecar-dead-code.test.ts @@ -0,0 +1,15 @@ +import fs from 'node:fs' +import path from 'node:path' + +import { describe, expect, it } from 'vitest' + +const repoRoot = path.resolve(__dirname, '../../../../..') + +describe('Codex app-server sidecar production surface', () => { + it('does not keep the legacy polling sidecar modules alongside the launch-planner path', () => { + expect(fs.existsSync(path.join(repoRoot, 'server/coding-cli/codex-app-server/sidecar.ts'))).toBe(false) + expect(fs.existsSync(path.join(repoRoot, 'server/coding-cli/codex-app-server/durable-rollout-tracker.ts'))).toBe(false) + expect(fs.existsSync(path.join(repoRoot, 'test/unit/server/coding-cli/codex-app-server/sidecar.test.ts'))).toBe(false) + expect(fs.existsSync(path.join(repoRoot, 'test/unit/server/coding-cli/codex-app-server/durable-rollout-tracker.test.ts'))).toBe(false) + }) +}) diff --git a/test/unit/server/coding-cli/codex-app-server/remote-proxy.test.ts b/test/unit/server/coding-cli/codex-app-server/remote-proxy.test.ts new file mode 100644 index 000000000..9daeac6be --- /dev/null +++ 
b/test/unit/server/coding-cli/codex-app-server/remote-proxy.test.ts @@ -0,0 +1,339 @@ +import WebSocket, { WebSocketServer } from 'ws' +import { afterEach, describe, expect, it } from 'vitest' +import { allocateLocalhostPort } from '../../../../../server/local-port.js' +import { CodexRemoteProxy } from '../../../../../server/coding-cli/codex-app-server/remote-proxy.js' + +type UpstreamHandle = { + server: WebSocketServer + wsUrl: string + messages: unknown[] + binaryFlags: boolean[] + sockets: Set<WebSocket> +} + +const upstreams = new Set<UpstreamHandle>() +const proxies = new Set<CodexRemoteProxy>() + +afterEach(async () => { + await Promise.all([...proxies].map(async (proxy) => { + proxies.delete(proxy) + await proxy.close() + })) + await Promise.all([...upstreams].map(async (upstream) => { + upstreams.delete(upstream) + for (const socket of upstream.sockets) socket.close() + await new Promise<void>((resolve) => upstream.server.close(() => resolve())) + })) +}) + +async function startUpstream(handler?: (socket: WebSocket, message: any) => void): Promise<UpstreamHandle> { + const endpoint = await allocateLocalhostPort() + const sockets = new Set<WebSocket>() + const messages: unknown[] = [] + const binaryFlags: boolean[] = [] + const server = await new Promise<WebSocketServer>((resolve) => { + const wss = new WebSocketServer({ host: endpoint.hostname, port: endpoint.port }, () => resolve(wss)) + wss.on('connection', (socket) => { + sockets.add(socket) + socket.on('close', () => sockets.delete(socket)) + socket.on('message', (raw, isBinary) => { + binaryFlags.push(isBinary) + const message = JSON.parse(raw.toString()) + messages.push(message) + handler?.(socket, message) + }) + }) + }) + const handle = { + server, + wsUrl: `ws://${endpoint.hostname}:${endpoint.port}`, + messages, + binaryFlags, + sockets, + } + upstreams.add(handle) + return handle +} + +async function startProxy(upstreamWsUrl: string, options: { + requestHoldTimeoutMs?: number + 
candidateCaptureTimeoutMs?: number + requireCandidatePersistence?: boolean +} = {}): Promise<CodexRemoteProxy> { + const proxy = new CodexRemoteProxy({ upstreamWsUrl, ...options }) + await proxy.start() + proxies.add(proxy) + return proxy +} + +async function connect(wsUrl: string): Promise<WebSocket> { + const socket = new WebSocket(wsUrl) + await new Promise<void>((resolve, reject) => { + socket.once('open', () => resolve()) + socket.once('error', reject) + }) + return socket +} + +function nextMessage(socket: WebSocket): Promise<any> { + return new Promise((resolve) => { + socket.once('message', (raw) => resolve(JSON.parse(raw.toString()))) + }) +} + +function nextMessageFrame(socket: WebSocket): Promise<{ message: any; isBinary: boolean }> { + return new Promise((resolve) => { + socket.once('message', (raw, isBinary) => resolve({ + message: JSON.parse(raw.toString()), + isBinary, + })) + }) +} + +function socketClosed(socket: WebSocket): Promise<void> { + return new Promise((resolve) => { + if (socket.readyState === WebSocket.CLOSED) { + resolve() + return + } + socket.once('close', () => resolve()) + }) +} + +function delay(ms: number): Promise<void> { + return new Promise((resolve) => setTimeout(resolve, ms)) +} + +describe('CodexRemoteProxy', () => { + it('captures a fresh candidate from the thread/start response and forwards the response', async () => { + const upstream = await startUpstream((socket, message) => { + if (message.method === 'thread/start') { + socket.send(JSON.stringify({ + id: message.id, + result: { + thread: { + id: 'thread-1', + path: '/tmp/codex/rollout.jsonl', + ephemeral: false, + }, + }, + })) + } + }) + const proxy = await startProxy(upstream.wsUrl) + const candidates: unknown[] = [] + proxy.onCandidate((candidate) => { + candidates.push(candidate) + proxy.markCandidatePersisted() + }) + const tui = await connect(proxy.wsUrl) + const responsePromise = nextMessageFrame(tui) + + tui.send(JSON.stringify({ id: 1, method: 'thread/start', 
params: {} })) + + await expect(responsePromise).resolves.toMatchObject({ + isBinary: false, + message: { + id: 1, + result: { + thread: { + id: 'thread-1', + path: '/tmp/codex/rollout.jsonl', + }, + }, + }, + }) + expect(upstream.binaryFlags).toEqual([false]) + expect(candidates).toEqual([ + { + source: 'thread_start_response', + thread: { + id: 'thread-1', + path: '/tmp/codex/rollout.jsonl', + ephemeral: false, + }, + }, + ]) + }) + + it('captures a candidate from thread/started notification', async () => { + const upstream = await startUpstream((socket, message) => { + if (message.method === 'initialize') { + socket.send(JSON.stringify({ id: message.id, result: {} })) + socket.send(JSON.stringify({ + method: 'thread/started', + params: { + thread: { + id: 'thread-notified', + path: '/tmp/codex/notified.jsonl', + }, + }, + })) + } + }) + const proxy = await startProxy(upstream.wsUrl) + const candidate = new Promise((resolve) => { + proxy.onCandidate((event) => { + proxy.markCandidatePersisted() + resolve(event) + }) + }) + const tui = await connect(proxy.wsUrl) + + tui.send(JSON.stringify({ id: 1, method: 'initialize', params: {} })) + + await expect(candidate).resolves.toEqual({ + source: 'thread_started_notification', + thread: { + id: 'thread-notified', + path: '/tmp/codex/notified.jsonl', + ephemeral: false, + }, + }) + }) + + it('holds turn/start until candidate persistence is marked complete', async () => { + const upstream = await startUpstream((socket, message) => { + if (message.method === 'turn/start') { + socket.send(JSON.stringify({ id: message.id, result: { ok: true } })) + } + }) + const proxy = await startProxy(upstream.wsUrl, { candidateCaptureTimeoutMs: 1_000 }) + const tui = await connect(proxy.wsUrl) + const responsePromise = nextMessage(tui) + + tui.send(JSON.stringify({ id: 7, method: 'turn/start', params: { threadId: 'thread-1' } })) + await new Promise((resolve) => setTimeout(resolve, 25)) + expect(upstream.messages).toHaveLength(0) + + 
proxy.markCandidatePersisted() + + await expect(responsePromise).resolves.toEqual({ id: 7, result: { ok: true } }) + expect(upstream.messages).toEqual([ + { id: 7, method: 'turn/start', params: { threadId: 'thread-1' } }, + ]) + }) + + it('fails held turn/start and closes sockets when candidate persistence times out', async () => { + const upstream = await startUpstream() + const proxy = await startProxy(upstream.wsUrl, { + requestHoldTimeoutMs: 20, + candidateCaptureTimeoutMs: 1_000, + }) + const repairTriggers: unknown[] = [] + proxy.onRepairTrigger((event) => repairTriggers.push(event)) + const tui = await connect(proxy.wsUrl) + const responsePromise = nextMessage(tui) + + tui.send(JSON.stringify({ id: 9, method: 'turn/start', params: { threadId: 'thread-1' } })) + + await expect(responsePromise).resolves.toMatchObject({ + id: 9, + error: { + code: -32000, + message: expect.stringContaining('persist Codex restore identity'), + }, + }) + await socketClosed(tui) + expect(upstream.messages).toHaveLength(0) + expect(repairTriggers).toContainEqual({ kind: 'candidate_capture_timeout' }) + }) + + it('does not hold turn/start or arm candidate-capture timeout when candidate persistence is not required', async () => { + const upstream = await startUpstream((socket, message) => { + if (message.method === 'turn/start') { + socket.send(JSON.stringify({ id: message.id, result: { ok: true } })) + } + }) + const proxy = await startProxy(upstream.wsUrl, { + requestHoldTimeoutMs: 20, + candidateCaptureTimeoutMs: 20, + requireCandidatePersistence: false, + }) + const repairTriggers: unknown[] = [] + proxy.onRepairTrigger((event) => repairTriggers.push(event)) + const tui = await connect(proxy.wsUrl) + const responsePromise = nextMessage(tui) + + tui.send(JSON.stringify({ id: 11, method: 'turn/start', params: { threadId: 'durable-thread-1' } })) + + await expect(responsePromise).resolves.toEqual({ id: 11, result: { ok: true } }) + expect(upstream.messages).toEqual([ + { id: 11, 
method: 'turn/start', params: { threadId: 'durable-thread-1' } }, + ]) + await new Promise((resolve) => setTimeout(resolve, 50)) + expect(tui.readyState).toBe(WebSocket.OPEN) + expect(repairTriggers).toEqual([]) + }) + + it('closes an idle TUI when candidate capture times out before user input', async () => { + const upstream = await startUpstream() + const proxy = await startProxy(upstream.wsUrl, { + candidateCaptureTimeoutMs: 20, + }) + const repairTriggers: unknown[] = [] + proxy.onRepairTrigger((event) => repairTriggers.push(event)) + const tui = await connect(proxy.wsUrl) + + await socketClosed(tui) + expect(upstream.messages).toHaveLength(0) + expect(repairTriggers).toContainEqual({ kind: 'candidate_capture_timeout' }) + }) + + it('times out candidate capture even when the TUI never connects to the proxy', async () => { + const upstream = await startUpstream() + const proxy = await startProxy(upstream.wsUrl, { + candidateCaptureTimeoutMs: 20, + }) + const repairTriggers: unknown[] = [] + proxy.onRepairTrigger((event) => repairTriggers.push(event)) + + await delay(50) + + expect(upstream.messages).toHaveLength(0) + expect(repairTriggers).toContainEqual({ kind: 'candidate_capture_timeout' }) + }) + + it('does not arm the no-client candidate-capture timeout for durable resumes', async () => { + const upstream = await startUpstream() + const proxy = await startProxy(upstream.wsUrl, { + candidateCaptureTimeoutMs: 20, + requireCandidatePersistence: false, + }) + const repairTriggers: unknown[] = [] + proxy.onRepairTrigger((event) => repairTriggers.push(event)) + + await delay(50) + + expect(upstream.messages).toHaveLength(0) + expect(repairTriggers).toEqual([]) + }) + + it('emits turn/completed notifications', async () => { + const upstream = await startUpstream((socket, message) => { + if (message.method === 'initialize') { + socket.send(JSON.stringify({ id: message.id, result: {} })) + socket.send(JSON.stringify({ + method: 'turn/completed', + params: { threadId: 
'thread-1', turnId: 'turn-1', status: 'completed' }, + })) + } + }) + const proxy = await startProxy(upstream.wsUrl) + const completed = new Promise((resolve) => { + proxy.onTurnCompleted((event) => { + proxy.markCandidatePersisted() + resolve(event) + }) + }) + const tui = await connect(proxy.wsUrl) + + tui.send(JSON.stringify({ id: 1, method: 'initialize', params: {} })) + + await expect(completed).resolves.toEqual({ + threadId: 'thread-1', + turnId: 'turn-1', + params: { threadId: 'thread-1', turnId: 'turn-1', status: 'completed' }, + }) + }) +}) diff --git a/test/unit/server/coding-cli/codex-app-server/restore-decision.test.ts b/test/unit/server/coding-cli/codex-app-server/restore-decision.test.ts new file mode 100644 index 000000000..8b5abe3a3 --- /dev/null +++ b/test/unit/server/coding-cli/codex-app-server/restore-decision.test.ts @@ -0,0 +1,279 @@ +import { describe, expect, it, vi } from 'vitest' +import { CODEX_DURABILITY_SCHEMA_VERSION, type CodexCandidateIdentity, type CodexDurabilityRef } from '../../../../../shared/codex-durability.js' +import { + INVALID_RAW_CODEX_RESUME_MESSAGE, + MISSING_CODEX_SESSION_REF_MESSAGE, + planCodexCreateRestoreDecision, + resolveCodexCreateRestoreDecision, + type CodexLiveRestoreTerminal, +} from '../../../../../server/coding-cli/codex-app-server/restore-decision.js' +import type { CodexRolloutProofResult } from '../../../../../server/coding-cli/codex-app-server/durability-proof.js' + +const candidate: CodexCandidateIdentity = { + provider: 'codex', + candidateThreadId: 'thread-1', + rolloutPath: '/tmp/freshell-codex/rollout.jsonl', + source: 'restored_client_state', + capturedAt: 1, +} + +const durability: CodexDurabilityRef = { + schemaVersion: CODEX_DURABILITY_SCHEMA_VERSION, + state: 'durability_unproven_after_completion', + candidate, + turnCompletedAt: 2, +} + +const durableDurability: CodexDurabilityRef = { + schemaVersion: CODEX_DURABILITY_SCHEMA_VERSION, + state: 'durable', + candidate, + durableThreadId: 
'thread-durable', + turnCompletedAt: 3, +} + +const proofOk: CodexRolloutProofResult = { + ok: true, + candidateThreadId: candidate.candidateThreadId, + rolloutPath: candidate.rolloutPath, + rolloutProofId: candidate.candidateThreadId, +} + +const proofMissing: CodexRolloutProofResult = { + ok: false, + reason: 'missing', + message: 'Codex rollout proof file does not exist.', + candidateThreadId: candidate.candidateThreadId, + rolloutPath: candidate.rolloutPath, +} + +describe('Codex create/restore decision', () => { + it('rejects restore requests that only provide a raw legacy resume id', () => { + expect(planCodexCreateRestoreDecision({ + restoreRequested: true, + legacyResumeSessionId: 'thread-raw', + })).toEqual({ + kind: 'reject_invalid_raw_codex_resume_request', + code: 'INVALID_MESSAGE', + message: INVALID_RAW_CODEX_RESUME_MESSAGE, + }) + }) + + it('rejects non-restore creates that provide a raw legacy Codex resume id', () => { + expect(planCodexCreateRestoreDecision({ + legacyResumeSessionId: 'thread-raw', + })).toEqual({ + kind: 'reject_invalid_raw_codex_resume_request', + code: 'INVALID_MESSAGE', + message: INVALID_RAW_CODEX_RESUME_MESSAGE, + }) + }) + + it('rejects restore requests without sessionRef, durable ref, or candidate', () => { + expect(planCodexCreateRestoreDecision({ restoreRequested: true })).toEqual({ + kind: 'reject_missing_codex_session_ref', + code: 'RESTORE_UNAVAILABLE', + message: MISSING_CODEX_SESSION_REF_MESSAGE, + }) + }) + + it('routes canonical sessionRef restores without using candidate proof', async () => { + const proofRollout = vi.fn(async () => proofOk) + + const decision = await resolveCodexCreateRestoreDecision({ + restoreRequested: true, + legacyResumeSessionId: 'thread-raw', + sessionRef: { provider: 'codex', sessionId: 'thread-durable' }, + codexDurability: durability, + proofRollout, + }) + + expect(decision).toEqual({ + kind: 'durable_session_ref_resume', + sessionRef: { provider: 'codex', sessionId: 'thread-durable' }, 
+ sessionId: 'thread-durable', + }) + expect(proofRollout).not.toHaveBeenCalled() + }) + + it('uses durable Codex durability state as a canonical restore sessionRef', () => { + expect(planCodexCreateRestoreDecision({ + restoreRequested: true, + codexDurability: { + schemaVersion: CODEX_DURABILITY_SCHEMA_VERSION, + state: 'durable', + durableThreadId: 'thread-durable', + }, + })).toEqual({ + kind: 'durable_session_ref_resume', + sessionRef: { provider: 'codex', sessionId: 'thread-durable' }, + sessionId: 'thread-durable', + }) + }) + + it('uses explicit sessionRef before durable Codex durability state', () => { + expect(planCodexCreateRestoreDecision({ + restoreRequested: true, + sessionRef: { provider: 'codex', sessionId: 'thread-explicit' }, + codexDurability: durableDurability, + })).toEqual({ + kind: 'durable_session_ref_resume', + sessionRef: { provider: 'codex', sessionId: 'thread-explicit' }, + sessionId: 'thread-explicit', + }) + }) + + it('uses durable Codex durability state before candidate proof', async () => { + const proofRollout = vi.fn(async () => proofOk) + + const decision = await resolveCodexCreateRestoreDecision({ + restoreRequested: true, + codexDurability: durableDurability, + proofRollout, + }) + + expect(decision).toEqual({ + kind: 'durable_session_ref_resume', + sessionRef: { provider: 'codex', sessionId: 'thread-durable' }, + sessionId: 'thread-durable', + }) + expect(proofRollout).not.toHaveBeenCalled() + }) + + it('rejects raw legacy resume ids even when durable Codex durability is present without sessionRef', () => { + expect(planCodexCreateRestoreDecision({ + restoreRequested: true, + legacyResumeSessionId: 'thread-raw', + codexDurability: durableDurability, + })).toEqual({ + kind: 'reject_invalid_raw_codex_resume_request', + code: 'INVALID_MESSAGE', + message: INVALID_RAW_CODEX_RESUME_MESSAGE, + }) + }) + + it('plans candidate proof before a restored candidate can become durable', () => { + expect(planCodexCreateRestoreDecision({ + 
restoreRequested: true, + codexDurability: durability, + })).toEqual({ + kind: 'proof_existing_candidate_first', + candidate, + }) + }) + + it('ignores captured Codex candidates for non-restore fresh creates', () => { + expect(planCodexCreateRestoreDecision({ + restoreRequested: false, + codexDurability: durability, + })).toEqual({ + kind: 'fresh_codex_launch', + }) + }) + + it('ignores durable Codex durability state for non-restore fresh creates', () => { + expect(planCodexCreateRestoreDecision({ + restoreRequested: false, + codexDurability: durableDurability, + })).toEqual({ + kind: 'fresh_codex_launch', + }) + }) + + it('uses exact rollout proof as the durable session id and returns a matching live terminal when present', async () => { + const liveTerminal: CodexLiveRestoreTerminal = { + terminalId: 'term-live', + createdAt: 10, + codexDurability: durability, + } + + const decision = await resolveCodexCreateRestoreDecision({ + restoreRequested: true, + codexDurability: durability, + proofRollout: async () => proofOk, + findLiveTerminalByCandidate: () => liveTerminal, + }) + + expect(decision).toEqual({ + kind: 'proof_succeeded_resume_durable', + candidate, + proof: proofOk, + sessionId: 'thread-1', + liveTerminal, + }) + }) + + it('attaches the exact live candidate when proof fails but the terminal still exists', async () => { + const liveTerminal: CodexLiveRestoreTerminal = { + terminalId: 'term-unproved-live', + createdAt: 10, + codexDurability: durability, + } + + const decision = await resolveCodexCreateRestoreDecision({ + restoreRequested: true, + codexDurability: durability, + proofRollout: async () => proofMissing, + findLiveTerminalByCandidate: () => liveTerminal, + }) + + expect(decision).toEqual({ + kind: 'proof_failed_attach_live_candidate', + candidate, + proof: proofMissing, + liveTerminal, + }) + }) + + it('fresh-creates with a restore-failed marker when candidate proof fails and no exact live terminal exists', async () => { + const decision = 
await resolveCodexCreateRestoreDecision({ + restoreRequested: true, + codexDurability: durability, + proofRollout: async () => proofMissing, + findLiveTerminalByCandidate: () => undefined, + }) + + expect(decision).toEqual({ + kind: 'proof_failed_fresh_create', + candidate, + proof: proofMissing, + clearCodexDurability: true, + restoreError: { + code: 'RESTORE_UNAVAILABLE', + reason: 'durable_artifact_missing', + }, + }) + }) + + it('does not accept a loose live terminal candidate returned by the caller', async () => { + const looseLiveTerminal: CodexLiveRestoreTerminal = { + terminalId: 'term-loose-live', + createdAt: 10, + codexDurability: { + ...durability, + candidate: { + ...candidate, + candidateThreadId: 'thread-other', + }, + }, + } + + const decision = await resolveCodexCreateRestoreDecision({ + restoreRequested: true, + codexDurability: durability, + proofRollout: async () => proofMissing, + findLiveTerminalByCandidate: () => looseLiveTerminal, + }) + + expect(decision).toEqual({ + kind: 'proof_failed_fresh_create', + candidate, + proof: proofMissing, + clearCodexDurability: true, + restoreError: { + code: 'RESTORE_UNAVAILABLE', + reason: 'durable_artifact_missing', + }, + }) + }) +}) diff --git a/test/unit/server/coding-cli/codex-app-server/runtime.test.ts b/test/unit/server/coding-cli/codex-app-server/runtime.test.ts index b2108bd4f..0986bbffe 100644 --- a/test/unit/server/coding-cli/codex-app-server/runtime.test.ts +++ b/test/unit/server/coding-cli/codex-app-server/runtime.test.ts @@ -204,6 +204,28 @@ describe('CodexAppServerRuntime', () => { expect(runtime.status()).toBe('running') }) + it('disables Codex apps while starting Freshell-managed app-server processes', async () => { + const tempDir = await makeTempDir() + const argLogPath = path.join(tempDir, 'argv.json') + const runtime = createRuntime({ + env: { + FAKE_CODEX_APP_SERVER_ARG_LOG: argLogPath, + }, + }) + + await runtime.ensureReady() + + const payload = JSON.parse(await 
fsp.readFile(argLogPath, 'utf8')) as { + argv: string[] + } + const args = payload.argv + + expect(args).toContain('-c') + expect(args).toContain('features.apps=false') + expect(args.indexOf('features.apps=false')).toBeLessThan(args.indexOf('app-server')) + expect(args).toContain('--listen') + }) + it('rejects before spawning on platforms without Linux /proc ownership support', async () => { const originalPlatform = Object.getOwnPropertyDescriptor(process, 'platform') if (!originalPlatform?.configurable) { @@ -363,6 +385,44 @@ describe('CodexAppServerRuntime', () => { } }) + it('keeps the same sidecar when wrapper identity is transiently incomplete', async () => { + const tempDir = await makeTempDir() + const metadataDir = path.join(tempDir, 'metadata') + const processGroups: number[] = [] + const seenProcessGroups = new Set<number>() + let identityReadAttempts = 0 + const runtime = createRuntime({ + metadataDir, + serverInstanceId: 'srv-runtime-test', + startupAttemptLimit: 1, + startupAttemptTimeoutMs: 500, + processIdentityReader: async (pid) => { + identityReadAttempts += 1 + if (identityReadAttempts === 1) { + return { commandLine: [], cwd: null, startTimeTicks: null } + } + return readWrapperIdentityForTest(pid) + }, + metadataWriter: async (filePath, metadata) => { + if (!seenProcessGroups.has(metadata.processGroupId)) { + seenProcessGroups.add(metadata.processGroupId) + processGroups.push(metadata.processGroupId) + } + await fsp.mkdir(path.dirname(filePath), { recursive: true }) + await fsp.writeFile(filePath, JSON.stringify(metadata), 'utf8') + }, + }) + + const ready = await runtime.ensureReady() + const record = JSON.parse(await fsp.readFile(ready.metadataPath, 'utf8')) + + expect(processGroups).toEqual([ready.processGroupId]) + expect(identityReadAttempts).toBe(2) + expect(record.wrapperIdentity.commandLine.length).toBeGreaterThan(0) + expect(record.wrapperIdentity.cwd).toEqual(expect.any(String)) + 
expect(record.wrapperIdentity.startTimeTicks).toEqual(expect.any(Number)) + }, 3_000) + it('tears down both the wrapper and native child in its process group', async () => { const metadataDir = await makeTempDir() const nativePidFile = path.join(metadataDir, 'native.pid') @@ -494,7 +554,7 @@ describe('CodexAppServerRuntime', () => { requestTimeoutMs: 1_000, processIdentityReader: async (pid) => { identityReadAttempts += 1 - if (identityReadAttempts === 1) return null + if (pid === processGroups[0]) return null return readWrapperIdentityForTest(pid) }, metadataWriter: async (filePath, metadata) => { @@ -514,7 +574,7 @@ describe('CodexAppServerRuntime', () => { const record = JSON.parse(await fsp.readFile(ready.metadataPath, 'utf8')) expect(processGroups).toHaveLength(2) - expect(identityReadAttempts).toBe(2) + expect(identityReadAttempts).toBeGreaterThan(2) expect(previousAttemptGoneBeforeRetry).toBe(true) expect(record.processGroupId).toBe(processGroups[1]) expect(record.wrapperIdentity.startTimeTicks).toEqual(expect.any(Number)) @@ -535,7 +595,7 @@ describe('CodexAppServerRuntime', () => { requestTimeoutMs: 1_000, processIdentityReader: async (pid) => { identityReadAttempts += 1 - if (identityReadAttempts === 1) { + if (pid === processGroups[0]) { return { commandLine: [], cwd: null, startTimeTicks: null } } return readWrapperIdentityForTest(pid) @@ -557,7 +617,7 @@ describe('CodexAppServerRuntime', () => { const record = JSON.parse(await fsp.readFile(ready.metadataPath, 'utf8')) expect(processGroups).toHaveLength(2) - expect(identityReadAttempts).toBe(2) + expect(identityReadAttempts).toBeGreaterThan(2) expect(previousAttemptGoneBeforeRetry).toBe(true) expect(record.processGroupId).toBe(processGroups[1]) expect(record.wrapperIdentity.commandLine.length).toBeGreaterThan(0) @@ -921,7 +981,46 @@ describe('CodexAppServerRuntime', () => { ignoredLegacyRecords: [], skippedActiveOwnershipIds: [], failedOwnershipIds: ['ownership-alpha', 'ownership-beta'], - 
})).toThrow(/startup reaper failed.*ownership-alpha.*ownership-beta/i) + })).toThrow(/startup reaper blocked startup.*failed to reap 2 ownership record.*ownership-alpha.*ownership-beta/i) + }) + + it('reports active live sidecar owners separately from failed cleanup', () => { + let thrown: Error | undefined + + try { + assertCodexStartupReaperSucceeded({ + reapedOwnershipIds: [], + ignoredLegacyRecords: [], + skippedActiveOwnershipIds: ['active-owner'], + failedOwnershipIds: [], + }) + } catch (error) { + thrown = error as Error + } + + expect(thrown).toBeDefined() + expect(thrown?.message).toContain('still owned by a live Freshell server/process') + expect(thrown?.message).toContain('active-owner') + expect(thrown?.message).not.toContain('failed to reap 1 ownership record(s): active-owner') + }) + + it('reports mixed active owners and failed reaps without conflating them', () => { + let thrown: Error | undefined + + try { + assertCodexStartupReaperSucceeded({ + reapedOwnershipIds: [], + ignoredLegacyRecords: [], + skippedActiveOwnershipIds: ['active-owner'], + failedOwnershipIds: ['failed-owner'], + }) + } catch (error) { + thrown = error as Error + } + + expect(thrown).toBeDefined() + expect(thrown?.message).toContain('failed to reap 1 ownership record(s): failed-owner') + expect(thrown?.message).toContain('still owned by a live Freshell server/process: active-owner') }) it('blocks startup when a new-schema ownership record is skipped because the owner pid is live', async () => { @@ -1215,6 +1314,51 @@ describe('CodexAppServerRuntime', () => { }) }) + it('re-emits turn notifications from the sidecar client', async () => { + const runtime = createRuntime({ + env: { + FAKE_CODEX_APP_SERVER_BEHAVIOR: JSON.stringify({ + notificationsAfterMethods: { + 'thread/loaded/list': [ + { + method: 'turn/started', + params: { threadId: 'thread-1', turnId: 'turn-1' }, + }, + { + method: 'turn/completed', + params: { threadId: 'thread-1', turnId: 'turn-1', status: 'completed' }, 
+ }, + ], + }, + }), + }, + }) + const started: unknown[] = [] + const completed: unknown[] = [] + const unsubscribeStarted = runtime.onTurnStarted((event) => started.push(event)) + const unsubscribeCompleted = runtime.onTurnCompleted((event) => completed.push(event)) + + await runtime.listLoadedThreads() + await new Promise((resolve) => setTimeout(resolve, 25)) + unsubscribeStarted() + unsubscribeCompleted() + + expect(started).toEqual([ + { + threadId: 'thread-1', + turnId: 'turn-1', + params: { threadId: 'thread-1', turnId: 'turn-1' }, + }, + ]) + expect(completed).toEqual([ + { + threadId: 'thread-1', + turnId: 'turn-1', + params: { threadId: 'thread-1', turnId: 'turn-1', status: 'completed' }, + }, + ]) + }) + it('drops cached state after an unexpected child exit and starts a fresh process on the next call', async () => { const runtime = createRuntime() diff --git a/test/unit/server/mcp/freshell-tool.test.ts b/test/unit/server/mcp/freshell-tool.test.ts index d989ed28f..cbb962482 100644 --- a/test/unit/server/mcp/freshell-tool.test.ts +++ b/test/unit/server/mcp/freshell-tool.test.ts @@ -102,6 +102,44 @@ describe('executeAction -- tab actions', () => { expect(mockClient.post.mock.calls.at(-1)?.[1]).not.toHaveProperty('resumeSessionId') }) + it('new-tab rejects raw Codex resume ids', async () => { + mockClient.post.mockResolvedValue({ id: 't1' }) + + const result = await executeAction('new-tab', { + name: 'Codex', + mode: 'codex', + resume: 'thread-pre-durable', + }) + + expect(result).toEqual({ + error: 'Restore requires sessionRef; resumeSessionId is a legacy field and cannot be used as restore identity.', + hint: 'Use sessionRef: { provider: "codex", sessionId } after Codex identity is durable.', + }) + expect(mockClient.post).not.toHaveBeenCalled() + }) + + it('new-tab passes explicit canonical Codex sessionRef', async () => { + mockClient.post.mockResolvedValue({ id: 't1' }) + + await executeAction('new-tab', { + name: 'Codex', + mode: 'codex', + sessionRef: 
{ + provider: 'codex', + sessionId: '019e180a-9e92-7b63-9189-edaec526ad1a', + }, + }) + + expect(mockClient.post).toHaveBeenCalledWith('/api/tabs', expect.objectContaining({ + name: 'Codex', + mode: 'codex', + sessionRef: { + provider: 'codex', + sessionId: '019e180a-9e92-7b63-9189-edaec526ad1a', + }, + })) + }) + it('list-tabs calls GET /api/tabs', async () => { mockClient.get.mockResolvedValue({ tabs: [] }) await executeAction('list-tabs') @@ -166,6 +204,55 @@ describe('executeAction -- pane actions', () => { ) }) + it('split-pane passes explicit canonical Codex sessionRef', async () => { + mockClient.get.mockImplementation((path: string) => { + if (path === '/api/tabs') return Promise.resolve({ tabs: [{ id: 't1', activePaneId: 'p1' }], activeTabId: 't1' }) + if (path.includes('/api/panes')) return Promise.resolve({ panes: [{ id: 'p1', index: 0, kind: 'terminal', terminalId: 'term-1' }] }) + return Promise.resolve({}) + }) + mockClient.post.mockResolvedValue({ ok: true }) + + await executeAction('split-pane', { + target: 'p1', + mode: 'codex', + sessionRef: { + provider: 'codex', + sessionId: '019e180a-9e92-7b63-9189-edaec526ad1a', + }, + }) + + expect(mockClient.post).toHaveBeenCalledWith( + expect.stringContaining('/api/panes/p1/split'), + expect.objectContaining({ + mode: 'codex', + sessionRef: { + provider: 'codex', + sessionId: '019e180a-9e92-7b63-9189-edaec526ad1a', + }, + }), + ) + }) + + it('split-pane rejects raw Codex resume ids', async () => { + mockClient.get.mockImplementation((path: string) => { + if (path === '/api/tabs') return Promise.resolve({ tabs: [{ id: 't1', activePaneId: 'p1' }], activeTabId: 't1' }) + if (path.includes('/api/panes')) return Promise.resolve({ panes: [{ id: 'p1', index: 0, kind: 'terminal', terminalId: 'term-1' }] }) + return Promise.resolve({}) + }) + + const result = await executeAction('split-pane', { + target: 'p1', + mode: 'codex', + resume: 'thread-pre-durable', + }) + + expect(result).toEqual({ + error: 'Restore 
requires sessionRef; resumeSessionId is a legacy field and cannot be used as restore identity.', + hint: 'Use sessionRef: { provider: "codex", sessionId } after Codex identity is durable.', + }) + expect(mockClient.post).not.toHaveBeenCalled() + }) + it('list-panes calls GET /api/panes', async () => { mockClient.get.mockResolvedValue({ panes: [] }) await executeAction('list-panes') @@ -244,6 +331,46 @@ describe('executeAction -- pane actions', () => { await executeAction('respawn-pane', { target: 'p1' }) expect(mockClient.post).toHaveBeenCalledWith(expect.stringContaining('/api/panes/p1/respawn'), expect.anything()) }) + + it('respawn-pane passes explicit canonical Codex sessionRef', async () => { + mockClient.post.mockResolvedValue({ ok: true }) + + await executeAction('respawn-pane', { + target: 'p1', + mode: 'codex', + sessionRef: { + provider: 'codex', + sessionId: '019e180a-9e92-7b63-9189-edaec526ad1a', + }, + }) + + expect(mockClient.post).toHaveBeenCalledWith( + expect.stringContaining('/api/panes/p1/respawn'), + expect.objectContaining({ + mode: 'codex', + sessionRef: { + provider: 'codex', + sessionId: '019e180a-9e92-7b63-9189-edaec526ad1a', + }, + }), + ) + }) + + it('respawn-pane rejects raw Codex resume ids', async () => { + mockClient.post.mockResolvedValue({ ok: true }) + + const result = await executeAction('respawn-pane', { + target: 'p1', + mode: 'codex', + resume: 'thread-pre-durable', + }) + + expect(result).toEqual({ + error: 'Restore requires sessionRef; resumeSessionId is a legacy field and cannot be used as restore identity.', + hint: 'Use sessionRef: { provider: "codex", sessionId } after Codex identity is durable.', + }) + expect(mockClient.post).not.toHaveBeenCalled() + }) }) describe('executeAction -- terminal I/O', () => { @@ -982,6 +1109,25 @@ describe('executeAction -- new-tab with prompt sends keys', () => { ) }) + it('new-tab with a Codex prompt asks the server to wait for Codex identity capture', async () => { + 
mockClient.post.mockImplementation((path: string) => { + if (path === '/api/tabs') { + return Promise.resolve({ status: 'ok', data: { id: 't1', paneId: 'p-new' } }) + } + return Promise.resolve({ ok: true }) + }) + + await executeAction('new-tab', { name: 'Work', mode: 'codex', prompt: 'build the thing' }) + + expect(mockClient.post).toHaveBeenCalledWith( + '/api/panes/p-new/send-keys', + expect.objectContaining({ + data: 'build the thing\r', + waitForCodexIdentity: true, + }), + ) + }) + it('new-tab without prompt does not send keys', async () => { mockClient.post.mockResolvedValue({ status: 'ok', data: { id: 't1', paneId: 'p-new' } }) await executeAction('new-tab', { name: 'Work', mode: 'claude' }) diff --git a/test/unit/server/production-edge-cases.test.ts b/test/unit/server/production-edge-cases.test.ts index d75417d98..b6be14370 100644 --- a/test/unit/server/production-edge-cases.test.ts +++ b/test/unit/server/production-edge-cases.test.ts @@ -509,7 +509,7 @@ describe('TerminalRegistry Production Edge Cases', () => { registry = new TerminalRegistry() const result = registry.input('nonexistent-terminal-id', 'test') - expect(result).toBe(false) + expect(result).toEqual({ status: 'no_terminal' }) }) it('handles input to exited terminal', () => { @@ -522,7 +522,7 @@ describe('TerminalRegistry Production Edge Cases', () => { emitExit(0) const result = registry.input(record.terminalId, 'test') - expect(result).toBe(false) + expect(result).toEqual({ status: 'not_running' }) }) it('handles resize with extreme dimensions', () => { diff --git a/test/unit/server/terminal-lifecycle.test.ts b/test/unit/server/terminal-lifecycle.test.ts index 71db44d00..cd585caa6 100644 --- a/test/unit/server/terminal-lifecycle.test.ts +++ b/test/unit/server/terminal-lifecycle.test.ts @@ -664,7 +664,7 @@ describe('TerminalRegistry Lifecycle', () => { pty._emitExit(0) const result = registry.input(term.terminalId, 'some input') - expect(result).toBe(false) + expect(result).toEqual({ status: 
'not_running' }) }) it('should not call pty.write on exited terminal', () => { @@ -679,7 +679,7 @@ describe('TerminalRegistry Lifecycle', () => { it('should return false for input to non-existent terminal', () => { const result = registry.input('non-existent-id', 'some input') - expect(result).toBe(false) + expect(result).toEqual({ status: 'no_terminal' }) }) it('should update lastActivityAt on successful input', () => { diff --git a/test/unit/server/terminal-registry.codex-recovery.test.ts b/test/unit/server/terminal-registry.codex-recovery.test.ts index c544d66dc..cbe72377e 100644 --- a/test/unit/server/terminal-registry.codex-recovery.test.ts +++ b/test/unit/server/terminal-registry.codex-recovery.test.ts @@ -1,8 +1,6 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' -import WebSocket from 'ws' import { TerminalRegistry } from '../../../server/terminal-registry.js' -import { TerminalStreamBroker } from '../../../server/terminal-stream/broker.js' -import type { CodexThreadLifecycleEvent } from '../../../server/coding-cli/codex-app-server/client.js' +import type { CodexLaunchSidecar } from '../../../server/coding-cli/codex-app-server/launch-planner.js' type MockPty = { onData: ReturnType<typeof vi.fn> @@ -57,70 +55,32 @@ function createMockPty(): MockPty { } } -async function lastPty(): Promise<MockPty> { - const pty = await import('node-pty') - return vi.mocked(pty.spawn).mock.results.at(-1)?.value as MockPty -} - async function spawnedPtys(): Promise<MockPty[]> { const pty = await import('node-pty') return vi.mocked(pty.spawn).mock.results.map((result) => result.value as MockPty) } -async function loggerWarnCalls(): Promise<Array<[Record<string, any>, string]>> { - const { logger } = await import('../../../server/logger.js') - return vi.mocked(logger.warn).mock.calls as Array<[Record<string, any>, string]> -} - -function createMockWs(connectionId: string) { - return { - bufferedAmount: 0, - readyState: WebSocket.OPEN, - send: vi.fn(), - 
close: vi.fn(), - connectionId, - } -} - -function sentPayloads(ws: ReturnType<typeof createMockWs>) { - return ws.send.mock.calls - .map(([raw]) => (typeof raw === 'string' ? JSON.parse(raw) : raw)) - .filter((payload): payload is Record<string, any> => !!payload && typeof payload === 'object') -} - -type MockSidecarAttachment = { - terminalId: string - onDurableSession: (sessionId: string) => void - onThreadLifecycle: (event: CodexThreadLifecycleEvent) => void - onFatal: (error: Error, source?: 'sidecar_fatal' | 'app_server_exit' | 'app_server_client_disconnect') => void +function deferred<T = void>() { + let resolve!: (value: T | PromiseLike<T>) => void + let reject!: (reason?: unknown) => void + const promise = new Promise<T>((res, rej) => { + resolve = res + reject = rej + }) + return { promise, resolve, reject } } -function createMockSidecar(options: { onAttach?: (attachment: MockSidecarAttachment) => void } = {}) { - let attachment: MockSidecarAttachment | undefined +function createFakeSidecar(options: { + shutdown?: CodexLaunchSidecar['shutdown'] +} = {}): CodexLaunchSidecar { return { - api: { - attachTerminal: vi.fn((next: MockSidecarAttachment) => { - attachment = next - options.onAttach?.(next) - }), - shutdown: vi.fn().mockResolvedValue(undefined), - }, - emitDurableSession(sessionId: string) { - attachment?.onDurableSession(sessionId) - }, - emitLifecycle(event: CodexThreadLifecycleEvent) { - attachment?.onThreadLifecycle(event) - }, - emitFatal( - error = new Error('fake sidecar fatal'), - source: 'sidecar_fatal' | 'app_server_exit' | 'app_server_client_disconnect' = 'sidecar_fatal', - ) { - attachment?.onFatal(error, source) - }, + adopt: vi.fn().mockResolvedValue(undefined), + shutdown: vi.fn(options.shutdown ?? 
(async () => undefined)), + onLifecycleLoss: vi.fn(() => vi.fn()), } } -describe('TerminalRegistry Codex recovery generation guards', () => { +describe('TerminalRegistry Codex durable recovery', () => { let registry: TerminalRegistry beforeEach(async () => { @@ -135,985 +95,151 @@ describe('TerminalRegistry Codex recovery generation guards', () => { vi.useRealTimers() }) - it('ignores stale generation PTY data and exit without mutating stable output or final state', async () => { - const record = registry.create({ mode: 'codex', cwd: '/repo' }) - const mockPty = await lastPty() - const onData = mockPty.onData.mock.calls[0][0] - const onExit = mockPty.onExit.mock.calls[0][0] - record.codex!.workerGeneration = 2 - - onData('stale output') - onExit({ exitCode: 9, signal: 0 }) - - expect(record.buffer.snapshot()).toBe('') - expect(record.status).toBe('running') - }) - - it('ignores recovery-retire generation output and exit', async () => { - const record = registry.create({ mode: 'codex', cwd: '/repo' }) - const mockPty = await lastPty() - const onData = mockPty.onData.mock.calls[0][0] - const onExit = mockPty.onExit.mock.calls[0][0] - record.codex!.retiringGenerations.add(1) - record.codex!.closeReasonByGeneration.set(1, 'recovery_retire') - - onData('retired output') - onExit({ exitCode: 9, signal: 0 }) - - expect(record.buffer.snapshot()).toBe('') - expect(record.status).toBe('running') - }) - - it('treats explicit user final close as final and emits terminal.exit', async () => { - const exited = vi.fn() - registry.on('terminal.exit', exited) - const record = registry.create({ mode: 'codex', cwd: '/repo' }) - - registry.kill(record.terminalId) - - expect(record.codex!.closeReasonByGeneration.get(1)).toBe('user_final_close') - expect(record.status).toBe('exited') - expect(exited).toHaveBeenCalledWith({ terminalId: record.terminalId, exitCode: 0 }) - }) - - it('treats in-TUI PTY exit for a durable Codex session as recoverable, not final', async () => { - const exited 
= vi.fn() - registry.on('terminal.exit', exited) - const record = registry.create({ - mode: 'codex', - cwd: '/repo', - resumeSessionId: 'thread-durable-1', - }) - const mockPty = await lastPty() - const onExit = mockPty.onExit.mock.calls[0][0] - - onExit({ exitCode: 0, signal: 0 }) - - expect(record.status).toBe('running') - expect(record.codex!.recoveryState).toBe('recovering_durable') - expect(record.codex!.durableSessionId).toBe('thread-durable-1') - expect(exited).not.toHaveBeenCalled() - }) - - it('initializes durable Codex state from an explicit resume session id', () => { - const record = registry.create({ - mode: 'codex', - cwd: '/repo', - resumeSessionId: 'thread-durable-1', - }) - - expect(record.codex?.durableSessionId).toBe('thread-durable-1') - expect(record.codex?.recoveryState).toBe('running_durable') - expect(record.resumeSessionId).toBe('thread-durable-1') - }) - - it('keeps non-Codex PTY exit final', async () => { + it('recovers a durable Codex terminal when the visible PTY exits unexpectedly', async () => { const exited = vi.fn() registry.on('terminal.exit', exited) - const record = registry.create({ mode: 'shell', cwd: '/repo' }) - const mockPty = await lastPty() - const onExit = mockPty.onExit.mock.calls[0][0] - - onExit({ exitCode: 3, signal: 0 }) - - expect(record.status).toBe('exited') - expect(exited).toHaveBeenCalledWith({ terminalId: record.terminalId, exitCode: 3 }) - }) - - it('replaces a durable Codex worker bundle after PTY exit without finalizing the terminal', async () => { - const exited = vi.fn() - const status = vi.fn() - registry.on('terminal.exit', exited) - registry.on('terminal.status', status) - const initialSidecar = createMockSidecar() - const replacementSidecar = createMockSidecar() - const launchFactory = vi.fn().mockResolvedValue({ + const currentSidecar = createFakeSidecar() + const replacementSidecar = createFakeSidecar() + const planCreate = vi.fn(async () => ({ sessionId: 'thread-durable-1', remote: { wsUrl: 
'ws://127.0.0.1:46002/' }, - sidecar: replacementSidecar.api, - }) - const record = registry.create({ - mode: 'codex', - cwd: '/repo', - resumeSessionId: 'thread-durable-1', - providerSettings: { - codexAppServer: { wsUrl: 'ws://127.0.0.1:46001/' }, - model: 'codex-test', - }, - codexSidecar: initialSidecar.api, - codexLaunchFactory: launchFactory, - envContext: { tabId: 'tab-1', paneId: 'pane-1' }, - }) - const oldPty = await lastPty() - const onExit = oldPty.onExit.mock.calls[0][0] - - onExit({ exitCode: 0, signal: 0 }) - - await vi.waitFor(() => expect(launchFactory).toHaveBeenCalledTimes(1)) - await vi.waitFor(() => expect(record.codex?.workerGeneration).toBe(2)) - const allPtys = await spawnedPtys() - const replacementPty = allPtys.at(-1)! - const replacementSpawnArgs = (await import('node-pty')).spawn.mock.calls.at(-1)?.[1] as string[] - - expect(record.status).toBe('running') - expect(record.terminalId).toBeDefined() - expect(record.codex?.durableSessionId).toBe('thread-durable-1') - expect(record.codex?.recoveryState).toBe('recovering_durable') - expect(record.codex?.retiringGenerations.has(1)).toBe(true) - expect(initialSidecar.api.shutdown).toHaveBeenCalledTimes(1) - expect(oldPty.kill).toHaveBeenCalledTimes(1) - expect(record.pty).toBe(replacementPty) - expect(launchFactory).toHaveBeenCalledWith(expect.objectContaining({ - terminalId: record.terminalId, - cwd: '/repo', - resumeSessionId: 'thread-durable-1', - envContext: { tabId: 'tab-1', paneId: 'pane-1' }, - providerSettings: expect.objectContaining({ model: 'codex-test' }), - })) - expect(replacementSpawnArgs).toEqual(expect.arrayContaining([ - '--remote', - 'ws://127.0.0.1:46002/', - 'resume', - 'thread-durable-1', - ])) - expect(status).toHaveBeenCalledWith(expect.objectContaining({ - terminalId: record.terminalId, - status: 'recovering', - attempt: 1, + sidecar: replacementSidecar, })) - expect(exited).not.toHaveBeenCalled() - }) - - it('coalesces duplicate current-generation failure signals into 
one replacement attempt', async () => { - const initialSidecar = createMockSidecar() - const replacementSidecar = createMockSidecar() - const launchFactory = vi.fn().mockResolvedValue({ - sessionId: 'thread-durable-1', - remote: { wsUrl: 'ws://127.0.0.1:46003/' }, - sidecar: replacementSidecar.api, - }) - registry.create({ - mode: 'codex', - cwd: '/repo', - resumeSessionId: 'thread-durable-1', - codexSidecar: initialSidecar.api, - codexLaunchFactory: launchFactory, - }) - const oldPty = await lastPty() - - initialSidecar.emitFatal() - oldPty.onExit.mock.calls[0][0]({ exitCode: 1, signal: 0 }) - oldPty.onData.mock.calls[0][0]('late retired output') - - await vi.waitFor(() => expect(launchFactory).toHaveBeenCalledTimes(1)) - }) - - it('flushes recovery-buffered input only after current-generation durable readiness proof', async () => { - const initialSidecar = createMockSidecar() - const replacementSidecar = createMockSidecar() - const launchFactory = vi.fn().mockResolvedValue({ - sessionId: 'thread-durable-1', - remote: { wsUrl: 'ws://127.0.0.1:46004/' }, - sidecar: replacementSidecar.api, - }) - const record = registry.create({ - mode: 'codex', - cwd: '/repo', - resumeSessionId: 'thread-durable-1', - codexSidecar: initialSidecar.api, - codexLaunchFactory: launchFactory, - }) - const oldPty = await lastPty() - oldPty.onExit.mock.calls[0][0]({ exitCode: 1, signal: 0 }) - await vi.waitFor(() => expect(record.codex?.workerGeneration).toBe(2)) - const replacementPty = await lastPty() - expect(record.pty).toBe(replacementPty) - expect(record.codex?.recoveryState).toBe('recovering_durable') - - expect(registry.input(record.terminalId, 'abc')).toBe(true) - replacementPty.onData.mock.calls[0][0]('process output before proof') - expect(oldPty.write).not.toHaveBeenCalledWith('abc') - expect(replacementPty.write).not.toHaveBeenCalledWith('abc') - - replacementSidecar.emitLifecycle({ - kind: 'thread_started', - thread: { - id: 'thread-durable-1', - path: 
'/tmp/rollout-thread-durable-1.jsonl', - ephemeral: false, - }, - }) - - await vi.waitFor(() => expect(record.codex?.recoveryState).toBe('running_durable')) - expect(replacementPty.write).toHaveBeenCalledWith('abc') - }) - - it('logs recovery transition context with websocket URLs and process identifiers when known', async () => { - const initialSidecar = createMockSidecar() - const replacementSidecar = createMockSidecar() - const launchFactory = vi.fn().mockResolvedValue({ - sessionId: 'thread-durable-1', - remote: { wsUrl: 'ws://127.0.0.1:46028/', processPid: 45678 }, - sidecar: replacementSidecar.api, - }) const record = registry.create({ mode: 'codex', cwd: '/repo', resumeSessionId: 'thread-durable-1', providerSettings: { - codexAppServer: { wsUrl: 'ws://127.0.0.1:46027/' }, - }, - codexSidecar: initialSidecar.api, - codexLaunchFactory: launchFactory, + codexAppServer: { + wsUrl: 'ws://127.0.0.1:46001/', + sidecar: currentSidecar, + recovery: { planCreate, retryDelayMs: 0 }, + }, + } as any, }) - const oldPty = await lastPty() + const [oldPty] = await spawnedPtys() oldPty.onExit.mock.calls[0][0]({ exitCode: 1, signal: 0 }) - await vi.waitFor(() => expect(record.codex?.workerGeneration).toBe(2)) - replacementSidecar.emitLifecycle({ - kind: 'thread_started', - thread: { - id: 'thread-durable-1', - path: '/tmp/rollout-thread-durable-1.jsonl', - ephemeral: false, - }, - }) - await vi.waitFor(() => expect(record.codex?.recoveryState).toBe('running_durable')) - const warns = await loggerWarnCalls() - const started = warns.find(([, message]) => message === 'codex_recovery_started')?.[0] - const ready = warns.find(([, message]) => message === 'codex_recovery_ready')?.[0] + await vi.waitFor(() => expect(replacementSidecar.adopt).toHaveBeenCalledWith({ terminalId: record.terminalId, generation: 1 })) + await vi.waitFor(() => expect(currentSidecar.shutdown).toHaveBeenCalledTimes(1)) + const [, replacementPty] = await spawnedPtys() - 
expect(started).toEqual(expect.objectContaining({ + expect(registry.get(record.terminalId)?.status).toBe('running') + expect(registry.get(record.terminalId)?.pty).toBe(replacementPty) + expect(planCreate).toHaveBeenCalledWith(expect.objectContaining({ terminalId: record.terminalId, - oldWsUrl: 'ws://127.0.0.1:46027/', - oldPtyPid: 12345, - source: 'pty_exit', + resumeSessionId: 'thread-durable-1', generation: 1, - candidateGeneration: 2, - attempt: 1, - hasDurableSession: true, - })) - expect(ready).toEqual(expect.objectContaining({ - terminalId: record.terminalId, - oldWsUrl: 'ws://127.0.0.1:46027/', - newWsUrl: 'ws://127.0.0.1:46028/', - oldPtyPid: 12345, - newPtyPid: 12345, - newAppServerPid: 45678, - generation: 2, - attempt: 1, - hasDurableSession: true, })) + expect(replacementSidecar.adopt).toHaveBeenCalledWith({ terminalId: record.terminalId, generation: 1 }) + expect(oldPty.kill).toHaveBeenCalledTimes(1) + expect(exited).not.toHaveBeenCalled() }) - it('applies latest resize to durable replacement PTY before flushing buffered input', async () => { - const initialSidecar = createMockSidecar() - const replacementSidecar = createMockSidecar() - const launchFactory = vi.fn().mockResolvedValue({ - sessionId: 'thread-durable-1', - remote: { wsUrl: 'ws://127.0.0.1:46026/' }, - sidecar: replacementSidecar.api, - }) - const record = registry.create({ - mode: 'codex', - cwd: '/repo', - cols: 80, - rows: 24, - resumeSessionId: 'thread-durable-1', - codexSidecar: initialSidecar.api, - codexLaunchFactory: launchFactory, - }) - const oldPty = await lastPty() - oldPty.onExit.mock.calls[0][0]({ exitCode: 1, signal: 0 }) - await vi.waitFor(() => expect(record.codex?.workerGeneration).toBe(2)) - const replacementPty = await lastPty() - - expect(registry.input(record.terminalId, 'abc')).toBe(true) - expect(registry.resize(record.terminalId, 132, 41)).toBe(true) - expect(record.cols).toBe(132) - expect(record.rows).toBe(41) - 
expect(replacementPty.write).not.toHaveBeenCalledWith('abc') - - replacementSidecar.emitLifecycle({ - kind: 'thread_started', - thread: { - id: 'thread-durable-1', - path: '/tmp/rollout-thread-durable-1.jsonl', - ephemeral: false, - }, - }) - - await vi.waitFor(() => expect(record.codex?.recoveryState).toBe('running_durable')) - expect(replacementPty.resize).toHaveBeenCalledWith(132, 41) - expect(replacementPty.write).toHaveBeenCalledWith('abc') - expect(replacementPty.resize.mock.invocationCallOrder[0]) - .toBeLessThan(replacementPty.write.mock.invocationCallOrder[0]) - }) - - it('fails a published durable replacement candidate immediately when its PTY exits before readiness', async () => { - const initialSidecar = createMockSidecar() - const firstReplacementSidecar = createMockSidecar() - const secondReplacementSidecar = createMockSidecar() - const launchFactory = vi.fn() - .mockResolvedValueOnce({ - sessionId: 'thread-durable-1', - remote: { wsUrl: 'ws://127.0.0.1:46020/' }, - sidecar: firstReplacementSidecar.api, - }) - .mockResolvedValueOnce({ - sessionId: 'thread-durable-1', - remote: { wsUrl: 'ws://127.0.0.1:46021/' }, - sidecar: secondReplacementSidecar.api, - }) - const record = registry.create({ - mode: 'codex', - cwd: '/repo', - resumeSessionId: 'thread-durable-1', - codexSidecar: initialSidecar.api, - codexLaunchFactory: launchFactory, - }) - const oldPty = await lastPty() - oldPty.onExit.mock.calls[0][0]({ exitCode: 1, signal: 0 }) - await vi.waitFor(() => expect(record.codex?.workerGeneration).toBe(2)) - const firstReplacementPty = await lastPty() - - firstReplacementPty.onExit.mock.calls[0][0]({ exitCode: 2, signal: 0 }) - - await vi.waitFor(() => expect(record.codex?.activeReplacement?.attempt).toBe(2), 600) - await vi.waitFor(() => expect(launchFactory).toHaveBeenCalledTimes(2), 600) - expect(record.codex?.recoveryState).toBe('recovering_durable') - expect(firstReplacementSidecar.api.shutdown).toHaveBeenCalledTimes(1) - 
expect(firstReplacementPty.kill).toHaveBeenCalledTimes(1) - }) - - it('fails a published durable replacement candidate immediately when fatal PTY output arrives before readiness', async () => { - const initialSidecar = createMockSidecar() - const firstReplacementSidecar = createMockSidecar() - const secondReplacementSidecar = createMockSidecar() - const launchFactory = vi.fn() - .mockResolvedValueOnce({ - sessionId: 'thread-durable-1', - remote: { wsUrl: 'ws://127.0.0.1:46022/' }, - sidecar: firstReplacementSidecar.api, - }) - .mockResolvedValueOnce({ + it('blocks input during durable recovery and sends later input only to the replacement PTY', async () => { + const planReady = deferred() + const currentSidecar = createFakeSidecar() + const replacementSidecar = createFakeSidecar() + const planCreate = vi.fn(async () => { + await planReady.promise + return { sessionId: 'thread-durable-1', - remote: { wsUrl: 'ws://127.0.0.1:46023/' }, - sidecar: secondReplacementSidecar.api, - }) - const record = registry.create({ - mode: 'codex', - cwd: '/repo', - resumeSessionId: 'thread-durable-1', - codexSidecar: initialSidecar.api, - codexLaunchFactory: launchFactory, - }) - const oldPty = await lastPty() - oldPty.onExit.mock.calls[0][0]({ exitCode: 1, signal: 0 }) - await vi.waitFor(() => expect(record.codex?.workerGeneration).toBe(2)) - const firstReplacementPty = await lastPty() - - firstReplacementPty.onData.mock.calls[0][0]( - 'ERROR: remote app server at `ws://127.0.0.1:46022/` transport failed: WebSocket protocol error: Connection reset without closing handshake', - ) - - await vi.waitFor(() => expect(record.codex?.activeReplacement?.attempt).toBe(2), 600) - await vi.waitFor(() => expect(launchFactory).toHaveBeenCalledTimes(2), 600) - expect(record.codex?.recoveryState).toBe('recovering_durable') - expect(record.buffer.snapshot()).toContain('Connection reset without closing handshake') - expect(firstReplacementSidecar.api.shutdown).toHaveBeenCalledTimes(1) - }) - - 
it('does not let a dead pre-durable replacement candidate pass the stability window', async () => { - const initialSidecar = createMockSidecar() - const firstReplacementSidecar = createMockSidecar() - const secondReplacementSidecar = createMockSidecar() - const launchFactory = vi.fn() - .mockResolvedValueOnce({ - remote: { wsUrl: 'ws://127.0.0.1:46024/' }, - sidecar: firstReplacementSidecar.api, - }) - .mockResolvedValueOnce({ - remote: { wsUrl: 'ws://127.0.0.1:46025/' }, - sidecar: secondReplacementSidecar.api, - }) - const record = registry.create({ - mode: 'codex', - cwd: '/repo', - codexSidecar: initialSidecar.api, - codexLaunchFactory: launchFactory, - }) - const oldPty = await lastPty() - oldPty.onExit.mock.calls[0][0]({ exitCode: 1, signal: 0 }) - await vi.waitFor(() => expect(record.codex?.workerGeneration).toBe(2)) - const firstReplacementPty = await lastPty() - expect(registry.input(record.terminalId, 'pre-dead')).toBe(true) - - firstReplacementPty.onExit.mock.calls[0][0]({ exitCode: 2, signal: 0 }) - - await new Promise((resolve) => setTimeout(resolve, 1_650)) - expect(record.codex?.recoveryState).toBe('recovering_pre_durable') - expect(firstReplacementPty.write).not.toHaveBeenCalledWith('pre-dead') - expect(launchFactory).toHaveBeenCalledTimes(2) - }) - - it('does not accept a current-generation non-ready durable status change as recovery readiness proof', async () => { - const initialSidecar = createMockSidecar() - const replacementSidecar = createMockSidecar() - const launchFactory = vi.fn().mockResolvedValue({ - sessionId: 'thread-durable-1', - remote: { wsUrl: 'ws://127.0.0.1:46014/' }, - sidecar: replacementSidecar.api, - }) - const record = registry.create({ - mode: 'codex', - cwd: '/repo', - resumeSessionId: 'thread-durable-1', - codexSidecar: initialSidecar.api, - codexLaunchFactory: launchFactory, - }) - const oldPty = await lastPty() - oldPty.onExit.mock.calls[0][0]({ exitCode: 1, signal: 0 }) - await vi.waitFor(() => 
expect(record.codex?.workerGeneration).toBe(2)) - const replacementPty = await lastPty() - - expect(registry.input(record.terminalId, 'abc')).toBe(true) - replacementSidecar.emitLifecycle({ - kind: 'thread_status_changed', - threadId: 'thread-durable-1', - status: { type: 'active' }, - }) - - await Promise.resolve() - expect(record.codex?.recoveryState).toBe('recovering_durable') - expect(replacementPty.write).not.toHaveBeenCalledWith('abc') - }) - - it('accepts current-generation durable idle status as recovery readiness proof', async () => { - const initialSidecar = createMockSidecar() - const replacementSidecar = createMockSidecar() - const launchFactory = vi.fn().mockResolvedValue({ - sessionId: 'thread-durable-1', - remote: { wsUrl: 'ws://127.0.0.1:46016/' }, - sidecar: replacementSidecar.api, - }) - const record = registry.create({ - mode: 'codex', - cwd: '/repo', - resumeSessionId: 'thread-durable-1', - codexSidecar: initialSidecar.api, - codexLaunchFactory: launchFactory, - }) - const oldPty = await lastPty() - oldPty.onExit.mock.calls[0][0]({ exitCode: 1, signal: 0 }) - await vi.waitFor(() => expect(record.codex?.workerGeneration).toBe(2)) - const replacementPty = await lastPty() - - expect(registry.input(record.terminalId, 'abc')).toBe(true) - replacementSidecar.emitLifecycle({ - kind: 'thread_status_changed', - threadId: 'thread-durable-1', - status: { type: 'idle' }, - }) - - await vi.waitFor(() => expect(record.codex?.recoveryState).toBe('running_durable')) - expect(replacementPty.write).toHaveBeenCalledWith('abc') - }) - - it('still accepts current-generation durable thread-started as recovery readiness proof', async () => { - const initialSidecar = createMockSidecar() - const replacementSidecar = createMockSidecar() - const launchFactory = vi.fn().mockResolvedValue({ - sessionId: 'thread-durable-1', - remote: { wsUrl: 'ws://127.0.0.1:46017/' }, - sidecar: replacementSidecar.api, - }) - const record = registry.create({ - mode: 'codex', - cwd: '/repo', 
- resumeSessionId: 'thread-durable-1', - codexSidecar: initialSidecar.api, - codexLaunchFactory: launchFactory, - }) - const oldPty = await lastPty() - oldPty.onExit.mock.calls[0][0]({ exitCode: 1, signal: 0 }) - await vi.waitFor(() => expect(record.codex?.workerGeneration).toBe(2)) - const replacementPty = await lastPty() - - expect(registry.input(record.terminalId, 'abc')).toBe(true) - - replacementSidecar.emitLifecycle({ - kind: 'thread_started', - thread: { - id: 'thread-durable-1', - path: '/tmp/rollout-thread-durable-1.jsonl', - ephemeral: false, - }, - }) - - await vi.waitFor(() => expect(record.codex?.recoveryState).toBe('running_durable')) - expect(replacementPty.write).toHaveBeenCalledWith('abc') - }) - - it('buffers input while durable Codex recovery is active', async () => { - const record = registry.create({ - mode: 'codex', - cwd: '/repo', - resumeSessionId: 'thread-durable-1', - }) - const pty = await lastPty() - record.codex!.recoveryState = 'recovering_durable' - - expect(registry.input(record.terminalId, 'abc')).toBe(true) - - expect(pty.write).not.toHaveBeenCalledWith('abc') - }) - - it('handles recovery input overflow locally so ws-handler does not see an invalid terminal', async () => { - const output = vi.fn() - registry.on('terminal.output.raw', output) - const record = registry.create({ - mode: 'codex', - cwd: '/repo', - resumeSessionId: 'thread-durable-1', - }) - const pty = await lastPty() - record.codex!.recoveryState = 'recovering_durable' - - expect(registry.input(record.terminalId, 'x'.repeat(8 * 1024))).toBe(true) - expect(registry.input(record.terminalId, 'y')).toBe(true) - - expect(pty.write).not.toHaveBeenCalled() - expect(record.buffer.snapshot()).toContain('Codex is reconnecting; input was not sent') - expect(output).toHaveBeenCalledWith(expect.objectContaining({ - terminalId: record.terminalId, - data: expect.stringContaining('Codex is reconnecting; input was not sent'), - })) - }) - - it('queues local recovery diagnostics 
behind a pending attach snapshot', async () => { - const record = registry.create({ - mode: 'codex', - cwd: '/repo', - resumeSessionId: 'thread-durable-1', - }) - record.codex!.recoveryState = 'recovering_durable' - const client = createMockWs('pending-local-diagnostic') - - expect(registry.attach(record.terminalId, client as any, { pendingSnapshot: true })).toBe(record) - expect(registry.input(record.terminalId, 'x'.repeat(8 * 1024))).toBe(true) - expect(registry.input(record.terminalId, 'y')).toBe(true) - - expect(sentPayloads(client).some((payload) => - payload.type === 'terminal.output' - && payload.terminalId === record.terminalId - && String(payload.data).includes('Codex is reconnecting; input was not sent'), - )).toBe(false) - - registry.finishAttachSnapshot(record.terminalId, client as any) - - expect(sentPayloads(client).some((payload) => - payload.type === 'terminal.output' - && payload.terminalId === record.terminalId - && String(payload.data).includes('Codex is reconnecting; input was not sent'), - )).toBe(true) - }) - - it('handles recovery input expiry locally so ws-handler does not see an invalid terminal', async () => { - vi.useFakeTimers() - const output = vi.fn() - registry.on('terminal.output.raw', output) - const record = registry.create({ - mode: 'codex', - cwd: '/repo', - resumeSessionId: 'thread-durable-1', - }) - const pty = await lastPty() - record.codex!.recoveryState = 'recovering_durable' - - expect(registry.input(record.terminalId, 'first')).toBe(true) - vi.advanceTimersByTime(10_001) - expect(registry.input(record.terminalId, 'second')).toBe(true) - - expect(pty.write).not.toHaveBeenCalled() - expect(record.buffer.snapshot()).toContain('Codex is reconnecting; input was not sent') - expect(output).toHaveBeenCalledWith(expect.objectContaining({ - terminalId: record.terminalId, - data: expect.stringContaining('Codex is reconnecting; input was not sent'), - })) - }) - - it('expires recovery-buffered input on the ttl even when no later 
input or readiness arrives', async () => { - vi.useFakeTimers() - const output = vi.fn() - registry.on('terminal.output.raw', output) - const record = registry.create({ - mode: 'codex', - cwd: '/repo', - resumeSessionId: 'thread-durable-1', - }) - const pty = await lastPty() - record.codex!.recoveryState = 'recovering_durable' - - expect(registry.input(record.terminalId, 'first')).toBe(true) - vi.advanceTimersByTime(10_001) - - expect(pty.write).not.toHaveBeenCalled() - expect(record.buffer.snapshot()).toContain('Codex is reconnecting; input was not sent') - expect(output).toHaveBeenCalledWith(expect.objectContaining({ - terminalId: record.terminalId, - data: expect.stringContaining('Codex is reconnecting; input was not sent'), - })) - }) - - it('reports expired buffered input through local output when durable recovery becomes ready', async () => { - vi.useFakeTimers() - const output = vi.fn() - registry.on('terminal.output.raw', output) - const initialSidecar = createMockSidecar() - const replacementSidecar = createMockSidecar() - const launchFactory = vi.fn().mockResolvedValue({ - sessionId: 'thread-durable-1', - remote: { wsUrl: 'ws://127.0.0.1:46027/' }, - sidecar: replacementSidecar.api, + remote: { wsUrl: 'ws://127.0.0.1:46003/' }, + sidecar: replacementSidecar, + } }) const record = registry.create({ mode: 'codex', cwd: '/repo', resumeSessionId: 'thread-durable-1', - codexSidecar: initialSidecar.api, - codexLaunchFactory: launchFactory, - }) - const oldPty = await lastPty() - oldPty.onExit.mock.calls[0][0]({ exitCode: 1, signal: 0 }) - await vi.waitFor(() => expect(record.codex?.workerGeneration).toBe(2)) - const replacementPty = await lastPty() - - expect(registry.input(record.terminalId, 'too-late')).toBe(true) - vi.advanceTimersByTime(10_001) - replacementSidecar.emitLifecycle({ - kind: 'thread_started', - thread: { - id: 'thread-durable-1', - path: '/tmp/rollout-thread-durable-1.jsonl', - ephemeral: false, - }, + providerSettings: { + codexAppServer: { + 
wsUrl: 'ws://127.0.0.1:46001/', + sidecar: currentSidecar, + recovery: { planCreate, retryDelayMs: 0 }, + }, + } as any, }) + const [oldPty] = await spawnedPtys() - await vi.waitFor(() => expect(record.codex?.recoveryState).toBe('running_durable')) - expect(replacementPty.write).not.toHaveBeenCalledWith('too-late') - expect(record.buffer.snapshot()).toContain('Codex is reconnecting; input was not sent') - expect(output).toHaveBeenCalledWith(expect.objectContaining({ - terminalId: record.terminalId, - data: expect.stringContaining('Codex is reconnecting; input was not sent'), - })) - }) - - it('replays local recovery diagnostics through the terminal stream broker after detach and reattach', async () => { - const broker = new TerminalStreamBroker(registry, vi.fn()) - try { - const record = registry.create({ - mode: 'codex', - cwd: '/repo', - resumeSessionId: 'thread-durable-1', - }) - record.codex!.recoveryState = 'recovering_durable' - - const liveWs = createMockWs('live-recovery-diagnostic') - await broker.attach(liveWs as any, record.terminalId, 'viewport_hydrate', 120, 40, 0, 'live-attach') - expect(registry.input(record.terminalId, 'x'.repeat(8 * 1024))).toBe(true) - expect(registry.input(record.terminalId, 'y')).toBe(true) - await new Promise((resolve) => setTimeout(resolve, 5)) - - expect(sentPayloads(liveWs).some((payload) => - payload.type === 'terminal.output' - && payload.terminalId === record.terminalId - && String(payload.data).includes('Codex is reconnecting; input was not sent'), - )).toBe(true) - - broker.detach(record.terminalId, liveWs as any) - const replayWs = createMockWs('replay-recovery-diagnostic') - await broker.attach(replayWs as any, record.terminalId, 'transport_reconnect', 120, 40, 0, 'replay-attach') - - const replayed = sentPayloads(replayWs) - expect(replayed.some((payload) => - payload.type === 'terminal.attach.ready' - && payload.terminalId === record.terminalId - && payload.attachRequestId === 'replay-attach', - )).toBe(true) - 
expect(replayed.some((payload) => - payload.type === 'terminal.output' - && payload.terminalId === record.terminalId - && payload.attachRequestId === 'replay-attach' - && String(payload.data).includes('Codex is reconnecting; input was not sent'), - )).toBe(true) - } finally { - broker.close() - } - }) - - it('makes pre-durable recovery live only after the attach-stability window and then flushes input', async () => { - const initialSidecar = createMockSidecar() - const replacementSidecar = createMockSidecar() - const status = vi.fn() - registry.on('terminal.status', status) - const launchFactory = vi.fn().mockResolvedValue({ - remote: { wsUrl: 'ws://127.0.0.1:46005/' }, - sidecar: replacementSidecar.api, - }) - const record = registry.create({ - mode: 'codex', - cwd: '/repo', - codexSidecar: initialSidecar.api, - codexLaunchFactory: launchFactory, - }) - const oldPty = await lastPty() oldPty.onExit.mock.calls[0][0]({ exitCode: 1, signal: 0 }) + await vi.waitFor(() => expect(planCreate).toHaveBeenCalledTimes(1)) - await vi.waitFor(() => expect(record.codex?.workerGeneration).toBe(2)) - const replacementPty = await lastPty() - expect(record.codex?.recoveryState).toBe('recovering_pre_durable') - expect(registry.input(record.terminalId, 'pre')).toBe(true) - expect(replacementPty.write).not.toHaveBeenCalledWith('pre') - - await vi.waitFor(() => expect(record.codex?.recoveryState).toBe('running_live_only'), 2_000) - expect(replacementPty.write).toHaveBeenCalledWith('pre') - expect(status).toHaveBeenCalledWith(expect.objectContaining({ + expect(registry.input(record.terminalId, 'during recovery')).toEqual({ + status: 'blocked_codex_recovery_pending', terminalId: record.terminalId, - status: 'running', - })) - }) - - it('cancels pre-durable stability when durable promotion arrives before the window elapses', async () => { - const initialSidecar = createMockSidecar() - const replacementSidecar = createMockSidecar() - const launchFactory = vi.fn().mockResolvedValue({ - 
remote: { wsUrl: 'ws://127.0.0.1:46015/' }, - sidecar: replacementSidecar.api, - }) - const record = registry.create({ - mode: 'codex', - cwd: '/repo', - codexSidecar: initialSidecar.api, - codexLaunchFactory: launchFactory, }) - const oldPty = await lastPty() - oldPty.onExit.mock.calls[0][0]({ exitCode: 1, signal: 0 }) - await vi.waitFor(() => expect(record.codex?.workerGeneration).toBe(2)) - const replacementPty = await lastPty() - expect(registry.input(record.terminalId, 'late-durable')).toBe(true) + expect(oldPty.write).not.toHaveBeenCalledWith('during recovery') - replacementSidecar.emitDurableSession('thread-durable-late') - await vi.waitFor(() => expect(record.codex?.recoveryState).toBe('recovering_durable')) + planReady.resolve() + await vi.waitFor(() => expect(currentSidecar.shutdown).toHaveBeenCalledTimes(1)) + const [, replacementPty] = await spawnedPtys() - await new Promise((resolve) => setTimeout(resolve, 1_600)) - expect(record.codex?.recoveryState).toBe('recovering_durable') - expect(replacementPty.write).not.toHaveBeenCalledWith('late-durable') - - replacementSidecar.emitLifecycle({ - kind: 'thread_started', - thread: { - id: 'thread-durable-late', - path: '/tmp/rollout-thread-durable-late.jsonl', - ephemeral: false, - }, - }) - - await vi.waitFor(() => expect(record.codex?.recoveryState).toBe('running_durable')) - expect(replacementPty.write).toHaveBeenCalledWith('late-durable') + expect(registry.input(record.terminalId, 'after recovery')).toEqual({ status: 'written' }) + expect(oldPty.write).not.toHaveBeenCalledWith('after recovery') + expect(replacementPty.write).toHaveBeenCalledWith('after recovery') }) - it('latches fast candidate readiness before unpublished durable identity is replayed', async () => { - const initialSidecar = createMockSidecar() - const replacementSidecar = createMockSidecar({ - onAttach: (attachment) => { - attachment.onThreadLifecycle({ - kind: 'thread_started', - thread: { - id: 'thread-fast-candidate', - path: 
'/tmp/rollout-thread-fast-candidate.jsonl', - ephemeral: false, - }, - }) - attachment.onDurableSession('thread-fast-candidate') - }, - }) - const launchFactory = vi.fn().mockResolvedValue({ - remote: { wsUrl: 'ws://127.0.0.1:46029/' }, - sidecar: replacementSidecar.api, - }) - const record = registry.create({ - mode: 'codex', - cwd: '/repo', - codexSidecar: initialSidecar.api, - codexLaunchFactory: launchFactory, - }) - const oldPty = await lastPty() - - oldPty.onExit.mock.calls[0][0]({ exitCode: 1, signal: 0 }) - expect(registry.input(record.terminalId, 'fast')).toBe(true) - - await vi.waitFor(() => expect(record.codex?.recoveryState).toBe('running_durable'), 600) - const replacementPty = await lastPty() - expect(record.codex?.durableSessionId).toBe('thread-fast-candidate') - expect(replacementPty.write).toHaveBeenCalledWith('fast') - }) - - it('keeps retrying durable Codex resume after repeated replacement launch failures', async () => { - vi.useFakeTimers() + it('keeps non-durable Codex PTY exit final', async () => { const exited = vi.fn() - const status = vi.fn() registry.on('terminal.exit', exited) - registry.on('terminal.status', status) - const launchFactory = vi.fn().mockRejectedValue(new Error('replacement launch unavailable')) - const record = registry.create({ - mode: 'codex', - cwd: '/repo', - resumeSessionId: 'thread-durable-1', - codexLaunchFactory: launchFactory, - }) - const oldPty = await lastPty() - oldPty.onExit.mock.calls[0][0]({ exitCode: 1, signal: 0 }) + const record = registry.create({ mode: 'codex', cwd: '/repo' }) + const [pty] = await spawnedPtys() - for (let index = 0; index < 16; index += 1) { - await vi.runOnlyPendingTimersAsync() - await Promise.resolve() - } + pty.onExit.mock.calls[0][0]({ exitCode: 2, signal: 0 }) - expect(launchFactory.mock.calls.length).toBeGreaterThan(5) - expect(record.status).toBe('running') - expect(record.codex?.recoveryState).toBe('recovering_durable') - expect(exited).not.toHaveBeenCalled() - 
expect(status).toHaveBeenCalledWith(expect.objectContaining({ - terminalId: record.terminalId, - status: 'recovering', - })) - expect(status).not.toHaveBeenCalledWith(expect.objectContaining({ - terminalId: record.terminalId, - status: 'recovery_failed', - })) - expect(launchFactory.mock.calls.every(([input]) => input.resumeSessionId === 'thread-durable-1')).toBe(true) + expect(registry.get(record.terminalId)?.status).toBe('exited') + expect(exited).toHaveBeenCalledWith({ terminalId: record.terminalId, exitCode: 2 }) }) - it('retires the failed worker and schedules another durable resume attempt after many failures', async () => { - vi.useFakeTimers() - const sidecar = createMockSidecar() - const status = vi.fn() - registry.on('terminal.status', status) - const launchFactory = vi.fn().mockRejectedValue(new Error('still unavailable')) + it('does not start durable recovery for an explicit user close', async () => { + const currentSidecar = createFakeSidecar() + const planCreate = vi.fn() const record = registry.create({ mode: 'codex', cwd: '/repo', resumeSessionId: 'thread-durable-1', - codexSidecar: sidecar.api, - codexLaunchFactory: launchFactory, + providerSettings: { + codexAppServer: { + wsUrl: 'ws://127.0.0.1:46001/', + sidecar: currentSidecar, + recovery: { planCreate, retryDelayMs: 0 }, + }, + } as any, }) - const failedPty = await lastPty() - - for (let index = 0; index < 5; index += 1) { - record.codex!.recoveryPolicy.nextAttempt() - } - failedPty.onExit.mock.calls[0][0]({ exitCode: 1, signal: 0 }) - await vi.runOnlyPendingTimersAsync() - await Promise.resolve() + registry.kill(record.terminalId) - expect(record.codex?.retiringGenerations.has(1)).toBe(true) - expect(record.codex?.closeReasonByGeneration.get(1)).toBe('recovery_retire') - expect(sidecar.api.shutdown).toHaveBeenCalledTimes(1) - expect(failedPty.kill).toHaveBeenCalledTimes(1) - expect(record.codex?.recoveryState).toBe('recovering_durable') - 
expect(status).not.toHaveBeenCalledWith(expect.objectContaining({ status: 'recovery_failed' })) - expect(launchFactory).toHaveBeenCalled() + expect(planCreate).not.toHaveBeenCalled() + expect(registry.get(record.terminalId)?.status).toBe('exited') + await vi.waitFor(() => expect(currentSidecar.shutdown).toHaveBeenCalledTimes(1)) }) - it('does not commit durable identity from a failed unpublished replacement candidate', async () => { - const status = vi.fn() - registry.on('terminal.status', status) - const pty = await import('node-pty') - let spawnCount = 0 - vi.mocked(pty.spawn).mockImplementation(() => { - spawnCount += 1 - if (spawnCount === 2) { - throw new Error('candidate spawn failed') - } - return createMockPty() as any - }) - - const firstReplacementSidecar = createMockSidecar({ - onAttach: (attachment) => { - attachment.onDurableSession('failed-unpublished-session') - }, - }) - const secondReplacementSidecar = createMockSidecar() - const launchFactory = vi.fn() - .mockResolvedValueOnce({ - remote: { wsUrl: 'ws://127.0.0.1:46018/' }, - sidecar: firstReplacementSidecar.api, - }) - .mockResolvedValueOnce({ - remote: { wsUrl: 'ws://127.0.0.1:46019/' }, - sidecar: secondReplacementSidecar.api, - }) - + it('runs normal PTY-exit cleanup when durable recovery is already blocked', async () => { + const exited = vi.fn() + registry.on('terminal.exit', exited) + const currentSidecar = createFakeSidecar() + const planCreate = vi.fn() const record = registry.create({ - mode: 'codex', - cwd: '/repo', - codexLaunchFactory: launchFactory, - }) - const oldPty = await lastPty() - oldPty.onExit.mock.calls[0][0]({ exitCode: 1, signal: 0 }) - - await vi.waitFor(() => expect(launchFactory).toHaveBeenCalledTimes(1)) - await vi.waitFor(() => expect(record.codex?.activeReplacement?.attempt).toBe(2)) - expect(status).toHaveBeenCalledWith(expect.objectContaining({ - terminalId: record.terminalId, - status: 'recovering', - reason: 'replacement_spawn_failure', - attempt: 2, - })) - - 
expect(record.codex?.durableSessionId).toBeUndefined() - expect(launchFactory.mock.calls[0]?.[0]).toEqual(expect.objectContaining({ - resumeSessionId: undefined, - })) - - await new Promise((resolve) => setTimeout(resolve, 300)) - await vi.waitFor(() => expect(launchFactory).toHaveBeenCalledTimes(2)) - - expect(launchFactory.mock.calls[1]?.[0]).toEqual(expect.objectContaining({ - resumeSessionId: undefined, - })) - }) - - it('does not idle-kill detached Codex recovery states but still kills ordinary detached terminals', async () => { - const settings = { - safety: { autoKillIdleMinutes: 1 }, - terminal: {}, - } as any - registry.shutdown() - registry = new TerminalRegistry(settings, 10) - - const recoveringPreDurable = registry.create({ mode: 'codex', cwd: '/repo' }) - recoveringPreDurable.codex!.recoveryState = 'recovering_pre_durable' - recoveringPreDurable.lastActivityAt = Date.now() - 120_000 - - const recoveringDurable = registry.create({ mode: 'codex', cwd: '/repo', resumeSessionId: 'thread-durable-1', + providerSettings: { + codexAppServer: { + wsUrl: 'ws://127.0.0.1:46001/', + sidecar: currentSidecar, + recovery: { planCreate, retryDelayMs: 0 }, + }, + } as any, }) - recoveringDurable.codex!.recoveryState = 'recovering_durable' - recoveringDurable.lastActivityAt = Date.now() - 120_000 - - const shell = registry.create({ mode: 'shell', cwd: '/repo' }) - shell.lastActivityAt = Date.now() - 120_000 + const [pty] = await spawnedPtys() + record.codexRecoveryBlockedError = new Error('previous teardown failed') - await registry.enforceIdleKillsForTest() + pty.onExit.mock.calls[0][0]({ exitCode: 9, signal: 0 }) - expect(recoveringPreDurable.status).toBe('running') - expect(recoveringDurable.status).toBe('running') - expect(shell.status).toBe('exited') + expect(planCreate).not.toHaveBeenCalled() + expect(registry.get(record.terminalId)?.status).toBe('exited') + expect(exited).toHaveBeenCalledWith({ terminalId: record.terminalId, exitCode: 9 }) }) }) diff --git 
a/test/unit/server/terminal-registry.codex-sidecar.test.ts b/test/unit/server/terminal-registry.codex-sidecar.test.ts index 0597f8a89..98a4cde3f 100644 --- a/test/unit/server/terminal-registry.codex-sidecar.test.ts +++ b/test/unit/server/terminal-registry.codex-sidecar.test.ts @@ -1,5 +1,8 @@ import { beforeEach, describe, expect, it, vi } from 'vitest' import { EventEmitter } from 'node:events' +import fsp from 'node:fs/promises' +import os from 'node:os' +import path from 'node:path' const mockPtyProcess = vi.hoisted(() => { const createMockPty = () => { @@ -52,11 +55,13 @@ vi.mock('../../../server/logger', () => { child: vi.fn(), } logger.child.mockReturnValue(logger) - return { logger } + return { logger, sessionLifecycleLogger: logger } }) import { TerminalRegistry } from '../../../server/terminal-registry.js' +import { CodexDurabilityStore } from '../../../server/coding-cli/codex-app-server/durability-store.js' import { logger } from '../../../server/logger.js' +import { CODEX_DURABILITY_SCHEMA_VERSION } from '../../../shared/codex-durability.js' function deferred<T = void>() { let resolve!: (value: T | PromiseLike<T>) => void @@ -69,19 +74,70 @@ function deferred<T = void>() { } function createFakeSidecar(options: { - waitForLoadedThread?: () => Promise<void> + adopt?: () => Promise<void> shutdown?: () => Promise<void> } = {}) { const lifecycleLossHandlers = new Set<(event: unknown) => void>() + const candidateHandlers = new Set<(event: any) => void>() + const turnStartedHandlers = new Set<(event: any) => void>() + const turnCompletedHandlers = new Set<(event: any) => void>() + const repairHandlers = new Set<(event: any) => void>() + const fsChangedHandlers = new Set<(event: any) => void>() return { - adopt: vi.fn(async () => undefined), - listLoadedThreads: vi.fn(async () => ['thread-1']), - waitForLoadedThread: vi.fn(options.waitForLoadedThread ?? (async () => undefined)), + adopt: vi.fn(options.adopt ?? 
(async () => undefined)), shutdown: vi.fn(options.shutdown ?? (async () => undefined)), + markCandidatePersisted: vi.fn(), + watchPath: vi.fn(async (targetPath: string) => ({ path: targetPath })), + unwatchPath: vi.fn(async () => undefined), + onCandidate: vi.fn((handler: (event: any) => void) => { + candidateHandlers.add(handler) + return () => candidateHandlers.delete(handler) + }), + onTurnStarted: vi.fn((handler: (event: any) => void) => { + turnStartedHandlers.add(handler) + return () => turnStartedHandlers.delete(handler) + }), + onTurnCompleted: vi.fn((handler: (event: any) => void) => { + turnCompletedHandlers.add(handler) + return () => turnCompletedHandlers.delete(handler) + }), + onRepairTrigger: vi.fn((handler: (event: any) => void) => { + repairHandlers.add(handler) + return () => repairHandlers.delete(handler) + }), + onFsChanged: vi.fn((handler: (event: any) => void) => { + fsChangedHandlers.add(handler) + return () => fsChangedHandlers.delete(handler) + }), onLifecycleLoss: vi.fn((handler: (event: unknown) => void) => { lifecycleLossHandlers.add(handler) return () => lifecycleLossHandlers.delete(handler) }), + emitCandidate(event: any) { + for (const handler of candidateHandlers) { + handler(event) + } + }, + emitTurnStarted(event: any) { + for (const handler of turnStartedHandlers) { + handler(event) + } + }, + emitTurnCompleted(event: any) { + for (const handler of turnCompletedHandlers) { + handler(event) + } + }, + emitRepairTrigger(event: any) { + for (const handler of repairHandlers) { + handler(event) + } + }, + emitFsChanged(event: any) { + for (const handler of fsChangedHandlers) { + handler(event) + } + }, emitLifecycleLoss(event: unknown) { for (const handler of lifecycleLossHandlers) { handler(event) @@ -96,6 +152,1000 @@ describe('TerminalRegistry Codex sidecar ownership', () => { vi.clearAllMocks() }) + it('persists Codex restore identity server-side before releasing fresh terminal input', async () => { + const durabilityDir = await 
fsp.mkdtemp(path.join(os.tmpdir(), 'freshell-codex-durability-')) + try { + const registry = new TerminalRegistry(undefined, undefined, undefined, { + codexDurabilityStore: new CodexDurabilityStore({ dir: durabilityDir }), + serverInstanceId: 'srv-test', + }) + const sidecar = createFakeSidecar() + const term = registry.create({ + mode: 'codex', + envContext: { tabId: 'tab-1', paneId: 'pane-1' }, + providerSettings: { + codexAppServer: { + wsUrl: 'ws://127.0.0.1:43123', + sidecar, + }, + } as any, + }) + + expect(registry.input(term.terminalId, 'hello\r')).toEqual({ + status: 'blocked_codex_identity_pending', + terminalId: term.terminalId, + }) + expect(mockPtyProcess.instances[0].write).not.toHaveBeenCalled() + + const sent: unknown[] = [] + const client = { + readyState: 1, + bufferedAmount: 0, + send: vi.fn((message: string) => sent.push(JSON.parse(message))), + } + registry.attach(term.terminalId, client as any) + + sidecar.emitCandidate({ + source: 'thread_started_notification', + thread: { + id: '019e2a0c-7cef-7281-94df-d0d05d7b9ac3', + path: '/home/user/.codex/sessions/2026/05/14/rollout.jsonl', + ephemeral: false, + }, + }) + + await vi.waitFor(() => expect(sidecar.markCandidatePersisted).toHaveBeenCalledTimes(1)) + const record = registry.get(term.terminalId)! 
+ expect(record.codexInputGate).toBeUndefined() + expect(record.codexDurability).toMatchObject({ + state: 'captured_pre_turn', + candidate: { + candidateThreadId: '019e2a0c-7cef-7281-94df-d0d05d7b9ac3', + rolloutPath: '/home/user/.codex/sessions/2026/05/14/rollout.jsonl', + source: 'thread_started_notification', + }, + }) + + const stored = await new CodexDurabilityStore({ dir: durabilityDir }).read(term.terminalId) + expect(stored).toMatchObject({ + terminalId: term.terminalId, + tabId: 'tab-1', + paneId: 'pane-1', + serverInstanceId: 'srv-test', + state: 'captured_pre_turn', + candidate: { + candidateThreadId: '019e2a0c-7cef-7281-94df-d0d05d7b9ac3', + rolloutPath: '/home/user/.codex/sessions/2026/05/14/rollout.jsonl', + }, + }) + expect(sent).toContainEqual(expect.objectContaining({ + type: 'terminal.codex.durability.updated', + terminalId: term.terminalId, + durability: expect.objectContaining({ + state: 'captured_pre_turn', + }), + })) + + expect(registry.input(term.terminalId, 'hello\r')).toEqual({ status: 'written' }) + expect(mockPtyProcess.instances[0].write).toHaveBeenCalledWith('hello\r') + } finally { + await fsp.rm(durabilityDir, { recursive: true, force: true }) + } + }) + + it('allows only terminal startup control replies while Codex restore identity is pending', () => { + const registry = new TerminalRegistry() + const term = registry.create({ + mode: 'codex', + providerSettings: { + codexAppServer: { + wsUrl: 'ws://127.0.0.1:43123', + sidecar: createFakeSidecar(), + }, + } as any, + }) + + for (const data of [ + '\x1b[1;1R', + '\x1b[I', + '\x1b[?1;2c', + '\x1b]10;rgb:2424/2929/2f2f\x1b\\', + '\x1b]11;rgb:ffff/ffff/ffff\x1b\\', + ]) { + expect(registry.input(term.terminalId, data)).toEqual({ status: 'written' }) + expect(mockPtyProcess.instances[0].write).toHaveBeenLastCalledWith(data) + } + + expect(registry.input(term.terminalId, 'hello\r')).toEqual({ + status: 'blocked_codex_identity_pending', + terminalId: term.terminalId, + }) + 
expect(registry.input(term.terminalId, '\x1b[A')).toEqual({ + status: 'blocked_codex_identity_pending', + terminalId: term.terminalId, + }) + }) + + it('keeps reporting the Codex identity capture timeout after closing the failed terminal', async () => { + const registry = new TerminalRegistry() + const sidecar = createFakeSidecar() + const term = registry.create({ + mode: 'codex', + providerSettings: { + codexAppServer: { + wsUrl: 'ws://127.0.0.1:43123', + sidecar, + }, + } as any, + }) + + sidecar.emitRepairTrigger({ kind: 'candidate_capture_timeout' }) + + await vi.waitFor(() => { + expect(registry.get(term.terminalId)?.status).toBe('exited') + }) + expect(registry.input(term.terminalId, 'hello\r')).toEqual({ + status: 'blocked_codex_identity_capture_timeout', + terminalId: term.terminalId, + }) + }) + + it('does not release fresh Codex input from a browser persistence acknowledgement alone', () => { + const registry = new TerminalRegistry() + const term = registry.create({ + mode: 'codex', + providerSettings: { + codexAppServer: { + wsUrl: 'ws://127.0.0.1:43123', + sidecar: createFakeSidecar(), + }, + } as any, + }) + + expect(registry.acknowledgeCodexCandidatePersisted({ + terminalId: term.terminalId, + candidateThreadId: 'thread-1', + rolloutPath: '/home/user/.codex/sessions/rollout.jsonl', + })).toBe('no_candidate') + expect(registry.input(term.terminalId, 'hello\r')).toEqual({ + status: 'blocked_codex_identity_pending', + terminalId: term.terminalId, + }) + }) + + it('deletes the transient Codex durability store record when the terminal is killed', async () => { + const durabilityDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'freshell-codex-durability-')) + try { + const store = new CodexDurabilityStore({ dir: durabilityDir }) + const registry = new TerminalRegistry(undefined, undefined, undefined, { + codexDurabilityStore: store, + serverInstanceId: 'srv-test', + }) + const sidecar = createFakeSidecar() + const term = registry.create({ + mode: 'codex', + 
providerSettings: { + codexAppServer: { + wsUrl: 'ws://127.0.0.1:43123', + sidecar, + }, + } as any, + }) + + sidecar.emitCandidate({ + source: 'thread_start_response', + thread: { + id: 'thread-delete-store', + path: path.join(durabilityDir, 'rollout.jsonl'), + ephemeral: false, + }, + }) + await vi.waitFor(() => expect(registry.get(term.terminalId)?.codexDurability?.state).toBe('captured_pre_turn')) + await expect(store.read(term.terminalId)).resolves.toMatchObject({ + terminalId: term.terminalId, + state: 'captured_pre_turn', + }) + + await registry.killAndWait(term.terminalId) + + await vi.waitFor(async () => { + await expect(store.read(term.terminalId)).resolves.toBeUndefined() + }) + } finally { + await fsp.rm(durabilityDir, { recursive: true, force: true }) + } + }) + + it('marks fresh Codex non-restorable and closes it when candidate capture times out', async () => { + const durabilityDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'freshell-codex-durability-')) + try { + const registry = new TerminalRegistry(undefined, undefined, undefined, { + codexDurabilityStore: new CodexDurabilityStore({ dir: durabilityDir }), + serverInstanceId: 'srv-test', + }) + const sidecar = createFakeSidecar() + const term = registry.create({ + mode: 'codex', + providerSettings: { + codexAppServer: { + wsUrl: 'ws://127.0.0.1:43123', + sidecar, + }, + } as any, + }) + + sidecar.emitRepairTrigger({ kind: 'candidate_capture_timeout' }) + + await vi.waitFor(() => expect(registry.get(term.terminalId)?.status).toBe('exited')) + expect(registry.get(term.terminalId)?.codexDurability).toMatchObject({ + state: 'non_restorable', + nonRestorableReason: 'candidate_capture_timeout', + }) + expect(registry.input(term.terminalId, 'hello\r')).toEqual({ + status: 'blocked_codex_identity_capture_timeout', + terminalId: term.terminalId, + }) + expect(mockPtyProcess.instances[0].kill).toHaveBeenCalledTimes(1) + } finally { + await fsp.rm(durabilityDir, { recursive: true, force: true }) + } + }) + + 
it('discards a delayed candidate write after candidate capture already timed out', async () => { + const durabilityDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'freshell-codex-durability-')) + const firstCandidateWriteStarted = deferred() + const releaseFirstCandidateWrite = deferred() + let writeCount = 0 + const fsImpl = { + mkdir: fsp.mkdir, + readdir: fsp.readdir, + readFile: fsp.readFile, + rename: fsp.rename, + unlink: fsp.unlink, + writeFile: vi.fn(async (...args: Parameters<typeof fsp.writeFile>) => { + writeCount += 1 + if (writeCount === 1) { + firstCandidateWriteStarted.resolve() + await releaseFirstCandidateWrite.promise + } + return fsp.writeFile(...args) + }), + } + try { + const store = new CodexDurabilityStore({ dir: durabilityDir, fsImpl }) + const registry = new TerminalRegistry(undefined, undefined, undefined, { + codexDurabilityStore: store, + serverInstanceId: 'srv-test', + }) + const sidecar = createFakeSidecar() + const term = registry.create({ + mode: 'codex', + providerSettings: { + codexAppServer: { + wsUrl: 'ws://127.0.0.1:43123', + sidecar, + }, + } as any, + }) + + sidecar.emitCandidate({ + source: 'thread_start_response', + thread: { + id: 'thread-late-candidate', + path: path.join(durabilityDir, 'rollout.jsonl'), + ephemeral: false, + }, + }) + await firstCandidateWriteStarted.promise + + sidecar.emitRepairTrigger({ kind: 'candidate_capture_timeout' }) + await vi.waitFor(() => expect(registry.get(term.terminalId)?.status).toBe('exited')) + expect(registry.get(term.terminalId)?.codexDurability).toMatchObject({ + state: 'non_restorable', + nonRestorableReason: 'candidate_capture_timeout', + }) + + releaseFirstCandidateWrite.resolve() + + await vi.waitFor(() => { + expect(registry.get(term.terminalId)?.codexDurability).toMatchObject({ + state: 'non_restorable', + nonRestorableReason: 'candidate_capture_timeout', + }) + }) + await vi.waitFor(async () => { + await expect(store.read(term.terminalId)).resolves.toBeUndefined() + }) + 
expect(sidecar.markCandidatePersisted).not.toHaveBeenCalled() + } finally { + releaseFirstCandidateWrite.resolve() + await fsp.rm(durabilityDir, { recursive: true, force: true }) + } + }) + + it('serializes per-terminal Codex candidate persistence so the first deterministic candidate wins', async () => { + const durabilityDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'freshell-codex-durability-')) + const firstCandidateWriteStarted = deferred() + const releaseFirstCandidateWrite = deferred() + class StoreWithDelayedFirstCandidateWrite extends CodexDurabilityStore { + readonly writeThreadIds: string[] = [] + + override async write(...args: Parameters<CodexDurabilityStore['write']>) { + const threadId = args[0].candidate?.candidateThreadId + if (threadId) this.writeThreadIds.push(threadId) + if (threadId === 'thread-first') { + firstCandidateWriteStarted.resolve() + await releaseFirstCandidateWrite.promise + } + return super.write(...args) + } + } + + try { + const store = new StoreWithDelayedFirstCandidateWrite({ dir: durabilityDir }) + const registry = new TerminalRegistry(undefined, undefined, undefined, { + codexDurabilityStore: store, + serverInstanceId: 'srv-test', + }) + const sidecar = createFakeSidecar() + const term = registry.create({ + mode: 'codex', + providerSettings: { + codexAppServer: { + wsUrl: 'ws://127.0.0.1:43123', + sidecar, + }, + } as any, + }) + + sidecar.emitCandidate({ + source: 'thread_start_response', + thread: { + id: 'thread-first', + path: path.join(durabilityDir, 'first-rollout.jsonl'), + ephemeral: false, + }, + }) + await firstCandidateWriteStarted.promise + + sidecar.emitCandidate({ + source: 'thread_start_response', + thread: { + id: 'thread-second', + path: path.join(durabilityDir, 'second-rollout.jsonl'), + ephemeral: false, + }, + }) + await Promise.resolve() + expect(store.writeThreadIds).toEqual(['thread-first']) + + releaseFirstCandidateWrite.resolve() + + await vi.waitFor(() => 
expect(registry.get(term.terminalId)?.codexDurability?.candidate?.candidateThreadId).toBe('thread-first')) + await vi.waitFor(() => expect(logger.warn).toHaveBeenCalledWith( + expect.objectContaining({ + terminalId: term.terminalId, + existingThreadId: 'thread-first', + candidateThreadId: 'thread-second', + }), + 'Ignoring mismatched Codex restore identity candidate after one was already persisted', + )) + await expect(store.read(term.terminalId)).resolves.toMatchObject({ + candidate: { + candidateThreadId: 'thread-first', + rolloutPath: path.join(durabilityDir, 'first-rollout.jsonl'), + }, + }) + expect(store.writeThreadIds).toEqual(['thread-first']) + expect(sidecar.markCandidatePersisted).toHaveBeenCalledTimes(1) + } finally { + releaseFirstCandidateWrite.resolve() + await fsp.rm(durabilityDir, { recursive: true, force: true }) + } + }) + + it('closes the terminal when candidate persistence fails before user input', async () => { + const durabilityDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'freshell-codex-durability-')) + class StoreWithFirstWriteFailure extends CodexDurabilityStore { + private writeCount = 0 + + override async write(...args: Parameters<CodexDurabilityStore['write']>) { + this.writeCount += 1 + if (this.writeCount === 1) { + throw new Error('candidate write failed') + } + return super.write(...args) + } + } + + try { + const registry = new TerminalRegistry(undefined, undefined, undefined, { + codexDurabilityStore: new StoreWithFirstWriteFailure({ dir: durabilityDir }), + serverInstanceId: 'srv-test', + }) + const sidecar = createFakeSidecar() + const term = registry.create({ + mode: 'codex', + providerSettings: { + codexAppServer: { + wsUrl: 'ws://127.0.0.1:43123', + sidecar, + }, + } as any, + }) + + sidecar.emitCandidate({ + source: 'thread_start_response', + thread: { + id: 'thread-write-failed', + path: path.join(durabilityDir, 'rollout.jsonl'), + ephemeral: false, + }, + }) + + await vi.waitFor(() => 
expect(registry.get(term.terminalId)?.codexDurability).toMatchObject({ + state: 'non_restorable', + nonRestorableReason: 'candidate_persist_failed', + })) + await vi.waitFor(() => expect(registry.get(term.terminalId)?.status).toBe('exited')) + expect(registry.input(term.terminalId, 'hello\r')).toEqual({ + status: 'blocked_codex_identity_unavailable', + terminalId: term.terminalId, + reason: 'candidate_persist_failed', + }) + expect(mockPtyProcess.instances[0].write).not.toHaveBeenCalled() + } finally { + await fsp.rm(durabilityDir, { recursive: true, force: true }) + } + }) + + it('promotes Codex to canonical session identity after turn completion rollout proof succeeds', async () => { + const durabilityDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'freshell-codex-durability-')) + try { + const rolloutPath = path.join(durabilityDir, 'rollout.jsonl') + const registry = new TerminalRegistry(undefined, undefined, undefined, { + codexDurabilityStore: new CodexDurabilityStore({ dir: durabilityDir }), + serverInstanceId: 'srv-test', + }) + const sidecar = createFakeSidecar() + const term = registry.create({ + mode: 'codex', + providerSettings: { + codexAppServer: { + wsUrl: 'ws://127.0.0.1:43123', + sidecar, + }, + } as any, + }) + const sent: unknown[] = [] + const client = { + readyState: 1, + bufferedAmount: 0, + send: vi.fn((message: string) => sent.push(JSON.parse(message))), + } + registry.attach(term.terminalId, client as any) + + sidecar.emitCandidate({ + source: 'thread_start_response', + thread: { + id: 'thread-proof-ok', + path: rolloutPath, + ephemeral: false, + }, + }) + await vi.waitFor(() => expect(registry.get(term.terminalId)?.codexDurability?.state).toBe('captured_pre_turn')) + await fsp.writeFile( + rolloutPath, + '{"type":"session_meta","payload":{"id":"thread-proof-ok"}}\n', + 'utf8', + ) + sidecar.emitTurnStarted({ threadId: 'thread-proof-ok', turnId: 'turn-1', params: {} }) + await vi.waitFor(() => 
expect(registry.get(term.terminalId)?.codexDurability?.state).toBe('turn_in_progress_unproven')) + sidecar.emitTurnCompleted({ threadId: 'thread-proof-ok', turnId: 'turn-1', params: {} }) + + await vi.waitFor(() => expect(registry.get(term.terminalId)?.resumeSessionId).toBe('thread-proof-ok')) + expect(registry.get(term.terminalId)?.codexDurability).toMatchObject({ + state: 'durable', + durableThreadId: 'thread-proof-ok', + }) + expect(sent).toContainEqual(expect.objectContaining({ + type: 'terminal.session.associated', + terminalId: term.terminalId, + sessionRef: { + provider: 'codex', + sessionId: 'thread-proof-ok', + }, + })) + } finally { + await fsp.rm(durabilityDir, { recursive: true, force: true }) + } + }) + + it('persists and broadcasts durable Codex identity promoted from create-time proof', async () => { + const durabilityDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'freshell-codex-durability-')) + try { + const rolloutPath = path.join(durabilityDir, 'rollout.jsonl') + const store = new CodexDurabilityStore({ dir: durabilityDir }) + const registry = new TerminalRegistry(undefined, undefined, undefined, { + codexDurabilityStore: store, + serverInstanceId: 'srv-test', + }) + const sidecar = createFakeSidecar() + const term = registry.create({ + mode: 'codex', + envContext: { tabId: 'tab-create-proof', paneId: 'pane-create-proof' }, + providerSettings: { + codexAppServer: { + wsUrl: 'ws://127.0.0.1:43123', + sidecar, + }, + } as any, + }) + const sent: unknown[] = [] + const client = { + readyState: 1, + bufferedAmount: 0, + send: vi.fn((message: string) => sent.push(JSON.parse(message))), + } + registry.attach(term.terminalId, client as any) + + sidecar.emitCandidate({ + source: 'thread_start_response', + thread: { + id: 'thread-create-candidate', + path: rolloutPath, + ephemeral: false, + }, + }) + await vi.waitFor(() => expect(registry.get(term.terminalId)?.codexDurability?.state).toBe('captured_pre_turn')) + + await 
expect(registry.promoteCodexDurabilityFromCreateProof( + term.terminalId, + 'thread-create-durable', + 12345, + )).resolves.toEqual({ + ok: true, + terminalId: term.terminalId, + sessionId: 'thread-create-durable', + }) + + expect(registry.get(term.terminalId)?.resumeSessionId).toBe('thread-create-durable') + expect(registry.get(term.terminalId)?.codexDurability).toMatchObject({ + state: 'durable', + durableThreadId: 'thread-create-durable', + candidate: { + candidateThreadId: 'thread-create-candidate', + rolloutPath, + }, + }) + await expect(store.read(term.terminalId)).resolves.toMatchObject({ + terminalId: term.terminalId, + tabId: 'tab-create-proof', + paneId: 'pane-create-proof', + serverInstanceId: 'srv-test', + state: 'durable', + durableThreadId: 'thread-create-durable', + candidate: { + candidateThreadId: 'thread-create-candidate', + rolloutPath, + }, + updatedAt: 12345, + }) + expect(sent).toContainEqual(expect.objectContaining({ + type: 'terminal.codex.durability.updated', + terminalId: term.terminalId, + durability: expect.objectContaining({ + state: 'durable', + durableThreadId: 'thread-create-durable', + }), + })) + } finally { + await fsp.rm(durabilityDir, { recursive: true, force: true }) + } + }) + + it('uses the bindSession result when promoting create-time Codex durability', async () => { + const durabilityDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'freshell-codex-durability-')) + try { + const store = new CodexDurabilityStore({ dir: durabilityDir }) + const registry = new TerminalRegistry(undefined, undefined, undefined, { + codexDurabilityStore: store, + serverInstanceId: 'srv-test', + }) + const term = registry.create({ + mode: 'codex', + providerSettings: { + codexAppServer: { + wsUrl: 'ws://127.0.0.1:43123', + sidecar: createFakeSidecar(), + }, + } as any, + }) + vi.spyOn(registry, 'bindSession').mockImplementation((terminalId) => { + registry.get(terminalId)!.resumeSessionId = 'stale-side-effect' + return { ok: true, terminalId, 
sessionId: 'thread-create-durable' } + }) + + await expect(registry.promoteCodexDurabilityFromCreateProof( + term.terminalId, + 'thread-create-durable', + 67890, + )).resolves.toEqual({ + ok: true, + terminalId: term.terminalId, + sessionId: 'thread-create-durable', + }) + + expect(registry.get(term.terminalId)?.codexDurability).toMatchObject({ + state: 'durable', + durableThreadId: 'thread-create-durable', + }) + expect(registry.get(term.terminalId)?.resumeSessionId).toBe('thread-create-durable') + await expect(store.read(term.terminalId)).resolves.toMatchObject({ + state: 'durable', + durableThreadId: 'thread-create-durable', + updatedAt: 67890, + }) + } finally { + await fsp.rm(durabilityDir, { recursive: true, force: true }) + } + }) + + it('does not broadcast a durable Codex session when rollout proof cannot bind canonical ownership', async () => { + const durabilityDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'freshell-codex-durability-')) + try { + const rolloutPath = path.join(durabilityDir, 'rollout.jsonl') + const registry = new TerminalRegistry(undefined, undefined, undefined, { + codexDurabilityStore: new CodexDurabilityStore({ dir: durabilityDir }), + serverInstanceId: 'srv-test', + }) + const owner = registry.create({ + mode: 'codex', + resumeSessionId: 'thread-binding-owner', + }) + const sidecar = createFakeSidecar() + const term = registry.create({ + mode: 'codex', + providerSettings: { + codexAppServer: { + wsUrl: 'ws://127.0.0.1:43123', + sidecar, + }, + } as any, + }) + const sent: unknown[] = [] + const client = { + readyState: 1, + bufferedAmount: 0, + send: vi.fn((message: string) => sent.push(JSON.parse(message))), + } + registry.attach(term.terminalId, client as any) + + sidecar.emitCandidate({ + source: 'thread_start_response', + thread: { + id: 'thread-binding-owner', + path: rolloutPath, + ephemeral: false, + }, + }) + await vi.waitFor(() => expect(registry.get(term.terminalId)?.codexDurability?.state).toBe('captured_pre_turn')) + 
await fsp.writeFile( + rolloutPath, + '{"type":"session_meta","payload":{"id":"thread-binding-owner"}}\n', + 'utf8', + ) + sidecar.emitTurnCompleted({ threadId: 'thread-binding-owner', turnId: 'turn-1', params: {} }) + + await vi.waitFor(() => expect(registry.get(term.terminalId)?.codexDurability).toMatchObject({ + state: 'non_restorable', + nonRestorableReason: 'session_binding_failed:session_already_owned', + })) + await vi.waitFor(() => expect(registry.get(term.terminalId)?.status).toBe('exited')) + expect(registry.get(term.terminalId)?.resumeSessionId).toBeUndefined() + expect(registry.findRunningTerminalBySession('codex', 'thread-binding-owner')?.terminalId).toBe(owner.terminalId) + expect(registry.input(term.terminalId, 'hello\r')).toEqual({ + status: 'blocked_codex_identity_unavailable', + terminalId: term.terminalId, + reason: 'session_binding_failed:session_already_owned', + }) + expect(sent).not.toContainEqual(expect.objectContaining({ + type: 'terminal.session.associated', + })) + expect(mockPtyProcess.instances.at(-1)?.kill).toHaveBeenCalledTimes(1) + } finally { + await fsp.rm(durabilityDir, { recursive: true, force: true }) + } + }) + + it('does not promote Codex from repair triggers before a turn completes', async () => { + const durabilityDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'freshell-codex-durability-')) + try { + const rolloutPath = path.join(durabilityDir, 'rollout.jsonl') + const registry = new TerminalRegistry(undefined, undefined, undefined, { + codexDurabilityStore: new CodexDurabilityStore({ dir: durabilityDir }), + serverInstanceId: 'srv-test', + }) + const sidecar = createFakeSidecar() + const term = registry.create({ + mode: 'codex', + providerSettings: { + codexAppServer: { + wsUrl: 'ws://127.0.0.1:43123', + sidecar, + }, + } as any, + }) + const sent: unknown[] = [] + const client = { + readyState: 1, + bufferedAmount: 0, + send: vi.fn((message: string) => sent.push(JSON.parse(message))), + } + registry.attach(term.terminalId, 
client as any) + + sidecar.emitCandidate({ + source: 'thread_start_response', + thread: { + id: 'thread-repair-pre-turn', + path: rolloutPath, + ephemeral: false, + }, + }) + await vi.waitFor(() => expect(registry.get(term.terminalId)?.codexDurability?.state).toBe('captured_pre_turn')) + await fsp.writeFile( + rolloutPath, + '{"type":"session_meta","payload":{"id":"thread-repair-pre-turn"}}\n', + 'utf8', + ) + + sidecar.emitRepairTrigger({ kind: 'fs_changed' }) + await new Promise((resolve) => setImmediate(resolve)) + + expect(registry.get(term.terminalId)?.resumeSessionId).toBeUndefined() + expect(registry.get(term.terminalId)?.codexDurability).toMatchObject({ + state: 'captured_pre_turn', + candidate: { + candidateThreadId: 'thread-repair-pre-turn', + }, + }) + expect(sent).not.toContainEqual(expect.objectContaining({ + type: 'terminal.session.associated', + })) + + sidecar.emitTurnCompleted({ threadId: 'thread-repair-pre-turn', turnId: 'turn-1', params: {} }) + await vi.waitFor(() => expect(registry.get(term.terminalId)?.resumeSessionId).toBe('thread-repair-pre-turn')) + } finally { + await fsp.rm(durabilityDir, { recursive: true, force: true }) + } + }) + + it('runs a final rollout proof before marking a fresh Codex PTY exit non-restorable', async () => { + const durabilityDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'freshell-codex-durability-')) + try { + const rolloutPath = path.join(durabilityDir, 'rollout.jsonl') + const registry = new TerminalRegistry(undefined, undefined, undefined, { + codexDurabilityStore: new CodexDurabilityStore({ dir: durabilityDir }), + serverInstanceId: 'srv-test', + }) + const sidecar = createFakeSidecar() + const term = registry.create({ + mode: 'codex', + providerSettings: { + codexAppServer: { + wsUrl: 'ws://127.0.0.1:43123', + sidecar, + }, + } as any, + }) + const sent: unknown[] = [] + const client = { + readyState: 1, + bufferedAmount: 0, + send: vi.fn((message: string) => sent.push(JSON.parse(message))), + } + 
registry.attach(term.terminalId, client as any) + + sidecar.emitCandidate({ + source: 'thread_start_response', + thread: { + id: 'thread-final-proof', + path: rolloutPath, + ephemeral: false, + }, + }) + await vi.waitFor(() => expect(registry.get(term.terminalId)?.codexDurability?.state).toBe('captured_pre_turn')) + await fsp.writeFile( + rolloutPath, + '{"type":"session_meta","payload":{"id":"thread-final-proof"}}\n', + 'utf8', + ) + + mockPtyProcess.instances[0]._emitExit(137) + + await vi.waitFor(() => expect(registry.get(term.terminalId)?.status).toBe('exited')) + expect(registry.get(term.terminalId)?.codexDurability).toMatchObject({ + state: 'durable', + durableThreadId: 'thread-final-proof', + }) + expect(sent).toContainEqual(expect.objectContaining({ + type: 'terminal.session.associated', + terminalId: term.terminalId, + sessionRef: { + provider: 'codex', + sessionId: 'thread-final-proof', + }, + })) + expect(logger.warn).not.toHaveBeenCalledWith( + expect.objectContaining({ + kind: 'terminal_exit_without_durable_session', + terminalId: term.terminalId, + }), + 'terminal_exit_without_durable_session', + ) + } finally { + await fsp.rm(durabilityDir, { recursive: true, force: true }) + } + }) + + it('runs a final rollout proof before deciding lifecycle loss cannot recover a fresh Codex terminal', async () => { + const durabilityDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'freshell-codex-durability-')) + try { + const rolloutPath = path.join(durabilityDir, 'rollout.jsonl') + const replacementSidecar = createFakeSidecar() + const planCreate = vi.fn(async () => ({ + sessionId: 'thread-final-recovery', + remote: { wsUrl: 'ws://127.0.0.1:43124' }, + sidecar: replacementSidecar, + })) + const registry = new TerminalRegistry(undefined, undefined, undefined, { + codexDurabilityStore: new CodexDurabilityStore({ dir: durabilityDir }), + serverInstanceId: 'srv-test', + }) + const currentSidecar = createFakeSidecar() + const term = registry.create({ + mode: 'codex', + 
providerSettings: { + codexAppServer: { + wsUrl: 'ws://127.0.0.1:43123', + sidecar: currentSidecar, + recovery: { planCreate, retryDelayMs: 0 }, + }, + } as any, + }) + + currentSidecar.emitCandidate({ + source: 'thread_start_response', + thread: { + id: 'thread-final-recovery', + path: rolloutPath, + ephemeral: false, + }, + }) + await vi.waitFor(() => expect(registry.get(term.terminalId)?.codexDurability?.state).toBe('captured_pre_turn')) + await fsp.writeFile( + rolloutPath, + '{"type":"session_meta","payload":{"id":"thread-final-recovery"}}\n', + 'utf8', + ) + + currentSidecar.emitLifecycleLoss({ method: 'thread/closed' }) + + await vi.waitFor(() => expect(planCreate).toHaveBeenCalledWith(expect.objectContaining({ + terminalId: term.terminalId, + resumeSessionId: 'thread-final-recovery', + }))) + await vi.waitFor(() => expect(replacementSidecar.adopt).toHaveBeenCalledWith({ terminalId: term.terminalId, generation: 1 })) + expect(registry.get(term.terminalId)?.codexDurability).toMatchObject({ + state: 'durable', + durableThreadId: 'thread-final-recovery', + }) + } finally { + await fsp.rm(durabilityDir, { recursive: true, force: true }) + } + }) + + it('marks Codex degraded after turn completion rollout proof fails', async () => { + const durabilityDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'freshell-codex-durability-')) + try { + const rolloutPath = path.join(durabilityDir, 'missing-rollout.jsonl') + const registry = new TerminalRegistry(undefined, undefined, undefined, { + codexDurabilityStore: new CodexDurabilityStore({ dir: durabilityDir }), + serverInstanceId: 'srv-test', + }) + const sidecar = createFakeSidecar() + const term = registry.create({ + mode: 'codex', + providerSettings: { + codexAppServer: { + wsUrl: 'ws://127.0.0.1:43123', + sidecar, + }, + } as any, + }) + const sent: unknown[] = [] + const client = { + readyState: 1, + bufferedAmount: 0, + send: vi.fn((message: string) => sent.push(JSON.parse(message))), + } + 
registry.attach(term.terminalId, client as any) + + sidecar.emitCandidate({ + source: 'thread_start_response', + thread: { + id: 'thread-proof-missing', + path: rolloutPath, + ephemeral: false, + }, + }) + await vi.waitFor(() => expect(registry.get(term.terminalId)?.codexDurability?.state).toBe('captured_pre_turn')) + sidecar.emitTurnCompleted({ threadId: 'thread-proof-missing', turnId: 'turn-1', params: {} }) + + await vi.waitFor(() => expect(registry.get(term.terminalId)?.codexDurability?.state).toBe('durability_unproven_after_completion')) + expect(registry.get(term.terminalId)?.resumeSessionId).toBeUndefined() + expect(registry.get(term.terminalId)?.codexDurability?.lastProofFailure).toMatchObject({ + reason: 'missing', + }) + expect(sent).not.toContainEqual(expect.objectContaining({ + type: 'terminal.session.associated', + })) + } finally { + await fsp.rm(durabilityDir, { recursive: true, force: true }) + } + }) + + it('uses exact rollout watch changes as a one-shot repair trigger after proof failure', async () => { + const durabilityDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'freshell-codex-durability-')) + try { + const rolloutPath = path.join(durabilityDir, 'late-rollout.jsonl') + const registry = new TerminalRegistry(undefined, undefined, undefined, { + codexDurabilityStore: new CodexDurabilityStore({ dir: durabilityDir }), + serverInstanceId: 'srv-test', + }) + const sidecar = createFakeSidecar() + const term = registry.create({ + mode: 'codex', + providerSettings: { + codexAppServer: { + wsUrl: 'ws://127.0.0.1:43123', + sidecar, + }, + } as any, + }) + + sidecar.emitCandidate({ + source: 'thread_start_response', + thread: { + id: 'thread-late-proof', + path: rolloutPath, + ephemeral: false, + }, + }) + await vi.waitFor(() => expect(registry.get(term.terminalId)?.codexDurability?.state).toBe('captured_pre_turn')) + await vi.waitFor(() => expect(sidecar.watchPath).toHaveBeenCalledWith(rolloutPath, expect.any(String))) + const watchId = 
sidecar.watchPath.mock.calls[0][1] + + sidecar.emitTurnCompleted({ threadId: 'thread-late-proof', turnId: 'turn-1', params: {} }) + await vi.waitFor(() => expect(registry.get(term.terminalId)?.codexDurability?.state).toBe('durability_unproven_after_completion')) + + await fsp.writeFile( + rolloutPath, + '{"type":"session_meta","payload":{"id":"thread-late-proof"}}\n', + 'utf8', + ) + sidecar.emitFsChanged({ watchId, changedPaths: [rolloutPath] }) + + await vi.waitFor(() => expect(registry.get(term.terminalId)?.codexDurability).toMatchObject({ + state: 'durable', + durableThreadId: 'thread-late-proof', + })) + expect(sidecar.unwatchPath).toHaveBeenCalledWith(watchId) + } finally { + await fsp.rm(durabilityDir, { recursive: true, force: true }) + } + }) + it('awaits Codex sidecar teardown when killing a terminal', async () => { const registry = new TerminalRegistry() const shutdown = vi.fn(async () => undefined) @@ -171,7 +1221,7 @@ describe('TerminalRegistry Codex sidecar ownership', () => { expect(currentSidecar.onLifecycleLoss).toHaveBeenCalledTimes(1) currentSidecar.emitLifecycleLoss({ method: 'thread/status/changed', threadId: 'thread-1', status: 'notLoaded' }) - await vi.waitFor(() => expect(replacementSidecar.waitForLoadedThread).toHaveBeenCalledWith('thread-1', expect.any(Object))) + await vi.waitFor(() => expect(replacementSidecar.adopt).toHaveBeenCalledWith({ terminalId: term.terminalId, generation: 1 })) expect(registry.get(term.terminalId)?.status).toBe('running') expect(planCreate).toHaveBeenCalledWith(expect.objectContaining({ @@ -180,10 +1230,12 @@ describe('TerminalRegistry Codex sidecar ownership', () => { })) expect(replacementSidecar.adopt).toHaveBeenCalledWith({ terminalId: term.terminalId, generation: 1 }) expect(currentSidecar.shutdown).toHaveBeenCalledTimes(1) - expect(mockPtyProcess.instances[0].kill).toHaveBeenCalled() + expect(mockPtyProcess.instances[0].kill).toHaveBeenCalledWith('SIGTERM') + await new Promise((resolve) => 
setTimeout(resolve, 600)) + expect(mockPtyProcess.instances[0].kill).toHaveBeenCalledTimes(1) expect(mockPtyProcess.instances[1].write).toBeDefined() - expect(registry.input(term.terminalId, 'after recovery')).toBe(true) + expect(registry.input(term.terminalId, 'after recovery')).toEqual({ status: 'written' }) expect(mockPtyProcess.instances[0].write).not.toHaveBeenCalled() expect(mockPtyProcess.instances[1].write).toHaveBeenCalledWith('after recovery') }) @@ -244,12 +1296,12 @@ describe('TerminalRegistry Codex sidecar ownership', () => { registry.publishCodexSidecar(term.terminalId) currentSidecar.emitLifecycleLoss({ method: 'thread/closed', threadId: 'thread-1' }) - await vi.waitFor(() => expect(replacementSidecar.waitForLoadedThread).toHaveBeenCalledWith('thread-1', expect.any(Object))) + await vi.waitFor(() => expect(replacementSidecar.adopt).toHaveBeenCalledWith({ terminalId: term.terminalId, generation: 1 })) expect(planCreate).toHaveBeenCalledTimes(1) }) - it('keeps the old Codex generation current when retiring sidecar teardown fails', async () => { + it('closes the Codex terminal when retiring sidecar teardown blocks recovery', async () => { const registry = new TerminalRegistry() const currentSidecar = createFakeSidecar({ shutdown: async () => { @@ -275,12 +1327,13 @@ describe('TerminalRegistry Codex sidecar ownership', () => { }) currentSidecar.emitLifecycleLoss({ method: 'thread/closed', threadId: 'thread-1' }) - await vi.waitFor(() => expect(currentSidecar.shutdown).toHaveBeenCalledTimes(1)) + await vi.waitFor(() => expect(currentSidecar.shutdown).toHaveBeenCalled()) await vi.waitFor(() => expect(replacementSidecar.shutdown).toHaveBeenCalledTimes(1)) expect(planCreate).toHaveBeenCalledTimes(1) - expect(registry.input(term.terminalId, 'still old generation')).toBe(true) - expect(mockPtyProcess.instances[0].write).toHaveBeenCalledWith('still old generation') + await vi.waitFor(() => expect(registry.get(term.terminalId)?.status).toBe('exited')) + 
expect(registry.input(term.terminalId, 'still old generation')).toEqual({ status: 'not_running' }) + expect(mockPtyProcess.instances[0].write).not.toHaveBeenCalledWith('still old generation') expect(mockPtyProcess.instances[1].write).not.toHaveBeenCalled() }) @@ -310,13 +1363,15 @@ describe('TerminalRegistry Codex sidecar ownership', () => { }) currentSidecar.emitLifecycleLoss({ method: 'thread/closed', threadId: 'thread-1' }) - await vi.waitFor(() => expect(currentSidecar.shutdown).toHaveBeenCalledTimes(1)) + await vi.waitFor(() => expect(currentSidecar.shutdown).toHaveBeenCalled()) await vi.waitFor(() => expect(replacementSidecar.shutdown).toHaveBeenCalledTimes(1)) + const currentShutdownCalls = currentSidecar.shutdown.mock.calls.length currentSidecar.emitLifecycleLoss({ method: 'thread/closed', threadId: 'thread-1' }) await new Promise((resolve) => setTimeout(resolve, 25)) expect(planCreate).toHaveBeenCalledTimes(1) + expect(currentSidecar.shutdown).toHaveBeenCalledTimes(currentShutdownCalls) expect(replacementSidecar.shutdown).toHaveBeenCalledTimes(1) }) @@ -324,8 +1379,8 @@ describe('TerminalRegistry Codex sidecar ownership', () => { const registry = new TerminalRegistry() const currentSidecar = createFakeSidecar() const replacementSidecar = createFakeSidecar({ - waitForLoadedThread: async () => { - throw new Error('candidate never became ready') + adopt: async () => { + mockPtyProcess.instances[1]._emitExit(42) }, shutdown: async () => { throw new Error('candidate sidecar teardown failed') @@ -394,6 +1449,39 @@ describe('TerminalRegistry Codex sidecar ownership', () => { } }) + it('closes a Codex terminal when lifecycle-loss durable recovery becomes blocked', async () => { + const registry = new TerminalRegistry() + const exited = vi.fn() + registry.on('terminal.exit', exited) + const currentSidecar = createFakeSidecar() + const teardownError = new Error('planner-owned sidecar teardown failed') as Error & { + codexSidecarTeardownFailed?: boolean + } + 
teardownError.codexSidecarTeardownFailed = true + const planCreate = vi.fn(async () => { + throw teardownError + }) + const term = registry.create({ + mode: 'codex', + resumeSessionId: 'thread-1', + providerSettings: { + codexAppServer: { + wsUrl: 'ws://127.0.0.1:43123', + sidecar: currentSidecar, + recovery: { planCreate, retryDelayMs: 0 }, + }, + } as any, + }) + mockPtyProcess.instances[0].autoExitOnKill = false + + currentSidecar.emitLifecycleLoss({ method: 'thread/closed', threadId: 'thread-1' }) + + await vi.waitFor(() => expect(registry.get(term.terminalId)?.codexRecoveryBlockedError).toBe(teardownError)) + await vi.waitFor(() => expect(registry.get(term.terminalId)?.status).toBe('exited')) + expect(planCreate).toHaveBeenCalledTimes(1) + expect(exited).toHaveBeenCalledWith({ terminalId: term.terminalId, exitCode: 0 }) + }) + it('keeps unpublished candidate teardown failure retryable for final close', async () => { const registry = new TerminalRegistry() const currentSidecar = createFakeSidecar() @@ -401,8 +1489,8 @@ describe('TerminalRegistry Codex sidecar ownership', () => { .mockRejectedValueOnce(new Error('candidate verified teardown failed')) .mockResolvedValueOnce(undefined) const replacementSidecar = createFakeSidecar({ - waitForLoadedThread: async () => { - throw new Error('candidate never became ready') + adopt: async () => { + mockPtyProcess.instances[1]._emitExit(42) }, shutdown: candidateShutdown, }) @@ -439,8 +1527,8 @@ describe('TerminalRegistry Codex sidecar ownership', () => { .mockRejectedValueOnce(new Error('candidate verified teardown failed')) .mockResolvedValueOnce(undefined) const replacementSidecar = createFakeSidecar({ - waitForLoadedThread: async () => { - throw new Error('candidate never became ready') + adopt: async () => { + mockPtyProcess.instances[1]._emitExit(42) }, shutdown: candidateShutdown, }) @@ -468,12 +1556,13 @@ describe('TerminalRegistry Codex sidecar ownership', () => { 
expect(replacementSidecar.shutdown).toHaveBeenCalledTimes(2) }) - it('does not publish a recovery candidate whose PTY exited before readiness completed', async () => { + it('does not publish a recovery candidate whose PTY exited before publication', async () => { const registry = new TerminalRegistry() const currentSidecar = createFakeSidecar() - const readiness = deferred() const firstCandidate = createFakeSidecar({ - waitForLoadedThread: () => readiness.promise, + adopt: async () => { + mockPtyProcess.instances[1]._emitExit(42) + }, }) const secondCandidate = createFakeSidecar() const planCreate = vi.fn() @@ -500,9 +1589,6 @@ describe('TerminalRegistry Codex sidecar ownership', () => { }) currentSidecar.emitLifecycleLoss({ method: 'thread/closed', threadId: 'thread-1' }) - await vi.waitFor(() => expect(firstCandidate.waitForLoadedThread).toHaveBeenCalledTimes(1)) - mockPtyProcess.instances[1]._emitExit(42) - readiness.resolve() await vi.waitFor(() => expect(firstCandidate.shutdown).toHaveBeenCalledTimes(1)) await vi.waitFor(() => expect(secondCandidate.adopt).toHaveBeenCalledTimes(1)) @@ -511,7 +1597,7 @@ describe('TerminalRegistry Codex sidecar ownership', () => { expect(planCreate).toHaveBeenCalledTimes(2) expect(currentSidecar.shutdown).toHaveBeenCalledTimes(1) expect(firstCandidate.shutdown).toHaveBeenCalledTimes(1) - expect(registry.input(term.terminalId, 'after retry')).toBe(true) + expect(registry.input(term.terminalId, 'after retry')).toEqual({ status: 'written' }) expect(mockPtyProcess.instances[1].write).not.toHaveBeenCalled() expect(mockPtyProcess.instances[2].write).toHaveBeenCalledWith('after retry') }) @@ -549,18 +1635,70 @@ describe('TerminalRegistry Codex sidecar ownership', () => { expect(oldPtyExitedDuringShutdown).toBe(true) expect(registry.get(term.terminalId)?.status).toBe('running') expect(replacementSidecar.shutdown).not.toHaveBeenCalled() - expect(registry.input(term.terminalId, 'after atomic handoff')).toBe(true) + 
expect(registry.input(term.terminalId, 'after atomic handoff')).toEqual({ status: 'written' }) expect(mockPtyProcess.instances[0].write).not.toHaveBeenCalled() expect(mockPtyProcess.instances[1].write).toHaveBeenCalledWith('after atomic handoff') }) + it('deletes Codex durability store records when a published recovery PTY exits finally', async () => { + const durabilityDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'freshell-codex-durability-')) + try { + const store = new CodexDurabilityStore({ dir: durabilityDir }) + const registry = new TerminalRegistry(undefined, undefined, undefined, { + codexDurabilityStore: store, + serverInstanceId: 'srv-test', + }) + const currentSidecar = createFakeSidecar() + const replacementSidecar = createFakeSidecar() + const planCreate = vi.fn(async () => ({ + sessionId: 'thread-1', + remote: { wsUrl: 'ws://127.0.0.1:43124' }, + sidecar: replacementSidecar, + })) + const term = registry.create({ + mode: 'codex', + resumeSessionId: 'thread-1', + providerSettings: { + codexAppServer: { + wsUrl: 'ws://127.0.0.1:43123', + sidecar: currentSidecar, + recovery: { planCreate, retryDelayMs: 0 }, + }, + } as any, + }) + await store.write({ + schemaVersion: CODEX_DURABILITY_SCHEMA_VERSION, + terminalId: term.terminalId, + serverInstanceId: 'srv-test', + state: 'durable', + durableThreadId: 'thread-1', + updatedAt: 123, + }) + + currentSidecar.emitLifecycleLoss({ method: 'thread/closed', threadId: 'thread-1' }) + await vi.waitFor(() => expect(replacementSidecar.adopt).toHaveBeenCalledTimes(1)) + const replacementPty = mockPtyProcess.instances[1] + expect(registry.get(term.terminalId)?.pty).toBe(replacementPty) + + registry.get(term.terminalId)!.codexRecovery = undefined + replacementPty._emitExit(17) + + await vi.waitFor(() => expect(registry.get(term.terminalId)?.status).toBe('exited')) + await vi.waitFor(async () => { + await expect(store.read(term.terminalId)).resolves.toBeUndefined() + }) + } finally { + await fsp.rm(durabilityDir, { 
recursive: true, force: true }) + } + }) + it('waits for a failed recovery candidate to shut down before retrying', async () => { const registry = new TerminalRegistry() const currentSidecar = createFakeSidecar() const firstShutdown = deferred() const firstCandidate = createFakeSidecar({ - waitForLoadedThread: async () => { - throw new Error('candidate not ready') + adopt: async () => { + mockPtyProcess.instances[1]._emitExit(42) }, shutdown: () => firstShutdown.promise, }) @@ -598,7 +1736,7 @@ describe('TerminalRegistry Codex sidecar ownership', () => { await vi.waitFor(() => expect(secondCandidate.adopt).toHaveBeenCalled()) }) - it('does not grow active recovery candidates across repeated readiness failures', async () => { + it('does not grow active recovery candidates across repeated recovery candidate exits', async () => { const registry = new TerminalRegistry() const currentSidecar = createFakeSidecar() let activeCandidates = 0 @@ -611,9 +1749,10 @@ describe('TerminalRegistry Codex sidecar ownership', () => { sessionId: 'thread-1', remote: { wsUrl: `ws://127.0.0.1:${43124 + attempt}` }, sidecar: createFakeSidecar({ - waitForLoadedThread: async () => { - if (attempt >= 3) return - throw new Error('candidate not ready') + adopt: async () => { + if (attempt < 3) { + mockPtyProcess.instances[attempt]._emitExit(42) + } }, shutdown: async () => { activeCandidates -= 1 @@ -676,10 +1815,10 @@ describe('TerminalRegistry Codex sidecar ownership', () => { it('final close with an unpublished recovery candidate awaits candidate shutdown', async () => { const registry = new TerminalRegistry() const currentSidecar = createFakeSidecar() - const readiness = deferred() + const adopt = deferred() const shutdown = deferred() const replacementSidecar = createFakeSidecar({ - waitForLoadedThread: () => readiness.promise, + adopt: () => adopt.promise, shutdown: () => shutdown.promise, }) const planCreate = vi.fn(async () => ({ @@ -700,9 +1839,9 @@ describe('TerminalRegistry Codex 
sidecar ownership', () => { }) currentSidecar.emitLifecycleLoss({ method: 'thread/closed', threadId: 'thread-1' }) - await vi.waitFor(() => expect(replacementSidecar.waitForLoadedThread).toHaveBeenCalledTimes(1)) + await vi.waitFor(() => expect(replacementSidecar.adopt).toHaveBeenCalledTimes(1)) const close = registry.killAndWait(term.terminalId) - readiness.resolve() + adopt.resolve() await vi.waitFor(() => expect(replacementSidecar.shutdown).toHaveBeenCalledTimes(1)) let closed = false @@ -859,10 +1998,10 @@ describe('TerminalRegistry Codex sidecar ownership', () => { it('awaits recovery candidate teardown for exited Codex terminals while shutting down other running terminals', async () => { const registry = new TerminalRegistry() const currentSidecar = createFakeSidecar() - const readiness = deferred() + const adopt = deferred() const candidateShutdown = deferred() const replacementSidecar = createFakeSidecar({ - waitForLoadedThread: () => readiness.promise, + adopt: () => adopt.promise, shutdown: () => candidateShutdown.promise, }) const planCreate = vi.fn(async () => ({ @@ -883,9 +2022,9 @@ describe('TerminalRegistry Codex sidecar ownership', () => { }) currentSidecar.emitLifecycleLoss({ method: 'thread/closed', threadId: 'thread-1' }) - await vi.waitFor(() => expect(replacementSidecar.waitForLoadedThread).toHaveBeenCalledTimes(1)) + await vi.waitFor(() => expect(replacementSidecar.adopt).toHaveBeenCalledTimes(1)) registry.kill(codexTerm.terminalId) - readiness.resolve() + adopt.resolve() await vi.waitFor(() => expect(replacementSidecar.shutdown).toHaveBeenCalledTimes(1)) registry.create({ mode: 'shell' }) diff --git a/test/unit/server/terminal-registry.test.ts b/test/unit/server/terminal-registry.test.ts index ba2b915df..83e8a2711 100644 --- a/test/unit/server/terminal-registry.test.ts +++ b/test/unit/server/terminal-registry.test.ts @@ -4,16 +4,9 @@ import { isValidClaudeSessionId } from '../../../server/claude-session-id' import * as fs from 'fs' import os 
from 'os' import { - CODEX_STARTUP_EXPECTED_REPLIES, CODEX_STARTUP_QUERY_FRAMES, } from '../../helpers/codex-startup-probes' -const SERVER_PREATTACH_CODEX_STARTUP_EXPECTED_REPLIES = [ - CODEX_STARTUP_EXPECTED_REPLIES[0], - CODEX_STARTUP_EXPECTED_REPLIES[1], - '\u001b]10;rgb:c9c9/d1d1/d9d9\u001b\\', -] as const - // Mock fs.existsSync for shell existence checks // Need to provide both named export and default export since the implementation uses `import fs from 'fs'` vi.mock('fs', () => { @@ -885,6 +878,34 @@ describe('buildSpawnSpec Unix paths', () => { expectCodexMcpArgs(spec.args) expect(spec.args.slice(-2)).toEqual(['resume', 'session-123']) }) + + it('disables Codex apps for Freshell-managed remote launches', () => { + delete process.env.CODEX_CMD + + const spec = buildSpawnSpec('codex', '/home/user/project', 'system', undefined, { + codexAppServer: { + wsUrl: 'ws://127.0.0.1:4567', + }, + }) + + expect(spec.args.slice(0, 4)).toEqual([ + '--remote', + 'ws://127.0.0.1:4567', + '-c', + 'features.apps=false', + ]) + expectCodexMcpArgs(spec.args) + }) + + it('does not disable Codex apps for ordinary Codex launches', () => { + delete process.env.CODEX_CMD + + const spec = buildSpawnSpec('codex', '/home/user/project', 'system') + + expect(spec.args).not.toContain('features.apps=false') + expect(spec.args).not.toContain('--remote') + expectCodexMcpArgs(spec.args) + }) }) describe('provider settings in spawn spec', () => { @@ -1967,6 +1988,29 @@ describe('TerminalRegistry', () => { expect(terminals).toHaveLength(1) expect(terminals[0].resumeSessionId).toBeUndefined() }) + + it('exposes a Codex sessionRef for an explicit durable resume', () => { + const created = registry.create({ + mode: 'codex', + cwd: '/home/user/project', + resumeSessionId: 'thread-proved-resume', + }) + + expect(registry.list()[0]).toMatchObject({ + resumeSessionId: 'thread-proved-resume', + sessionRef: { + provider: 'codex', + sessionId: 'thread-proved-resume', + }, + codexDurability: { + state: 
'durable', + durableThreadId: 'thread-proved-resume', + }, + }) + + const record = registry.get(created.terminalId)! + expect(record.codexInputGate).toBeUndefined() + }) }) describe('list() returns mode', () => { @@ -2538,59 +2582,64 @@ describe('TerminalRegistry', () => { return { promise, resolve, reject } } - it('registers the terminal before a synchronous durable-session callback fires', () => { - let terminalSeenDuringAttach: string | undefined + function createSidecar(overrides: Partial<{ + shutdown: () => Promise<void> + onLifecycleLoss: (handler: (event: unknown) => void) => () => void + }> = {}) { + return { + adopt: vi.fn().mockResolvedValue(undefined), + markCandidatePersisted: vi.fn(), + shutdown: vi.fn(overrides.shutdown ?? (async () => undefined)), + onLifecycleLoss: vi.fn(overrides.onLifecycleLoss ?? (() => vi.fn())), + } + } + it('registers the current Codex sidecar when the terminal is created', () => { + const sidecar = createSidecar() const term = registry.create({ mode: 'codex', cwd: '/home/user/project', - codexSidecar: { - attachTerminal: ({ terminalId, onDurableSession }) => { - terminalSeenDuringAttach = registry.get(terminalId)?.terminalId - onDurableSession('codex-session-sync') + providerSettings: { + codexAppServer: { + wsUrl: 'ws://127.0.0.1:43123', + sidecar, }, - shutdown: vi.fn().mockResolvedValue(undefined), - }, + } as any, }) - expect(terminalSeenDuringAttach).toBe(term.terminalId) - expect(registry.get(term.terminalId)?.resumeSessionId).toBe('codex-session-sync') - expect(registry.isSessionBound('codex', 'codex-session-sync')).toBe(true) + expect(sidecar.onLifecycleLoss).toHaveBeenCalledTimes(1) + expect(registry.get(term.terminalId)).toBe(term) }) - it('keeps the newly created terminal alive when a synchronous fatal callback starts recovery', () => { - let createdTerminalId: string | undefined - const exited = vi.fn() - registry.on('terminal.exit', exited) - + it('does not treat sidecar registration as durable identity', () => { 
+ const sidecar = createSidecar() const term = registry.create({ mode: 'codex', cwd: '/home/user/project', - codexSidecar: { - attachTerminal: ({ terminalId, onFatal }) => { - createdTerminalId = terminalId - onFatal(new Error('sidecar failed during attach')) + providerSettings: { + codexAppServer: { + wsUrl: 'ws://127.0.0.1:43123', + sidecar, }, - shutdown: vi.fn().mockResolvedValue(undefined), - }, + } as any, }) - expect(createdTerminalId).toBe(term.terminalId) - expect(registry.get(term.terminalId)?.status).toBe('running') - expect(registry.get(term.terminalId)?.codex?.recoveryState).toBe('recovering_pre_durable') - expect(exited).not.toHaveBeenCalled() + expect(registry.get(term.terminalId)?.resumeSessionId).toBeUndefined() + expect(registry.isSessionBound('codex', 'codex-session-sync')).toBe(false) }) it('waits for pending Codex sidecar shutdown work during graceful shutdown', async () => { const sidecarShutdown = deferred() - const sidecar = { - attachTerminal: vi.fn(), - shutdown: vi.fn(() => sidecarShutdown.promise), - } + const sidecar = createSidecar({ shutdown: () => sidecarShutdown.promise }) const term = registry.create({ mode: 'codex', cwd: '/home/user/project', - codexSidecar: sidecar, + providerSettings: { + codexAppServer: { + wsUrl: 'ws://127.0.0.1:43123', + sidecar, + }, + } as any, }) registry.kill(term.terminalId) @@ -2612,7 +2661,7 @@ describe('TerminalRegistry', () => { }) describe('pre-attach codex startup probes', () => { - it('answers codex startup probes before the first client attaches', async () => { + it('leaves Codex startup probe replies to the client-side terminal parser before first attach', async () => { registry.create({ mode: 'codex', cwd: '/home/user/project', @@ -2624,7 +2673,7 @@ describe('TerminalRegistry', () => { onDataCallback(CODEX_STARTUP_QUERY_FRAMES.join('')) - expect(mockPty.write.mock.calls.map(([data]: [string]) => data)).toEqual(SERVER_PREATTACH_CODEX_STARTUP_EXPECTED_REPLIES) + 
expect(mockPty.write).not.toHaveBeenCalled() }) it('stops server-side startup probe replies after a client has attached once', async () => {