From 28d75fe9f2b2d074be0baf7d973eb5a7f6b857de Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sun, 7 Jun 2026 17:51:21 -0700 Subject: [PATCH 01/10] feat(auq): add gstack-session-kind + echo SESSION_KIND in preamble Classifies the session as spawned | headless | interactive from env markers (OPENCLAW_SESSION / GSTACK_HEADLESS / CONDUCTOR_* / CLAUDE_CODE_ENTRYPOINT / CI), defaulting to interactive. Echoed once at skill start alongside BRANCH/REPO_MODE so the AskUserQuestion-failure fallback can branch without a shell-out at failure time. Degrade-safe: empty/error => interactive. Co-Authored-By: Claude Opus 4.8 (1M context) --- bin/gstack-session-kind | 53 ++++++++++++++ .../preamble/generate-preamble-bash.ts | 3 + test/gstack-session-kind.test.ts | 70 +++++++++++++++++++ 3 files changed, 126 insertions(+) create mode 100755 bin/gstack-session-kind create mode 100644 test/gstack-session-kind.test.ts diff --git a/bin/gstack-session-kind b/bin/gstack-session-kind new file mode 100755 index 0000000000..8e9bc4e410 --- /dev/null +++ b/bin/gstack-session-kind @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# gstack-session-kind — classify the current agent session so skills know whether +# a human can answer an interactive prompt (AskUserQuestion). +# +# Usage: gstack-session-kind → prints one of: spawned | headless | interactive +# +# Used by the preamble (generate-preamble-bash.ts) which echoes +# SESSION_KIND: <spawned|headless|interactive> +# so the AskUserQuestion-failure fallback rule can branch without a shell-out at +# failure time: +# spawned → orchestrator session (OpenClaw). Auto-choose recommended option +# per the skill's SPAWNED_SESSION block. Never prose, never BLOCKED. +# headless → no human present (claude -p evals / CI). BLOCK on AUQ failure. +# interactive → a human is present. Prose-fallback on AUQ failure. +# +# Detection is best-effort. 
On ANY ambiguity it prints `interactive` — BLOCK only on +# a positive headless signal, since a stray prose message in an unmarked one-shot +# `-p` run just ends the turn (harmless), whereas wrongly BLOCKING a real human is not. +# +# Why env vars and not TTY/entrypoint: an interactive Conductor session reports +# CLAUDE_CODE_ENTRYPOINT=sdk-ts with no TTY — identical to a headless SDK eval. The +# signals that actually discriminate are the host/orchestrator/CI env markers below. +set -euo pipefail + +# 1. Orchestrator-spawned session (OpenClaw). Authoritative block lives in the skill; +# we only surface the classification. +if [ -n "${OPENCLAW_SESSION:-}" ]; then + echo "spawned" + exit 0 +fi + +# 2. Explicit headless override (set by the eval/E2E harness for determinism). +if [ -n "${GSTACK_HEADLESS:-}" ]; then + echo "headless" + exit 0 +fi + +# 3. Positive interactive-host signals: a human-driven host is present. +# - Conductor app sets CONDUCTOR_* workspace vars. +# - Plain interactive `claude` CLI sets CLAUDE_CODE_ENTRYPOINT=cli. +if [ -n "${CONDUCTOR_WORKSPACE_PATH:-}" ] || [ -n "${CONDUCTOR_PORT:-}" ] || [ "${CLAUDE_CODE_ENTRYPOINT:-}" = "cli" ]; then + echo "interactive" + exit 0 +fi + +# 4. CI / automation markers with no interactive host → headless. +if [ -n "${CI:-}" ] || [ -n "${GITHUB_ACTIONS:-}" ]; then + echo "headless" + exit 0 +fi + +# 5. No positive headless signal → assume a human is present (degrade-safe default). 
+echo "interactive" diff --git a/scripts/resolvers/preamble/generate-preamble-bash.ts b/scripts/resolvers/preamble/generate-preamble-bash.ts index d8552421ad..ecc30cef7d 100644 --- a/scripts/resolvers/preamble/generate-preamble-bash.ts +++ b/scripts/resolvers/preamble/generate-preamble-bash.ts @@ -33,6 +33,9 @@ echo "SKILL_PREFIX: $_SKILL_PREFIX" source <(${ctx.paths.binDir}/gstack-repo-mode 2>/dev/null) || true REPO_MODE=\${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" +_SESSION_KIND=$(${ctx.paths.binDir}/gstack-session-kind 2>/dev/null || echo "interactive") +case "$_SESSION_KIND" in spawned|headless|interactive) ;; *) _SESSION_KIND="interactive" ;; esac +echo "SESSION_KIND: $_SESSION_KIND" _LAKE_SEEN=$([ -f ~/.gstack/.completeness-intro-seen ] && echo "yes" || echo "no") echo "LAKE_INTRO: $_LAKE_SEEN" _TEL=$(${ctx.paths.binDir}/gstack-config get telemetry 2>/dev/null || true) diff --git a/test/gstack-session-kind.test.ts b/test/gstack-session-kind.test.ts new file mode 100644 index 0000000000..67a047a1b1 --- /dev/null +++ b/test/gstack-session-kind.test.ts @@ -0,0 +1,70 @@ +/** + * gstack-session-kind — classifies the session so skills know whether a human can + * answer an AskUserQuestion. Drives the AUQ-failure fallback branch: + * spawned → auto-choose (orchestrator) + * headless → BLOCK on AUQ failure + * interactive → prose fallback on AUQ failure + * + * These permutations are the contract the resolver rule depends on. Run with a + * SCRUBBED env (the test process itself runs inside Conductor, so CONDUCTOR_* / + * CLAUDE_CODE_* would leak in and contaminate the classification). + * + * Free, deterministic, gate-tier. + */ +import { describe, test, expect } from 'bun:test'; +import { execFileSync } from 'child_process'; +import * as path from 'path'; + +const BIN = path.resolve(__dirname, '..', 'bin', 'gstack-session-kind'); + +/** Run the helper with ONLY the supplied env (plus PATH so bash resolves). 
*/ +function kind(env: Record<string, string>): string { + return execFileSync(BIN, [], { + env: { PATH: process.env.PATH ?? '/usr/bin:/bin', ...env }, + encoding: 'utf-8', + }).trim(); +} + +describe('gstack-session-kind', () => { + test('OPENCLAW_SESSION → spawned (highest precedence)', () => { + expect(kind({ OPENCLAW_SESSION: '1' })).toBe('spawned'); + // spawned wins even when other markers are also present + expect(kind({ OPENCLAW_SESSION: '1', GSTACK_HEADLESS: '1', CONDUCTOR_PORT: '5' })).toBe('spawned'); + }); + + test('GSTACK_HEADLESS → headless', () => { + expect(kind({ GSTACK_HEADLESS: '1' })).toBe('headless'); + }); + + test('CONDUCTOR_* → interactive (a human host is present)', () => { + expect(kind({ CONDUCTOR_WORKSPACE_PATH: '/tmp/ws' })).toBe('interactive'); + expect(kind({ CONDUCTOR_PORT: '55010' })).toBe('interactive'); + }); + + test('CLAUDE_CODE_ENTRYPOINT=cli → interactive', () => { + expect(kind({ CLAUDE_CODE_ENTRYPOINT: 'cli' })).toBe('interactive'); + }); + + test('interactive host beats CI markers', () => { + expect(kind({ CONDUCTOR_PORT: '5', CI: '1' })).toBe('interactive'); + }); + + test('CI / GITHUB_ACTIONS with no host → headless', () => { + expect(kind({ CI: '1' })).toBe('headless'); + expect(kind({ GITHUB_ACTIONS: 'true' })).toBe('headless'); + }); + + test('GSTACK_HEADLESS beats CONDUCTOR (explicit override wins)', () => { + expect(kind({ GSTACK_HEADLESS: '1', CONDUCTOR_PORT: '5' })).toBe('headless'); + }); + + test('bare env → interactive (degrade-safe default)', () => { + expect(kind({})).toBe('interactive'); + }); + + test('empty GSTACK_HEADLESS is treated as unset (interactive)', () => { + // The resolver/helper guard on -n, so an empty string must NOT mean headless — + // this is the opt-out path harness suites use to exercise the interactive branch. 
+ expect(kind({ GSTACK_HEADLESS: '' })).toBe('interactive'); + }); +}); From c475d73b34c96d76ad2b95797f1855e6a463562a Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sun, 7 Jun 2026 17:51:22 -0700 Subject: [PATCH 02/10] feat(auq): prose fallback when AskUserQuestion fails (interactive sessions) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On a genuine AUQ failure (tool absent, or present-but-erroring like Conductor's flaky MCP returning '[Tool result missing due to internal error]'): retry once, then branch on SESSION_KIND — spawned auto-chooses, headless BLOCKs, interactive renders a prose decision brief the user answers by typing a letter. The prose fallback MUST surface the triad: a clear ELI10 of the issue, a per-choice Completeness score, and a recommendation+why (one paragraph per choice). Carves out the [plan-tune auto-decide] denial as NOT a failure, and qualifies the former 'tool_use, not prose' assertions so the rule isn't self-contradicting. Tests pin the triad, the SESSION_KIND branch, the OV2 collision guard, the always-loaded guarantee, and a cross-file invariant on the auto-decide prefix. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../preamble/generate-ask-user-format.ts | 26 +++++- .../preamble/generate-completion-status.ts | 2 +- test/auq-format-always-loaded.test.ts | 5 ++ test/resolver-ask-user-format.test.ts | 87 +++++++++++++++++++ 4 files changed, 116 insertions(+), 4 deletions(-) diff --git a/scripts/resolvers/preamble/generate-ask-user-format.ts b/scripts/resolvers/preamble/generate-ask-user-format.ts index ab24eb507a..12090dc2cd 100644 --- a/scripts/resolvers/preamble/generate-ask-user-format.ts +++ b/scripts/resolvers/preamble/generate-ask-user-format.ts @@ -9,11 +9,31 @@ export function generateAskUserFormat(_ctx: TemplateContext): string { **Rule:** if any \`mcp__*__AskUserQuestion\` variant is in your tool list, prefer it. 
Hosts may disable native AUQ via \`--disallowedTools AskUserQuestion\` (Conductor does, by default) and route through their MCP variant; calling native there silently fails. Same questions/options shape; same decision-brief format applies. -**If no AskUserQuestion variant appears in your tool list, this skill is BLOCKED.** Stop, report \`BLOCKED — AskUserQuestion unavailable\`, and wait for the user. Do not write decisions to the plan file as a substitute, do not emit them as prose and stop, and do not silently auto-decide (only \`/plan-tune\` AUTO_DECIDE opt-ins authorize auto-picking). +If AskUserQuestion is unavailable (no variant in your tool list) OR a call to it fails, do NOT silently auto-decide or write the decision to the plan file as a substitute. Follow the **failure fallback** below. + +### When AskUserQuestion is unavailable or a call fails + +Tell three outcomes apart: + +1. **Auto-decide denial (NOT a failure).** The result contains \`[plan-tune auto-decide]