From c7b61f4bfdb16bc9cef470bf19436a8b74dac5f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Den=20Delimarsky=20=F0=9F=8C=BA?= <53200638+localden@users.noreply.github.com> Date: Mon, 22 Sep 2025 19:35:45 -0700 Subject: [PATCH 1/5] Update with extra commands --- CHANGELOG.md | 8 + README.md | 16 +- pyproject.toml | 2 +- scripts/bash/check-prerequisites.sh | 20 ++- scripts/powershell/check-prerequisites.ps1 | 25 +++- src/specify_cli/__init__.py | 8 +- templates/commands/analyze.md | 103 +++++++++++++ templates/commands/clarify.md | 161 +++++++++++++++++++++ templates/commands/plan.md | 1 + 9 files changed, 324 insertions(+), 20 deletions(-) create mode 100644 templates/commands/analyze.md create mode 100644 templates/commands/clarify.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 125a68b4e..92cb0b8b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,14 @@ All notable changes to the Specify CLI will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.0.17] - 2025-09-22 + +### Added + +- New `/clarify` command template to surface up to 5 targeted clarification questions for an existing spec and persist answers into a Clarifications section in the spec. +- New `/analyze` command template providing a non-destructive cross-artifact discrepancy and alignment report (spec, clarifications, plan, tasks, constitution) inserted after `/tasks` and before `/implement`. + - Note: Constitution rules are explicitly treated as non-negotiable; any conflict is a CRITICAL finding requiring artifact remediation, not weakening of principles. + ## [0.0.16] - 2025-09-22 ### Added diff --git a/README.md b/README.md index c1ea551f2..c6d9b8f26 100644 --- a/README.md +++ b/README.md @@ -206,8 +206,10 @@ After running `specify init`, your AI coding agent will have access to these sla |-----------------|-----------------------------------------------------------------------| | `/constitution` | Create or update project governing principles and development guidelines | | `/specify` | Define what you want to build (requirements and user stories) | +| `/clarify` | Clarify underspecified areas (must be run before `/plan` unless explicitly skipped; formerly `/quizme`) | | `/plan` | Create technical implementation plans with your chosen tech stack | | `/tasks` | Generate actionable task lists for implementation | +| `/analyze` | Cross-artifact consistency & coverage analysis (run after /tasks, before /implement) | | `/implement` | Execute all tasks to build the feature according to the plan | ### Environment Variables @@ -392,9 +394,19 @@ At this stage, your project folder contents should resemble the following: └── tasks-template.md ``` -### **STEP 3:** Functional specification clarification +### **STEP 3:** Functional specification clarification (required before planning) -With the baseline specification created, you can go ahead and clarify any of the requirements that were not captured properly within the first shot attempt. For example, you could use a prompt like this within the same Claude Code session: +With the baseline specification created, you can go ahead and clarify any of the requirements that were not captured properly within the first shot attempt. + +You should run the structured clarification workflow **before** creating a technical plan to reduce rework downstream. + +Preferred order: +1. Use `/clarify` (structured) – sequential, coverage-based questioning that records answers in a Clarifications section. +2. Optionally follow up with ad-hoc free-form refinement if something still feels vague. + +If you intentionally want to skip clarification (e.g., spike or exploratory prototype), explicitly state that so the agent doesn't block on missing clarifications. + +Example free-form refinement prompt (after `/clarify` if still needed): ```text For each sample project or project that you create there should be a variable number of tasks between 5 and 15 diff --git a/pyproject.toml b/pyproject.toml index 131f6c40d..559bad2d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "specify-cli" -version = "0.0.16" +version = "0.0.17" description = "Specify CLI, part of GitHub Spec Kit. A tool to bootstrap your projects for Spec-Driven Development (SDD)." requires-python = ">=3.11" dependencies = [ diff --git a/scripts/bash/check-prerequisites.sh b/scripts/bash/check-prerequisites.sh index d8e79301b..f32b6245a 100644 --- a/scripts/bash/check-prerequisites.sh +++ b/scripts/bash/check-prerequisites.sh @@ -82,14 +82,20 @@ source "$SCRIPT_DIR/common.sh" eval $(get_feature_paths) check_feature_branch "$CURRENT_BRANCH" "$HAS_GIT" || exit 1 -# If paths-only mode, output paths and exit +# If paths-only mode, output paths and exit (support JSON + paths-only combined) if $PATHS_ONLY; then - echo "REPO_ROOT: $REPO_ROOT" - echo "BRANCH: $CURRENT_BRANCH" - echo "FEATURE_DIR: $FEATURE_DIR" - echo "FEATURE_SPEC: $FEATURE_SPEC" - echo "IMPL_PLAN: $IMPL_PLAN" - echo "TASKS: $TASKS" + if $JSON_MODE; then + # Minimal JSON paths payload (no validation performed) + printf '{"REPO_ROOT":"%s","BRANCH":"%s","FEATURE_DIR":"%s","FEATURE_SPEC":"%s","IMPL_PLAN":"%s","TASKS":"%s"}\n' \ + "$REPO_ROOT" "$CURRENT_BRANCH" "$FEATURE_DIR" "$FEATURE_SPEC" "$IMPL_PLAN" "$TASKS" + else + echo "REPO_ROOT: $REPO_ROOT" + echo "BRANCH: $CURRENT_BRANCH" + echo "FEATURE_DIR: $FEATURE_DIR" + echo "FEATURE_SPEC: $FEATURE_SPEC" + echo "IMPL_PLAN: $IMPL_PLAN" + echo "TASKS: $TASKS" + fi exit 0 fi diff --git a/scripts/powershell/check-prerequisites.ps1 b/scripts/powershell/check-prerequisites.ps1 index f039419f7..d61c3b994 100644 --- a/scripts/powershell/check-prerequisites.ps1 +++ b/scripts/powershell/check-prerequisites.ps1 @@ -63,14 +63,25 @@ if (-not (Test-FeatureBranch -Branch $paths.CURRENT_BRANCH -HasGit:$paths.HAS_GI exit 1 } -# If paths-only mode, output paths and exit +# If paths-only mode, output paths and exit (support combined -Json -PathsOnly) if ($PathsOnly) { - Write-Output "REPO_ROOT: $($paths.REPO_ROOT)" - Write-Output "BRANCH: $($paths.CURRENT_BRANCH)" - Write-Output "FEATURE_DIR: $($paths.FEATURE_DIR)" - Write-Output "FEATURE_SPEC: $($paths.FEATURE_SPEC)" - Write-Output "IMPL_PLAN: $($paths.IMPL_PLAN)" - Write-Output "TASKS: $($paths.TASKS)" + if ($Json) { + [PSCustomObject]@{ + REPO_ROOT = $paths.REPO_ROOT + BRANCH = $paths.CURRENT_BRANCH + FEATURE_DIR = $paths.FEATURE_DIR + FEATURE_SPEC = $paths.FEATURE_SPEC + IMPL_PLAN = $paths.IMPL_PLAN + TASKS = $paths.TASKS + } | ConvertTo-Json -Compress + } else { + Write-Output "REPO_ROOT: $($paths.REPO_ROOT)" + Write-Output "BRANCH: $($paths.CURRENT_BRANCH)" + Write-Output "FEATURE_DIR: $($paths.FEATURE_DIR)" + Write-Output "FEATURE_SPEC: $($paths.FEATURE_SPEC)" + Write-Output "IMPL_PLAN: $($paths.IMPL_PLAN)" + Write-Output "TASKS: $($paths.TASKS)" + } exit 0 } diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index bb231f5ea..83d2fdf87 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -1060,9 +1060,11 @@ def init( steps_lines.append(f"{step_num}. Start using slash commands with your AI agent:") steps_lines.append(" 2.1 [cyan]/constitution[/] - Establish project principles") steps_lines.append(" 2.2 [cyan]/specify[/] - Create specifications") - steps_lines.append(" 2.3 [cyan]/plan[/] - Create implementation plans") - steps_lines.append(" 2.4 [cyan]/tasks[/] - Generate actionable tasks") - steps_lines.append(" 2.5 [cyan]/implement[/] - Execute implementation") + steps_lines.append(" 2.3 [cyan]/clarify[/] - Clarify and de-risk specification (run before [cyan]/plan[/cyan])") + steps_lines.append(" 2.4 [cyan]/plan[/] - Create implementation plans") + steps_lines.append(" 2.5 [cyan]/tasks[/] - Generate actionable tasks") + steps_lines.append(" 2.6 [cyan]/analyze[/] - Validate alignment & surface inconsistencies (read-only)") + steps_lines.append(" 2.7 [cyan]/implement[/] - Execute implementation") steps_panel = Panel("\n".join(steps_lines), title="Next Steps", border_style="cyan", padding=(1,2)) console.print() diff --git a/templates/commands/analyze.md b/templates/commands/analyze.md new file mode 100644 index 000000000..499dc8712 --- /dev/null +++ b/templates/commands/analyze.md @@ -0,0 +1,103 @@ +--- +description: Perform a non-destructive cross-artifact consistency and quality analysis across spec.md, plan.md, and tasks.md after task generation. +scripts: + sh: scripts/bash/check-prerequisites.sh --json --require-tasks --include-tasks + ps: scripts/powershell/check-prerequisites.ps1 -Json -RequireTasks -IncludeTasks +--- + +The user input to you can be provided directly by the agent or as a command argument - you **MUST** consider it before proceeding with the prompt (if not empty). + +User input: + +$ARGUMENTS + +Goal: Identify inconsistencies, duplications, ambiguities, and underspecified items across the three core artifacts (`spec.md`, `plan.md`, `tasks.md`) before implementation. This command MUST run only after `/tasks` has successfully produced a complete `tasks.md`. + +STRICTLY READ-ONLY: Do **not** modify any files. Output a structured analysis report. Offer an optional remediation plan (user must explicitly approve before any follow-up editing commands would be invoked manually). + +Constitution Authority: The project constitution (`/memory/constitution.md`) is **non-negotiable** within this analysis scope. Any conflict with a constitution MUST statement is automatically classified as CRITICAL. Resolution MUST adjust the spec, plan, or tasks—never dilute, reinterpret, or silently ignore the principle. If a principle itself needs change, that must occur in a separate, explicit constitution update outside `/analyze`. + +Execution steps: + +1. Run `{SCRIPT}` once from repo root and parse JSON for FEATURE_DIR and AVAILABLE_DOCS. Derive absolute paths: + - SPEC = FEATURE_DIR/spec.md + - PLAN = FEATURE_DIR/plan.md + - TASKS = FEATURE_DIR/tasks.md + Abort with an error message if any required file is missing (instruct the user to run missing prerequisite command). + +2. Load artifacts: + - Parse spec.md sections: Overview/Context, Functional Requirements, Non-Functional Requirements, User Stories, Edge Cases (if present). + - Parse plan.md: Architecture/stack choices, Data Model references, Phases, Technical constraints. + - Parse tasks.md: Task IDs, descriptions, phase grouping, parallel markers [P], referenced file paths. + - Load constitution `/memory/constitution.md` for principle validation. + +3. Build internal semantic models: + - Requirements inventory: Each functional + non-functional requirement with a stable key (derive slug based on imperative phrase; e.g., "User can upload file" -> `user-can-upload-file`). + - User story/action inventory. + - Task coverage mapping: Map each task to one or more requirements or stories (inference by keyword / explicit reference patterns like IDs or key phrases). + - Constitution rule set: Extract principle names and any MUST/SHOULD normative statements. + +4. Detection passes: + A. Duplication detection: + - Identify near-duplicate requirements (similarity > ~0.85 cosine or high token overlap). Mark lower-quality phrasing for consolidation. + B. Ambiguity detection: + - Flag vague adjectives (fast, scalable, secure, intuitive, robust) lacking measurable criteria. + - Flag unresolved placeholders (TODO, TKTK, ???, , etc.). + C. Underspecification: + - Requirements with verbs but missing object or measurable outcome. + - User stories missing acceptance criteria alignment. + - Tasks referencing files or components not defined in spec/plan. + D. Constitution alignment: + - Any requirement or plan element conflicting with a MUST principle. + - Missing mandated sections or quality gates from constitution. + E. Coverage gaps: + - Requirements with zero associated tasks. + - Tasks with no mapped requirement/story. + - Non-functional requirements not reflected in tasks (e.g., performance, security). + F. Inconsistency: + - Terminology drift (same concept named differently across files). + - Data entities referenced in plan but absent in spec (or vice versa). + - Task ordering contradictions (e.g., integration tasks before foundational setup tasks without dependency note). + +5. Severity assignment heuristic: + - CRITICAL: Violates constitution MUST, missing core spec artifact, or requirement with zero coverage that blocks baseline functionality. + - HIGH: Duplicate or conflicting requirement, ambiguous security/performance attribute, untestable acceptance criterion. + - MEDIUM: Terminology drift, missing non-functional task coverage, underspecified edge case. + - LOW: Style/wording improvements, minor redundancy not affecting execution order. + +6. Produce a Markdown report (no file writes) with sections: + + ### Specification Analysis Report + | ID | Category | Severity | Location(s) | Summary | Recommendation | + |----|----------|----------|-------------|---------|----------------| + | A1 | Duplication | HIGH | spec.md:L120-134 | Two similar requirements ... | Merge phrasing; keep clearer version | + (Add one row per finding; generate stable IDs prefixed by category initial.) + + Additional subsections: + - Coverage Summary Table: + | Requirement Key | Has Task? | Task IDs | Notes | + - Constitution Alignment Issues (if any) + - Unmapped Tasks (if any) + - Metrics: + * Total Requirements + * Total Tasks + * Coverage % (requirements with >=1 task) + * Ambiguity Count + * Duplication Count + * Critical Issues Count + +7. At end of report, output a concise Next Actions block: + - If CRITICAL issues exist: Recommend resolving before `/implement`. + - If only LOW/MEDIUM: User may proceed, but provide improvement suggestions. + - Provide explicit command suggestions: e.g., "Run /specify with refinement", "Run /plan to adjust architecture", "Manually edit tasks.md to add coverage for 'performance-metrics'". + +8. Ask the user: "Would you like me to suggest concrete remediation edits for the top N issues?" (Do NOT apply them automatically.) + +Behavior rules: +- NEVER modify files. +- NEVER hallucinate missing sections—if absent, report them. +- KEEP findings deterministic: if rerun without changes, produce consistent IDs and counts. +- LIMIT total findings in the main table to 50; aggregate remainder in a summarized overflow note. +- If zero issues found, emit a success report with coverage statistics and proceed recommendation. + +Context: {ARGS} diff --git a/templates/commands/clarify.md b/templates/commands/clarify.md new file mode 100644 index 000000000..a53be43af --- /dev/null +++ b/templates/commands/clarify.md @@ -0,0 +1,161 @@ +--- +description: Identify underspecified areas in the current feature spec by asking up to 5 highly targeted clarification questions and encoding answers back into the spec. (Renamed from /quizme to /clarify) +scripts: + sh: scripts/bash/check-prerequisites.sh --json --paths-only + ps: scripts/powershell/check-prerequisites.ps1 -Json -PathsOnly +--- + +The user input to you can be provided directly by the agent or as a command argument - you **MUST** consider it before proceeding with the prompt (if not empty). + +User input: + +$ARGUMENTS + +Goal: Detect and reduce ambiguity or missing decision points in the active feature specification and record the clarifications directly in the spec file. + +Note: This clarification workflow is expected to run (and be completed) BEFORE invoking `/plan`. If the user explicitly states they are skipping clarification (e.g., exploratory spike), you may proceed, but must warn that downstream rework risk increases. + +Execution steps: + +1. Run `{SCRIPT}` from repo root **once** (combined `--json --paths-only` mode / `-Json -PathsOnly`). Parse minimal JSON payload fields: + - `FEATURE_DIR` + - `FEATURE_SPEC` + - (Optionally capture `IMPL_PLAN`, `TASKS` for future chained flows.) + - If JSON parsing fails, abort and instruct user to re-run `/specify` or verify feature branch environment. + +2. Load the current spec file. Perform a structured ambiguity & coverage scan using this taxonomy. For each category, mark status: Clear / Partial / Missing. Produce an internal coverage map used for prioritization (do not output raw map unless no questions will be asked). + + Functional Scope & Behavior: + - Core user goals & success criteria + - Explicit out-of-scope declarations + - User roles / personas differentiation + + Domain & Data Model: + - Entities, attributes, relationships + - Identity & uniqueness rules + - Lifecycle/state transitions + - Data volume / scale assumptions + + Interaction & UX Flow: + - Critical user journeys / sequences + - Error/empty/loading states + - Accessibility or localization notes + + Non-Functional Quality Attributes: + - Performance (latency, throughput targets) + - Scalability (horizontal/vertical, limits) + - Reliability & availability (uptime, recovery expectations) + - Observability (logging, metrics, tracing signals) + - Security & privacy (authN/Z, data protection, threat assumptions) + - Compliance / regulatory constraints (if any) + + Integration & External Dependencies: + - External services/APIs and failure modes + - Data import/export formats + - Protocol/versioning assumptions + + Edge Cases & Failure Handling: + - Negative scenarios + - Rate limiting / throttling + - Conflict resolution (e.g., concurrent edits) + + Constraints & Tradeoffs: + - Technical constraints (language, storage, hosting) + - Explicit tradeoffs or rejected alternatives + + Terminology & Consistency: + - Canonical glossary terms + - Avoided synonyms / deprecated terms + + Completion Signals: + - Acceptance criteria testability + - Measurable Definition of Done style indicators + + Misc / Placeholders: + - TODO markers / unresolved decisions + - Ambiguous adjectives ("robust", "intuitive") lacking quantification + + For each category with Partial or Missing status, add a candidate question opportunity unless: + - Clarification would not materially change implementation or validation strategy + - Information is better deferred to planning phase (note internally) + +3. Generate (internally) a prioritized queue of candidate clarification questions (maximum 5). Do NOT output them all at once. Apply these constraints: + - Maximum of 5 total questions across the whole session. + - Each question must be answerable with EITHER: + * A short multiple‑choice selection (2–5 distinct, mutually exclusive options), OR + * A one-word / short‑phrase answer (explicitly constrain: "Answer in <=5 words"). + - Only include questions whose answers materially impact architecture, data modeling, task decomposition, test design, UX behavior, operational readiness, or compliance validation. + - Ensure category coverage balance: attempt to cover the highest impact unresolved categories first; avoid asking two low-impact questions when a single high-impact area (e.g., security posture) is unresolved. + - Exclude questions already answered, trivial stylistic preferences, or plan-level execution details (unless blocking correctness). + - Favor clarifications that reduce downstream rework risk or prevent misaligned acceptance tests. + - If more than 5 categories remain unresolved, select the top 5 by (Impact * Uncertainty) heuristic. + +4. Sequential questioning loop (interactive): + - Present EXACTLY ONE question at a time. + - For multiple‑choice questions render options as a Markdown table: + + | Option | Description | + |--------|-------------| + | A |