|
---
# Agentic workflow definition: identity, triggers, permissions, engine,
# tools and safe outputs for the Claude Token Optimizer.
name: Claude Token Optimizer
description: Analyzes the most expensive Claude workflow identified by the token usage analyzer and creates an optimization issue with Anthropic-specific token-saving recommendations including cache efficiency improvements
on:
  # Runs after the analyzer workflow completes on main, or on manual dispatch.
  workflow_run:
    workflows: ["Claude Token Usage Analyzer"]
    types: [completed]
    branches: [main]
  workflow_dispatch:
  # NOTE(review): presumably skips the run when this issue search returns a
  # match (an optimization issue is already open) — confirm against gh-aw docs.
  skip-if-match: 'is:issue is:open in:title "⚡ Claude Token Optimization:"'

# Read-only token scopes; the issue itself is created via safe-outputs below.
permissions:
  contents: read
  actions: read
  issues: read
  pull-requests: read

engine: copilot
features:
  copilot-requests: true

strict: true

tools:
  bash: ["*"]
  github:
    toolsets:
      - default
      - issues
      - actions
      - repos

safe-outputs:
  create-issue:
    title-prefix: "⚡ Claude Token Optimization: "
    labels:
      - automated-analysis
      - token-optimization
      - claude
      - cost-reduction
    expires: 7d
    max: 1
    close-older-issues: true
  noop:

network: defaults

timeout-minutes: 30

steps:
  # Pre-agent data collection. Populates /tmp/token-optimizer-claude/ with:
  #   claude-runs.json    - Claude runs from the last 24 hours
  #   top-workflow.json   - aggregate stats for the workflow with most tokens
  #   artifacts/          - downloaded artifacts of that workflow's latest run
  #   token-usage.jsonl   - per-request token records (empty if not found)
  #   cache-metrics.txt   - pre-computed cache summary (empty if no records)
  #   workflow-source.md  - the workflow's markdown source (only if found)
  #   declared-tools.txt  - `tools:` section of that source (only if found)
  # Also exports WORKFLOW_NAME through $GITHUB_ENV.
  - name: Find and download artifacts from the most expensive Claude workflow
    env:
      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    run: |
      set -euo pipefail
      mkdir -p /tmp/token-optimizer-claude

      echo "📥 Loading Claude workflow runs from last 24 hours..."
      # Best effort: if the logs command fails, fall back to an empty run list.
      ./gh-aw logs \
        --engine claude \
        --start-date -1d \
        --json \
        -c 300 \
        > /tmp/token-optimizer-claude/claude-runs.json 2>/dev/null || echo "[]" > /tmp/token-optimizer-claude/claude-runs.json

      RUN_COUNT=$(jq '. | length' /tmp/token-optimizer-claude/claude-runs.json 2>/dev/null || echo 0)
      # jq prints nothing (and still exits 0) for an empty file; normalise any
      # value that is not a plain integer to 0 so the numeric test cannot error
      # out and let the script continue without data.
      case "$RUN_COUNT" in
        ''|*[!0-9]*) RUN_COUNT=0 ;;
      esac
      echo "Found ${RUN_COUNT} Claude runs"

      if [ "$RUN_COUNT" -eq 0 ]; then
        echo "No Claude runs found, nothing to optimize"
        exit 0
      fi

      # Find the most expensive workflow (by total tokens across all its runs).
      # Missing tokenUsage / estimatedCost values count as 0: without the
      # default, a group whose runs all lack tokenUsage would evaluate
      # `null / length`, which is a jq error and would abort the step.
      echo "🔍 Identifying most expensive workflow..."
      jq -r '
        group_by(.workflowName) |
        map(
          (map(.tokenUsage // 0) | add) as $tokens |
          {
            workflow: .[0].workflowName,
            total_tokens: $tokens,
            total_cost: (map(.estimatedCost // 0) | add),
            run_count: length,
            avg_tokens: ($tokens / length),
            run_ids: map(.databaseId),
            latest_run_id: (sort_by(.createdAt) | last | .databaseId),
            latest_run_url: (sort_by(.createdAt) | last | .url)
          }
        ) |
        sort_by(.total_tokens) | reverse | .[0]
      ' /tmp/token-optimizer-claude/claude-runs.json > /tmp/token-optimizer-claude/top-workflow.json

      WORKFLOW_NAME=$(jq -r '.workflow' /tmp/token-optimizer-claude/top-workflow.json)
      LATEST_RUN_ID=$(jq -r '.latest_run_id' /tmp/token-optimizer-claude/top-workflow.json)
      echo "Most expensive workflow: $WORKFLOW_NAME (run: $LATEST_RUN_ID)"
      # Expose the name to later steps / the prompt via the job environment.
      echo "WORKFLOW_NAME=$WORKFLOW_NAME" >> "$GITHUB_ENV"

      # Download the firewall-audit-logs artifact from the latest run.
      # Downloads are best effort: a missing artifact must not fail the step.
      ARTIFACT_DIR="/tmp/token-optimizer-claude/artifacts"
      mkdir -p "$ARTIFACT_DIR"

      echo "📥 Downloading firewall-audit-logs from run $LATEST_RUN_ID..."
      gh run download "$LATEST_RUN_ID" \
        --repo "$GITHUB_REPOSITORY" \
        --name "firewall-audit-logs" \
        --dir "$ARTIFACT_DIR" \
        2>/dev/null || true

      # Also download agent artifacts
      echo "📥 Downloading agent artifacts from run $LATEST_RUN_ID..."
      gh run download "$LATEST_RUN_ID" \
        --repo "$GITHUB_REPOSITORY" \
        --name "agent" \
        --dir "$ARTIFACT_DIR/agent" \
        2>/dev/null || true

      # Find token-usage.jsonl (first match only). `-print -quit` stops find
      # after the first hit instead of piping into `head -1`, which under
      # `pipefail` can turn a SIGPIPE on find into a step failure.
      USAGE_FILE=$(find "$ARTIFACT_DIR" -name "token-usage.jsonl" -print -quit 2>/dev/null || true)
      if [ -n "$USAGE_FILE" ]; then
        echo "Found token-usage.jsonl: $USAGE_FILE"
        cp "$USAGE_FILE" /tmp/token-optimizer-claude/token-usage.jsonl
        echo "Records: $(wc -l < /tmp/token-optimizer-claude/token-usage.jsonl)"

        # Pre-compute Anthropic-specific metrics.
        # Uses only POSIX awk features (RSTART/RLENGTH): the three-argument
        # match(s, re, arr) form is a gawk extension and is a syntax error in
        # mawk, which would abort this step under `set -e`.
        echo "📊 Computing Anthropic cache efficiency metrics..."
        awk '
          # First integer value of JSON key `key` on the current line, else 0.
          function field(key,    hit) {
            if (!match($0, "\"" key "\" *: *[0-9]+")) return 0
            hit = substr($0, RSTART, RLENGTH)
            sub(/^[^:]*: */, "", hit)
            return hit + 0
          }
          BEGIN { ti=0; to=0; cr=0; cw=0; tr=0 }
          {
            ti += field("input_tokens")
            to += field("output_tokens")
            cr += field("cache_read_tokens")
            cw += field("cache_write_tokens")
            tr += 1
          }
          END {
            total = ti + to + cr + cw
            if (tr == 0) exit
            printf "Requests: %d\n", tr
            printf "Input tokens: %d\n", ti
            printf "Output tokens: %d\n", to
            printf "Cache read tokens: %d\n", cr
            printf "Cache write tokens: %d\n", cw
            printf "Total tokens: %d\n", total
            if (ti + cr > 0) printf "Cache hit rate: %.1f%%\n", (cr / (ti + cr)) * 100
            if (ti + cw > 0) printf "Cache write rate: %.1f%%\n", (cw / (ti + cw)) * 100
            if (cw > 0) printf "Cache read/write ratio: %.2f\n", (cr / cw)
          }' /tmp/token-optimizer-claude/token-usage.jsonl > /tmp/token-optimizer-claude/cache-metrics.txt
        cat /tmp/token-optimizer-claude/cache-metrics.txt
      else
        echo "No token-usage.jsonl found in artifacts"
        # Create empty placeholders so later reads do not fail on missing files.
        touch /tmp/token-optimizer-claude/token-usage.jsonl
        touch /tmp/token-optimizer-claude/cache-metrics.txt
      fi

      # Find the workflow markdown source: first try the conventional file
      # name (lower-cased, spaces -> dashes), then search by `name:` line.
      WORKFLOW_MD_NAME=$(echo "$WORKFLOW_NAME" | tr '[:upper:]' '[:lower:]' | tr ' ' '-')
      WORKFLOW_MD=".github/workflows/${WORKFLOW_MD_NAME}.md"
      if [ -f "$WORKFLOW_MD" ]; then
        echo "Found workflow source: $WORKFLOW_MD"
        cp "$WORKFLOW_MD" /tmp/token-optimizer-claude/workflow-source.md
      else
        # Literal prefix match on "name: <workflow name>" (awk index() == 1),
        # so characters such as . [ ( in the name are not treated as regex
        # syntax. The prefix is passed via the environment to avoid any
        # quoting or escape processing. find stops at the first matching file.
        WORKFLOW_MATCH=$(NAME_PREFIX="name: $WORKFLOW_NAME" find .github/workflows -name "*.md" \
          -exec awk 'index($0, ENVIRON["NAME_PREFIX"]) == 1 { hit = 1; exit } END { exit (hit ? 0 : 1) }' {} \; \
          -print -quit 2>/dev/null || true)
        if [ -n "$WORKFLOW_MATCH" ]; then
          echo "Found: $WORKFLOW_MATCH"
          cp "$WORKFLOW_MATCH" /tmp/token-optimizer-claude/workflow-source.md
        fi
      fi

      # Extract declared tools from workflow source: print the `---`-delimited
      # section(s), then keep the `tools:` line plus the 20 lines after it
      # (capped at 30 lines). No match is not an error.
      if [ -f /tmp/token-optimizer-claude/workflow-source.md ]; then
        sed -n '/^---$/,/^---$/p' /tmp/token-optimizer-claude/workflow-source.md | \
          grep -A20 "^tools:" | head -30 > /tmp/token-optimizer-claude/declared-tools.txt || true
      fi

# Shared prompt fragments pulled into this workflow.
imports: [shared/reporting.md]
---
| 171 | + |
| 172 | +# Claude Token Optimizer |
| 173 | + |
| 174 | +You are the Claude Token Optimizer. Your job is to analyze the most token-expensive Claude workflow from the past 24 hours and create a targeted optimization issue with specific, actionable recommendations — with special focus on Anthropic's unique caching economics. |
| 175 | + |
| 176 | +## Current Context |
| 177 | + |
| 178 | +- **Repository**: ${{ github.repository }} |
| 179 | +- **Analysis Date**: today's UTC date — obtain it by running `date -u +%Y-%m-%d` (shell substitutions are not expanded inside this prompt) |
| 180 | +- **Target Workflow**: ${{ env.WORKFLOW_NAME }} |
| 181 | + |
| 182 | +## Data Sources |
| 183 | + |
| 184 | +All data is in `/tmp/token-optimizer-claude/`: |
| 185 | + |
| 186 | +- **`claude-runs.json`** — All Claude runs from the last 24 hours |
| 187 | +- **`top-workflow.json`** — Statistics for the most expensive workflow |
| 188 | +- **`token-usage.jsonl`** — Per-request token records from the target workflow's last run (may be empty) |
| 189 | +- **`cache-metrics.txt`** — Pre-computed Anthropic cache efficiency summary |
| 190 | +- **`workflow-source.md`** — The workflow's markdown source (may not exist if not found) |
| 191 | +- **`declared-tools.txt`** — Tools declared in the workflow frontmatter |
| 192 | +- **`artifacts/agent/`** — Agent artifacts (prompt, MCP logs) from the last run |
| 193 | + |
| 194 | +## Anthropic Pricing Context |
| 195 | + |
| 196 | +Before analyzing, understand Anthropic's pricing model: |
| 197 | + |
| 198 | +| Token Type | Cost Relative to Input | |
| 199 | +|------------|----------------------| |
| 200 | +| Input tokens | 1× (baseline) | |
| 201 | +| Output tokens | ~3-5× more expensive than input | |
| 202 | +| Cache write tokens | ~1.25× (investment for future savings) | |
| 203 | +| Cache read tokens | ~0.1× (90% discount vs input) | |
| 204 | + |
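| 205 | +**Cache ROI formula**: Cache write breaks even after `1.25 / (1 - 0.1)` ≈ **1.4 reads**. This is a conservative threshold: it charges the full 1.25× write cost against the 0.9× saved per read. Counting only the 0.25× premium over a normal input token, the break-even is `0.25 / 0.9` ≈ 0.3 reads, so a ratio above 1.4 is clearly profitable. |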
| 206 | + |
| 207 | +**Cache TTL**: Default 5 minutes; extended TTL requires explicit configuration. If turns are spread > 5 min apart, cache may not be reused. |
| 208 | + |
| 209 | +## Analysis Process |
| 210 | + |
| 211 | +### Phase 1: Load Workflow Statistics |
| 212 | + |
| 213 | +```bash |
| 214 | +cat /tmp/token-optimizer-claude/top-workflow.json |
| 215 | +cat /tmp/token-optimizer-claude/cache-metrics.txt |
| 216 | +``` |
| 217 | + |
| 218 | +### Phase 2: Analyze Anthropic Cache Efficiency |
| 219 | + |
| 220 | +The `cache-metrics.txt` file contains: |
| 221 | +- **Cache hit rate**: `cache_read / (input + cache_read)` — higher is better |
| 222 | +- **Cache write rate**: `cache_write / (input + cache_write)` — shows how much is being cached |
| 223 | +- **Cache read/write ratio**: `cache_read / cache_write` — must be > 1.4 for cache to save money |
| 224 | + |
| 225 | +**Interpret the metrics**: |
| 226 | + |
| 227 | +| Cache Hit Rate | Assessment | Action | |
| 228 | +|----------------|-----------|--------| |
| 229 | +| > 60% | Excellent — caching is very effective | No action needed | |
| 230 | +| 30-60% | Good | Minor optimizations possible | |
| 231 | +| 10-30% | Poor | Restructure prompt to improve caching | |
| 232 | +| < 10% | Very poor | Cache may not be working; check min token threshold (1024) | |
| 233 | + |
| 234 | +| Cache Read/Write Ratio | Assessment | Action | |
| 235 | +|------------------------|-----------|--------| |
| 236 | +| > 5× | Excellent — writes amortized well | No action needed | |
| 237 | +| 1.4-5× | Good — cache is profitable | Minor tuning possible | |
| 238 | +| < 1.4× | Poor — not saving money | Increase cache reuse or disable writes | |
| 239 | +| 0 (no reads) | Cache writes are pure waste | Investigate why cache isn't being read | |
| 240 | + |
| 241 | +### Phase 3: Analyze Tool Usage |
| 242 | + |
| 243 | +```bash |
| 244 | +cat /tmp/token-optimizer-claude/declared-tools.txt |
| 245 | + |
| 246 | +# Check actual tool invocations from agent logs |
| 247 | +find /tmp/token-optimizer-claude/artifacts/agent -name "*.log" 2>/dev/null | \ |
| 248 | + xargs grep -oh 'mcp__[a-z_]*__[a-z_]*' 2>/dev/null | sort | uniq -c | sort -rn | head -30 |
| 249 | +``` |
| 250 | + |
| 251 | +Compare declared vs. used tools. Each unused toolset injects tool descriptions into every turn of the context window (~500 tokens per toolset). |
| 252 | + |
| 253 | +### Phase 4: Review Workflow Source for Prompt Optimization |
| 254 | + |
| 255 | +```bash |
| 256 | +cat /tmp/token-optimizer-claude/workflow-source.md |
| 257 | +``` |
| 258 | + |
| 259 | +Look for: |
| 260 | +1. **Static content repeated every turn** — good candidate for caching (must be at start, ≥ 1024 tokens) |
| 261 | +2. **Verbose instructions** that could be shortened |
| 262 | +3. **Missing `max-turns`** limit — unbounded conversations accumulate context |
| 263 | +4. **High output expectations** — if the prompt asks for very long responses, output tokens are expensive for Claude |
| 264 | + |
| 265 | +### Phase 5: Identify Specific Optimization Opportunities |
| 266 | + |
| 267 | +Prioritize by estimated savings: |
| 268 | + |
| 269 | +#### A. Cache Efficiency Improvements (often highest impact for Claude) |
| 270 | +- **Low hit rate + system prompt ≥ 1024 tokens**: Already has potential — check if tool descriptions are before or after the cache boundary |
| 271 | +- **Low hit rate + system prompt < 1024 tokens**: Add content to reach 1024 token minimum for cache activation |
| 272 | +- **Zero cache reads with writes**: Likely the workflow is not reusing context across turns; check if turns are too far apart |
| 273 | + |
| 274 | +#### B. Output Token Reduction (second highest impact) |
| 275 | +- If output tokens > 30% of total: add instructions like "Be concise, use bullet points, avoid repetition" |
| 276 | +- Look for places in the prompt where Claude is asked to produce long reports that could be structured more efficiently |
| 277 | + |
| 278 | +#### C. Unused Tool Exclusions |
| 279 | +- Identify tools declared but never called |
| 280 | +- Each unused tool description in Anthropic's context costs ~500 input tokens per turn |
| 281 | + |
| 282 | +#### D. Turn Count Reduction |
| 283 | +- High turn count means high cost since each turn resends the (growing) context window |
| 284 | +- Consider pre-fetching data in `steps:` before the agent runs |
| 285 | +- Use more directive prompts to reduce exploration turns |
| 286 | + |
| 287 | +### Phase 6: Create Optimization Issue |
| 288 | + |
| 289 | +Create an issue with the title: `[workflow-name] (avg [N]k tokens/run)` — the prefix `⚡ Claude Token Optimization:` is automatically added. |
| 290 | + |
| 291 | +#### Issue Body Structure |
| 292 | + |
| 293 | +```markdown |
| 294 | +### Target Workflow: [workflow-name] |
| 295 | + |
| 296 | +**Why this workflow?** Highest total token consumption across Claude workflows in the last 24 hours. |
| 297 | + |
| 298 | +| Metric | Value | |
| 299 | +|--------|-------| |
| 300 | +| Runs (24h) | [N] | |
| 301 | +| Avg tokens/run | [N]k | |
| 302 | +| Total est. cost (24h) | $[X] | |
| 303 | +| Avg turns/run | [N] | |
| 304 | + |
| 305 | +### Anthropic Cache Analysis |
| 306 | + |
| 307 | +| Metric | Value | Assessment | |
| 308 | +|--------|-------|------------| |
| 309 | +| Cache hit rate | [pct]% | ✅/⚠️/❌ | |
| 310 | +| Cache write rate | [pct]% | ✅/⚠️/❌ | |
| 311 | +| Cache read/write ratio | [ratio]× | ✅/⚠️/❌ | |
| 312 | +| Estimated cache savings | ~$[X] (vs no cache) | — | |
| 313 | + |
| 314 | +_(from token-usage.jsonl — if unavailable, based on run-level metrics)_ |
| 315 | + |
| 316 | +### Optimization Recommendations |
| 317 | + |
| 318 | +#### 1. [Highest Impact — e.g., Cache Efficiency] |
| 319 | + |
| 320 | +**Potential savings**: ~[N]k tokens/run (~$[X]/run) |
| 321 | + |
| 322 | +**Current state**: [What is happening — e.g., "Cache write rate is 25% but cache hit rate is 2%, meaning cache writes are not being amortized (ratio: 0.08×, break-even is 1.4×)"] |
| 323 | + |
| 324 | +**Root cause**: [e.g., "System prompt is only 800 tokens — below Claude's 1024-token minimum for automatic caching"] |
| 325 | + |
| 326 | +**Recommended change**: |
| 327 | +```diff |
| 328 | +# In .github/workflows/[workflow-name].md frontmatter or prompt: |
| 329 | +- [current state] |
| 330 | ++ [recommended state] |
| 331 | +``` |
| 332 | + |
| 333 | +**Why this helps**: [Explanation of mechanism] |
| 334 | + |
| 335 | +#### 2. [Second Recommendation — e.g., Output Reduction] |
| 336 | + |
| 337 | +**Potential savings**: ~[N]k output tokens/run (~$[X]/run) |
| 338 | + |
| 339 | +... |
| 340 | + |
| 341 | +#### 3. [Third Recommendation — e.g., Unused Tools] |
| 342 | + |
| 343 | +... |
| 344 | + |
| 345 | +<details> |
| 346 | +<summary><b>Tool Usage Analysis</b></summary> |
| 347 | + |
| 348 | +**Declared tools** (from frontmatter): |
| 349 | +[list from declared-tools.txt] |
| 350 | + |
| 351 | +**Tools actually invoked** (from agent logs): |
| 352 | +[list from mcp call analysis] |
| 353 | + |
| 354 | +**Unused tools** (candidates for removal): |
| 355 | +- `[toolset/tool]` — never called, saves ~500 tokens/turn if removed |
| 356 | + |
| 357 | +</details> |
| 358 | + |
| 359 | +<details> |
| 360 | +<summary><b>Token Breakdown</b></summary> |
| 361 | + |
| 362 | +| Token Type | Count | % of Total | Est. Cost Weight | |
| 363 | +|------------|-------|------------|-----------------| |
| 364 | +| Input | [n] | [pct]% | 1× | |
| 365 | +| Output | [n] | [pct]% | ~4× | |
| 366 | +| Cache Read | [n] | [pct]% | 0.1× | |
| 367 | +| Cache Write | [n] | [pct]% | 1.25× | |
| 368 | + |
| 369 | +</details> |
| 370 | + |
| 371 | +### Implementation Checklist |
| 372 | + |
| 373 | +- [ ] Apply recommended changes to `.github/workflows/[workflow-name].md` |
| 374 | +- [ ] Run `make recompile` to regenerate the lock file |
| 375 | +- [ ] Trigger a manual run via `workflow_dispatch` to verify |
| 376 | +- [ ] Check cache metrics improve in next analyzer report (look for higher cache hit rate) |
| 377 | + |
| 378 | +### References |
| 379 | + |
| 380 | +- [Last run of [workflow-name]](LATEST_RUN_URL) |
| 381 | +- Analysis triggered by: [§RUN_ID](RUN_URL) |
| 382 | +``` |
| 383 | + |
| 384 | +## Important Guidelines |
| 385 | + |
| 386 | +- **Anthropic caching is nuanced**: Don't flag cache writes as waste unless the read/write ratio is clearly below break-even (< 1.4×). Cache writes at 125% cost are an investment. |
| 387 | +- **Output tokens are disproportionately expensive for Claude** (3-5× input cost) — reducing verbose output has high ROI. |
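| 388 | +- **1024-token minimum**: Claude won't cache a prompt shorter than 1024 tokens (the minimum is model-dependent and higher on some models — e.g., 2048 on some Haiku models — so check the target model). If the system prompt is shorter, caching isn't available regardless of configuration. |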
| 389 | +- **Be specific**: Name exact tools, exact token counts, exact cost estimates. |
| 390 | +- **Prioritize by impact**: List recommendations from highest to lowest savings. |
| 391 | +- **`noop` when appropriate**: If the workflow is already well-optimized or no meaningful data is available, call `noop` with explanation. |
| 392 | + |
| 393 | +**Important**: You MUST call a safe-output tool (`create-issue` or `noop`) at the end of your analysis. Failing to call any safe-output tool is the most common cause of workflow failures. |
| 394 | + |
| 395 | +```json |
| 396 | +{"noop": {"message": "No action needed: [brief explanation]"}} |
| 397 | +``` |
0 commit comments