From ec4d80f2a32f0e540660905fe1fce60681c6637e Mon Sep 17 00:00:00 2001 From: Larry Ewing Date: Tue, 21 Apr 2026 11:26:28 -0500 Subject: [PATCH] Switch to the arcade-skills plugin for ci-analysis --- .claude/settings.json | 13 + .github/copilot/settings.json | 13 + .github/skills/ci-analysis/SKILL.md | 261 -- .../references/azdo-helix-reference.md | 93 - .../ci-analysis/references/azure-cli.md | 96 - .../references/binlog-comparison.md | 119 - .../references/build-progression-analysis.md | 219 -- .../references/delegation-patterns.md | 124 - .../ci-analysis/references/helix-artifacts.md | 285 --- .../references/manual-investigation.md | 98 - .../ci-analysis/references/sql-tracking.md | 107 - .../ci-analysis/scripts/Get-CIStatus.ps1 | 2274 ----------------- .github/skills/vmr-codeflow-status/SKILL.md | 230 -- .../references/vmr-build-topology.md | 252 -- .../references/vmr-codeflow-reference.md | 144 -- .../scripts/Get-CodeflowStatus.ps1 | 1476 ----------- 16 files changed, 26 insertions(+), 5778 deletions(-) create mode 100644 .claude/settings.json create mode 100644 .github/copilot/settings.json delete mode 100644 .github/skills/ci-analysis/SKILL.md delete mode 100644 .github/skills/ci-analysis/references/azdo-helix-reference.md delete mode 100644 .github/skills/ci-analysis/references/azure-cli.md delete mode 100644 .github/skills/ci-analysis/references/binlog-comparison.md delete mode 100644 .github/skills/ci-analysis/references/build-progression-analysis.md delete mode 100644 .github/skills/ci-analysis/references/delegation-patterns.md delete mode 100644 .github/skills/ci-analysis/references/helix-artifacts.md delete mode 100644 .github/skills/ci-analysis/references/manual-investigation.md delete mode 100644 .github/skills/ci-analysis/references/sql-tracking.md delete mode 100644 .github/skills/ci-analysis/scripts/Get-CIStatus.ps1 delete mode 100644 .github/skills/vmr-codeflow-status/SKILL.md delete mode 100644 
.github/skills/vmr-codeflow-status/references/vmr-build-topology.md delete mode 100644 .github/skills/vmr-codeflow-status/references/vmr-codeflow-reference.md delete mode 100644 .github/skills/vmr-codeflow-status/scripts/Get-CodeflowStatus.ps1 diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 00000000000000..94059e1c4313bd --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,13 @@ +{ + "extraKnownMarketplaces": { + "dotnet-arcade-skills": { + "source": { + "source": "github", + "repo": "dotnet/arcade-skills" + } + } + }, + "enabledPlugins": { + "dotnet-dnceng@dotnet-arcade-skills": true + } +} \ No newline at end of file diff --git a/.github/copilot/settings.json b/.github/copilot/settings.json new file mode 100644 index 00000000000000..94059e1c4313bd --- /dev/null +++ b/.github/copilot/settings.json @@ -0,0 +1,13 @@ +{ + "extraKnownMarketplaces": { + "dotnet-arcade-skills": { + "source": { + "source": "github", + "repo": "dotnet/arcade-skills" + } + } + }, + "enabledPlugins": { + "dotnet-dnceng@dotnet-arcade-skills": true + } +} \ No newline at end of file diff --git a/.github/skills/ci-analysis/SKILL.md b/.github/skills/ci-analysis/SKILL.md deleted file mode 100644 index 9c8fb5719b5aa6..00000000000000 --- a/.github/skills/ci-analysis/SKILL.md +++ /dev/null @@ -1,261 +0,0 @@ ---- -name: ci-analysis -description: Analyze CI build and test status from Azure DevOps and Helix for dotnet repository PRs. Use when checking CI status, investigating failures, determining if a PR is ready to merge, or given URLs containing dev.azure.com or helix.dot.net. Also use when asked "why is CI red", "test failures", "retry CI", "rerun tests", "is CI green", "build failed", "checks failing", or "flaky tests". ---- - -# Azure DevOps and Helix CI Analysis - -Analyze CI build status and test failures in Azure DevOps and Helix for dotnet repositories (runtime, sdk, aspnetcore, roslyn, and more). 
- -> 🚨 **NEVER** use `gh pr review --approve` or `--request-changes`. Only `--comment` is allowed. Approval and blocking are human-only actions. - -> 📝 **AI-generated content disclosure:** When posting any content to GitHub (PR comments, retry commands, analysis summaries) under a user's credentials — i.e., the account is **not** a dedicated "copilot" or "bot" account/app — you **MUST** include a concise, visible note (e.g. a `> [!NOTE]` alert) indicating the content was AI/Copilot-generated. Skip this if the user explicitly asks you to omit it. - -**Workflow**: Gather PR context (Step 0) → run the script → read the human-readable output + `[CI_ANALYSIS_SUMMARY]` JSON → synthesize recommendations yourself. The script collects data; you generate the advice. For supplementary investigation beyond the script, MCP tools (AzDO, Helix, GitHub) provide structured access when available; the script and `gh` CLI work independently when they're not. - -## When to Use This Skill - -Use this skill when: -- Checking CI status on a PR ("is CI passing?", "what's the build status?", "why is CI red?") -- Investigating CI failures or checking why a PR's tests are failing -- Determining if a PR is ready to merge based on CI results -- Debugging Helix test issues or analyzing build errors -- Given URLs containing `dev.azure.com`, `helix.dot.net`, or GitHub PR links with failing checks -- Asked questions like "why is this PR failing", "analyze the CI", "is CI green", "retry CI", "rerun tests", or "test failures" -- Investigating canceled or timed-out jobs for recoverable results - -## Script Limitations - -The `Get-CIStatus.ps1` script targets **Azure DevOps + Helix** infrastructure specifically. 
It won't help with: -- **GitHub Actions** workflows (different API, different log format) -- Repos not using **Helix** for test distribution (no Helix work items to query) -- Pure **build performance** questions (use MSBuild binlog analysis instead) - -However, the analysis patterns in this skill (interpreting failures, correlating with PR changes, distinguishing infrastructure vs. code issues) apply broadly even outside AzDO/Helix. - -## Quick Start - -```powershell -# Analyze PR failures (most common) - defaults to dotnet/runtime -./scripts/Get-CIStatus.ps1 -PRNumber 123445 -ShowLogs - -# Analyze by build ID -./scripts/Get-CIStatus.ps1 -BuildId 1276327 -ShowLogs - -# Query specific Helix work item -./scripts/Get-CIStatus.ps1 -HelixJob "4b24b2c2-..." -WorkItem "System.Net.Http.Tests" - -# Other dotnet repositories -./scripts/Get-CIStatus.ps1 -PRNumber 12345 -Repository "dotnet/aspnetcore" -./scripts/Get-CIStatus.ps1 -PRNumber 67890 -Repository "dotnet/sdk" -./scripts/Get-CIStatus.ps1 -PRNumber 11111 -Repository "dotnet/roslyn" -``` - -## Key Parameters - -| Parameter | Description | -|-----------|-------------| -| `-PRNumber` | GitHub PR number to analyze | -| `-BuildId` | Azure DevOps build ID | -| `-ShowLogs` | Fetch and display Helix console logs | -| `-Repository` | Target repo (default: dotnet/runtime) | -| `-MaxJobs` | Max failed jobs to show (default: 5) | -| `-SearchMihuBot` | Search MihuBot for related issues | - -## Three Modes - -The script operates in three distinct modes depending on what information you have: - -| You have... | Use | What you get | -|-------------|-----|-------------| -| A GitHub PR number | `-PRNumber 12345` | Full analysis: all builds, failures, known issues, structured JSON summary | -| An AzDO build ID | `-BuildId 1276327` | Single build analysis: timeline, failures, Helix results | -| A Helix job ID (optionally a specific work item) | `-HelixJob "..." 
[-WorkItem "..."]` | Deep dive: list work items for the job, or with `-WorkItem`, focus on a single work item's console logs, artifacts, and test results | - -> ❌ **Don't guess the mode.** If the user gives a PR URL, use `-PRNumber`. If they paste an AzDO build link, extract the build ID. If they reference a specific Helix job, use `-HelixJob`. - -## What the Script Does - -### PR Analysis Mode (`-PRNumber`) -1. Discovers AzDO builds associated with the PR (from GitHub check status; for full build history, query AzDO builds on `refs/pull/{PR}/merge` branch) -2. Fetches Build Analysis for known issues -3. Gets failed jobs from Azure DevOps timeline -4. **Separates canceled jobs from failed jobs** (canceled may be dependency-canceled or timeout-canceled) -5. Extracts Helix work item failures from each failed job -6. Fetches console logs (with `-ShowLogs`) -7. Searches for known issues with "Known Build Error" label -8. Correlates failures with PR file changes -9. **Emits structured summary** — `[CI_ANALYSIS_SUMMARY]` JSON block with all key facts for the agent to reason over - -> **After the script runs**, you (the agent) generate recommendations. The script collects data; you synthesize the advice. See [Generating Recommendations](#generating-recommendations) below. - -### Build ID Mode (`-BuildId`) -1. Fetches the build timeline directly (skips PR discovery) -2. Performs steps 3–7 from PR Analysis Mode, but does **not** fetch Build Analysis known issues or correlate failures with PR file changes (those require a PR number). Still emits `[CI_ANALYSIS_SUMMARY]` JSON. - -### Helix Job Mode (`-HelixJob` [and optional `-WorkItem`]) -1. With `-HelixJob` alone: enumerates work items for the job and summarizes their status -2. With `-HelixJob` and `-WorkItem`: queries the specific work item for status and artifacts -3. 
Fetches console logs and file listings, displays detailed failure information - -## Interpreting Results - -**Known Issues section**: Failures matching existing GitHub issues - these are tracked and being investigated. - -**Build Analysis check status**: The "Build Analysis" GitHub check is **green** only when *every* failure is matched to a known issue. If it's **red**, at least one failure is unaccounted for — do NOT claim "all failures are known issues" just because some known issues were found. You must verify each failing job is covered by a specific known issue before calling it safe to retry. - -**Canceled/timed-out jobs**: Jobs canceled due to earlier stage failures or AzDO timeouts. Dependency-canceled jobs don't need investigation. **Timeout-canceled jobs may have all-passing Helix results** — the "failure" is just the AzDO job wrapper timing out, not actual test failures. To verify: use `hlx_status` on each Helix job in the timed-out build (include passed work items). If all work items passed, the build effectively passed. - -> ❌ **Don't dismiss timed-out builds.** A build marked "failed" due to a 3-hour AzDO timeout can have 100% passing Helix work items. Check before concluding it failed. - -**PR Change Correlation**: Files changed by PR appearing in failures - likely PR-related. - -**Build errors**: Compilation failures need code fixes. - -**Helix failures**: Test failures on distributed infrastructure. - -**Local test failures**: Some repos (e.g., dotnet/sdk) run tests directly on build agents. These can also match known issues - search for the test name with the "Known Build Error" label. - -**Per-failure details** (`failedJobDetails` in JSON): Each failed job includes `errorCategory`, `errorSnippet`, and `helixWorkItems`. Use these for per-job classification instead of applying a single `recommendationHint` to all failures. 
- -Error categories: `test-failure`, `build-error`, `test-timeout`, `crash` (exit codes 139/134/-4), `tests-passed-reporter-failed` (all tests passed but reporter crashed — genuinely infrastructure), `unclassified` (investigate manually). - -> ⚠️ **`crash` does NOT always mean tests failed.** Exit code -4 often means the Helix work item wrapper timed out *after* tests completed. Always check `testResults.xml` before concluding a crash is a real failure. See [Recovering Results from Crashed/Canceled Jobs](#recovering-results-from-crashedcanceled-jobs). - -> ⚠️ **Be cautious labeling failures as "infrastructure."** Only conclude infrastructure with strong evidence: Build Analysis match, identical failure on target branch, or confirmed outage. Exception: `tests-passed-reporter-failed` is genuinely infrastructure. - -> ❌ **Missing packages on flow PRs ≠ infrastructure.** Flow PRs can cause builds to request *different* packages. Check *which* package and *why* before assuming feed delay. - -### Recovering Results from Crashed/Canceled Jobs - -When an AzDO job is canceled (timeout) or Helix work items show `Crash` (exit code -4), the tests may have actually passed. Follow this procedure: - -1. **Find the Helix job IDs** — Read the AzDO "Send to Helix" step log and search for lines containing `Sent Helix Job`. Extract the job GUIDs. - -2. **Check Helix job status** — Get pass/fail summary for each job. Look at `failedCount` vs `passedCount`. - -3. **For work items marked Crash/Failed** — Check if tests actually passed despite the crash. Try structured test results first (TRX parsing), then search for pass/fail counts in result files without downloading, then download as last resort: - - Parse the XML: `total`, `passed`, `failed` attributes on the `` element - - If `failed=0` and `passed > 0`, the tests passed — the "crash" is the wrapper timing out after test completion - -4. 
**Verdict**: - - All work items passed or crash-with-passing-results → **Tests effectively passed.** The failure is infrastructure (wrapper timeout). - - Some work items have `failed > 0` in testResults.xml → **Real test failures.** Investigate those specific tests. - - No testResults.xml uploaded → Tests may not have run at all. Check console logs for errors. - -> This pattern is common with long-running test suites (e.g., WasmBuildTests) where tests complete but the Helix work item wrapper exceeds its timeout during result upload or cleanup. - -## Generating Recommendations - -After the script outputs the `[CI_ANALYSIS_SUMMARY]` JSON block, **you** synthesize recommendations. Do not parrot the JSON — reason over it. - -### Decision logic - -Read `recommendationHint` as a starting point, then layer in context: - -| Hint | Action | -|------|--------| -| `BUILD_SUCCESSFUL` | No failures. Confirm CI is green. | -| `KNOWN_ISSUES_DETECTED` | Known tracked issues found — but this does NOT mean all failures are covered. Check the Build Analysis check status: if it's red, some failures are unmatched. Only recommend retry for failures that specifically match a known issue; investigate the rest. | -| `LIKELY_PR_RELATED` | Failures correlate with PR changes. Lead with "fix these before retrying" and list `correlatedFiles`. | -| `POSSIBLY_TRANSIENT` | Failures could not be automatically classified — does NOT mean they are transient. Use `failedJobDetails` to investigate each failure individually. | -| `REVIEW_REQUIRED` | Could not auto-determine cause. Review failures manually. | -| `MERGE_CONFLICTS` | PR has merge conflicts — CI won't run. Tell the user to resolve conflicts. Offer to analyze a previous build by ID. | -| `NO_BUILDS` | No AzDO builds found (CI not triggered). Offer to check if CI needs to be triggered or analyze a previous build. 
| - -Then layer in nuance the heuristic can't capture: - -- **Mixed signals**: Some failures match known issues AND some correlate with PR changes → separate them. Known issues = safe to retry; correlated = fix first. -- **Canceled jobs with recoverable results**: If `canceledJobNames` is non-empty, mention that canceled jobs may have passing Helix results (see "Recovering Results from Crashed/Canceled Jobs"). -- **Build still in progress**: If `lastBuildJobSummary.pending > 0`, note that more failures may appear. -- **Multiple builds**: If `builds` has >1 entry, `lastBuildJobSummary` reflects only the last build — use `totalFailedJobs` for the aggregate count. -- **BuildId mode**: `knownIssues` and `prCorrelation` won't be populated. Say "Build Analysis and PR correlation not available in BuildId mode." - -### How to Retry - -- **AzDO builds**: Comment `/azp run {pipeline-name}` on the PR (e.g., `/azp run dotnet-sdk-public`) -- **All pipelines**: Comment `/azp run` to retry all failing pipelines -- **Helix work items**: Cannot be individually retried — must re-run the entire AzDO build - -### Tone and output format - -Be direct. Lead with the most important finding. Structure your response as: -1. **Summary verdict** (1-2 sentences) — Is CI green? Failures PR-related? Known issues? -2. **Failure details** (2-4 bullets) — what failed, why, evidence -3. **Recommended actions** (numbered) — retry, fix, investigate. Include `/azp run` commands. - -Synthesize from: JSON summary (structured facts) + human-readable output (details/logs) + Step 0 context (PR type, author intent). - -## Analysis Workflow - -### Step 0: Gather Context (before running anything) - -Before running the script, read the PR to understand what you're analyzing. Context changes how you interpret every failure. - -1. **Read PR metadata** — title, description, author, labels, linked issues -2. 
**Classify the PR type** — this determines your interpretation framework: - -| PR Type | How to detect | Interpretation shift | -|---------|--------------|---------------------| -| **Code PR** | Human author, code changes | Failures likely relate to the changes | -| **Flow/Codeflow PR** | Author is `dotnet-maestro[bot]`, title mentions "Update dependencies" | Missing packages may be behavioral, not infrastructure (see anti-pattern below) | -| **Backport** | Title mentions "backport", targets a release branch | Failures may be branch-specific; check if test exists on target branch | -| **Merge PR** | Merging between branches (e.g., release → main) | Conflicts and merge artifacts cause failures, not the individual changes | -| **Dependency update** | Bumps package versions, global.json changes | Build failures often trace to the dependency, not the PR's own code | - -3. **Check existing comments** — has someone already diagnosed the failures? Is there a retry pending? -4. **Note the changed files** — you'll use these to evaluate correlation after the script runs - -> ❌ **Don't skip Step 0.** Running the script without PR context leads to misdiagnosis — especially for flow PRs where "package not found" looks like infrastructure but is actually a code issue. - -### Step 1: Run the script - -Run with `-ShowLogs` for detailed failure info. - -### Step 2: Analyze results - -1. **Check Build Analysis** — If the Build Analysis GitHub check is **green**, all failures matched known issues and it's safe to retry. If it's **red**, some failures are unaccounted for — you must identify which failing jobs are covered by known issues and which are not. For 3+ failures, use SQL tracking to avoid missed matches (see [references/sql-tracking.md](references/sql-tracking.md)). -2. **Correlate with PR changes** — Same files failing = likely PR-related -3. **Compare with baseline** — If a test passes on the target branch but fails on the PR, compare Helix binlogs. 
See [references/binlog-comparison.md](references/binlog-comparison.md) — **delegate binlog download/extraction to subagents** to avoid burning context on mechanical work. -4. **Check build progression** — If the PR has multiple builds (multiple pushes), check whether earlier builds passed. A failure that appeared after a specific push narrows the investigation to those commits. See [references/build-progression-analysis.md](references/build-progression-analysis.md). Present findings as facts, not fix recommendations. -5. **Interpret patterns** (but don't jump to conclusions): - - Same error across many jobs → Real code issue - - Build Analysis flags a known issue → That *specific failure* is safe to retry (but others may not be) - - Failure is **not** in Build Analysis → Investigate further before assuming transient - - Device failures, Docker pulls, network timeouts → *Could* be infrastructure, but verify against the target branch first - - Test timeout but tests passed → Executor issue, not test failure -6. **Check for mismatch with user's question** — The script only reports builds for the current head SHA. If the user asks about a job, error, or cancellation that doesn't appear in the results, **ask** if they're referring to a prior build. Common triggers: - - User mentions a canceled job but `canceledJobNames` is empty - - User says "CI is failing" but the latest build is green - - User references a specific job name not in the current results - Offer to re-run with `-BuildId` if the user can provide the earlier build ID from AzDO. - -### Step 3: Verify before claiming - -Before stating a failure's cause, verify your claim: - -- **"Infrastructure failure"** → Did Build Analysis flag it? Does the same test pass on the target branch? If neither, don't call it infrastructure. -- **"Transient/flaky"** → Has it failed before? Is there a known issue? A single non-reproducing failure isn't enough to call it flaky. 
-- **"PR-related"** → Do the changed files actually relate to the failing test? Correlation in the script output is heuristic, not proof. -- **"Safe to retry"** → Are ALL failures accounted for (known issues or infrastructure), or are you ignoring some? Check the Build Analysis check status — if it's red, not all failures are matched. Map each failing job to a specific known issue before concluding "safe to retry." -- **"Not related to this PR"** → Have you checked if the test passes on the target branch? Don't assume — verify. - -## References - -- **Helix artifacts & binlogs**: See [references/helix-artifacts.md](references/helix-artifacts.md) -- **Binlog comparison (passing vs failing)**: See [references/binlog-comparison.md](references/binlog-comparison.md) -- **Build progression (commit-to-build correlation)**: See [references/build-progression-analysis.md](references/build-progression-analysis.md) -- **Subagent delegation patterns**: See [references/delegation-patterns.md](references/delegation-patterns.md) -- **Azure CLI deep investigation**: See [references/azure-cli.md](references/azure-cli.md) -- **Manual investigation steps**: See [references/manual-investigation.md](references/manual-investigation.md) -- **SQL tracking for investigations**: See [references/sql-tracking.md](references/sql-tracking.md) -- **AzDO/Helix details**: See [references/azdo-helix-reference.md](references/azdo-helix-reference.md) - -## Tips - -1. Check if same test fails on the target branch before assuming transient -2. Look for `[ActiveIssue]` attributes for known skipped tests -3. Use `-SearchMihuBot` for semantic search of related issues -4. Use binlog analysis tools to search binlogs for Helix job IDs, build errors, and properties -5. `gh pr checks --json` valid fields: `bucket`, `completedAt`, `description`, `event`, `link`, `name`, `startedAt`, `state`, `workflow` — no `conclusion` field, `state` has `SUCCESS`/`FAILURE` directly -6. 
"Canceled" ≠ "Failed" — canceled jobs may have recoverable Helix results. Check artifacts before concluding results are lost. diff --git a/.github/skills/ci-analysis/references/azdo-helix-reference.md b/.github/skills/ci-analysis/references/azdo-helix-reference.md deleted file mode 100644 index ace39f0932eeb6..00000000000000 --- a/.github/skills/ci-analysis/references/azdo-helix-reference.md +++ /dev/null @@ -1,93 +0,0 @@ -# Azure DevOps and Helix Reference - -## Supported Repositories - -The script works with any dotnet repository that uses Azure DevOps and Helix: - -| Repository | Common Pipelines | -|------------|-----------------| -| `dotnet/runtime` | runtime, runtime-dev-innerloop, dotnet-linker-tests | -| `dotnet/sdk` | dotnet-sdk (mix of local and Helix tests) | -| `dotnet/aspnetcore` | aspnetcore-ci | -| `dotnet/roslyn` | roslyn-CI | -| `dotnet/maui` | maui-public | - -Use `-Repository` to specify the target: -```powershell -./scripts/Get-CIStatus.ps1 -PRNumber 12345 -Repository "dotnet/aspnetcore" -``` - -## Build Definition IDs (Example: dotnet/runtime) - -Each repository has its own build definition IDs. Here are common ones for dotnet/runtime: - -| Definition ID | Name | Description | -|---------------|------|-------------| -| `129` | runtime | Main PR validation build | -| `133` | runtime-dev-innerloop | Fast innerloop validation | -| `139` | dotnet-linker-tests | ILLinker/trimming tests | - -**Note:** The script auto-discovers builds for a PR, so you rarely need to know definition IDs. 
- -## Azure DevOps Organizations - -**Public builds (default):** -- Organization: `dnceng-public` -- Project: `cbb18261-c48f-4abb-8651-8cdcb5474649` - -**Internal/private builds:** -- Organization: `dnceng` -- Project GUID: Varies by pipeline - -Override with: -```powershell -./scripts/Get-CIStatus.ps1 -BuildId 1276327 -Organization "dnceng" -Project "internal-project-guid" -``` - -## Common Pipeline Names (Example: dotnet/runtime) - -| Pipeline | Description | -|----------|-------------| -| `runtime` | Main PR validation build | -| `runtime-dev-innerloop` | Fast innerloop validation | -| `dotnet-linker-tests` | ILLinker/trimming tests | -| `runtime-wasm-perf` | WASM performance tests | -| `runtime-libraries enterprise-linux` | Enterprise Linux compatibility | - -Other repos have different pipelines - the script discovers them automatically from the PR. - -## Useful Links - -- [Helix Portal](https://helix.dot.net/): View Helix jobs and work items (all repos) -- [Helix API Documentation](https://helix.dot.net/swagger/): Swagger docs for Helix REST API -- [Build Analysis](https://github.com/dotnet/arcade/blob/main/Documentation/Projects/Build%20Analysis/LandingPage.md): Known issues tracking (arcade infrastructure) -- [dnceng-public AzDO](https://dev.azure.com/dnceng-public/public/_build): Public builds for all dotnet repos - -### Repository-specific docs: -- [runtime: Triaging Failures](https://github.com/dotnet/runtime/blob/main/docs/workflow/ci/triaging-failures.md) -- [runtime: Area Owners](https://github.com/dotnet/runtime/blob/main/docs/area-owners.md) - -## Test Execution Types - -### Helix Tests -Tests run on Helix distributed test infrastructure. The script extracts console log URLs and can fetch detailed failure info with `-ShowLogs`. - -### Local Tests (Non-Helix) -Some repositories (e.g., dotnet/sdk) run tests directly on the build agent. The script detects these and extracts Azure DevOps Test Run URLs. 
- -## Known Issue Labels - -- `Known Build Error` - Used by Build Analysis across all dotnet repositories -- Search syntax: `repo:/ is:issue is:open label:"Known Build Error" ` - -Example searches (use `search_issues` when GitHub MCP is available, `gh` CLI otherwise): -```bash -# Search in runtime -gh issue list --repo dotnet/runtime --label "Known Build Error" --search "FileSystemWatcher" - -# Search in aspnetcore -gh issue list --repo dotnet/aspnetcore --label "Known Build Error" --search "Blazor" - -# Search in sdk -gh issue list --repo dotnet/sdk --label "Known Build Error" --search "template" -``` diff --git a/.github/skills/ci-analysis/references/azure-cli.md b/.github/skills/ci-analysis/references/azure-cli.md deleted file mode 100644 index b0371fb0fdac78..00000000000000 --- a/.github/skills/ci-analysis/references/azure-cli.md +++ /dev/null @@ -1,96 +0,0 @@ -# Deep Investigation with Azure CLI - -The AzDO MCP tools handle most pipeline queries directly. This reference covers the Azure CLI fallback for cases where MCP tools are unavailable or the endpoint isn't exposed (e.g., downloading artifacts, inspecting pipeline definitions). - -When the CI script and GitHub APIs aren't enough (e.g., investigating internal pipeline definitions or downloading build artifacts), use the Azure CLI with the `azure-devops` extension. - -> 💡 **Prefer `az pipelines` / `az devops` commands over raw REST API calls.** The CLI handles authentication, pagination, and JSON output formatting. Only fall back to manual `Invoke-RestMethod` calls when the CLI doesn't expose the endpoint you need (e.g., build timelines). The CLI's `--query` (JMESPath) and `-o table` flags are powerful for filtering without extra scripting. 
- -## Checking Authentication - -Before making AzDO API calls, verify the CLI is installed and authenticated: - -```powershell -# Ensure az is on PATH (Windows may need a refresh after install) -$env:Path = [System.Environment]::GetEnvironmentVariable("Path", "Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path", "User") - -# Check if az CLI is available -az --version 2>$null | Select-Object -First 1 - -# Check if logged in and get current account -az account show --query "{name:name, user:user.name}" -o table 2>$null - -# If not logged in, prompt the user to authenticate: -# az login # Interactive browser login -# az login --use-device-code # Device code flow (for remote/headless) - -# Get an AAD access token for AzDO REST API calls (only needed for raw REST) -$accessToken = (az account get-access-token --resource 499b84ac-1321-427f-aa17-267ca6975798 --query accessToken -o tsv) -$headers = @{ "Authorization" = "Bearer $accessToken" } -``` - -> ⚠️ If `az` is not installed, use `winget install -e --id Microsoft.AzureCLI` (Windows). The `azure-devops` extension is also required — install or verify it with `az extension add --name azure-devops` (safe to run if already installed). Ask the user to authenticate if needed. - -> ⚠️ **Do NOT use `az devops configure --defaults`** — it sets user-wide defaults that may not match the organization/project needed for dotnet repositories. Always pass `--org` and `--project` (or `-p`) explicitly on each command. - -## Querying Pipeline Definitions and Builds - -```powershell -$org = "https://dev.azure.com/dnceng" -$project = "internal" - -# Find a pipeline definition by name -az pipelines list --name "dotnet-unified-build" --org $org -p $project --query "[].{id:id, name:name, path:path}" -o table - -# Get pipeline definition details (shows YAML path, triggers, etc.) 
-az pipelines show --id 1330 --org $org -p $project --query "{id:id, name:name, yamlPath:process.yamlFilename, repo:repository.name}" -o table - -# List recent builds for a pipeline (replace {TARGET_BRANCH} with the PR's base branch, e.g., main or release/9.0) -az pipelines runs list --pipeline-ids 1330 --branch "refs/heads/{TARGET_BRANCH}" --top 5 --org $org -p $project --query "[].{id:id, result:result, finish:finishTime}" -o table - -# Get a specific build's details -az pipelines runs show --id $buildId --org $org -p $project --query "{id:id, result:result, sourceBranch:sourceBranch}" -o table - -# List build artifacts -az pipelines runs artifact list --run-id $buildId --org $org -p $project --query "[].{name:name, type:resource.type}" -o table - -# Download a build artifact -az pipelines runs artifact download --run-id $buildId --artifact-name "TestBuild_linux_x64" --path "$env:TEMP\artifact" --org $org -p $project -``` - -## REST API Fallback - -Fall back to REST API only when the CLI doesn't expose what you need: - -```powershell -# Get build timeline (stages, jobs, tasks with results and durations) — no CLI equivalent -$accessToken = (az account get-access-token --resource 499b84ac-1321-427f-aa17-267ca6975798 --query accessToken -o tsv) -$headers = @{ "Authorization" = "Bearer $accessToken" } -$timelineUrl = "https://dev.azure.com/dnceng/internal/_apis/build/builds/$buildId/timeline?api-version=7.1" -$timeline = (Invoke-RestMethod -Uri $timelineUrl -Headers $headers) -$timeline.records | Where-Object { $_.result -eq "failed" -and $_.type -eq "Job" } -``` - -## Examining Pipeline YAML - -All dotnet repos that use arcade put their pipeline definitions under `eng/pipelines/`. 
Use `az pipelines show` to find the YAML file path, then fetch it: - -```powershell -# Find the YAML path for a pipeline -az pipelines show --id 1330 --org $org -p $project --query "{yamlPath:process.yamlFilename, repo:repository.name}" -o table - -# Fetch the YAML from the repo (example: dotnet/runtime's runtime-official pipeline) -# Read the pipeline YAML from the repo to understand build stages and conditions -# e.g., eng/pipelines/runtime-official.yml in dotnet/runtime - -# For VMR unified builds, the YAML is in dotnet/dotnet: -# eng/pipelines/unified-build.yml - -# Templates are usually in eng/pipelines/common/ or eng/pipelines/templates/ -``` - -This is especially useful when: -- A job name doesn't clearly indicate what it builds -- You need to understand stage dependencies (why a job was canceled) -- You want to find which template defines a specific step -- Investigating whether a pipeline change caused new failures diff --git a/.github/skills/ci-analysis/references/binlog-comparison.md b/.github/skills/ci-analysis/references/binlog-comparison.md deleted file mode 100644 index 8cbd2a6a20cab0..00000000000000 --- a/.github/skills/ci-analysis/references/binlog-comparison.md +++ /dev/null @@ -1,119 +0,0 @@ -# Deep Investigation: Binlog Comparison - -When a test **passes on the target branch but fails on a PR**, comparing MSBuild binlogs from both runs reveals the exact difference in task parameters without guessing. - -## When to Use This Pattern - -- Test assertion compares "expected vs actual" build outputs (e.g., CSC args, reference lists) -- A build succeeds on one branch but fails on another with different MSBuild behavior -- You need to find which MSBuild property/item change caused a specific task to behave differently - -## The Pattern: Delegate to Subagents - -> ⚠️ **Do NOT download, load, and parse binlogs in the main conversation context.** This burns 10+ turns on mechanical work. Delegate to subagents instead. 
- -### Step 1: Identify the two work items to compare - -Use `Get-CIStatus.ps1` to find the failing Helix job + work item, then find a corresponding passing build (recent PR merged to the target branch, or a CI run on that branch). - -**Finding Helix job IDs from build artifacts (binlogs to find binlogs):** -When the failing work item's Helix job ID isn't visible (e.g., canceled jobs, or finding a matching job from a passing build), the IDs are inside the build's `SendToHelix.binlog`: - -1. Download the build artifact with `az`: - ``` - az pipelines runs artifact list --run-id $buildId --org "https://dev.azure.com/dnceng-public" -p public --query "[].name" -o tsv - az pipelines runs artifact download --run-id $buildId --artifact-name "TestBuild_linux_x64" --path "$env:TEMP\artifact" --org "https://dev.azure.com/dnceng-public" -p public - ``` -2. Load the `SendToHelix.binlog` and search for `Sent Helix Job` to find the GUIDs. -3. Query each Helix job GUID with the CI script: - ``` - ./scripts/Get-CIStatus.ps1 -HelixJob "{GUID}" -FindBinlogs - ``` - -**For Helix work item binlogs (the common case):** -The CI script shows binlog URLs directly when you query a specific work item: -``` -./scripts/Get-CIStatus.ps1 -HelixJob "{JOB_ID}" -WorkItem "{WORK_ITEM}" -# Output includes: 🔬 msbuild.binlog: https://helix...blob.core.windows.net/... -``` - -### Step 2: Dispatch parallel subagents for extraction - -Launch two `task` subagents (can run in parallel), each with a prompt like: - -``` -Download the msbuild.binlog from Helix job {JOB_ID} work item {WORK_ITEM}. -Use the CI skill script to get the artifact URL: - ./scripts/Get-CIStatus.ps1 -HelixJob "{JOB_ID}" -WorkItem "{WORK_ITEM}" -Download the binlog, load it, find the {TASK_NAME} task, and extract CommandLineArguments. -Normalize paths (see table below) and sort args. -Parse into individual args using regex: (?:"[^"]+"|/[^\s]+|[^\s]+) -Report the total arg count prominently. 
-``` - -**Important:** When diffing, look for **extra or missing args** (different count), not value differences in existing args. A Debug/Release difference in `/define:` is expected noise — an extra `/analyzerconfig:` or `/reference:` arg is the real signal. - -### Step 3: Diff the results - -With two normalized arg lists, `Compare-Object` instantly reveals the difference. - -## Common Binlog Search Patterns - -When investigating binlogs, these search query patterns are most useful: - -- Search for a property: `analysislevel` -- Search within a target: `under($target AddGlobalAnalyzerConfigForPackage_MicrosoftCodeAnalysisNetAnalyzers)` -- Find all properties matching a pattern: `GlobalAnalyzerConfig` - -## Path Normalization - -Helix work items run on different machines with different paths. Normalize before comparing: - -| Pattern | Replacement | Example | -|---------|-------------|---------| -| `/datadisks/disk1/work/[A-F0-9]{8}` | `{W}` | Helix work directory (Linux) | -| `C:\h\w\[A-F0-9]{8}` | `{W}` | Helix work directory (Windows) | -| `Program-[a-f0-9]{64}` | `Program-{H}` | Runfile content hash | -| `dotnetSdkTests\.[a-zA-Z0-9]+` | `dotnetSdkTests.{T}` | Temp test directory | - -### After normalizing paths, focus on structural differences - -> ⚠️ **Ignore value-only differences in existing args** (e.g., Debug vs Release in `/define:`, different hash paths). These are expected configuration differences. Focus on **extra or missing args** — a different arg count indicates a real build behavior change. - -## Example: CscArguments Investigation - -A merge PR (release/10.0.3xx → main) had 208 CSC args vs 207 on main. 
The diff: - -``` -FAIL-ONLY: /analyzerconfig:{W}/p/d/sdk/11.0.100-ci/Sdks/Microsoft.NET.Sdk/analyzers/build/config/analysislevel_11_default.globalconfig -``` - -### What the binlog properties showed - -Both builds had identical property resolution: -- `EffectiveAnalysisLevel = 11.0` -- `_GlobalAnalyzerConfigFileName = analysislevel_11_default.globalconfig` -- `_GlobalAnalyzerConfigFile = .../config/analysislevel_11_default.globalconfig` - -### The actual root cause - -The `AddGlobalAnalyzerConfigForPackage` target has an `Exists()` condition: -```xml - - - -``` - -The merge's SDK layout **shipped** `analysislevel_11_default.globalconfig` on disk (from a newer roslyn-analyzers that flowed from 10.0.3xx), while main's SDK didn't have that file yet. Same property values, different files on disk = different build behavior. - -### Lesson learned - -Same MSBuild property resolution + different files on disk = different build behavior. Always check what's actually in the SDK layout, not just what the targets compute. - -## Anti-Patterns - -> ❌ **Don't manually split/parse CSC command lines in the main conversation.** CSC args have quoted paths, spaces, and complex structure. Regex parsing in PowerShell is fragile and burns turns on trial-and-error. Use a subagent. - -> ❌ **Don't assume the MSBuild property diff explains the behavior diff.** Two branches can compute identical property values but produce different outputs because of different files on disk, different NuGet packages, or different task assemblies. Compare the actual task invocation. - -> ❌ **Don't load large binlogs and browse them interactively in main context.** Use targeted searches rather than browsing interactively. Get in, get the data, get out. 
diff --git a/.github/skills/ci-analysis/references/build-progression-analysis.md b/.github/skills/ci-analysis/references/build-progression-analysis.md deleted file mode 100644 index 5d13b819bb36b7..00000000000000 --- a/.github/skills/ci-analysis/references/build-progression-analysis.md +++ /dev/null @@ -1,219 +0,0 @@ -# Deep Investigation: Build Progression Analysis - -When the current build is failing, the PR's build history can reveal whether the failure existed from the start or appeared after specific changes. This is a fact-gathering technique — like target-branch comparison — that provides context for understanding the current failure. - -## When to Use This Pattern - -- Standard analysis (script + logs) hasn't identified the root cause of the current failure -- The PR has multiple pushes and you want to know whether earlier builds passed or failed -- You need to understand whether a failure is inherent to the PR's approach or was introduced by a later change - -## The Pattern - -### Step 0: Start with the recent builds - -Don't try to analyze the full build history upfront — especially on large PRs with many pushes. Start with the most recent N builds (5-8), present the progression table, and let the user decide whether to dig deeper into earlier builds. - -On large PRs, the user is usually iterating toward a solution. The recent builds are the most relevant. Offer: "Here are the last N builds — the pass→fail transition was between X and Y. Want me to look at earlier builds?" - -### Step 1: List builds for the PR - -`gh pr checks` only shows checks for the current HEAD SHA. To see the full build history, use AzDO or CLI: - -**With AzDO (preferred):** - -Query AzDO for builds on `refs/pull/{PR}/merge` branch, sorted by queue time descending, top 20, in the `public` project. The response includes `triggerInfo` with `pr.sourceSha` — the PR's HEAD commit for each build. 
- -> 💡 Key parameters: `branchName: "refs/pull/{PR}/merge"`, `queryOrder: "QueueTimeDescending"`, `top: 20`, project `public` (for dnceng-public org). - -**Without MCP (fallback):** -```powershell -$org = "https://dev.azure.com/dnceng-public" -$project = "public" -az pipelines runs list --branch "refs/pull/{PR}/merge" --top 20 --org $org -p $project -o json -``` - -### Step 2: Map builds to the PR's head commit - -Each build's `triggerInfo` contains `pr.sourceSha` — the PR's HEAD commit when the build was triggered. Extract it from the build response or CLI output. - -> ⚠️ **`sourceVersion` is the merge commit**, not the PR's head commit. Use `triggerInfo.'pr.sourceSha'` instead. - -> ⚠️ **Target branch moves between builds.** Each build merges `pr.sourceSha` into the target branch HEAD *at the time the build starts*. If `main` received new commits between build N and N+1, the two builds merged against different baselines — even if `pr.sourceSha` is the same. Always extract the target branch HEAD to detect baseline shifts. - -### Step 2b: Extract the target branch HEAD - -**Shortcut for the latest build — use the GitHub merge commit:** - -For the current/latest build, the merge ref (`refs/pull/{PR}/merge`) is available via the GitHub API. The merge commit's first parent is the target branch HEAD at the time GitHub computed the merge: - -Look up the merge commit's parents — the first parent is the target branch HEAD. Use the GitHub API or MCP (`get_commit` with the `sourceVersion` SHA) to get the commit details. The `sourceVersion` from the AzDO build is the merge commit SHA (not `pr.sourceSha`). Example: - -``` -gh api repos/{owner}/{repo}/git/commits/{sourceVersion} --jq '.parents[0].sha' -``` - -This is simpler than parsing checkout logs. - -> ⚠️ **This only works for the latest build.** GitHub recomputes `refs/pull/{PR}/merge` on each push, so the merge commit changes. 
For historical builds in a progression analysis, the merge ref no longer reflects what was built — use the checkout log method below. - -**For historical builds — extract from checkout logs:** - -The AzDO build API doesn't expose the target branch SHA. Extract it from the checkout task log. - -**With AzDO (preferred):** - -Fetch the checkout task log for the build — typically **log ID 5**, starting around **line 500+** (skip the early git-fetch output). Search the output for the merge line: -``` -HEAD is now at {mergeCommit} Merge {prSourceSha} into {targetBranchHead} -``` - -> 💡 `logId: 5` is the first checkout task in most dotnet pipelines. If it doesn't contain the merge line, check the build timeline for "Checkout" tasks to find the correct log ID. - -**Without MCP (fallback):** -```powershell -$token = az account get-access-token --resource "499b84ac-1321-427f-aa17-267ca6975798" --query accessToken -o tsv -$headers = @{ Authorization = "Bearer $token" } -$logUrl = "https://dev.azure.com/{org}/{project}/_apis/build/builds/{BUILD_ID}/logs/5" -$log = Invoke-RestMethod -Uri $logUrl -Headers $headers -``` - -> Note: log ID 5 is the first checkout task in most pipelines. The merge line is typically around line 500-650. If log 5 doesn't contain it, check the build timeline for "Checkout" tasks. - -Note: a PR may have more unique `pr.sourceSha` values than commits visible on GitHub, because force-pushes replace the commit history. Each force-push triggers a new build with a new merge commit and a new `pr.sourceSha`. - -### Step 3: Store progression in SQL - -Use the SQL tool to track builds as you discover them. 
This avoids losing context and enables queries across the full history: - -```sql -CREATE TABLE IF NOT EXISTS build_progression ( - build_id INT PRIMARY KEY, - pr_sha TEXT, - target_sha TEXT, - result TEXT, -- passed, failed, canceled - queued_at TEXT, - failed_jobs TEXT, -- comma-separated job names - notes TEXT -); -``` - -Insert rows as you extract data from each build: - -```sql -INSERT INTO build_progression VALUES - (1283986, '7af79ad', '2d638dc', 'failed', '2026-02-08T10:00:00Z', 'WasmBuildTests', 'Initial commits'), - (1284169, '28ec8a0', '0b691ba', 'failed', '2026-02-08T14:00:00Z', 'WasmBuildTests', 'Iteration 2'), - (1284433, '39dc0a6', '18a3069', 'passed', '2026-02-09T09:00:00Z', NULL, 'Iteration 3'); -``` - -Then query to find the pass→fail transition: - -```sql --- Find where it went from passing to failing -SELECT * FROM build_progression ORDER BY queued_at; - --- Did the target branch move between pass and fail? -SELECT pr_sha, target_sha, result FROM build_progression -WHERE result IN ('passed', 'failed') ORDER BY queued_at; - --- Which builds share the same PR SHA? (force-push detection) -SELECT pr_sha, COUNT(*) as builds, GROUP_CONCAT(result) as results -FROM build_progression GROUP BY pr_sha HAVING builds > 1; -``` - -Present the table to the user: - -| PR HEAD | Target HEAD | Builds | Result | Notes | -|---------|-------------|--------|--------|-------| -| 7af79ad | 2d638dc | 1283986 | ❌ | Initial commits | -| 28ec8a0 | 0b691ba | 1284169 | ❌ | Iteration 2 | -| 39dc0a6 | 18a3069 | 1284433 | ✅ | Iteration 3 | -| f186b93 | 5709f35 | 1286087 | ❌ | Added commit C; target moved ~35 commits | -| 2e74845 | 482d8f9 | 1286967 | ❌ | Modified commit C | - -When both `pr.sourceSha` AND `Target HEAD` change between a pass→fail transition, either could be the cause. Analyze the failure content to determine which. If only the target moved (same `pr.sourceSha`), the failure came from the new baseline. 
- -#### Tracking individual test failures across builds - -For deeper analysis, track which tests failed in each build: - -```sql -CREATE TABLE IF NOT EXISTS build_failures ( - build_id INT, - job_name TEXT, - test_name TEXT, - error_snippet TEXT, - helix_job TEXT, - work_item TEXT, - PRIMARY KEY (build_id, job_name, test_name) -); -``` - -Insert failures as you investigate each build, then query for patterns: - -```sql --- Tests that fail in every build (persistent, not flaky) -SELECT test_name, COUNT(DISTINCT build_id) as fail_count, GROUP_CONCAT(build_id) as builds -FROM build_failures GROUP BY test_name HAVING fail_count > 1; - --- New failures in the latest build (what changed?) -SELECT f.* FROM build_failures f -LEFT JOIN build_failures prev ON f.test_name = prev.test_name AND prev.build_id = {PREV_BUILD_ID} -WHERE f.build_id = {LATEST_BUILD_ID} AND prev.test_name IS NULL; - --- Flaky tests: fail in some builds, pass in others -SELECT test_name FROM build_failures GROUP BY test_name -HAVING COUNT(DISTINCT build_id) < (SELECT COUNT(*) FROM build_progression WHERE result = 'failed'); -``` - -### Step 4: Present findings, not conclusions - -Report what the progression shows: -- Which builds passed and which failed -- What commits were added between the last passing and first failing build -- Whether the failing commits were added in response to review feedback (check review threads) - -> 💡 **Stop when you have the progression table and the pass→fail transition identified.** The table + transition commits + error category is enough for the user to act. Don't investigate further (e.g., comparing individual commits, checking passing builds, exploring main branch history) unless the user asks. - -**Do not** make fix recommendations based solely on build progression. The progression narrows the investigation — it doesn't determine the right fix. The human may have context about why changes were made, what constraints exist, or what the reviewer intended. 
- -## Checking review context - -When the progression shows that a failure appeared after new commits, check whether those commits were review-requested: - -```powershell -# Get review comments with timestamps -gh api "repos/{OWNER}/{REPO}/pulls/{PR}/comments" ` - --jq '.[] | {author: .user.login, body: .body, created: .created_at}' -``` - -Present this as additional context: "Commit C was pushed after reviewer X commented requesting Y." Let the author decide how to proceed. - -## Combining with Binlog Comparison - -Build progression identifies **which change** correlates with the current failure. Binlog comparison (see [binlog-comparison.md](binlog-comparison.md)) shows **what's different** in the build between a passing and failing state. Together they provide a complete picture: - -1. Progression → "The current failure first appeared in build N+1, which added commit C" -2. Binlog comparison → "In the current (failing) build, task X receives parameter Y=Z, whereas in the passing build it received Y=W" - -## Relationship to Target-Branch Comparison - -Both techniques compare a failing build against a passing one: - -| Technique | Passing build from | Answers | -|-----------|-------------------|---------| -| **Target-branch comparison** | Recent build on the base branch (e.g., main) | "Does this test pass without the PR's changes at all?" | -| **Build progression** | Earlier build on the same PR | "Did this test pass with the PR's *earlier* changes?" | - -Use target-branch comparison first to confirm the failure is PR-related. Use build progression to narrow down *which part* of the PR introduced it. If build progression shows a pass→fail transition with the same `pr.sourceSha`, the target branch is the more likely culprit — use target-branch comparison to confirm. - -## Anti-Patterns - -> ❌ **Don't treat build history as a substitute for analyzing the current build.** The current build determines CI status. 
Build history is context for understanding and investigating the current failure. - -> ❌ **Don't make fix recommendations from progression alone.** "Build N passed and build N+1 failed after adding commit C" is a fact worth reporting. "Therefore revert commit C" is a judgment that requires more context than the agent has — the commit may be addressing a critical review concern, fixing a different bug, or partially correct. - -> ❌ **Don't assume earlier passing builds prove the original approach was complete.** A build may pass because it didn't change enough to trigger the failing test scenario. The reviewer who requested additional changes may have identified a real gap. - -> ❌ **Don't assume MSBuild changes only affect the platform you're looking at.** MSBuild properties, conditions, and targets are shared infrastructure. A commit that changes a condition, moves a property, or modifies a restore flag can impact any platform that evaluates the same code path. When a commit touches MSBuild files, verify its impact across all platforms — don't assume it's scoped to the one you're investigating. diff --git a/.github/skills/ci-analysis/references/delegation-patterns.md b/.github/skills/ci-analysis/references/delegation-patterns.md deleted file mode 100644 index b7dd706fca947b..00000000000000 --- a/.github/skills/ci-analysis/references/delegation-patterns.md +++ /dev/null @@ -1,124 +0,0 @@ -# Subagent Delegation Patterns - -CI investigations involve repetitive, mechanical work that burns main conversation context. Delegate data gathering to subagents; keep interpretation in the main agent. - -## Pattern 1: Scanning Multiple Console Logs - -**When:** Multiple failing work items across several jobs. - -**Delegate:** -``` -Extract all unique test failures from these Helix work items: - -Job: {JOB_ID_1}, Work items: {ITEM_1}, {ITEM_2} -Job: {JOB_ID_2}, Work items: {ITEM_3} - -For each, search console logs for lines ending with [FAIL] (xUnit format). 
-If hlx MCP is not available, fall back to: - ./scripts/Get-CIStatus.ps1 -HelixJob "{JOB}" -WorkItem "{ITEM}" - -Extract lines ending with [FAIL] (xUnit format). Ignore [OUTPUT] and [PASS] lines. - -Return JSON: { "failures": [{ "test": "Namespace.Class.Method", "workItems": ["item1", "item2"] }] } -``` - -## Pattern 2: Finding a Baseline Build - -**When:** A test fails on a PR — need to confirm it passes on the target branch. - -**Delegate:** -``` -Find a recent passing build on {TARGET_BRANCH} of dotnet/{REPO} that ran the same test leg. - -Failing build: {BUILD_ID}, job: {JOB_NAME}, work item: {WORK_ITEM} - -Steps: -1. Search for recently merged PRs: - Search for recently merged PRs on {TARGET_BRANCH} -2. Run: ./scripts/Get-CIStatus.ps1 -PRNumber {MERGED_PR} -Repository "dotnet/{REPO}" -3. Find the build with same job name that passed -4. Locate the Helix job ID (may need artifact download — see [azure-cli.md](azure-cli.md)) - -Return JSON: { "found": true, "buildId": N, "helixJob": "...", "workItem": "...", "result": "Pass" } -Or: { "found": false, "reason": "no passing build in last 5 merged PRs" } - -If authentication fails or API returns errors, STOP and return the error — don't troubleshoot. -``` - -## Pattern 3: Extracting Merge PR Changed Files - -**When:** A large merge PR (hundreds of files) has test failures — need the file list for the main agent to analyze. - -**Delegate:** -``` -List all changed files on merge PR #{PR_NUMBER} in dotnet/{REPO}. - -Get the list of changed files for PR #{PR_NUMBER} in dotnet/{REPO} - -For each file, note: path, change type (added/modified/deleted), lines changed. - -Return JSON: { "totalFiles": N, "files": [{ "path": "...", "changeType": "modified", "linesChanged": N }] } -``` - -> The main agent decides which files are relevant to the specific failures — don't filter in the subagent. 
- -## Pattern 4: Parallel Artifact Extraction - -**When:** Multiple builds or artifacts need independent analysis — binlog comparison, canceled job recovery, multi-build progression. - -**Key insight:** Launch one subagent per build/artifact in parallel. Each does its mechanical extraction independently. The main agent synthesizes results across all of them. - -**Delegate (per build, for binlog analysis):** -``` -Download and analyze binlog from AzDO build {BUILD_ID}, artifact {ARTIFACT_NAME}. - -Steps: -1. Download the artifact (see [azure-cli.md](azure-cli.md)) -2. Load the binlog, find the {TASK_NAME} task invocations, get full task details including CommandLineArguments. - -Return JSON: { "buildId": N, "project": "...", "args": ["..."] } -``` - -**Delegate (per build, for canceled job recovery):** -``` -Check if canceled job "{JOB_NAME}" from build {BUILD_ID} has recoverable Helix results. - -Steps: -1. Check if TRX test results are available for the work item. Parse them for pass/fail counts. -2. If no structured results, check for testResults.xml -3. Parse the XML for pass/fail counts on the element - -Return JSON: { "jobName": "...", "hasResults": true, "passed": N, "failed": N } -Or: { "jobName": "...", "hasResults": false, "reason": "no testResults.xml uploaded" } -``` - -This pattern scales to any number of builds — launch N subagents for N builds, collect results, compare. - -## Pattern 5: Build Progression with Target HEAD Extraction - -**When:** PR has multiple builds and you need the full progression table with target branch HEADs. - -**Delegate (one subagent per build):** -``` -Extract the target branch HEAD from AzDO build {BUILD_ID}. 
- -Fetch the checkout task log (typically LOG ID 5, starting around LINE 500+ to skip git-fetch output) - -Search for: "HEAD is now at {mergeCommit} Merge {prSourceSha} into {targetBranchHead}" - -Return JSON: { "buildId": N, "targetHead": "abc1234", "mergeCommit": "def5678" } -Or: { "buildId": N, "targetHead": null, "error": "merge line not found in log 5" } -``` - -Launch one per build in parallel. The main agent combines with the build list to build the full progression table. - -## General Guidelines - -- **Use `general-purpose` agent type** — it has shell + MCP access for Helix, AzDO, binlog, and GitHub queries -- **Run independent tasks in parallel** — the whole point of delegation -- **Include script paths** — subagents don't inherit skill context -- **Require structured JSON output** — enables comparison across subagents -- **Don't delegate interpretation** — subagents return facts, main agent reasons -- **STOP on errors** — subagents should return error details immediately, not troubleshoot auth/environment issues -- **Use SQL for many results** — when launching 5+ subagents or doing multi-phase delegation, store results in a SQL table (`CREATE TABLE results (agent_id TEXT, build_id INT, data TEXT, status TEXT)`) so you can query across all results instead of holding them in context -- **Specify `model: "claude-sonnet-4"` for MCP-heavy tasks** — default model may time out on multi-step MCP tool chains diff --git a/.github/skills/ci-analysis/references/helix-artifacts.md b/.github/skills/ci-analysis/references/helix-artifacts.md deleted file mode 100644 index 756ea26ef5ef89..00000000000000 --- a/.github/skills/ci-analysis/references/helix-artifacts.md +++ /dev/null @@ -1,285 +0,0 @@ -# Helix Work Item Artifacts - -Guide to finding and analyzing artifacts from Helix test runs. - -## Accessing Artifacts - -### Via the Script - -Query a specific work item to see its artifacts: - -```powershell -./scripts/Get-CIStatus.ps1 -HelixJob "4b24b2c2-..." 
-WorkItem "Microsoft.NET.Sdk.Tests.dll.1" -ShowLogs -``` - -### Via API - -```bash -# Get work item details including Files array -curl -s "https://helix.dot.net/api/2019-06-17/jobs/{jobId}/workitems/{workItemName}" -``` - -The `Files` array contains artifacts with `FileName` and `Uri` properties. - -## Artifact Availability Varies - -**Not all test types produce the same artifacts.** What you see depends on the repo, test type, and configuration: - -- **Build/publish tests** (SDK, WASM) → Multiple binlogs -- **AOT compilation tests** (iOS/Android) → `AOTBuild.binlog` plus device logs -- **Standard unit tests** → Console logs only, no binlogs -- **Crash failures** (exit code 134) → Core dumps may be present - -Always query the specific work item to see what's available rather than assuming a fixed structure. - -## Common Artifact Patterns - -| File Pattern | Purpose | When Useful | -|--------------|---------|-------------| -| `*.binlog` | MSBuild binary logs | AOT/build failures, MSB4018 errors | -| `console.*.log` | Console output | Always available, general output | -| `run-*.log` | XHarness execution logs | Mobile test failures | -| `device-*.log` | Device-specific logs | iOS/Android device issues | -| `dotnetTestLog.*.log` | dotnet test output | Test framework issues | -| `vstest.*.log` | VSTest output | aspnetcore/SDK test issues | -| `core.*`, `*.dmp` | Core dumps | Crashes, hangs | -| `testResults.xml` | Test results | Detailed pass/fail info | - -Artifacts may be at the root level or nested in subdirectories like `xharness-output/logs/`. - -> **Note:** The Helix work item Details API has a known bug ([dotnet/dnceng#6072](https://github.com/dotnet/dnceng/issues/6072)) where -> file URIs for subdirectory files are incorrect, and unicode characters in filenames are rejected. 
-> The script works around this by using the separate `ListFiles` endpoint (`GET .../workitems/{workItemName}/files`) -> which returns direct blob storage URIs that work for all filenames regardless of subdirectories or unicode. - -## Binlog Files - -Binlogs are **only present for tests that invoke MSBuild** (build/publish tests, AOT compilation). Standard unit tests don't produce binlogs. - -### Common Names - -| File | Description | -|------|-------------| -| `build.msbuild.binlog` | Build phase | -| `publish.msbuild.binlog` | Publish phase | -| `AOTBuild.binlog` | AOT compilation | -| `msbuild.binlog` | General MSBuild operations | -| `msbuild0.binlog`, `msbuild1.binlog` | Per-test-run logs (numbered) | - -### Analyzing Binlogs - -**Online viewer (no download):** -1. Copy the binlog URI from the script output -2. Go to https://live.msbuildlog.com/ -3. Paste the URL to load and analyze - -**Download and view locally:** -```bash -curl -o build.binlog "https://helix.dot.net/api/jobs/{jobId}/workitems/{workItem}/files/build.msbuild.binlog?api-version=2019-06-17" -# Open with MSBuild Structured Log Viewer -``` - -**AI-assisted analysis:** -Use the MSBuild MCP server to analyze binlogs for errors and warnings. - -## Core Dumps - -Core dumps appear when tests crash (typically exit code 134 on Linux/macOS): - -``` -core.1000.34 # Format: core.{uid}.{pid} -``` - -## Mobile Test Artifacts (iOS/Android) - -Mobile device tests typically include XHarness orchestration logs: - -- `run-ios-device.log` / `run-android.log` - Execution log -- `device-{machine}-*.log` - Device output -- `list-ios-device-*.log` - Device discovery -- `AOTBuild.binlog` - AOT compilation (when applicable) -- `*.crash` - iOS crash reports - -## Finding the Right Work Item - -1. Run the script with `-ShowLogs` to see Helix job/work item info -2. Look for lines like: - ``` - Helix Job: 4b24b2c2-ad5a-4c46-8a84-844be03b1d51 - Work Item: Microsoft.NET.Sdk.Tests.dll.1 - ``` -3. 
Query that specific work item for full artifact list - -## AzDO Build Artifacts (Pre-Helix) - -Helix work items contain artifacts from **test execution**. But there's another source of binlogs: **AzDO build artifacts** from the build phase before tests are sent to Helix. - -### When to Use Build Artifacts - -- Failed work item has no binlogs (unit tests don't produce them) -- You need to see how tests were **built**, not how they **executed** -- Investigating build/restore issues that happen before Helix - -### Listing Build Artifacts - -```powershell -# List all artifacts for a build -$org = "dnceng-public" -$project = "public" -$buildId = 1280125 - -$url = "https://dev.azure.com/$org/$project/_apis/build/builds/$buildId/artifacts?api-version=5.0" -$artifacts = (Invoke-RestMethod -Uri $url).value - -# Show artifacts with sizes -$artifacts | ForEach-Object { - $sizeMB = [math]::Round($_.resource.properties.artifactsize / 1MB, 2) - Write-Host "$($_.name) - $sizeMB MB" -} -``` - -### Common Build Artifacts - -| Artifact Pattern | Contents | Size | -|------------------|----------|------| -| `TestBuild_*` | Test build outputs + binlogs | 30-100 MB | -| `BuildConfiguration` | Build config metadata | <1 MB | -| `TemplateEngine_*` | Template engine outputs | ~40 MB | -| `AoT_*` | AOT compilation outputs | ~3 MB | -| `FullFramework_*` | .NET Framework test outputs | ~40 MB | - -### Downloading and Finding Binlogs - -```powershell -# Download a specific artifact -$artifactName = "TestBuild_linux_x64" -$downloadUrl = "https://dev.azure.com/$org/$project/_apis/build/builds/$buildId/artifacts?artifactName=$artifactName&api-version=5.0&`$format=zip" -$zipPath = "$env:TEMP\$artifactName.zip" -$extractPath = "$env:TEMP\$artifactName" - -Invoke-WebRequest -Uri $downloadUrl -OutFile $zipPath -Expand-Archive -Path $zipPath -DestinationPath $extractPath -Force - -# Find binlogs -Get-ChildItem -Path $extractPath -Filter "*.binlog" -Recurse | ForEach-Object { - $sizeMB = 
[math]::Round($_.Length / 1MB, 2) - Write-Host "$($_.Name) ($sizeMB MB) - $($_.FullName)" -} -``` - -### Typical Binlogs in Build Artifacts - -| File | Description | -|------|-------------| -| `log/Release/Build.binlog` | Main build log | -| `log/Release/TestBuildTests.binlog` | Test build verification | -| `log/Release/ToolsetRestore.binlog` | Toolset restore | - -### Build vs Helix Binlogs - -| Source | When Generated | What It Shows | -|--------|----------------|---------------| -| AzDO build artifacts | During CI build phase | How tests were compiled/packaged | -| Helix work item artifacts | During test execution | What happened when tests ran `dotnet build` etc. | - -If a test runs `dotnet build` internally (like SDK end-to-end tests), both sources may have relevant binlogs. - -## Downloaded Artifact Layout - -When you download artifacts via MCP tools or manually, the directory structure can be confusing. Here's what to expect. - -### Helix Work Item Downloads - -MCP tools for downloading Helix artifacts: -- **`hlx_download`** — downloads multiple files from a work item. Returns local file paths. -- **`hlx_download_url`** — downloads a single file by direct URI (from `hlx_files` output). Use when you know exactly which file you need. - -> 💡 **Prefer remote investigation first**: search file contents, parse test results, and search logs remotely before downloading. Only download when you need to load binlogs or do offline analysis. - -`hlx_download` saves files to a temp directory. 
The structure is **flat** — all files from the work item land in one directory: - -``` -C:\...\Temp\helix-{hash}\ -├── console.d991a56d.log # Console output -├── testResults.xml # Test pass/fail details -├── msbuild.binlog # Only if test invoked MSBuild -├── publish.msbuild.binlog # Only if test did a publish -├── msbuild0.binlog # Numbered: first test's build -├── msbuild1.binlog # Numbered: second test's build -└── core.1000.34 # Only on crash -``` - -**Key confusion point:** Numbered binlogs (`msbuild0.binlog`, `msbuild1.binlog`) correspond to individual test cases within the work item, not to build phases. A work item like `Microsoft.NET.Build.Tests.dll.18` runs dozens of tests, each invoking MSBuild separately. To map a binlog to a specific test: -1. Load it with the binlog analysis tools -2. Check the project paths inside — they usually contain the test name -3. Or check `testResults.xml` to correlate test execution order with binlog numbering - -### AzDO Build Artifact Downloads - -AzDO artifacts download as **ZIP files** with nested directory structures: - -``` -$env:TEMP\TestBuild_linux_x64\ -└── TestBuild_linux_x64\ # Artifact name repeated as subfolder - └── log\Release\ - ├── Build.binlog # Main build - ├── TestBuildTests.binlog # Test build verification - ├── ToolsetRestore.binlog # Toolset restore - └── SendToHelix.binlog # Contains Helix job GUIDs -``` - -**Key confusion point:** The artifact name appears twice in the path (extract folder + subfolder inside the ZIP). Use the full nested path when loading binlogs. - -### Mapping Binlogs to Failures - -This table shows the **typical** source for each binlog type. The boundaries aren't absolute — some repos run tests on the build agent (producing test binlogs in AzDO artifacts), and Helix work items for SDK/Blazor tests invoke `dotnet build` internally (producing build binlogs as Helix artifacts). - -| You want to investigate... | Look here first | But also check... 
| -|---------------------------|-----------------|-------------------| -| Why a test's internal `dotnet build` failed | Helix work item (`msbuild{N}.binlog`) | AzDO artifact if tests ran on agent | -| Why the CI build itself failed to compile | AzDO build artifact (`Build.binlog`) | — | -| Which Helix jobs were dispatched | AzDO build artifact (`SendToHelix.binlog`) | — | -| AOT compilation failure | Helix work item (`AOTBuild.binlog`) | — | -| Test build/publish behavior | Helix work item (`publish.msbuild.binlog`) | AzDO artifact (`TestBuildTests.binlog`) | - -> **Rule of thumb:** If the failing job name contains "Helix" or "Send to Helix", the test binlogs are in Helix. If the job runs tests directly (common in dotnet/sdk), check AzDO artifacts. - -### Tracking Downloaded Artifacts with SQL - -When downloading from multiple work items (e.g., binlog comparison between passing and failing builds), use SQL to avoid losing track of what's where: - -```sql -CREATE TABLE IF NOT EXISTS downloaded_artifacts ( - local_path TEXT PRIMARY KEY, - helix_job TEXT, - work_item TEXT, - build_id INT, - artifact_source TEXT, -- 'helix' or 'azdo' - file_type TEXT, -- 'binlog', 'testResults', 'console', 'crash' - notes TEXT -- e.g., 'passing baseline', 'failing PR build' -); -``` - -Key queries: -```sql --- Find the pair of binlogs for comparison -SELECT local_path, notes FROM downloaded_artifacts -WHERE file_type = 'binlog' ORDER BY notes; - --- What have I downloaded from a specific work item? -SELECT local_path, file_type FROM downloaded_artifacts -WHERE work_item = 'Microsoft.NET.Build.Tests.dll.18'; -``` - -Use this whenever you're juggling artifacts from 2+ Helix jobs (especially during the binlog comparison pattern in [binlog-comparison.md](binlog-comparison.md)). - -### Tips - -- **Multiple binlogs ≠ multiple builds.** A single work item can produce several binlogs if the test suite runs multiple `dotnet build`/`dotnet publish` commands. 
-- **Helix and AzDO binlogs can overlap.** Helix binlogs are *usually* from test execution and AzDO binlogs from the build phase, but SDK/Blazor tests invoke MSBuild inside Helix (producing build-like binlogs), and some repos run tests directly on the build agent (producing test binlogs in AzDO). Check both sources if you can't find what you need. -- **Not all work items have binlogs.** Standard unit tests only produce `testResults.xml` and console logs. -- **Use `hlx_download` with `pattern:"*.binlog"`** to filter downloads and avoid pulling large console logs. - -## Artifact Retention - -Helix artifacts are retained for a limited time (typically 30 days). Download important artifacts promptly if needed for long-term analysis. diff --git a/.github/skills/ci-analysis/references/manual-investigation.md b/.github/skills/ci-analysis/references/manual-investigation.md deleted file mode 100644 index b3b319eaacb9d2..00000000000000 --- a/.github/skills/ci-analysis/references/manual-investigation.md +++ /dev/null @@ -1,98 +0,0 @@ -# Manual Investigation Guide - -If the script doesn't provide enough information, use these manual investigation steps. 
- -## Table of Contents -- [Get Build Timeline](#get-build-timeline) -- [Find Helix Tasks](#find-helix-tasks) -- [Get Build Logs](#get-build-logs) -- [Query Helix APIs](#query-helix-apis) -- [Download Artifacts](#download-artifacts) -- [Analyze Binlogs](#analyze-binlogs) -- [Extract Environment Variables](#extract-environment-variables) - -## Get Build Timeline - -```powershell -$buildId = 1276327 -$response = Invoke-RestMethod -Uri "https://dev.azure.com/dnceng-public/cbb18261-c48f-4abb-8651-8cdcb5474649/_apis/build/builds/$buildId/timeline?api-version=7.0" -$failedJobs = $response.records | Where-Object { $_.type -eq "Job" -and $_.result -eq "failed" } -$failedJobs | Select-Object id, name, result | Format-Table -``` - -## Find Helix Tasks - -```powershell -$jobId = "90274d9a-fbd8-54f8-6a7d-8dfc4e2f6f3f" # From timeline -$helixTasks = $response.records | Where-Object { $_.parentId -eq $jobId -and $_.name -like "*Helix*" } -$helixTasks | Select-Object id, name, result, log | Format-Table -``` - -## Get Build Logs - -```powershell -$logId = 565 # From task.log.id -$logContent = Invoke-RestMethod -Uri "https://dev.azure.com/dnceng-public/cbb18261-c48f-4abb-8651-8cdcb5474649/_apis/build/builds/$buildId/logs/${logId}?api-version=7.0" -$logContent | Select-String -Pattern "error|FAIL" -Context 2,5 -``` - -## Query Helix APIs - -> 💡 **Prefer MCP tools when available** — they handle most Helix queries without manual curl commands. Use the APIs below only as fallback. 
- -```bash -# Get job details -curl -s "https://helix.dot.net/api/2019-06-17/jobs/JOB_ID" - -# List work items -curl -s "https://helix.dot.net/api/2019-06-17/jobs/JOB_ID/workitems" - -# Get work item details -curl -s "https://helix.dot.net/api/2019-06-17/jobs/JOB_ID/workitems/WORK_ITEM_NAME" - -# Get console log -curl -s "https://helix.dot.net/api/2019-06-17/jobs/JOB_ID/workitems/WORK_ITEM_NAME/console" -``` - -## Download Artifacts - -```powershell -$workItem = Invoke-RestMethod -Uri "https://helix.dot.net/api/2019-06-17/jobs/$jobId/workitems/$workItemName" -$workItem.Files | ForEach-Object { Write-Host "$($_.FileName): $($_.Uri)" } -``` - -Common artifacts: -- `console.*.log` - Console output -- `*.binlog` - MSBuild binary logs -- `run-*.log` - XHarness/test runner logs -- Core dumps and crash reports - -## Analyze Binlogs - -Binlogs contain detailed MSBuild execution traces for diagnosing: -- AOT compilation failures -- Static web asset issues -- NuGet restore problems -- Target execution order issues - -**Using MSBuild binlog MCP tools:** - -Load the binlog, then search for errors/diagnostics or specific queries. The binlog MCP tools handle loading, searching, and extracting task details. - -**Manual Analysis:** -Use [MSBuild Structured Log Viewer](https://msbuildlog.com/) or https://live.msbuildlog.com/ - -## Extract Environment Variables - -```bash -curl -s "https://helix.dot.net/api/2019-06-17/jobs/JOB_ID/workitems/WORK_ITEM_NAME/console" | grep "DOTNET_" -``` - -Example output: -``` -DOTNET_JitStress=1 -DOTNET_TieredCompilation=0 -DOTNET_GCStress=0xC -``` - -These are critical for reproducing failures locally. 
diff --git a/.github/skills/ci-analysis/references/sql-tracking.md b/.github/skills/ci-analysis/references/sql-tracking.md deleted file mode 100644 index 950e2f61a4465e..00000000000000 --- a/.github/skills/ci-analysis/references/sql-tracking.md +++ /dev/null @@ -1,107 +0,0 @@ -# SQL Tracking for CI Investigations - -Use the SQL tool to track structured data during complex investigations. This avoids losing context across tool calls and enables queries that catch mistakes (like claiming "all failures known" when some are unmatched). - -## Failed Job Tracking - -Track each failure from the script output and map it to known issues as you verify them: - -```sql -CREATE TABLE IF NOT EXISTS failed_jobs ( - build_id INT, - job_name TEXT, - error_category TEXT, -- from failedJobDetails: test-failure, build-error, crash, etc. - error_snippet TEXT, - known_issue_url TEXT, -- NULL if unmatched - known_issue_title TEXT, - is_pr_correlated BOOLEAN DEFAULT FALSE, - recovery_status TEXT DEFAULT 'not-checked', -- effectively-passed, real-failure, no-results - notes TEXT, - PRIMARY KEY (build_id, job_name) -); -``` - -### Key queries - -```sql --- Unmatched failures (Build Analysis red = these exist) -SELECT job_name, error_category, error_snippet FROM failed_jobs -WHERE known_issue_url IS NULL; - --- Are ALL failures accounted for? -SELECT COUNT(*) as total, - SUM(CASE WHEN known_issue_url IS NOT NULL THEN 1 ELSE 0 END) as matched -FROM failed_jobs; - --- Which crash/canceled jobs need recovery verification? -SELECT job_name, build_id FROM failed_jobs -WHERE error_category IN ('crash', 'unclassified') AND recovery_status = 'not-checked'; - --- PR-correlated failures (fix before retrying) -SELECT job_name, error_snippet FROM failed_jobs WHERE is_pr_correlated = TRUE; -``` - -### Workflow - -1. After the script runs, insert one row per failed job from `failedJobDetails` (each entry includes `buildId`) -2. 
For each known issue from `knownIssues`, UPDATE matching rows with the issue URL -3. Query for unmatched failures — these need investigation -4. For crash/canceled jobs, update `recovery_status` after checking Helix results - -## Build Progression - -See [build-progression-analysis.md](build-progression-analysis.md) for the `build_progression` and `build_failures` tables that track pass/fail across multiple builds. - -> **`failed_jobs` vs `build_failures` — when to use each:** -> - `failed_jobs` (above): **Job-level** — maps each failed AzDO job to a known issue. Use for single-build triage ("are all failures accounted for?"). -> - `build_failures` (build-progression-analysis.md): **Test-level** — tracks individual test names across builds. Use for progression analysis ("which tests started failing after commit X?"). - -## PR Comment Tracking - -For deep-dive analysis — especially across a chain of related PRs (e.g., dependency flow failures, sequential merge PRs, or long-lived PRs with weeks of triage) — store PR comments so you can query them without re-fetching: - -```sql -CREATE TABLE IF NOT EXISTS pr_comments ( - pr_number INT, - repo TEXT DEFAULT 'dotnet/runtime', - comment_id INT PRIMARY KEY, - author TEXT, - created_at TEXT, - body TEXT, - is_triage BOOLEAN DEFAULT FALSE -- set TRUE if comment diagnoses a failure -); -``` - -### Key queries - -```sql --- What has already been diagnosed? (avoid re-investigating) -SELECT author, created_at, substr(body, 1, 200) FROM pr_comments -WHERE is_triage = TRUE ORDER BY created_at; - --- Cross-PR: same failure discussed in multiple PRs? -SELECT pr_number, author, substr(body, 1, 150) FROM pr_comments -WHERE body LIKE '%BlazorWasm%' ORDER BY created_at; - --- Who was asked to investigate what? 
-SELECT author, substr(body, 1, 200) FROM pr_comments -WHERE body LIKE '%PTAL%' OR body LIKE '%could you%look%'; -``` - -### When to use - -- Long-lived PRs (>1 week) with 10+ comments containing triage context -- Analyzing a chain of related PRs where earlier PRs have relevant diagnosis -- When the same failure appears across multiple merge/flow PRs and you need to know what was already tried - -## When to Use SQL vs. Not - -| Situation | Use SQL? | -|-----------|----------| -| 1-2 failed jobs, all match known issues | No — straightforward, hold in context | -| 3+ failed jobs across multiple builds | Yes — prevents missed matches | -| Build progression with 5+ builds | Yes — see [build-progression-analysis.md](build-progression-analysis.md) | -| Crash recovery across multiple work items | Yes — cache testResults.xml findings | -| Single build, single failure | No — overkill | -| PR chain or long-lived PR with extensive triage comments | Yes — preserves diagnosis context across tool calls | -| Downloading artifacts from 2+ Helix jobs (e.g., binlog comparison) | Yes — see [helix-artifacts.md](helix-artifacts.md) | diff --git a/.github/skills/ci-analysis/scripts/Get-CIStatus.ps1 b/.github/skills/ci-analysis/scripts/Get-CIStatus.ps1 deleted file mode 100644 index 07a7e29ce280dd..00000000000000 --- a/.github/skills/ci-analysis/scripts/Get-CIStatus.ps1 +++ /dev/null @@ -1,2274 +0,0 @@ -<# -.SYNOPSIS - Retrieves test failures from Azure DevOps builds and Helix test runs. - -.DESCRIPTION - This script queries Azure DevOps for failed jobs in a build and retrieves - the corresponding Helix console logs to show detailed test failure information. - It can also directly query a specific Helix job and work item. - -.PARAMETER BuildId - The Azure DevOps build ID to query. - -.PARAMETER PRNumber - The GitHub PR number to find the associated build. - -.PARAMETER HelixJob - The Helix job ID (GUID) to query directly. 
- -.PARAMETER WorkItem - The Helix work item name to query (requires -HelixJob). - -.PARAMETER Repository - The GitHub repository (owner/repo format). Default: dotnet/runtime - -.PARAMETER Organization - The Azure DevOps organization. Default: dnceng-public - -.PARAMETER Project - The Azure DevOps project GUID. Default: cbb18261-c48f-4abb-8651-8cdcb5474649 - -.PARAMETER ShowLogs - If specified, fetches and displays the Helix console logs for failed tests. - -.PARAMETER MaxJobs - Maximum number of failed jobs to process. Default: 5 - -.PARAMETER MaxFailureLines - Maximum number of lines to capture per test failure. Default: 50 - -.PARAMETER TimeoutSec - Timeout in seconds for API calls. Default: 30 - -.PARAMETER ContextLines - Number of context lines to show before errors. Default: 0 - -.PARAMETER NoCache - Bypass cache and fetch fresh data for all API calls. - -.PARAMETER CacheTTLSeconds - Cache lifetime in seconds. Default: 30 - -.PARAMETER ClearCache - Clear all cached files and exit. - -.PARAMETER ContinueOnError - Continue processing remaining jobs if an API call fails, showing partial results. - -.PARAMETER SearchMihuBot - Search MihuBot's semantic database for related issues and discussions. - Uses https://mihubot.xyz/mcp to find conceptually related issues across dotnet repositories. - -.PARAMETER FindBinlogs - Scan work items in a Helix job to find which ones contain MSBuild binlog files. - Useful when the failed work item doesn't have binlogs (e.g., unit tests) but you need - to find related build tests that do have binlogs for deeper analysis. 
- -.EXAMPLE - .\Get-CIStatus.ps1 -BuildId 1276327 - -.EXAMPLE - .\Get-CIStatus.ps1 -PRNumber 123445 -ShowLogs - -.EXAMPLE - .\Get-CIStatus.ps1 -PRNumber 123445 -Repository dotnet/aspnetcore - -.EXAMPLE - .\Get-CIStatus.ps1 -HelixJob "4b24b2c2-ad5a-4c46-8a84-844be03b1d51" -WorkItem "iOS.Device.Aot.Test" - -.EXAMPLE - .\Get-CIStatus.ps1 -BuildId 1276327 -SearchMihuBot - -.EXAMPLE - .\Get-CIStatus.ps1 -HelixJob "4b24b2c2-ad5a-4c46-8a84-844be03b1d51" -FindBinlogs - # Scans work items to find which ones contain MSBuild binlog files - -.EXAMPLE - .\Get-CIStatus.ps1 -ClearCache -#> - -[CmdletBinding(DefaultParameterSetName = 'BuildId')] -param( - [Parameter(ParameterSetName = 'BuildId', Mandatory = $true)] - [int]$BuildId, - - [Parameter(ParameterSetName = 'PRNumber', Mandatory = $true)] - [int]$PRNumber, - - [Parameter(ParameterSetName = 'HelixJob', Mandatory = $true)] - [string]$HelixJob, - - [Parameter(ParameterSetName = 'HelixJob')] - [string]$WorkItem, - - [Parameter(ParameterSetName = 'ClearCache', Mandatory = $true)] - [switch]$ClearCache, - - [string]$Repository = "dotnet/runtime", - [string]$Organization = "dnceng-public", - [string]$Project = "cbb18261-c48f-4abb-8651-8cdcb5474649", - [switch]$ShowLogs, - [int]$MaxJobs = 5, - [int]$MaxFailureLines = 50, - [int]$TimeoutSec = 30, - [int]$ContextLines = 0, - [switch]$NoCache, - [int]$CacheTTLSeconds = 30, - [switch]$ContinueOnError, - [switch]$SearchMihuBot, - [switch]$FindBinlogs -) - -$ErrorActionPreference = "Stop" - -#region Caching Functions - -# Cross-platform temp directory detection -function Get-TempDirectory { - # Try common environment variables in order of preference - $tempPath = $env:TEMP - if (-not $tempPath) { $tempPath = $env:TMP } - if (-not $tempPath) { $tempPath = $env:TMPDIR } # macOS - if (-not $tempPath -and $IsLinux) { $tempPath = "/tmp" } - if (-not $tempPath -and $IsMacOS) { $tempPath = "/tmp" } - if (-not $tempPath) { - # Fallback: use .cache in user's home directory - $home = $env:HOME - 
if (-not $home) { $home = $env:USERPROFILE } - if ($home) { - $tempPath = Join-Path $home ".cache" - if (-not (Test-Path $tempPath)) { - New-Item -ItemType Directory -Path $tempPath -Force | Out-Null - } - } - } - if (-not $tempPath) { - throw "Could not determine temp directory. Set TEMP, TMP, or TMPDIR environment variable." - } - return $tempPath -} - -$script:TempDir = Get-TempDirectory - -# Handle -ClearCache parameter -if ($ClearCache) { - $cacheDir = Join-Path $script:TempDir "ci-analysis-cache" - if (Test-Path $cacheDir) { - $files = Get-ChildItem -Path $cacheDir -File - $count = $files.Count - Remove-Item -Path $cacheDir -Recurse -Force - Write-Host "Cleared $count cached files from $cacheDir" -ForegroundColor Green - } - else { - Write-Host "Cache directory does not exist: $cacheDir" -ForegroundColor Yellow - } - exit 0 -} - -# Setup caching -$script:CacheDir = Join-Path $script:TempDir "ci-analysis-cache" -if (-not (Test-Path $script:CacheDir)) { - New-Item -ItemType Directory -Path $script:CacheDir -Force | Out-Null -} - -# Clean up expired cache files on startup (files older than 2x TTL) -function Clear-ExpiredCache { - param([int]$TTLSeconds = $CacheTTLSeconds) - - $maxAge = $TTLSeconds * 2 - $cutoff = (Get-Date).AddSeconds(-$maxAge) - - Get-ChildItem -Path $script:CacheDir -File -ErrorAction SilentlyContinue | Where-Object { - $_.LastWriteTime -lt $cutoff - } | ForEach-Object { - Write-Verbose "Removing expired cache file: $($_.Name)" - try { - Remove-Item $_.FullName -Force -ErrorAction Stop - } - catch { - Write-Verbose "Failed to remove cache file '$($_.Name)': $($_.Exception.Message)" - } - } -} - -# Run cache cleanup at startup (non-blocking) -if (-not $NoCache) { - Clear-ExpiredCache -TTLSeconds $CacheTTLSeconds -} - -function Get-UrlHash { - param([string]$Url) - - $sha256 = [System.Security.Cryptography.SHA256]::Create() - try { - return [System.BitConverter]::ToString( - $sha256.ComputeHash([System.Text.Encoding]::UTF8.GetBytes($Url)) - 
).Replace("-", "") - } - finally { - $sha256.Dispose() - } -} - -function Get-CachedResponse { - param( - [string]$Url, - [int]$TTLSeconds = $CacheTTLSeconds - ) - - if ($NoCache) { return $null } - - $hash = Get-UrlHash -Url $Url - $cacheFile = Join-Path $script:CacheDir "$hash.json" - - if (Test-Path $cacheFile) { - $cacheInfo = Get-Item $cacheFile - $age = (Get-Date) - $cacheInfo.LastWriteTime - - if ($age.TotalSeconds -lt $TTLSeconds) { - Write-Verbose "Cache hit for $Url (age: $([int]$age.TotalSeconds) sec)" - return Get-Content $cacheFile -Raw - } - else { - Write-Verbose "Cache expired for $Url" - } - } - - return $null -} - -function Set-CachedResponse { - param( - [string]$Url, - [string]$Content - ) - - if ($NoCache) { return } - - $hash = Get-UrlHash -Url $Url - $cacheFile = Join-Path $script:CacheDir "$hash.json" - - # Use atomic write: write to temp file, then rename - $tempFile = Join-Path $script:CacheDir "$hash.tmp.$([System.Guid]::NewGuid().ToString('N'))" - try { - $Content | Set-Content -LiteralPath $tempFile -Force - Move-Item -LiteralPath $tempFile -Destination $cacheFile -Force - Write-Verbose "Cached response for $Url" - } - catch { - # Clean up temp file on failure - if (Test-Path $tempFile) { - Remove-Item -LiteralPath $tempFile -Force -ErrorAction SilentlyContinue - } - Write-Verbose "Failed to cache response: $_" - } -} - -function Invoke-CachedRestMethod { - param( - [string]$Uri, - [int]$TimeoutSec = 30, - [switch]$AsJson, - [switch]$SkipCache, - [switch]$SkipCacheWrite - ) - - # Check cache first (unless skipping) - if (-not $SkipCache) { - $cached = Get-CachedResponse -Url $Uri - if ($cached) { - if ($AsJson) { - try { - return $cached | ConvertFrom-Json -ErrorAction Stop - } - catch { - Write-Verbose "Failed to parse cached response as JSON, treating as cache miss: $_" - } - } - else { - return $cached - } - } - } - - # Make the actual request - Write-Verbose "GET $Uri" - $response = Invoke-RestMethod -Uri $Uri -Method Get 
-TimeoutSec $TimeoutSec - - # Cache the response (unless skipping write) - if (-not $SkipCache -and -not $SkipCacheWrite) { - if ($AsJson -or $response -is [PSCustomObject]) { - $content = $response | ConvertTo-Json -Depth 100 -Compress - Set-CachedResponse -Url $Uri -Content $content - } - else { - Set-CachedResponse -Url $Uri -Content $response - } - } - - return $response -} - -#endregion Caching Functions - -#region Validation Functions - -function Test-RepositoryFormat { - param([string]$Repo) - - # Validate repository format to prevent command injection - $repoPattern = '^[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+$' - if ($Repo -notmatch $repoPattern) { - throw "Invalid repository format '$Repo'. Expected 'owner/repo' (e.g., 'dotnet/runtime')." - } - return $true -} - -function Get-SafeSearchTerm { - param([string]$Term) - - # Sanitize search term to avoid passing unsafe characters to gh CLI - # Keep: alphanumeric, spaces, dots, hyphens, colons (for namespaces like System.Net), - # and slashes (for paths). These are safe for GitHub search and common in .NET names. - $safeTerm = $Term -replace '[^\w\s\-.:/]', '' - return $safeTerm.Trim() -} - -#endregion Validation Functions - -#region Azure DevOps API Functions - -function Get-AzDOBuildIdFromPR { - param([int]$PR) - - # Check for gh CLI dependency - if (-not (Get-Command gh -ErrorAction SilentlyContinue)) { - throw "GitHub CLI (gh) is required for PR lookup. Install from https://cli.github.com/ or use -BuildId instead." - } - - # Validate repository format - Test-RepositoryFormat -Repo $Repository | Out-Null - - Write-Host "Finding builds for PR #$PR in $Repository..." 
-ForegroundColor Cyan - Write-Verbose "Running: gh pr checks $PR --repo $Repository" - - # Use gh cli to get the checks with splatted arguments - $checksOutput = & gh pr checks $PR --repo $Repository 2>&1 - $ghExitCode = $LASTEXITCODE - - if ($ghExitCode -ne 0 -and -not ($checksOutput | Select-String -Pattern "buildId=")) { - throw "Failed to fetch CI status for PR #$PR in $Repository - check PR number and permissions" - } - - # Check if PR has merge conflicts (no CI runs when mergeable_state is dirty) - $prMergeState = $null - $prMergeStateOutput = & gh api "repos/$Repository/pulls/$PR" --jq '.mergeable_state' 2>$null - $ghMergeStateExitCode = $LASTEXITCODE - if ($ghMergeStateExitCode -eq 0 -and $prMergeStateOutput) { - $prMergeState = $prMergeStateOutput.Trim() - } else { - Write-Verbose "Could not determine PR merge state (gh exit code $ghMergeStateExitCode)." - } - - # Find ALL failing Azure DevOps builds - $failingBuilds = @{} - foreach ($line in $checksOutput) { - if ($line -match 'fail.*buildId=(\d+)') { - $buildId = $Matches[1] - # Extract pipeline name (first column before 'fail') - $pipelineName = ($line -split '\s+fail')[0].Trim() - if (-not $failingBuilds.ContainsKey($buildId)) { - $failingBuilds[$buildId] = $pipelineName - } - } - } - - if ($failingBuilds.Count -eq 0) { - # No failing builds - try to find any build - $anyBuild = $checksOutput | Select-String -Pattern "buildId=(\d+)" | Select-Object -First 1 - if ($anyBuild) { - $anyBuildMatch = [regex]::Match($anyBuild.ToString(), "buildId=(\d+)") - if ($anyBuildMatch.Success) { - $buildIdStr = $anyBuildMatch.Groups[1].Value - $buildIdInt = 0 - if ([int]::TryParse($buildIdStr, [ref]$buildIdInt)) { - return @{ BuildIds = @($buildIdInt); Reason = $null; MergeState = $prMergeState } - } - } - } - if ($prMergeState -eq 'dirty') { - Write-Host "`nPR #$PR has merge conflicts (mergeable_state: dirty)" -ForegroundColor Red - Write-Host "CI will not run until conflicts are resolved." 
-ForegroundColor Yellow - Write-Host "Resolve conflicts and push to trigger CI, or use -BuildId to analyze a previous build." -ForegroundColor Gray - return @{ BuildIds = @(); Reason = "MERGE_CONFLICTS"; MergeState = $prMergeState } - } - Write-Host "`nNo CI build found for PR #$PR in $Repository" -ForegroundColor Red - Write-Host "The CI pipeline has not been triggered yet." -ForegroundColor Yellow - return @{ BuildIds = @(); Reason = "NO_BUILDS"; MergeState = $prMergeState } - } - - # Return all unique failing build IDs - $buildIds = $failingBuilds.Keys | ForEach-Object { [int]$_ } | Sort-Object -Unique - - if ($buildIds.Count -gt 1) { - Write-Host "Found $($buildIds.Count) failing builds:" -ForegroundColor Yellow - foreach ($id in $buildIds) { - Write-Host " - Build $id ($($failingBuilds[$id.ToString()]))" -ForegroundColor Gray - } - } - - return @{ BuildIds = $buildIds; Reason = $null; MergeState = $prMergeState } -} - -function Get-BuildAnalysisKnownIssues { - param([int]$PR) - - # Check for gh CLI dependency - if (-not (Get-Command gh -ErrorAction SilentlyContinue)) { - Write-Verbose "GitHub CLI (gh) not available for Build Analysis check" - return @() - } - - Write-Verbose "Fetching Build Analysis check for PR #$PR..." 
- - try { - # Get the head commit SHA for the PR - $headSha = gh pr view $PR --repo $Repository --json headRefOid --jq '.headRefOid' 2>&1 - if ($LASTEXITCODE -ne 0) { - Write-Verbose "Failed to get PR head SHA: $headSha" - return @() - } - - # Validate headSha is a valid git SHA (40 hex characters) - if ($headSha -notmatch '^[a-fA-F0-9]{40}$') { - Write-Verbose "Invalid head SHA format: $headSha" - return @() - } - - # Get the Build Analysis check run - $checkRuns = gh api "repos/$Repository/commits/$headSha/check-runs" --jq '.check_runs[] | select(.name == "Build Analysis") | .output' 2>&1 - if ($LASTEXITCODE -ne 0 -or -not $checkRuns) { - Write-Verbose "No Build Analysis check found" - return @() - } - - $output = $checkRuns | ConvertFrom-Json -ErrorAction SilentlyContinue - if (-not $output -or -not $output.text) { - Write-Verbose "Build Analysis check has no output text" - return @() - } - - # Parse known issues from the output text - # Format: Issue Title - $knownIssues = @() - $issuePattern = '([^<]+)' - $matches = [regex]::Matches($output.text, $issuePattern) - - foreach ($match in $matches) { - $issueUrl = $match.Groups[1].Value - $issueNumber = $match.Groups[2].Value - $issueTitle = $match.Groups[3].Value - - # Avoid duplicates - if (-not ($knownIssues | Where-Object { $_.Number -eq $issueNumber })) { - $knownIssues += @{ - Number = $issueNumber - Url = $issueUrl - Title = $issueTitle - } - } - } - - if ($knownIssues.Count -gt 0) { - Write-Host "`nBuild Analysis found $($knownIssues.Count) known issue(s):" -ForegroundColor Yellow - foreach ($issue in $knownIssues) { - Write-Host " - #$($issue.Number): $($issue.Title)" -ForegroundColor Gray - Write-Host " $($issue.Url)" -ForegroundColor DarkGray - } - } - - return $knownIssues - } - catch { - Write-Verbose "Error fetching Build Analysis: $_" - return @() - } -} - -function Get-PRChangedFiles { - param( - [int]$PR, - [int]$MaxFiles = 100 - ) - - # Check for gh CLI dependency - if (-not (Get-Command gh 
-ErrorAction SilentlyContinue)) { - Write-Verbose "GitHub CLI (gh) not available for PR file lookup" - return @() - } - - Write-Verbose "Fetching changed files for PR #$PR..." - - try { - # Get the file count first to avoid fetching huge PRs - $fileCount = gh pr view $PR --repo $Repository --json files --jq '.files | length' 2>&1 - if ($LASTEXITCODE -ne 0) { - Write-Verbose "Failed to get PR file count: $fileCount" - return @() - } - - $count = [int]$fileCount - if ($count -gt $MaxFiles) { - Write-Verbose "PR has $count files (exceeds limit of $MaxFiles) - skipping correlation" - Write-Host "PR has $count changed files - skipping detailed correlation (limit: $MaxFiles)" -ForegroundColor Gray - return @() - } - - # Get the list of changed files - $filesJson = gh pr view $PR --repo $Repository --json files --jq '.files[].path' 2>&1 - if ($LASTEXITCODE -ne 0) { - Write-Verbose "Failed to get PR files: $filesJson" - return @() - } - - $files = $filesJson -split "`n" | Where-Object { $_ } - return $files - } - catch { - Write-Verbose "Error fetching PR files: $_" - return @() - } -} - -function Get-PRCorrelation { - param( - [array]$ChangedFiles, - [array]$AllFailures - ) - - $result = @{ CorrelatedFiles = @(); TestFiles = @() } - if ($ChangedFiles.Count -eq 0 -or $AllFailures.Count -eq 0) { return $result } - - $failureText = ($AllFailures | ForEach-Object { - $_.TaskName - $_.JobName - $_.Errors -join "`n" - $_.HelixLogs -join "`n" - $_.FailedTests -join "`n" - }) -join "`n" - - foreach ($file in $ChangedFiles) { - $fileName = [System.IO.Path]::GetFileNameWithoutExtension($file) - $fileNameWithExt = [System.IO.Path]::GetFileName($file) - $baseTestName = $fileName -replace '\.[^.]+$', '' - - $isCorrelated = $false - if ($failureText -match [regex]::Escape($fileName) -or - $failureText -match [regex]::Escape($fileNameWithExt) -or - $failureText -match [regex]::Escape($file) -or - ($baseTestName -and $failureText -match [regex]::Escape($baseTestName))) { - $isCorrelated 
= $true - } - - if ($isCorrelated) { - $isTestFile = $file -match '\.Tests?\.' -or $file -match '[/\\]tests?[/\\]' -or $file -match 'Test\.cs$' -or $file -match 'Tests\.cs$' - if ($isTestFile) { $result.TestFiles += $file } else { $result.CorrelatedFiles += $file } - } - } - - $result.CorrelatedFiles = @($result.CorrelatedFiles | Select-Object -Unique) - $result.TestFiles = @($result.TestFiles | Select-Object -Unique) - return $result -} - -function Show-PRCorrelationSummary { - param( - [array]$ChangedFiles, - [array]$AllFailures - ) - - if ($ChangedFiles.Count -eq 0) { - return - } - - $correlation = Get-PRCorrelation -ChangedFiles $ChangedFiles -AllFailures $AllFailures - $correlatedFiles = $correlation.CorrelatedFiles - $testFiles = $correlation.TestFiles - - # Show results - if ($correlatedFiles.Count -gt 0 -or $testFiles.Count -gt 0) { - Write-Host "`n=== PR Change Correlation ===" -ForegroundColor Magenta - - if ($testFiles.Count -gt 0) { - Write-Host "⚠️ Test files changed by this PR are failing:" -ForegroundColor Yellow - $shown = 0 - foreach ($file in $testFiles) { - if ($shown -ge 10) { - Write-Host " ... and $($testFiles.Count - 10) more test files" -ForegroundColor Gray - break - } - Write-Host " $file" -ForegroundColor Red - $shown++ - } - } - - if ($correlatedFiles.Count -gt 0) { - Write-Host "⚠️ Files changed by this PR appear in failures:" -ForegroundColor Yellow - $shown = 0 - foreach ($file in $correlatedFiles) { - if ($shown -ge 10) { - Write-Host " ... and $($correlatedFiles.Count - 10) more files" -ForegroundColor Gray - break - } - Write-Host " $file" -ForegroundColor Red - $shown++ - } - } - - Write-Host "`nCorrelated files found — check JSON summary for details." 
-ForegroundColor Yellow - } -} - -function Get-AzDOBuildStatus { - param([int]$Build) - - $url = "https://dev.azure.com/$Organization/$Project/_apis/build/builds/${Build}?api-version=7.0" - - try { - # First check cache to see if we have a completed status - $cached = Get-CachedResponse -Url $url - if ($cached) { - $cachedData = $cached | ConvertFrom-Json - # Only use cache if build was completed - in-progress status goes stale quickly - if ($cachedData.status -eq "completed") { - return @{ - Status = $cachedData.status - Result = $cachedData.result - StartTime = $cachedData.startTime - FinishTime = $cachedData.finishTime - } - } - Write-Verbose "Skipping cached in-progress build status" - } - - # Fetch fresh status - $response = Invoke-CachedRestMethod -Uri $url -TimeoutSec $TimeoutSec -AsJson -SkipCache - - # Only cache if completed - if ($response.status -eq "completed") { - $content = $response | ConvertTo-Json -Depth 10 -Compress - Set-CachedResponse -Url $url -Content $content - } - - return @{ - Status = $response.status # notStarted, inProgress, completed - Result = $response.result # succeeded, failed, canceled (only set when completed) - StartTime = $response.startTime - FinishTime = $response.finishTime - } - } - catch { - Write-Verbose "Failed to fetch build status: $_" - return $null - } -} - -function Get-AzDOTimeline { - param( - [int]$Build, - [switch]$BuildInProgress - ) - - $url = "https://dev.azure.com/$Organization/$Project/_apis/build/builds/$Build/timeline?api-version=7.0" - Write-Host "Fetching build timeline..." 
-ForegroundColor Cyan - - try { - # Don't cache timeline for in-progress builds - it changes as jobs complete - $response = Invoke-CachedRestMethod -Uri $url -TimeoutSec $TimeoutSec -AsJson -SkipCacheWrite:$BuildInProgress - return $response - } - catch { - if ($ContinueOnError) { - Write-Warning "Failed to fetch build timeline: $_" - return $null - } - throw "Failed to fetch build timeline: $_" - } -} - -function Get-FailedJobs { - param($Timeline) - - if ($null -eq $Timeline -or $null -eq $Timeline.records) { - return @() - } - - $failedJobs = $Timeline.records | Where-Object { - $_.type -eq "Job" -and $_.result -eq "failed" - } - - return $failedJobs -} - -function Get-CanceledJobs { - param($Timeline) - - if ($null -eq $Timeline -or $null -eq $Timeline.records) { - return @() - } - - $canceledJobs = $Timeline.records | Where-Object { - $_.type -eq "Job" -and $_.result -eq "canceled" - } - - return $canceledJobs -} - -function Get-HelixJobInfo { - param($Timeline, $JobId) - - if ($null -eq $Timeline -or $null -eq $Timeline.records) { - return @() - } - - # Find tasks in this job that mention Helix - $helixTasks = $Timeline.records | Where-Object { - $_.parentId -eq $JobId -and - $_.name -like "*Helix*" -and - $_.result -eq "failed" - } - - return $helixTasks -} - -function Get-BuildLog { - param([int]$Build, [int]$LogId) - - $url = "https://dev.azure.com/$Organization/$Project/_apis/build/builds/$Build/logs/${LogId}?api-version=7.0" - - try { - $response = Invoke-CachedRestMethod -Uri $url -TimeoutSec $TimeoutSec - return $response - } - catch { - Write-Warning "Failed to fetch log ${LogId}: $_" - return $null - } -} - -#endregion Azure DevOps API Functions - -#region Log Parsing Functions - -function Extract-HelixUrls { - param([string]$LogContent) - - $urls = @() - - # First, normalize the content by removing line breaks that might split URLs - $normalizedContent = $LogContent -replace "`r`n", "" -replace "`n", "" - - # Match Helix console log URLs - workitem 
function Extract-TestFailures {
    <#
    .SYNOPSIS
        Extracts "Test X has failed" entries from MSBuild log output.
    .OUTPUTS
        Hashtables with TestName and FullMatch keys, one per match.
    #>
    param([string]$LogContent)

    $failures = @()

    # Match test failure patterns from MSBuild output
    $pattern = 'error\s*:\s*.*Test\s+(\S+)\s+has failed'
    $failureMatches = [regex]::Matches($LogContent, $pattern, [System.Text.RegularExpressions.RegexOptions]::IgnoreCase)

    foreach ($match in $failureMatches) {
        $failures += @{
            TestName  = $match.Groups[1].Value
            FullMatch = $match.Value
        }
    }

    Write-Verbose "Found $($failures.Count) test failures"
    return $failures
}

function Extract-BuildErrors {
    <#
    .SYNOPSIS
        Scans raw build log text for compiler/linker/MSBuild error lines.
    .DESCRIPTION
        Matches a prioritized set of error patterns and returns cleaned-up error
        lines (timestamps stripped, AzDO annotations normalized), each optionally
        preceded by a few lines of surrounding context. MSBuild "exited with code"
        wrapper errors are suppressed when more specific errors are present; when
        they are the only signal, the 50 lines before the wrapper are searched for
        native compiler/linker errors instead.
    .PARAMETER LogContent
        Full text of the build log to scan.
    .PARAMETER Context
        Number of context lines to emit before each matched error (0 disables).
    .OUTPUTS
        Up to 20 unique error strings.
    #>
    param(
        [string]$LogContent,
        [int]$Context = 5
    )

    $errors = @()
    $lines = $LogContent -split "`n"

    # Patterns for common build errors - ordered from most specific to least specific
    $errorPatterns = @(
        'error\s+CS\d+:.*',                  # C# compiler errors
        'error\s+MSB\d+:.*',                 # MSBuild errors
        'error\s+NU\d+:.*',                  # NuGet errors
        '\.pcm: No such file or directory',  # Clang module cache
        'EXEC\s*:\s*error\s*:.*',            # Exec task errors
        'fatal error:.*',                    # Fatal errors (clang, etc)
        ':\s*error:',                        # Clang/GCC errors (file.cpp:123: error:)
        'undefined reference to',            # Linker errors
        'cannot find -l',                    # Linker missing library
        'collect2: error:',                  # GCC linker wrapper errors
        '##\[error\].*'                      # AzDO error annotations (last - catch-all)
    )

    $combinedPattern = ($errorPatterns -join '|')

    # Track if we only found MSBuild wrapper errors
    $foundRealErrors = $false
    $msbWrapperLines = @()

    for ($i = 0; $i -lt $lines.Count; $i++) {
        if ($lines[$i] -match $combinedPattern) {
            # Skip MSBuild wrapper "exited with code" if we find real errors
            if ($lines[$i] -match 'exited with code \d+') {
                $msbWrapperLines += $i
                continue
            }

            # Skip duplicate MSBuild errors (they often repeat)
            if ($lines[$i] -match 'error MSB3073.*exited with code') {
                continue
            }

            $foundRealErrors = $true

            # Clean up the line (remove timestamps, etc)
            $cleanLine = $lines[$i] -replace '^\d{4}-\d{2}-\d{2}T[\d:\.]+Z\s*', ''
            $cleanLine = $cleanLine -replace '##\[error\]', 'ERROR: '

            # Add context lines if requested
            if ($Context -gt 0) {
                $contextStart = [Math]::Max(0, $i - $Context)
                $contextLines = @()
                for ($j = $contextStart; $j -lt $i; $j++) {
                    $contextLines += " " + $lines[$j].Trim()
                }
                if ($contextLines.Count -gt 0) {
                    $errors += ($contextLines -join "`n")
                }
            }

            $errors += $cleanLine.Trim()
        }
    }

    # If we only found MSBuild wrapper errors, show context around them
    if (-not $foundRealErrors -and $msbWrapperLines.Count -gt 0) {
        $wrapperLine = $msbWrapperLines[0]
        # Look for real errors in the 50 lines before the wrapper error
        $searchStart = [Math]::Max(0, $wrapperLine - 50)
        for ($i = $searchStart; $i -lt $wrapperLine; $i++) {
            $line = $lines[$i]
            # Look for C++/clang/gcc style errors
            if ($line -match ':\s*error:' -or $line -match 'fatal error:' -or $line -match 'undefined reference') {
                $cleanLine = $line -replace '^\d{4}-\d{2}-\d{2}T[\d:\.]+Z\s*', ''
                $errors += $cleanLine.Trim()
            }
        }
    }

    # FIX: deduplicate BEFORE truncating. The previous order
    # (-First 20 | -Unique) dropped everything past the first 20 raw entries,
    # so a noisy, repeated error could crowd distinct errors out of the result.
    return $errors | Select-Object -Unique | Select-Object -First 20
}
function Extract-HelixLogUrls {
    <#
    .SYNOPSIS
        Finds Helix console-log URLs in log text and returns one entry per unique URL.
    .OUTPUTS
        Hashtables with Url, JobId and WorkItem keys.
    #>
    param([string]$LogContent)

    # Match Helix console log URLs from log content
    # Pattern: https://helix.dot.net/api/2019-06-17/jobs/{jobId}/workitems/{workItemName}/console
    $pattern = 'https://helix\.dot\.net/api/[^/]+/jobs/([a-f0-9-]+)/workitems/([^/\s]+)/console'

    # Deduplicate by full URL while keeping the first occurrence of each.
    $seen = @{}
    foreach ($hit in [regex]::Matches($LogContent, $pattern)) {
        if (-not $seen.ContainsKey($hit.Value)) {
            $seen[$hit.Value] = @{
                Url      = $hit.Value
                JobId    = $hit.Groups[1].Value
                WorkItem = $hit.Groups[2].Value
            }
        }
    }

    return $seen.Values
}
function Search-MihuBotIssues {
    <#
    .SYNOPSIS
        Searches the MihuBot MCP endpoint for GitHub issues/PRs related to the given terms.
    .PARAMETER SearchTerms
        One or more search strings; an empty list short-circuits to no results.
    .PARAMETER ExtraContext
        Free-text context forwarded to the search tool to improve relevance.
    .PARAMETER Repository
        GitHub repo to scope the search to (owner/name).
    .OUTPUTS
        Up to 5 hashtables (Number, Title, Url, Repository, State, Source), deduplicated
        by repo+number. Returns an empty array on any failure (best-effort lookup).
    #>
    param(
        [string[]]$SearchTerms,
        [string]$ExtraContext = "",
        [string]$Repository = "dotnet/runtime",
        [bool]$IncludeOpen = $true,
        [bool]$IncludeClosed = $true,
        [int]$TimeoutSec = 30
    )

    $results = @()

    if (-not $SearchTerms -or $SearchTerms.Count -eq 0) {
        return $results
    }

    try {
        # MihuBot MCP endpoint - call as JSON-RPC style request
        $mcpUrl = "https://mihubot.xyz/mcp"

        # Build the request payload matching the MCP tool schema.
        # NOTE: field names here are the wire format expected by the
        # "search_dotnet_repos" tool - do not rename them.
        $payload = @{
            jsonrpc = "2.0"
            method = "tools/call"
            id = [guid]::NewGuid().ToString()
            params = @{
                name = "search_dotnet_repos"
                arguments = @{
                    repository = $Repository
                    searchTerms = $SearchTerms
                    extraSearchContext = $ExtraContext
                    includeOpen = $IncludeOpen
                    includeClosed = $IncludeClosed
                    includeIssues = $true
                    includePullRequests = $true
                    includeComments = $false
                }
            }
        } | ConvertTo-Json -Depth 10

        Write-Verbose "Calling MihuBot MCP endpoint with terms: $($SearchTerms -join ', ')"

        $response = Invoke-RestMethod -Uri $mcpUrl -Method Post -Body $payload -ContentType "application/json" -TimeoutSec $TimeoutSec

        # Parse MCP response: each content entry of type "text" carries a JSON
        # array of issues; malformed entries are silently skipped.
        if ($response.result -and $response.result.content) {
            foreach ($content in $response.result.content) {
                if ($content.type -eq "text" -and $content.text) {
                    $issueData = $content.text | ConvertFrom-Json -ErrorAction SilentlyContinue
                    if ($issueData) {
                        foreach ($issue in $issueData) {
                            $results += @{
                                Number = $issue.Number
                                Title = $issue.Title
                                Url = $issue.Url
                                Repository = $issue.Repository
                                State = $issue.State
                                Source = "MihuBot"
                            }
                        }
                    }
                }
            }
        }

        # Deduplicate by issue number and repo
        $unique = @{}
        foreach ($issue in $results) {
            $key = "$($issue.Repository)#$($issue.Number)"
            if (-not $unique.ContainsKey($key)) {
                $unique[$key] = $issue
            }
        }

        return $unique.Values | Select-Object -First 5
    }
    catch {
        # Best-effort: search failures must not break CI analysis.
        Write-Verbose "MihuBot search failed: $_"
        return @()
    }
}
function Search-KnownIssues {
    <#
    .SYNOPSIS
        Searches open "Known Build Error" issues on GitHub for matches to a failing test or error.
    .DESCRIPTION
        Derives up to three search terms from the error message and test name
        (priority: "[FAIL]" test names, then stack-trace frames, then the work
        item name, then specific exception types), then queries the gh CLI.
        NOTE: the extraction logic below relies on PowerShell's automatic
        $Matches variable immediately after each -match - statement order matters.
    .OUTPUTS
        Hashtables (Number, Title, Url, SearchTerm) deduplicated by issue number;
        empty when gh is unavailable or nothing matches.
    #>
    param(
        [string]$TestName,
        [string]$ErrorMessage,
        [string]$Repository = "dotnet/runtime"
    )

    # Search for known issues using the "Known Build Error" label
    # This label is used by Build Analysis across dotnet repositories

    $knownIssues = @()

    # Check if gh CLI is available
    if (-not (Get-Command gh -ErrorAction SilentlyContinue)) {
        Write-Verbose "GitHub CLI not available for searching known issues"
        return $knownIssues
    }

    try {
        # Extract search terms from test name and error message
        $searchTerms = @()

        # First priority: Look for [FAIL] test names in the error message
        # Pattern: "TestName [FAIL]" - the test name comes BEFORE [FAIL]
        if ($ErrorMessage -match '(\S+)\s+\[FAIL\]') {
            $failedTest = $Matches[1]
            # Extract just the method name (after last .)
            if ($failedTest -match '\.([^.]+)$') {
                $searchTerms += $Matches[1]
            }
            # Also add the full test name
            $searchTerms += $failedTest
        }

        # Second priority: Extract test class/method from stack traces
        if ($ErrorMessage -match 'at\s+(\w+\.\w+)\(' -and $searchTerms.Count -eq 0) {
            $searchTerms += $Matches[1]
        }

        if ($TestName) {
            # Try to get the test method name from the work item
            if ($TestName -match '\.([^.]+)$') {
                $methodName = $Matches[1]
                # Only add if it looks like a test name (not just "Tests")
                if ($methodName -ne "Tests" -and $methodName.Length -gt 5) {
                    $searchTerms += $methodName
                }
            }
            # Also try the full test name if it's not too long and looks specific
            if ($TestName.Length -lt 100 -and $TestName -notmatch '^System\.\w+\.Tests$') {
                $searchTerms += $TestName
            }
        }

        # Third priority: Extract specific exception patterns (but not generic TimeoutException)
        if ($ErrorMessage -and $searchTerms.Count -eq 0) {
            # Look for specific exception types
            if ($ErrorMessage -match '(System\.(?:InvalidOperation|ArgumentNull|Format)\w*Exception)') {
                $searchTerms += $Matches[1]
            }
        }

        # Deduplicate and limit search terms
        $searchTerms = $searchTerms | Select-Object -Unique | Select-Object -First 3

        foreach ($term in $searchTerms) {
            if (-not $term) { continue }

            # Sanitize the search term to avoid passing unsafe characters to gh CLI
            $safeTerm = Get-SafeSearchTerm -Term $term
            if (-not $safeTerm) { continue }

            Write-Verbose "Searching for known issues with term: $safeTerm"

            # Search for open issues with the "Known Build Error" label
            $results = & gh issue list `
                --repo $Repository `
                --label "Known Build Error" `
                --state open `
                --search $safeTerm `
                --limit 3 `
                --json number,title,url 2>$null | ConvertFrom-Json

            if ($results) {
                foreach ($issue in $results) {
                    # Check if the title actually contains our search term (avoid false positives)
                    if ($issue.title -match [regex]::Escape($safeTerm)) {
                        $knownIssues += @{
                            Number = $issue.number
                            Title = $issue.title
                            Url = $issue.url
                            SearchTerm = $safeTerm
                        }
                    }
                }
            }

            # If we found issues, stop searching
            if ($knownIssues.Count -gt 0) {
                break
            }
        }

        # Deduplicate by issue number
        $unique = @{}
        foreach ($issue in $knownIssues) {
            if (-not $unique.ContainsKey($issue.Number)) {
                $unique[$issue.Number] = $issue
            }
        }

        return $unique.Values
    }
    catch {
        # Best-effort: known-issue lookup must never fail the analysis run.
        Write-Verbose "Failed to search for known issues: $_"
        return @()
    }
}
function Show-KnownIssues {
    <#
    .SYNOPSIS
        Prints known "Known Build Error" issues (and, optionally, MihuBot-related
        issues) that match a failing test or error message.
    .PARAMETER TestName
        Name of the failing test/work item used to derive search terms.
    .PARAMETER ErrorMessage
        Failure text; "[FAIL]" test names are extracted from it when present.
    .PARAMETER IncludeMihuBot
        When set, also queries the MihuBot search service for related discussions.
    #>
    param(
        [string]$TestName = "",
        [string]$ErrorMessage = "",
        [string]$Repository = $script:Repository,
        [switch]$IncludeMihuBot
    )

    # Search for known issues if we have a test name or error
    if ($TestName -or $ErrorMessage) {
        $knownIssues = Search-KnownIssues -TestName $TestName -ErrorMessage $ErrorMessage -Repository $Repository
        if ($knownIssues -and $knownIssues.Count -gt 0) {
            Write-Host "`n Known Issues:" -ForegroundColor Magenta
            foreach ($issue in $knownIssues) {
                Write-Host " #$($issue.Number): $($issue.Title)" -ForegroundColor Magenta
                Write-Host " $($issue.Url)" -ForegroundColor Gray
            }
        }

        # Search MihuBot for related issues/discussions
        if ($IncludeMihuBot) {
            $searchTerms = @()

            # Extract meaningful search terms
            # NOTE: relies on the automatic $Matches variable right after -match.
            if ($ErrorMessage -match '(\S+)\s+\[FAIL\]') {
                $failedTest = $Matches[1]
                if ($failedTest -match '\.([^.]+)$') {
                    $searchTerms += $Matches[1]
                }
            }

            if ($TestName -and $TestName -match '\.([^.]+)$') {
                $methodName = $Matches[1]
                if ($methodName -ne "Tests" -and $methodName.Length -gt 5) {
                    $searchTerms += $methodName
                }
            }

            # Add test name as context
            if ($TestName) {
                $searchTerms += $TestName
            }

            $searchTerms = $searchTerms | Select-Object -Unique | Select-Object -First 3

            if ($searchTerms.Count -gt 0) {
                $mihuBotResults = Search-MihuBotIssues -SearchTerms $searchTerms -Repository $Repository -ExtraContext "test failure $TestName"
                if ($mihuBotResults -and $mihuBotResults.Count -gt 0) {
                    # Filter out issues already shown from Known Build Error search
                    $knownNumbers = @()
                    if ($knownIssues) {
                        $knownNumbers = $knownIssues | ForEach-Object { $_.Number }
                    }
                    $newResults = $mihuBotResults | Where-Object { $_.Number -notin $knownNumbers }

                    if ($newResults -and @($newResults).Count -gt 0) {
                        Write-Host "`n Related Issues (MihuBot):" -ForegroundColor Blue
                        foreach ($issue in $newResults) {
                            $stateIcon = if ($issue.State -eq "open") { "[open]" } else { "[closed]" }
                            Write-Host " #$($issue.Number): $($issue.Title) $stateIcon" -ForegroundColor Blue
                            Write-Host " $($issue.Url)" -ForegroundColor Gray
                        }
                    }
                }
            }
        }
    }
}
function Get-AzDOTestResults {
    <#
    .SYNOPSIS
        Fetches failed test results for an Azure DevOps test run via the az CLI.
    .OUTPUTS
        Objects with name/outcome/error properties, or $null when az is
        unavailable or the invocation fails.
    #>
    param(
        [string]$RunId,
        [string]$Org = "https://dev.azure.com/$Organization"
    )

    # The az CLI is optional tooling; bail out quietly when it is not installed.
    if (-not (Get-Command az -ErrorAction SilentlyContinue)) {
        Write-Verbose "Azure CLI not available for fetching test results"
        return $null
    }

    try {
        Write-Verbose "Fetching test results for run $RunId via az devops CLI..."

        # Server-side JMESPath query keeps only failed outcomes and trims the
        # payload down to name/outcome/error.
        $azArgs = @(
            'devops', 'invoke'
            '--org', $Org
            '--area', 'test'
            '--resource', 'Results'
            '--route-parameters', "project=$Project", "runId=$RunId"
            '--api-version', '7.0'
            '--query', "value[?outcome=='Failed'].{name:testCaseTitle, outcome:outcome, error:errorMessage}"
            '-o', 'json'
        )
        return az @azArgs 2>$null | ConvertFrom-Json
    }
    catch {
        Write-Verbose "Failed to fetch test results via az devops: $_"
        return $null
    }
}
function Extract-TestRunUrls {
    <#
    .SYNOPSIS
        Extracts published Azure DevOps test-run URLs (and their run IDs) from log text.
    .OUTPUTS
        Hashtables with Url and RunId keys, one per "Published Test Run" line.
    #>
    param([string]$LogContent)

    $testRuns = @()

    # Match Azure DevOps Test Run URLs
    # Pattern: Published Test Run : https://dev.azure.com/dnceng-public/public/_TestManagement/Runs?runId=35626550&_a=runCharts
    $pattern = 'Published Test Run\s*:\s*(https://dev\.azure\.com/[^/]+/[^/]+/_TestManagement/Runs\?runId=(\d+)[^\s]*)'
    # FIX: do not assign to $matches - that name is PowerShell's automatic
    # $Matches variable (set by -match) and shadowing it is flagged by
    # PSScriptAnalyzer. Use the same naming as the sibling extractors.
    $runMatches = [regex]::Matches($LogContent, $pattern)

    foreach ($match in $runMatches) {
        $testRuns += @{
            Url   = $match.Groups[1].Value
            RunId = $match.Groups[2].Value
        }
    }

    Write-Verbose "Found $($testRuns.Count) test run URLs"
    return $testRuns
}

function Get-LocalTestFailures {
    <#
    .SYNOPSIS
        Finds non-Helix ("local") test failures in a build timeline.
    .DESCRIPTION
        Scans timeline task records whose issues mention failed tests, records the
        owning job, the task log id, and any "Published Test Run" URLs found in the
        sibling publish task's log.
    .PARAMETER Timeline
        Build timeline object (as returned by the AzDO timeline API).
    .PARAMETER BuildId
        Build id, used to fetch publish-task logs for test-run URL extraction.
    .OUTPUTS
        Hashtables with TaskName, TaskId, ParentJobId, LogId, Issues, TestRunUrls.
    #>
    param(
        [object]$Timeline,
        [int]$BuildId
    )

    # Guard against a missing timeline, consistent with Get-FailedJobs/Get-CanceledJobs.
    if ($null -eq $Timeline -or $null -eq $Timeline.records) {
        return @()
    }

    $localFailures = @()

    # Find failed test tasks (non-Helix)
    # Look for tasks with "Test" in name that have issues but no Helix URLs
    $testTasks = $Timeline.records | Where-Object {
        ($_.name -match 'Test|xUnit' -or $_.type -eq 'Task') -and
        $_.issues -and
        $_.issues.Count -gt 0
    }

    foreach ($task in $testTasks) {
        # Check if this task has test failures (XUnit errors)
        $testErrors = $task.issues | Where-Object {
            $_.message -match 'Tests failed:' -or
            $_.message -match 'error\s*:.*Test.*failed'
        }

        if ($testErrors.Count -gt 0) {
            # This is a local test failure - find the parent job for URL construction
            $parentJob = $Timeline.records | Where-Object { $_.id -eq $task.parentId -and $_.type -eq "Job" } | Select-Object -First 1

            $failure = @{
                TaskName    = $task.name
                TaskId      = $task.id
                ParentJobId = if ($parentJob) { $parentJob.id } else { $task.parentId }
                LogId       = if ($task.log) { $task.log.id } else { $null }
                Issues      = $testErrors
                TestRunUrls = @()
            }

            # Try to get test run URLs from the publish task
            $publishTask = $Timeline.records | Where-Object {
                $_.parentId -eq $task.parentId -and
                $_.name -match 'Publish.*Test.*Results' -and
                $_.log
            } | Select-Object -First 1

            if ($publishTask -and $publishTask.log) {
                $logContent = Get-BuildLog -Build $BuildId -LogId $publishTask.log.id
                if ($logContent) {
                    $testRunUrls = Extract-TestRunUrls -LogContent $logContent
                    $failure.TestRunUrls = $testRunUrls
                }
            }

            $localFailures += $failure
        }
    }

    return $localFailures
}
function Get-HelixJobDetails {
    <#
    .SYNOPSIS
        Fetches metadata for a Helix job from the Helix public API.
    #>
    param([string]$JobId)

    try {
        return Invoke-CachedRestMethod -Uri "https://helix.dot.net/api/2019-06-17/jobs/$JobId" -TimeoutSec $TimeoutSec -AsJson
    }
    catch {
        Write-Warning "Failed to fetch Helix job ${JobId}: $_"
        return $null
    }
}

function Get-HelixWorkItems {
    <#
    .SYNOPSIS
        Lists the work items belonging to a Helix job.
    #>
    param([string]$JobId)

    try {
        return Invoke-CachedRestMethod -Uri "https://helix.dot.net/api/2019-06-17/jobs/$JobId/workitems" -TimeoutSec $TimeoutSec -AsJson
    }
    catch {
        Write-Warning "Failed to fetch work items for job ${JobId}: $_"
        return $null
    }
}

function Get-HelixWorkItemFiles {
    <#
    .SYNOPSIS
        Fetches work item files via the ListFiles endpoint which returns direct blob storage URIs.
    .DESCRIPTION
        Workaround for https://github.com/dotnet/dnceng/issues/6072:
        The Details endpoint returns incorrect permalink URIs for files in subdirectories
        and rejects unicode characters in filenames. The ListFiles endpoint returns direct
        blob storage URIs that always work, regardless of subdirectory depth or unicode.
    #>
    param([string]$JobId, [string]$WorkItemName)

    # Work item names may contain characters that need URL escaping.
    $encodedWorkItem = [uri]::EscapeDataString($WorkItemName)

    try {
        return Invoke-CachedRestMethod -Uri "https://helix.dot.net/api/2019-06-17/jobs/$JobId/workitems/$encodedWorkItem/files" -TimeoutSec $TimeoutSec -AsJson
    }
    catch {
        Write-Warning "Failed to fetch files for work item ${WorkItemName}: $_"
        return $null
    }
}
function Get-HelixWorkItemDetails {
    <#
    .SYNOPSIS
        Fetches details for a single Helix work item, with corrected file URIs.
    #>
    param([string]$JobId, [string]$WorkItemName)

    $encodedWorkItem = [uri]::EscapeDataString($WorkItemName)

    try {
        $details = Invoke-CachedRestMethod -Uri "https://helix.dot.net/api/2019-06-17/jobs/$JobId/workitems/$encodedWorkItem" -TimeoutSec $TimeoutSec -AsJson

        # Replace Files from the Details endpoint with results from ListFiles.
        # The Details endpoint has broken URIs for subdirectory and unicode filenames
        # (https://github.com/dotnet/dnceng/issues/6072). ListFiles returns direct
        # blob storage URIs that always work.
        $listing = Get-HelixWorkItemFiles -JobId $JobId -WorkItemName $WorkItemName
        if ($null -ne $listing) {
            $details.Files = @($listing | ForEach-Object {
                [PSCustomObject]@{
                    FileName = $_.Name
                    Uri      = $_.Link
                }
            })
        }

        return $details
    }
    catch {
        Write-Warning "Failed to fetch work item ${WorkItemName}: $_"
        return $null
    }
}

function Get-HelixConsoleLog {
    <#
    .SYNOPSIS
        Downloads a Helix console log from the given URL.
    #>
    param([string]$Url)

    try {
        return Invoke-CachedRestMethod -Uri $Url -TimeoutSec $TimeoutSec
    }
    catch {
        Write-Warning "Failed to fetch Helix log from ${Url}: $_"
        return $null
    }
}

function Find-WorkItemsWithBinlogs {
    <#
    .SYNOPSIS
        Scans work items in a Helix job to find which ones contain binlog files.
    .DESCRIPTION
        Not all work items produce binlogs - only build/publish tests do.
        This function helps locate work items that have binlogs for deeper analysis.
    #>
    param(
        [Parameter(Mandatory)]
        [string]$JobId,
        [int]$MaxItems = 30,
        [switch]$IncludeDetails
    )

    $workItems = Get-HelixWorkItems -JobId $JobId
    if (-not $workItems) {
        Write-Warning "No work items found for job $JobId"
        return @()
    }

    Write-Host "Scanning up to $MaxItems work items for binlogs..." -ForegroundColor Gray

    $found = @()
    $scanned = 0

    foreach ($item in $workItems | Select-Object -First $MaxItems) {
        $scanned++

        # Each work item needs an extra details call to see its files.
        $details = Get-HelixWorkItemDetails -JobId $JobId -WorkItemName $item.Name
        if ($details -and $details.Files) {
            $binlogs = @($details.Files | Where-Object { $_.FileName -like "*.binlog" })
            if ($binlogs.Count -gt 0) {
                $entry = @{
                    Name        = $item.Name
                    BinlogCount = $binlogs.Count
                    Binlogs     = $binlogs | ForEach-Object { $_.FileName }
                    ExitCode    = $details.ExitCode
                    State       = $details.State
                }
                if ($IncludeDetails) {
                    $entry.BinlogUris = $binlogs | ForEach-Object { $_.Uri }
                }
                $found += $entry
            }
        }

        # Progress indicator every 10 items
        if ($scanned % 10 -eq 0) {
            Write-Host " Scanned $scanned/$MaxItems..." -ForegroundColor DarkGray
        }
    }

    return $found
}
- #> - param( - [Parameter(Mandatory)] - [string]$JobId, - [int]$MaxItems = 30, - [switch]$IncludeDetails - ) - - $workItems = Get-HelixWorkItems -JobId $JobId - if (-not $workItems) { - Write-Warning "No work items found for job $JobId" - return @() - } - - Write-Host "Scanning up to $MaxItems work items for binlogs..." -ForegroundColor Gray - - $results = @() - $scanned = 0 - - foreach ($wi in $workItems | Select-Object -First $MaxItems) { - $scanned++ - $details = Get-HelixWorkItemDetails -JobId $JobId -WorkItemName $wi.Name - if ($details -and $details.Files) { - $binlogs = @($details.Files | Where-Object { $_.FileName -like "*.binlog" }) - if ($binlogs.Count -gt 0) { - $result = @{ - Name = $wi.Name - BinlogCount = $binlogs.Count - Binlogs = $binlogs | ForEach-Object { $_.FileName } - ExitCode = $details.ExitCode - State = $details.State - } - if ($IncludeDetails) { - $result.BinlogUris = $binlogs | ForEach-Object { $_.Uri } - } - $results += $result - } - } - - # Progress indicator every 10 items - if ($scanned % 10 -eq 0) { - Write-Host " Scanned $scanned/$MaxItems..." -ForegroundColor DarkGray - } - } - - return $results -} - -#endregion Helix API Functions - -#region Output Formatting - -function Format-TestFailure { - param( - [string]$LogContent, - [int]$MaxLines = $MaxFailureLines, - [int]$MaxFailures = 3 - ) - - $lines = $LogContent -split "`n" - $allFailures = @() - $currentFailure = @() - $inFailure = $false - $emptyLineCount = 0 - $failureCount = 0 - - # Expanded failure detection patterns - # CAUTION: These trigger "failure block" capture. Overly broad patterns (e.g. \w+Error:) - # will grab Python harness/reporter noise and swamp the real test failure. 
- $failureStartPatterns = @( - '\[FAIL\]', - 'Assert\.\w+\(\)\s+Failure', - 'Expected:.*but was:', - 'BUG:', - 'FAILED\s*$', - 'END EXECUTION - FAILED', - 'System\.\w+Exception:', - 'Timed Out \(timeout' - ) - $combinedPattern = ($failureStartPatterns -join '|') - - foreach ($line in $lines) { - # Check for new failure start - if ($line -match $combinedPattern) { - # Save previous failure if exists - if ($currentFailure.Count -gt 0) { - $allFailures += ($currentFailure -join "`n") - $failureCount++ - if ($failureCount -ge $MaxFailures) { - break - } - } - # Start new failure - $currentFailure = @($line) - $inFailure = $true - $emptyLineCount = 0 - continue - } - - if ($inFailure) { - $currentFailure += $line - - # Track consecutive empty lines to detect end of stack trace - if ($line -match '^\s*$') { - $emptyLineCount++ - } - else { - $emptyLineCount = 0 - } - - # Stop this failure after stack trace ends (2+ consecutive empty lines) or max lines reached - if ($emptyLineCount -ge 2 -or $currentFailure.Count -ge $MaxLines) { - $allFailures += ($currentFailure -join "`n") - $currentFailure = @() - $inFailure = $false - $failureCount++ - if ($failureCount -ge $MaxFailures) { - break - } - } - } - } - - # Don't forget last failure - if ($currentFailure.Count -gt 0 -and $failureCount -lt $MaxFailures) { - $allFailures += ($currentFailure -join "`n") - } - - if ($allFailures.Count -eq 0) { - return $null - } - - $result = $allFailures -join "`n`n--- Next Failure ---`n`n" - - if ($failureCount -ge $MaxFailures) { - $result += "`n`n... 
# Helper to display test results from a test run
function Show-TestRunResults {
    <#
    .SYNOPSIS
        Prints the failed tests for each published AzDO test run (first 10 per run).
    #>
    param(
        [object[]]$TestRunUrls,
        [string]$Org = "https://dev.azure.com/$Organization"
    )

    # Nothing to print for an empty/absent run list.
    if (-not $TestRunUrls -or $TestRunUrls.Count -eq 0) { return }

    Write-Host "`n Test Results:" -ForegroundColor Yellow
    foreach ($testRun in $TestRunUrls) {
        Write-Host " Run $($testRun.RunId): $($testRun.Url)" -ForegroundColor Gray

        $testResults = Get-AzDOTestResults -RunId $testRun.RunId -Org $Org
        if (-not $testResults -or $testResults.Count -eq 0) { continue }

        Write-Host "`n Failed tests ($($testResults.Count)):" -ForegroundColor Red
        foreach ($result in $testResults | Select-Object -First 10) {
            Write-Host " - $($result.name)" -ForegroundColor White
        }
        if ($testResults.Count -gt 10) {
            Write-Host " ... and $($testResults.Count - 10) more" -ForegroundColor Gray
        }
    }
}
$($workItemDetails.MachineName)" -ForegroundColor Gray - Write-Host " Duration: $($workItemDetails.Duration)" -ForegroundColor Gray - - # Show artifacts with binlogs highlighted - if ($workItemDetails.Files -and $workItemDetails.Files.Count -gt 0) { - Write-Host "`n Artifacts:" -ForegroundColor Yellow - $binlogs = $workItemDetails.Files | Where-Object { $_.FileName -like "*.binlog" } - $otherFiles = $workItemDetails.Files | Where-Object { $_.FileName -notlike "*.binlog" } - - # Show binlogs first with special formatting - foreach ($file in $binlogs | Select-Object -Unique FileName, Uri) { - Write-Host " 📋 $($file.FileName): $($file.Uri)" -ForegroundColor Cyan - } - if ($binlogs.Count -gt 0) { - Write-Host " (Tip: Use MSBuild MCP server or https://live.msbuildlog.com/ to analyze binlogs)" -ForegroundColor DarkGray - } - - # Show other files - foreach ($file in $otherFiles | Select-Object -Unique FileName, Uri | Select-Object -First 10) { - Write-Host " $($file.FileName): $($file.Uri)" -ForegroundColor Gray - } - } - - # Fetch console log - $consoleUrl = "https://helix.dot.net/api/2019-06-17/jobs/$HelixJob/workitems/$WorkItem/console" - Write-Host "`n Console Log: $consoleUrl" -ForegroundColor Yellow - - $consoleLog = Get-HelixConsoleLog -Url $consoleUrl - if ($consoleLog) { - $failureInfo = Format-TestFailure -LogContent $consoleLog - if ($failureInfo) { - Write-Host $failureInfo -ForegroundColor White - - # Search for known issues - Show-KnownIssues -TestName $WorkItem -ErrorMessage $failureInfo -IncludeMihuBot:$SearchMihuBot - } - else { - # Show last 50 lines if no failure pattern detected - $lines = $consoleLog -split "`n" - $lastLines = $lines | Select-Object -Last 50 - Write-Host ($lastLines -join "`n") -ForegroundColor White - } - } - } - } - else { - # List all work items in the job - Write-Host "`nWork Items:" -ForegroundColor Yellow - $workItems = Get-HelixWorkItems -JobId $HelixJob - if ($workItems) { - Write-Host " Total: $($workItems.Count)" 
-ForegroundColor Cyan - Write-Host " Checking for failures..." -ForegroundColor Gray - - # Need to fetch details for each to find failures (list API only shows 'Finished') - $failedItems = @() - foreach ($wi in $workItems | Select-Object -First 20) { - $details = Get-HelixWorkItemDetails -JobId $HelixJob -WorkItemName $wi.Name - if ($details -and $null -ne $details.ExitCode -and $details.ExitCode -ne 0) { - $failedItems += @{ - Name = $wi.Name - ExitCode = $details.ExitCode - State = $details.State - } - } - } - - if ($failedItems.Count -gt 0) { - Write-Host "`n Failed Work Items:" -ForegroundColor Red - foreach ($wi in $failedItems | Select-Object -First $MaxJobs) { - Write-Host " - $($wi.Name) (Exit: $($wi.ExitCode))" -ForegroundColor White - } - Write-Host "`n Use -WorkItem '' to see details" -ForegroundColor Gray - } - else { - Write-Host " No failures found in first 20 work items" -ForegroundColor Green - } - - Write-Host "`n All work items:" -ForegroundColor Yellow - foreach ($wi in $workItems | Select-Object -First 10) { - Write-Host " - $($wi.Name)" -ForegroundColor White - } - if ($workItems.Count -gt 10) { - Write-Host " ... and $($workItems.Count - 10) more" -ForegroundColor Gray - } - - # Find work items with binlogs if requested - if ($FindBinlogs) { - Write-Host "`n === Binlog Search ===" -ForegroundColor Yellow - $binlogResults = Find-WorkItemsWithBinlogs -JobId $HelixJob -MaxItems 30 -IncludeDetails - - if ($binlogResults.Count -gt 0) { - Write-Host "`n Work items with binlogs:" -ForegroundColor Cyan - foreach ($result in $binlogResults) { - $stateColor = if ($result.ExitCode -eq 0) { 'Green' } else { 'Red' } - Write-Host " $($result.Name)" -ForegroundColor $stateColor - Write-Host " Binlogs ($($result.BinlogCount)):" -ForegroundColor Gray - foreach ($binlog in $result.Binlogs | Select-Object -First 5) { - Write-Host " - $binlog" -ForegroundColor White - } - if ($result.Binlogs.Count -gt 5) { - Write-Host " ... 
and $($result.Binlogs.Count - 5) more" -ForegroundColor DarkGray - } - } - Write-Host "`n Tip: Use -WorkItem '' to get full binlog URIs" -ForegroundColor DarkGray - } - else { - Write-Host " No binlogs found in scanned work items." -ForegroundColor Yellow - Write-Host " This job may contain only unit tests (which don't produce binlogs)." -ForegroundColor Gray - } - } - } - } - - exit 0 - } - - # Get build ID(s) if using PR number - $buildIds = @() - $knownIssuesFromBuildAnalysis = @() - $prChangedFiles = @() - $noBuildReason = $null - if ($PSCmdlet.ParameterSetName -eq 'PRNumber') { - $buildResult = Get-AzDOBuildIdFromPR -PR $PRNumber - if ($buildResult.Reason) { - # No builds found — emit summary with reason and exit - $noBuildReason = $buildResult.Reason - $buildIds = @() - $summary = [ordered]@{ - mode = "PRNumber" - repository = $Repository - prNumber = $PRNumber - builds = @() - totalFailedJobs = 0 - totalLocalFailures = 0 - lastBuildJobSummary = [ordered]@{ - total = 0; succeeded = 0; failed = 0; canceled = 0; pending = 0; warnings = 0; skipped = 0 - } - failedJobNames = @() - failedJobDetails = @() - canceledJobNames = @() - knownIssues = @() - prCorrelation = [ordered]@{ - changedFileCount = 0 - hasCorrelation = $false - correlatedFiles = @() - } - recommendationHint = if ($noBuildReason -eq "MERGE_CONFLICTS") { "MERGE_CONFLICTS" } else { "NO_BUILDS" } - noBuildReason = $noBuildReason - mergeState = $buildResult.MergeState - } - Write-Host "" - Write-Host "[CI_ANALYSIS_SUMMARY]" - Write-Host ($summary | ConvertTo-Json -Depth 5) - Write-Host "[/CI_ANALYSIS_SUMMARY]" - exit 0 - } - $buildIds = @($buildResult.BuildIds) - - # Check Build Analysis for known issues - $knownIssuesFromBuildAnalysis = @(Get-BuildAnalysisKnownIssues -PR $PRNumber) - - # Get changed files for correlation - $prChangedFiles = @(Get-PRChangedFiles -PR $PRNumber) - if ($prChangedFiles.Count -gt 0) { - Write-Verbose "PR has $($prChangedFiles.Count) changed files" - } - } - else { - 
$buildIds = @($BuildId) - } - - # Process each build - $totalFailedJobs = 0 - $totalLocalFailures = 0 - $allFailuresForCorrelation = @() - $allFailedJobNames = @() - $allCanceledJobNames = @() - $allFailedJobDetails = @() - $lastBuildJobSummary = $null - - foreach ($currentBuildId in $buildIds) { - Write-Host "`n=== Azure DevOps Build $currentBuildId ===" -ForegroundColor Yellow - Write-Host "URL: https://dev.azure.com/$Organization/$Project/_build/results?buildId=$currentBuildId" -ForegroundColor Gray - - # Get and display build status - $buildStatus = Get-AzDOBuildStatus -Build $currentBuildId - if ($buildStatus) { - $statusColor = switch ($buildStatus.Status) { - "inProgress" { "Cyan" } - "completed" { if ($buildStatus.Result -eq "succeeded") { "Green" } else { "Red" } } - default { "Gray" } - } - $statusText = $buildStatus.Status - if ($buildStatus.Status -eq "completed" -and $buildStatus.Result) { - $statusText = "$($buildStatus.Status) ($($buildStatus.Result))" - } - elseif ($buildStatus.Status -eq "inProgress") { - $statusText = "IN PROGRESS - showing failures so far" - } - Write-Host "Status: $statusText" -ForegroundColor $statusColor - } - - # Get timeline - $isInProgress = $buildStatus -and $buildStatus.Status -eq "inProgress" - $timeline = Get-AzDOTimeline -Build $currentBuildId -BuildInProgress:$isInProgress - - # Handle timeline fetch failure - if (-not $timeline) { - Write-Host "`nCould not fetch build timeline" -ForegroundColor Red - Write-Host "Build URL: https://dev.azure.com/$Organization/$Project/_build/results?buildId=$currentBuildId" -ForegroundColor Gray - continue - } - - # Get failed jobs - $failedJobs = Get-FailedJobs -Timeline $timeline - - # Get canceled jobs (different from failed - typically due to dependency failures) - $canceledJobs = Get-CanceledJobs -Timeline $timeline - - # Also check for local test failures (non-Helix) - $localTestFailures = Get-LocalTestFailures -Timeline $timeline -BuildId $currentBuildId - - # Accumulate totals 
and compute job summary BEFORE any continue branches - $totalFailedJobs += $failedJobs.Count - $totalLocalFailures += $localTestFailures.Count - $allFailedJobNames += @($failedJobs | ForEach-Object { $_.name }) - $allCanceledJobNames += @($canceledJobs | ForEach-Object { $_.name }) - - $allJobs = @() - $succeededJobs = 0 - $pendingJobs = 0 - $canceledJobCount = 0 - $skippedJobs = 0 - $warningJobs = 0 - if ($timeline -and $timeline.records) { - $allJobs = @($timeline.records | Where-Object { $_.type -eq "Job" }) - $succeededJobs = @($allJobs | Where-Object { $_.result -eq "succeeded" }).Count - $warningJobs = @($allJobs | Where-Object { $_.result -eq "succeededWithIssues" }).Count - $pendingJobs = @($allJobs | Where-Object { -not $_.result -or $_.state -eq "pending" -or $_.state -eq "inProgress" }).Count - $canceledJobCount = @($allJobs | Where-Object { $_.result -eq "canceled" }).Count - $skippedJobs = @($allJobs | Where-Object { $_.result -eq "skipped" }).Count - } - $lastBuildJobSummary = [ordered]@{ - total = $allJobs.Count - succeeded = $succeededJobs - failed = if ($failedJobs) { $failedJobs.Count } else { 0 } - canceled = $canceledJobCount - pending = $pendingJobs - warnings = $warningJobs - skipped = $skippedJobs - } - - if ((-not $failedJobs -or $failedJobs.Count -eq 0) -and $localTestFailures.Count -eq 0) { - if ($buildStatus -and $buildStatus.Status -eq "inProgress") { - Write-Host "`nNo failures yet - build still in progress" -ForegroundColor Cyan - Write-Host "Run again later to check for failures, or use -NoCache to get fresh data" -ForegroundColor Gray - } - else { - Write-Host "`nNo failed jobs found in build $currentBuildId" -ForegroundColor Green - } - # Still show canceled jobs if any - if ($canceledJobs -and $canceledJobs.Count -gt 0) { - Write-Host "`nNote: $($canceledJobs.Count) job(s) were canceled (not failed):" -ForegroundColor DarkYellow - foreach ($job in $canceledJobs | Select-Object -First 5) { - Write-Host " - $($job.name)" 
-ForegroundColor DarkGray - } - if ($canceledJobs.Count -gt 5) { - Write-Host " ... and $($canceledJobs.Count - 5) more" -ForegroundColor DarkGray - } - Write-Host " (Canceled jobs are typically due to earlier stage failures or timeouts)" -ForegroundColor DarkGray - } - continue - } - - # Report local test failures first (these may exist even without failed jobs) - if ($localTestFailures.Count -gt 0) { - Write-Host "`n=== Local Test Failures (non-Helix) ===" -ForegroundColor Yellow - Write-Host "Build: https://dev.azure.com/$Organization/$Project/_build/results?buildId=$currentBuildId" -ForegroundColor Gray - - foreach ($failure in $localTestFailures) { - Write-Host "`n--- $($failure.TaskName) ---" -ForegroundColor Cyan - - # Collect issues for correlation - $issueMessages = $failure.Issues | ForEach-Object { $_.message } - $allFailuresForCorrelation += @{ - TaskName = $failure.TaskName - JobName = "Local Test" - Errors = $issueMessages - HelixLogs = @() - FailedTests = @() - } - - # Show build and log links - $jobLogUrl = "https://dev.azure.com/$Organization/$Project/_build/results?buildId=$currentBuildId&view=logs&j=$($failure.ParentJobId)" - if ($failure.TaskId) { - $jobLogUrl += "&t=$($failure.TaskId)" - } - Write-Host " Log: $jobLogUrl" -ForegroundColor Gray - - # Show issues - foreach ($issue in $failure.Issues) { - Write-Host " $($issue.message)" -ForegroundColor Red - } - - # Show test run URLs if available - if ($failure.TestRunUrls.Count -gt 0) { - Show-TestRunResults -TestRunUrls $failure.TestRunUrls -Org "https://dev.azure.com/$Organization" - } - - # Try to get more details from the task log - if ($failure.LogId) { - $logContent = Get-BuildLog -Build $currentBuildId -LogId $failure.LogId - if ($logContent) { - # Extract test run URLs from this log too - $additionalRuns = Extract-TestRunUrls -LogContent $logContent - if ($additionalRuns.Count -gt 0 -and $failure.TestRunUrls.Count -eq 0) { - Show-TestRunResults -TestRunUrls $additionalRuns -Org 
"https://dev.azure.com/$Organization" - } - - # Search for known issues based on build errors and task name - $buildErrors = Extract-BuildErrors -LogContent $logContent - if ($buildErrors.Count -gt 0) { - Show-KnownIssues -ErrorMessage ($buildErrors -join "`n") -IncludeMihuBot:$SearchMihuBot - } - elseif ($failure.TaskName) { - # If no specific errors, try searching by task name - Show-KnownIssues -TestName $failure.TaskName -IncludeMihuBot:$SearchMihuBot - } - } - } - } - } - - if (-not $failedJobs -or $failedJobs.Count -eq 0) { - Write-Host "`n=== Summary ===" -ForegroundColor Yellow - Write-Host "Local test failures: $($localTestFailures.Count)" -ForegroundColor Red - Write-Host "Build URL: https://dev.azure.com/$Organization/$Project/_build/results?buildId=$currentBuildId" -ForegroundColor Cyan - continue - } - - Write-Host "`nFound $($failedJobs.Count) failed job(s):" -ForegroundColor Red - - # Show canceled jobs if any (these are different from failed) - if ($canceledJobs -and $canceledJobs.Count -gt 0) { - Write-Host "Also $($canceledJobs.Count) job(s) were canceled (due to earlier failures/timeouts):" -ForegroundColor DarkYellow - foreach ($job in $canceledJobs | Select-Object -First 3) { - Write-Host " - $($job.name)" -ForegroundColor DarkGray - } - if ($canceledJobs.Count -gt 3) { - Write-Host " ... and $($canceledJobs.Count - 3) more" -ForegroundColor DarkGray - } - } - - $processedJobs = 0 - $errorCount = 0 - foreach ($job in $failedJobs) { - if ($processedJobs -ge $MaxJobs) { - Write-Host "`n... 
and $($failedJobs.Count - $MaxJobs) more failed jobs (use -MaxJobs to see more)" -ForegroundColor Yellow - break - } - - try { - Write-Host "`n--- $($job.name) ---" -ForegroundColor Cyan - Write-Host " Build: https://dev.azure.com/$Organization/$Project/_build/results?buildId=$currentBuildId&view=logs&j=$($job.id)" -ForegroundColor Gray - - # Track per-job failure details for JSON summary - $jobDetail = [ordered]@{ - jobName = $job.name - buildId = $currentBuildId - errorSnippet = "" - helixWorkItems = @() - errorCategory = "unclassified" - } - - # Get Helix tasks for this job - $helixTasks = Get-HelixJobInfo -Timeline $timeline -JobId $job.id - - if ($helixTasks) { - foreach ($task in $helixTasks) { - if ($task.log) { - Write-Host " Fetching Helix task log..." -ForegroundColor Gray - $logContent = Get-BuildLog -Build $currentBuildId -LogId $task.log.id - - if ($logContent) { - # Extract test failures - $failures = Extract-TestFailures -LogContent $logContent - - if ($failures.Count -gt 0) { - Write-Host " Failed tests:" -ForegroundColor Red - foreach ($failure in $failures) { - Write-Host " - $($failure.TestName)" -ForegroundColor White - } - - # Collect for PR correlation - $allFailuresForCorrelation += @{ - TaskName = $task.name - JobName = $job.name - Errors = @() - HelixLogs = @() - FailedTests = $failures | ForEach-Object { $_.TestName } - } - $jobDetail.errorCategory = "test-failure" - $jobDetail.errorSnippet = ($failures | Select-Object -First 3 | ForEach-Object { $_.TestName }) -join "; " - } - - # Extract and optionally fetch Helix URLs - $helixUrls = Extract-HelixUrls -LogContent $logContent - - if ($helixUrls.Count -gt 0 -and $ShowLogs) { - Write-Host "`n Helix Console Logs:" -ForegroundColor Yellow - - foreach ($url in $helixUrls | Select-Object -First 3) { - Write-Host "`n $url" -ForegroundColor Gray - - # Extract work item name from URL for known issue search - $workItemName = "" - if ($url -match '/workitems/([^/]+)/console') { - $workItemName = 
$Matches[1] - $jobDetail.helixWorkItems += $workItemName - } - - $helixLog = Get-HelixConsoleLog -Url $url - if ($helixLog) { - $failureInfo = Format-TestFailure -LogContent $helixLog - if ($failureInfo) { - Write-Host $failureInfo -ForegroundColor White - - # Categorize failure from log content - if ($failureInfo -match 'Timed Out \(timeout') { - $jobDetail.errorCategory = "test-timeout" - } elseif ($failureInfo -match 'Exit Code:\s*(139|134|-4)' -or $failureInfo -match 'createdump') { - # Crash takes highest precedence — don't downgrade - if ($jobDetail.errorCategory -notin @("crash")) { - $jobDetail.errorCategory = "crash" - } - } elseif ($failureInfo -match 'Traceback \(most recent call last\)' -and $helixLog -match 'Tests run:.*Failures:\s*0') { - # Work item failed (non-zero exit from reporter crash) but all tests passed. - # The Python traceback is from Helix infrastructure, not from the test itself. - if ($jobDetail.errorCategory -notin @("crash", "test-timeout")) { - $jobDetail.errorCategory = "tests-passed-reporter-failed" - } - } elseif ($jobDetail.errorCategory -eq "unclassified") { - $jobDetail.errorCategory = "test-failure" - } - if (-not $jobDetail.errorSnippet) { - $jobDetail.errorSnippet = $failureInfo.Substring(0, [Math]::Min(200, $failureInfo.Length)) - } - - # Search for known issues - Show-KnownIssues -TestName $workItemName -ErrorMessage $failureInfo -IncludeMihuBot:$SearchMihuBot - } - else { - # No failure pattern matched — show tail of log - $lines = $helixLog -split "`n" - $lastLines = $lines | Select-Object -Last 20 - $tailText = $lastLines -join "`n" - Write-Host $tailText -ForegroundColor White - if (-not $jobDetail.errorSnippet) { - $jobDetail.errorSnippet = $tailText.Substring(0, [Math]::Min(200, $tailText.Length)) - } - Show-KnownIssues -TestName $workItemName -ErrorMessage $tailText -IncludeMihuBot:$SearchMihuBot - } - } - } - } - elseif ($helixUrls.Count -gt 0) { - Write-Host "`n Helix logs available (use -ShowLogs to fetch):" 
-ForegroundColor Yellow - foreach ($url in $helixUrls | Select-Object -First 3) { - Write-Host " $url" -ForegroundColor Gray - } - } - } - } - } - } - else { - # No Helix tasks - this is a build failure, extract actual errors - $buildTasks = $timeline.records | Where-Object { - $_.parentId -eq $job.id -and $_.result -eq "failed" - } - - foreach ($task in $buildTasks | Select-Object -First 3) { - Write-Host " Failed task: $($task.name)" -ForegroundColor Red - - # Fetch and parse the build log for actual errors - if ($task.log) { - $logUrl = "https://dev.azure.com/$Organization/$Project/_build/results?buildId=$currentBuildId&view=logs&j=$($job.id)&t=$($task.id)" - Write-Host " Log: $logUrl" -ForegroundColor Gray - $logContent = Get-BuildLog -Build $currentBuildId -LogId $task.log.id - - if ($logContent) { - $buildErrors = Extract-BuildErrors -LogContent $logContent - - if ($buildErrors.Count -gt 0) { - # Collect for PR correlation - $allFailuresForCorrelation += @{ - TaskName = $task.name - JobName = $job.name - Errors = $buildErrors - HelixLogs = @() - FailedTests = @() - } - $jobDetail.errorCategory = "build-error" - if (-not $jobDetail.errorSnippet) { - $snippet = ($buildErrors | Select-Object -First 2) -join "; " - $jobDetail.errorSnippet = $snippet.Substring(0, [Math]::Min(200, $snippet.Length)) - } - - # Extract Helix log URLs from the full log content - $helixLogUrls = Extract-HelixLogUrls -LogContent $logContent - - if ($helixLogUrls.Count -gt 0) { - Write-Host " Helix failures ($($helixLogUrls.Count)):" -ForegroundColor Red - foreach ($helixLog in $helixLogUrls | Select-Object -First 5) { - Write-Host " - $($helixLog.WorkItem)" -ForegroundColor White - Write-Host " Log: $($helixLog.Url)" -ForegroundColor Gray - } - if ($helixLogUrls.Count -gt 5) { - Write-Host " ... 
and $($helixLogUrls.Count - 5) more" -ForegroundColor Gray - } - } - else { - Write-Host " Build errors:" -ForegroundColor Red - foreach ($err in $buildErrors | Select-Object -First 5) { - Write-Host " $err" -ForegroundColor White - } - if ($buildErrors.Count -gt 5) { - Write-Host " ... and $($buildErrors.Count - 5) more errors" -ForegroundColor Gray - } - } - - # Search for known issues - Show-KnownIssues -ErrorMessage ($buildErrors -join "`n") -IncludeMihuBot:$SearchMihuBot - } - else { - Write-Host " (No specific errors extracted from log)" -ForegroundColor Gray - } - } - } - } - } - - $allFailedJobDetails += $jobDetail - $processedJobs++ - } - catch { - $errorCount++ - if ($ContinueOnError) { - Write-Warning " Error processing job '$($job.name)': $_" - } - else { - throw [System.Exception]::new("Error processing job '$($job.name)': $($_.Exception.Message)", $_.Exception) - } - } - } - - Write-Host "`n=== Build $currentBuildId Summary ===" -ForegroundColor Yellow - if ($allJobs.Count -gt 0) { - $parts = @() - if ($succeededJobs -gt 0) { $parts += "$succeededJobs passed" } - if ($warningJobs -gt 0) { $parts += "$warningJobs passed with warnings" } - if ($failedJobs.Count -gt 0) { $parts += "$($failedJobs.Count) failed" } - if ($canceledJobCount -gt 0) { $parts += "$canceledJobCount canceled" } - if ($skippedJobs -gt 0) { $parts += "$skippedJobs skipped" } - if ($pendingJobs -gt 0) { $parts += "$pendingJobs pending" } - $jobSummary = $parts -join ", " - $allSucceeded = ($failedJobs.Count -eq 0 -and $pendingJobs -eq 0 -and $canceledJobCount -eq 0 -and ($succeededJobs + $warningJobs + $skippedJobs) -eq $allJobs.Count) - $summaryColor = if ($allSucceeded) { "Green" } elseif ($failedJobs.Count -gt 0) { "Red" } else { "Cyan" } - Write-Host "Jobs: $($allJobs.Count) total ($jobSummary)" -ForegroundColor $summaryColor - } - else { - Write-Host "Failed jobs: $($failedJobs.Count)" -ForegroundColor Red - } - if ($localTestFailures.Count -gt 0) { - Write-Host "Local test 
failures: $($localTestFailures.Count)" -ForegroundColor Red - } - if ($errorCount -gt 0) { - Write-Host "API errors (partial results): $errorCount" -ForegroundColor Yellow - } - Write-Host "Build URL: https://dev.azure.com/$Organization/$Project/_build/results?buildId=$currentBuildId" -ForegroundColor Cyan -} - -# Show PR change correlation if we have changed files -if ($prChangedFiles.Count -gt 0 -and $allFailuresForCorrelation.Count -gt 0) { - Show-PRCorrelationSummary -ChangedFiles $prChangedFiles -AllFailures $allFailuresForCorrelation -} - -# Overall summary if multiple builds -if ($buildIds.Count -gt 1) { - Write-Host "`n=== Overall Summary ===" -ForegroundColor Magenta - Write-Host "Analyzed $($buildIds.Count) builds" -ForegroundColor White - Write-Host "Total failed jobs: $totalFailedJobs" -ForegroundColor Red - Write-Host "Total local test failures: $totalLocalFailures" -ForegroundColor Red - - if ($knownIssuesFromBuildAnalysis.Count -gt 0) { - Write-Host "`nKnown Issues (from Build Analysis):" -ForegroundColor Yellow - foreach ($issue in $knownIssuesFromBuildAnalysis) { - Write-Host " - #$($issue.Number): $($issue.Title)" -ForegroundColor Gray - Write-Host " $($issue.Url)" -ForegroundColor DarkGray - } - } -} - -# Build structured summary and emit as JSON -$summary = [ordered]@{ - mode = $PSCmdlet.ParameterSetName - repository = $Repository - prNumber = if ($PSCmdlet.ParameterSetName -eq 'PRNumber') { $PRNumber } else { $null } - builds = @($buildIds | ForEach-Object { - [ordered]@{ - buildId = $_ - url = "https://dev.azure.com/$Organization/$Project/_build/results?buildId=$_" - } - }) - totalFailedJobs = $totalFailedJobs - totalLocalFailures = $totalLocalFailures - lastBuildJobSummary = if ($lastBuildJobSummary) { $lastBuildJobSummary } else { [ordered]@{ - total = 0; succeeded = 0; failed = 0; canceled = 0; pending = 0; warnings = 0; skipped = 0 - } } - failedJobNames = @($allFailedJobNames) - failedJobDetails = @($allFailedJobDetails) - 
failedJobDetailsTruncated = ($allFailedJobNames.Count -gt $allFailedJobDetails.Count) - canceledJobNames = @($allCanceledJobNames) - knownIssues = @($knownIssuesFromBuildAnalysis | ForEach-Object { - [ordered]@{ number = $_.Number; title = $_.Title; url = $_.Url } - }) - prCorrelation = [ordered]@{ - changedFileCount = $prChangedFiles.Count - hasCorrelation = $false - correlatedFiles = @() - } - recommendationHint = "" -} - -# Compute PR correlation using shared helper -if ($prChangedFiles.Count -gt 0 -and $allFailuresForCorrelation.Count -gt 0) { - $correlation = Get-PRCorrelation -ChangedFiles $prChangedFiles -AllFailures $allFailuresForCorrelation - $allCorrelated = @($correlation.CorrelatedFiles) + @($correlation.TestFiles) | Select-Object -Unique - $summary.prCorrelation.hasCorrelation = $allCorrelated.Count -gt 0 - $summary.prCorrelation.correlatedFiles = @($allCorrelated) -} - -# Compute recommendation hint -# Priority: KNOWN_ISSUES wins over LIKELY_PR_RELATED intentionally. -# When both exist, SKILL.md "Mixed signals" guidance tells the agent to separate them. 
-if (-not $lastBuildJobSummary -and $buildIds.Count -gt 0) { - $summary.recommendationHint = "REVIEW_REQUIRED" -} elseif ($knownIssuesFromBuildAnalysis.Count -gt 0) { - $summary.recommendationHint = "KNOWN_ISSUES_DETECTED" -} elseif ($totalFailedJobs -eq 0 -and $totalLocalFailures -eq 0) { - $summary.recommendationHint = "BUILD_SUCCESSFUL" -} elseif ($summary.prCorrelation.hasCorrelation) { - $summary.recommendationHint = "LIKELY_PR_RELATED" -} elseif ($prChangedFiles.Count -gt 0 -and $allFailuresForCorrelation.Count -gt 0) { - $summary.recommendationHint = "POSSIBLY_TRANSIENT" -} else { - $summary.recommendationHint = "REVIEW_REQUIRED" -} - -Write-Host "" -Write-Host "[CI_ANALYSIS_SUMMARY]" -Write-Host ($summary | ConvertTo-Json -Depth 5) -Write-Host "[/CI_ANALYSIS_SUMMARY]" - -} -catch { - Write-Error "Error: $_" - exit 1 -} - -#endregion Main Execution diff --git a/.github/skills/vmr-codeflow-status/SKILL.md b/.github/skills/vmr-codeflow-status/SKILL.md deleted file mode 100644 index f80352e5275bd3..00000000000000 --- a/.github/skills/vmr-codeflow-status/SKILL.md +++ /dev/null @@ -1,230 +0,0 @@ ---- -name: vmr-codeflow-status -description: Analyze VMR codeflow PR status for dotnet repositories. Use when investigating stale codeflow PRs, checking if fixes have flowed through the VMR pipeline, debugging dependency update issues in PRs authored by dotnet-maestro[bot], checking overall flow status for a repo, or diagnosing why backflow PRs are missing or blocked. ---- - -# VMR Codeflow Status - -Analyze the health of VMR codeflow PRs in both directions: -- **Backflow**: `dotnet/dotnet` → product repos (e.g., `dotnet/sdk`) -- **Forward flow**: product repos → `dotnet/dotnet` - -> 🚨 **NEVER** use `gh pr review --approve` or `--request-changes`. Only `--comment` is allowed. Approval and blocking are human-only actions. 
- -> 📝 **AI-generated content disclosure:** When posting any content to GitHub (PR comments, analysis summaries) under a user's credentials — i.e., the account is **not** a dedicated "copilot" or "bot" account/app — you **MUST** include a concise, visible note (e.g. a `> [!NOTE]` alert) indicating the content was AI/Copilot-generated. Skip this if the user explicitly asks you to omit it. - -**Workflow**: Run the script → read the human-readable output + `[CODEFLOW_SUMMARY]` JSON → synthesize recommendations yourself. The script collects data; you generate the advice. - -## Prerequisites - -- **GitHub CLI (`gh`)** — must be installed and authenticated (`gh auth login`) -- Run scripts **from the skill directory** or use the full path to the script - -## When to Use This Skill - -Use this skill when: -- A codeflow PR (from `dotnet-maestro[bot]`) has failing tests and you need to know if it's stale -- You need to check if a specific fix has flowed through the VMR pipeline to a codeflow PR -- A PR has a Maestro staleness warning ("codeflow cannot continue") or conflict -- You need to understand what manual commits would be lost if a codeflow PR is closed -- You want to check the overall state of flow for a repo (backflow and forward flow health) -- You need to know why backflow PRs are missing or when the last VMR build was published -- You're asked questions like "is this codeflow PR up to date", "has the runtime revert reached this PR", "why is the codeflow blocked", "what is the state of flow for the sdk", "what's the flow status for net11" - -## Two Modes - -| Mode | Use When | Required Params | -|------|----------|-----------------| -| **PR analysis** | Investigating a specific codeflow PR | `-PRNumber` (and optionally `-Repository`) | -| **Flow health** (`-CheckMissing`) | Checking overall repo flow status | `-CheckMissing` (optional: `-Repository`, `-Branch`) | - -> ⚠️ **Common mistake**: Don't use `-PRNumber` and `-CheckMissing` together — they are separate 
modes. `-CheckMissing` scans branches discovered from open and recent backflow PRs (unless `-Branch` is provided), not a specific PR. - -## Quick Start - -```powershell -# Check codeflow PR status (most common) -./scripts/Get-CodeflowStatus.ps1 -PRNumber 52727 -Repository "dotnet/sdk" - -# Trace a specific fix through the pipeline -./scripts/Get-CodeflowStatus.ps1 -PRNumber 52727 -Repository "dotnet/sdk" -TraceFix "dotnet/runtime#123974" - -# Show individual VMR commits that are missing -./scripts/Get-CodeflowStatus.ps1 -PRNumber 52727 -Repository "dotnet/sdk" -ShowCommits - -# Check overall flow health for a repo (backflow + forward flow) -./scripts/Get-CodeflowStatus.ps1 -Repository "dotnet/roslyn" -CheckMissing - -# Check a specific branch only -./scripts/Get-CodeflowStatus.ps1 -Repository "dotnet/sdk" -CheckMissing -Branch "main" -``` - -## Key Parameters - -| Parameter | Required | Default | Description | -|-----------|----------|---------|-------------| -| `-PRNumber` | Yes (unless `-CheckMissing`) | — | GitHub PR number to analyze | -| `-Repository` | No | `dotnet/sdk` | Target repo in `owner/repo` format | -| `-TraceFix` | No | — | Trace a repo PR through the pipeline. Format: `owner/repo#number` (e.g., `dotnet/runtime#123974`) | -| `-ShowCommits` | No | `$false` | Show individual VMR commits between PR snapshot and branch HEAD | -| `-CheckMissing` | No | `$false` | Check overall flow health: missing backflow PRs, forward flow status, and official build freshness | -| `-Branch` | No | — | With `-CheckMissing`, only check a specific branch (e.g., `main`, `release/10.0`) | - -## What the Script Does - -### PR Analysis Mode (default) - -> **Design principle**: Assess current state from primary signals first, then use Maestro comments as historical context — not the other way around. Comments tell you the history, not the present. - -1. **PR Overview** — Basic PR info, flow direction (backflow vs forward flow) -2. 
**Current State** — Independent assessment from primary signals: empty diff, force pushes, merge status. Produces a one-line verdict (NO-OP / IN PROGRESS / STALE / ACTIVE / MERGED / CLOSED) before reading any comments -3. **Codeflow Metadata** — Extracts VMR commit, subscription ID, build info from PR body -4. **Snapshot Validation** — Cross-references PR body commit against Version.Details.xml and branch commits to detect stale metadata -5. **Source Freshness** — Compares PR's VMR snapshot against current VMR branch HEAD; shows pending forward flow PRs -6. **PR Branch Analysis** — Categorizes commits as auto-updates vs manual; detects codeflow-like manual commits -7. **Codeflow History** — Maestro comments as historical context (conflict/staleness warnings), cross-referenced against force push timestamps to determine if issues were already addressed -8. **Traces fixes** (with `-TraceFix`) — Checks if a specific fix has flowed through VMR → codeflow PR -9. **Emits structured summary** — `[CODEFLOW_SUMMARY]` JSON block with all key facts for the agent to reason over - -> **After the script runs**, you (the agent) generate recommendations. The script collects data; you synthesize the advice. See [Generating Recommendations](#generating-recommendations) below. - -### Flow Health Mode (`-CheckMissing`) -1. **Checks official build freshness** — Queries `aka.ms` shortlinks for latest published VMR build dates per channel -2. **Scans backflow PRs** — Finds branches where a backflow PR should exist but doesn't, and checks health of open PRs (conflict/staleness/resolved status) -3. **Scans forward flow** — Checks open forward flow PRs into `dotnet/dotnet` for staleness and conflicts -4. **Produces summary** — Counts healthy/blocked/missing PRs across both directions - -> ❌ **Never assume "Unknown" health means healthy.** When `gh` API calls fail (auth, rate limiting), the script returns "Unknown" status — this is explicitly excluded from healthy/covered counts. 
- -> ⚠️ **aka.ms redirect behavior**: 301 is expected and treated as a valid product URL (→ ci.dot.net). Non-301 redirects (often 302, which goes to Bing) indicate an invalid URL. The script only accepts 301. - -## Interpreting Results - -### Current State (assessed first, from primary signals) -- **✅ MERGED**: PR has been merged — no action needed -- **✖️ CLOSED**: PR was closed without merging — Maestro should create a replacement -- **📭 NO-OP**: Empty diff — PR likely already resolved, changes landed via other paths -- **🔄 IN PROGRESS**: Recent force push within 24h — someone is actively working on it -- **⏳ STALE**: No activity for >3 days — may need attention -- **✅ ACTIVE**: PR has content and recent activity - -### Freshness -- **✅ Up to date**: PR has the latest VMR snapshot -- **⚠️ VMR is N commits ahead**: The PR is missing updates. Check if the missing commits contain the fix you need. -- **📊 Forward flow coverage**: Shows how many missing repos have pending forward flow PRs that would close part of the gap once merged. - -### Snapshot Validation -- **✅ Match**: PR body commit matches the branch's actual "Backflow from" commit -- **⚠️ Mismatch**: PR body is stale — the script automatically uses the branch-derived commit for freshness checks -- **ℹ️ Initial commit only**: PR body can't be verified yet (no "Backflow from" commit exists) - -### Codeflow History (Maestro comments as context) -- **✅ No warnings**: Maestro can freely update the PR -- **⚠️ Staleness warning**: A forward flow merged while this backflow PR was open. Maestro blocked further updates. -- **🔴 Conflict detected**: Maestro found merge conflicts. Shows conflicting files and `darc vmr resolve-conflict` command. -- **ℹ️ Force push after warning**: When a force push post-dates a conflict/staleness warning, the issue may already be resolved. The script cross-references timestamps automatically. 
- -### Manual Commits -Manual commits on the PR branch are at risk if the PR is closed or force-triggered. The script lists them so you can decide whether to preserve them. - -### Fix Tracing -When using `-TraceFix`: -- **✅ Fix is in VMR manifest**: The fix has flowed to the VMR -- **✅ Fix is in PR snapshot**: The codeflow PR already includes this fix -- **❌ Fix is NOT in PR snapshot**: The PR needs a codeflow update to get this fix - -## Generating Recommendations - -After the script outputs the `[CODEFLOW_SUMMARY]` JSON block, **you** synthesize recommendations. Do not parrot the JSON — reason over it. - -### Decision logic - -Check `isCodeflowPR` first — if `false`, skip all codeflow-specific advice: -- **Not a codeflow PR** (`isCodeflowPR = false` or `flowDirection = "unknown"`): State this clearly. No darc commands, no codeflow recommendations. Treat as a normal PR. - -Then read `currentState`: - -| State | Action | -|-------|--------| -| `MERGED` | No action needed. Mention Maestro will create a new PR if VMR has newer content. | -| `CLOSED` | Suggest triggering a new PR if `subscriptionId` is available. | -| `NO-OP` | PR has no meaningful changes. Recommend closing or merging to clear state. If `subscriptionId` is available, offer force-trigger as a third option. | -| `IN_PROGRESS` | Someone is actively working. Recommend waiting, then checking back. | -| `STALE` | Needs attention — see warnings below for what's blocking. | -| `ACTIVE` | PR is healthy — check freshness and warnings for nuance. | - -Then layer in context from `warnings`, `freshness`, and `commits`: - -- **Unresolved conflict** (`warnings.conflictCount > 0`, `conflictMayBeResolved = false`): Lead with "resolve conflicts" using `darc vmr resolve-conflict --subscription `. Offer "close & reopen" as alternative. -- **Conflict may be resolved** (`conflictMayBeResolved = true`): Note the force push post-dates the conflict warning. Suggest verifying, then merging. 
-- **Staleness warning active** (`stalenessCount > 0`, `stalenessMayBeResolved = false`): Codeflow is blocked. Options: merge as-is, force trigger, or close & reopen. -- **Manual commits present** (`commits.manual > 0`): Warn that force-trigger or close will lose them. If `commits.codeflowLikeManual > 0`, note the freshness gap may be partially covered. -- **Behind on freshness** (`freshness.aheadBy > 0`): Mention the PR is missing updates. If staleness is blocking, a force trigger is needed. Otherwise, Maestro should auto-update. - -### Darc commands to include - -When recommending actions, include the relevant `darc` command with the actual `subscriptionId` from the summary. Be precise about what each command does: - -| Command | What it does | When to use | -|---------|-------------|-------------| -| `darc trigger-subscriptions --id ` | Normal trigger — only works if subscription isn't stale. Creates a new PR if none exists. | PR was closed, or no PR exists | -| `darc trigger-subscriptions --id --force` | Force trigger — **overwrites the existing PR branch** with fresh VMR content. Does not create a new PR. | PR exists but is stale/no-op and you want to reuse it | -| `darc vmr resolve-conflict --subscription ` | Resolve conflicts locally and push to the PR branch | PR has merge conflicts | - -> ⚠️ **Common mistake**: Don't say "close then force-trigger" — force-trigger pushes to the *existing* PR. If you close first, use a normal trigger instead (which creates a new PR). The two paths are: (A) force-trigger to refresh the existing PR, or (B) close + normal-trigger to get a new PR. - -### Tone - -Be direct. Lead with the most important action. Use 2-4 bullet points, not long paragraphs. Include the darc command inline so the user can copy-paste. 
- -## Darc Commands for Remediation - -After analyzing the codeflow status, common next steps involve `darc` commands: - -```bash -# Force trigger the subscription to get a fresh codeflow update -darc trigger-subscriptions --id --force - -# Normal trigger (only works if not stale) -darc trigger-subscriptions --id - -# Check subscription details -darc get-subscriptions --target-repo dotnet/sdk --source-repo dotnet/dotnet - -# Get BAR build details -darc get-build --id - -# Resolve codeflow conflicts locally -darc vmr resolve-conflict --subscription -``` - -Install darc via `eng\common\darc-init.ps1` in any arcade-enabled repository. - -### When the script reports "Maestro may be stuck" - -When the script shows a missing backflow PR with "Maestro may be stuck" (builds are fresh but no PR was created), follow these diagnostic steps: - -1. **Check the subscription** to find when it last consumed a build: - ```bash - darc get-subscriptions --target-repo --source-repo dotnet/dotnet - ``` - Look at the `Last Build` field — if it's weeks old while the channel has newer builds, the subscription is stuck. - -2. **Compare against the latest channel build** to confirm the gap: - ```bash - darc get-latest-build --repo dotnet/dotnet --channel "" - ``` - Channel names follow patterns like `.NET 11.0.1xx SDK`, `.NET 10.0.1xx SDK`, `.NET 11.0.1xx SDK Preview 1`. - -3. **Trigger the subscription manually** to unstick it: - ```bash - darc trigger-subscriptions --id - ``` - -4. **If triggering doesn't produce a PR within a few minutes**, the issue may be deeper — check Maestro health or open an issue on `dotnet/arcade`. 
- -## References - -- **VMR codeflow concepts**: See [references/vmr-codeflow-reference.md](references/vmr-codeflow-reference.md) -- **VMR build topology & staleness diagnosis**: See [references/vmr-build-topology.md](references/vmr-build-topology.md) — explains how to diagnose widespread backflow staleness by checking VMR build health, the bootstrap chicken-and-egg problem, and the channel/subscription flow -- **Codeflow PR documentation**: [dotnet/dotnet Codeflow-PRs.md](https://github.com/dotnet/dotnet/blob/main/docs/Codeflow-PRs.md) diff --git a/.github/skills/vmr-codeflow-status/references/vmr-build-topology.md b/.github/skills/vmr-codeflow-status/references/vmr-build-topology.md deleted file mode 100644 index bcbc82df8246e2..00000000000000 --- a/.github/skills/vmr-codeflow-status/references/vmr-build-topology.md +++ /dev/null @@ -1,252 +0,0 @@ -# VMR Build Topology and Staleness Diagnosis - -## Overview - -When backflow PRs are missing across multiple repositories simultaneously, the root cause -is usually not Maestro — it's that the VMR can't build successfully, so no new channel -builds are produced, and subscriptions have nothing to trigger on. - -This reference explains how to diagnose that situation using publicly available signals. 
- -## Build Pipeline Structure - -The VMR (`dotnet/dotnet`) has two tiers of builds: - -### Public CI (validation only) -- **AzDO org**: `dnceng-public` -- **Project**: `public` (ID: `cbb18261-c48f-4abb-8651-8cdcb5474649`) -- **Pipeline**: `dotnet-unified-build` (definition 278) -- **Purpose**: Validates PRs and runs scheduled CI on `refs/heads/main` and release branches -- **Does NOT publish** to Maestro channels — cannot trigger subscriptions - -### Official builds (channel publishing) -- **AzDO org**: `dnceng` (internal, requires auth) -- **Purpose**: Produces signed builds that publish to Maestro channels (e.g., `.NET 11.0.1xx SDK`) -- **These are the builds that trigger Maestro subscriptions and create backflow PRs** -- Not queryable without internal access - -### Key insight -When investigating stale backflow, the **public CI builds are a useful proxy**. If the public -scheduled build on `refs/heads/main` is failing, the official build is almost certainly -failing too (they build the same source). A string of failed public builds strongly suggests -the official pipeline is also broken. - -## Checking Official Build Freshness (aka.ms) - -The most direct way to check if official VMR builds are producing output is to query -the SDK blob storage via `aka.ms` shortlinks. When official builds succeed, they publish -SDK artifacts to `ci.dot.net`. We can check when the latest build was published. - -### How it works - -1. Resolve the aka.ms redirect (returns 301 with the blob URL): - ``` - https://aka.ms/dotnet/{channel}/daily/dotnet-sdk-win-x64.zip - ``` - Example channels: `11.0.1xx`, `11.0.1xx-preview1`, `10.0.3xx`, `10.0.1xx` - -2. The 301 Location header gives the actual blob URL on `ci.dot.net`, which includes - the version number in the path. - -3. HEAD the blob URL — the `Last-Modified` header tells you exactly when the build was - published. 
- -### Example (PowerShell) - -```powershell -Add-Type -AssemblyName System.Net.Http -$handler = [System.Net.Http.HttpClientHandler]::new() -$handler.AllowAutoRedirect = $false -$client = [System.Net.Http.HttpClient]::new($handler) - -# Step 1: Resolve aka.ms → ci.dot.net blob URL -$resp = $client.GetAsync("https://aka.ms/dotnet/11.0.1xx/daily/dotnet-sdk-win-x64.zip").Result -$blobUrl = $resp.Headers.Location.ToString() # Only if StatusCode is 301 -$resp.Dispose() - -# Step 2: HEAD the blob for Last-Modified -$head = Invoke-WebRequest -Uri $blobUrl -Method Head -UseBasicParsing -$published = [DateTimeOffset]::Parse($head.Headers['Last-Modified']).UtcDateTime -$age = [DateTime]::UtcNow - $published - -$client.Dispose() -$handler.Dispose() -``` - -### Interpreting results -- **< 1 day old**: Official builds are healthy for this channel -- **1-2 days old**: Normal for daily builds, especially over weekends -- **3+ days old**: Official builds are likely failing — investigate further -- **Multiple channels stale simultaneously**: Strong signal of a systemic VMR build problem - -### Validating with darc (when auth is available) - -The aka.ms approach is an auth-free proxy. When `darc` is installed and authenticated, -you can get the authoritative answer directly from Maestro: - -```bash -# Latest build on a channel (exact match for what triggers subscriptions) -darc get-latest-build --repo dotnet/dotnet --channel ".NET 11.0.1xx SDK" - -# Check what build a subscription last acted on -darc get-subscriptions --source-repo dotnet/dotnet --target-repo dotnet/aspnetcore -``` - -The `Date Produced` from `darc get-latest-build` will be ~6 hours earlier than the -aka.ms blob `Last-Modified` (due to signing/publishing delay), but they refer to the -same build. If the subscription's `Last Build` SHA matches the channel's latest build, -then Maestro already fired — no newer builds exist. 
- -### Channel-to-branch mapping - -| Channel | VMR branch | Backflow targets | -|---------|-----------|-----------------| -| `11.0.1xx` | `main` | runtime, sdk, aspnetcore (main) | -| `11.0.1xx-preview1` | `release/11.0.1xx-preview1` | runtime, sdk, aspnetcore (preview) | -| `10.0.3xx` | `release/10.0.3xx` | sdk (release/10.0.3xx) | -| `10.0.2xx` | `release/10.0.2xx` | sdk (release/10.0.2xx) | -| `10.0.1xx` | `release/10.0.1xx` | runtime, sdk, aspnetcore (release/10.0) | - -### Cross-referencing with Version.Details.xml and PR metadata - -There are two sources of truth for what VMR build a repo is synced to: - -**1. `eng/Version.Details.xml` in the target repo (authoritative):** -```xml - -``` -- `Sha` = the exact VMR commit the repo is synced to -- `BarId` = the Maestro build ID (queryable via `darc get-build --id 297974` for date/channel) -- Dependency version strings encode build dates (e.g., `26069.105` → year 26, day-code 069) - -**2. Backflow PR body (when a PR is open):** -``` -- **Date Produced**: February 4, 2026 11:05:10 AM UTC -- **Build**: [20260203.11](...) ([300217](https://maestro.dot.net/channel/8298/.../build/300217)) -``` - -**Comparing against aka.ms build date:** -- If they match → the backflow PR is based on the latest successful build -- If the aka.ms build is newer → a newer build succeeded but hasn't triggered backflow yet -- If the aka.ms build matches the PR but is old → no new successful builds since - -## Querying Public VMR CI Builds - -Public CI builds (separate from official builds) can confirm whether the VMR source is -buildable. These don't publish to channels but use the same source. 
- -### AzDO REST API endpoints - -Recent scheduled builds on a branch: -``` -GET https://dev.azure.com/dnceng-public/public/_apis/build/builds?definitions=278&branchName=refs/heads/main&$top=5&api-version=7.0 -``` - -Last successful build: -``` -GET https://dev.azure.com/dnceng-public/public/_apis/build/builds?definitions=278&branchName=refs/heads/main&resultFilter=succeeded&$top=1&api-version=7.0 -``` - -Build timeline (to find failing jobs): -``` -GET https://dev.azure.com/dnceng-public/public/_apis/build/builds/{buildId}/timeline?api-version=7.0 -``` - -### Interpreting results -- **`reason: schedule`** — Scheduled daily builds, closest proxy to official builds -- **`reason: pullRequest`** — PR validation only -- **`result: failed`** with consecutive scheduled builds — strong signal of broken VMR -- Check the timeline for which jobs/stages failed to understand the root cause - -## Diagnosing Widespread Backflow Staleness - -### Pattern: Multiple repos missing backflow simultaneously - -When `CheckMissing` shows missing backflow across 3+ repos (e.g., runtime, SDK, aspnetcore -all stale), this is almost always a VMR build problem, not a Maestro problem. - -**Diagnosis steps:** - -1. **Check public VMR builds**: Query the last 5 scheduled builds on the affected branch. - If all are failing, the VMR build is broken. - -2. **Find the failure**: Get the timeline of the most recent failed build. Look for failed - stages/jobs — common failures include: - - **macOS signing** (SignTool crashes on non-PE files) - - **Windows build** (individual repo build failures within the VMR) - - **Source-build validation** (packaging or dependency issues) - -3. **Check for known issues**: Search `dotnet/dotnet` issues with label `[Operational Issue]` - or search for the error message. - -4. **Check the last successful build date**: A gap of days or weeks confirms the VMR has been - broken for an extended period. 
- -### Pattern: Single repo missing backflow - -When only one repo is missing backflow but others are healthy, the issue is more likely: -- Maestro subscription disabled or misconfigured -- The specific repo's forward flow is blocking (conflict or staleness) -- Channel mismatch - -Use `darc get-subscriptions --source-repo dotnet/dotnet --target-repo dotnet/` to check. - -## The Bootstrap / Chicken-and-Egg Problem - -The VMR builds arcade and other infrastructure from source. When an infrastructure fix -(e.g., in `dotnet/arcade`) is needed to unblock the VMR build itself, a circular dependency -can occur: - -1. Arcade fix merges in `dotnet/arcade` -2. Arcade forward-flows to VMR (`dotnet/dotnet`) -3. VMR now has the fix **in source**, but the build tooling used to build may still be the - old version (from a previous successful bootstrap) -4. The build fails because the **bootstrap SDK** (cached from a prior build) doesn't have - the fix yet - -**Resolution** (by VMR maintainers): -- Re-bootstrap: Build a new `source-built-sdks` package from a working state -- Manual intervention: Patch the bootstrap or skip the failing step -- Wait for a full re-bootstrap cycle after a milestone release - -This is not something that can be fixed by triggering subscriptions or resolving conflicts. -When you see this pattern, flag it as needing VMR infrastructure team intervention. - -## Channels and Subscription Flow - -``` -dotnet/arcade ──forward flow──► dotnet/dotnet (VMR) -dotnet/runtime ─forward flow──► dotnet/dotnet (VMR) -dotnet/sdk ────forward flow──► dotnet/dotnet (VMR) - ...other repos... 
- -dotnet/dotnet (VMR) - │ - ├── official build succeeds - │ │ - │ ▼ - │ publishes to channel (e.g., ".NET 11.0.1xx SDK") - │ │ - │ ▼ - │ Maestro fires subscriptions - │ │ - │ ├──► dotnet/runtime backflow PR - │ ├──► dotnet/sdk backflow PR - │ ├──► dotnet/aspnetcore backflow PR - │ └──► ...etc - │ - └── official build FAILS - │ - ▼ - nothing publishes → no subscriptions fire → all backflow stalls -``` - -## Quick Reference: Common VMR Build Failures - -| Failure | Symptom | Root cause | -|---------|---------|------------| -| SignTool crash | `Unknown file format` in Sign.proj on macOS | Non-PE file in signing input (e.g., tar.gz) | -| Repo build failure | `error MSB...` in a specific repo's build | Source incompatibility within VMR | -| Source-build validation | Packaging or prebuilt detection errors | New prebuilt dependency introduced | -| Infrastructure timeout | Build exceeds time limit | Resource contention or build perf regression | diff --git a/.github/skills/vmr-codeflow-status/references/vmr-codeflow-reference.md b/.github/skills/vmr-codeflow-status/references/vmr-codeflow-reference.md deleted file mode 100644 index cfbb5e40fb1f55..00000000000000 --- a/.github/skills/vmr-codeflow-status/references/vmr-codeflow-reference.md +++ /dev/null @@ -1,144 +0,0 @@ -# VMR Codeflow Reference - -## Key Concepts - -### Codeflow Types -- **Backflow** (VMR → product repo): Automated PRs created by Maestro that bring VMR source updates + dependency updates into product repos (e.g., `dotnet/sdk`). These are titled `[branch] Source code updates from dotnet/dotnet`. -- **Forward flow** (product repo → VMR): Changes from product repos flowing into the VMR. These are titled `[branch] Source code updates from dotnet/`. - -### Staleness -When a product repo pushes changes to the VMR (forward flow merges) while a backflow PR is already open, Maestro blocks further codeflow updates to that PR. The bot posts a warning comment with options: -1. 
Merge the PR as-is, then Maestro creates a new PR with remaining changes -2. Close the PR and let Maestro open a fresh one (loses manual commits) -3. Force trigger: `darc trigger-subscriptions --id --force` (manual commits may be reverted) - -### Key Files -- **`src/source-manifest.json`** (in VMR): Tracks the exact commit SHA for each product repo synchronized into the VMR. This is the authoritative source of truth. -- **`eng/Version.Details.xml`** (in product repos): Tracks dependencies and includes a `` tag for codeflow tracking. - -## PR Body Metadata Format - -Codeflow PRs have structured metadata in their body: - -``` -[marker]: <> (Begin:) -## From https://github.com/dotnet/dotnet -- **Subscription**: [](https://maestro.dot.net/subscriptions?search=) -- **Build**: []() ([]()) -- **Date Produced**: -- **Commit**: []() -- **Commit Diff**: [...]() -- **Branch**: []() -[marker]: <> (End:) -``` - -## Darc CLI Commands - -The `darc` tool (Dependency ARcade) manages dependency flow in the .NET ecosystem. Install via `eng\common\darc-init.ps1` in any arcade-enabled repo. 
- -### Essential Commands for Codeflow Analysis - -#### Get subscription details -```bash -# Find all subscriptions flowing to a repo -darc get-subscriptions --target-repo dotnet/sdk --source-repo dotnet/dotnet - -# Output shows subscription ID, channel, update frequency, merge policies -``` - -#### Trigger a codeflow update -```bash -# Normal trigger (only works if not stale) -darc trigger-subscriptions --id - -# Force trigger (works even when stale, but may revert manual commits) -darc trigger-subscriptions --id --force - -# Trigger with a specific build -darc trigger-subscriptions --id --build -``` - -#### Get build information -```bash -# Get BAR build details by ID (found in PR body or AzDO logs) -darc get-build --id - -# Get latest build for a repo on a channel -darc get-latest-build --repo dotnet/dotnet --channel ".NET 11 Preview 1" -``` - -#### Check subscription health -```bash -# See if dependencies are missing subscriptions or have issues -darc get-health --channel ".NET 11 Preview 1" -``` - -#### Simulate a subscription update locally -```bash -# Dry-run to see what a subscription would update -darc update-dependencies --subscription --dry-run -``` - -### VMR-Specific Commands - -```bash -# Resolve codeflow conflicts locally -darc vmr resolve-conflict --subscription --build - -# Flow source from VMR → local repo -darc vmr backflow --subscription - -# Flow source from local repo → local VMR -darc vmr forwardflow --subscription - -# Get version (SHA) of a repo in the VMR -darc vmr get-version - -# Diff VMR vs product repos -darc vmr diff -``` - -### Halting and Restarting Dependency Flow - -- **Disable default channel**: `darc default-channel-status --disable --id ` — stops new builds from flowing -- **Disable subscription**: `darc subscription-status --disable --id ` — stops flow between specific repos -- **Pin dependency**: Add `Pinned="true"` to dependency in `Version.Details.xml` — prevents specific dependency from updating - -## API Endpoints - -### 
GitHub API -- PR details: `GET /repos/{owner}/{repo}/pulls/{pr_number}` -- PR comments: `GET /repos/{owner}/{repo}/issues/{pr_number}/comments` -- PR commits: `GET /repos/{owner}/{repo}/pulls/{pr_number}/commits` -- Compare commits: `GET /repos/{owner}/{repo}/compare/{base}...{head}` -- File contents: `GET /repos/{owner}/{repo}/contents/{path}?ref={branch}` - -### VMR Source Manifest -``` -GET /repos/dotnet/dotnet/contents/src/source-manifest.json?ref={branch} -``` -Returns JSON with `repositories[]` array, each having `path`, `remoteUri`, `commitSha`. - -### Maestro/BAR REST API -Base URL: `https://maestro.dot.net` -- Swagger: `https://maestro.dot.net/swagger` -- Get subscriptions: `GET /api/subscriptions` -- Get builds: `GET /api/builds` -- Get build by ID: `GET /api/builds/{id}` - -## Common Scenarios - -### 1. Codeflow is stale — a fix landed but hasn't reached the PR -**Symptoms**: Tests failing on the codeflow PR; the fix is merged in a product repo. -**Diagnosis**: Compare `source-manifest.json` on VMR branch HEAD vs the PR's VMR snapshot commit. -**Resolution**: Close PR + reopen, or force trigger the subscription. - -### 2. Opposite codeflow merged — staleness warning -**Symptoms**: Maestro bot comment saying "codeflow cannot continue". -**Diagnosis**: Check PR comments for the warning. Check if forward flow PRs merged after the backflow PR was opened. -**Resolution**: Follow the options in the bot's comment. - -### 3. Manual commits on the codeflow PR -**Symptoms**: Developers added manual fixes to unblock the PR (baseline updates, workarounds). -**Diagnosis**: Analyze PR commits to identify non-maestro commits. -**Risk**: Closing the PR loses these. Force-triggering may revert them. 
diff --git a/.github/skills/vmr-codeflow-status/scripts/Get-CodeflowStatus.ps1 b/.github/skills/vmr-codeflow-status/scripts/Get-CodeflowStatus.ps1 deleted file mode 100644 index 9c968ec8e9931d..00000000000000 --- a/.github/skills/vmr-codeflow-status/scripts/Get-CodeflowStatus.ps1 +++ /dev/null @@ -1,1476 +0,0 @@ -<# -.SYNOPSIS - Analyzes VMR codeflow PR status for dotnet repositories. - -.DESCRIPTION - Checks whether a codeflow PR (backflow from dotnet/dotnet VMR) is up to date, - detects staleness warnings, traces specific fixes through the pipeline, and - provides actionable recommendations. - - Can also check if a backflow PR is expected but missing for a given repo/branch. - -.PARAMETER PRNumber - GitHub PR number to analyze. Required unless -CheckMissing is used. - -.PARAMETER Repository - Target repository (default: dotnet/sdk). Format: owner/repo. - -.PARAMETER TraceFix - Optional. A repo PR to trace through the pipeline (e.g., "dotnet/runtime#123974"). - Checks if the fix has flowed through VMR into the codeflow PR. - -.PARAMETER ShowCommits - Show individual VMR commits between the PR snapshot and current branch HEAD. - -.PARAMETER CheckMissing - Check if backflow PRs are expected but missing for a repository. When used, - PRNumber is not required. Finds the most recent merged backflow PR for each branch, - extracts its VMR commit, and compares against current VMR branch HEAD. - -.PARAMETER Branch - Optional. When used with -CheckMissing, only check a specific branch instead of all. 
- -.EXAMPLE - ./Get-CodeflowStatus.ps1 -PRNumber 52727 -Repository "dotnet/sdk" - -.EXAMPLE - ./Get-CodeflowStatus.ps1 -PRNumber 52727 -Repository "dotnet/sdk" -TraceFix "dotnet/runtime#123974" - -.EXAMPLE - ./Get-CodeflowStatus.ps1 -Repository "dotnet/roslyn" -CheckMissing - -.EXAMPLE - ./Get-CodeflowStatus.ps1 -Repository "dotnet/roslyn" -CheckMissing -Branch "main" -#> - -param( - [int]$PRNumber, - - [string]$Repository = "dotnet/sdk", - - [string]$TraceFix, - - [switch]$ShowCommits, - - [switch]$CheckMissing, - - [string]$Branch -) - -$ErrorActionPreference = "Stop" - -# --- Helpers --- - -function Invoke-GitHubApi { - param( - [string]$Endpoint, - [switch]$Raw - ) - try { - $args = @($Endpoint) - if ($Raw) { - $args += '-H' - $args += 'Accept: application/vnd.github.raw' - } - $result = gh api @args 2>$null - if ($LASTEXITCODE -ne 0) { - Write-Warning "GitHub API call failed: $Endpoint" - return $null - } - if ($Raw) { return $result -join "`n" } - return ($result -join "`n") | ConvertFrom-Json - } - catch { - Write-Warning "Error calling GitHub API: $_" - return $null - } -} - -function Get-ShortSha { - param([string]$Sha, [int]$Length = 12) - if (-not $Sha) { return "(unknown)" } - return $Sha.Substring(0, [Math]::Min($Length, $Sha.Length)) -} - -function Write-Section { - param([string]$Title) - Write-Host "" - Write-Host "=== $Title ===" -ForegroundColor Cyan -} - -function Write-Status { - param([string]$Label, [string]$Value, [string]$Color = "White") - Write-Host " ${Label}: " -NoNewline - Write-Host $Value -ForegroundColor $Color -} - -# Check an open codeflow PR for staleness/conflict warnings -# Returns a hashtable with: Status, Color, HasConflict, HasStaleness, WasResolved -function Get-CodeflowPRHealth { - param([int]$PRNumber, [string]$Repo = "dotnet/dotnet") - - $result = @{ Status = "⚠️ Unknown"; Color = "Yellow"; HasConflict = $false; HasStaleness = $false; WasResolved = $false; Details = @() } - - $prJson = gh pr view $PRNumber -R $Repo --json 
body,comments,updatedAt,mergeable 2>$null - if ($LASTEXITCODE -ne 0 -or -not $prJson) { return $result } - - try { $prDetail = ($prJson -join "`n") | ConvertFrom-Json } catch { return $result } - - # If we got here, we can determine health - $result.Status = "✅ Healthy" - $result.Color = "Green" - - $hasConflict = $false - $hasStaleness = $false - if ($prDetail.comments) { - foreach ($comment in $prDetail.comments) { - if ($comment.author.login -match '^dotnet-maestro') { - if ($comment.body -match 'codeflow cannot continue|the source repository has received code changes') { $hasStaleness = $true } - if ($comment.body -match 'Conflict detected') { $hasConflict = $true } - } - } - } - - $wasConflict = $hasConflict - $wasStaleness = $hasStaleness - - # If issues detected, check if they were resolved - # Two signals: (1) PR is mergeable (no git conflict), (2) Codeflow verification SUCCESS - # Either one clears the conflict flag. Staleness needs a newer commit after the warning. - if ($hasConflict -or $hasStaleness) { - # Check mergeable status — if PR has no git conflicts, clear the conflict flag - $isMergeable = $false - if ($prDetail.PSObject.Properties.Name -contains 'mergeable' -and $prDetail.mergeable -eq 'MERGEABLE') { - $isMergeable = $true - } - if ($isMergeable -and $hasConflict) { - $hasConflict = $false - } - - $checksJson = gh pr checks $PRNumber -R $Repo --json name,state 2>$null - if ($LASTEXITCODE -eq 0 -and $checksJson) { - try { - $checks = ($checksJson -join "`n") | ConvertFrom-Json - $codeflowCheck = @($checks | Where-Object { $_.name -match 'Codeflow verification' }) | Select-Object -First 1 - if (($codeflowCheck -and $codeflowCheck.state -eq 'SUCCESS') -or $isMergeable) { - # No merge conflict — either Codeflow verification passes or PR is mergeable - $hasConflict = $false - # For staleness, check if there are commits after the last staleness warning - if ($hasStaleness) { - $commitsJson = gh pr view $PRNumber -R $Repo --json commits --jq 
'.commits[-1].committedDate' 2>$null - if ($LASTEXITCODE -eq 0 -and $commitsJson) { - $lastCommitTime = ($commitsJson -join "").Trim() - $lastWarnTime = $null - foreach ($comment in $prDetail.comments) { - if ($comment.author.login -match '^dotnet-maestro' -and $comment.body -match 'codeflow cannot continue|the source repository has received code changes') { - $warnDt = [DateTimeOffset]::Parse($comment.createdAt).UtcDateTime - if (-not $lastWarnTime -or $warnDt -gt $lastWarnTime) { - $lastWarnTime = $warnDt - } - } - } - $commitDt = if ($lastCommitTime) { [DateTimeOffset]::Parse($lastCommitTime).UtcDateTime } else { $null } - if ($lastWarnTime -and $commitDt -and $commitDt -gt $lastWarnTime) { - $hasStaleness = $false - } - } - } - } - } catch { } - } - } - - if ($hasConflict) { - $result.Status = "🔴 Conflict" - $result.Color = "Red" - $result.HasConflict = $true - } - elseif ($hasStaleness) { - $result.Status = "⚠️ Stale" - $result.Color = "Yellow" - $result.HasStaleness = $true - } - else { - if ($wasConflict) { $result.Status = "✅ Conflict resolved"; $result.WasResolved = $true } - elseif ($wasStaleness) { $result.Status = "✅ Updated since staleness warning"; $result.WasResolved = $true } - } - - return $result -} - -function Get-VMRBuildFreshness { - param([string]$VMRBranch) - - # Map VMR branch to aka.ms channel - $channel = $null - $blobUrl = $null - - Add-Type -AssemblyName System.Net.Http -ErrorAction SilentlyContinue - $handler = [System.Net.Http.HttpClientHandler]::new() - $handler.AllowAutoRedirect = $false - $client = [System.Net.Http.HttpClient]::new($handler) - $client.Timeout = [TimeSpan]::FromSeconds(15) - - try { - if ($VMRBranch -eq "main") { - $tryChannels = @("11.0.1xx", "12.0.1xx", "10.0.1xx") - foreach ($ch in $tryChannels) { - try { - $resp = $client.GetAsync("https://aka.ms/dotnet/$ch/daily/dotnet-sdk-win-x64.zip").Result - if ([int]$resp.StatusCode -eq 301 -and $resp.Headers.Location) { - $channel = $ch - $blobUrl = 
$resp.Headers.Location.ToString() - $resp.Dispose() - break - } - $resp.Dispose() - } catch { } - } - } - elseif ($VMRBranch -match 'release/(\d+\.\d+\.\d+xx-preview\.?\d+)') { - # aka.ms uses "preview1" not "preview.1" - $channel = $Matches[1] -replace 'preview\.', 'preview' - } - elseif ($VMRBranch -match 'release/(\d+\.\d+)\.(\d)xx') { - $channel = "$($Matches[1]).$($Matches[2])xx" - } - - if (-not $channel) { return $null } - - if (-not $blobUrl) { - $resp = $client.GetAsync("https://aka.ms/dotnet/$channel/daily/dotnet-sdk-win-x64.zip").Result - if ([int]$resp.StatusCode -ne 301 -or -not $resp.Headers.Location) { - $resp.Dispose() - return $null - } - $blobUrl = $resp.Headers.Location.ToString() - $resp.Dispose() - } - - $version = if ($blobUrl -match '/Sdk/([^/]+)/') { $Matches[1] } else { $null } - - # Use HttpClient HEAD (consistent with above, avoids mixing Invoke-WebRequest) - # Need a separate client with auto-redirect enabled for the blob URL - $blobHandler = [System.Net.Http.HttpClientHandler]::new() - $blobClient = [System.Net.Http.HttpClient]::new($blobHandler) - $blobClient.Timeout = [TimeSpan]::FromSeconds(15) - $published = $null - try { - $request = [System.Net.Http.HttpRequestMessage]::new([System.Net.Http.HttpMethod]::Head, $blobUrl) - $headResp = $blobClient.SendAsync($request, [System.Net.Http.HttpCompletionOption]::ResponseHeadersRead).Result - # PowerShell unwraps Nullable — use cast, not .Value - $lastMod = $headResp.Content.Headers.LastModified - if ($null -eq $lastMod) { $lastMod = $headResp.Headers.LastModified } - if ($null -ne $lastMod) { $published = ([DateTimeOffset]$lastMod).UtcDateTime } - } - finally { - if ($headResp) { $headResp.Dispose() } - if ($request) { $request.Dispose() } - $blobClient.Dispose() - $blobHandler.Dispose() - } - - if (-not $published) { return $null } - return @{ - Channel = $channel - Version = $version - Published = $published - Age = [DateTime]::UtcNow - $published - } - } - catch { - return $null - } - 
finally { - if ($client) { $client.Dispose() } - } -} - -# --- Parse repo owner/name --- -if ($Repository -notmatch '^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$') { - Write-Error "Repository must be in format 'owner/repo' (e.g., 'dotnet/sdk')" - return -} - -# --- CheckMissing mode: find expected but missing backflow PRs --- -if ($CheckMissing) { - if (-not (Get-Command gh -ErrorAction SilentlyContinue)) { - Write-Error "GitHub CLI (gh) is not installed or not in PATH. Install from https://cli.github.com/" - return - } - - Write-Section "Checking for missing backflow PRs in $Repository" - - # dotnet/dotnet doesn't have backflow from itself — skip to forward flow + build freshness - if ($Repository -eq "dotnet/dotnet") { - Write-Host " ℹ️ VMR (dotnet/dotnet) does not have backflow from itself" -ForegroundColor DarkGray - - # Still show build freshness for the VMR - $vmrBranches = @{} - if ($Branch -eq "main" -or -not $Branch) { $vmrBranches["main"] = "main" } - if ($Branch -match 'release/' -or -not $Branch) { - # Try to detect release branches - $branchesJson = gh api "/repos/dotnet/dotnet/branches?per_page=30" --jq '.[].name' 2>$null - if ($LASTEXITCODE -eq 0 -and $branchesJson) { - foreach ($b in ($branchesJson -split "`n")) { - if ($b -match '^release/') { $vmrBranches[$b] = $b } - } - } - } - if ($vmrBranches.Count -gt 0) { - Write-Section "Official Build Freshness (via aka.ms)" - $checkedChannels = @{} - $anyVeryStale = $false - foreach ($entry in $vmrBranches.GetEnumerator()) { - $freshness = Get-VMRBuildFreshness -VMRBranch $entry.Value - if ($freshness -and -not $checkedChannels.ContainsKey($freshness.Channel)) { - $checkedChannels[$freshness.Channel] = $freshness - $ageDays = $freshness.Age.TotalDays - $ageStr = if ($ageDays -ge 1) { "$([math]::Round($ageDays, 1))d" } else { "$([math]::Round($freshness.Age.TotalHours, 1))h" } - $color = if ($ageDays -gt 3) { 'Red' } elseif ($ageDays -gt 1) { 'Yellow' } else { 'Green' } - $versionStr = if ($freshness.Version) { 
$freshness.Version } else { "unknown" } - $branchLabel = "$($entry.Key) → $($freshness.Channel)" - Write-Host " $($branchLabel.PadRight(40)) $($versionStr.PadRight(48)) $($freshness.Published.ToString('yyyy-MM-dd HH:mm')) UTC ($ageStr ago)" -ForegroundColor $color - if ($ageDays -gt 3) { $anyVeryStale = $true } - } - } - if ($anyVeryStale) { - Write-Host "" - Write-Host " ⚠️ Official builds appear stale — VMR may be failing to build" -ForegroundColor Yellow - Write-Host " Check https://dev.azure.com/dnceng-public/public/_build?definitionId=278 for public CI failures" -ForegroundColor DarkGray - Write-Host " See also: https://github.com/dotnet/dotnet/issues?q=is:issue+is:open+%22Operational+Issue%22" -ForegroundColor DarkGray - } - } - return - } - - # Find open backflow PRs (to know which branches are already covered) - $openPRsJson = gh search prs --repo $Repository --author "dotnet-maestro[bot]" --state open "Source code updates from dotnet/dotnet" --json number,title --limit 50 2>$null - $openPRs = @() - $ghSearchFailed = $false - if ($LASTEXITCODE -eq 0 -and $openPRsJson) { - try { $openPRs = ($openPRsJson -join "`n") | ConvertFrom-Json } catch { $openPRs = @() } - } - elseif ($LASTEXITCODE -ne 0) { - Write-Warning "gh search failed (exit code $LASTEXITCODE). Check authentication with 'gh auth status'." 
- $ghSearchFailed = $true - } - $openBranches = @{} - foreach ($opr in $openPRs) { - if ($opr.title -match '^\[([^\]]+)\]') { - $openBranches[$Matches[1]] = $opr.number - } - } - - if ($openPRs.Count -gt 0) { - Write-Host " Open backflow PRs already exist:" -ForegroundColor White - foreach ($opr in $openPRs) { - Write-Host " #$($opr.number): $($opr.title)" -ForegroundColor Green - } - Write-Host "" - } - - # Find recently merged backflow PRs to discover branches and VMR commit mapping - $mergedPRsJson = gh search prs --repo $Repository --author "dotnet-maestro[bot]" --state closed --merged "Source code updates from dotnet/dotnet" --limit 30 --sort updated --json number,title,closedAt 2>$null - $mergedPRs = @() - if ($LASTEXITCODE -eq 0 -and $mergedPRsJson) { - try { $mergedPRs = ($mergedPRsJson -join "`n") | ConvertFrom-Json } catch { $mergedPRs = @() } - } - elseif ($LASTEXITCODE -ne 0 -and -not $ghSearchFailed) { - Write-Warning "gh search for merged PRs failed (exit code $LASTEXITCODE). Results may be incomplete." - } - - if ($mergedPRs.Count -eq 0 -and $openPRs.Count -eq 0) { - if ($ghSearchFailed) { - Write-Host " ❌ Could not query GitHub. Check 'gh auth status' and rate limits." -ForegroundColor Red - } - else { - Write-Host " No backflow PRs found (open or recently merged). This repo may not have backflow subscriptions." 
-ForegroundColor Yellow - } - return - } - - # Group merged PRs by branch, keeping only the most recently merged per branch - $branchLastMerged = @{} - foreach ($mpr in $mergedPRs) { - if ($mpr.title -match '^\[([^\]]+)\]') { - $branchName = $Matches[1] - if ($Branch -and $branchName -ne $Branch) { continue } - if (-not $branchLastMerged.ContainsKey($branchName)) { - $branchLastMerged[$branchName] = $mpr - } - else { - # Keep the one with the later closedAt (actual merge time) - $existing = $branchLastMerged[$branchName] - if ($mpr.closedAt -and $existing.closedAt -and $mpr.closedAt -gt $existing.closedAt) { - $branchLastMerged[$branchName] = $mpr - } - } - } - } - - if ($Branch -and -not $branchLastMerged.ContainsKey($Branch) -and -not $openBranches.ContainsKey($Branch)) { - Write-Host " No backflow PRs found for branch '$Branch'." -ForegroundColor Yellow - return - } - - # For each branch without an open PR, check if VMR has moved past the last merged commit - $missingCount = 0 - $coveredCount = 0 - $upToDateCount = 0 - $blockedCount = 0 - $vmrBranchesFound = @{} - $cachedPRBodies = @{} - - # First pass: collect VMR branch mappings from merged PRs (needed for build freshness) - foreach ($branchName in ($branchLastMerged.Keys | Sort-Object)) { - if ($openBranches.ContainsKey($branchName)) { continue } - $lastPR = $branchLastMerged[$branchName] - $prDetailJson = gh pr view $lastPR.number -R $Repository --json body 2>$null - if ($LASTEXITCODE -ne 0) { continue } - try { $prDetail = ($prDetailJson -join "`n") | ConvertFrom-Json } catch { continue } - $cachedPRBodies[$branchName] = $prDetail - $vmrBranchFromPR = $null - if ($prDetail.body -match '\*\*Branch\*\*:\s*\[([^\]]+)\]') { $vmrBranchFromPR = $Matches[1] } - if ($vmrBranchFromPR) { $vmrBranchesFound[$branchName] = $vmrBranchFromPR } - } - - # --- Official build freshness check (shown first for context) --- - $buildsAreStale = $false - if ($vmrBranchesFound.Count -gt 0) { - Write-Section "Official Build 
Freshness (via aka.ms)" - $checkedChannels = @{} - foreach ($entry in $vmrBranchesFound.GetEnumerator()) { - $freshness = Get-VMRBuildFreshness -VMRBranch $entry.Value - if ($freshness -and -not $checkedChannels.ContainsKey($freshness.Channel)) { - $checkedChannels[$freshness.Channel] = $freshness - $ageDays = $freshness.Age.TotalDays - $ageStr = if ($ageDays -ge 1) { "$([math]::Round($ageDays, 1))d" } else { "$([math]::Round($freshness.Age.TotalHours, 1))h" } - $color = if ($ageDays -gt 3) { 'Red' } elseif ($ageDays -gt 1) { 'Yellow' } else { 'Green' } - $versionStr = if ($freshness.Version) { $freshness.Version } else { "unknown" } - $branchLabel = "$($entry.Key) → $($freshness.Channel)" - Write-Host " $($branchLabel.PadRight(40)) $($versionStr.PadRight(48)) $($freshness.Published.ToString('yyyy-MM-dd HH:mm')) UTC ($ageStr ago)" -ForegroundColor $color - if ($ageDays -gt 3) { $buildsAreStale = $true } - } - } - if ($buildsAreStale) { - Write-Host "" - Write-Host " ⚠️ Official builds appear stale — VMR may be failing to build" -ForegroundColor Yellow - Write-Host " Missing backflow PRs below are likely caused by this, not a Maestro issue" -ForegroundColor DarkGray - Write-Host " Check https://dev.azure.com/dnceng-public/public/_build?definitionId=278 for public CI failures" -ForegroundColor DarkGray - Write-Host " See also: https://github.com/dotnet/dotnet/issues?q=is:issue+is:open+%22Operational+Issue%22" -ForegroundColor DarkGray - } - } - - # --- Per-branch backflow analysis --- - Write-Section "Backflow status ($Repository ← dotnet/dotnet)" - - foreach ($branchName in ($branchLastMerged.Keys | Sort-Object)) { - $lastPR = $branchLastMerged[$branchName] - Write-Host "" - Write-Host " Branch: $branchName" -ForegroundColor White - - if ($openBranches.ContainsKey($branchName)) { - $bfHealth = Get-CodeflowPRHealth -PRNumber $openBranches[$branchName] -Repo $Repository - Write-Host " Open backflow PR #$($openBranches[$branchName]): $($bfHealth.Status)" 
-ForegroundColor $bfHealth.Color - if ($bfHealth.HasConflict -or $bfHealth.HasStaleness) { $blockedCount++ } - elseif ($bfHealth.Status -notlike '*Unknown*') { $coveredCount++ } - continue - } - - # Get the PR body to extract VMR commit (branch already collected above) - $vmrBranchFromPR = $vmrBranchesFound[$branchName] - if (-not $vmrBranchFromPR) { - Write-Host " ⚠️ Could not determine VMR branch from last merged PR" -ForegroundColor Yellow - continue - } - - # Use cached PR body from first pass - $prDetail = $cachedPRBodies[$branchName] - if (-not $prDetail) { - Write-Host " ⚠️ Could not fetch PR details" -ForegroundColor Yellow - continue - } - - $vmrCommitFromPR = $null - if ($prDetail.body -match '\*\*Commit\*\*:\s*\[([a-fA-F0-9]+)\]') { - $vmrCommitFromPR = $Matches[1] - } - - if (-not $vmrCommitFromPR) { - Write-Host " ⚠️ Could not parse VMR commit from last merged PR #$($lastPR.number)" -ForegroundColor Yellow - continue - } - - Write-Host " Last merged: PR #$($lastPR.number) on $($lastPR.closedAt)" -ForegroundColor DarkGray - Write-Host " VMR branch: $vmrBranchFromPR" -ForegroundColor DarkGray - Write-Host " VMR commit: $(Get-ShortSha $vmrCommitFromPR)" -ForegroundColor DarkGray - - # Get current VMR branch HEAD - $encodedVmrBranch = [uri]::EscapeDataString($vmrBranchFromPR) - $vmrHead = Invoke-GitHubApi "/repos/dotnet/dotnet/commits/$encodedVmrBranch" - if (-not $vmrHead) { - Write-Host " ⚠️ Could not fetch VMR branch HEAD for $vmrBranchFromPR" -ForegroundColor Yellow - continue - } - - $vmrHeadSha = $vmrHead.sha - $vmrHeadDate = $vmrHead.commit.committer.date - - if ($vmrCommitFromPR -eq $vmrHeadSha -or $vmrHeadSha.StartsWith($vmrCommitFromPR) -or $vmrCommitFromPR.StartsWith($vmrHeadSha)) { - Write-Host " ✅ VMR branch is at same commit — no backflow needed" -ForegroundColor Green - $upToDateCount++ - } - else { - # Check how far ahead - $compare = Invoke-GitHubApi "/repos/dotnet/dotnet/compare/$vmrCommitFromPR...$vmrHeadSha" - $ahead = if ($compare) { 
$compare.ahead_by } else { "?" } - - Write-Host " 🔴 MISSING BACKFLOW PR" -ForegroundColor Red - Write-Host " VMR is $ahead commit(s) ahead since last merged PR" -ForegroundColor Yellow - Write-Host " VMR HEAD: $(Get-ShortSha $vmrHeadSha) ($vmrHeadDate)" -ForegroundColor DarkGray - Write-Host " Last merged VMR commit: $(Get-ShortSha $vmrCommitFromPR)" -ForegroundColor DarkGray - - # Check how long ago the last PR merged - $mergedTime = [DateTimeOffset]::Parse($lastPR.closedAt).UtcDateTime - $elapsed = [DateTime]::UtcNow - $mergedTime - if ($elapsed.TotalHours -gt 6) { - if ($buildsAreStale) { - Write-Host " ℹ️ No new official build available — backflow blocked upstream" -ForegroundColor DarkGray - } - else { - Write-Host " ⚠️ Last PR merged $([math]::Round($elapsed.TotalHours, 1)) hours ago — Maestro may be stuck" -ForegroundColor Yellow - } - } - else { - Write-Host " ℹ️ Last PR merged $([math]::Round($elapsed.TotalHours, 1)) hours ago — Maestro may still be processing" -ForegroundColor DarkGray - } - $missingCount++ - } - } - - # Also check open-only branches (that weren't in merged list) - foreach ($branchName in ($openBranches.Keys | Sort-Object)) { - if (-not $branchLastMerged.ContainsKey($branchName)) { - if ($Branch -and $branchName -ne $Branch) { continue } - Write-Host "" - Write-Host " Branch: $branchName" -ForegroundColor White - $bfHealth = Get-CodeflowPRHealth -PRNumber $openBranches[$branchName] -Repo $Repository - Write-Host " Open backflow PR #$($openBranches[$branchName]): $($bfHealth.Status)" -ForegroundColor $bfHealth.Color - if ($bfHealth.HasConflict -or $bfHealth.HasStaleness) { $blockedCount++ } - elseif ($bfHealth.Status -notlike '*Unknown*') { $coveredCount++ } - } - } - - # --- Forward flow: check PRs from this repo into the VMR --- - $repoShortName = $Repository -replace '^dotnet/', '' - Write-Host "" - Write-Section "Forward flow PRs ($Repository → dotnet/dotnet)" - - $fwdPRsJson = gh search prs --repo dotnet/dotnet --author 
"dotnet-maestro[bot]" --state open "Source code updates from dotnet/$repoShortName" --json number,title --limit 10 2>$null - $fwdPRs = @() - if ($LASTEXITCODE -eq 0 -and $fwdPRsJson) { - try { $fwdPRs = ($fwdPRsJson -join "`n") | ConvertFrom-Json } catch { $fwdPRs = @() } - } - # Filter to exact repo match (avoid dotnet/sdk matching dotnet/sdk-container-builds) - $fwdPRs = @($fwdPRs | Where-Object { $_.title -match "from dotnet/$([regex]::Escape($repoShortName))$" }) - - $fwdHealthy = 0 - $fwdStale = 0 - $fwdConflict = 0 - - if ($fwdPRs.Count -eq 0) { - Write-Host " No open forward flow PRs found" -ForegroundColor DarkGray - } - else { - foreach ($fpr in $fwdPRs) { - $fprBranch = if ($fpr.title -match '^\[([^\]]+)\]') { $Matches[1] } else { "unknown" } - if ($Branch -and $fprBranch -ne $Branch) { continue } - - $fwdHealth = Get-CodeflowPRHealth -PRNumber $fpr.number -Repo "dotnet/dotnet" - - if ($fwdHealth.HasConflict) { $fwdConflict++ } - elseif ($fwdHealth.HasStaleness) { $fwdStale++ } - elseif ($fwdHealth.Status -notlike '*Unknown*') { $fwdHealthy++ } - - Write-Host " PR #$($fpr.number) [$fprBranch]: $($fwdHealth.Status)" -ForegroundColor $fwdHealth.Color - Write-Host " https://github.com/dotnet/dotnet/pull/$($fpr.number)" -ForegroundColor DarkGray - } - } - - Write-Section "Summary" - Write-Host " Backflow ($Repository ← dotnet/dotnet):" -ForegroundColor White - if ($coveredCount -gt 0) { Write-Host " Branches with healthy open PRs: $coveredCount" -ForegroundColor Green } - if ($upToDateCount -gt 0) { Write-Host " Branches up to date: $upToDateCount" -ForegroundColor Green } - if ($blockedCount -gt 0) { Write-Host " Branches with blocked open PRs: $blockedCount" -ForegroundColor Red } - if ($missingCount -gt 0) { - Write-Host " Branches MISSING backflow PRs: $missingCount" -ForegroundColor Red - } - if ($missingCount -eq 0 -and $blockedCount -eq 0) { - Write-Host " No missing backflow PRs ✅" -ForegroundColor Green - } - Write-Host " Forward flow ($Repository → 
dotnet/dotnet):" -ForegroundColor White - if ($fwdPRs.Count -eq 0) { - Write-Host " No open forward flow PRs" -ForegroundColor DarkGray - } - else { - if ($fwdHealthy -gt 0) { Write-Host " Healthy: $fwdHealthy" -ForegroundColor Green } - if ($fwdStale -gt 0) { Write-Host " Stale: $fwdStale" -ForegroundColor Yellow } - if ($fwdConflict -gt 0) { Write-Host " Conflicted: $fwdConflict" -ForegroundColor Red } - } - return -} - -# --- Validate PRNumber for non-CheckMissing mode --- -if (-not $PRNumber) { - Write-Error "PRNumber is required unless -CheckMissing is used." - return -} - -# --- Step 1: PR Overview --- -Write-Section "Codeflow PR #$PRNumber in $Repository" - -if (-not (Get-Command gh -ErrorAction SilentlyContinue)) { - Write-Error "GitHub CLI (gh) is not installed or not in PATH. Install from https://cli.github.com/" - return -} - -$prJson = gh pr view $PRNumber -R $Repository --json body,title,state,author,headRefName,baseRefName,createdAt,updatedAt,url,comments,commits,additions,deletions,changedFiles -if ($LASTEXITCODE -ne 0) { - Write-Error "Could not fetch PR #$PRNumber from $Repository. Ensure you are authenticated (gh auth login)." 
- return -} -$pr = ($prJson -join "`n") | ConvertFrom-Json - -Write-Status "Title" $pr.title -Write-Status "State" $pr.state -Write-Status "Branch" "$($pr.headRefName) -> $($pr.baseRefName)" -Write-Status "Created" $pr.createdAt -Write-Status "Updated" $pr.updatedAt -Write-Host " URL: $($pr.url)" - -# Check if this is actually a codeflow PR and detect flow direction -$isMaestroPR = $pr.author.login -eq "dotnet-maestro[bot]" -$isBackflow = $pr.title -match "Source code updates from dotnet/dotnet" -$isForwardFlow = $pr.title -match "Source code updates from (dotnet/\S+)" -and -not $isBackflow -if (-not $isMaestroPR -and -not $isBackflow -and -not $isForwardFlow) { - Write-Warning "This does not appear to be a codeflow PR (author: $($pr.author.login), title: $($pr.title))" - Write-Warning "Expected author 'dotnet-maestro[bot]' and title containing 'Source code updates from'" -} - -if ($isForwardFlow) { - $sourceRepo = $Matches[1] - Write-Status "Flow" "Forward ($sourceRepo → $Repository)" "Cyan" -} -elseif ($isBackflow) { - Write-Status "Flow" "Backflow (dotnet/dotnet → $Repository)" "Cyan" -} - -# --- Step 2: Current State (independent assessment from primary signals) --- -Write-Section "Current State" - -# Check for empty diff (0 changed files) -$isEmptyDiff = ($pr.changedFiles -eq 0 -and $pr.additions -eq 0 -and $pr.deletions -eq 0) -if ($isEmptyDiff) { - Write-Host " 📭 Empty diff: 0 changed files, 0 additions, 0 deletions" -ForegroundColor Yellow -} - -# Check PR timeline for force pushes -$forcePushEvents = @() -$owner, $repo = $Repository -split '/' -$forcePushFetchSucceeded = $false -try { - $timelineJson = gh api "repos/$owner/$repo/issues/$PRNumber/timeline" --paginate --slurp --jq 'map(.[] | select(.event == "head_ref_force_pushed"))' 2>$null - if ($LASTEXITCODE -eq 0 -and $timelineJson) { - $forcePushEvents = @($timelineJson | ConvertFrom-Json) - $forcePushFetchSucceeded = $true - } elseif ($LASTEXITCODE -ne 0) { - Write-Warning "Could not fetch PR timeline 
for force push detection (gh api exit code $LASTEXITCODE). Current state assessment may be incomplete." - } else { - $forcePushFetchSucceeded = $true - } -} -catch { - Write-Warning "Failed to parse timeline JSON for force push events: $($_.Exception.Message)" - $forcePushEvents = @() -} - -if ($forcePushEvents.Count -gt 0) { - foreach ($fp in $forcePushEvents) { - $fpActor = if ($fp.actor) { $fp.actor.login } else { "unknown" } - $fpTime = $fp.created_at - $fpSha = if ($fp.commit_id) { Get-ShortSha $fp.commit_id } else { "unknown" } - Write-Host " 🔄 Force push by @$fpActor at $fpTime (→ $fpSha)" -ForegroundColor Cyan - } - $lastForcePush = $forcePushEvents[-1] - $lastForcePushTime = if ($lastForcePush.created_at) { - [DateTimeOffset]::Parse($lastForcePush.created_at).UtcDateTime - } else { $null } - $lastForcePushActor = if ($lastForcePush.actor) { $lastForcePush.actor.login } else { "unknown" } -} - -# Synthesize current state assessment -$prUpdatedTime = if ($pr.updatedAt) { [DateTimeOffset]::Parse($pr.updatedAt).UtcDateTime } else { $null } -$prAgeDays = if ($prUpdatedTime) { ([DateTime]::UtcNow - $prUpdatedTime).TotalDays } else { 0 } -$isClosed = $pr.state -eq "CLOSED" -$isMerged = $pr.state -eq "MERGED" -$currentState = if ($isMerged) { - "MERGED" -} elseif ($isClosed) { - "CLOSED" -} elseif ($isEmptyDiff) { - "NO-OP" -} elseif ($forcePushEvents.Count -gt 0 -and $lastForcePushTime -and ([DateTime]::UtcNow - $lastForcePushTime).TotalHours -lt 24) { - "IN_PROGRESS" -} elseif ($prAgeDays -gt 3) { - "STALE" -} else { - "ACTIVE" -} - -Write-Host "" -switch ($currentState) { - "MERGED" { Write-Host " ✅ MERGED — PR has been merged" -ForegroundColor Green } - "CLOSED" { Write-Host " ✖️ CLOSED — PR was closed without merging" -ForegroundColor DarkGray } - "NO-OP" { Write-Host " 📭 NO-OP — empty diff, likely already resolved" -ForegroundColor Yellow } - "IN_PROGRESS" { Write-Host " 🔄 IN PROGRESS — recent force push, awaiting update" -ForegroundColor Cyan } - "STALE" { 
Write-Host " ⏳ STALE — no recent activity" -ForegroundColor Yellow } - "ACTIVE" { Write-Host " ✅ ACTIVE — PR has content" -ForegroundColor Green } -} - -# --- Step 3: Codeflow Metadata --- -Write-Section "Codeflow Metadata" - -$body = $pr.body - -# Extract subscription ID -$subscriptionId = $null -if ($body -match '\(Begin:([a-f0-9-]+)\)') { - $subscriptionId = $Matches[1] - Write-Status "Subscription" $subscriptionId -} - -# Extract source commit (VMR commit for backflow, repo commit for forward flow) -$sourceCommit = $null -if ($body -match '\*\*Commit\*\*:\s*\[([a-fA-F0-9]+)\]') { - $sourceCommit = $Matches[1] - $commitLabel = if ($isForwardFlow) { "Source Commit" } else { "VMR Commit" } - Write-Status $commitLabel $sourceCommit -} -# Keep $vmrCommit alias for backflow compatibility -$vmrCommit = $sourceCommit - -# Extract build info -if ($body -match '\*\*Build\*\*:\s*\[([^\]]+)\]\(([^\)]+)\)') { - Write-Status "Build" "$($Matches[1])" - Write-Status "Build URL" $Matches[2] -} - -# Extract date produced -if ($body -match '\*\*Date Produced\*\*:\s*(.+)') { - Write-Status "Date Produced" $Matches[1].Trim() -} - -# Extract source branch -$vmrBranch = $null -if ($body -match '\*\*Branch\*\*:\s*\[([^\]]+)\]') { - $vmrBranch = $Matches[1] - $branchLabel = if ($isForwardFlow) { "Source Branch" } else { "VMR Branch" } - Write-Status $branchLabel $vmrBranch -} - -# Extract commit diff -if ($body -match '\*\*Commit Diff\*\*:\s*\[([^\]]+)\]\(([^\)]+)\)') { - Write-Status "Commit Diff" $Matches[1] -} - -# Extract associated repo changes from footer -$repoChanges = @() -$changeMatches = [regex]::Matches($body, '- (https://github\.com/([^/]+/[^/]+)/compare/([a-fA-F0-9]+)\.\.\.([a-fA-F0-9]+))') -foreach ($m in $changeMatches) { - $repoChanges += @{ - URL = $m.Groups[1].Value - Repo = $m.Groups[2].Value - FromSha = $m.Groups[3].Value - ToSha = $m.Groups[4].Value - } -} -if ($repoChanges.Count -gt 0) { - Write-Status "Associated Repos" "$($repoChanges.Count) repos with source 
changes" -} - -if (-not $vmrCommit -or -not $vmrBranch) { - Write-Warning "Could not parse VMR metadata from PR body. This may not be a codeflow PR." - if (-not $vmrBranch) { - # For backflow: infer from PR target (which is the product repo branch = VMR branch name) - # For forward flow: infer from PR head branch pattern or source repo context - if ($isForwardFlow) { - $vmrBranch = $pr.headRefName -replace '^darc-', '' -replace '-[a-f0-9-]+$', '' - if (-not $vmrBranch) { $vmrBranch = $pr.baseRefName } - } - else { - $vmrBranch = $pr.baseRefName - } - Write-Status "Inferred Branch" "$vmrBranch (from PR metadata)" - } -} - -# For backflow: compare against VMR (dotnet/dotnet) branch HEAD -# For forward flow: compare against product repo branch HEAD -$freshnessRepo = if ($isForwardFlow) { $sourceRepo } else { "dotnet/dotnet" } -$freshnessRepoLabel = if ($isForwardFlow) { $sourceRepo } else { "VMR" } - -# Pre-load PR commits for use in validation and later analysis -$prCommits = $pr.commits - -# --- Step 4: Determine actual VMR snapshot on the PR branch --- -# Priority: 1) Version.Details.xml (ground truth), 2) commit messages, 3) PR body -$branchVmrCommit = $null -$commitMsgVmrCommit = $null -$versionDetailsVmrCommit = $null - -# First: check eng/Version.Details.xml on the PR branch (authoritative source) -if (-not $isForwardFlow) { - $vdContent = Invoke-GitHubApi "/repos/$Repository/contents/eng/Version.Details.xml?ref=$([System.Uri]::EscapeDataString($pr.headRefName))" -Raw - if ($vdContent) { - try { - [xml]$vdXml = $vdContent - $sourceNode = $vdXml.Dependencies.Source - if ($sourceNode -and $sourceNode.Sha -and $sourceNode.Sha -match '^[a-fA-F0-9]{40}$') { - $versionDetailsVmrCommit = $sourceNode.Sha - $branchVmrCommit = $versionDetailsVmrCommit - } - } - catch { - # Fall back to regex if XML parsing fails - if ($vdContent -match ']*Sha="([a-fA-F0-9]{40})"') { - $versionDetailsVmrCommit = $Matches[1] - $branchVmrCommit = $versionDetailsVmrCommit - } - } - } -} - -# 
Second: scan commit messages for "Backflow from" / "Forward flow from" SHAs -if ($prCommits) { - $reversedCommits = @($prCommits) - [Array]::Reverse($reversedCommits) - foreach ($c in $reversedCommits) { - $msg = $c.messageHeadline - if ($msg -match '(?:Backflow|Forward flow) from .+ / ([a-fA-F0-9]+)') { - $commitMsgVmrCommit = $Matches[1] - break - } - } - # For forward flow (no Version.Details.xml source), commit messages are primary - if (-not $branchVmrCommit -and $commitMsgVmrCommit) { - $branchVmrCommit = $commitMsgVmrCommit - } -} - -if ($branchVmrCommit -or $vmrCommit) { - Write-Section "Snapshot Validation" - $usedBranchSnapshot = $false - - if ($branchVmrCommit) { - # We have a branch-derived snapshot (from Version.Details.xml or commit message) - $branchShort = Get-ShortSha $branchVmrCommit - $sourceLabel = if ($versionDetailsVmrCommit -and $branchVmrCommit -eq $versionDetailsVmrCommit) { "Version.Details.xml" } else { "branch commit" } - - if ($vmrCommit) { - $bodyShort = Get-ShortSha $vmrCommit - if ($vmrCommit.StartsWith($branchVmrCommit, [StringComparison]::OrdinalIgnoreCase) -or $branchVmrCommit.StartsWith($vmrCommit, [StringComparison]::OrdinalIgnoreCase)) { - Write-Host " ✅ $sourceLabel ($branchShort) matches PR body ($bodyShort)" -ForegroundColor Green - } - else { - Write-Host " ⚠️ MISMATCH: $sourceLabel has $branchShort but PR body claims $bodyShort" -ForegroundColor Red - Write-Host " PR body is stale — using $sourceLabel for freshness check" -ForegroundColor Yellow - } - } - else { - Write-Host " ℹ️ PR body has no commit reference — using $sourceLabel ($branchShort)" -ForegroundColor Yellow - } - - # Resolve to full SHA for accurate comparison (skip API call if already full-length) - if ($branchVmrCommit.Length -ge 40) { - $vmrCommit = $branchVmrCommit - $usedBranchSnapshot = $true - } - else { - $resolvedCommit = Invoke-GitHubApi "/repos/$freshnessRepo/commits/$branchVmrCommit" - if ($resolvedCommit) { - $vmrCommit = $resolvedCommit.sha - 
$usedBranchSnapshot = $true - } - elseif ($vmrCommit) { - Write-Host " ⚠️ Could not resolve $sourceLabel SHA $branchShort — falling back to PR body ($(Get-ShortSha $vmrCommit))" -ForegroundColor Yellow - } - else { - Write-Host " ⚠️ Could not resolve $sourceLabel SHA $branchShort" -ForegroundColor Yellow - } - } - } - else { - # No branch-derived snapshot — PR body only - $commitCount = if ($prCommits) { $prCommits.Count } else { 0 } - if ($commitCount -eq 1 -and $prCommits[0].messageHeadline -match "^Initial commit for subscription") { - Write-Host " ℹ️ PR has only an initial subscription commit — PR body snapshot ($(Get-ShortSha $vmrCommit)) not yet verifiable" -ForegroundColor DarkGray - } - else { - Write-Host " ⚠️ Could not verify PR body snapshot ($(Get-ShortSha $vmrCommit)) from branch" -ForegroundColor Yellow - } - } -} - -# --- Step 5: Check source freshness --- -$freshnessLabel = if ($isForwardFlow) { "Source Freshness" } else { "VMR Freshness" } -Write-Section $freshnessLabel - -$sourceHeadSha = $null -$aheadBy = 0 -$behindBy = 0 -$compareStatus = $null - -if ($vmrCommit -and $vmrBranch) { - # Get current branch HEAD (URL-encode branch name for path segments with /) - $encodedBranch = [uri]::EscapeDataString($vmrBranch) - $branchHead = Invoke-GitHubApi "/repos/$freshnessRepo/commits/$encodedBranch" - if ($branchHead) { - $sourceHeadSha = $branchHead.sha - $sourceHeadDate = $branchHead.commit.committer.date - $snapshotSource = if ($usedBranchSnapshot) { - if ($versionDetailsVmrCommit -and $vmrCommit.StartsWith($versionDetailsVmrCommit, [StringComparison]::OrdinalIgnoreCase)) { "from Version.Details.xml" } - elseif ($commitMsgVmrCommit) { "from branch commit" } - else { "from branch" } - } else { "from PR body" } - Write-Status "PR snapshot" "$(Get-ShortSha $vmrCommit) ($snapshotSource)" - Write-Status "$freshnessRepoLabel HEAD" "$(Get-ShortSha $sourceHeadSha) ($sourceHeadDate)" - - if ($vmrCommit -eq $sourceHeadSha) { - Write-Host " ✅ PR is up to date 
with $freshnessRepoLabel branch" -ForegroundColor Green - } - else { - # Compare to find how many commits differ - $compare = Invoke-GitHubApi "/repos/$freshnessRepo/compare/$vmrCommit...$sourceHeadSha" - if ($compare) { - $aheadBy = $compare.ahead_by - $behindBy = $compare.behind_by - $compareStatus = $compare.status - - switch ($compareStatus) { - 'identical' { - Write-Host " ✅ PR is up to date with $freshnessRepoLabel branch" -ForegroundColor Green - } - 'ahead' { - Write-Host " ⚠️ $freshnessRepoLabel is $aheadBy commit(s) ahead of the PR snapshot" -ForegroundColor Yellow - } - 'behind' { - Write-Host " ⚠️ $freshnessRepoLabel is $behindBy commit(s) behind the PR snapshot" -ForegroundColor Yellow - } - 'diverged' { - Write-Host " ⚠️ $freshnessRepoLabel and PR snapshot have diverged: $aheadBy commit(s) ahead and $behindBy commit(s) behind" -ForegroundColor Yellow - } - default { - Write-Host " ⚠️ $freshnessRepoLabel and PR snapshot differ (status: $compareStatus)" -ForegroundColor Yellow - } - } - - if ($compare.total_commits -and $compare.commits) { - $returnedCommits = @($compare.commits).Count - if ($returnedCommits -lt $compare.total_commits) { - Write-Host " ⚠️ Compare API returned $returnedCommits of $($compare.total_commits) commits; listing may be incomplete." -ForegroundColor Yellow - } - } - - if ($ShowCommits -and $compare.commits) { - Write-Host "" - $commitLabel = switch ($compareStatus) { - 'ahead' { "Commits since PR snapshot:" } - 'behind' { "Commits in PR snapshot but not in $freshnessRepoLabel`:" } - default { "Commits differing:" } - } - Write-Host " $commitLabel" -ForegroundColor Yellow - foreach ($c in $compare.commits) { - $msg = ($c.commit.message -split "`n")[0] - if ($msg.Length -gt 100) { $msg = $msg.Substring(0, 97) + "..." 
} - $date = $c.commit.committer.date - Write-Host " $(Get-ShortSha $c.sha 8) $date $msg" - } - } - - # Check which repos have updates in the missing commits - $missingRepoUpdates = @() - if ($compare.commits) { - foreach ($c in $compare.commits) { - $msg = ($c.commit.message -split "`n")[0] - if ($msg -match 'Source code updates from ([^\s(]+)') { - $missingRepoUpdates += $Matches[1] - } - } - } - if ($missingRepoUpdates.Count -gt 0) { - $uniqueRepos = $missingRepoUpdates | Select-Object -Unique - Write-Host "" - Write-Host " Missing updates from: $($uniqueRepos -join ', ')" -ForegroundColor Yellow - } - - # --- For backflow PRs that are behind: check pending forward flow PRs --- - if ($isBackflow -and $compareStatus -eq 'ahead' -and $aheadBy -gt 0 -and $vmrBranch) { - $forwardPRsJson = gh search prs --repo dotnet/dotnet --author "dotnet-maestro[bot]" --state open "Source code updates from" --base $vmrBranch --json number,title --limit 20 2>$null - $pendingForwardPRs = @() - if ($LASTEXITCODE -eq 0 -and $forwardPRsJson) { - try { - $allForward = ($forwardPRsJson -join "`n") | ConvertFrom-Json - # Filter to forward flow PRs (not backflow) targeting this VMR branch - $pendingForwardPRs = $allForward | Where-Object { - $_.title -match "Source code updates from (dotnet/\S+)" -and - $Matches[1] -ne "dotnet/dotnet" - } - } - catch { - Write-Warning "Failed to parse forward flow PR search results. Skipping forward flow analysis." 
- } - } - - if ($pendingForwardPRs.Count -gt 0) { - Write-Host "" - Write-Host " Pending forward flow PRs into VMR ($vmrBranch):" -ForegroundColor Cyan - - $coveredRepos = @() - foreach ($fpr in $pendingForwardPRs) { - $fprSourceRepo = $null - if ($fpr.title -match "Source code updates from (dotnet/\S+)") { - $fprSourceRepo = $Matches[1] - } - $coveredLabel = "" - if ($fprSourceRepo -and $uniqueRepos -contains $fprSourceRepo) { - $coveredRepos += $fprSourceRepo - $coveredLabel = " ← covers missing updates" - } - Write-Host " dotnet/dotnet#$($fpr.number): $($fpr.title)$coveredLabel" -ForegroundColor DarkGray - } - - if ($coveredRepos.Count -gt 0) { - $uncoveredRepos = $uniqueRepos | Where-Object { $_ -notin $coveredRepos } - $coveredCount = $coveredRepos.Count - $totalMissing = $uniqueRepos.Count - Write-Host "" - Write-Host " 📊 Forward flow coverage: $coveredCount of $totalMissing missing repo(s) have pending forward flow PRs" -ForegroundColor Cyan - if ($uncoveredRepos.Count -gt 0) { - Write-Host " Still waiting on: $($uncoveredRepos -join ', ')" -ForegroundColor Yellow - } - else { - Write-Host " ✅ All missing repos have pending forward flow — gap should close once they merge + new backflow triggers" -ForegroundColor Green - } - } - } - } - } - } - } -} -else { - Write-Warning "Cannot check freshness without source commit and branch info" -} - -# Collect Maestro comment data (needed by PR Branch Analysis and Codeflow History) -$stalenessWarnings = @() -$lastStalenessComment = $null - -if ($pr.comments) { - foreach ($comment in $pr.comments) { - $commentAuthor = $comment.author.login - if ($commentAuthor -eq "dotnet-maestro[bot]" -or $commentAuthor -eq "dotnet-maestro") { - if ($comment.body -match "codeflow cannot continue" -or $comment.body -match "darc trigger-subscriptions") { - $stalenessWarnings += $comment - $lastStalenessComment = $comment - } - } - } -} - -$conflictWarnings = @() -$lastConflictComment = $null - -if ($pr.comments) { - foreach ($comment in 
$pr.comments) { - $commentAuthor = $comment.author.login - if ($commentAuthor -eq "dotnet-maestro[bot]" -or $commentAuthor -eq "dotnet-maestro") { - if ($comment.body -match "Conflict detected") { - $conflictWarnings += $comment - $lastConflictComment = $comment - } - } - } -} - -# Extract conflicting files (used in History and Recommendations) -$conflictFiles = @() -if ($lastConflictComment) { - $fileMatches = [regex]::Matches($lastConflictComment.body, '-\s+`([^`]+)`\s*\r?\n') - foreach ($fm in $fileMatches) { - $conflictFiles += $fm.Groups[1].Value - } -} - -# Cross-reference force push against conflict/staleness warnings (data only) -$conflictMayBeResolved = $false -$stalenessMayBeResolved = $false -if ($lastForcePushTime) { - if ($conflictWarnings.Count -gt 0 -and $lastConflictComment) { - $lastConflictTime = [DateTimeOffset]::Parse($lastConflictComment.createdAt).UtcDateTime - if ($lastForcePushTime -gt $lastConflictTime) { - $conflictMayBeResolved = $true - } - } - if ($stalenessWarnings.Count -gt 0 -and $lastStalenessComment) { - $lastStalenessTime = [DateTimeOffset]::Parse($lastStalenessComment.createdAt).UtcDateTime - if ($lastForcePushTime -gt $lastStalenessTime) { - $stalenessMayBeResolved = $true - } - } -} - -# --- Step 6: PR Branch Analysis --- -Write-Section "PR Branch Analysis" - -if ($prCommits) { - $maestroCommits = @() - $manualCommits = @() - $mergeCommits = @() - - foreach ($c in $prCommits) { - $msg = $c.messageHeadline - $authorLogin = if ($c.authors -and $c.authors.Count -gt 0) { $c.authors[0].login } else { $null } - $authorName = if ($c.authors -and $c.authors.Count -gt 0) { $c.authors[0].name } else { "unknown" } - $author = if ($authorLogin) { $authorLogin } else { $authorName } - - if ($msg -match "^Merge branch") { - $mergeCommits += $c - } - elseif ($author -in @("dotnet-maestro[bot]", "dotnet-maestro") -or $msg -eq "Update dependencies") { - $maestroCommits += $c - } - else { - $manualCommits += $c - } - } - - Write-Status "Total 
commits" $prCommits.Count - Write-Status "Maestro auto-updates" $maestroCommits.Count - Write-Status "Merge commits" $mergeCommits.Count - Write-Status "Manual commits" $manualCommits.Count "$(if ($manualCommits.Count -gt 0) { 'Yellow' } else { 'Green' })" - - if ($manualCommits.Count -gt 0) { - Write-Host "" - Write-Host " Manual commits (at risk if PR is closed/force-triggered):" -ForegroundColor Yellow - foreach ($c in $manualCommits) { - $msg = $c.messageHeadline - if ($msg.Length -gt 80) { $msg = $msg.Substring(0, 77) + "..." } - $authorName = if ($c.authors -and $c.authors.Count -gt 0) { $c.authors[0].name } else { "unknown" } - Write-Host " $(Get-ShortSha $c.oid 8) [$authorName] $msg" - } - } - - # Detect manual commits that look like codeflow-like changes (someone manually - # doing what Maestro would do while flow is paused) - $codeflowLikeManualCommits = @() - foreach ($c in $manualCommits) { - $msg = $c.messageHeadline - if ($msg -match 'Update dependencies' -or - $msg -match 'Version\.Details\.xml' -or - $msg -match 'Versions\.props' -or - $msg -match '[Bb]ackflow' -or - $msg -match '[Ff]orward flow' -or - $msg -match 'from dotnet/' -or - $msg -match '[a-f0-9]{7,40}' -or - $msg -match 'src/SourceBuild') { - $codeflowLikeManualCommits += $c - } - } - - if ($codeflowLikeManualCommits.Count -gt 0 -and $stalenessWarnings.Count -gt 0) { - Write-Host "" - Write-Host " ⚠️ $($codeflowLikeManualCommits.Count) manual commit(s) appear to contain codeflow-like changes while flow is paused" -ForegroundColor Yellow - Write-Host " The freshness gap reported above may be partially covered by these manual updates" -ForegroundColor DarkGray - } -} - -# --- Step 7: Codeflow History (Maestro comments as historical context) --- -Write-Section "Codeflow History" -Write-Host " Maestro warnings (historical — see Current State for present status):" -ForegroundColor DarkGray - -if ($stalenessWarnings.Count -gt 0 -or $conflictWarnings.Count -gt 0) { - if ($conflictWarnings.Count 
-gt 0) { - Write-Host " 🔴 Conflict detected ($($conflictWarnings.Count) conflict warning(s))" -ForegroundColor Red - Write-Status "Latest conflict" $lastConflictComment.createdAt - - if ($conflictFiles.Count -gt 0) { - Write-Host " Conflicting files:" -ForegroundColor Yellow - foreach ($f in $conflictFiles) { - Write-Host " - $f" -ForegroundColor Yellow - } - } - - # Extract VMR commit from the conflict comment - if ($lastConflictComment.body -match 'sources from \[`([a-fA-F0-9]+)`\]') { - Write-Host " Conflicting VMR commit: $($Matches[1])" -ForegroundColor DarkGray - } - - # Extract resolve command - if ($lastConflictComment.body -match '(darc vmr resolve-conflict --subscription [a-fA-F0-9-]+(?:\s+--build [a-fA-F0-9-]+)?)') { - Write-Host "" - Write-Host " Resolve command:" -ForegroundColor White - Write-Host " $($Matches[1])" -ForegroundColor DarkGray - } - } - - if ($stalenessWarnings.Count -gt 0) { - if ($conflictWarnings.Count -gt 0) { Write-Host "" } - Write-Host " ⚠️ Staleness warning detected ($($stalenessWarnings.Count) warning(s))" -ForegroundColor Yellow - Write-Status "Latest warning" $lastStalenessComment.createdAt - $oppositeFlow = if ($isForwardFlow) { "backflow from VMR merged into $sourceRepo" } else { "forward flow merged into VMR" } - Write-Host " Opposite codeflow ($oppositeFlow) while this PR was open." -ForegroundColor Yellow - Write-Host " Maestro has blocked further codeflow updates to this PR." 
-ForegroundColor Yellow - - # Extract darc commands from the warning - if ($lastStalenessComment.body -match 'darc trigger-subscriptions --id ([a-fA-F0-9-]+)(?:\s+--force)?') { - Write-Host "" - Write-Host " Suggested commands from Maestro:" -ForegroundColor White - if ($lastStalenessComment.body -match '(darc trigger-subscriptions --id [a-fA-F0-9-]+)\s*\r?\n') { - Write-Host " Normal trigger: $($Matches[1])" - } - if ($lastStalenessComment.body -match '(darc trigger-subscriptions --id [a-fA-F0-9-]+ --force)') { - Write-Host " Force trigger: $($Matches[1])" - } - } - } -} -else { - Write-Host " ✅ No staleness or conflict warnings found" -ForegroundColor Green -} - -# Cross-reference force push against conflict/staleness warnings (historical context) -if ($lastForcePushTime) { - if ($conflictMayBeResolved) { - Write-Host "" - Write-Host " ℹ️ Force push by @$lastForcePushActor at $($lastForcePush.created_at) is AFTER the last conflict warning" -ForegroundColor Cyan - Write-Host " Conflict may have been resolved via darc vmr resolve-conflict" -ForegroundColor DarkGray - } - if ($stalenessMayBeResolved) { - Write-Host " ℹ️ Force push is AFTER the staleness warning — someone may have acted on it" -ForegroundColor Cyan - } - if ($isEmptyDiff -and ($conflictMayBeResolved -or $stalenessMayBeResolved)) { - Write-Host "" - Write-Host " 📭 PR has empty diff after force push — codeflow changes may already be in target branch" -ForegroundColor Yellow - Write-Host " This PR is likely a no-op. Consider merging to clear state or closing it." -ForegroundColor DarkGray - } -} - -# --- Step 8: Trace a specific fix (optional) --- -if ($TraceFix) { - Write-Section "Tracing Fix: $TraceFix" - - # Parse TraceFix format: "owner/repo#number" or "repo#number" - $traceMatch = [regex]::Match($TraceFix, '(?:([^/]+)/)?([^#]+)#(\d+)') - if (-not $traceMatch.Success) { - Write-Warning "Could not parse TraceFix format. 
Expected: 'owner/repo#number' or 'repo#number'" - } - else { - $traceOwner = if ($traceMatch.Groups[1].Value) { $traceMatch.Groups[1].Value } else { "dotnet" } - $traceRepo = $traceMatch.Groups[2].Value - $traceNumber = $traceMatch.Groups[3].Value - $traceFullRepo = "$traceOwner/$traceRepo" - - # Check if the fix PR is merged (use merged_at since REST may not include merged boolean) - $fixPR = Invoke-GitHubApi "/repos/$traceFullRepo/pulls/$traceNumber" - $fixIsMerged = $false - if ($fixPR) { - $fixIsMerged = $null -ne $fixPR.merged_at - Write-Status "Fix PR" "${traceFullRepo}#${traceNumber}: $($fixPR.title)" - Write-Status "State" $fixPR.state - Write-Status "Merged" "$(if ($fixIsMerged) { '✅ Yes' } else { '❌ No' })" "$(if ($fixIsMerged) { 'Green' } else { 'Red' })" - if ($fixIsMerged) { - Write-Status "Merged at" $fixPR.merged_at - Write-Status "Merge commit" $fixPR.merge_commit_sha - $fixMergeCommit = $fixPR.merge_commit_sha - } - } - - # Check if the fix is in the VMR source-manifest.json on the target branch - # For forward flow, the VMR target is the PR base branch; for backflow, use $vmrBranch - $vmrManifestBranch = if ($isForwardFlow -and $pr.baseRefName) { $pr.baseRefName } else { $vmrBranch } - if ($fixIsMerged -and $vmrManifestBranch) { - Write-Host "" - Write-Host " Checking VMR source-manifest.json on $vmrManifestBranch..." 
-ForegroundColor White - - $encodedManifestBranch = [uri]::EscapeDataString($vmrManifestBranch) - $manifestUrl = "/repos/dotnet/dotnet/contents/src/source-manifest.json?ref=$encodedManifestBranch" - $manifestJson = Invoke-GitHubApi $manifestUrl -Raw - if ($manifestJson) { - try { - $manifest = $manifestJson | ConvertFrom-Json - } - catch { - Write-Warning "Could not parse VMR source-manifest.json: $_" - $manifest = $null - } - - # Find the repo in the manifest - $escapedRepo = [regex]::Escape($traceRepo) - $repoEntry = $manifest.repositories | Where-Object { - $_.remoteUri -match "${escapedRepo}(\.git)?$" -or $_.path -eq $traceRepo - } - - if ($repoEntry) { - $manifestCommit = $repoEntry.commitSha - Write-Status "VMR manifest commit" "$(Get-ShortSha $manifestCommit) for $($repoEntry.path)" - - # Check if the fix merge commit is an ancestor of the manifest commit - if ($fixMergeCommit -eq $manifestCommit) { - Write-Host " ✅ Fix merge commit IS the VMR manifest commit" -ForegroundColor Green - } - else { - # Check if fix is an ancestor of the manifest commit - $ancestorCheck = Invoke-GitHubApi "/repos/$traceFullRepo/compare/$fixMergeCommit...$manifestCommit" - if ($ancestorCheck) { - if ($ancestorCheck.status -eq "ahead" -or $ancestorCheck.status -eq "identical") { - Write-Host " ✅ Fix is included in VMR manifest (manifest is ahead or identical)" -ForegroundColor Green - } - elseif ($ancestorCheck.status -eq "behind") { - Write-Host " ❌ Fix is NOT in VMR manifest yet (manifest is behind the fix)" -ForegroundColor Red - } - else { - Write-Host " ⚠️ Fix and manifest have diverged (status: $($ancestorCheck.status))" -ForegroundColor Yellow - } - } - } - - # Now check if the PR's VMR snapshot includes this - # For backflow: $vmrCommit is a VMR SHA, use it directly - # For forward flow: $vmrCommit is a source repo SHA, use PR head commit in dotnet/dotnet instead - $snapshotRef = $vmrCommit - if ($isForwardFlow -and $pr.commits -and $pr.commits.Count -gt 0) { - 
$snapshotRef = $pr.commits[-1].oid - } - if ($snapshotRef) { - Write-Host "" - Write-Host " Checking if fix is in the PR's snapshot..." -ForegroundColor White - - $snapshotManifestUrl = "/repos/dotnet/dotnet/contents/src/source-manifest.json?ref=$snapshotRef" - $snapshotJson = Invoke-GitHubApi $snapshotManifestUrl -Raw - if ($snapshotJson) { - try { - $snapshotData = $snapshotJson | ConvertFrom-Json - } - catch { - Write-Warning "Could not parse snapshot manifest: $_" - $snapshotData = $null - } - - $snapshotEntry = $snapshotData.repositories | Where-Object { - $_.remoteUri -match "${escapedRepo}(\.git)?$" -or $_.path -eq $traceRepo - } - - if ($snapshotEntry) { - $snapshotCommit = $snapshotEntry.commitSha - Write-Status "PR snapshot commit" "$(Get-ShortSha $snapshotCommit) for $($snapshotEntry.path)" - - if ($snapshotCommit -eq $fixMergeCommit) { - Write-Host " ✅ Fix IS in the PR's VMR snapshot" -ForegroundColor Green - } - else { - $snapshotCheck = Invoke-GitHubApi "/repos/$traceFullRepo/compare/$fixMergeCommit...$snapshotCommit" - if ($snapshotCheck) { - if ($snapshotCheck.status -eq "ahead" -or $snapshotCheck.status -eq "identical") { - Write-Host " ✅ Fix is included in PR snapshot" -ForegroundColor Green - } - else { - Write-Host " ❌ Fix is NOT in the PR's VMR snapshot" -ForegroundColor Red - Write-Host " The PR needs a codeflow update to pick up this fix." -ForegroundColor Yellow - } - } - } - } - } - } - } - else { - Write-Warning "Could not find $traceRepo in VMR source-manifest.json" - } - } - } - } -} - -# --- Step 9: Structured Summary --- -# Emit a JSON summary for the agent to reason over when generating recommendations. -# The agent should use SKILL.md guidance to synthesize contextual recommendations. 
- -$summary = [ordered]@{ - prNumber = $PRNumber - repository = $Repository - prState = $pr.state - currentState = $currentState - isCodeflowPR = ($isBackflow -or $isForwardFlow) - isMaestroAuthored = $isMaestroPR - flowDirection = if ($isForwardFlow) { "forward" } elseif ($isBackflow) { "backflow" } else { "unknown" } - isEmptyDiff = $isEmptyDiff - changedFiles = [int]$pr.changedFiles - additions = [int]$pr.additions - deletions = [int]$pr.deletions - subscriptionId = $subscriptionId - vmrCommit = if ($vmrCommit) { Get-ShortSha $vmrCommit } else { $null } - vmrBranch = $vmrBranch -} - -# Freshness -$hasFreshnessData = ($null -ne $vmrCommit -and $null -ne $sourceHeadSha) -$summary.freshness = [ordered]@{ - sourceHeadSha = if ($sourceHeadSha) { Get-ShortSha $sourceHeadSha } else { $null } - compareStatus = $compareStatus - aheadBy = $aheadBy - behindBy = $behindBy - isUpToDate = if ($hasFreshnessData) { ($vmrCommit -eq $sourceHeadSha -or $compareStatus -eq 'identical') } else { $null } -} - -# Force pushes -$summary.forcePushes = [ordered]@{ - count = $forcePushEvents.Count - fetchSucceeded = $forcePushFetchSucceeded - lastActor = if ($lastForcePushActor) { $lastForcePushActor } else { $null } - lastTime = if ($lastForcePushTime) { $lastForcePushTime.ToString("o") } else { $null } -} - -# Warnings -$summary.warnings = [ordered]@{ - conflictCount = $conflictWarnings.Count - conflictFiles = $conflictFiles - conflictMayBeResolved = $conflictMayBeResolved - stalenessCount = $stalenessWarnings.Count - stalenessMayBeResolved = $stalenessMayBeResolved -} - -# Commits -$manualCommitCount = if ($manualCommits) { $manualCommits.Count } else { 0 } -$codeflowLikeCount = if ($codeflowLikeManualCommits) { $codeflowLikeManualCommits.Count } else { 0 } -$summary.commits = [ordered]@{ - total = if ($prCommits) { $prCommits.Count } else { 0 } - manual = $manualCommitCount - codeflowLikeManual = $codeflowLikeCount -} - -# PR age -$summary.age = [ordered]@{ - daysSinceUpdate = 
[math]::Max(0, [math]::Round($prAgeDays, 1)) - createdAt = $pr.createdAt - updatedAt = $pr.updatedAt -} - -Write-Host "" -Write-Host "[CODEFLOW_SUMMARY]" -Write-Host ($summary | ConvertTo-Json -Depth 4 -Compress) -Write-Host "[/CODEFLOW_SUMMARY]" - -# Ensure clean exit code (gh api failures may leave $LASTEXITCODE = 1) -exit 0