Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions dotnet/test/Harness/CapiProxy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -84,15 +84,16 @@ async Task<string> StartCoreAsync()
}
}

public async Task StopAsync()
public async Task StopAsync(bool skipWritingCache = false)
{
if (_startupTask != null)
{
try
{
var url = await _startupTask;
var stopUrl = skipWritingCache ? $"{url}/stop?skipWritingCache=true" : $"{url}/stop";
using var client = new HttpClient();
await client.PostAsync($"{url}/stop", null);
await client.PostAsync(stopUrl, null);
}
catch { /* Best effort */ }
}
Expand Down
4 changes: 3 additions & 1 deletion dotnet/test/Harness/E2ETestContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,9 @@ public IReadOnlyDictionary<string, string> GetEnvironment()

public async ValueTask DisposeAsync()
{
await _proxy.DisposeAsync();
// Skip writing snapshots in CI to avoid corrupting them on test failures
var isCI = !string.IsNullOrEmpty(Environment.GetEnvironmentVariable("CI"));
await _proxy.StopAsync(skipWritingCache: isCI);
Comment on lines +104 to +106
Copy link

Copilot AI Jan 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The .NET implementation uses a CI environment variable check as a workaround for lack of test failure detection, while other languages (Go, Python, Node.js) properly detect actual test failures. This means snapshots will always be skipped in CI even for passing tests, which differs from the behavior in other languages and may not align with the PR's stated goal of only skipping on failure. Consider using a more sophisticated approach like capturing test status through ITestOutputHelper or other xUnit extensibility points.

Suggested change
// Skip writing snapshots in CI to avoid corrupting them on test failures
var isCI = !string.IsNullOrEmpty(Environment.GetEnvironmentVariable("CI"));
await _proxy.StopAsync(skipWritingCache: isCI);
await _proxy.StopAsync(skipWritingCache: false);

Copilot uses AI. Check for mistakes.

try { if (Directory.Exists(HomeDir)) Directory.Delete(HomeDir, true); } catch { }
try { if (Directory.Exists(WorkDir)) Directory.Delete(WorkDir, true); } catch { }
Expand Down
9 changes: 5 additions & 4 deletions go/e2e/testharness/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@
WorkDir string
ProxyURL string

proxy *CapiProxy
proxy *CapiProxy
testFailed bool

Check failure on line 46 in go/e2e/testharness/context.go

View workflow job for this annotation

GitHub Actions / Go SDK Tests (ubuntu-latest)

field testFailed is unused (unused)
}

// NewTestContext creates a new test context with isolated directories and a replaying proxy.
Expand Down Expand Up @@ -82,7 +83,7 @@
}

t.Cleanup(func() {
ctx.Close()
ctx.Close(t.Failed())
})

return ctx
Expand Down Expand Up @@ -113,9 +114,9 @@
}

// Close cleans up the test context resources.
func (c *TestContext) Close() {
func (c *TestContext) Close(testFailed bool) {
if c.proxy != nil {
c.proxy.Stop()
c.proxy.StopWithOptions(testFailed)
}
if c.HomeDir != "" {
os.RemoveAll(c.HomeDir)
Expand Down
12 changes: 11 additions & 1 deletion go/e2e/testharness/proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,12 @@ func (p *CapiProxy) Start() (string, error) {

// Stop gracefully shuts down the proxy server.
func (p *CapiProxy) Stop() error {
return p.StopWithOptions(false)
}

// StopWithOptions gracefully shuts down the proxy server.
// If skipWritingCache is true, the proxy won't write captured exchanges to disk.
func (p *CapiProxy) StopWithOptions(skipWritingCache bool) error {
p.mu.Lock()
defer p.mu.Unlock()

Expand All @@ -84,8 +90,12 @@ func (p *CapiProxy) Stop() error {

// Send stop request to the server
if p.proxyURL != "" {
stopURL := p.proxyURL + "/stop"
if skipWritingCache {
stopURL += "?skipWritingCache=true"
}
// Best effort - ignore errors
resp, err := http.Post(p.proxyURL+"/stop", "application/json", nil)
resp, err := http.Post(stopURL, "application/json", nil)
if err == nil {
resp.Body.Close()
}
Expand Down
7 changes: 5 additions & 2 deletions nodejs/test/e2e/harness/CapiProxy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,11 @@ export class CapiProxy {
return await response.json();
}

async stop(): Promise<void> {
const response = await fetch(`${this.proxyUrl}/stop`, { method: "POST" });
async stop(skipWritingCache?: boolean): Promise<void> {
const url = skipWritingCache
? `${this.proxyUrl}/stop?skipWritingCache=true`
: `${this.proxyUrl}/stop`;
const response = await fetch(url, { method: "POST" });
expect(response.ok).toBe(true);
}
}
11 changes: 9 additions & 2 deletions nodejs/test/e2e/harness/sdkTestContext.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import os from "os";
import { basename, dirname, join, resolve } from "path";
import { rimraf } from "rimraf";
import { fileURLToPath } from "url";
import { afterAll, afterEach, beforeEach, TestContext } from "vitest";
import { afterAll, afterEach, beforeEach, onTestFailed, TestContext } from "vitest";
import { CopilotClient } from "../../../src";
import { CapiProxy } from "./CapiProxy";
import { retry } from "./sdkTestHelper";
Expand Down Expand Up @@ -44,8 +44,15 @@ export async function createSdkTestContext() {

const harness = { homeDir, workDir, openAiEndpoint, copilotClient, env };

// Track if any test fails to avoid writing corrupted snapshots
let anyTestFailed = false;

// Wire up to Vitest lifecycle
beforeEach(async (testContext) => {
onTestFailed(() => {
anyTestFailed = true;
});

await openAiEndpoint.updateConfig({
filePath: getTrafficCapturePath(testContext),
workDir,
Expand All @@ -63,7 +70,7 @@ export async function createSdkTestContext() {

afterAll(async () => {
await copilotClient.stop();
await openAiEndpoint.stop();
await openAiEndpoint.stop(anyTestFailed);
await rmDir("remove e2e test homeDir", homeDir);
await rmDir("remove e2e test workDir", workDir);
});
Expand Down
19 changes: 18 additions & 1 deletion python/e2e/conftest.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,34 @@
"""Shared pytest fixtures for e2e tests."""

import pytest
import pytest_asyncio

from .testharness import E2ETestContext


# Track if any test failed to avoid writing corrupted snapshots
_any_test_failed = False
Comment on lines +9 to +10
Copy link

Copilot AI Jan 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The global _any_test_failed variable is not thread-safe. If pytest runs tests in parallel (e.g., with pytest-xdist), multiple test processes could have race conditions. Consider using pytest's built-in mechanisms or a thread-safe approach to track test failures.

Copilot uses AI. Check for mistakes.


@pytest.hookimpl(tryfirst=True, hookwrapper=True)
def pytest_runtest_makereport(item, call):
"""Track test failures to avoid writing corrupted snapshots."""
global _any_test_failed
outcome = yield
rep = outcome.get_result()
if rep.when == "call" and rep.failed:
_any_test_failed = True


@pytest_asyncio.fixture(scope="module", loop_scope="module")
async def ctx():
"""Create and teardown a test context shared across all tests in this module."""
global _any_test_failed
_any_test_failed = False # Reset for each module
context = E2ETestContext()
await context.setup()
yield context
await context.teardown()
await context.teardown(test_failed=_any_test_failed)


@pytest_asyncio.fixture(autouse=True, loop_scope="module")
Expand Down
10 changes: 7 additions & 3 deletions python/e2e/testharness/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,14 +73,18 @@ async def setup(self):
}
)

async def teardown(self):
"""Clean up the test context."""
async def teardown(self, test_failed: bool = False):
"""Clean up the test context.

Args:
test_failed: If True, skip writing snapshots to avoid corruption.
"""
if self._client:
await self._client.stop()
self._client = None

if self._proxy:
await self._proxy.stop()
await self._proxy.stop(skip_writing_cache=test_failed)
self._proxy = None

if self.home_dir and os.path.exists(self.home_dir):
Expand Down
13 changes: 10 additions & 3 deletions python/e2e/testharness/proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,16 +59,23 @@ async def start(self) -> str:
self._proxy_url = match.group(1)
return self._proxy_url

async def stop(self):
"""Gracefully shut down the proxy server."""
async def stop(self, skip_writing_cache: bool = False):
"""Gracefully shut down the proxy server.

Args:
skip_writing_cache: If True, the proxy won't write captured exchanges to disk.
"""
if not self._process:
return

# Send stop request to the server
if self._proxy_url:
try:
stop_url = f"{self._proxy_url}/stop"
if skip_writing_cache:
stop_url += "?skipWritingCache=true"
async with httpx.AsyncClient() as client:
await client.post(f"{self._proxy_url}/stop")
await client.post(stop_url)
except Exception:
pass # Best effort

Expand Down
5 changes: 3 additions & 2 deletions test/harness/replayingCapiProxy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -146,12 +146,13 @@ export class ReplayingCapiProxy extends CapturingHttpProxy {

// Handle /stop endpoint for stopping the proxy
if (
options.requestOptions.path === "/stop" &&
options.requestOptions.path?.startsWith("/stop") &&
options.requestOptions.method === "POST"
) {
const skipWritingCache = options.requestOptions.path.includes("skipWritingCache=true");
Copy link

Copilot AI Jan 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using string .includes() to parse query parameters is fragile and could match unintended patterns (e.g., ?foo=skipWritingCache=true or ?skipWritingCache=true&bar=1). Consider using a proper URL query parameter parser like new URLSearchParams() for more robust parameter extraction.

Suggested change
const skipWritingCache = options.requestOptions.path.includes("skipWritingCache=true");
let skipWritingCache = false;
const path = options.requestOptions.path;
const queryIndex = path?.indexOf("?");
if (queryIndex !== undefined && queryIndex !== -1) {
const searchParams = new URLSearchParams(path.substring(queryIndex + 1));
skipWritingCache = searchParams.get("skipWritingCache") === "true";
}

Copilot uses AI. Check for mistakes.
options.onResponseStart(200, {});
options.onResponseEnd();
await this.stop();
await this.stop(skipWritingCache);
process.exit(0);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ conversations:
content: ${system}
- role: user
content: Run 'echo test' and tell me what happens
- role: assistant
content: I'll run the echo command for you.
- role: assistant
tool_calls:
- id: toolcall_0
Expand All @@ -22,6 +20,23 @@ conversations:
function:
name: ${shell}
arguments: '{"command":"echo test","description":"Run echo test"}'
- messages:
- role: system
content: ${system}
- role: user
content: Run 'echo test' and tell me what happens
- role: assistant
tool_calls:
- id: toolcall_0
type: function
function:
name: report_intent
arguments: '{"intent":"Running echo command"}'
- id: toolcall_1
type: function
function:
name: ${shell}
arguments: '{"command":"echo test","description":"Run echo test"}'
- role: tool
tool_call_id: toolcall_0
content: Intent logged
Expand All @@ -31,5 +46,4 @@ conversations:
test
<exited with exit code 0>
- role: assistant
content: The command executed successfully and printed "test" to the output, then exited with code 0 (indicating
success).
content: The command executed successfully and printed "test" to the console, then exited with code 0 (success).
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,23 @@ conversations:
function:
name: ${shell}
arguments: '{"command":"echo test","description":"Run echo test command"}'
- messages:
- role: system
content: ${system}
- role: user
content: Run 'echo test'. If you can't, say 'failed'.
- role: assistant
tool_calls:
- id: toolcall_0
type: function
function:
name: report_intent
arguments: '{"intent":"Running echo command"}'
- id: toolcall_1
type: function
function:
name: ${shell}
arguments: '{"command":"echo test","description":"Run echo test command"}'
- role: tool
tool_call_id: toolcall_0
content: Intent logged
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,23 @@ conversations:
function:
name: view
arguments: '{"path":"${workdir}/test.txt"}'
- messages:
- role: system
content: ${system}
- role: user
content: Edit test.txt and replace 'original' with 'modified'
- role: assistant
tool_calls:
- id: toolcall_0
type: function
function:
name: report_intent
arguments: '{"intent":"Editing test.txt file"}'
- id: toolcall_1
type: function
function:
name: view
arguments: '{"path":"${workdir}/test.txt"}'
- role: tool
tool_call_id: toolcall_0
content: Intent logged
Expand All @@ -37,4 +54,4 @@ conversations:
tool_call_id: toolcall_2
content: File ${workdir}/test.txt updated with changes.
- role: assistant
content: Done! Replaced 'original' with 'modified' in test.txt.
content: Done! I've replaced 'original' with 'modified' in test.txt.
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,23 @@ conversations:
function:
name: ${shell}
arguments: '{"command":"echo test","description":"Run echo test"}'
- messages:
- role: system
content: ${system}
- role: user
content: Run 'echo test'
- role: assistant
tool_calls:
- id: toolcall_0
type: function
function:
name: report_intent
arguments: '{"intent":"Running echo command"}'
- id: toolcall_1
type: function
function:
name: ${shell}
arguments: '{"command":"echo test","description":"Run echo test"}'
- role: tool
tool_call_id: toolcall_0
content: Intent logged
Expand All @@ -29,4 +46,4 @@ conversations:
test
<exited with exit code 0>
- role: assistant
content: "The command executed successfully and output: **test**"
content: The command executed successfully and output "test".
Loading
Loading