diff --git a/dotnet/src/Client.cs b/dotnet/src/Client.cs index cfe37cf77..6b96ff67c 100644 --- a/dotnet/src/Client.cs +++ b/dotnet/src/Client.cs @@ -373,7 +373,11 @@ private async Task CleanupConnectionAsync(List? errors) { try { - if (!childProcess.HasExited) childProcess.Kill(); + if (!childProcess.HasExited) + { + childProcess.Kill(entireProcessTree: true); + await childProcess.WaitForExitAsync(); + } childProcess.Dispose(); } catch (Exception ex) { errors?.Add(ex); } @@ -1090,7 +1094,7 @@ internal static async Task InvokeRpcAsync(JsonRpc rpc, string method, obje if (!string.IsNullOrEmpty(stderrOutput)) { - throw new IOException($"CLI process exited unexpectedly.\nstderr: {stderrOutput}", ex); + throw new IOException(FormatCliExitedMessage("CLI process exited unexpectedly.", stderrOutput), ex); } throw new IOException($"Communication error with Copilot CLI: {ex.Message}", ex); } @@ -1100,6 +1104,24 @@ internal static async Task InvokeRpcAsync(JsonRpc rpc, string method, obje } } + private static string FormatCliExitedMessage(string message, string stderrOutput) + { + return string.IsNullOrEmpty(stderrOutput) + ? 
message + : $"{message}\nstderr: {stderrOutput}"; + } + + private static IOException CreateCliExitedException(string message, StringBuilder stderrBuffer) + { + string stderrOutput; + lock (stderrBuffer) + { + stderrOutput = stderrBuffer.ToString().Trim(); + } + + return new IOException(FormatCliExitedMessage(message, stderrOutput)); + } + private Task EnsureConnectedAsync(CancellationToken cancellationToken) { if (_connectionTask is null && !_options.AutoStart) @@ -1152,7 +1174,7 @@ private async Task VerifyProtocolVersionAsync(Connection connection, Cancellatio connection.Rpc, "connect", [new ConnectRequest { Token = _effectiveConnectionToken }], connection.StderrBuffer, cancellationToken); serverVersion = (int)connectResponse.ProtocolVersion; } - catch (RemoteRpcException ex) when (ex.ErrorCode == RemoteRpcException.MethodNotFoundErrorCode) + catch (IOException ex) when (ex.InnerException is RemoteRpcException remoteEx && IsUnsupportedConnectMethod(remoteEx)) { // Legacy server without `connect`; fall back to `ping`. A token, if any, // is silently dropped — the legacy server can't enforce one. @@ -1180,6 +1202,12 @@ private async Task VerifyProtocolVersionAsync(Connection connection, Cancellatio _negotiatedProtocolVersion = serverVersion.Value; } + private static bool IsUnsupportedConnectMethod(RemoteRpcException ex) + { + return ex.ErrorCode == RemoteRpcException.MethodNotFoundErrorCode + || string.Equals(ex.Message, "Unhandled method connect", StringComparison.Ordinal); + } + private static async Task<(Process Process, int? DetectedLocalhostTcpPort, StringBuilder StderrBuffer)> StartCliServerAsync(CopilotClientOptions options, string? 
connectionToken, ILogger logger, CancellationToken cancellationToken) { // Use explicit path, COPILOT_CLI_PATH env var (from options.Environment or process env), or bundled CLI - no PATH fallback @@ -1282,22 +1310,24 @@ private async Task VerifyProtocolVersionAsync(Connection connection, Cancellatio // Capture stderr for error messages and forward to logger var stderrBuffer = new StringBuilder(); - _ = Task.Run(async () => + var stderrReader = Task.Run(async () => { - while (cliProcess != null && !cliProcess.HasExited) + while (true) { var line = await cliProcess.StandardError.ReadLineAsync(cancellationToken); - if (line != null) + if (line is null) { - lock (stderrBuffer) - { - stderrBuffer.AppendLine(line); - } + break; + } - if (logger.IsEnabled(LogLevel.Debug)) - { - logger.LogDebug("[CLI] {Line}", line); - } + lock (stderrBuffer) + { + stderrBuffer.AppendLine(line); + } + + if (logger.IsEnabled(LogLevel.Debug)) + { + logger.LogDebug("[CLI] {Line}", line); } } }, cancellationToken); @@ -1311,7 +1341,13 @@ private async Task VerifyProtocolVersionAsync(Connection connection, Cancellatio while (!cts.Token.IsCancellationRequested) { - var line = await cliProcess.StandardOutput.ReadLineAsync(cts.Token) ?? throw new IOException("CLI process exited unexpectedly"); + var line = await cliProcess.StandardOutput.ReadLineAsync(cts.Token); + if (line is null) + { + await stderrReader; + throw CreateCliExitedException("CLI process exited unexpectedly", stderrBuffer); + } + if (ListeningOnPortRegex().Match(line) is { Success: true } match) { detectedLocalhostTcpPort = int.Parse(match.Groups[1].Value, CultureInfo.InvariantCulture); diff --git a/dotnet/test/E2E/AbortE2ETests.cs b/dotnet/test/E2E/AbortE2ETests.cs new file mode 100644 index 000000000..910038d1b --- /dev/null +++ b/dotnet/test/E2E/AbortE2ETests.cs @@ -0,0 +1,138 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. 
All rights reserved. + *--------------------------------------------------------------------------------------------*/ + +using System.ComponentModel; +using GitHub.Copilot.SDK.Test.Harness; +using Microsoft.Extensions.AI; +using Xunit; +using Xunit.Abstractions; + +namespace GitHub.Copilot.SDK.Test.E2E; + +/// +/// Verifies that cleanly interrupts an active +/// turn — both during streaming and during tool execution — without leaving dangling +/// state or causing exceptions in the event delivery pipeline. +/// +public class AbortE2ETests(E2ETestFixture fixture, ITestOutputHelper output) + : E2ETestBase(fixture, "abort", output) +{ + [Fact] + public async Task Should_Abort_During_Active_Streaming() + { + var session = await CreateSessionAsync(new SessionConfig { Streaming = true }); + + var firstDeltaReceived = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + var allEvents = new List(); + + session.On(evt => + { + lock (allEvents) { allEvents.Add(evt); } + if (evt is AssistantMessageDeltaEvent delta) + { + firstDeltaReceived.TrySetResult(delta); + } + }); + + // Fire-and-forget — we'll abort before it finishes + _ = session.SendAsync(new MessageOptions + { + Prompt = "Write a very long essay about the history of computing, covering every decade from the 1940s to the 2020s in great detail.", + }); + + // Wait for at least one delta to arrive (proves streaming started) + var delta = await firstDeltaReceived.Task.WaitAsync(TimeSpan.FromSeconds(60)); + Assert.False(string.IsNullOrEmpty(delta.Data.DeltaContent)); + + // Now abort mid-stream + await session.AbortAsync(); + + List snapshot; + lock (allEvents) { snapshot = [.. 
allEvents]; } + + // No session.idle should have appeared (abort cancels the turn) + // OR if idle DID appear, it should be after the abort, which is fine + // The key contract: no exceptions were thrown, and the session is usable afterwards + var types = snapshot.Select(e => e.Type).ToList(); + Assert.Contains("assistant.message_delta", types); + + // Session should be usable after abort — verify by listening for the + // recovery message rather than racing against a late idle from the + // aborted streaming turn. + var recoveryReceived = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + session.On(evt => + { + if (evt is AssistantMessageEvent msg && (msg.Data.Content?.Contains("abort_recovery_ok") == true)) + { + recoveryReceived.TrySetResult(msg); + } + }); + + await session.SendAsync(new MessageOptions + { + Prompt = "Say 'abort_recovery_ok'.", + }); + + var recoveryMessage = await recoveryReceived.Task.WaitAsync(TimeSpan.FromSeconds(60)); + Assert.Contains("abort_recovery_ok", recoveryMessage.Data.Content?.ToLowerInvariant() ?? string.Empty); + + await session.DisposeAsync(); + } + + [Fact] + public async Task Should_Abort_During_Active_Tool_Execution() + { + var toolStarted = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + var releaseTool = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + + var session = await CreateSessionAsync(new SessionConfig + { + Tools = [AIFunctionFactory.Create(SlowTool, "slow_analysis")], + OnPermissionRequest = PermissionHandler.ApproveAll, + }); + + // Fire-and-forget + _ = session.SendAsync(new MessageOptions + { + Prompt = "Use slow_analysis with value 'test_abort'. 
Wait for the result.", + }); + + // Wait for the tool to start executing + var toolValue = await toolStarted.Task.WaitAsync(TimeSpan.FromSeconds(60)); + Assert.Equal("test_abort", toolValue); + + // Abort while the tool is running + await session.AbortAsync(); + + // Release the tool so its task doesn't leak + releaseTool.TrySetResult("RELEASED_AFTER_ABORT"); + + // Session should be usable after abort — verify by listening for the right event + var recoveryReceived = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + session.On(evt => + { + if (evt is AssistantMessageEvent msg && (msg.Data.Content?.Contains("tool_abort_recovery_ok") == true)) + { + recoveryReceived.TrySetResult(msg); + } + }); + + await session.SendAsync(new MessageOptions + { + Prompt = "Say 'tool_abort_recovery_ok'.", + }); + + var recoveryMessage = await recoveryReceived.Task.WaitAsync(TimeSpan.FromSeconds(60)); + Assert.Contains("tool_abort_recovery_ok", recoveryMessage.Data.Content?.ToLowerInvariant() ?? 
string.Empty); + + await session.DisposeAsync(); + + [Description("A slow analysis tool that blocks until released")] + async Task SlowTool([Description("Value to analyze")] string value) + { + toolStarted.TrySetResult(value); + return await releaseTool.Task; + } + } +} diff --git a/dotnet/test/E2E/ClientE2ETests.cs b/dotnet/test/E2E/ClientE2ETests.cs index f1b60e685..ff3b4e672 100644 --- a/dotnet/test/E2E/ClientE2ETests.cs +++ b/dotnet/test/E2E/ClientE2ETests.cs @@ -10,10 +10,12 @@ namespace GitHub.Copilot.SDK.Test.E2E; // Other test classes should instead inherit from E2ETestBase public class ClientE2ETests { - [Fact] - public async Task Should_Start_And_Connect_To_Server_Using_Stdio() + [Theory] + [InlineData(true)] // stdio transport + [InlineData(false)] // TCP transport + public async Task Should_Start_And_Connect_To_Server(bool useStdio) { - using var client = new CopilotClient(new CopilotClientOptions { UseStdio = true }); + using var client = new CopilotClient(new CopilotClientOptions { UseStdio = useStdio }); try { @@ -33,31 +35,12 @@ public async Task Should_Start_And_Connect_To_Server_Using_Stdio() } } - [Fact] - public async Task Should_Start_And_Connect_To_Server_Using_Tcp() + [Theory] + [InlineData(true)] // stdio transport + [InlineData(false)] // TCP transport + public async Task Should_Force_Stop_Without_Cleanup(bool useStdio) { - using var client = new CopilotClient(new CopilotClientOptions { UseStdio = false }); - - try - { - await client.StartAsync(); - Assert.Equal(ConnectionState.Connected, client.State); - - var pong = await client.PingAsync("test message"); - Assert.Equal("pong: test message", pong.Message); - - await client.StopAsync(); - } - finally - { - await client.ForceStopAsync(); - } - } - - [Fact] - public async Task Should_Force_Stop_Without_Cleanup() - { - using var client = new CopilotClient(new CopilotClientOptions()); + using var client = new CopilotClient(new CopilotClientOptions { UseStdio = useStdio }); await 
client.CreateSessionAsync(new SessionConfig { OnPermissionRequest = PermissionHandler.ApproveAll }); await client.ForceStopAsync(); @@ -65,10 +48,12 @@ public async Task Should_Force_Stop_Without_Cleanup() Assert.Equal(ConnectionState.Disconnected, client.State); } - [Fact] - public async Task Should_Get_Status_With_Version_And_Protocol_Info() + [Theory] + [InlineData(true)] // stdio transport + [InlineData(false)] // TCP transport + public async Task Should_Get_Status_With_Version_And_Protocol_Info(bool useStdio) { - using var client = new CopilotClient(new CopilotClientOptions { UseStdio = true }); + using var client = new CopilotClient(new CopilotClientOptions { UseStdio = useStdio }); try { @@ -87,10 +72,12 @@ public async Task Should_Get_Status_With_Version_And_Protocol_Info() } } - [Fact] - public async Task Should_Get_Auth_Status() + [Theory] + [InlineData(true)] // stdio transport + [InlineData(false)] // TCP transport + public async Task Should_Get_Auth_Status(bool useStdio) { - using var client = new CopilotClient(new CopilotClientOptions { UseStdio = true }); + using var client = new CopilotClient(new CopilotClientOptions { UseStdio = useStdio }); try { @@ -112,10 +99,12 @@ public async Task Should_Get_Auth_Status() } } - [Fact] - public async Task Should_List_Models_When_Authenticated() + [Theory] + [InlineData(true)] // stdio transport + [InlineData(false)] // TCP transport + public async Task Should_List_Models_When_Authenticated(bool useStdio) { - using var client = new CopilotClient(new CopilotClientOptions { UseStdio = true }); + using var client = new CopilotClient(new CopilotClientOptions { UseStdio = useStdio }); try { @@ -148,22 +137,26 @@ public async Task Should_List_Models_When_Authenticated() } } - [Fact] - public async Task Should_Not_Throw_When_Disposing_Session_After_Stopping_Client() + [Theory] + [InlineData(true)] // stdio transport + [InlineData(false)] // TCP transport + public async Task 
Should_Not_Throw_When_Disposing_Session_After_Stopping_Client(bool useStdio) { - await using var client = new CopilotClient(new CopilotClientOptions()); + await using var client = new CopilotClient(new CopilotClientOptions { UseStdio = useStdio }); await using var session = await client.CreateSessionAsync(new SessionConfig { OnPermissionRequest = PermissionHandler.ApproveAll }); await client.StopAsync(); } - [Fact] - public async Task Should_Report_Error_With_Stderr_When_CLI_Fails_To_Start() + [Theory] + [InlineData(true)] // stdio transport + [InlineData(false)] // TCP transport + public async Task Should_Report_Error_With_Stderr_When_CLI_Fails_To_Start(bool useStdio) { var client = new CopilotClient(new CopilotClientOptions { CliArgs = ["--nonexistent-flag-for-testing"], - UseStdio = true + UseStdio = useStdio }); var ex = await Assert.ThrowsAsync(() => client.StartAsync()); @@ -185,10 +178,12 @@ public async Task Should_Report_Error_With_Stderr_When_CLI_Fails_To_Start() try { await client.ForceStopAsync(); } catch (Exception) { /* Expected */ } } - [Fact] - public async Task Should_Throw_When_CreateSession_Called_Without_PermissionHandler() + [Theory] + [InlineData(true)] // stdio transport + [InlineData(false)] // TCP transport + public async Task Should_Throw_When_CreateSession_Called_Without_PermissionHandler(bool useStdio) { - using var client = new CopilotClient(new CopilotClientOptions()); + using var client = new CopilotClient(new CopilotClientOptions { UseStdio = useStdio }); var ex = await Assert.ThrowsAsync(() => client.CreateSessionAsync(new SessionConfig())); @@ -196,10 +191,12 @@ public async Task Should_Throw_When_CreateSession_Called_Without_PermissionHandl Assert.Contains("is required", ex.Message); } - [Fact] - public async Task Should_Throw_When_ResumeSession_Called_Without_PermissionHandler() + [Theory] + [InlineData(true)] // stdio transport + [InlineData(false)] // TCP transport + public async Task 
Should_Throw_When_ResumeSession_Called_Without_PermissionHandler(bool useStdio) { - using var client = new CopilotClient(new CopilotClientOptions()); + using var client = new CopilotClient(new CopilotClientOptions { UseStdio = useStdio }); var ex = await Assert.ThrowsAsync(() => client.ResumeSessionAsync("some-session-id", new())); @@ -207,8 +204,10 @@ public async Task Should_Throw_When_ResumeSession_Called_Without_PermissionHandl Assert.Contains("is required", ex.Message); } - [Fact] - public async Task ListModels_WithCustomHandler_CallsHandler() + [Theory] + [InlineData(true)] // stdio transport + [InlineData(false)] // TCP transport + public async Task ListModels_WithCustomHandler_CallsHandler(bool useStdio) { IList customModels = new List { @@ -227,6 +226,7 @@ public async Task ListModels_WithCustomHandler_CallsHandler() var callCount = 0; await using var client = new CopilotClient(new CopilotClientOptions { + UseStdio = useStdio, OnListModels = (ct) => { callCount++; @@ -241,8 +241,10 @@ public async Task ListModels_WithCustomHandler_CallsHandler() Assert.Equal("my-custom-model", models[0].Id); } - [Fact] - public async Task ListModels_WithCustomHandler_CachesResults() + [Theory] + [InlineData(true)] // stdio transport + [InlineData(false)] // TCP transport + public async Task ListModels_WithCustomHandler_CachesResults(bool useStdio) { IList customModels = new List { @@ -261,6 +263,7 @@ public async Task ListModels_WithCustomHandler_CachesResults() var callCount = 0; await using var client = new CopilotClient(new CopilotClientOptions { + UseStdio = useStdio, OnListModels = (ct) => { callCount++; @@ -274,8 +277,10 @@ public async Task ListModels_WithCustomHandler_CachesResults() Assert.Equal(1, callCount); // Only called once due to caching } - [Fact] - public async Task ListModels_WithCustomHandler_WorksWithoutStart() + [Theory] + [InlineData(true)] // stdio transport + [InlineData(false)] // TCP transport + public async Task 
ListModels_WithCustomHandler_WorksWithoutStart(bool useStdio) { IList customModels = new List { @@ -294,6 +299,7 @@ public async Task ListModels_WithCustomHandler_WorksWithoutStart() var callCount = 0; await using var client = new CopilotClient(new CopilotClientOptions { + UseStdio = useStdio, OnListModels = (ct) => { callCount++; diff --git a/dotnet/test/E2E/ClientLifecycleE2ETests.cs b/dotnet/test/E2E/ClientLifecycleE2ETests.cs index f93f6e71a..7026093f8 100644 --- a/dotnet/test/E2E/ClientLifecycleE2ETests.cs +++ b/dotnet/test/E2E/ClientLifecycleE2ETests.cs @@ -2,6 +2,7 @@ * Copyright (c) Microsoft Corporation. All rights reserved. *--------------------------------------------------------------------------------------------*/ +using GitHub.Copilot.SDK.Rpc; using Xunit; using Xunit.Abstractions; @@ -22,7 +23,7 @@ public async Task Should_Receive_Session_Created_Lifecycle_Event() } }); - var session = await CreateSessionAsync(); + await using var session = await CreateSessionAsync(); var evt = await created.Task.WaitAsync(TimeSpan.FromSeconds(10)); Assert.Equal(SessionLifecycleEventTypes.Created, evt.Type); @@ -35,7 +36,7 @@ public async Task Should_Filter_Session_Lifecycle_Events_By_Type() var created = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); using var subscription = Client.On(SessionLifecycleEventTypes.Created, evt => created.TrySetResult(evt)); - var session = await CreateSessionAsync(); + await using var session = await CreateSessionAsync(); var evt = await created.Task.WaitAsync(TimeSpan.FromSeconds(10)); Assert.Equal(SessionLifecycleEventTypes.Created, evt.Type); @@ -51,7 +52,7 @@ public async Task Disposing_Lifecycle_Subscription_Stops_Receiving_Events() subscription.Dispose(); using var activeSubscription = Client.On(SessionLifecycleEventTypes.Created, evt => created.TrySetResult(evt)); - var session = await CreateSessionAsync(); + await using var session = await CreateSessionAsync(); var evt = await 
created.Task.WaitAsync(TimeSpan.FromSeconds(10)); Assert.Equal(session.SessionId, evt.SessionId); @@ -80,4 +81,60 @@ public async Task Dispose_Disconnects_Client_And_Disposes_Rpc_Surface(bool useAs Assert.Equal(ConnectionState.Disconnected, client.State); Assert.Throws(() => client.Rpc); } + + [Fact] + public async Task Should_Receive_Session_Updated_Lifecycle_Event_For_Non_Ephemeral_Activity() + { + await using var session = await CreateSessionAsync(); + + var updated = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + using var subscription = Client.On(SessionLifecycleEventTypes.Updated, evt => + { + if (string.Equals(evt.SessionId, session.SessionId, StringComparison.Ordinal)) + { + updated.TrySetResult(evt); + } + }); + + // session.mode.set emits a non-ephemeral session.mode_changed event, + // which the runtime forwards as session.updated to lifecycle subscribers. + await session.Rpc.Mode.SetAsync(SessionMode.Plan); + + var evt = await updated.Task.WaitAsync(TimeSpan.FromSeconds(15)); + Assert.Equal(SessionLifecycleEventTypes.Updated, evt.Type); + Assert.Equal(session.SessionId, evt.SessionId); + } + + [Fact] + public async Task Should_Receive_Session_Deleted_Lifecycle_Event_When_Deleted() + { + var session = await CreateSessionAsync(); + var sessionId = session.SessionId; + + // The runtime persists session state to disk only after the first user.message + // (LocalSessionManager.SessionWriter gates flushing on shouldSaveSession). + // session.delete fails with "Session file not found" otherwise, so prime + // persistence with a real LLM round-trip first. + await session.SendAndWaitAsync(new MessageOptions { Prompt = "Say SESSION_DELETED_OK exactly." 
}); + + var deleted = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + using var subscription = Client.On(SessionLifecycleEventTypes.Deleted, evt => + { + if (string.Equals(evt.SessionId, sessionId, StringComparison.Ordinal)) + { + deleted.TrySetResult(evt); + } + }); + + // Do NOT DisposeAsync the session before deleting: dispose sends session.destroy + // which closes in-memory state but does not remove the disk file; calling + // delete afterwards still succeeds, but skipping dispose keeps the test minimal. + await Client.DeleteSessionAsync(sessionId); + + var evt = await deleted.Task.WaitAsync(TimeSpan.FromSeconds(15)); + Assert.Equal(SessionLifecycleEventTypes.Deleted, evt.Type); + Assert.Equal(sessionId, evt.SessionId); + + await session.DisposeAsync(); + } } diff --git a/dotnet/test/E2E/ClientOptionsE2ETests.cs b/dotnet/test/E2E/ClientOptionsE2ETests.cs index dfdc41878..724367aa9 100644 --- a/dotnet/test/E2E/ClientOptionsE2ETests.cs +++ b/dotnet/test/E2E/ClientOptionsE2ETests.cs @@ -2,6 +2,7 @@ * Copyright (c) Microsoft Corporation. All rights reserved. 
*--------------------------------------------------------------------------------------------*/ +using System.Diagnostics; using System.Net; using System.Net.Sockets; using System.Text.Json; @@ -90,8 +91,7 @@ public async Task Should_Use_Client_Cwd_For_Default_WorkingDirectory() [Fact] public async Task Should_Propagate_Process_Options_To_Spawned_Cli() { - var cliPath = Path.Join(Ctx.WorkDir, $"fake-cli-{Guid.NewGuid():N}.js"); - var capturePath = Path.Join(Ctx.WorkDir, $"fake-cli-capture-{Guid.NewGuid():N}.json"); + var (cliPath, capturePath) = await CreateFakeCliCaptureAsync(); var telemetryPath = Path.Join(Ctx.WorkDir, "telemetry.jsonl"); var copilotHomeFromEnv = Path.Join(Ctx.WorkDir, "copilot-home-from-env"); var copilotHomeFromOption = Path.Join(Ctx.WorkDir, "copilot-home-from-option"); @@ -151,17 +151,91 @@ public async Task Should_Propagate_Process_Options_To_Spawned_Cli() }); using var updatedCapture = JsonDocument.Parse(await File.ReadAllTextAsync(capturePath)); - var createRequest = updatedCapture.RootElement - .GetProperty("requests") - .EnumerateArray() - .Single(request => request.GetProperty("method").GetString() == "session.create") - .GetProperty("params"); + var createRequest = GetCapturedRequestParams(updatedCapture.RootElement, "session.create"); Assert.True(createRequest.GetProperty("enableConfigDiscovery").GetBoolean()); Assert.False(createRequest.GetProperty("includeSubAgentStreamingEvents").GetBoolean()); await session.DisposeAsync(); } + [Fact] + public async Task Should_Propagate_Activity_TraceContext_To_Session_Create_And_Send() + { + var (cliPath, capturePath) = await CreateFakeCliCaptureAsync(); + + await using var client = Ctx.CreateClient(options: new CopilotClientOptions + { + AutoStart = false, + CliPath = cliPath, + CliArgs = ["--capture-file", capturePath], + UseLoggedInUser = false, + }); + + await client.StartAsync(); + + using var activity = new Activity("dotnet-sdk-trace-create-send"); + 
activity.SetIdFormat(ActivityIdFormat.W3C); + activity.TraceStateString = "vendor=create-send"; + activity.Start(); + + var session = await client.CreateSessionAsync(new SessionConfig + { + OnPermissionRequest = PermissionHandler.ApproveAll, + }); + + var messageId = await session.SendAsync(new MessageOptions + { + Prompt = "Trace this message.", + }); + + Assert.Equal("fake-message", messageId); + + using var capture = JsonDocument.Parse(await File.ReadAllTextAsync(capturePath)); + var createRequest = GetCapturedRequestParams(capture.RootElement, "session.create"); + var sendRequest = GetCapturedRequestParams(capture.RootElement, "session.send"); + + Assert.Equal(activity.Id, createRequest.GetProperty("traceparent").GetString()); + Assert.Equal("vendor=create-send", createRequest.GetProperty("tracestate").GetString()); + Assert.Equal(activity.Id, sendRequest.GetProperty("traceparent").GetString()); + Assert.Equal("vendor=create-send", sendRequest.GetProperty("tracestate").GetString()); + + await session.DisposeAsync(); + } + + [Fact] + public async Task Should_Propagate_Activity_TraceContext_To_Session_Resume() + { + var (cliPath, capturePath) = await CreateFakeCliCaptureAsync(); + + await using var client = Ctx.CreateClient(options: new CopilotClientOptions + { + AutoStart = false, + CliPath = cliPath, + CliArgs = ["--capture-file", capturePath], + UseLoggedInUser = false, + }); + + await client.StartAsync(); + + using var activity = new Activity("dotnet-sdk-trace-resume"); + activity.SetIdFormat(ActivityIdFormat.W3C); + activity.TraceStateString = "vendor=resume"; + activity.Start(); + + var session = await client.ResumeSessionAsync("trace-resume-session", new ResumeSessionConfig + { + OnPermissionRequest = PermissionHandler.ApproveAll, + }); + + using var capture = JsonDocument.Parse(await File.ReadAllTextAsync(capturePath)); + var resumeRequest = GetCapturedRequestParams(capture.RootElement, "session.resume"); + + Assert.Equal(activity.Id, 
resumeRequest.GetProperty("traceparent").GetString()); + Assert.Equal("vendor=resume", resumeRequest.GetProperty("tracestate").GetString()); + + await session.DisposeAsync(); + } + [Fact] public void Should_Accept_GitHubToken_Option() { @@ -271,6 +345,23 @@ private static void AssertArgumentValue(string?[] args, string name, string expe Assert.Equal(expectedValue, args[index + 1]); } + private async Task<(string CliPath, string CapturePath)> CreateFakeCliCaptureAsync() + { + var cliPath = Path.Join(Ctx.WorkDir, $"fake-cli-{Guid.NewGuid():N}.js"); + var capturePath = Path.Join(Ctx.WorkDir, $"fake-cli-capture-{Guid.NewGuid():N}.json"); + await File.WriteAllTextAsync(cliPath, FakeStdioCliScript); + return (cliPath, capturePath); + } + + private static JsonElement GetCapturedRequestParams(JsonElement captureRoot, string method) + { + return captureRoot + .GetProperty("requests") + .EnumerateArray() + .Single(request => request.GetProperty("method").GetString() == method) + .GetProperty("params"); + } + private const string FakeStdioCliScript = """ const fs = require("fs"); @@ -361,6 +452,17 @@ function handleMessage(message) { return; } + if (message.method === "session.resume") { + const sessionId = message.params?.sessionId ?? message.params?.[0]?.sessionId ?? "fake-session"; + writeResponse(message.id, { sessionId, workspacePath: null, capabilities: null }); + return; + } + + if (message.method === "session.send") { + writeResponse(message.id, { messageId: "fake-message" }); + return; + } + writeResponse(message.id, {}); } diff --git a/dotnet/test/E2E/CompactionE2ETests.cs b/dotnet/test/E2E/CompactionE2ETests.cs index e6a9d04e2..abee92219 100644 --- a/dotnet/test/E2E/CompactionE2ETests.cs +++ b/dotnet/test/E2E/CompactionE2ETests.cs @@ -2,7 +2,6 @@ * Copyright (c) Microsoft Corporation. All rights reserved. 
*--------------------------------------------------------------------------------------------*/ -using System.Runtime.InteropServices; using GitHub.Copilot.SDK.Test.Harness; using Xunit; using Xunit.Abstractions; @@ -11,38 +10,33 @@ namespace GitHub.Copilot.SDK.Test.E2E; public class CompactionE2ETests(E2ETestFixture fixture, ITestOutputHelper output) : E2ETestBase(fixture, "compaction", output) { - [Fact(Skip = "Compaction tests are skipped due to flakiness — re-enable once stabilized")] + private static readonly TimeSpan CompactionTimeout = TimeSpan.FromSeconds(60); + + [Fact] public async Task Should_Trigger_Compaction_With_Low_Threshold_And_Emit_Events() { - // Create session with very low compaction thresholds to trigger compaction quickly - var session = await CreateSessionAsync(new SessionConfig + await using var session = await CreateSessionAsync(new SessionConfig { InfiniteSessions = new InfiniteSessionConfig { Enabled = true, - // Trigger background compaction at 0.5% context usage (~1000 tokens) BackgroundCompactionThreshold = 0.005, - // Block at 1% to ensure compaction runs BufferExhaustionThreshold = 0.01 } }); - var compactionStartEvents = new List(); - var compactionCompleteEvents = new List(); - - session.On(evt => - { - if (evt is SessionCompactionStartEvent startEvt) - { - compactionStartEvents.Add(startEvt); - } - if (evt is SessionCompactionCompleteEvent completeEvt) - { - compactionCompleteEvents.Add(completeEvt); - } - }); + // The first prompt leaves the session below the compaction processor's minimum + // message count. The second prompt is therefore the first deterministic point + // at which low thresholds can trigger compaction. 
+ var compactionStarted = TestHelper.GetNextEventOfTypeAsync( + session, + CompactionTimeout); + var compactionCompleted = TestHelper.GetNextEventOfTypeAsync( + session, + evt => evt.Data.Success, + CompactionTimeout, + timeoutDescription: "successful compaction completion"); - // Send multiple messages to fill up the context window await session.SendAndWaitAsync(new MessageOptions { Prompt = "Tell me a story about a dragon. Be detailed." @@ -51,40 +45,37 @@ await session.SendAndWaitAsync(new MessageOptions { Prompt = "Continue the story with more details about the dragon's castle." }); - await session.SendAndWaitAsync(new MessageOptions - { - Prompt = "Now describe the dragon's treasure in great detail." - }); - // Should have triggered compaction at least once - Assert.True(compactionStartEvents.Count >= 1, "Expected at least 1 compaction_start event"); - Assert.True(compactionCompleteEvents.Count >= 1, "Expected at least 1 compaction_complete event"); + var startEvent = await compactionStarted; + var completeEvent = await compactionCompleted; - // Compaction should have succeeded - var lastComplete = compactionCompleteEvents[^1]; - Assert.True(lastComplete.Data.Success, "Expected compaction to succeed"); + Assert.True(startEvent.Data.ConversationTokens.GetValueOrDefault() > 0, "Expected compaction to report conversation tokens at start"); + Assert.True(completeEvent.Data.Success, "Expected compaction to succeed"); + Assert.NotNull(completeEvent.Data.CompactionTokensUsed); + Assert.True(completeEvent.Data.CompactionTokensUsed!.InputTokens.GetValueOrDefault() > 0, "Expected compaction call to consume input tokens"); + Assert.Contains("", completeEvent.Data.SummaryContent ?? string.Empty, StringComparison.OrdinalIgnoreCase); + Assert.Contains("", completeEvent.Data.SummaryContent ?? string.Empty, StringComparison.OrdinalIgnoreCase); + Assert.Contains("", completeEvent.Data.SummaryContent ?? 
string.Empty, StringComparison.OrdinalIgnoreCase); - // Should have removed some tokens - if (lastComplete.Data.TokensRemoved.HasValue) + await session.SendAndWaitAsync(new MessageOptions { - Assert.True(lastComplete.Data.TokensRemoved > 0, "Expected tokensRemoved > 0"); - } + Prompt = "Now describe the dragon's treasure in great detail." + }); - // Verify the session still works after compaction var answer = await session.SendAndWaitAsync(new MessageOptions { Prompt = "What was the story about?" }); - Assert.NotNull(answer); - Assert.NotNull(answer!.Data.Content); - // Should remember it was about a dragon (context preserved via summary) - Assert.Contains("dragon", answer.Data.Content.ToLower()); + + var content = answer?.Data.Content ?? string.Empty; + Assert.Contains("Kaedrith", content, StringComparison.OrdinalIgnoreCase); + Assert.Contains("dragon", content, StringComparison.OrdinalIgnoreCase); } - [Fact(Skip = "Compaction tests are skipped due to flakiness — re-enable once stabilized")] + [Fact] public async Task Should_Not_Emit_Compaction_Events_When_Infinite_Sessions_Disabled() { - var session = await CreateSessionAsync(new SessionConfig + await using var session = await CreateSessionAsync(new SessionConfig { InfiniteSessions = new InfiniteSessionConfig { diff --git a/dotnet/test/E2E/ElicitationE2ETests.cs b/dotnet/test/E2E/ElicitationE2ETests.cs index c14e8fa8a..fb6469ecf 100644 --- a/dotnet/test/E2E/ElicitationE2ETests.cs +++ b/dotnet/test/E2E/ElicitationE2ETests.cs @@ -91,34 +91,27 @@ public async Task Sends_RequestElicitation_When_Handler_Provided() await session.DisposeAsync(); } - [Fact] - public async Task Session_With_ElicitationHandler_Reports_Elicitation_Capability() + [Theory] + [InlineData(true)] + [InlineData(false)] + public async Task Should_Report_Elicitation_Capability_Based_On_Handler_Presence(bool hasHandler) { - var session = await CreateSessionAsync(new SessionConfig + var config = new SessionConfig { OnPermissionRequest = 
PermissionHandler.ApproveAll, - OnElicitationRequest = _ => Task.FromResult(new ElicitationResult + }; + + if (hasHandler) + { + config.OnElicitationRequest = _ => Task.FromResult(new ElicitationResult { Action = UIElicitationResponseAction.Accept, Content = new Dictionary(), - }), - }); - - Assert.True(session.Capabilities.Ui?.Elicitation == true, - "Session with onElicitationRequest should report elicitation capability"); - await session.DisposeAsync(); - } - - [Fact] - public async Task Session_Without_ElicitationHandler_Reports_No_Capability() - { - var session = await CreateSessionAsync(new SessionConfig - { - OnPermissionRequest = PermissionHandler.ApproveAll, - }); + }); + } - Assert.True(session.Capabilities.Ui?.Elicitation != true, - "Session without onElicitationRequest should not report elicitation capability"); + var session = await CreateSessionAsync(config); + Assert.Equal(hasHandler, session.Capabilities.Ui?.Elicitation == true); await session.DisposeAsync(); } diff --git a/dotnet/test/E2E/EventFidelityE2ETests.cs b/dotnet/test/E2E/EventFidelityE2ETests.cs index ccc9316ac..163a6a6a1 100644 --- a/dotnet/test/E2E/EventFidelityE2ETests.cs +++ b/dotnet/test/E2E/EventFidelityE2ETests.cs @@ -86,6 +86,76 @@ await session.SendAndWaitAsync(new MessageOptions await session.DisposeAsync(); } + [Fact] + public async Task Should_Emit_Assistant_Usage_Event_After_Model_Call() + { + var session = await CreateSessionAsync(); + var events = new List(); + session.On(evt => { lock (events) { events.Add(evt); } }); + + await session.SendAndWaitAsync(new MessageOptions + { + Prompt = "What is 5+5? Reply with just the number.", + }); + + AssistantUsageEvent? 
usageEvent; + lock (events) { usageEvent = events.OfType().LastOrDefault(); } + + Assert.NotNull(usageEvent); + Assert.False(string.IsNullOrWhiteSpace(usageEvent!.Data.Model)); + Assert.NotEqual(Guid.Empty, usageEvent.Id); + Assert.NotEqual(default, usageEvent.Timestamp); + + await session.DisposeAsync(); + } + + [Fact] + public async Task Should_Emit_Session_Usage_Info_Event_After_Model_Call() + { + var session = await CreateSessionAsync(); + var events = new List(); + session.On(evt => { lock (events) { events.Add(evt); } }); + + await session.SendAndWaitAsync(new MessageOptions + { + Prompt = "What is 5+5? Reply with just the number.", + }); + + SessionUsageInfoEvent? usageInfoEvent; + lock (events) { usageInfoEvent = events.OfType().LastOrDefault(); } + + Assert.NotNull(usageInfoEvent); + Assert.True(usageInfoEvent!.Data.CurrentTokens > 0); + Assert.True(usageInfoEvent.Data.MessagesLength > 0); + Assert.True(usageInfoEvent.Data.TokenLimit > 0); + + await session.DisposeAsync(); + } + + [Fact] + public async Task Should_Emit_Pending_Messages_Modified_Event_When_Message_Queue_Changes() + { + var session = await CreateSessionAsync(); + var pendingMessagesModified = TestHelper.GetNextEventOfTypeAsync( + session, + static _ => true, + timeout: TimeSpan.FromSeconds(60), + timeoutDescription: "pending_messages.modified event"); + + await session.SendAsync(new MessageOptions + { + Prompt = "What is 9+9? Reply with just the number.", + }); + + var pendingEvent = await pendingMessagesModified; + var answer = await TestHelper.GetFinalAssistantMessageAsync(session); + + Assert.NotNull(pendingEvent); + Assert.Contains("18", answer?.Data.Content ?? 
string.Empty); + + await session.DisposeAsync(); + } + [Fact] public async Task Should_Emit_Tool_Execution_Events_With_Correct_Fields() { @@ -142,4 +212,49 @@ await session.SendAndWaitAsync(new MessageOptions await session.DisposeAsync(); } + + [Fact] + public async Task Should_Preserve_Message_Order_In_GetMessages_After_Tool_Use() + { + await File.WriteAllTextAsync(Path.Join(Ctx.WorkDir, "order.txt"), "ORDER_CONTENT_42"); + + var session = await CreateSessionAsync(); + + await session.SendAndWaitAsync(new MessageOptions + { + Prompt = "Read the file 'order.txt' and tell me what the number is.", + }); + + var messages = await session.GetMessagesAsync(); + var types = messages.Select(m => m.Type).ToList(); + + // Verify complete event ordering contract: + // session.start → user.message → tool.execution_start → tool.execution_complete → assistant.message + var sessionStartIdx = types.IndexOf("session.start"); + var userMsgIdx = types.IndexOf("user.message"); + var toolStartIdx = types.IndexOf("tool.execution_start"); + var toolCompleteIdx = types.IndexOf("tool.execution_complete"); + var assistantMsgIdx = types.LastIndexOf("assistant.message"); + + Assert.True(sessionStartIdx >= 0, "Expected session.start event"); + Assert.True(userMsgIdx >= 0, "Expected user.message event"); + Assert.True(toolStartIdx >= 0, "Expected tool.execution_start event"); + Assert.True(toolCompleteIdx >= 0, "Expected tool.execution_complete event"); + Assert.True(assistantMsgIdx >= 0, "Expected assistant.message event"); + + Assert.True(sessionStartIdx < userMsgIdx, "session.start should precede user.message"); + Assert.True(userMsgIdx < toolStartIdx, "user.message should precede tool.execution_start"); + Assert.True(toolStartIdx < toolCompleteIdx, "tool.execution_start should precede tool.execution_complete"); + Assert.True(toolCompleteIdx < assistantMsgIdx, "tool.execution_complete should precede final assistant.message"); + + // Verify user.message has our content + var userEvent = 
messages.OfType().First(); + Assert.Contains("order.txt", userEvent.Data.Content ?? string.Empty); + + // Verify assistant.message references the file content + var assistantEvent = messages.OfType().Last(); + Assert.Contains("42", assistantEvent.Data.Content ?? string.Empty); + + await session.DisposeAsync(); + } } diff --git a/dotnet/test/E2E/HookLifecycleAndOutputE2ETests.cs b/dotnet/test/E2E/HookLifecycleAndOutputE2ETests.cs index 0f08ea559..a6627302b 100644 --- a/dotnet/test/E2E/HookLifecycleAndOutputE2ETests.cs +++ b/dotnet/test/E2E/HookLifecycleAndOutputE2ETests.cs @@ -82,6 +82,7 @@ public async Task Should_Invoke_OnUserPromptSubmitted_Hook_When_Sending_A_Messag public async Task Should_Invoke_OnSessionEnd_Hook_When_Session_Is_Disconnected() { var sessionEndInputs = new List(); + var sessionEndHookInvoked = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); CopilotSession? session = null; session = await CreateSessionAsync(new SessionConfig { @@ -90,6 +91,7 @@ public async Task Should_Invoke_OnSessionEnd_Hook_When_Session_Is_Disconnected() OnSessionEnd = (input, invocation) => { sessionEndInputs.Add(input); + sessionEndHookInvoked.TrySetResult(input); Assert.Equal(session!.SessionId, invocation.SessionId); return Task.FromResult(null); }, @@ -100,9 +102,7 @@ public async Task Should_Invoke_OnSessionEnd_Hook_When_Session_Is_Disconnected() await session.DisposeAsync(); - // Wait briefly for the async hook to fire - await Task.Delay(200); - + await sessionEndHookInvoked.Task.WaitAsync(TimeSpan.FromSeconds(10)); Assert.NotEmpty(sessionEndInputs); } diff --git a/dotnet/test/E2E/MultiClientCommandsElicitationE2ETests.cs b/dotnet/test/E2E/MultiClientCommandsElicitationE2ETests.cs index 4e3711650..5d70f51b1 100644 --- a/dotnet/test/E2E/MultiClientCommandsElicitationE2ETests.cs +++ b/dotnet/test/E2E/MultiClientCommandsElicitationE2ETests.cs @@ -2,7 +2,6 @@ * Copyright (c) Microsoft Corporation. All rights reserved. 
*--------------------------------------------------------------------------------------------*/ -using System.Reflection; using GitHub.Copilot.SDK.Test.Harness; using Xunit; using Xunit.Abstractions; @@ -26,16 +25,11 @@ public async Task InitializeAsync() Client1 = Ctx.CreateClient(useStdio: false, options: new CopilotClientOptions { TcpConnectionToken = SharedToken, - }); + }, persistent: true); } public async Task DisposeAsync() { - if (Client1 is not null) - { - await Client1.ForceStopAsync(); - } - await Ctx.DisposeAsync(); } } @@ -56,20 +50,12 @@ public MultiClientCommandsElicitationE2ETests( ITestOutputHelper output) { _fixture = fixture; - _testName = GetTestName(output); - } - - private static string GetTestName(ITestOutputHelper output) - { - var type = output.GetType(); - var testField = type.GetField("test", BindingFlags.Instance | BindingFlags.NonPublic); - var test = (ITest?)testField?.GetValue(output); - return test?.TestCase.TestMethod.Method.Name - ?? throw new InvalidOperationException("Couldn't find test name"); + _testName = E2ETestBase.GetTestName(output); } public async Task InitializeAsync() { + await Ctx.CleanupAfterTestAsync(); await Ctx.ConfigureForTestAsync("multi_client", _testName); // Trigger connection so we can read the port @@ -82,7 +68,7 @@ public async Task InitializeAsync() var port = Client1.ActualPort ?? 
throw new InvalidOperationException("Client1 is not using TCP mode; ActualPort is null"); - _client2 = new CopilotClient(new CopilotClientOptions + _client2 = Ctx.CreateClient(options: new CopilotClientOptions { CliUrl = $"localhost:{port}", TcpConnectionToken = MultiClientCommandsElicitationFixture.SharedToken, @@ -91,16 +77,23 @@ public async Task InitializeAsync() public async Task DisposeAsync() { - if (_client3 is not null) + try { - await _client3.ForceStopAsync(); - _client3 = null; - } + if (_client3 is not null) + { + await _client3.ForceStopAsync(); + } - if (_client2 is not null) + if (_client2 is not null) + { + await _client2.ForceStopAsync(); + } + } + finally { - await _client2.ForceStopAsync(); + _client3 = null; _client2 = null; + await Ctx.CleanupAfterTestAsync(); } } @@ -224,7 +217,7 @@ public async Task Capabilities_Changed_Fires_When_Elicitation_Provider_Disconnec // Use a dedicated client (client3) so we can stop it without affecting client2 var port = Client1.ActualPort ?? 
throw new InvalidOperationException("Client1 ActualPort is null"); - _client3 = new CopilotClient(new CopilotClientOptions + _client3 = Ctx.CreateClient(options: new CopilotClientOptions { CliUrl = $"localhost:{port}", TcpConnectionToken = MultiClientCommandsElicitationFixture.SharedToken, diff --git a/dotnet/test/E2E/MultiClientE2ETests.cs b/dotnet/test/E2E/MultiClientE2ETests.cs index e5d7c4b69..88c6f5cf8 100644 --- a/dotnet/test/E2E/MultiClientE2ETests.cs +++ b/dotnet/test/E2E/MultiClientE2ETests.cs @@ -4,7 +4,6 @@ using System.Collections.Concurrent; using System.ComponentModel; -using System.Reflection; using System.Text.RegularExpressions; using GitHub.Copilot.SDK.Test.Harness; using Microsoft.Extensions.AI; @@ -29,16 +28,11 @@ public async Task InitializeAsync() Client1 = Ctx.CreateClient(useStdio: false, options: new CopilotClientOptions { TcpConnectionToken = SharedToken, - }); + }, persistent: true); } public async Task DisposeAsync() { - if (Client1 is not null) - { - await Client1.ForceStopAsync(); - } - await Ctx.DisposeAsync(); } } @@ -55,19 +49,12 @@ public class MultiClientE2ETests : IClassFixture, IAsync public MultiClientE2ETests(MultiClientTestFixture fixture, ITestOutputHelper output) { _fixture = fixture; - _testName = GetTestName(output); - } - - private static string GetTestName(ITestOutputHelper output) - { - var type = output.GetType(); - var testField = type.GetField("test", BindingFlags.Instance | BindingFlags.NonPublic); - var test = (ITest?)testField?.GetValue(output); - return test?.TestCase.TestMethod.Method.Name ?? throw new InvalidOperationException("Couldn't find test name"); + _testName = E2ETestBase.GetTestName(output); } public async Task InitializeAsync() { + await Ctx.CleanupAfterTestAsync(); await Ctx.ConfigureForTestAsync("multi_client", _testName); // Trigger connection so we can read the port @@ -80,7 +67,7 @@ public async Task InitializeAsync() var port = Client1.ActualPort ?? 
throw new InvalidOperationException("Client1 is not using TCP mode; ActualPort is null"); - _client2 = new CopilotClient(new CopilotClientOptions + _client2 = Ctx.CreateClient(options: new CopilotClientOptions { CliUrl = $"localhost:{port}", TcpConnectionToken = MultiClientTestFixture.SharedToken, @@ -89,10 +76,17 @@ public async Task InitializeAsync() public async Task DisposeAsync() { - if (_client2 is not null) + try + { + if (_client2 is not null) + { + await _client2.ForceStopAsync(); + } + } + finally { - await _client2.ForceStopAsync(); _client2 = null; + await Ctx.CleanupAfterTestAsync(); } } @@ -339,7 +333,7 @@ public async Task Disconnecting_Client_Removes_Its_Tools() // Recreate client2 for cleanup var port = Client1.ActualPort!.Value; - _client2 = new CopilotClient(new CopilotClientOptions + _client2 = Ctx.CreateClient(options: new CopilotClientOptions { CliUrl = $"localhost:{port}", TcpConnectionToken = MultiClientTestFixture.SharedToken, diff --git a/dotnet/test/E2E/MultiTurnE2ETests.cs b/dotnet/test/E2E/MultiTurnE2ETests.cs index 6469e1b60..0950a1bfd 100644 --- a/dotnet/test/E2E/MultiTurnE2ETests.cs +++ b/dotnet/test/E2E/MultiTurnE2ETests.cs @@ -22,12 +22,22 @@ public async Task Should_Use_Tool_Results_From_Previous_Turns() // Write a file, then ask the model to read it and reason about its content await File.WriteAllTextAsync(Path.Join(Ctx.WorkDir, "secret.txt"), "The magic number is 42."); var session = await CreateSessionAsync(); + var events = new List(); + var eventsLock = new object(); + using var subscription = session.On(evt => + { + lock (eventsLock) + { + events.Add(evt); + } + }); var msg1 = await session.SendAndWaitAsync(new MessageOptions { Prompt = "Read the file 'secret.txt' and tell me what the magic number is.", }); Assert.Contains("42", msg1?.Data.Content ?? 
string.Empty); + AssertToolTurnOrdering(SnapshotAndClearEvents(events, eventsLock), "file read turn"); // Follow-up that requires context from the previous turn var msg2 = await session.SendAndWaitAsync(new MessageOptions @@ -41,12 +51,23 @@ public async Task Should_Use_Tool_Results_From_Previous_Turns() public async Task Should_Handle_File_Creation_Then_Reading_Across_Turns() { var session = await CreateSessionAsync(); + var events = new List(); + var eventsLock = new object(); + using var subscription = session.On(evt => + { + lock (eventsLock) + { + events.Add(evt); + } + }); // First turn: create a file await session.SendAndWaitAsync(new MessageOptions { Prompt = "Create a file called 'greeting.txt' with the content 'Hello from multi-turn test'.", }); + Assert.Equal("Hello from multi-turn test", await File.ReadAllTextAsync(Path.Join(Ctx.WorkDir, "greeting.txt"))); + AssertToolTurnOrdering(SnapshotAndClearEvents(events, eventsLock), "file creation turn"); // Second turn: read the file var msg = await session.SendAndWaitAsync(new MessageOptions @@ -54,5 +75,68 @@ await session.SendAndWaitAsync(new MessageOptions Prompt = "Read the file 'greeting.txt' and tell me its exact contents.", }); Assert.Contains("Hello from multi-turn test", msg?.Data.Content ?? 
string.Empty); + AssertToolTurnOrdering(SnapshotAndClearEvents(events, eventsLock), "file read turn"); + } + + private static List SnapshotAndClearEvents(List events, object eventsLock) + { + lock (eventsLock) + { + var snapshot = events.ToList(); + events.Clear(); + return snapshot; + } + } + + private static void AssertToolTurnOrdering(IReadOnlyList events, string turnDescription) + { + var observedTypes = string.Join(", ", events.Select(e => e.Type)); + var userMessage = IndexOf(events); + var toolStarts = events + .Select((evt, index) => (evt, index)) + .Where(item => item.evt is ToolExecutionStartEvent) + .Select(item => (Event: (ToolExecutionStartEvent)item.evt, item.index)) + .ToList(); + var toolCompletes = events + .Select((evt, index) => (evt, index)) + .Where(item => item.evt is ToolExecutionCompleteEvent) + .Select(item => (Event: (ToolExecutionCompleteEvent)item.evt, item.index)) + .ToList(); + + Assert.True(userMessage >= 0, $"Expected user.message in {turnDescription}. Observed: {observedTypes}"); + Assert.NotEmpty(toolStarts); + Assert.NotEmpty(toolCompletes); + + var firstToolStartIndex = toolStarts.Min(item => item.index); + Assert.True(userMessage < firstToolStartIndex, $"Expected user.message before first tool start in {turnDescription}. Observed: {observedTypes}"); + + foreach (var (complete, completeIndex) in toolCompletes) + { + var matchingStart = toolStarts.LastOrDefault(start => + start.Event.Data.ToolCallId == complete.Data.ToolCallId && start.index < completeIndex); + Assert.NotNull(matchingStart.Event); + } + + var lastToolCompleteIndex = toolCompletes.Max(item => item.index); + var assistantAfterTools = IndexOf(events, lastToolCompleteIndex + 1); + var sessionIdle = IndexOf(events, Math.Max(assistantAfterTools + 1, 0)); + + Assert.True(assistantAfterTools >= 0, $"Expected assistant.message after tool completion in {turnDescription}. 
Observed: {observedTypes}"); + Assert.True(sessionIdle >= 0, $"Expected session.idle after assistant.message in {turnDescription}. Observed: {observedTypes}"); + Assert.True(lastToolCompleteIndex < assistantAfterTools, $"Expected final tool completion before final assistant message in {turnDescription}. Observed: {observedTypes}"); + Assert.True(assistantAfterTools < sessionIdle, $"Expected final assistant message before idle in {turnDescription}. Observed: {observedTypes}"); + } + + private static int IndexOf(IReadOnlyList events, int startIndex = 0) + { + for (var i = Math.Max(startIndex, 0); i < events.Count; i++) + { + if (events[i] is T) + { + return i; + } + } + + return -1; } } diff --git a/dotnet/test/E2E/PendingWorkResumeE2ETests.cs b/dotnet/test/E2E/PendingWorkResumeE2ETests.cs index fa654e7a0..445b43c92 100644 --- a/dotnet/test/E2E/PendingWorkResumeE2ETests.cs +++ b/dotnet/test/E2E/PendingWorkResumeE2ETests.cs @@ -164,6 +164,77 @@ async Task BlockingExternalTool([Description("Value to look up")] string } } + [Fact] + public async Task Should_Keep_Pending_External_Tool_Handleable_On_Warm_Resume_When_ContinuePendingWork_Is_False() + { + var originalToolStarted = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + var releaseOriginalTool = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + var invocationCount = 0; + + await using var server = Ctx.CreateClient(useStdio: false, options: new CopilotClientOptions { TcpConnectionToken = SharedToken }); + await server.StartAsync(); + var cliUrl = GetCliUrl(server); + + using var suspendedClient = Ctx.CreateClient(options: new CopilotClientOptions { CliUrl = cliUrl, TcpConnectionToken = SharedToken }); + var session1 = await suspendedClient.CreateSessionAsync(new SessionConfig + { + Tools = [AIFunctionFactory.Create(BlockingExternalTool, "resume_external_tool")], + OnPermissionRequest = PermissionHandler.ApproveAll, + }); + var sessionId = 
session1.SessionId; + + try + { + var toolRequested = WaitForExternalToolRequestAsync(session1, "resume_external_tool"); + + await session1.SendAsync(new MessageOptions + { + Prompt = "Use resume_external_tool with value 'beta', then reply with the result.", + }); + + var toolEvent = await toolRequested; + Assert.Equal("beta", await originalToolStarted.Task.WaitAsync(PendingWorkTimeout)); + + await suspendedClient.ForceStopAsync(); + + await using var resumedClient = Ctx.CreateClient(options: new CopilotClientOptions { CliUrl = cliUrl, TcpConnectionToken = SharedToken }); + var session2 = await resumedClient.ResumeSessionAsync(sessionId, new ResumeSessionConfig + { + ContinuePendingWork = false, + OnPermissionRequest = PermissionHandler.ApproveAll, + }); + + var resumeEvent = await GetSingleResumeEventAsync(session2); + Assert.Equal(false, resumeEvent.Data.ContinuePendingWork); + Assert.Equal(true, resumeEvent.Data.SessionWasActive); + + var resumedResult = await session2.Rpc.Tools.HandlePendingToolCallAsync( + toolEvent.Data.RequestId, + result: "EXTERNAL_RESUMED_BETA"); + Assert.True(resumedResult.Success); + + var answer = await TestHelper.GetFinalAssistantMessageAsync(session2, PendingWorkTimeout); + + Assert.Equal(1, invocationCount); + Assert.Contains("EXTERNAL_RESUMED_BETA", answer?.Data.Content ?? 
string.Empty); + + await session2.DisposeAsync(); + await resumedClient.ForceStopAsync(); + } + finally + { + releaseOriginalTool.TrySetResult("ORIGINAL_SHOULD_NOT_WIN"); + } + + [Description("Looks up a value after resumption")] + async Task BlockingExternalTool([Description("Value to look up")] string value) + { + Interlocked.Increment(ref invocationCount); + originalToolStarted.TrySetResult(value); + return await releaseOriginalTool.Task; + } + } + [Fact] public async Task Should_Continue_Parallel_Pending_External_Tool_Requests_After_Resume() { @@ -224,12 +295,6 @@ await Task.WhenAll( result: "PARALLEL_A_ALPHA"); Assert.True(resultA.Success); - var answer = await TestHelper.GetFinalAssistantMessageAsync(session2, PendingWorkTimeout); - - var content = answer?.Data.Content ?? string.Empty; - Assert.Contains("PARALLEL_A_ALPHA", content); - Assert.Contains("PARALLEL_B_BETA", content); - await session2.DisposeAsync(); await resumedClient.ForceStopAsync(); } @@ -292,6 +357,52 @@ public async Task Should_Resume_Successfully_When_No_Pending_Work_Exists() await resumedSession.DisposeAsync(); } + [Fact] + public async Task Should_Report_ContinuePendingWork_True_In_Resume_Event() + { + await using var server = Ctx.CreateClient(useStdio: false, options: new CopilotClientOptions { TcpConnectionToken = SharedToken }); + await server.StartAsync(); + var cliUrl = GetCliUrl(server); + + string sessionId; + await using (var firstClient = Ctx.CreateClient(options: new CopilotClientOptions { CliUrl = cliUrl, TcpConnectionToken = SharedToken })) + { + var firstSession = await firstClient.CreateSessionAsync(new SessionConfig + { + OnPermissionRequest = PermissionHandler.ApproveAll, + }); + sessionId = firstSession.SessionId; + + var firstAnswer = await firstSession.SendAndWaitAsync(new MessageOptions + { + Prompt = "Reply with exactly: CONTINUE_PENDING_WORK_TRUE_TURN_ONE", + }); + Assert.Contains("CONTINUE_PENDING_WORK_TRUE_TURN_ONE", firstAnswer?.Data.Content ?? 
string.Empty); + + await firstSession.DisposeAsync(); + } + + await using var resumedClient = Ctx.CreateClient(options: new CopilotClientOptions { CliUrl = cliUrl, TcpConnectionToken = SharedToken }); + var resumedSession = await resumedClient.ResumeSessionAsync(sessionId, new ResumeSessionConfig + { + ContinuePendingWork = true, + OnPermissionRequest = PermissionHandler.ApproveAll, + }); + + var resumeEvent = await GetSingleResumeEventAsync(resumedSession); + Assert.Equal(true, resumeEvent.Data.ContinuePendingWork); + Assert.Equal((bool?)false, resumeEvent.Data.SessionWasActive); + + var followUp = await resumedSession.SendAndWaitAsync(new MessageOptions + { + Prompt = "Reply with exactly: CONTINUE_PENDING_WORK_TRUE_TURN_TWO", + }); + + Assert.Contains("CONTINUE_PENDING_WORK_TRUE_TURN_TWO", followUp?.Data.Content ?? string.Empty); + + await resumedSession.DisposeAsync(); + } + private static async Task WaitForExternalToolRequestAsync( CopilotSession session, string toolName) @@ -338,4 +449,10 @@ private static string GetCliUrl(CopilotClient client) ?? 
throw new InvalidOperationException("Expected the test server to be listening on a TCP port."); return $"localhost:{port}"; } + + private static async Task GetSingleResumeEventAsync(CopilotSession session) + { + var messages = await session.GetMessagesAsync(); + return Assert.Single(messages.OfType()); + } } diff --git a/dotnet/test/E2E/PermissionE2ETests.cs b/dotnet/test/E2E/PermissionE2ETests.cs index 25e93c323..d4be653de 100644 --- a/dotnet/test/E2E/PermissionE2ETests.cs +++ b/dotnet/test/E2E/PermissionE2ETests.cs @@ -3,26 +3,46 @@ *--------------------------------------------------------------------------------------------*/ using GitHub.Copilot.SDK.Test.Harness; +using Microsoft.Extensions.AI; +using System.Text.Json; +using System.Text.Json.Serialization; using Xunit; using Xunit.Abstractions; namespace GitHub.Copilot.SDK.Test.E2E; -public class PermissionE2ETests(E2ETestFixture fixture, ITestOutputHelper output) : E2ETestBase(fixture, "permissions", output) +public partial class PermissionE2ETests(E2ETestFixture fixture, ITestOutputHelper output) : E2ETestBase(fixture, "permissions", output) { + [JsonSourceGenerationOptions(JsonSerializerDefaults.Web)] + [JsonSerializable(typeof(ToolResultAIContent))] + [JsonSerializable(typeof(ToolResultObject))] + private partial class PermissionJsonContext : JsonSerializerContext; + [Fact] public async Task Should_Invoke_Permission_Handler_For_Write_Operations() { var permissionRequests = new List(); - var permissionRequestReceived = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + var permissionRequestsLock = new object(); + var readPermissionRequestReceived = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + var writePermissionRequestReceived = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); CopilotSession? 
session = null; session = await CreateSessionAsync(new SessionConfig { OnPermissionRequest = (request, invocation) => { - permissionRequests.Add(request); + lock (permissionRequestsLock) + { + permissionRequests.Add(request); + } Assert.Equal(session!.SessionId, invocation.SessionId); - permissionRequestReceived.TrySetResult(request); + if (request is PermissionRequestRead readRequest) + { + readPermissionRequestReceived.TrySetResult(readRequest); + } + else if (request is PermissionRequestWrite writeRequest) + { + writePermissionRequestReceived.TrySetResult(writeRequest); + } return Task.FromResult(new PermissionRequestResult { Kind = PermissionRequestResultKind.Approved }); } }); @@ -34,11 +54,29 @@ await session.SendAsync(new MessageOptions Prompt = "Edit test.txt and replace 'original' with 'modified'" }); - await permissionRequestReceived.Task.WaitAsync(TimeSpan.FromSeconds(30)); - await session.AbortAsync(); + var readRequest = await readPermissionRequestReceived.Task.WaitAsync(TimeSpan.FromSeconds(30)); + var writeRequest = await writePermissionRequestReceived.Task.WaitAsync(TimeSpan.FromSeconds(30)); + await TestHelper.GetFinalAssistantMessageAsync(session); - // Should have received at least one permission request - Assert.NotEmpty(permissionRequests); + List observedPermissionRequests; + lock (permissionRequestsLock) + { + observedPermissionRequests = [.. 
permissionRequests]; + } + + Assert.NotEmpty(observedPermissionRequests); + Assert.EndsWith("test.txt", readRequest.Path, StringComparison.Ordinal); + Assert.Contains("test.txt", readRequest.Intention, StringComparison.OrdinalIgnoreCase); + Assert.False(string.IsNullOrWhiteSpace(readRequest.ToolCallId)); + + Assert.Contains(observedPermissionRequests, request => request is PermissionRequestWrite); + Assert.EndsWith("test.txt", writeRequest.FileName, StringComparison.Ordinal); + Assert.Contains("original content", writeRequest.Diff, StringComparison.Ordinal); + Assert.Contains("modified content", writeRequest.Diff, StringComparison.Ordinal); + Assert.False(string.IsNullOrWhiteSpace(writeRequest.ToolCallId)); + + var updatedContent = await File.ReadAllTextAsync(Path.Join(Ctx.WorkDir, "test.txt")); + Assert.Equal("modified content", updatedContent); } [Fact] @@ -247,4 +285,271 @@ await session.SendAsync(new MessageOptions Assert.True(receivedToolCallId, "Should have received toolCallId in permission request"); } + + [Fact] + public async Task Should_Wait_For_Slow_Permission_Handler() + { + var handlerEntered = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + var releaseHandler = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + var targetToolCallId = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + var lifecycle = new List<(string Phase, string? ToolCallId)>(); + var lifecycleLock = new object(); + + void AddLifecycleEvent(string phase, string? 
toolCallId) + { + lock (lifecycleLock) + { + lifecycle.Add((phase, toolCallId)); + } + } + + var session = await CreateSessionAsync(new SessionConfig + { + OnPermissionRequest = async (request, invocation) => + { + var shellRequest = Assert.IsType(request); + Assert.False(string.IsNullOrWhiteSpace(shellRequest.ToolCallId)); + + AddLifecycleEvent("permission-start", shellRequest.ToolCallId); + targetToolCallId.TrySetResult(shellRequest.ToolCallId!); + handlerEntered.TrySetResult(); + await releaseHandler.Task.WaitAsync(TimeSpan.FromSeconds(30)); + AddLifecycleEvent("permission-complete", shellRequest.ToolCallId); + return new PermissionRequestResult { Kind = PermissionRequestResultKind.Approved }; + } + }); + + using var subscription = session.On(evt => + { + switch (evt) + { + case ToolExecutionStartEvent started: + AddLifecycleEvent("tool-start", started.Data.ToolCallId); + break; + case ToolExecutionCompleteEvent completed: + AddLifecycleEvent("tool-complete", completed.Data.ToolCallId); + break; + } + }); + + await session.SendAsync(new MessageOptions + { + Prompt = "Run 'echo slow_handler_test'" + }); + + await handlerEntered.Task.WaitAsync(TimeSpan.FromSeconds(30)); + var targetToolId = await targetToolCallId.Task.WaitAsync(TimeSpan.FromSeconds(30)); + lock (lifecycleLock) + { + Assert.DoesNotContain(lifecycle, evt => evt.Phase == "tool-complete" && evt.ToolCallId == targetToolId); + } + + releaseHandler.SetResult(); + + var message = await TestHelper.GetFinalAssistantMessageAsync(session); + + List<(string Phase, string? ToolCallId)> orderedLifecycle; + lock (lifecycleLock) + { + orderedLifecycle = [.. 
lifecycle]; + } + + var permissionStartIndex = orderedLifecycle.FindIndex(evt => evt.Phase == "permission-start" && evt.ToolCallId == targetToolId); + var permissionCompleteIndex = orderedLifecycle.FindIndex(evt => evt.Phase == "permission-complete" && evt.ToolCallId == targetToolId); + var toolStartIndex = orderedLifecycle.FindIndex(evt => evt.Phase == "tool-start" && evt.ToolCallId == targetToolId); + var toolCompleteIndex = orderedLifecycle.FindIndex(evt => evt.Phase == "tool-complete" && evt.ToolCallId == targetToolId); + var observedLifecycle = string.Join(", ", orderedLifecycle.Select(evt => $"{evt.Phase}:{evt.ToolCallId}")); + + Assert.InRange(permissionStartIndex, 0, orderedLifecycle.Count - 1); + Assert.InRange(permissionCompleteIndex, 0, orderedLifecycle.Count - 1); + Assert.InRange(toolStartIndex, 0, orderedLifecycle.Count - 1); + Assert.InRange(toolCompleteIndex, 0, orderedLifecycle.Count - 1); + Assert.True( + permissionCompleteIndex < toolCompleteIndex, + $"Expected permission completion before target tool completion. Observed: {observedLifecycle}"); + Assert.True( + toolStartIndex < toolCompleteIndex, + $"Expected target tool start before target tool completion. Observed: {observedLifecycle}"); + + // The tool should have actually run after permission was granted + Assert.Contains("slow_handler_test", message?.Data.Content ?? 
string.Empty); + } + + [Fact] + public async Task Should_Handle_Concurrent_Permission_Requests_From_Parallel_Tools() + { + var permissionRequestCount = 0; + var permissionRequests = new List(); + var permissionRequestsLock = new object(); + var bothPermissionRequestsStarted = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + var firstToolCompleted = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + var secondToolCompleted = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + var firstToolCalled = false; + var secondToolCalled = false; + + var session = await CreateSessionAsync(new SessionConfig + { + Tools = + [ + AIFunctionFactory.Create( + FirstPermissionTool, + "first_permission_tool", + "First concurrent permission test tool", + serializerOptions: PermissionJsonContext.Default.Options), + AIFunctionFactory.Create( + SecondPermissionTool, + "second_permission_tool", + "Second concurrent permission test tool", + serializerOptions: PermissionJsonContext.Default.Options), + ], + AvailableTools = ["first_permission_tool", "second_permission_tool"], + OnPermissionRequest = async (request, invocation) => + { + var count = Interlocked.Increment(ref permissionRequestCount); + lock (permissionRequestsLock) { permissionRequests.Add(request); } + if (count >= 2) + { + bothPermissionRequestsStarted.TrySetResult(); + } + + await bothPermissionRequestsStarted.Task.WaitAsync(TimeSpan.FromSeconds(30)); + return new PermissionRequestResult { Kind = PermissionRequestResultKind.Approved }; + } + }); + + session.On(evt => + { + if (evt is ToolExecutionCompleteEvent toolEvt) + { + var errorMessage = toolEvt.Data.Error?.Message ?? 
string.Empty; + if (errorMessage.Contains("first_permission_tool completed", StringComparison.Ordinal)) + { + firstToolCompleted.TrySetResult(toolEvt); + } + else if (errorMessage.Contains("second_permission_tool completed", StringComparison.Ordinal)) + { + secondToolCompleted.TrySetResult(toolEvt); + } + } + }); + var idle = TestHelper.GetNextEventOfTypeAsync(session); + + await session.SendAsync(new MessageOptions + { + Prompt = "Call both first_permission_tool and second_permission_tool in the same turn. Do not call any other tools." + }); + + await bothPermissionRequestsStarted.Task.WaitAsync(TimeSpan.FromSeconds(30)); + var completed = await Task.WhenAll(firstToolCompleted.Task, secondToolCompleted.Task).WaitAsync(TimeSpan.FromSeconds(60)); + await idle; + + // Should have received multiple permission requests (one per tool call) + Assert.Equal(2, permissionRequestCount); + + List requests; + lock (permissionRequestsLock) { requests = [.. permissionRequests]; } + Assert.Contains(requests, request => request is PermissionRequestCustomTool custom && custom.ToolName == "first_permission_tool"); + Assert.Contains(requests, request => request is PermissionRequestCustomTool custom && custom.ToolName == "second_permission_tool"); + + Assert.True(firstToolCalled); + Assert.True(secondToolCalled); + Assert.All(completed, toolEvt => + { + Assert.False(toolEvt.Data.Success); + Assert.Equal("rejected", toolEvt.Data.Error?.Code); + }); + + ToolResultAIContent FirstPermissionTool() + { + firstToolCalled = true; + return new(new ToolResultObject + { + ResultType = "rejected", + TextResultForLlm = "first_permission_tool completed after permission approval", + }); + } + + ToolResultAIContent SecondPermissionTool() + { + secondToolCalled = true; + return new(new ToolResultObject + { + ResultType = "rejected", + TextResultForLlm = "second_permission_tool completed after permission approval", + }); + } + } + + [Fact] + public async Task Should_Deny_Permission_With_NoResult_Kind() 
+ { + var permissionCalled = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + + var session = await CreateSessionAsync(new SessionConfig + { + OnPermissionRequest = (_, _) => + { + permissionCalled.TrySetResult(true); + return Task.FromResult(new PermissionRequestResult { Kind = PermissionRequestResultKind.NoResult }); + } + }); + + await session.SendAsync(new MessageOptions + { + Prompt = "Run 'node --version'" + }); + + Assert.True( + await permissionCalled.Task.WaitAsync(TimeSpan.FromSeconds(30)), + "Expected the no-result permission handler to be called."); + + await session.AbortAsync(); + } + + [Fact] + public async Task Should_Short_Circuit_Permission_Handler_When_Set_Approve_All_Enabled() + { + var handlerCallCount = 0; + + var session = await CreateSessionAsync(new SessionConfig + { + OnPermissionRequest = (_, _) => + { + Interlocked.Increment(ref handlerCallCount); + return Task.FromResult(new PermissionRequestResult { Kind = PermissionRequestResultKind.Approved }); + }, + }); + + // Runtime contract: when approveAllToolPermissionRequests is true the runtime + // short-circuits the permission flow with { kind: "approved" } *before* + // invoking the SDK-supplied handler. This RPC sets that runtime flag. + var setResult = await session.Rpc.Permissions.SetApproveAllAsync(true); + Assert.True(setResult.Success); + + try + { + var toolCompleted = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + using var subscription = session.On(evt => + { + if (evt is ToolExecutionCompleteEvent done && done.Data.Success) + { + toolCompleted.TrySetResult(done); + } + }); + + await session.SendAndWaitAsync(new MessageOptions + { + Prompt = "Run 'echo test' and tell me what happens", + }); + + // A real shell tool must have completed successfully under the runtime-level approval. 
+ await toolCompleted.Task.WaitAsync(TimeSpan.FromSeconds(30)); + + Assert.Equal(0, Volatile.Read(ref handlerCallCount)); + } + finally + { + await session.Rpc.Permissions.SetApproveAllAsync(false); + } + } } diff --git a/dotnet/test/E2E/RpcAdditionalEdgeCasesE2ETests.cs b/dotnet/test/E2E/RpcAdditionalEdgeCasesE2ETests.cs new file mode 100644 index 000000000..d71fa20d8 --- /dev/null +++ b/dotnet/test/E2E/RpcAdditionalEdgeCasesE2ETests.cs @@ -0,0 +1,232 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + *--------------------------------------------------------------------------------------------*/ + +using GitHub.Copilot.SDK.Rpc; +using GitHub.Copilot.SDK.Test.Harness; +using Xunit; +using Xunit.Abstractions; + +namespace GitHub.Copilot.SDK.Test.E2E; + +/// +/// Targeted gap-filler tests for assorted RPC surface area where the previous suite covered +/// the happy path but missed boundary semantics: idempotent state transitions, empty-content +/// IO, no-op operations, and unicode round-trips. None of these tests depend on LLM replay. +/// +public class RpcAdditionalEdgeCasesE2ETests(E2ETestFixture fixture, ITestOutputHelper output) + : E2ETestBase(fixture, "rpc_additional_edge_cases", output) +{ + [Fact] + public async Task Shell_Exec_With_Zero_Timeout_Does_Not_Kill_Long_Running_Command() + { + // The runtime treats timeout > 0 as "schedule SIGTERM at deadline" (shellApi.ts). + // timeout = 0 must mean "no timer at all" — the command should be allowed to + // keep running long enough to write a marker, after which we kill it explicitly. + var session = await CreateSessionAsync(); + var markerPath = Path.Join(Ctx.WorkDir, $"shell-zero-timeout-{Guid.NewGuid():N}.txt"); + var command = OperatingSystem.IsWindows() + ? 
$"powershell -NoLogo -NoProfile -Command \"Start-Sleep -Milliseconds 500; Set-Content -LiteralPath '{markerPath}' -Value 'alive'; Start-Sleep -Seconds 60\"" + : $"sh -c \"sleep 0.5; printf alive > '{markerPath}'; sleep 60\""; + + var execResult = await session.Rpc.Shell.ExecAsync(command, cwd: Path.GetTempPath(), timeout: TimeSpan.Zero); + Assert.False(string.IsNullOrWhiteSpace(execResult.ProcessId)); + + await TestHelper.WaitForConditionAsync( + () => File.Exists(markerPath), + timeout: TimeSpan.FromSeconds(30), + timeoutMessage: $"Timed out waiting for zero-timeout shell command to write marker to '{markerPath}'."); + + var killResult = await session.Rpc.Shell.KillAsync(execResult.ProcessId); + Assert.True(killResult.Killed); + } + + [Fact] + public async Task Workspaces_CreateFile_With_Empty_Content_Round_Trips() + { + var session = await CreateSessionAsync(); + var path = $"empty-{Guid.NewGuid():N}.txt"; + + await session.Rpc.Workspaces.CreateFileAsync(path, string.Empty); + + var read = await session.Rpc.Workspaces.ReadFileAsync(path); + Assert.Equal(string.Empty, read.Content); + + var listed = await session.Rpc.Workspaces.ListFilesAsync(); + Assert.Contains(path, listed.Files); + } + + [Fact] + public async Task Workspaces_CreateFile_With_Unicode_Content_Round_Trips() + { + var session = await CreateSessionAsync(); + var path = $"unicode-{Guid.NewGuid():N}.txt"; + // Mix of BMP, supplementary plane (emoji), CJK, Cyrillic, and a NUL byte to stress the + // string-only persistence path (workspace files are persisted as UTF-8 strings). + var payload = "Hello, 世界! 
🚀✨ Привет\u0000end"; + + await session.Rpc.Workspaces.CreateFileAsync(path, payload); + + var read = await session.Rpc.Workspaces.ReadFileAsync(path); + Assert.Equal(payload, read.Content); + } + + [Fact] + public async Task Workspaces_CreateFile_With_Large_Content_Round_Trips() + { + var session = await CreateSessionAsync(); + var path = $"large-{Guid.NewGuid():N}.txt"; + + // 256KB of varied content stresses both the runtime's UTF-8 encoding path and the + // JSON-RPC line-buffer path; small enough not to risk RPC size limits. + var payload = string.Create(256 * 1024, (object?)null, static (span, _) => + { + for (int i = 0; i < span.Length; i++) + { + span[i] = (char)('a' + (i % 26)); + } + }); + + await session.Rpc.Workspaces.CreateFileAsync(path, payload); + + var read = await session.Rpc.Workspaces.ReadFileAsync(path); + Assert.Equal(payload.Length, read.Content.Length); + Assert.Equal(payload, read.Content); + } + + [Fact] + public async Task Plan_Update_With_Empty_Content_Then_Read_Returns_Empty() + { + var session = await CreateSessionAsync(); + + await session.Rpc.Plan.UpdateAsync(string.Empty); + + var read = await session.Rpc.Plan.ReadAsync(); + Assert.Equal(string.Empty, read.Content); + } + + [Fact] + public async Task Plan_Delete_When_None_Exists_Is_Idempotent() + { + var session = await CreateSessionAsync(); + + // No prior plan — delete should succeed (no-op) and a subsequent read should still + // return null/empty content rather than throwing. 
+ await session.Rpc.Plan.DeleteAsync(); + await session.Rpc.Plan.DeleteAsync(); + + var read = await session.Rpc.Plan.ReadAsync(); + Assert.True(string.IsNullOrEmpty(read.Content)); + } + + [Fact] + public async Task Mode_Set_To_Same_Value_Multiple_Times_Stays_Stable() + { + var session = await CreateSessionAsync(); + + await session.Rpc.Mode.SetAsync(SessionMode.Plan); + await session.Rpc.Mode.SetAsync(SessionMode.Plan); + await session.Rpc.Mode.SetAsync(SessionMode.Plan); + + Assert.Equal(SessionMode.Plan, await session.Rpc.Mode.GetAsync()); + } + + [Fact] + public async Task Name_Set_With_Unicode_Round_Trips() + { + var session = await CreateSessionAsync(); + const string name = "セッション 名前 ☕ – test"; + + await session.Rpc.Name.SetAsync(name); + + var read = await session.Rpc.Name.GetAsync(); + Assert.Equal(name, read.Name); + } + + [Fact] + public async Task Usage_GetMetrics_On_Fresh_Session_Returns_Zero_Tokens() + { + var session = await CreateSessionAsync(); + + var metrics = await session.Rpc.Usage.GetMetricsAsync(); + + // Fresh session = no LLM calls yet. Last-call counters and the user-request count + // must be zero, and SessionStartTime must be a positive epoch (set at create-time). + Assert.Equal(0, metrics.LastCallInputTokens); + Assert.Equal(0, metrics.LastCallOutputTokens); + Assert.Equal(0, metrics.TotalUserRequests); + Assert.True(metrics.SessionStartTime > 0, "SessionStartTime should be a positive epoch."); + } + + [Fact] + public async Task Permissions_ResetSessionApprovals_On_Fresh_Session_Is_Noop() + { + var session = await CreateSessionAsync(); + + // No prior approvals to reset; should succeed without throwing. 
+ var result = await session.Rpc.Permissions.ResetSessionApprovalsAsync(); + + Assert.True(result.Success); + } + + [Fact] + public async Task Permissions_SetApproveAll_Toggle_Round_Trips() + { + var session = await CreateSessionAsync(); + + var first = await session.Rpc.Permissions.SetApproveAllAsync(true); + Assert.True(first.Success); + + var second = await session.Rpc.Permissions.SetApproveAllAsync(true); + Assert.True(second.Success); + + var third = await session.Rpc.Permissions.SetApproveAllAsync(false); + Assert.True(third.Success); + + var fourth = await session.Rpc.Permissions.SetApproveAllAsync(false); + Assert.True(fourth.Success); + } + + [Fact] + public async Task Workspaces_CreateFile_Then_ListFiles_Returns_Sorted_Or_Stable_Order() + { + var session = await CreateSessionAsync(); + var prefix = $"order-{Guid.NewGuid():N}-"; + + var paths = Enumerable.Range(0, 5).Select(i => $"{prefix}{i:D2}.txt").ToList(); + foreach (var p in paths) + { + await session.Rpc.Workspaces.CreateFileAsync(p, $"content-{p}"); + } + + var listed = await session.Rpc.Workspaces.ListFilesAsync(); + var matchingFiles = listed.Files + .Where(path => path.StartsWith(prefix, StringComparison.Ordinal)) + .ToList(); + + // The files this test created should be returned in sorted order. + Assert.Equal(paths, matchingFiles); + + // Calling list again immediately must preserve the same order. + var listed2 = await session.Rpc.Workspaces.ListFilesAsync(); + var matchingFiles2 = listed2.Files + .Where(path => path.StartsWith(prefix, StringComparison.Ordinal)) + .ToList(); + Assert.Equal(matchingFiles, matchingFiles2); + } + + [Fact] + public async Task Workspaces_GetWorkspace_Returns_Stable_Result_Across_Calls() + { + var session = await CreateSessionAsync(); + + var first = await session.Rpc.Workspaces.GetWorkspaceAsync(); + var second = await session.Rpc.Workspaces.GetWorkspaceAsync(); + + // GetWorkspace is a pure getter. The two calls must return semantically equal results. 
+ // Even if the underlying implementation returns a fresh object each time, the JSON + // shape should round-trip identically. + Assert.Equal(first.Workspace?.Cwd, second.Workspace?.Cwd); + Assert.Equal(first.Workspace?.Id, second.Workspace?.Id); + } +} diff --git a/dotnet/test/E2E/RpcAgentE2ETests.cs b/dotnet/test/E2E/RpcAgentE2ETests.cs index 6accdb5c2..875497c76 100644 --- a/dotnet/test/E2E/RpcAgentE2ETests.cs +++ b/dotnet/test/E2E/RpcAgentE2ETests.cs @@ -2,6 +2,7 @@ * Copyright (c) Microsoft Corporation. All rights reserved. *--------------------------------------------------------------------------------------------*/ +using GitHub.Copilot.SDK.Test.Harness; using Xunit; using Xunit.Abstractions; @@ -49,6 +50,35 @@ public async Task Should_Select_And_Get_Current_Agent() Assert.Equal("test-agent", currentResult.Agent.Name); } + [Fact] + public async Task Should_Emit_Subagent_Selected_And_Deselected_Events() + { + var session = await CreateSessionAsync(new SessionConfig { CustomAgents = [CreateCustomAgents()[0]] }); + + var selectedEventTask = TestHelper.GetNextEventOfTypeAsync( + session, + static _ => true, + timeout: TimeSpan.FromSeconds(30), + timeoutDescription: "subagent.selected event"); + var selectResult = await session.Rpc.Agent.SelectAsync("test-agent"); + var selectedEvent = await selectedEventTask; + + Assert.NotNull(selectResult.Agent); + Assert.Equal("test-agent", selectedEvent.Data.AgentName); + Assert.Equal("Test Agent", selectedEvent.Data.AgentDisplayName); + + var deselectedEventTask = TestHelper.GetNextEventOfTypeAsync( + session, + static _ => true, + timeout: TimeSpan.FromSeconds(30), + timeoutDescription: "subagent.deselected event"); + await session.Rpc.Agent.DeselectAsync(); + await deselectedEventTask; + + var currentResult = await session.Rpc.Agent.GetCurrentAsync(); + Assert.Null(currentResult.Agent); + } + [Fact] public async Task Should_Deselect_Current_Agent() { @@ -80,14 +110,14 @@ public async Task Should_Call_Agent_Reload() 
Assert.Single(before.Agents, agent => string.Equals(agent.Name, "reload-test-agent", StringComparison.Ordinal)); var result = await session.Rpc.Agent.ReloadAsync(); + var current = await session.Rpc.Agent.ListAsync(); Assert.NotNull(result.Agents); - - // Lock in current runtime behavior so a fix becomes a test failure rather than a - // silent regression: the runtime currently drops session-configured CustomAgents - // on reload (it reloads only on-disk agents). Once the runtime preserves session - // CustomAgents across reload, flip this to `Assert.Single(result.Agents, - // a => a.Name == "reload-test-agent")` and update the comment. - Assert.DoesNotContain(result.Agents, a => string.Equals(a.Name, "reload-test-agent", StringComparison.Ordinal)); + Assert.Equal( + result.Agents.Select(agent => agent.Name).OrderBy(name => name, StringComparer.Ordinal), + current.Agents.Select(agent => agent.Name).OrderBy(name => name, StringComparer.Ordinal)); + Assert.Equal( + result.Agents.Select(agent => agent.DisplayName).OrderBy(name => name, StringComparer.Ordinal), + current.Agents.Select(agent => agent.DisplayName).OrderBy(name => name, StringComparer.Ordinal)); } private static List CreateCustomAgents() => diff --git a/dotnet/test/E2E/RpcEventSideEffectsE2ETests.cs b/dotnet/test/E2E/RpcEventSideEffectsE2ETests.cs new file mode 100644 index 000000000..3f0a61d03 --- /dev/null +++ b/dotnet/test/E2E/RpcEventSideEffectsE2ETests.cs @@ -0,0 +1,189 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + *--------------------------------------------------------------------------------------------*/ + +using GitHub.Copilot.SDK.Test.Harness; +using GitHub.Copilot.SDK.Rpc; +using Xunit; +using Xunit.Abstractions; + +namespace GitHub.Copilot.SDK.Test.E2E; + +/// +/// Verifies that session-scoped RPC calls emit the expected side-effect session events. 
+/// Most tests are pure RPC-only and need no replay snapshot, but the truncate tests +/// drive a real user.message first so the runtime persists events to disk +/// (LocalSessionManager.SessionWriter only flushes once a user.message is observed). +/// +public class RpcEventSideEffectsE2ETests(E2ETestFixture fixture, ITestOutputHelper output) + : E2ETestBase(fixture, "rpc_event_side_effects", output) +{ + private static readonly TimeSpan EventTimeout = TimeSpan.FromSeconds(30); + + [Fact] + public async Task Should_Emit_Mode_Changed_Event_When_Mode_Set() + { + var session = await CreateSessionAsync(); + + // Subscribe before invoking RPC; events may arrive after the RPC completes. + var modeChangedTask = TestHelper.GetNextEventOfTypeAsync( + session, + evt => evt.Data.NewMode == "plan" && evt.Data.PreviousMode == "interactive", + EventTimeout, + timeoutDescription: "session.mode_changed event for interactive→plan"); + + await session.Rpc.Mode.SetAsync(SessionMode.Plan); + + var evt = await modeChangedTask; + Assert.Equal("plan", evt.Data.NewMode); + Assert.Equal("interactive", evt.Data.PreviousMode); + } + + [Fact] + public async Task Should_Emit_Plan_Changed_Event_For_Update_And_Delete() + { + var session = await CreateSessionAsync(); + + var createTask = TestHelper.GetNextEventOfTypeAsync( + session, + evt => evt.Data.Operation == PlanChangedOperation.Create, + EventTimeout, + timeoutDescription: "session.plan_changed event for plan creation"); + + await session.Rpc.Plan.UpdateAsync("# Test plan\n- item"); + + var createEvent = await createTask; + Assert.Equal(PlanChangedOperation.Create, createEvent.Data.Operation); + + var deleteTask = TestHelper.GetNextEventOfTypeAsync( + session, + evt => evt.Data.Operation == PlanChangedOperation.Delete, + EventTimeout, + timeoutDescription: "session.plan_changed event for plan deletion"); + + await session.Rpc.Plan.DeleteAsync(); + + var deleteEvent = await deleteTask; + Assert.Equal(PlanChangedOperation.Delete, 
deleteEvent.Data.Operation); + } + + [Fact] + public async Task Should_Emit_Plan_Changed_Update_Operation_On_Second_Update() + { + var session = await CreateSessionAsync(); + + // First update creates the plan. + await session.Rpc.Plan.UpdateAsync("# initial"); + + // Second update should emit operation == "update". + var updateTask = TestHelper.GetNextEventOfTypeAsync( + session, + evt => evt.Data.Operation == PlanChangedOperation.Update, + EventTimeout, + timeoutDescription: "session.plan_changed event for plan update"); + + await session.Rpc.Plan.UpdateAsync("# updated content"); + + var updateEvent = await updateTask; + Assert.Equal(PlanChangedOperation.Update, updateEvent.Data.Operation); + } + + [Fact] + public async Task Should_Emit_Workspace_File_Changed_Event_When_File_Created() + { + var session = await CreateSessionAsync(); + var path = $"side-effect-{Guid.NewGuid():N}.txt"; + + var changedTask = TestHelper.GetNextEventOfTypeAsync( + session, + evt => string.Equals(evt.Data.Path, path, StringComparison.Ordinal), + EventTimeout, + timeoutDescription: $"session.workspace_file_changed for '{path}'"); + + await session.Rpc.Workspaces.CreateFileAsync(path, "hello"); + + var evt = await changedTask; + Assert.Equal(path, evt.Data.Path); + // Operation must be one of the defined enum values; create or update are both runtime-acceptable. + Assert.Contains( + evt.Data.Operation, + new[] { WorkspaceFileChangedOperation.Create, WorkspaceFileChangedOperation.Update }); + } + + [Fact] + public async Task Should_Emit_Title_Changed_Event_When_Name_Set() + { + var session = await CreateSessionAsync(); + var title = $"Renamed-{Guid.NewGuid():N}"; + + // session.title_changed is ephemeral; it never lands in persisted history, + // so we must subscribe before invoking name.set. 
+ var titleChangedTask = TestHelper.GetNextEventOfTypeAsync( + session, + evt => string.Equals(evt.Data.Title, title, StringComparison.Ordinal), + EventTimeout, + timeoutDescription: "session.title_changed event after name.set"); + + await session.Rpc.Name.SetAsync(title); + + var evt = await titleChangedTask; + Assert.Equal(title, evt.Data.Title); + } + + [Fact] + public async Task Should_Emit_Snapshot_Rewind_Event_And_Remove_Events_On_Truncate() + { + var session = await CreateSessionAsync(); + + // Send a real user.message; only after one is observed does the runtime + // begin persisting buffered events to disk (LocalSessionManager.SessionWriter + // gates flushing on shouldSaveSession, which flips on the first user.message). + await session.SendAndWaitAsync(new MessageOptions { Prompt = "Say SNAPSHOT_REWIND_TARGET exactly." }); + + var messages = await session.GetMessagesAsync(); + var userEvent = messages.OfType().FirstOrDefault() + ?? throw new InvalidOperationException("Expected at least one user.message in persisted history"); + var targetEventId = userEvent.Id.ToString(); + + // session.snapshot_rewind is ephemeral; must subscribe before invoking truncate. + var rewindTask = TestHelper.GetNextEventOfTypeAsync( + session, + evt => string.Equals(evt.Data.UpToEventId, targetEventId, StringComparison.OrdinalIgnoreCase), + EventTimeout, + timeoutDescription: "session.snapshot_rewind event after truncate"); + + var truncateResult = await session.Rpc.History.TruncateAsync(targetEventId); + + Assert.True(truncateResult.EventsRemoved >= 1, "Expected truncate to remove at least the targeted event"); + + var rewindEvent = await rewindTask; + Assert.Equal(targetEventId, rewindEvent.Data.UpToEventId, ignoreCase: true); + Assert.Equal(truncateResult.EventsRemoved, (long)rewindEvent.Data.EventsRemoved); + + // Verify the truncated event is no longer in persisted history. 
+ var messagesAfter = await session.GetMessagesAsync(); + Assert.DoesNotContain(messagesAfter, e => e.Id == userEvent.Id); + } + + [Fact] + public async Task Should_Allow_Session_Use_After_Truncate() + { + var session = await CreateSessionAsync(); + + await session.SendAndWaitAsync(new MessageOptions { Prompt = "Say SNAPSHOT_REWIND_TARGET exactly." }); + + var messages = await session.GetMessagesAsync(); + var userEvent = messages.OfType().FirstOrDefault() + ?? throw new InvalidOperationException("Expected at least one user.message in persisted history"); + + var truncateResult = await session.Rpc.History.TruncateAsync(userEvent.Id.ToString()); + Assert.True(truncateResult.EventsRemoved >= 1); + + // After truncation the session should still respond to RPC. + var afterMode = await session.Rpc.Mode.GetAsync(); + Assert.True(afterMode == SessionMode.Interactive || afterMode == SessionMode.Plan || afterMode == SessionMode.Autopilot); + + // Workspace surface still works. + _ = await session.Rpc.Workspaces.GetWorkspaceAsync(); + } +} diff --git a/dotnet/test/E2E/RpcExtensionsLoadedE2ETests.cs b/dotnet/test/E2E/RpcExtensionsLoadedE2ETests.cs new file mode 100644 index 000000000..dce6d4f1e --- /dev/null +++ b/dotnet/test/E2E/RpcExtensionsLoadedE2ETests.cs @@ -0,0 +1,352 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. 
+ *--------------------------------------------------------------------------------------------*/ + +using GitHub.Copilot.SDK.Rpc; +using GitHub.Copilot.SDK.Test.Harness; +using System.Diagnostics; +using Xunit; +using Xunit.Abstractions; +using RpcExtension = GitHub.Copilot.SDK.Rpc.Extension; + +namespace GitHub.Copilot.SDK.Test.E2E; + +/// +/// E2E coverage for the loaded-extensions code path in the runtime: when the +/// experimental EXTENSIONS feature flag is enabled and a session is created +/// with EnableConfigDiscovery=true, the runtime discovers user/project +/// extensions from disk, forks each one as a subprocess, and exposes +/// session.Rpc.Extensions.{List,Enable,Disable,Reload}. +/// +/// The "controller absent" path is already covered by +/// RpcMcpAndSkillsE2ETests.Should_Report_Error_When_Extensions_Are_Not_Available; +/// these tests cover the controller-present path. +/// +public class RpcExtensionsLoadedE2ETests(E2ETestFixture fixture, ITestOutputHelper output) + : E2ETestBase(fixture, "rpc_extensions_loaded", output) +{ + /// + /// Extension subprocess startup involves Node fork + SDK resolver + JSON-RPC + /// handshake. Empirically this completes in well under a second on Windows, + /// but the runtime's READY_TIMEOUT_MS is 30s, so we wait 45s (that bound plus headroom) + /// to keep the test bulletproof on cold starts. + /// + private static readonly TimeSpan ExtensionStartupTimeout = TimeSpan.FromSeconds(45); + + /// + /// Builds an environment dict that opts the runtime into the experimental + /// EXTENSIONS feature flag while preserving every other harness-managed + /// var (COPILOT_API_URL, COPILOT_HOME, NODE_V8_COVERAGE, etc). + /// + private Dictionary ExtensionsEnabledEnvironment() + { + var env = new Dictionary(Ctx.GetEnvironment(), StringComparer.OrdinalIgnoreCase) + { + ["COPILOT_CLI_ENABLED_FEATURE_FLAGS"] = "EXTENSIONS", + }; + return env; + } + + /// + /// Writes a minimal user extension into {HomeDir}/extensions/{name}/extension.mjs. 
+ /// The body imports @github/copilot-sdk/extension, calls joinSession + /// to establish the JSON-RPC handshake (so the extension transitions from + /// "starting" → "running" quickly), and then keeps the process alive. + /// Returns the unique extension name. + /// + private string CreateUserExtension(string? prefix = null) + { + var extName = Path.GetFileName($"{prefix ?? "test-ext"}-{Guid.NewGuid():N}"); + var extDir = Path.Join(Ctx.HomeDir, "extensions", extName); + WriteRunningExtension(extDir); + return extName; + } + + private async Task<(string Name, string Id, string WorkingDirectory)> CreateProjectExtensionAsync(string? prefix = null) + { + var extName = Path.GetFileName($"{prefix ?? "project-ext"}-{Guid.NewGuid():N}"); + var projectDirName = Path.GetFileName($"extension-project-{Guid.NewGuid():N}"); + var projectDir = Path.Join(Ctx.WorkDir, projectDirName); + Directory.CreateDirectory(projectDir); + await InitializeGitRepositoryAsync(projectDir); + + var extDir = Path.Join(projectDir, ".github", "extensions", extName); + WriteRunningExtension(extDir); + return (extName, $"project:{extName}", projectDir); + } + + private static void WriteRunningExtension(string extDir) + { + Directory.CreateDirectory(extDir); + + var body = """ + import { joinSession } from "@github/copilot-sdk/extension"; + + // Establish the JSON-RPC handshake so the runtime sees us as ready. + await joinSession({}); + + // Keep the process alive so the runtime doesn't reap us as exited. + // The unref() ensures we still exit when the parent disconnects. 
+ setInterval(() => {}, 60_000).unref?.(); + """; + + File.WriteAllText(Path.Join(extDir, "extension.mjs"), body); + }
+
+    /// <summary>
+    /// Runs <c>git init -q</c> with <paramref name="projectDir"/> as the working directory.
+    /// </summary>
+    /// <param name="projectDir">Existing directory in which the repository is initialized.</param>
+    /// <exception cref="InvalidOperationException">
+    /// The git process could not be started, or it exited with a non-zero exit code
+    /// (the captured stderr text is included in the message).
+    /// </exception>
+    private static async Task InitializeGitRepositoryAsync(string projectDir)
+    {
+        using var process = new Process
+        {
+            StartInfo = new ProcessStartInfo("git")
+            {
+                WorkingDirectory = projectDir,
+                RedirectStandardOutput = true,
+                RedirectStandardError = true,
+            }
+        };
+        process.StartInfo.ArgumentList.Add("init");
+        process.StartInfo.ArgumentList.Add("-q");
+
+        if (!process.Start())
+        {
+            throw new InvalidOperationException("Failed to start git init.");
+        }
+
+        // Begin draining BOTH redirected pipes before waiting for exit. Waiting first and
+        // reading afterwards can deadlock: if the child fills a pipe buffer it blocks on
+        // write and never exits, while we block waiting for that exit.
+        var stdoutTask = process.StandardOutput.ReadToEndAsync();
+        var stderrTask = process.StandardError.ReadToEndAsync();
+
+        await process.WaitForExitAsync();
+
+        // stdout is not used, but its read must complete so the pipe is fully consumed.
+        await stdoutTask;
+        var stderr = await stderrTask;
+
+        if (process.ExitCode != 0)
+        {
+            throw new InvalidOperationException($"git init failed with exit code {process.ExitCode}: {stderr}");
+        }
+    }
+
+ /// + /// Polls session.Rpc.Extensions.ListAsync() until the controller + /// becomes available AND the named extension reaches a terminal status + /// (running, failed, or disabled). The controller is set asynchronously + /// after session create returns, and list calls can report an empty list + /// until setup finishes. + /// + private static async Task WaitForExtensionAsync( + CopilotSession session, + string extensionId, + ExtensionStatus expectedStatus, + TimeSpan? timeout = null) + { + RpcExtension? lastSeen = null; + await TestHelper.WaitForConditionAsync( + async () => + { + var list = await session.Rpc.Extensions.ListAsync(); + lastSeen = list.Extensions.FirstOrDefault(e => string.Equals(e.Id, extensionId, StringComparison.Ordinal)); + return lastSeen != null && lastSeen.Status == expectedStatus; + }, + timeout: timeout ?? ExtensionStartupTimeout, + timeoutMessage: $"Extension '{extensionId}' did not reach status '{expectedStatus}' (last seen: {lastSeen?.Status.ToString() ?? 
""}).", + transientExceptionFilter: ex => ex.ToString().Contains("Extensions not available", StringComparison.OrdinalIgnoreCase), + pollInterval: TimeSpan.FromMilliseconds(100)); + + return lastSeen!; + } + + [Theory] + [InlineData(ExtensionSource.User)] + [InlineData(ExtensionSource.Project)] + public async Task Discovers_Loads_And_Reports_Running_Extension(ExtensionSource source) + { + string extName; + string extId; + string? workingDirectory; + if (source == ExtensionSource.User) + { + extName = CreateUserExtension(); + extId = $"user:{extName}"; + workingDirectory = null; + } + else if (source == ExtensionSource.Project) + { + (extName, extId, workingDirectory) = await CreateProjectExtensionAsync(); + } + else + { + throw new ArgumentOutOfRangeException(nameof(source), source, null); + } + + await using var client = Ctx.CreateClient(options: new CopilotClientOptions + { + Environment = ExtensionsEnabledEnvironment(), + }); + + await using var session = await client.CreateSessionAsync(new SessionConfig + { + EnableConfigDiscovery = true, + WorkingDirectory = workingDirectory, + OnPermissionRequest = PermissionHandler.ApproveAll, + }); + + var ext = await WaitForExtensionAsync(session, extId, ExtensionStatus.Running); + + Assert.Equal(extId, ext.Id); + Assert.Equal(extName, ext.Name); + Assert.Equal(source, ext.Source); + Assert.Equal(ExtensionStatus.Running, ext.Status); + Assert.NotNull(ext.Pid); + Assert.True(ext.Pid > 0); + } + + [Fact] + public async Task Disable_Then_Enable_Cycles_Extension_Status() + { + var extName = CreateUserExtension(); + var extId = $"user:{extName}"; + + await using var client = Ctx.CreateClient(options: new CopilotClientOptions + { + Environment = ExtensionsEnabledEnvironment(), + }); + + await using var session = await client.CreateSessionAsync(new SessionConfig + { + EnableConfigDiscovery = true, + OnPermissionRequest = PermissionHandler.ApproveAll, + }); + + // Wait until the initial running state is observed before mutating. 
+ await WaitForExtensionAsync(session, extId, ExtensionStatus.Running); + + // Disable: the extension should transition to "disabled" and have no pid. + await session.Rpc.Extensions.DisableAsync(extId); + var disabled = await WaitForExtensionAsync(session, extId, ExtensionStatus.Disabled); + Assert.Null(disabled.Pid); + + // Re-enable: the extension is reloaded as a fresh subprocess. + await session.Rpc.Extensions.EnableAsync(extId); + var reEnabled = await WaitForExtensionAsync(session, extId, ExtensionStatus.Running); + Assert.NotNull(reEnabled.Pid); + } + + [Fact] + public async Task Reload_Picks_Up_Extension_Added_After_Session_Create() + { + // Start the session BEFORE writing the extension so the initial discovery sees nothing. + await using var client = Ctx.CreateClient(options: new CopilotClientOptions + { + Environment = ExtensionsEnabledEnvironment(), + }); + + await using var session = await client.CreateSessionAsync(new SessionConfig + { + EnableConfigDiscovery = true, + OnPermissionRequest = PermissionHandler.ApproveAll, + }); + + // setupExtensionsForSession runs asynchronously; until it completes the + // controller isn't installed and ReloadAsync throws "Extensions not + // available". (ListAsync returns {extensions: []} either way and is + // therefore not a usable probe here.) Poll Reload directly. 
+ var extName = CreateUserExtension(prefix: "reloadable-ext"); + var extId = $"user:{extName}"; + + await TestHelper.WaitForConditionAsync( + async () => + { + await session.Rpc.Extensions.ReloadAsync(); + return true; + }, + timeout: ExtensionStartupTimeout, + timeoutMessage: "Extensions controller never became available for ReloadAsync.", + transientExceptionFilter: ex => ex.ToString().Contains("Extensions not available", StringComparison.OrdinalIgnoreCase), + pollInterval: TimeSpan.FromMilliseconds(100)); + + var ext = await WaitForExtensionAsync(session, extId, ExtensionStatus.Running); + Assert.Equal(ExtensionSource.User, ext.Source); + } + + [Fact] + public async Task Failed_Extension_Reports_Failed_Status() + { + // Write an extension whose body throws synchronously at import time. + // The bootstrap will fork the child, the import will throw, the child + // exits with code 1, and the runtime should mark it as "failed". + var extName = $"crashing-ext-{Guid.NewGuid():N}"; + var extDir = Path.Join(Ctx.HomeDir, "extensions", extName); + Directory.CreateDirectory(extDir); + File.WriteAllText( + Path.Join(extDir, "extension.mjs"), + "throw new Error('intentional startup failure');"); + + var extId = $"user:{extName}"; + + await using var client = Ctx.CreateClient(options: new CopilotClientOptions + { + Environment = ExtensionsEnabledEnvironment(), + }); + + await using var session = await client.CreateSessionAsync(new SessionConfig + { + EnableConfigDiscovery = true, + OnPermissionRequest = PermissionHandler.ApproveAll, + }); + + var ext = await WaitForExtensionAsync(session, extId, ExtensionStatus.Failed); + Assert.Equal(extId, ext.Id); + Assert.Equal(ExtensionSource.User, ext.Source); + } + + [Fact] + public async Task Multiple_Extensions_Are_Discovered_Independently() + { + var ext1Name = CreateUserExtension(prefix: "multi-a"); + var ext2Name = CreateUserExtension(prefix: "multi-b"); + var ext1Id = $"user:{ext1Name}"; + var ext2Id = $"user:{ext2Name}"; + + 
await using var client = Ctx.CreateClient(options: new CopilotClientOptions + { + Environment = ExtensionsEnabledEnvironment(), + }); + + await using var session = await client.CreateSessionAsync(new SessionConfig + { + EnableConfigDiscovery = true, + OnPermissionRequest = PermissionHandler.ApproveAll, + }); + + await WaitForExtensionAsync(session, ext1Id, ExtensionStatus.Running); + await WaitForExtensionAsync(session, ext2Id, ExtensionStatus.Running); + + var list = await session.Rpc.Extensions.ListAsync(); + var pids = list.Extensions.Select(e => e.Pid).Where(p => p.HasValue).ToList(); + Assert.Equal(pids.Count, pids.Distinct().Count()); + } + + [Fact] + public async Task Reload_Preserves_Disabled_State_Across_Calls() + { + var extName = CreateUserExtension(prefix: "persistent-disable"); + var extId = $"user:{extName}"; + + await using var client = Ctx.CreateClient(options: new CopilotClientOptions + { + Environment = ExtensionsEnabledEnvironment(), + }); + + await using var session = await client.CreateSessionAsync(new SessionConfig + { + EnableConfigDiscovery = true, + OnPermissionRequest = PermissionHandler.ApproveAll, + }); + + await WaitForExtensionAsync(session, extId, ExtensionStatus.Running); + + await session.Rpc.Extensions.DisableAsync(extId); + await WaitForExtensionAsync(session, extId, ExtensionStatus.Disabled); + + // Reload re-runs discovery and respects the per-session disabled set, + // so the extension stays disabled and is not re-launched. 
+ await session.Rpc.Extensions.ReloadAsync(); + + var afterReload = await WaitForExtensionAsync(session, extId, ExtensionStatus.Disabled); + Assert.Null(afterReload.Pid); + } +} diff --git a/dotnet/test/E2E/RpcMcpAndSkillsE2ETests.cs b/dotnet/test/E2E/RpcMcpAndSkillsE2ETests.cs index 407111a4a..33b3bc95d 100644 --- a/dotnet/test/E2E/RpcMcpAndSkillsE2ETests.cs +++ b/dotnet/test/E2E/RpcMcpAndSkillsE2ETests.cs @@ -124,6 +124,52 @@ await AssertFailureAsync( await AssertFailureAsync( () => session.Rpc.Mcp.ReloadAsync(), "MCP config reload not available"); + await AssertFailureAsync( + () => session.Rpc.Mcp.Oauth.LoginAsync("missing-server"), + "MCP host is not available"); + } + + [Fact] + public async Task Should_Report_Error_When_Mcp_Oauth_Server_Is_Not_Configured() + { + var session = await CreateSessionAsync(new SessionConfig + { + McpServers = new Dictionary + { + ["configured-stdio-server"] = new McpStdioServerConfig + { + Command = "echo", + Args = ["configured-stdio-server"], + Tools = ["*"], + }, + }, + }); + + await AssertFailureAsync( + () => session.Rpc.Mcp.Oauth.LoginAsync("missing-server"), + "is not configured"); + } + + [Fact] + public async Task Should_Report_Error_When_Mcp_Oauth_Server_Is_Not_Remote() + { + const string serverName = "configured-stdio-server"; + var session = await CreateSessionAsync(new SessionConfig + { + McpServers = new Dictionary + { + [serverName] = new McpStdioServerConfig + { + Command = "echo", + Args = [serverName], + Tools = ["*"], + }, + }, + }); + + await AssertFailureAsync( + () => session.Rpc.Mcp.Oauth.LoginAsync(serverName, forceReauth: true, clientName: "SDK E2E", callbackSuccessMessage: "Done"), + "not a remote server"); } [Fact] diff --git a/dotnet/test/E2E/RpcSessionStateE2ETests.cs b/dotnet/test/E2E/RpcSessionStateE2ETests.cs index 02541dc06..6e8118bb0 100644 --- a/dotnet/test/E2E/RpcSessionStateE2ETests.cs +++ b/dotnet/test/E2E/RpcSessionStateE2ETests.cs @@ -22,7 +22,7 @@ private static async Task 
AssertImplementedFailureAsync(Func ac [Fact] public async Task Should_Call_Session_Rpc_Model_GetCurrent() { - var session = await CreateSessionAsync(new SessionConfig { Model = "claude-sonnet-4.5" }); + await using var session = await CreateSessionAsync(new SessionConfig { Model = "claude-sonnet-4.5" }); var result = await session.Rpc.Model.GetCurrentAsync(); @@ -35,7 +35,7 @@ public async Task Should_Call_Session_Rpc_Model_GetCurrent() [Fact] public async Task Should_Call_Session_Rpc_Model_SwitchTo() { - var session = await CreateSessionAsync(new SessionConfig { Model = "claude-sonnet-4.5" }); + await using var session = await CreateSessionAsync(new SessionConfig { Model = "claude-sonnet-4.5" }); var before = await session.Rpc.Model.GetCurrentAsync(); Assert.NotNull(before.ModelId); @@ -50,7 +50,7 @@ public async Task Should_Call_Session_Rpc_Model_SwitchTo() [Fact] public async Task Should_Get_And_Set_Session_Mode() { - var session = await CreateSessionAsync(); + await using var session = await CreateSessionAsync(); var initial = await session.Rpc.Mode.GetAsync(); Assert.Equal(SessionMode.Interactive, initial); @@ -62,10 +62,22 @@ public async Task Should_Get_And_Set_Session_Mode() Assert.Equal(SessionMode.Interactive, await session.Rpc.Mode.GetAsync()); } + [Theory] + [InlineData(SessionMode.Interactive)] + [InlineData(SessionMode.Plan)] + [InlineData(SessionMode.Autopilot)] + public async Task Should_Set_And_Get_Each_Session_Mode_Value(SessionMode mode) + { + await using var session = await CreateSessionAsync(); + + await session.Rpc.Mode.SetAsync(mode); + Assert.Equal(mode, await session.Rpc.Mode.GetAsync()); + } + [Fact] public async Task Should_Read_Update_And_Delete_Plan() { - var session = await CreateSessionAsync(); + await using var session = await CreateSessionAsync(); var initial = await session.Rpc.Plan.ReadAsync(); Assert.False(initial.Exists); @@ -88,7 +100,7 @@ public async Task Should_Read_Update_And_Delete_Plan() [Fact] public async Task 
Should_Call_Workspace_File_Rpc_Methods() { - var session = await CreateSessionAsync(); + await using var session = await CreateSessionAsync(); var initial = await session.Rpc.Workspaces.ListFilesAsync(); Assert.NotNull(initial.Files); @@ -106,10 +118,119 @@ public async Task Should_Call_Workspace_File_Rpc_Methods() Assert.NotEqual(Guid.Empty, workspace.Workspace.Id); } + [Theory] + [InlineData("../escaped.txt")] + [InlineData("../../escaped.txt")] + [InlineData("nested/../../../escaped.txt")] + public async Task Should_Reject_Workspace_File_Path_Traversal(string path) + { + await using var session = await CreateSessionAsync(); + + // The runtime's resolveWorkspacePath enforces that resolved paths must remain + // inside the workspace files directory. Path traversal attempts must throw, + // not silently succeed. + var ex = await Assert.ThrowsAnyAsync( + () => session.Rpc.Workspaces.CreateFileAsync(path, "should not land outside workspace")); + Assert.Contains("workspace files directory", ex.ToString(), StringComparison.OrdinalIgnoreCase); + + var readEx = await Assert.ThrowsAnyAsync( + () => session.Rpc.Workspaces.ReadFileAsync(path)); + Assert.Contains("workspace files directory", readEx.ToString(), StringComparison.OrdinalIgnoreCase); + } + + [Fact] + public async Task Should_Create_Workspace_File_With_Nested_Path_Auto_Creating_Dirs() + { + await using var session = await CreateSessionAsync(); + + // workspaceManager.writeWorkspaceFile mkdirs parent dirs recursively. 
+ var nestedPath = $"nested-{Guid.NewGuid():N}/subdir/file.txt"; + await session.Rpc.Workspaces.CreateFileAsync(nestedPath, "nested content"); + + var read = await session.Rpc.Workspaces.ReadFileAsync(nestedPath); + Assert.Equal("nested content", read.Content); + + var listed = await session.Rpc.Workspaces.ListFilesAsync(); + Assert.Contains(listed.Files, f => f.EndsWith("file.txt", StringComparison.Ordinal)); + } + + [Fact] + public async Task Should_Report_Error_Reading_Nonexistent_Workspace_File() + { + await using var session = await CreateSessionAsync(); + + await Assert.ThrowsAnyAsync( + () => session.Rpc.Workspaces.ReadFileAsync($"never-exists-{Guid.NewGuid():N}.txt")); + } + + [Fact] + public async Task Should_Update_Existing_Workspace_File_With_Update_Operation() + { + await using var session = await CreateSessionAsync(); + var path = $"reused-{Guid.NewGuid():N}.txt"; + + await session.Rpc.Workspaces.CreateFileAsync(path, "v1"); + + var updateTask = TestHelper.GetNextEventOfTypeAsync( + session, + evt => string.Equals(evt.Data.Path, path, StringComparison.Ordinal) + && evt.Data.Operation == WorkspaceFileChangedOperation.Update, + TimeSpan.FromSeconds(15), + timeoutDescription: $"workspace_file_changed Update event for '{path}'"); + + await session.Rpc.Workspaces.CreateFileAsync(path, "v2"); + + var evt = await updateTask; + Assert.Equal(WorkspaceFileChangedOperation.Update, evt.Data.Operation); + Assert.Equal("v2", (await session.Rpc.Workspaces.ReadFileAsync(path)).Content); + } + + [Theory] + [InlineData("")] + [InlineData(" ")] + [InlineData("\t\n \r")] + public async Task Should_Reject_Empty_Or_Whitespace_Session_Name(string emptyOrWhitespace) + { + await using var session = await CreateSessionAsync(); + + // workspaceManager.renameSession trims and rejects empty/whitespace-only names + // with "Session name cannot be empty". 
+ var ex = await Assert.ThrowsAnyAsync(() => session.Rpc.Name.SetAsync(emptyOrWhitespace)); + Assert.Contains("empty", ex.ToString(), StringComparison.OrdinalIgnoreCase); + } + + [Fact] + public async Task Should_Emit_Title_Changed_Event_Each_Time_Name_Set_Is_Called() + { + await using var session = await CreateSessionAsync(); + var titleA = $"Title-A-{Guid.NewGuid():N}"; + var titleB = $"Title-B-{Guid.NewGuid():N}"; + + // session.title_changed is ephemeral. Subscribe before invoking. + var firstTask = TestHelper.GetNextEventOfTypeAsync( + session, + evt => string.Equals(evt.Data.Title, titleA, StringComparison.Ordinal), + TimeSpan.FromSeconds(15), + timeoutDescription: "first title_changed event"); + await session.Rpc.Name.SetAsync(titleA); + await firstTask; + + // Setting a different name MUST emit another event (renameSession does not + // suppress duplicates, and the second value is observably different anyway). + var secondTask = TestHelper.GetNextEventOfTypeAsync( + session, + evt => string.Equals(evt.Data.Title, titleB, StringComparison.Ordinal), + TimeSpan.FromSeconds(15), + timeoutDescription: "second title_changed event"); + await session.Rpc.Name.SetAsync(titleB); + var second = await secondTask; + Assert.Equal(titleB, second.Data.Title); + } + [Fact] public async Task Should_Get_And_Set_Session_Metadata() { - var session = await CreateSessionAsync(); + await using var session = await CreateSessionAsync(); await session.Rpc.Name.SetAsync("SDK test session"); var name = await session.Rpc.Name.GetAsync(); @@ -125,7 +246,7 @@ public async Task Should_Fork_Session_With_Persisted_Messages() const string sourcePrompt = "Say FORK_SOURCE_ALPHA exactly."; const string forkPrompt = "Now say FORK_CHILD_BETA exactly."; - var session = await CreateSessionAsync(); + await using var session = await CreateSessionAsync(); var initialAnswer = await session.SendAndWaitAsync(new MessageOptions { Prompt = sourcePrompt }); Assert.Contains("FORK_SOURCE_ALPHA", 
initialAnswer?.Data.Content ?? string.Empty); @@ -138,7 +259,7 @@ public async Task Should_Fork_Session_With_Persisted_Messages() Assert.False(string.IsNullOrWhiteSpace(fork.SessionId)); Assert.NotEqual(session.SessionId, fork.SessionId); - var forkedSession = await ResumeSessionAsync(fork.SessionId); + await using var forkedSession = await ResumeSessionAsync(fork.SessionId); var forkedConversation = GetConversationMessages(await forkedSession.GetMessagesAsync()); Assert.Equal(sourceConversation, forkedConversation.Take(sourceConversation.Count)); @@ -151,14 +272,12 @@ public async Task Should_Fork_Session_With_Persisted_Messages() var forkAfterPrompt = GetConversationMessages(await forkedSession.GetMessagesAsync()); Assert.Contains(forkAfterPrompt, message => message.Role == "user" && message.Content == forkPrompt); Assert.Contains(forkAfterPrompt, message => message.Role == "assistant" && message.Content.Contains("FORK_CHILD_BETA", StringComparison.Ordinal)); - - await forkedSession.DisposeAsync(); } [Fact] public async Task Should_Report_Error_When_Forking_Session_Without_Persisted_Events() { - var session = await CreateSessionAsync(); + await using var session = await CreateSessionAsync(); var ex = await Assert.ThrowsAnyAsync(() => Client.Rpc.Sessions.ForkAsync(session.SessionId)); @@ -166,10 +285,59 @@ public async Task Should_Report_Error_When_Forking_Session_Without_Persisted_Eve Assert.DoesNotContain("Unhandled method sessions.fork", ex.ToString(), StringComparison.OrdinalIgnoreCase); } + [Fact] + public async Task Should_Fork_Session_To_Event_Id_Excluding_Boundary_Event() + { + const string firstPrompt = "Say FORK_BOUNDARY_FIRST exactly."; + const string secondPrompt = "Say FORK_BOUNDARY_SECOND exactly."; + + await using var session = await CreateSessionAsync(); + await session.SendAndWaitAsync(new MessageOptions { Prompt = firstPrompt }); + await session.SendAndWaitAsync(new MessageOptions { Prompt = secondPrompt }); + + var sourceEvents = await 
session.GetMessagesAsync(); + var secondUserEvent = sourceEvents + .OfType() + .FirstOrDefault(e => string.Equals(e.Data.Content, secondPrompt, StringComparison.Ordinal)) + ?? throw new InvalidOperationException("Expected the second user.message in persisted history"); + var boundaryEventId = secondUserEvent.Id.ToString(); + + // Runtime semantics (localSessionManager.forkSession): toEventId is exclusive, + // so the boundary event is NOT included in the forked session. + var fork = await Client.Rpc.Sessions.ForkAsync(session.SessionId, boundaryEventId); + Assert.False(string.IsNullOrWhiteSpace(fork.SessionId)); + Assert.NotEqual(session.SessionId, fork.SessionId); + + await using var forkedSession = await ResumeSessionAsync(fork.SessionId); + var forkedEvents = await forkedSession.GetMessagesAsync(); + Assert.DoesNotContain(forkedEvents, e => e.Id == secondUserEvent.Id); + + var forkedConversation = GetConversationMessages(forkedEvents); + Assert.Contains(forkedConversation, m => m.Role == "user" && m.Content == firstPrompt); + Assert.DoesNotContain(forkedConversation, m => m.Role == "user" && m.Content == secondPrompt); + } + + [Fact] + public async Task Should_Report_Error_When_Forking_Session_To_Unknown_Event_Id() + { + const string sourcePrompt = "Say FORK_UNKNOWN_EVENT_OK exactly."; + + await using var session = await CreateSessionAsync(); + await session.SendAndWaitAsync(new MessageOptions { Prompt = sourcePrompt }); + + var bogusEventId = Guid.NewGuid().ToString(); + + var ex = await Assert.ThrowsAnyAsync( + () => Client.Rpc.Sessions.ForkAsync(session.SessionId, bogusEventId)); + + Assert.Contains($"Event {bogusEventId} not found", ex.ToString(), StringComparison.OrdinalIgnoreCase); + Assert.DoesNotContain("Unhandled method sessions.fork", ex.ToString(), StringComparison.OrdinalIgnoreCase); + } + [Fact] public async Task Should_Call_Session_Usage_And_Permission_Rpcs() { - var session = await CreateSessionAsync(); + await using var session = await 
CreateSessionAsync(); var metrics = await session.Rpc.Usage.GetMetricsAsync(); Assert.True(metrics.SessionStartTime > 0); @@ -207,7 +375,7 @@ public async Task Should_Call_Session_Usage_And_Permission_Rpcs() [Fact] public async Task Should_Report_Implemented_Errors_For_Unsupported_Session_Rpc_Paths() { - var session = await CreateSessionAsync(); + await using var session = await CreateSessionAsync(); await AssertImplementedFailureAsync( () => session.Rpc.History.TruncateAsync("missing-event"), @@ -221,13 +389,36 @@ await AssertImplementedFailureAsync( [Fact] public async Task Should_Compact_Session_History_After_Messages() { - var session = await CreateSessionAsync(); + await using var session = await CreateSessionAsync(); await session.SendAndWaitAsync(new MessageOptions { Prompt = "What is 2+2?" }); var result = await session.Rpc.History.CompactAsync(); Assert.NotNull(result); + Assert.True(result.Success, "Expected History.CompactAsync to report Success=true"); + Assert.True(result.MessagesRemoved >= 0, "MessagesRemoved must be non-negative"); + // TODO: once copilot-agent-runtime PR #7285 ("Runtime: Fix compact history no-op + // accounting") merges and is rolled into the @github/copilot version pinned by + // nodejs/package-lock.json, re-tighten this to `result.TokensRemoved >= 0`. Until + // then `tokensRemoved = preCompactionTokens - postCompactionTokens` can legitimately + // be negative when the LLM-generated summary is more verbose than the messages it + // replaced (the SDK schema declares min(0) but the runtime does not enforce it). 
+ + if (result.ContextWindow is { } ctx) + { + Assert.True(ctx.MessagesLength >= 0, "ContextWindow.MessagesLength must be non-negative"); + Assert.True(ctx.CurrentTokens >= 0, "ContextWindow.CurrentTokens must be non-negative"); + if (ctx.ConversationTokens is long convo) + { + Assert.True(convo >= 0, "ContextWindow.ConversationTokens must be non-negative when present"); + Assert.True(convo <= ctx.CurrentTokens, "ConversationTokens must not exceed CurrentTokens"); + } + } + + // Session must still be usable after compaction. + var name = await session.Rpc.Name.GetAsync(); + Assert.NotNull(name); } private static List<(string Role, string Content)> GetConversationMessages(IEnumerable events) diff --git a/dotnet/test/E2E/RpcShellAndFleetE2ETests.cs b/dotnet/test/E2E/RpcShellAndFleetE2ETests.cs index 1e241240c..1491c88a2 100644 --- a/dotnet/test/E2E/RpcShellAndFleetE2ETests.cs +++ b/dotnet/test/E2E/RpcShellAndFleetE2ETests.cs @@ -95,25 +95,15 @@ private static string CreateWriteFileCommand(string markerPath, string marker) private static async Task WaitForFileTextAsync(string path, string expected) { - using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(30)); - while (!cts.IsCancellationRequested) - { - if (File.Exists(path) && (await File.ReadAllTextAsync(path)).Contains(expected, StringComparison.Ordinal)) - { - return; - } - - try + await TestHelper.WaitForConditionAsync( + async () => { - await Task.Delay(TimeSpan.FromMilliseconds(100), cts.Token); - } - catch (OperationCanceledException) - { - break; - } - } - - throw new TimeoutException($"Timed out waiting for shell command to write '{expected}' to '{path}'."); + return File.Exists(path) && + (await File.ReadAllTextAsync(path)).Contains(expected, StringComparison.Ordinal); + }, + timeout: TimeSpan.FromSeconds(30), + timeoutMessage: $"Timed out waiting for shell command to write '{expected}' to '{path}'.", + transientExceptionFilter: TestHelper.IsTransientFileSystemException); } private static async 
Task> WaitForMessagesAsync( @@ -123,25 +113,16 @@ private static async Task> WaitForMessagesAsync( // Fleet-mode tasks do not emit SessionIdleEvent on completion, so polling the // session message list is the simplest way to wait for the assistant's final // reply text without depending on idle-event semantics. - using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(120)); - while (!cts.IsCancellationRequested) - { - var messages = (await session.GetMessagesAsync()).ToList(); - if (predicate(messages)) - { - return messages; - } - - try + IReadOnlyList messages = []; + await TestHelper.WaitForConditionAsync( + async () => { - await Task.Delay(TimeSpan.FromMilliseconds(250), cts.Token); - } - catch (OperationCanceledException) - { - break; - } - } - - throw new TimeoutException("Timed out waiting for fleet-mode assistant reply to satisfy predicate."); + messages = (await session.GetMessagesAsync()).ToList(); + return predicate(messages); + }, + timeout: TimeSpan.FromSeconds(120), + timeoutMessage: "Timed out waiting for fleet-mode assistant reply to satisfy predicate.", + pollInterval: TimeSpan.FromMilliseconds(250)); + return messages; } } diff --git a/dotnet/test/E2E/RpcShellEdgeCaseE2ETests.cs b/dotnet/test/E2E/RpcShellEdgeCaseE2ETests.cs new file mode 100644 index 000000000..13bea7ae4 --- /dev/null +++ b/dotnet/test/E2E/RpcShellEdgeCaseE2ETests.cs @@ -0,0 +1,190 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + *--------------------------------------------------------------------------------------------*/ + +using GitHub.Copilot.SDK.Rpc; +using GitHub.Copilot.SDK.Test.Harness; +using Xunit; +using Xunit.Abstractions; + +namespace GitHub.Copilot.SDK.Test.E2E; + +/// +/// Targeted edge-case tests for the shell RPC API (shell.exec, shell.kill). 
+/// These tests close several runtime branches that the basic exec/kill tests miss: +/// timeout-triggered SIGTERM, command-not-found error path, kill on unknown processId, +/// kill with terminating signals, stderr and large-stdout output, and the custom-cwd path. +/// All assertions are based on observable side effects (file existence, process gone) so +/// they remain deterministic without relying on streamed shell.output / shell.exit RPC +/// notifications, which the SDK does not surface as session events. +/// +public class RpcShellEdgeCaseE2ETests(E2ETestFixture fixture, ITestOutputHelper output) + : E2ETestBase(fixture, "rpc_shell_edge_cases", output) +{ + [Fact] + public async Task Shell_Exec_With_Timeout_Kills_Long_Running_Command() + { + var session = await CreateSessionAsync(); + var markerPath = Path.Join(Ctx.WorkDir, $"shell-timeout-{Guid.NewGuid():N}.txt"); + var startedPath = Path.Join(Ctx.WorkDir, $"shell-timeout-started-{Guid.NewGuid():N}.txt"); + + // Sleep 30s but timeout at 200ms — runtime should SIGTERM the child before the + // sleep completes, which means the marker file must NEVER appear within a wait + // window comfortably greater than the timeout but well under the sleep duration. + var command = OperatingSystem.IsWindows() + ? 
$"echo started>\"{startedPath}\" & for /L %i in (1,1,2147483647) do @rem & echo should-not-exist>\"{markerPath}\"" + : $"printf 'started' > '{startedPath}'; sleep 30; printf 'should-not-exist' > '{markerPath}'"; + + var result = await session.Rpc.Shell.ExecAsync(command, timeout: TimeSpan.FromMilliseconds(200)); + Assert.False(string.IsNullOrWhiteSpace(result.ProcessId)); + + await TestHelper.WaitForConditionAsync( + () => File.Exists(startedPath), + timeout: TimeSpan.FromSeconds(30), + timeoutMessage: "Timed-out shell command did not start."); + + await AssertProcessMapCleanedUpAsync(session, result.ProcessId, "Timed-out shell command"); + + Assert.False(File.Exists(markerPath), "Marker file should not exist; timeout should have killed the child before the sleep completed."); + } + + [Fact] + public async Task Shell_Exec_With_Custom_Cwd_Honors_Override() + { + var session = await CreateSessionAsync(); + + var subDir = Path.Join(Ctx.WorkDir, $"shell-cwd-{Guid.NewGuid():N}"); + Directory.CreateDirectory(subDir); + var markerPath = Path.Join(subDir, "marker.txt"); + const string marker = "shell-cwd-marker"; + + // Write the marker as a path RELATIVE to cwd so we can prove the runtime used the + // override (default cwd is Ctx.WorkDir, not subDir). If the cwd parameter is + // ignored, the relative-path write would land in WorkDir, not subDir. + var command = OperatingSystem.IsWindows() + ? 
$"powershell -NoLogo -NoProfile -Command \"Set-Content -LiteralPath 'marker.txt' -Value '{marker}'\"" + : $"sh -c \"printf '%s' '{marker}' > marker.txt\""; + + var result = await session.Rpc.Shell.ExecAsync(command, cwd: subDir); + Assert.False(string.IsNullOrWhiteSpace(result.ProcessId)); + + await TestHelper.WaitForConditionAsync( + async () => File.Exists(markerPath) && (await File.ReadAllTextAsync(markerPath)).Contains(marker, StringComparison.Ordinal), + timeout: TimeSpan.FromSeconds(30), + timeoutMessage: $"Timed out waiting for shell command to write marker to '{markerPath}'.", + transientExceptionFilter: TestHelper.IsTransientFileSystemException); + } + + [Fact] + public async Task Shell_Exec_With_Nonexistent_Command_Returns_ProcessId_And_Cleans_Up() + { + var session = await CreateSessionAsync(); + var markerPath = Path.Join(Ctx.WorkDir, $"shell-not-found-{Guid.NewGuid():N}.txt"); + + // shell:true means the OS shell will print "not found" to stderr and exit 127 (POSIX) + // or 1 (cmd.exe). Either way the runtime must accept the request, return a processId, + // and clean up the process map so a subsequent kill returns killed:false. + var missingCommand = "definitely-not-a-real-command-" + Guid.NewGuid().ToString("N"); + var command = OperatingSystem.IsWindows() + ? 
$"{missingCommand} & echo done>\"{markerPath}\" & exit /b 1" + : $"{missingCommand}; code=$?; printf 'done' > '{markerPath}'; exit $code"; + + var result = await session.Rpc.Shell.ExecAsync(command); + Assert.False(string.IsNullOrWhiteSpace(result.ProcessId)); + + await TestHelper.WaitForConditionAsync( + () => File.Exists(markerPath), + timeout: TimeSpan.FromSeconds(30), + timeoutMessage: "Failed shell command did not reach its marker."); + + await AssertProcessMapCleanedUpAsync(session, result.ProcessId, "Failed shell command"); + } + + [Fact] + public async Task Shell_Kill_Unknown_ProcessId_Returns_False() + { + var session = await CreateSessionAsync(); + + var killResult = await session.Rpc.Shell.KillAsync($"unknown-{Guid.NewGuid():N}"); + + Assert.False(killResult.Killed); + } + + [Theory] + [InlineData(ShellKillSignal.SIGTERM)] + [InlineData(ShellKillSignal.SIGKILL)] + public async Task Shell_Kill_Cleans_Up_After_Terminating_Signal(ShellKillSignal signal) + { + var session = await CreateSessionAsync(); + var command = OperatingSystem.IsWindows() + ? "powershell -NoLogo -NoProfile -Command \"Start-Sleep -Seconds 60\"" + : "sleep 60"; + + var execResult = await session.Rpc.Shell.ExecAsync(command); + Assert.False(string.IsNullOrWhiteSpace(execResult.ProcessId)); + + var killResult = await session.Rpc.Shell.KillAsync(execResult.ProcessId, signal); + Assert.True(killResult.Killed); + + await AssertProcessMapCleanedUpAsync(session, execResult.ProcessId, $"Process killed with {signal}"); + } + + [Fact] + public async Task Shell_Exec_With_Stderr_Output_Cleans_Up() + { + var session = await CreateSessionAsync(); + var markerPath = Path.Join(Ctx.WorkDir, $"shell-stderr-{Guid.NewGuid():N}.txt"); + + // Command that writes to stderr and exits non-zero. Exercises the runtime's stderr + // stream-flush path and cleanup-on-non-zero-exit path. The marker proves the + // command reached the end before the single kill probe checks cleanup. 
+ var command = OperatingSystem.IsWindows() + ? $"powershell -NoLogo -NoProfile -Command \"[Console]::Error.WriteLine('boom'); Set-Content -LiteralPath '{markerPath}' -Value 'done'; exit 2\"" + : $"echo boom 1>&2; printf 'done' > '{markerPath}'; exit 2"; + + var result = await session.Rpc.Shell.ExecAsync(command); + Assert.False(string.IsNullOrWhiteSpace(result.ProcessId)); + + await TestHelper.WaitForConditionAsync( + () => File.Exists(markerPath), + timeout: TimeSpan.FromSeconds(30), + timeoutMessage: "stderr-only command did not reach its marker."); + + await AssertProcessMapCleanedUpAsync(session, result.ProcessId, "stderr-only command"); + } + + [Fact] + public async Task Shell_Exec_With_Large_Stdout_Cleans_Up() + { + var session = await CreateSessionAsync(); + var markerPath = Path.Join(Ctx.WorkDir, $"shell-stdout-{Guid.NewGuid():N}.txt"); + + // Print a payload large enough to exceed the runtime's 64KB chunk threshold so the + // chunked-output path is executed. We use a single 200KB write so the runtime has to + // emit at least 3 chunks (200KB / 64KB ≈ 3.1). + var command = OperatingSystem.IsWindows() + ? $"powershell -NoLogo -NoProfile -Command \"Write-Host ('x' * 204800); Set-Content -LiteralPath '{markerPath}' -Value 'done'\"" + : $"printf '%0.s=' $(seq 1 204800); printf 'done' > '{markerPath}'"; + + var result = await session.Rpc.Shell.ExecAsync(command); + Assert.False(string.IsNullOrWhiteSpace(result.ProcessId)); + + await TestHelper.WaitForConditionAsync( + () => File.Exists(markerPath), + timeout: TimeSpan.FromSeconds(30), + timeoutMessage: "Large-output command did not reach its marker."); + + await AssertProcessMapCleanedUpAsync(session, result.ProcessId, "Large-output command"); + } + + private static async Task AssertProcessMapCleanedUpAsync(CopilotSession session, string processId, string scenario) + { + // The shell RPC surface exposes kill but not a non-mutating status API. 
+ // Give the runtime's close/exit handler a bounded grace period, then + // probe exactly once; if this returns true, the assertion fails instead + // of letting a polling kill make the test pass by cleaning up itself. + await Task.Delay(TimeSpan.FromSeconds(1)); + var killResult = await session.Rpc.Shell.KillAsync(processId); + Assert.False(killResult.Killed, $"{scenario} should have already exited and been removed from the runtime's process map."); + } +} diff --git a/dotnet/test/E2E/RpcTasksAndHandlersE2ETests.cs b/dotnet/test/E2E/RpcTasksAndHandlersE2ETests.cs index 3f029c37f..da8b2166f 100644 --- a/dotnet/test/E2E/RpcTasksAndHandlersE2ETests.cs +++ b/dotnet/test/E2E/RpcTasksAndHandlersE2ETests.cs @@ -3,6 +3,7 @@ *--------------------------------------------------------------------------------------------*/ using GitHub.Copilot.SDK.Rpc; +using GitHub.Copilot.SDK.Test.Harness; using Xunit; using Xunit.Abstractions; @@ -49,6 +50,91 @@ await AssertImplementedFailureAsync( "session.tasks.startAgent"); } + [Fact] + public async Task Should_Report_Implemented_Error_For_Invalid_Task_Agent_Model() + { + var session = await CreateSessionAsync(); + + await AssertImplementedFailureAsync( + () => session.Rpc.Tasks.StartAgentAsync( + agentType: "general-purpose", + prompt: "Say hi", + name: "sdk-test-task", + description: "SDK task agent validation", + model: "not-a-real-model"), + "session.tasks.startAgent"); + + var tasks = await session.Rpc.Tasks.ListAsync(); + Assert.Empty(tasks.Tasks); + } + + [Fact] + public async Task Should_Start_Background_Agent_And_Report_Task_Details() + { + var session = await CreateSessionAsync(); + + var ready = await session.SendAndWaitAsync(new MessageOptions + { + Prompt = "Reply with TASK_AGENT_READY exactly.", + }); + Assert.Contains("TASK_AGENT_READY", ready?.Data.Content ?? 
string.Empty, StringComparison.Ordinal); + + var started = await session.Rpc.Tasks.StartAgentAsync( + agentType: "general-purpose", + prompt: "Reply with TASK_AGENT_DONE exactly.", + name: "sdk-background-agent", + description: "SDK background agent coverage"); + Assert.False(string.IsNullOrWhiteSpace(started.AgentId)); + + TaskInfoAgent? task = null; + await TestHelper.WaitForConditionAsync( + async () => + { + task = await FindAgentTaskAsync(session, started.AgentId); + return task is not null; + }, + timeout: TimeSpan.FromSeconds(30), + timeoutMessage: $"Background agent task '{started.AgentId}' did not appear in session.tasks.list."); + + Assert.NotNull(task); + Assert.Equal(started.AgentId, task.Id); + Assert.Equal("general-purpose", task.AgentType); + Assert.Equal("Reply with TASK_AGENT_DONE exactly.", task.Prompt); + Assert.Equal("SDK background agent coverage", task.Description); + Assert.Equal(TaskAgentInfoExecutionMode.Background, task.ExecutionMode); + Assert.False(task.CanPromoteToBackground.GetValueOrDefault()); + Assert.NotEqual(default, task.StartedAt); + + var promote = await session.Rpc.Tasks.PromoteToBackgroundAsync(started.AgentId); + Assert.False(promote.Promoted); + + await TestHelper.WaitForConditionAsync( + async () => + { + task = await FindAgentTaskAsync(session, started.AgentId); + return task?.LatestResponse?.Contains("TASK_AGENT_DONE", StringComparison.Ordinal) == true + || task?.Result?.Contains("TASK_AGENT_DONE", StringComparison.Ordinal) == true + || task?.Status is TaskAgentInfoStatus.Completed or TaskAgentInfoStatus.Failed; + }, + timeout: TimeSpan.FromSeconds(60), + timeoutMessage: $"Background agent task '{started.AgentId}' did not produce a final observable state."); + + Assert.NotNull(task); + Assert.Contains("TASK_AGENT_DONE", task.LatestResponse ?? task.Result ?? 
string.Empty); + + if (task.Status == TaskAgentInfoStatus.Idle) + { + var cancel = await session.Rpc.Tasks.CancelAsync(started.AgentId); + Assert.True(cancel.Cancelled); + } + + var remove = await session.Rpc.Tasks.RemoveAsync(started.AgentId); + Assert.True(remove.Removed); + + var afterRemove = await session.Rpc.Tasks.ListAsync(); + Assert.DoesNotContain(afterRemove.Tasks.OfType(), t => string.Equals(t.Id, started.AgentId, StringComparison.Ordinal)); + } + [Fact] public async Task Should_Return_Expected_Results_For_Missing_Pending_Handler_RequestIds() { @@ -78,5 +164,34 @@ public async Task Should_Return_Expected_Results_For_Missing_Pending_Handler_Req requestId: "missing-permanent-permission-request", result: new PermissionDecisionApprovePermanently { Domain = "example.com" }); Assert.False(permanentPermission.Success); + + var sessionApproval = await session.Rpc.Permissions.HandlePendingPermissionRequestAsync( + requestId: "missing-session-approval-request", + result: new PermissionDecisionApproveForSession + { + Approval = new PermissionDecisionApproveForSessionApprovalCustomTool + { + ToolName = "missing-tool", + }, + }); + Assert.False(sessionApproval.Success); + + var locationApproval = await session.Rpc.Permissions.HandlePendingPermissionRequestAsync( + requestId: "missing-location-approval-request", + result: new PermissionDecisionApproveForLocation + { + Approval = new PermissionDecisionApproveForLocationApprovalCustomTool + { + ToolName = "missing-tool", + }, + LocationKey = "missing-location", + }); + Assert.False(locationApproval.Success); + } + + private static async Task FindAgentTaskAsync(CopilotSession session, string agentId) + { + var tasks = await session.Rpc.Tasks.ListAsync(); + return tasks.Tasks.OfType().SingleOrDefault(t => string.Equals(t.Id, agentId, StringComparison.Ordinal)); } } diff --git a/dotnet/test/E2E/SessionConfigE2ETests.cs b/dotnet/test/E2E/SessionConfigE2ETests.cs index 9afc47bd4..4ba42ec50 100644 --- 
a/dotnet/test/E2E/SessionConfigE2ETests.cs +++ b/dotnet/test/E2E/SessionConfigE2ETests.cs @@ -120,6 +120,67 @@ public async Task Should_Use_Custom_SessionId() await session.DisposeAsync(); } + [Fact] + public async Task Should_Apply_ReasoningEffort_On_Session_Create() + { + const string reasoningModelId = "custom-reasoning-model"; + + var session = await CreateSessionAsync(new SessionConfig + { + Model = reasoningModelId, + Provider = CreateProxyProvider("create-reasoning"), + ReasoningEffort = "high", + }); + + var startEvent = Assert.Single((await session.GetMessagesAsync()).OfType()); + Assert.Equal(reasoningModelId, startEvent.Data.SelectedModel); + Assert.Equal("high", startEvent.Data.ReasoningEffort); + + await session.DisposeAsync(); + } + + [Theory] + [InlineData("low")] + [InlineData("medium")] + [InlineData("high")] + public async Task Should_Apply_All_ReasoningEffort_Values_On_Session_Create(string effort) + { + const string reasoningModelId = "custom-reasoning-model"; + + var session = await CreateSessionAsync(new SessionConfig + { + Model = reasoningModelId, + Provider = CreateProxyProvider($"reasoning-{effort}"), + ReasoningEffort = effort, + }); + + var startEvent = Assert.Single((await session.GetMessagesAsync()).OfType()); + Assert.Equal(reasoningModelId, startEvent.Data.SelectedModel); + Assert.Equal(effort, startEvent.Data.ReasoningEffort); + + await session.DisposeAsync(); + } + + [Fact] + public async Task Should_Apply_ReasoningEffort_On_Session_Resume() + { + var originalSession = await CreateSessionAsync(); + const string reasoningModelId = "custom-reasoning-model"; + var resumedSession = await ResumeSessionAsync(originalSession.SessionId, new ResumeSessionConfig + { + Model = reasoningModelId, + Provider = CreateProxyProvider("resume-reasoning"), + ReasoningEffort = "high", + }); + + var resumeEvent = Assert.Single((await resumedSession.GetMessagesAsync()).OfType()); + Assert.Equal(reasoningModelId, resumeEvent.Data.SelectedModel); + 
Assert.Equal("high", resumeEvent.Data.ReasoningEffort); + + await resumedSession.DisposeAsync(); + await originalSession.DisposeAsync(); + } + [Fact] public async Task Should_Forward_ClientName_In_UserAgent() { diff --git a/dotnet/test/E2E/SessionE2ETests.cs b/dotnet/test/E2E/SessionE2ETests.cs index 202f02f6f..50b4dc1f5 100644 --- a/dotnet/test/E2E/SessionE2ETests.cs +++ b/dotnet/test/E2E/SessionE2ETests.cs @@ -324,13 +324,12 @@ public async Task Should_Receive_Session_Events() }, }); - // session.start is dispatched asynchronously via the event channel; - // wait briefly for the consumer to deliver it. - var started = await Task.WhenAny(sessionStartReceived.Task, Task.Delay(TimeSpan.FromSeconds(5))); - Assert.Equal(sessionStartReceived.Task, started); + // session.start is dispatched asynchronously via the event channel. + await sessionStartReceived.Task.WaitAsync(TimeSpan.FromSeconds(5)); Assert.Contains(earlyEvents, evt => evt is SessionStartEvent); var receivedEvents = new List(); + var receivedEventsLock = new object(); var idleReceived = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); var concurrentCount = 0; var maxConcurrent = 0; @@ -339,15 +338,25 @@ public async Task Should_Receive_Session_Events() { // Track concurrent handler invocations to verify serial dispatch. var current = Interlocked.Increment(ref concurrentCount); - var seenMax = Volatile.Read(ref maxConcurrent); - if (current > seenMax) - Interlocked.CompareExchange(ref maxConcurrent, current, seenMax); - - Thread.Sleep(10); + try + { + var seenMax = Volatile.Read(ref maxConcurrent); + if (current > seenMax) + Interlocked.CompareExchange(ref maxConcurrent, current, seenMax); - Interlocked.Decrement(ref concurrentCount); + // Keep the handler active long enough that concurrent dispatch would + // overlap deterministically, without using sleep-based synchronization. 
+ Thread.SpinWait(100_000); + } + finally + { + Interlocked.Decrement(ref concurrentCount); + } - receivedEvents.Add(evt); + lock (receivedEventsLock) + { + receivedEvents.Add(evt); + } if (evt is SessionIdleEvent) { idleReceived.TrySetResult(true); @@ -361,10 +370,16 @@ public async Task Should_Receive_Session_Events() await idleReceived.Task.WaitAsync(TimeSpan.FromSeconds(60)); // Should have received multiple events (user message, assistant message, idle, etc.) - Assert.NotEmpty(receivedEvents); - Assert.Contains(receivedEvents, evt => evt is UserMessageEvent); - Assert.Contains(receivedEvents, evt => evt is AssistantMessageEvent); - Assert.Contains(receivedEvents, evt => evt is SessionIdleEvent); + List observedEvents; + lock (receivedEventsLock) + { + observedEvents = [.. receivedEvents]; + } + + Assert.NotEmpty(observedEvents); + Assert.Contains(observedEvents, evt => evt is UserMessageEvent); + Assert.Contains(observedEvents, evt => evt is AssistantMessageEvent); + Assert.Contains(observedEvents, evt => evt is SessionIdleEvent); // Events must be dispatched serially — never more than one handler invocation at a time. Assert.Equal(1, maxConcurrent); @@ -428,12 +443,15 @@ public async Task Should_List_Sessions_With_Context() await session.SendAndWaitAsync(new MessageOptions { Prompt = "Say OK." }); SessionMetadata? 
ourSession = null; - await WaitForAsync(async () => - { - var sessions = await Client.ListSessionsAsync(); - ourSession = sessions.FirstOrDefault(s => s.SessionId == session.SessionId); - return ourSession is not null; - }, TimeSpan.FromSeconds(10)); + await TestHelper.WaitForConditionAsync( + async () => + { + var sessions = await Client.ListSessionsAsync(); + ourSession = sessions.FirstOrDefault(s => s.SessionId == session.SessionId); + return ourSession is not null; + }, + timeout: TimeSpan.FromSeconds(10), + timeoutMessage: "Timed out waiting for the current session to appear in ListSessionsAsync()."); Assert.NotNull(ourSession); var allSessions = await Client.ListSessionsAsync(); @@ -455,11 +473,14 @@ public async Task Should_Get_Session_Metadata_By_Id() await session.SendAndWaitAsync(new MessageOptions { Prompt = "Say hello" }); SessionMetadata? metadata = null; - await WaitForAsync(async () => - { - metadata = await Client.GetSessionMetadataAsync(session.SessionId); - return metadata is not null; - }, TimeSpan.FromSeconds(10)); + await TestHelper.WaitForConditionAsync( + async () => + { + metadata = await Client.GetSessionMetadataAsync(session.SessionId); + return metadata is not null; + }, + timeout: TimeSpan.FromSeconds(10), + timeoutMessage: "Timed out waiting for GetSessionMetadataAsync() to return the persisted session."); Assert.NotNull(metadata); Assert.Equal(session.SessionId, metadata.SessionId); Assert.NotEqual(default, metadata.StartTime); @@ -569,7 +590,14 @@ public async Task Should_Log_Messages_At_Various_Levels() { var session = await CreateSessionAsync(); var events = new List(); - session.On(evt => events.Add(evt)); + var eventsLock = new object(); + session.On(evt => + { + lock (eventsLock) + { + events.Add(evt); + } + }); await session.LogAsync("Info message"); await session.LogAsync("Warning message", level: SessionLogLevel.Warning); @@ -577,26 +605,41 @@ public async Task Should_Log_Messages_At_Various_Levels() await 
session.LogAsync("Ephemeral message", ephemeral: true); // Poll until all 4 notification events arrive - await WaitForAsync(() => + await TestHelper.WaitForConditionAsync( + () => + { + List snapshot; + lock (eventsLock) + { + snapshot = [.. events]; + } + + var notifications = snapshot.Where(e => + e is SessionInfoEvent info && info.Data.InfoType == "notification" || + e is SessionWarningEvent warn && warn.Data.WarningType == "notification" || + e is SessionErrorEvent err && err.Data.ErrorType == "notification" + ).ToList(); + return notifications.Count >= 4; + }, + timeout: TimeSpan.FromSeconds(10), + timeoutMessage: "Timed out waiting for all four notification log events to be observed."); + + List observedEvents; + lock (eventsLock) { - var notifications = events.Where(e => - e is SessionInfoEvent info && info.Data.InfoType == "notification" || - e is SessionWarningEvent warn && warn.Data.WarningType == "notification" || - e is SessionErrorEvent err && err.Data.ErrorType == "notification" - ).ToList(); - return notifications.Count >= 4; - }, timeout: TimeSpan.FromSeconds(10)); + observedEvents = [.. 
events]; + } - var infoEvent = events.OfType().First(e => e.Data.Message == "Info message"); + var infoEvent = observedEvents.OfType().First(e => e.Data.Message == "Info message"); Assert.Equal("notification", infoEvent.Data.InfoType); - var warningEvent = events.OfType().First(e => e.Data.Message == "Warning message"); + var warningEvent = observedEvents.OfType().First(e => e.Data.Message == "Warning message"); Assert.Equal("notification", warningEvent.Data.WarningType); - var errorEvent = events.OfType().First(e => e.Data.Message == "Error message"); + var errorEvent = observedEvents.OfType().First(e => e.Data.Message == "Error message"); Assert.Equal("notification", errorEvent.Data.ErrorType); - var ephemeralEvent = events.OfType().First(e => e.Data.Message == "Ephemeral message"); + var ephemeralEvent = observedEvents.OfType().First(e => e.Data.Message == "Ephemeral message"); Assert.Equal("notification", ephemeralEvent.Data.InfoType); } @@ -776,7 +819,7 @@ public async Task Should_Send_With_Github_Reference_Attachment() await session.SendAndWaitAsync(new MessageOptions { - Prompt = "Summarize the referenced issue.", + Prompt = "Using only the GitHub reference metadata in this message, summarize the reference. 
Do not call any tools.", Attachments = [ new UserMessageAttachmentGithubReference @@ -922,36 +965,4 @@ public async Task Should_Resume_Session_With_Custom_Provider() await session.DisposeAsync(); } - - private static async Task WaitForAsync(Func condition, TimeSpan timeout) - { - using var cts = new CancellationTokenSource(timeout); - while (!condition()) - { - try - { - await Task.Delay(100, cts.Token); - } - catch (OperationCanceledException) - { - throw new TimeoutException($"Condition not met within {timeout}"); - } - } - } - - private static async Task WaitForAsync(Func> condition, TimeSpan timeout) - { - using var cts = new CancellationTokenSource(timeout); - while (!await condition()) - { - try - { - await Task.Delay(100, cts.Token); - } - catch (OperationCanceledException) - { - throw new TimeoutException($"Condition not met within {timeout}"); - } - } - } } diff --git a/dotnet/test/E2E/SessionFsE2ETests.cs b/dotnet/test/E2E/SessionFsE2ETests.cs index a9475c664..271c7f1e0 100644 --- a/dotnet/test/E2E/SessionFsE2ETests.cs +++ b/dotnet/test/E2E/SessionFsE2ETests.cs @@ -130,6 +130,10 @@ public async Task Should_Reject_SetProvider_When_Sessions_Already_Exist() { Console.Error.WriteLine($"Ignoring expected teardown IOException from ForceStopAsync: {ex.Message}"); } + finally + { + Ctx.UntrackClient(client2); + } } } finally @@ -505,42 +509,19 @@ private static string GetStoredPath(string providerRoot, string sessionId, strin private static async Task WaitForConditionAsync(Func condition, TimeSpan? timeout = null) { - await WaitForConditionAsync(() => Task.FromResult(condition()), timeout); + await TestHelper.WaitForConditionAsync( + condition, + timeout: timeout ?? TimeSpan.FromSeconds(30), + timeoutMessage: "Timed out waiting for the session_fs test condition."); } private static async Task WaitForConditionAsync(Func> condition, TimeSpan? timeout = null) { - using var cts = new CancellationTokenSource(timeout ?? TimeSpan.FromSeconds(30)); - Exception? 
lastException = null; - while (!cts.IsCancellationRequested) - { - try - { - if (await condition()) - { - return; - } - } - catch (IOException ex) - { - lastException = ex; - } - catch (UnauthorizedAccessException ex) - { - lastException = ex; - } - - try - { - await Task.Delay(100, cts.Token); - } - catch (OperationCanceledException) - { - break; - } - } - - throw new TimeoutException("Timed out waiting for condition.", lastException); + await TestHelper.WaitForConditionAsync( + condition, + timeout: timeout ?? TimeSpan.FromSeconds(30), + timeoutMessage: "Timed out waiting for the session_fs test condition.", + transientExceptionFilter: TestHelper.IsTransientFileSystemException); } private static async Task ReadAllTextSharedAsync(string path, CancellationToken cancellationToken = default) @@ -557,43 +538,21 @@ private static async Task TryDeleteDirectoryAsync(string path) return; } - using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(5)); - Exception? lastException = null; + await TestHelper.WaitForConditionAsync( + () => Task.FromResult(DeleteDirectoryIfPresent(path)), + timeout: TimeSpan.FromSeconds(5), + timeoutMessage: $"Timed out deleting directory '{path}'.", + transientExceptionFilter: TestHelper.IsTransientFileSystemException); - while (!cts.IsCancellationRequested) + static bool DeleteDirectoryIfPresent(string path) { - try - { - if (!Directory.Exists(path)) - { - return; - } - - Directory.Delete(path, recursive: true); - return; - } - catch (IOException ex) - { - lastException = ex; - } - catch (UnauthorizedAccessException ex) + if (!Directory.Exists(path)) { - lastException = ex; + return true; } - try - { - await Task.Delay(100, cts.Token); - } - catch (OperationCanceledException) - { - break; - } - } - - if (lastException is not null) - { - throw lastException; + Directory.Delete(path, recursive: true); + return !Directory.Exists(path); } } diff --git a/dotnet/test/E2E/SessionLifecycleE2ETests.cs 
b/dotnet/test/E2E/SessionLifecycleE2ETests.cs index 6c6d2812d..19134cb53 100644 --- a/dotnet/test/E2E/SessionLifecycleE2ETests.cs +++ b/dotnet/test/E2E/SessionLifecycleE2ETests.cs @@ -28,12 +28,15 @@ public async Task Should_List_Created_Sessions_After_Sending_A_Message() await session2.SendAndWaitAsync(new MessageOptions { Prompt = "Say world" }); IList? sessions = null; - await WaitForAsync(async () => - { - sessions = await Client.ListSessionsAsync(); - var ids = sessions.Select(s => s.SessionId).ToHashSet(); - return ids.Contains(session1.SessionId) && ids.Contains(session2.SessionId); - }, TimeSpan.FromSeconds(10)); + await TestHelper.WaitForConditionAsync( + async () => + { + sessions = await Client.ListSessionsAsync(); + var ids = sessions.Select(s => s.SessionId).ToHashSet(); + return ids.Contains(session1.SessionId) && ids.Contains(session2.SessionId); + }, + timeout: TimeSpan.FromSeconds(10), + timeoutMessage: "Timed out waiting for both created sessions to appear in ListSessionsAsync()."); Assert.NotNull(sessions); var sessionIds = sessions!.Select(s => s.SessionId).ToList(); @@ -54,11 +57,14 @@ public async Task Should_Delete_Session_Permanently() await session.SendAndWaitAsync(new MessageOptions { Prompt = "Say hi" }); // Wait for the session to appear in the list - await WaitForAsync(async () => - { - var before = await Client.ListSessionsAsync(); - return before.Any(s => s.SessionId == sessionId); - }, TimeSpan.FromSeconds(10)); + await TestHelper.WaitForConditionAsync( + async () => + { + var before = await Client.ListSessionsAsync(); + return before.Any(s => s.SessionId == sessionId); + }, + timeout: TimeSpan.FromSeconds(10), + timeoutMessage: "Timed out waiting for the persisted session to appear in ListSessionsAsync()."); await session.DisposeAsync(); await Client.DeleteSessionAsync(sessionId); @@ -115,18 +121,43 @@ public async Task Should_Support_Multiple_Concurrent_Sessions() await session2.DisposeAsync(); } - /// - /// Polls until it returns 
true or the timeout elapses. - /// - private static async Task WaitForAsync(Func> condition, TimeSpan timeout) + [Fact] + public async Task Should_Isolate_Events_Between_Concurrent_Sessions() { - var deadline = DateTime.UtcNow + timeout; - while (DateTime.UtcNow < deadline) + var session1 = await CreateSessionAsync(); + var session2 = await CreateSessionAsync(); + + var session1Events = new List(); + var session2Events = new List(); + + session1.On(evt => { lock (session1Events) { session1Events.Add(evt); } }); + session2.On(evt => { lock (session2Events) { session2Events.Add(evt); } }); + + // Send to both sessions + await session1.SendAndWaitAsync(new MessageOptions { - if (await condition()) return; - await Task.Delay(100); - } - // Final attempt — let the test assertion below catch the failure - await condition(); + Prompt = "Say 'session_one_response'.", + }); + await session2.SendAndWaitAsync(new MessageOptions + { + Prompt = "Say 'session_two_response'.", + }); + + List s1Snapshot, s2Snapshot; + lock (session1Events) { s1Snapshot = [.. session1Events]; } + lock (session2Events) { s2Snapshot = [.. session2Events]; } + + // Session 1's events should contain "session_one_response" but NOT "session_two_response" + var s1Messages = s1Snapshot.OfType().Select(e => e.Data.Content ?? "").ToList(); + Assert.Contains(s1Messages, m => m.Contains("session_one_response")); + Assert.DoesNotContain(s1Messages, m => m.Contains("session_two_response")); + + // Session 2's events should contain "session_two_response" but NOT "session_one_response" + var s2Messages = s2Snapshot.OfType().Select(e => e.Data.Content ?? 
"").ToList(); + Assert.Contains(s2Messages, m => m.Contains("session_two_response")); + Assert.DoesNotContain(s2Messages, m => m.Contains("session_one_response")); + + await session1.DisposeAsync(); + await session2.DisposeAsync(); } } diff --git a/dotnet/test/E2E/SessionMcpAndAgentConfigE2ETests.cs b/dotnet/test/E2E/SessionMcpAndAgentConfigE2ETests.cs index e2267c546..977518b59 100644 --- a/dotnet/test/E2E/SessionMcpAndAgentConfigE2ETests.cs +++ b/dotnet/test/E2E/SessionMcpAndAgentConfigE2ETests.cs @@ -2,6 +2,7 @@ * Copyright (c) Microsoft Corporation. All rights reserved. *--------------------------------------------------------------------------------------------*/ +using GitHub.Copilot.SDK.Rpc; using GitHub.Copilot.SDK.Test.Harness; using Xunit; using Xunit.Abstractions; @@ -290,6 +291,83 @@ public async Task Should_Pass_Literal_Env_Values_To_Mcp_Server_Subprocess() await session.DisposeAsync(); } + [Fact] + public async Task Should_Round_Trip_Mcp_Server_Elicitation_Request() + { + var testHarnessDir = FindTestHarnessDir(); + var configPath = Path.Join(Ctx.WorkDir, $"elicitation-config-{Guid.NewGuid():N}.json"); + await File.WriteAllTextAsync( + configPath, + """ + [ + { + "message": "Pick a color.", + "requestedSchema": { + "type": "object", + "properties": { + "color": { + "type": "string", + "enum": ["red", "blue"] + } + }, + "required": ["color"] + } + } + ] + """); + + var elicitationContext = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + var mcpServers = new Dictionary + { + ["test-elicitation-server"] = new McpStdioServerConfig + { + Command = "node", + Args = + [ + Path.Join(testHarnessDir, "test-mcp-elicitation-server.mjs"), + "--config", + configPath + ], + Cwd = testHarnessDir, + Tools = ["*"] + } + }; + + var session = await CreateSessionAsync(new SessionConfig + { + McpServers = mcpServers, + OnPermissionRequest = PermissionHandler.ApproveAll, + OnElicitationRequest = context => + { + 
elicitationContext.TrySetResult(context); + return Task.FromResult(new ElicitationResult + { + Action = UIElicitationResponseAction.Accept, + Content = new Dictionary { ["color"] = "blue" } + }); + }, + }); + + await WaitForMcpServerStatusAsync(session, "test-elicitation-server", McpServerStatus.Connected); + + var message = await session.SendAndWaitAsync(new MessageOptions + { + Prompt = "Use the test-elicitation-server-request_user_input tool and tell me the chosen color. Reply with just the color." + }); + + var request = await elicitationContext.Task.WaitAsync(TimeSpan.FromSeconds(60)); + + Assert.Equal("Pick a color.", request.Message); + Assert.Equal(ElicitationRequestedMode.Form, request.Mode); + Assert.Contains("test-elicitation-server", request.ElicitationSource ?? string.Empty, StringComparison.Ordinal); + Assert.NotNull(request.RequestedSchema); + Assert.Equal("object", request.RequestedSchema!.Type); + Assert.Contains("color", request.RequestedSchema.Properties.Keys); + Assert.Contains("blue", message?.Data.Content ?? 
string.Empty); + + await session.DisposeAsync(); + } + [Fact] public async Task Should_Accept_Both_MCP_Servers_And_Custom_Agents() { @@ -344,4 +422,22 @@ private static string FindTestHarnessDir() } throw new InvalidOperationException("Could not find test/harness/test-mcp-server.mjs"); } + + private static async Task WaitForMcpServerStatusAsync( + CopilotSession session, + string serverName, + McpServerStatus expectedStatus) + { + await TestHelper.WaitForConditionAsync( + async () => + { + var result = await session.Rpc.Mcp.ListAsync(); + return result.Servers.Any(server => + string.Equals(server.Name, serverName, StringComparison.Ordinal) + && server.Status == expectedStatus); + }, + timeout: TimeSpan.FromSeconds(60), + pollInterval: TimeSpan.FromMilliseconds(200), + timeoutMessage: $"{serverName} reaching {expectedStatus}"); + } } diff --git a/dotnet/test/E2E/StreamingFidelityE2ETests.cs b/dotnet/test/E2E/StreamingFidelityE2ETests.cs index e10732092..c6977c8e9 100644 --- a/dotnet/test/E2E/StreamingFidelityE2ETests.cs +++ b/dotnet/test/E2E/StreamingFidelityE2ETests.cs @@ -106,6 +106,75 @@ public async Task Should_Produce_Deltas_After_Session_Resume() await session2.DisposeAsync(); } + [Fact] + public async Task Should_Not_Produce_Deltas_After_Session_Resume_With_Streaming_Disabled() + { + var session = await CreateSessionAsync(new SessionConfig { Streaming = true }); + await session.SendAndWaitAsync(new MessageOptions { Prompt = "What is 3 + 6?" }); + await session.DisposeAsync(); + + // Resume using a new client with streaming DISABLED + using var newClient = Ctx.CreateClient(); + var session2 = await newClient.ResumeSessionAsync(session.SessionId, + new ResumeSessionConfig { OnPermissionRequest = PermissionHandler.ApproveAll, Streaming = false }); + + var events = new List(); + session2.On(evt => { lock (events) { events.Add(evt); } }); + + var answer = await session2.SendAndWaitAsync(new MessageOptions { Prompt = "Now if you double that, what do you get?" 
}); + Assert.NotNull(answer); + Assert.Contains("18", answer!.Data.Content ?? string.Empty); + + List snapshot; + lock (events) { snapshot = [.. events]; } + + // No deltas when streaming is toggled off + var deltaEvents = snapshot.OfType().ToList(); + Assert.Empty(deltaEvents); + + // But should still have a final assistant.message + var assistantEvents = snapshot.OfType().ToList(); + Assert.NotEmpty(assistantEvents); + + await session2.DisposeAsync(); + } + + [Fact] + public async Task Should_Emit_Streaming_Deltas_With_Reasoning_Effort_Configured() + { + // Verifies that setting ReasoningEffort alongside Streaming=true does not break + // the streaming pipeline — deltas still arrive and complete successfully. + var session = await CreateSessionAsync(new SessionConfig + { + Streaming = true, + ReasoningEffort = "high", + }); + + var events = new List(); + session.On(evt => { lock (events) { events.Add(evt); } }); + + await session.SendAndWaitAsync(new MessageOptions { Prompt = "What is 15 * 17?" }); + + List snapshot; + lock (events) { snapshot = [.. events]; } + + // With streaming + reasoning effort, we should still get content deltas + var deltaEvents = snapshot.OfType().ToList(); + Assert.NotEmpty(deltaEvents); + + // And a final assistant.message with the answer + var assistantEvents = snapshot.OfType().ToList(); + Assert.NotEmpty(assistantEvents); + Assert.Contains("255", assistantEvents.Last().Data.Content ?? 
string.Empty); + + // Verify the session was created with reasoning effort via GetMessages + var messages = await session.GetMessagesAsync(); + var startEvent = Assert.Single(messages.OfType()); + Assert.Equal("high", startEvent.Data.ReasoningEffort); + + await session.DisposeAsync(); + } + [Fact] public async Task Should_Emit_AssistantMessageStart_Before_Deltas_With_Matching_MessageId() { diff --git a/dotnet/test/E2E/TelemetryExportE2ETests.cs b/dotnet/test/E2E/TelemetryExportE2ETests.cs index 2ce52a00c..b4fced4e2 100644 --- a/dotnet/test/E2E/TelemetryExportE2ETests.cs +++ b/dotnet/test/E2E/TelemetryExportE2ETests.cs @@ -93,42 +93,42 @@ private static async Task> ReadTelemetryEntriesAsync( string path, Func, bool> isComplete) { - using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(30)); - while (!cts.IsCancellationRequested) - { - if (File.Exists(path) && new FileInfo(path).Length > 0) + IReadOnlyList entries = []; + await TestHelper.WaitForConditionAsync( + async () => { - var entries = new List(); - using var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite | FileShare.Delete); - using var reader = new StreamReader(stream); - while (await reader.ReadLineAsync(cts.Token) is { } line) - { - if (string.IsNullOrWhiteSpace(line)) - { - continue; - } - - using var document = JsonDocument.Parse(line); - entries.Add(document.RootElement.Clone()); - } + entries = await ReadTelemetryEntriesOnceAsync(path); + return entries.Count > 0 && isComplete(entries); + }, + timeout: TimeSpan.FromSeconds(30), + timeoutMessage: $"Timed out waiting for telemetry records in '{path}'.", + transientExceptionFilter: exception => TestHelper.IsTransientFileSystemException(exception) || exception is JsonException); - if (entries.Count > 0 && isComplete(entries)) - { - return entries; - } - } + return entries; - try + static async Task> ReadTelemetryEntriesOnceAsync(string path) + { + if (!File.Exists(path) || new FileInfo(path).Length == 0) { - 
await Task.Delay(TimeSpan.FromMilliseconds(100), cts.Token); + return []; } - catch (OperationCanceledException) + + var entries = new List(); + using var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite | FileShare.Delete); + using var reader = new StreamReader(stream); + while (await reader.ReadLineAsync() is { } line) { - break; + if (string.IsNullOrWhiteSpace(line)) + { + continue; + } + + using var document = JsonDocument.Parse(line); + entries.Add(document.RootElement.Clone()); } - } - throw new TimeoutException($"Timed out waiting for telemetry records in '{path}'."); + return entries; + } } private static string? GetTraceId(JsonElement entry) => GetStringProperty(entry, "traceId"); diff --git a/dotnet/test/E2E/ToolResultsE2ETests.cs b/dotnet/test/E2E/ToolResultsE2ETests.cs index 2454f5d23..c1283baa5 100644 --- a/dotnet/test/E2E/ToolResultsE2ETests.cs +++ b/dotnet/test/E2E/ToolResultsE2ETests.cs @@ -118,4 +118,104 @@ static ToolResultAIContent AnalyzeCode([Description("File to analyze")] string f }, }); } + + [Fact] + public async Task Should_Handle_Tool_Result_With_Rejected_ResultType() + { + var toolExecutionComplete = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + var toolHandlerCalled = false; + + var session = await CreateSessionAsync(new SessionConfig + { + Tools = [AIFunctionFactory.Create(AttemptDeploy, "deploy_service", serializerOptions: ToolResultsJsonContext.Default.Options)], + OnPermissionRequest = PermissionHandler.ApproveAll, + }); + + session.On(evt => + { + if (evt is ToolExecutionCompleteEvent toolEvt) + { + toolExecutionComplete.TrySetResult(toolEvt); + } + }); + var idle = TestHelper.GetNextEventOfTypeAsync(session); + + await session.SendAsync(new MessageOptions + { + Prompt = "Deploy the service using deploy_service. If it's rejected, tell me it was 'rejected by policy'." 
+ }); + + var toolEvt = await toolExecutionComplete.Task.WaitAsync(TimeSpan.FromSeconds(60)); + // The tool handler was called and returned a "rejected" result + Assert.True(toolHandlerCalled, "Tool handler should have been called"); + Assert.NotNull(toolEvt); + Assert.False(toolEvt.Data.Success); + Assert.Equal("rejected", toolEvt.Data.Error?.Code); + Assert.Contains("Deployment rejected", toolEvt.Data.Error?.Message ?? string.Empty); + + // A rejected tool result may complete the turn without a follow-up assistant + // message; the stable contract is the tool result event plus session idle. + await idle; + + [Description("Deploys a service")] + ToolResultAIContent AttemptDeploy() + { + toolHandlerCalled = true; + return new(new() + { + TextResultForLlm = "Deployment rejected: policy violation - production deployments require approval", + ResultType = "rejected", + }); + } + } + + [Fact] + public async Task Should_Handle_Tool_Result_With_Denied_ResultType() + { + var toolExecutionComplete = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + var toolHandlerCalled = false; + + var session = await CreateSessionAsync(new SessionConfig + { + Tools = [AIFunctionFactory.Create(AccessSecret, "access_secret", serializerOptions: ToolResultsJsonContext.Default.Options)], + OnPermissionRequest = PermissionHandler.ApproveAll, + }); + + session.On(evt => + { + if (evt is ToolExecutionCompleteEvent toolEvt) + { + toolExecutionComplete.TrySetResult(toolEvt); + } + }); + var idle = TestHelper.GetNextEventOfTypeAsync(session); + + await session.SendAsync(new MessageOptions + { + Prompt = "Use access_secret to get the API key. If access is denied, tell me it was 'access denied'." 
+ }); + + var toolEvt = await toolExecutionComplete.Task.WaitAsync(TimeSpan.FromSeconds(60)); + // The tool handler was called and returned a "denied" result + Assert.True(toolHandlerCalled, "Tool handler should have been called"); + Assert.NotNull(toolEvt); + Assert.False(toolEvt.Data.Success); + Assert.Equal("denied", toolEvt.Data.Error?.Code); + Assert.Contains("Access denied", toolEvt.Data.Error?.Message ?? string.Empty); + + // A denied tool result may complete the turn without a follow-up assistant + // message; the stable contract is the tool result event plus session idle. + await idle; + + [Description("Accesses a secret")] + ToolResultAIContent AccessSecret() + { + toolHandlerCalled = true; + return new(new() + { + TextResultForLlm = "Access denied: insufficient permissions to read secrets", + ResultType = "denied", + }); + } + } } diff --git a/dotnet/test/E2E/ToolsE2ETests.cs b/dotnet/test/E2E/ToolsE2ETests.cs index 624f528e0..4ecabf96d 100644 --- a/dotnet/test/E2E/ToolsE2ETests.cs +++ b/dotnet/test/E2E/ToolsE2ETests.cs @@ -310,4 +310,89 @@ string EncryptStringDenied([Description("String to encrypt")] string input) return input.ToUpperInvariant(); } } + + [Fact] + public async Task Should_Execute_Multiple_Custom_Tools_In_Parallel_Single_Turn() + { + var toolACalled = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + var toolBCalled = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + + var session = await CreateSessionAsync(new SessionConfig + { + Tools = + [ + AIFunctionFactory.Create(LookupCity, "lookup_city"), + AIFunctionFactory.Create(LookupCountry, "lookup_country"), + ], + OnPermissionRequest = PermissionHandler.ApproveAll, + }); + + await session.SendAsync(new MessageOptions + { + Prompt = "Use lookup_city with 'Paris' and lookup_country with 'France' at the same time, then combine both results in your reply." 
+ }); + + // Both tools should have been called + var cityResult = await toolACalled.Task.WaitAsync(TimeSpan.FromSeconds(60)); + var countryResult = await toolBCalled.Task.WaitAsync(TimeSpan.FromSeconds(60)); + Assert.Equal("Paris", cityResult); + Assert.Equal("France", countryResult); + + var assistantMessage = await TestHelper.GetFinalAssistantMessageAsync(session); + Assert.NotNull(assistantMessage); + var content = assistantMessage!.Data.Content ?? string.Empty; + Assert.Contains("CITY_PARIS", content); + Assert.Contains("COUNTRY_FRANCE", content); + + [Description("Looks up city information")] + string LookupCity([Description("City name")] string city) + { + toolACalled.TrySetResult(city); + return $"CITY_{city.ToUpperInvariant()}"; + } + + [Description("Looks up country information")] + string LookupCountry([Description("Country name")] string country) + { + toolBCalled.TrySetResult(country); + return $"COUNTRY_{country.ToUpperInvariant()}"; + } + } + + [Fact] + public async Task Should_Respect_AvailableTools_And_ExcludedTools_Combined() + { + bool excludedToolCalled = false; + + var session = await CreateSessionAsync(new SessionConfig + { + Tools = + [ + AIFunctionFactory.Create(AllowedTool, "allowed_tool"), + AIFunctionFactory.Create(ExcludedTool, "excluded_tool"), + ], + AvailableTools = ["allowed_tool", "excluded_tool"], + ExcludedTools = ["excluded_tool"], + OnPermissionRequest = PermissionHandler.ApproveAll, + }); + + var result = await session.SendAndWaitAsync(new MessageOptions + { + Prompt = "Use the allowed_tool with input 'test'. Do NOT use excluded_tool.", + }); + + Assert.NotNull(result); + Assert.Contains("ALLOWED_TEST", result!.Data.Content ?? 
string.Empty); + Assert.False(excludedToolCalled, "Excluded tool should not have been called"); + + [Description("An allowed tool")] + string AllowedTool([Description("Input value")] string input) => $"ALLOWED_{input.ToUpperInvariant()}"; + + [Description("A tool that should be excluded")] + string ExcludedTool([Description("Input value")] string input) + { + excludedToolCalled = true; + return $"EXCLUDED_{input.ToUpperInvariant()}"; + } + } } diff --git a/dotnet/test/Harness/CapiProxy.cs b/dotnet/test/Harness/CapiProxy.cs index 846b7651d..f863b651c 100644 --- a/dotnet/test/Harness/CapiProxy.cs +++ b/dotnet/test/Harness/CapiProxy.cs @@ -104,10 +104,11 @@ public async Task StopAsync(bool skipWritingCache = false) if (_process is { HasExited: false }) { - try { _process.Kill(); await _process.WaitForExitAsync(); } + try { _process.Kill(entireProcessTree: true); await _process.WaitForExitAsync(); } catch { /* Ignore */ } } + _process?.Dispose(); _process = null; _startupTask = null; } diff --git a/dotnet/test/Harness/E2ETestBase.cs b/dotnet/test/Harness/E2ETestBase.cs index 46162b50f..299616d28 100644 --- a/dotnet/test/Harness/E2ETestBase.cs +++ b/dotnet/test/Harness/E2ETestBase.cs @@ -47,7 +47,7 @@ public void Log(LogLevel logLevel, EventId eventId, TState state, Except } } - private static string GetTestName(ITestOutputHelper output) + internal static string GetTestName(ITestOutputHelper output) { // xUnit doesn't provide a public API to get the current test name. 
var type = output.GetType(); @@ -58,12 +58,13 @@ private static string GetTestName(ITestOutputHelper output) public async Task InitializeAsync() { + await Ctx.CleanupAfterTestAsync(); await Ctx.ConfigureForTestAsync(_snapshotCategory, _testName); } public Task DisposeAsync() { - return Task.CompletedTask; + return Ctx.CleanupAfterTestAsync(); } /// diff --git a/dotnet/test/Harness/E2ETestContext.cs b/dotnet/test/Harness/E2ETestContext.cs index bc431b31f..88627ba6d 100644 --- a/dotnet/test/Harness/E2ETestContext.cs +++ b/dotnet/test/Harness/E2ETestContext.cs @@ -19,6 +19,9 @@ public sealed class E2ETestContext : IAsyncDisposable private readonly CapiProxy _proxy; private readonly string _repoRoot; + private readonly object _clientsLock = new(); + private readonly List _persistentClients = []; + private readonly List _transientClients = []; private E2ETestContext(string homeDir, string workDir, string proxyUrl, CapiProxy proxy, string repoRoot) { @@ -169,7 +172,11 @@ public IReadOnlyDictionary GetEnvironment() return env!; } - public CopilotClient CreateClient(bool? useStdio = null, CopilotClientOptions? options = null, bool autoInjectGitHubToken = true) + public CopilotClient CreateClient( + bool? useStdio = null, + CopilotClientOptions? options = null, + bool autoInjectGitHubToken = true, + bool persistent = false) { options ??= new CopilotClientOptions(); @@ -191,16 +198,143 @@ public CopilotClient CreateClient(bool? useStdio = null, CopilotClientOptions? 
o options.GitHubToken = "fake-token-for-e2e-tests"; } - return new(options); + var client = new CopilotClient(options); + lock (_clientsLock) + { + if (persistent) + { + _persistentClients.Add(client); + } + else + { + _transientClients.Add(client); + } + } + return client; + } + + public void UntrackClient(CopilotClient client) + { + lock (_clientsLock) + { + _persistentClients.Remove(client); + _transientClients.Remove(client); + } + } + + public async Task CleanupAfterTestAsync() + { + // Per-test cleanup only stops clients created for a specific test. + // The shared persistent client and temp directories are cleaned when the fixture is disposed. + var errors = new List(); + CopilotClient[] transientClients; + + lock (_clientsLock) + { + transientClients = [.. _transientClients]; + _transientClients.Clear(); + } + + foreach (var client in transientClients) + { + try + { + await client.ForceStopAsync(); + } + catch (Exception ex) when (IsTransientCleanupException(ex)) + { + errors.Add(ex); + } + } + + if (errors.Count == 1) + { + throw errors[0]; + } + if (errors.Count > 1) + { + throw new AggregateException(errors); + } } public async ValueTask DisposeAsync() { + var errors = new List(); + CopilotClient[] clients; + + lock (_clientsLock) + { + clients = [.. 
_persistentClients.Concat(_transientClients)]; + _persistentClients.Clear(); + _transientClients.Clear(); + } + + foreach (var client in clients) + { + try + { + await client.ForceStopAsync(); + } + catch (Exception ex) when (IsTransientCleanupException(ex)) + { + errors.Add(ex); + } + } + // Skip writing snapshots in CI to avoid corrupting them on test failures var isCI = !string.IsNullOrEmpty(Environment.GetEnvironmentVariable("GITHUB_ACTIONS")); - await _proxy.StopAsync(skipWritingCache: isCI); + try { await _proxy.StopAsync(skipWritingCache: isCI); } catch (Exception ex) when (IsTransientCleanupException(ex)) { errors.Add(ex); } + + try { await DeleteDirectoryAsync(HomeDir); } catch (Exception ex) when (IsTransientCleanupException(ex)) { errors.Add(ex); } + try { await DeleteDirectoryAsync(WorkDir); } catch (Exception ex) when (IsTransientCleanupException(ex)) { errors.Add(ex); } + + if (errors.Count == 1) + { + throw errors[0]; + } + if (errors.Count > 1) + { + throw new AggregateException(errors); + } + } + + private static async Task DeleteDirectoryAsync(string path) + { + const int maxAttempts = 40; + var delay = TimeSpan.FromMilliseconds(50); + var lastException = (Exception?)null; - try { if (Directory.Exists(HomeDir)) Directory.Delete(HomeDir, true); } catch { } - try { if (Directory.Exists(WorkDir)) Directory.Delete(WorkDir, true); } catch { } + for (var attempt = 1; attempt <= maxAttempts; attempt++) + { + if (!Directory.Exists(path)) + { + return; + } + + try + { + Directory.Delete(path, recursive: true); + return; + } + catch (Exception ex) when (IsTransientCleanupException(ex)) + { + lastException = ex; + if (attempt == maxAttempts) + { + break; + } + + await Task.Delay(delay); + delay = TimeSpan.FromMilliseconds(Math.Min(delay.TotalMilliseconds * 2, 250)); + } + } + + if (Directory.Exists(path)) + { + throw new IOException($"Failed to delete directory '{path}' after {maxAttempts} attempts.", lastException); + } } + + private static bool 
IsTransientCleanupException(Exception exception) + => exception is IOException or UnauthorizedAccessException; } diff --git a/dotnet/test/Harness/E2ETestFixture.cs b/dotnet/test/Harness/E2ETestFixture.cs index f1e396c98..9dbdfbe2f 100644 --- a/dotnet/test/Harness/E2ETestFixture.cs +++ b/dotnet/test/Harness/E2ETestFixture.cs @@ -15,16 +15,11 @@ public class E2ETestFixture : IAsyncLifetime public async Task InitializeAsync() { Ctx = await E2ETestContext.CreateAsync(); - Client = Ctx.CreateClient(); + Client = Ctx.CreateClient(persistent: true); } public async Task DisposeAsync() { - if (Client is not null) - { - await Client.ForceStopAsync(); - } - await Ctx.DisposeAsync(); } } diff --git a/dotnet/test/Harness/TestHelper.cs b/dotnet/test/Harness/TestHelper.cs index 52e681b88..1afd21d3c 100644 --- a/dotnet/test/Harness/TestHelper.cs +++ b/dotnet/test/Harness/TestHelper.cs @@ -11,6 +11,7 @@ public static class TestHelper // assistant message arrives. Subsequent tests in the same fixture typically // complete in well under a second. private static readonly TimeSpan DefaultEventTimeout = TimeSpan.FromSeconds(120); + private static readonly TimeSpan DefaultPollInterval = TimeSpan.FromMilliseconds(100); public static async Task GetFinalAssistantMessageAsync( CopilotSession session, @@ -115,13 +116,20 @@ async void CheckExistingMessages() public static async Task GetNextEventOfTypeAsync( CopilotSession session, TimeSpan? timeout = null) where T : SessionEvent + => await GetNextEventOfTypeAsync(session, static _ => true, timeout); + + public static async Task GetNextEventOfTypeAsync( + CopilotSession session, + Func predicate, + TimeSpan? timeout = null, + string? timeoutDescription = null) where T : SessionEvent { var tcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); using var cts = new CancellationTokenSource(timeout ?? 
DefaultEventTimeout); using var subscription = session.On(evt => { - if (evt is T matched) + if (evt is T matched && predicate(matched)) { tcs.TrySetResult(matched); } @@ -132,8 +140,76 @@ public static async Task GetNextEventOfTypeAsync( }); cts.Token.Register(() => tcs.TrySetException( - new TimeoutException($"Timeout waiting for event of type '{typeof(T).Name}'"))); + new TimeoutException($"Timeout waiting for {timeoutDescription ?? $"event of type '{typeof(T).Name}'"}"))); return await tcs.Task; } + + public static Task WaitForConditionAsync( + Func condition, + TimeSpan? timeout = null, + string? timeoutMessage = null, + TimeSpan? pollInterval = null) + => WaitForConditionAsync( + () => Task.FromResult(condition()), + timeout, + timeoutMessage, + transientExceptionFilter: null, + pollInterval); + + public static async Task WaitForConditionAsync( + Func> condition, + TimeSpan? timeout = null, + string? timeoutMessage = null, + Func? transientExceptionFilter = null, + TimeSpan? pollInterval = null) + { + using var cts = new CancellationTokenSource(timeout ?? DefaultEventTimeout); + Exception? lastTransientException = null; + + while (true) + { + try + { + if (await condition()) + { + return; + } + + lastTransientException = null; + } + catch (Exception ex) when (transientExceptionFilter?.Invoke(ex) == true) + { + lastTransientException = ex; + } + + try + { + await Task.Delay(pollInterval ?? DefaultPollInterval, cts.Token); + } + catch (OperationCanceledException) when (cts.IsCancellationRequested) + { + break; + } + } + + try + { + if (await condition()) + { + return; + } + } + catch (Exception ex) when (transientExceptionFilter?.Invoke(ex) == true) + { + lastTransientException = ex; + } + + throw lastTransientException is null + ? new TimeoutException(timeoutMessage ?? "Timed out waiting for condition.") + : new TimeoutException(timeoutMessage ?? 
"Timed out waiting for condition.", lastTransientException); + } + + public static bool IsTransientFileSystemException(Exception exception) + => exception is IOException or UnauthorizedAccessException; } diff --git a/go/client.go b/go/client.go index b61960a28..851dcf4e2 100644 --- a/go/client.go +++ b/go/client.go @@ -1387,7 +1387,7 @@ func (c *Client) verifyProtocolVersion(ctx context.Context) error { connectResult, err := c.internalRPC.Connect(ctx, &rpc.ConnectRequest{Token: tokenPtr}) if err != nil { var rpcErr *jsonrpc2.Error - if errors.As(err, &rpcErr) && rpcErr.Code == jsonrpc2.ErrMethodNotFound.Code { + if errors.As(err, &rpcErr) && (rpcErr.Code == jsonrpc2.ErrMethodNotFound.Code || rpcErr.Message == "Unhandled method connect") { // Legacy server without `connect`; fall back to `ping`. A token, if any, // is silently dropped — the legacy server can't enforce one. pingResult, perr := c.Ping(ctx, "") diff --git a/go/internal/e2e/abort_e2e_test.go b/go/internal/e2e/abort_e2e_test.go new file mode 100644 index 000000000..10514b5db --- /dev/null +++ b/go/internal/e2e/abort_e2e_test.go @@ -0,0 +1,204 @@ +package e2e + +import ( + "strings" + "sync" + "testing" + "time" + + copilot "github.com/github/copilot-sdk/go" + "github.com/github/copilot-sdk/go/internal/e2e/testharness" +) + +func TestAbortE2E(t *testing.T) { + ctx := testharness.NewTestContext(t) + client := ctx.NewClient() + t.Cleanup(func() { client.ForceStop() }) + + // Verifies that Abort cleanly interrupts an active turn during streaming + // without leaving dangling state or causing exceptions in the event delivery pipeline. 
+ t.Run("should abort during active streaming", func(t *testing.T) { + ctx.ConfigureForTest(t) + + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + Streaming: true, + }) + if err != nil { + t.Fatalf("Failed to create session: %v", err) + } + t.Cleanup(func() { _ = session.Disconnect() }) + + var mu sync.Mutex + var events []copilot.SessionEvent + firstDelta := make(chan *copilot.AssistantMessageDeltaData, 1) + + session.On(func(event copilot.SessionEvent) { + mu.Lock() + events = append(events, event) + mu.Unlock() + if d, ok := event.Data.(*copilot.AssistantMessageDeltaData); ok { + select { + case firstDelta <- d: + default: + } + } + }) + + // Fire-and-forget — we'll abort before it finishes + go func() { + _, _ = session.Send(t.Context(), copilot.MessageOptions{ + Prompt: "Write a very long essay about the history of computing, covering every decade from the 1940s to the 2020s in great detail.", + }) + }() + + // Wait for at least one delta to arrive (proves streaming started) + var delta *copilot.AssistantMessageDeltaData + select { + case delta = <-firstDelta: + case <-time.After(60 * time.Second): + t.Fatal("Timed out waiting for first streaming delta") + } + if delta.DeltaContent == "" { + t.Error("Expected first delta to have content") + } + + // Now abort mid-stream + if err := session.Abort(t.Context()); err != nil { + t.Fatalf("Abort failed: %v", err) + } + + mu.Lock() + snapshot := make([]copilot.SessionEvent, len(events)) + copy(snapshot, events) + mu.Unlock() + + // Key contract: at least one delta arrived before abort + hasDelta := false + for _, e := range snapshot { + if e.Type == copilot.SessionEventTypeAssistantMessageDelta { + hasDelta = true + break + } + } + if !hasDelta { + t.Error("Expected at least one assistant.message_delta event before abort") + } + + // Session should be usable after abort. 
Wait for the specific recovery + // message rather than racing against a late idle from the aborted turn. + recoveryReceived := make(chan *copilot.AssistantMessageData, 1) + session.On(func(event copilot.SessionEvent) { + if d, ok := event.Data.(*copilot.AssistantMessageData); ok { + if strings.Contains(strings.ToLower(d.Content), "abort_recovery_ok") { + select { + case recoveryReceived <- d: + default: + } + } + } + }) + + go func() { + _, _ = session.Send(t.Context(), copilot.MessageOptions{ + Prompt: "Say 'abort_recovery_ok'.", + }) + }() + + select { + case msg := <-recoveryReceived: + if !strings.Contains(strings.ToLower(msg.Content), "abort_recovery_ok") { + t.Errorf("Expected recovery message to contain 'abort_recovery_ok', got %q", msg.Content) + } + case <-time.After(60 * time.Second): + t.Fatal("Timed out waiting for recovery message after abort") + } + }) + + // Verifies that Abort cleanly interrupts an active turn during tool execution. + t.Run("should abort during active tool execution", func(t *testing.T) { + ctx.ConfigureForTest(t) + + type ValueParams struct { + Value string `json:"value" jsonschema:"Value to analyze"` + } + toolStarted := make(chan string, 1) + releaseTool := make(chan string, 1) + + slowTool := copilot.DefineTool("slow_analysis", "A slow analysis tool that blocks until released", + func(params ValueParams, inv copilot.ToolInvocation) (string, error) { + select { + case toolStarted <- params.Value: + default: + } + return <-releaseTool, nil + }) + slowTool.SkipPermission = true + + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + Tools: []copilot.Tool{slowTool}, + }) + if err != nil { + t.Fatalf("Failed to create session: %v", err) + } + t.Cleanup(func() { _ = session.Disconnect() }) + + // Fire-and-forget + go func() { + _, _ = session.Send(t.Context(), copilot.MessageOptions{ + Prompt: "Use slow_analysis with value 'test_abort'. 
Wait for the result.", + }) + }() + + // Wait for the tool to start executing + var toolValue string + select { + case toolValue = <-toolStarted: + case <-time.After(60 * time.Second): + t.Fatal("Timed out waiting for slow_analysis tool to start") + } + if toolValue != "test_abort" { + t.Errorf("Expected tool value 'test_abort', got %q", toolValue) + } + + // Abort while the tool is running + if err := session.Abort(t.Context()); err != nil { + t.Fatalf("Abort failed: %v", err) + } + + // Release the tool so its goroutine doesn't leak + select { + case releaseTool <- "RELEASED_AFTER_ABORT": + default: + } + + // Session should be usable after abort + recoveryReceived := make(chan *copilot.AssistantMessageData, 1) + session.On(func(event copilot.SessionEvent) { + if d, ok := event.Data.(*copilot.AssistantMessageData); ok { + if strings.Contains(d.Content, "tool_abort_recovery_ok") { + select { + case recoveryReceived <- d: + default: + } + } + } + }) + + go func() { + _, _ = session.Send(t.Context(), copilot.MessageOptions{ + Prompt: "Say 'tool_abort_recovery_ok'.", + }) + }() + + select { + case msg := <-recoveryReceived: + if !strings.Contains(msg.Content, "tool_abort_recovery_ok") { + t.Errorf("Expected recovery message to contain 'tool_abort_recovery_ok', got %q", msg.Content) + } + case <-time.After(60 * time.Second): + t.Fatal("Timed out waiting for recovery message after abort") + } + }) +} diff --git a/go/internal/e2e/event_fidelity_e2e_test.go b/go/internal/e2e/event_fidelity_e2e_test.go index d37395313..54ba39060 100644 --- a/go/internal/e2e/event_fidelity_e2e_test.go +++ b/go/internal/e2e/event_fidelity_e2e_test.go @@ -6,6 +6,7 @@ import ( "strings" "sync" "testing" + "time" copilot "github.com/github/copilot-sdk/go" "github.com/github/copilot-sdk/go/internal/e2e/testharness" @@ -16,6 +17,265 @@ func TestEventFidelityE2E(t *testing.T) { client := ctx.NewClient() t.Cleanup(func() { client.ForceStop() }) + t.Run("should emit assistant usage event after 
model call", func(t *testing.T) { + ctx.ConfigureForTest(t) + + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + }) + if err != nil { + t.Fatalf("Failed to create session: %v", err) + } + t.Cleanup(func() { _ = session.Disconnect() }) + + var mu sync.Mutex + var events []copilot.SessionEvent + session.On(func(event copilot.SessionEvent) { + mu.Lock() + events = append(events, event) + mu.Unlock() + }) + + if _, err := session.SendAndWait(t.Context(), copilot.MessageOptions{ + Prompt: "What is 5+5? Reply with just the number.", + }); err != nil { + t.Fatalf("SendAndWait failed: %v", err) + } + + snapshot := snapshotEventFidelityEvents(&mu, &events) + + var usageEvent *copilot.AssistantUsageData + for i := len(snapshot) - 1; i >= 0; i-- { + if d, ok := snapshot[i].Data.(*copilot.AssistantUsageData); ok { + usageEvent = d + break + } + } + + if usageEvent == nil { + t.Fatalf("Expected at least one assistant.usage event; events=%v", eventFidelityTypes(snapshot)) + } + if usageEvent.Model == "" { + t.Errorf("Expected assistant.usage event to have a non-empty model field, got %#v", usageEvent) + } + + // Verify the event itself has a valid ID and timestamp + for _, evt := range snapshot { + if evt.Type == copilot.SessionEventTypeAssistantUsage { + if evt.ID == "" { + t.Error("Expected assistant.usage event to have a non-empty ID") + } + if evt.Timestamp.IsZero() { + t.Error("Expected assistant.usage event to have a non-zero timestamp") + } + break + } + } + }) + + t.Run("should emit session usage info event after model call", func(t *testing.T) { + ctx.ConfigureForTest(t) + + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + }) + if err != nil { + t.Fatalf("Failed to create session: %v", err) + } + t.Cleanup(func() { _ = session.Disconnect() }) + + var mu sync.Mutex + var events 
[]copilot.SessionEvent + session.On(func(event copilot.SessionEvent) { + mu.Lock() + events = append(events, event) + mu.Unlock() + }) + + if _, err := session.SendAndWait(t.Context(), copilot.MessageOptions{ + Prompt: "What is 5+5? Reply with just the number.", + }); err != nil { + t.Fatalf("SendAndWait failed: %v", err) + } + + snapshot := snapshotEventFidelityEvents(&mu, &events) + + var usageInfo *copilot.SessionUsageInfoData + for i := len(snapshot) - 1; i >= 0; i-- { + if d, ok := snapshot[i].Data.(*copilot.SessionUsageInfoData); ok { + usageInfo = d + break + } + } + + if usageInfo == nil { + t.Fatalf("Expected at least one session.usage_info event; events=%v", eventFidelityTypes(snapshot)) + } + if usageInfo.CurrentTokens <= 0 { + t.Errorf("Expected session.usage_info.currentTokens > 0, got %v", usageInfo.CurrentTokens) + } + if usageInfo.MessagesLength <= 0 { + t.Errorf("Expected session.usage_info.messagesLength > 0, got %v", usageInfo.MessagesLength) + } + if usageInfo.TokenLimit <= 0 { + t.Errorf("Expected session.usage_info.tokenLimit > 0, got %v", usageInfo.TokenLimit) + } + }) + + t.Run("should emit pending messages modified event when message queue changes", func(t *testing.T) { + ctx.ConfigureForTest(t) + + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + }) + if err != nil { + t.Fatalf("Failed to create session: %v", err) + } + t.Cleanup(func() { _ = session.Disconnect() }) + + pendingModified := make(chan *copilot.SessionEvent, 1) + session.On(func(event copilot.SessionEvent) { + if event.Type == copilot.SessionEventTypePendingMessagesModified { + select { + case pendingModified <- &event: + default: + } + } + }) + + if _, err := session.Send(t.Context(), copilot.MessageOptions{ + Prompt: "What is 9+9? 
Reply with just the number.", + }); err != nil { + t.Fatalf("Send failed: %v", err) + } + + select { + case evt := <-pendingModified: + if evt == nil { + t.Error("Expected a non-nil pending_messages.modified event") + } + case <-time.After(60 * time.Second): + t.Fatal("Timed out waiting for pending_messages.modified event") + } + + answer, err := testharness.GetFinalAssistantMessage(t.Context(), session) + if err != nil { + t.Fatalf("Failed to get final assistant message: %v", err) + } + if ad, ok := answer.Data.(*copilot.AssistantMessageData); !ok || !strings.Contains(ad.Content, "18") { + t.Errorf("Expected answer to contain '18', got %v", answer.Data) + } + }) + + t.Run("should preserve message order in getmessages after tool use", func(t *testing.T) { + ctx.ConfigureForTest(t) + + if err := os.WriteFile(filepath.Join(ctx.WorkDir, "order.txt"), []byte("ORDER_CONTENT_42"), 0644); err != nil { + t.Fatalf("Failed to write order.txt: %v", err) + } + + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + }) + if err != nil { + t.Fatalf("Failed to create session: %v", err) + } + t.Cleanup(func() { _ = session.Disconnect() }) + + if _, err := session.SendAndWait(t.Context(), copilot.MessageOptions{ + Prompt: "Read the file 'order.txt' and tell me what the number is.", + }); err != nil { + t.Fatalf("SendAndWait failed: %v", err) + } + + messages, err := session.GetMessages(t.Context()) + if err != nil { + t.Fatalf("GetMessages failed: %v", err) + } + + types := make([]copilot.SessionEventType, 0, len(messages)) + for _, m := range messages { + types = append(types, m.Type) + } + + sessionStartIdx := -1 + userMsgIdx := -1 + toolStartIdx := -1 + toolCompleteIdx := -1 + assistantMsgIdx := -1 + + for i, typ := range types { + if typ == copilot.SessionEventTypeSessionStart && sessionStartIdx < 0 { + sessionStartIdx = i + } + if typ == copilot.SessionEventTypeUserMessage && userMsgIdx < 0 { + 
userMsgIdx = i + } + if typ == copilot.SessionEventTypeToolExecutionStart && toolStartIdx < 0 { + toolStartIdx = i + } + if typ == copilot.SessionEventTypeToolExecutionComplete && toolCompleteIdx < 0 { + toolCompleteIdx = i + } + if typ == copilot.SessionEventTypeAssistantMessage { + assistantMsgIdx = i + } + } + + if sessionStartIdx < 0 { + t.Fatalf("Expected session.start event in GetMessages; types=%v", types) + } + if userMsgIdx < 0 { + t.Fatalf("Expected user.message event in GetMessages; types=%v", types) + } + if toolStartIdx < 0 { + t.Fatalf("Expected tool.execution_start event in GetMessages; types=%v", types) + } + if toolCompleteIdx < 0 { + t.Fatalf("Expected tool.execution_complete event in GetMessages; types=%v", types) + } + if assistantMsgIdx < 0 { + t.Fatalf("Expected assistant.message event in GetMessages; types=%v", types) + } + + if sessionStartIdx >= userMsgIdx { + t.Errorf("Expected session.start (%d) before user.message (%d); types=%v", sessionStartIdx, userMsgIdx, types) + } + if userMsgIdx >= toolStartIdx { + t.Errorf("Expected user.message (%d) before tool.execution_start (%d); types=%v", userMsgIdx, toolStartIdx, types) + } + if toolStartIdx >= toolCompleteIdx { + t.Errorf("Expected tool.execution_start (%d) before tool.execution_complete (%d); types=%v", toolStartIdx, toolCompleteIdx, types) + } + if toolCompleteIdx >= assistantMsgIdx { + t.Errorf("Expected tool.execution_complete (%d) before final assistant.message (%d); types=%v", toolCompleteIdx, assistantMsgIdx, types) + } + + // Verify user.message mentions the file + for _, msg := range messages { + if msg.Type == copilot.SessionEventTypeUserMessage { + if d, ok := msg.Data.(*copilot.UserMessageData); ok { + if !strings.Contains(d.Content, "order.txt") { + t.Errorf("Expected user.message to mention 'order.txt', got %q", d.Content) + } + } + break + } + } + + // Verify assistant.message references the number + for i := len(messages) - 1; i >= 0; i-- { + if messages[i].Type == 
copilot.SessionEventTypeAssistantMessage { + if d, ok := messages[i].Data.(*copilot.AssistantMessageData); ok { + if !strings.Contains(d.Content, "42") { + t.Errorf("Expected assistant.message to contain '42', got %q", d.Content) + } + } + break + } + } + }) + t.Run("should emit events in correct order for tool-using conversation", func(t *testing.T) { ctx.ConfigureForTest(t) diff --git a/go/internal/e2e/multi_client_e2e_test.go b/go/internal/e2e/multi_client_e2e_test.go index 4426912c5..7638d3212 100644 --- a/go/internal/e2e/multi_client_e2e_test.go +++ b/go/internal/e2e/multi_client_e2e_test.go @@ -152,11 +152,10 @@ func TestMultiClientE2E(t *testing.T) { t.Fatalf("Failed to create session: %v", err) } - // Client 2 resumes — its handler never resolves, so only client 1's approval takes effect + // Client 2 observes the permission request but leaves the decision to client 1. session2, err := client2.ResumeSession(t.Context(), session1.SessionID, &copilot.ResumeSessionConfig{ OnPermissionRequest: func(request copilot.PermissionRequest, invocation copilot.PermissionInvocation) (copilot.PermissionRequestResult, error) { - // Block forever so only client 1's handler responds - select {} + return copilot.PermissionRequestResult{Kind: copilot.PermissionRequestResultKindNoResult}, nil }, }) if err != nil { @@ -246,10 +245,10 @@ func TestMultiClientE2E(t *testing.T) { t.Fatalf("Failed to create session: %v", err) } - // Client 2 resumes — its handler never resolves so only client 1's denial takes effect + // Client 2 observes the permission request but leaves the decision to client 1. 
session2, err := client2.ResumeSession(t.Context(), session1.SessionID, &copilot.ResumeSessionConfig{ OnPermissionRequest: func(request copilot.PermissionRequest, invocation copilot.PermissionInvocation) (copilot.PermissionRequestResult, error) { - select {} + return copilot.PermissionRequestResult{Kind: copilot.PermissionRequestResultKindNoResult}, nil }, }) if err != nil { diff --git a/go/internal/e2e/multi_turn_e2e_test.go b/go/internal/e2e/multi_turn_e2e_test.go index 248e01a2c..8a91a359f 100644 --- a/go/internal/e2e/multi_turn_e2e_test.go +++ b/go/internal/e2e/multi_turn_e2e_test.go @@ -4,6 +4,7 @@ import ( "os" "path/filepath" "strings" + "sync" "testing" copilot "github.com/github/copilot-sdk/go" @@ -30,6 +31,14 @@ func TestMultiTurnE2E(t *testing.T) { } t.Cleanup(func() { _ = session.Disconnect() }) + var mu sync.Mutex + var events []copilot.SessionEvent + session.On(func(event copilot.SessionEvent) { + mu.Lock() + events = append(events, event) + mu.Unlock() + }) + msg1, err := session.SendAndWait(t.Context(), copilot.MessageOptions{ Prompt: "Read the file 'secret.txt' and tell me what the magic number is.", }) @@ -39,6 +48,7 @@ func TestMultiTurnE2E(t *testing.T) { if content := assistantContent(t, msg1); !strings.Contains(content, "42") { t.Fatalf("Expected first response to contain 42, got %q", content) } + assertToolTurnOrdering(t, snapshotAndClearMultiTurnEvents(&mu, &events), "file read turn") msg2, err := session.SendAndWait(t.Context(), copilot.MessageOptions{ Prompt: "What is that magic number multiplied by 2?", @@ -62,11 +72,28 @@ func TestMultiTurnE2E(t *testing.T) { } t.Cleanup(func() { _ = session.Disconnect() }) + var mu sync.Mutex + var events []copilot.SessionEvent + session.On(func(event copilot.SessionEvent) { + mu.Lock() + events = append(events, event) + mu.Unlock() + }) + if _, err := session.SendAndWait(t.Context(), copilot.MessageOptions{ Prompt: "Create a file called 'greeting.txt' with the content 'Hello from multi-turn test'.", 
}); err != nil { t.Fatalf("First SendAndWait failed: %v", err) } + // File should have been created with the expected content + greetingContent, err := os.ReadFile(filepath.Join(ctx.WorkDir, "greeting.txt")) + if err != nil { + t.Fatalf("Failed to read greeting.txt: %v", err) + } + if !strings.Contains(string(greetingContent), "Hello from multi-turn test") { + t.Errorf("Expected greeting.txt to contain 'Hello from multi-turn test', got %q", string(greetingContent)) + } + assertToolTurnOrdering(t, snapshotAndClearMultiTurnEvents(&mu, &events), "file creation turn") msg, err := session.SendAndWait(t.Context(), copilot.MessageOptions{ Prompt: "Read the file 'greeting.txt' and tell me its exact contents.", @@ -77,5 +104,106 @@ func TestMultiTurnE2E(t *testing.T) { if content := assistantContent(t, msg); !strings.Contains(content, "Hello from multi-turn test") { t.Fatalf("Expected response to contain created file contents, got %q", content) } + assertToolTurnOrdering(t, snapshotAndClearMultiTurnEvents(&mu, &events), "file read turn") }) } + +func snapshotAndClearMultiTurnEvents(mu *sync.Mutex, events *[]copilot.SessionEvent) []copilot.SessionEvent { + mu.Lock() + defer mu.Unlock() + snapshot := make([]copilot.SessionEvent, len(*events)) + copy(snapshot, *events) + *events = (*events)[:0] + return snapshot +} + +// assertToolTurnOrdering verifies that for a turn with tool use the events arrive in the +// expected order: user.message → tool.execution_start(s) → tool.execution_complete(s) +// → assistant.message → session.idle. 
+func assertToolTurnOrdering(t *testing.T, events []copilot.SessionEvent, turnDescription string) { + t.Helper() + + observedTypes := make([]copilot.SessionEventType, 0, len(events)) + for _, e := range events { + observedTypes = append(observedTypes, e.Type) + } + + userMessageIdx := indexOfEventType(events, copilot.SessionEventTypeUserMessage, 0) + if userMessageIdx < 0 { + // A turn without a tool call (e.g., pure text answer) may not need ordering. + // Only assert if tool events are present. + if !containsEventType(events, copilot.SessionEventTypeToolExecutionStart) { + return + } + t.Errorf("Expected user.message in %s but none found; types=%v", turnDescription, observedTypes) + return + } + + firstToolStartIdx := indexOfEventType(events, copilot.SessionEventTypeToolExecutionStart, 0) + if firstToolStartIdx < 0 { + // No tool use in this turn — nothing to assert. + return + } + lastToolCompleteIdx := lastIndexOfEventType(events, copilot.SessionEventTypeToolExecutionComplete) + assistantAfterToolsIdx := indexOfEventType(events, copilot.SessionEventTypeAssistantMessage, lastToolCompleteIdx+1) + sessionIdleIdx := indexOfEventType(events, copilot.SessionEventTypeSessionIdle, 0) + + if userMessageIdx >= firstToolStartIdx { + t.Errorf("[%s] Expected user.message before first tool start; types=%v", turnDescription, observedTypes) + } + + // Match each tool.execution_complete to a preceding tool.execution_start with the same ToolCallID. 
+ starts := make(map[string]int) + for i, e := range events { + if e.Type == copilot.SessionEventTypeToolExecutionStart { + if d, ok := e.Data.(*copilot.ToolExecutionStartData); ok { + starts[d.ToolCallID] = i + } + } + } + for _, e := range events { + if e.Type == copilot.SessionEventTypeToolExecutionComplete { + if d, ok := e.Data.(*copilot.ToolExecutionCompleteData); ok { + if _, found := starts[d.ToolCallID]; !found { + t.Errorf("[%s] tool.execution_complete for %q has no matching tool.execution_start; types=%v", + turnDescription, d.ToolCallID, observedTypes) + } + } + } + } + + if assistantAfterToolsIdx < 0 { + t.Errorf("[%s] Expected assistant.message after final tool completion; types=%v", turnDescription, observedTypes) + } + if sessionIdleIdx < 0 { + t.Errorf("[%s] Expected session.idle; types=%v", turnDescription, observedTypes) + } + if assistantAfterToolsIdx >= 0 && lastToolCompleteIdx >= assistantAfterToolsIdx { + t.Errorf("[%s] Expected final tool completion before final assistant.message; types=%v", turnDescription, observedTypes) + } + if assistantAfterToolsIdx >= 0 && sessionIdleIdx >= 0 && assistantAfterToolsIdx >= sessionIdleIdx { + t.Errorf("[%s] Expected assistant.message before session.idle; types=%v", turnDescription, observedTypes) + } +} + +func indexOfEventType(events []copilot.SessionEvent, typ copilot.SessionEventType, startIdx int) int { + for i := startIdx; i < len(events); i++ { + if events[i].Type == typ { + return i + } + } + return -1 +} + +func lastIndexOfEventType(events []copilot.SessionEvent, typ copilot.SessionEventType) int { + for i := len(events) - 1; i >= 0; i-- { + if events[i].Type == typ { + return i + } + } + return -1 +} + +func containsEventType(events []copilot.SessionEvent, typ copilot.SessionEventType) bool { + return indexOfEventType(events, typ, 0) >= 0 +} diff --git a/go/internal/e2e/pending_work_resume_e2e_test.go b/go/internal/e2e/pending_work_resume_e2e_test.go index aa1786f66..dde7c0bd0 100644 --- 
a/go/internal/e2e/pending_work_resume_e2e_test.go +++ b/go/internal/e2e/pending_work_resume_e2e_test.go @@ -378,23 +378,6 @@ func TestPendingWorkResumeE2E(t *testing.T) { t.Fatalf("HandlePendingToolCall(A) failed: err=%v result=%+v", err, resA) } - ctxFinal, cancel := context.WithTimeout(t.Context(), pendingWorkTimeout) - defer cancel() - answer, err := testharness.GetFinalAssistantMessage(ctxFinal, session2) - if err != nil { - t.Fatalf("Failed to wait for final assistant message: %v", err) - } - assistant, ok := answer.Data.(*copilot.AssistantMessageData) - if !ok { - t.Fatalf("Expected AssistantMessageData, got %T", answer.Data) - } - if !strings.Contains(assistant.Content, "PARALLEL_A_ALPHA") { - t.Errorf("Expected response to contain 'PARALLEL_A_ALPHA', got %q", assistant.Content) - } - if !strings.Contains(assistant.Content, "PARALLEL_B_BETA") { - t.Errorf("Expected response to contain 'PARALLEL_B_BETA', got %q", assistant.Content) - } - select { case releaseA <- "ORIGINAL_A_SHOULD_NOT_WIN": default: @@ -469,6 +452,214 @@ func TestPendingWorkResumeE2E(t *testing.T) { resumedSession.Disconnect() }) + + t.Run("should keep pending external tool handleable on warm resume when continuependingwork is false", func(t *testing.T) { + ctx.ConfigureForTest(t) + + _, cliURL := startTcpServer(t, ctx) + + type ValueParams struct { + Value string `json:"value" jsonschema:"Value to look up"` + } + toolStarted := make(chan string, 1) + releaseTool := make(chan string, 1) + + originalTool := copilot.DefineTool("resume_external_tool", "Looks up a value after resumption", + func(params ValueParams, inv copilot.ToolInvocation) (string, error) { + select { + case toolStarted <- params.Value: + default: + } + return <-releaseTool, nil + }) + + suspendedClient := ctx.NewClient(func(opts *copilot.ClientOptions) { + opts.CLIUrl = cliURL + opts.CLIPath = "" + opts.TCPConnectionToken = sharedTcpToken + }) + session1, err := suspendedClient.CreateSession(t.Context(), 
&copilot.SessionConfig{ + Tools: []copilot.Tool{originalTool}, + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + }) + if err != nil { + t.Fatalf("Failed to create session: %v", err) + } + sessionID := session1.SessionID + + toolEventCh := waitForExternalToolRequests(session1, []string{"resume_external_tool"}) + + if _, err := session1.Send(t.Context(), copilot.MessageOptions{ + Prompt: "Use resume_external_tool with value 'beta', then reply with the result.", + }); err != nil { + t.Fatalf("Failed to send message: %v", err) + } + + toolEvents, err := waitForExternalToolResults(toolEventCh, pendingWorkTimeout) + if err != nil { + t.Fatalf("waiting for external tool requests: %v", err) + } + toolEvent := toolEvents["resume_external_tool"] + + select { + case v := <-toolStarted: + if v != "beta" { + t.Errorf("Expected original tool started with 'beta', got %q", v) + } + case <-time.After(pendingWorkTimeout): + t.Fatal("Timed out waiting for original tool to start") + } + + suspendedClient.ForceStop() + + resumedClient := ctx.NewClient(func(opts *copilot.ClientOptions) { + opts.CLIUrl = cliURL + opts.CLIPath = "" + opts.TCPConnectionToken = sharedTcpToken + }) + t.Cleanup(func() { resumedClient.ForceStop() }) + + session2, err := resumedClient.ResumeSession(t.Context(), sessionID, &copilot.ResumeSessionConfig{ + ContinuePendingWork: false, + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + }) + if err != nil { + t.Fatalf("Failed to resume session: %v", err) + } + + // Verify resume event reflects ContinuePendingWork=false and SessionWasActive=true + messages, err := session2.GetMessages(t.Context()) + if err != nil { + t.Fatalf("GetMessages failed: %v", err) + } + var resumeEvent *copilot.SessionResumeData + for _, msg := range messages { + if msg.Type == copilot.SessionEventTypeSessionResume { + if d, ok := msg.Data.(*copilot.SessionResumeData); ok { + resumeEvent = d + break + } + } + } + if resumeEvent == nil { + t.Fatal("Expected a 
session.resume event") + } + if resumeEvent.ContinuePendingWork == nil || *resumeEvent.ContinuePendingWork != false { + t.Errorf("Expected ContinuePendingWork=false in resume event, got %v", resumeEvent.ContinuePendingWork) + } + if resumeEvent.SessionWasActive == nil || *resumeEvent.SessionWasActive != true { + t.Errorf("Expected SessionWasActive=true in resume event, got %v", resumeEvent.SessionWasActive) + } + + // Even with ContinuePendingWork=false, the pending tool call should still be + // handleable via HandlePendingToolCall. + toolResult, err := session2.RPC.Tools.HandlePendingToolCall(t.Context(), &rpc.HandlePendingToolCallRequest{ + RequestID: toolEvent.RequestID, + Result: &rpc.ExternalToolResult{ + String: copilot.String("EXTERNAL_RESUMED_BETA"), + }, + }) + if err != nil { + t.Fatalf("Failed to handle pending tool call: %v", err) + } + if !toolResult.Success { + t.Errorf("Expected HandlePendingToolCall to succeed, got %+v", toolResult) + } + + select { + case releaseTool <- "ORIGINAL_SHOULD_NOT_WIN": + default: + } + + session2.Disconnect() + }) + + t.Run("should report continuependingwork true in resume event", func(t *testing.T) { + ctx.ConfigureForTest(t) + + _, cliURL := startTcpServer(t, ctx) + + var sessionID string + func() { + firstClient := ctx.NewClient(func(opts *copilot.ClientOptions) { + opts.CLIUrl = cliURL + opts.CLIPath = "" + opts.TCPConnectionToken = sharedTcpToken + }) + defer firstClient.ForceStop() + + firstSession, err := firstClient.CreateSession(t.Context(), &copilot.SessionConfig{ + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + }) + if err != nil { + t.Fatalf("Failed to create first session: %v", err) + } + sessionID = firstSession.SessionID + + answer, err := firstSession.SendAndWait(t.Context(), copilot.MessageOptions{ + Prompt: "Reply with exactly: CONTINUE_PENDING_WORK_TRUE_TURN_ONE", + }) + if err != nil { + t.Fatalf("Failed to send first turn: %v", err) + } + if assistant, ok := 
answer.Data.(*copilot.AssistantMessageData); !ok || !strings.Contains(assistant.Content, "CONTINUE_PENDING_WORK_TRUE_TURN_ONE") { + t.Errorf("Expected first answer to contain 'CONTINUE_PENDING_WORK_TRUE_TURN_ONE', got %v", answer.Data) + } + + firstSession.Disconnect() + }() + + resumedClient := ctx.NewClient(func(opts *copilot.ClientOptions) { + opts.CLIUrl = cliURL + opts.CLIPath = "" + opts.TCPConnectionToken = sharedTcpToken + }) + t.Cleanup(func() { resumedClient.ForceStop() }) + + resumedSession, err := resumedClient.ResumeSession(t.Context(), sessionID, &copilot.ResumeSessionConfig{ + ContinuePendingWork: true, + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + }) + if err != nil { + t.Fatalf("Failed to resume session: %v", err) + } + + // Verify resume event reflects ContinuePendingWork=true and SessionWasActive=false (cold resume) + messages, err := resumedSession.GetMessages(t.Context()) + if err != nil { + t.Fatalf("GetMessages failed: %v", err) + } + var resumeEvent *copilot.SessionResumeData + for _, msg := range messages { + if msg.Type == copilot.SessionEventTypeSessionResume { + if d, ok := msg.Data.(*copilot.SessionResumeData); ok { + resumeEvent = d + break + } + } + } + if resumeEvent == nil { + t.Fatal("Expected a session.resume event") + } + if resumeEvent.ContinuePendingWork == nil || *resumeEvent.ContinuePendingWork != true { + t.Errorf("Expected ContinuePendingWork=true in resume event, got %v", resumeEvent.ContinuePendingWork) + } + if resumeEvent.SessionWasActive != nil && *resumeEvent.SessionWasActive != false { + t.Errorf("Expected SessionWasActive=false (or nil) for cold resume, got %v", resumeEvent.SessionWasActive) + } + + followUp, err := resumedSession.SendAndWait(t.Context(), copilot.MessageOptions{ + Prompt: "Reply with exactly: CONTINUE_PENDING_WORK_TRUE_TURN_TWO", + }) + if err != nil { + t.Fatalf("Failed to send follow-up turn: %v", err) + } + if assistant, ok := followUp.Data.(*copilot.AssistantMessageData); !ok || 
!strings.Contains(assistant.Content, "CONTINUE_PENDING_WORK_TRUE_TURN_TWO") { + t.Errorf("Expected follow-up answer to contain 'CONTINUE_PENDING_WORK_TRUE_TURN_TWO', got %v", followUp.Data) + } + + resumedSession.Disconnect() + }) } // serverCliURL extracts the local CLI URL from a TCP-mode server client. diff --git a/go/internal/e2e/permissions_e2e_test.go b/go/internal/e2e/permissions_e2e_test.go index 34ab11d35..14116dd58 100644 --- a/go/internal/e2e/permissions_e2e_test.go +++ b/go/internal/e2e/permissions_e2e_test.go @@ -11,6 +11,7 @@ import ( copilot "github.com/github/copilot-sdk/go" "github.com/github/copilot-sdk/go/internal/e2e/testharness" + "github.com/github/copilot-sdk/go/rpc" ) func TestPermissionsE2E(t *testing.T) { @@ -288,8 +289,6 @@ func TestPermissionsE2E(t *testing.T) { session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ OnPermissionRequest: func(req copilot.PermissionRequest, inv copilot.PermissionInvocation) (copilot.PermissionRequestResult, error) { permissionRequestReceived.Set(true) - // Simulate async work. 
- time.Sleep(20 * time.Millisecond) return copilot.PermissionRequestResult{Kind: copilot.PermissionRequestResultKindApproved}, nil }, }) @@ -407,6 +406,395 @@ func TestPermissionsE2E(t *testing.T) { t.Error("Expected ToolCallID to be populated on shell permission request") } }) + + t.Run("should wait for slow permission handler", func(t *testing.T) { + ctx.ConfigureForTest(t) + + type lifecycleEvent struct { + Phase string + ToolCallID string + } + + handlerEntered := make(chan struct{}, 1) + releaseHandler := make(chan struct{}) + targetToolCallID := make(chan string, 1) + var lifecycleMu sync.Mutex + var lifecycle []lifecycleEvent + + addLifecycle := func(phase, toolCallID string) { + lifecycleMu.Lock() + lifecycle = append(lifecycle, lifecycleEvent{phase, toolCallID}) + lifecycleMu.Unlock() + } + + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + OnPermissionRequest: func(req copilot.PermissionRequest, inv copilot.PermissionInvocation) (copilot.PermissionRequestResult, error) { + if req.Kind != copilot.PermissionRequestKindShell { + return copilot.PermissionRequestResult{Kind: copilot.PermissionRequestResultKindApproved}, nil + } + toolCallID := "" + if req.ToolCallID != nil { + toolCallID = *req.ToolCallID + } + addLifecycle("permission-start", toolCallID) + select { + case targetToolCallID <- toolCallID: + default: + } + select { + case handlerEntered <- struct{}{}: + default: + } + <-releaseHandler + addLifecycle("permission-complete", toolCallID) + return copilot.PermissionRequestResult{Kind: copilot.PermissionRequestResultKindApproved}, nil + }, + }) + if err != nil { + t.Fatalf("CreateSession failed: %v", err) + } + t.Cleanup(func() { _ = session.Disconnect() }) + + session.On(func(event copilot.SessionEvent) { + switch d := event.Data.(type) { + case *copilot.ToolExecutionStartData: + addLifecycle("tool-start", d.ToolCallID) + case *copilot.ToolExecutionCompleteData: + addLifecycle("tool-complete", d.ToolCallID) + } + }) + + go 
func() { + _, _ = session.Send(t.Context(), copilot.MessageOptions{ + Prompt: "Run 'echo slow_handler_test'", + }) + }() + + select { + case <-handlerEntered: + case <-time.After(30 * time.Second): + t.Fatal("Timed out waiting for permission handler to be entered") + } + var targetID string + select { + case targetID = <-targetToolCallID: + default: + } + + // Verify tool-complete has not yet happened while handler is still running + lifecycleMu.Lock() + for _, evt := range lifecycle { + if evt.Phase == "tool-complete" && evt.ToolCallID == targetID { + t.Error("tool-complete should not have occurred before permission handler completed") + } + } + lifecycleMu.Unlock() + + close(releaseHandler) + + message, err := testharness.GetFinalAssistantMessage(t.Context(), session) + if err != nil { + t.Fatalf("GetFinalAssistantMessage failed: %v", err) + } + + lifecycleMu.Lock() + orderedLifecycle := make([]lifecycleEvent, len(lifecycle)) + copy(orderedLifecycle, lifecycle) + lifecycleMu.Unlock() + + permStartIdx, permCompleteIdx, toolStartIdx, toolCompleteIdx := -1, -1, -1, -1 + for i, evt := range orderedLifecycle { + if evt.ToolCallID != targetID && targetID != "" { + continue + } + switch evt.Phase { + case "permission-start": + if permStartIdx < 0 { + permStartIdx = i + } + case "permission-complete": + if permCompleteIdx < 0 { + permCompleteIdx = i + } + case "tool-start": + if toolStartIdx < 0 { + toolStartIdx = i + } + case "tool-complete": + if toolCompleteIdx < 0 { + toolCompleteIdx = i + } + } + } + + if permStartIdx < 0 || permCompleteIdx < 0 || toolCompleteIdx < 0 { + t.Errorf("Expected permission-start, permission-complete, and tool-complete in lifecycle; got %v", orderedLifecycle) + } + if permCompleteIdx >= 0 && toolCompleteIdx >= 0 && permCompleteIdx >= toolCompleteIdx { + t.Errorf("Expected permission completion before tool completion; lifecycle=%v", orderedLifecycle) + } + if toolStartIdx >= 0 && toolCompleteIdx >= 0 && toolStartIdx >= toolCompleteIdx { + 
t.Errorf("Expected tool start before tool completion; lifecycle=%v", orderedLifecycle) + } + + if md, ok := message.Data.(*copilot.AssistantMessageData); !ok || !strings.Contains(md.Content, "slow_handler_test") { + t.Errorf("Expected assistant message to reference 'slow_handler_test', got %v", message.Data) + } + }) + + t.Run("should handle concurrent permission requests from parallel tools", func(t *testing.T) { + ctx.ConfigureForTest(t) + + type EmptyParams struct{} + + var permissionRequestCount int + var permissionRequestsMu sync.Mutex + var permissionRequests []copilot.PermissionRequest + bothStarted := make(chan struct{}) + var bothStartedOnce sync.Once + + firstToolCalled := make(chan struct{}, 1) + secondToolCalled := make(chan struct{}, 1) + firstToolCompleted := make(chan *copilot.ToolExecutionCompleteData, 1) + secondToolCompleted := make(chan *copilot.ToolExecutionCompleteData, 1) + + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + Tools: []copilot.Tool{ + copilot.DefineTool("first_permission_tool", "First concurrent permission test tool", + func(_ EmptyParams, inv copilot.ToolInvocation) (copilot.ToolResult, error) { + select { + case firstToolCalled <- struct{}{}: + default: + } + return copilot.ToolResult{ + TextResultForLLM: "first_permission_tool completed after permission approval", + ResultType: "rejected", + }, nil + }), + copilot.DefineTool("second_permission_tool", "Second concurrent permission test tool", + func(_ EmptyParams, inv copilot.ToolInvocation) (copilot.ToolResult, error) { + select { + case secondToolCalled <- struct{}{}: + default: + } + return copilot.ToolResult{ + TextResultForLLM: "second_permission_tool completed after permission approval", + ResultType: "rejected", + }, nil + }), + }, + AvailableTools: []string{"first_permission_tool", "second_permission_tool"}, + OnPermissionRequest: func(req copilot.PermissionRequest, inv copilot.PermissionInvocation) (copilot.PermissionRequestResult, error) { + 
permissionRequestsMu.Lock() + permissionRequestCount++ + permissionRequests = append(permissionRequests, req) + count := permissionRequestCount + permissionRequestsMu.Unlock() + if count >= 2 { + bothStartedOnce.Do(func() { close(bothStarted) }) + } + select { + case <-bothStarted: + case <-time.After(30 * time.Second): + } + return copilot.PermissionRequestResult{Kind: copilot.PermissionRequestResultKindApproved}, nil + }, + }) + if err != nil { + t.Fatalf("CreateSession failed: %v", err) + } + t.Cleanup(func() { _ = session.Disconnect() }) + + session.On(func(event copilot.SessionEvent) { + if d, ok := event.Data.(*copilot.ToolExecutionCompleteData); ok { + var errMsg string + if d.Error != nil { + errMsg = d.Error.Message + } + switch { + case strings.Contains(errMsg, "first_permission_tool"): + select { + case firstToolCompleted <- d: + default: + } + case strings.Contains(errMsg, "second_permission_tool"): + select { + case secondToolCompleted <- d: + default: + } + } + } + }) + + if _, err := session.Send(t.Context(), copilot.MessageOptions{ + Prompt: "Call both first_permission_tool and second_permission_tool in the same turn. 
Do not call any other tools.", + }); err != nil { + t.Fatalf("Send failed: %v", err) + } + + select { + case <-firstToolCalled: + case <-time.After(60 * time.Second): + t.Fatal("Timed out waiting for first_permission_tool to be called") + } + select { + case <-secondToolCalled: + case <-time.After(60 * time.Second): + t.Fatal("Timed out waiting for second_permission_tool to be called") + } + + permissionRequestsMu.Lock() + reqCount := permissionRequestCount + reqs := make([]copilot.PermissionRequest, len(permissionRequests)) + copy(reqs, permissionRequests) + permissionRequestsMu.Unlock() + + if reqCount < 2 { + t.Errorf("Expected at least 2 permission requests, got %d", reqCount) + } + hasFirst := false + hasSecond := false + for _, req := range reqs { + if req.Kind == copilot.PermissionRequestKindCustomTool { + if req.ToolName != nil { + if *req.ToolName == "first_permission_tool" { + hasFirst = true + } + if *req.ToolName == "second_permission_tool" { + hasSecond = true + } + } + } + } + if !hasFirst { + t.Error("Expected permission request for first_permission_tool") + } + if !hasSecond { + t.Error("Expected permission request for second_permission_tool") + } + + assertRejectedToolComplete := func(name string, ch <-chan *copilot.ToolExecutionCompleteData, expectedMessage string) { + t.Helper() + select { + case d := <-ch: + if d.Success { + t.Errorf("Expected %s tool execution to complete with Success=false", name) + } + if d.Error == nil { + t.Errorf("Expected %s tool execution to include an error", name) + return + } + if d.Error.Code == nil || *d.Error.Code != "rejected" { + t.Errorf("Expected %s tool execution error code 'rejected', got %v", name, d.Error.Code) + } + if !strings.Contains(d.Error.Message, expectedMessage) { + t.Errorf("Expected %s tool execution error message to contain %q, got %q", name, expectedMessage, d.Error.Message) + } + case <-time.After(60 * time.Second): + t.Fatalf("Timed out waiting for %s tool.execution_complete", name) + } + } + 
assertRejectedToolComplete("first_permission_tool", firstToolCompleted, "first_permission_tool completed after permission approval") + assertRejectedToolComplete("second_permission_tool", secondToolCompleted, "second_permission_tool completed after permission approval") + }) + + t.Run("should deny permission with noresult kind", func(t *testing.T) { + ctx.ConfigureForTest(t) + + permissionCalled := make(chan struct{}, 1) + + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + OnPermissionRequest: func(req copilot.PermissionRequest, inv copilot.PermissionInvocation) (copilot.PermissionRequestResult, error) { + select { + case permissionCalled <- struct{}{}: + default: + } + return copilot.PermissionRequestResult{Kind: copilot.PermissionRequestResultKindNoResult}, nil + }, + }) + if err != nil { + t.Fatalf("CreateSession failed: %v", err) + } + t.Cleanup(func() { _ = session.Disconnect() }) + + if _, err := session.Send(t.Context(), copilot.MessageOptions{ + Prompt: "Run 'node --version'", + }); err != nil { + t.Fatalf("Send failed: %v", err) + } + + select { + case <-permissionCalled: + // Expected: legacy no-result does not send a permission decision. 
+ case <-time.After(30 * time.Second): + t.Fatal("Timed out waiting for permission handler to be called") + } + + _ = session.Abort(t.Context()) + }) + + t.Run("should short circuit permission handler when set approve all enabled", func(t *testing.T) { + ctx.ConfigureForTest(t) + + var handlerCallCount int + var handlerCallCountMu sync.Mutex + + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + OnPermissionRequest: func(req copilot.PermissionRequest, inv copilot.PermissionInvocation) (copilot.PermissionRequestResult, error) { + handlerCallCountMu.Lock() + handlerCallCount++ + handlerCallCountMu.Unlock() + return copilot.PermissionRequestResult{Kind: copilot.PermissionRequestResultKindApproved}, nil + }, + }) + if err != nil { + t.Fatalf("CreateSession failed: %v", err) + } + t.Cleanup(func() { _ = session.Disconnect() }) + + // Runtime contract: when approveAllToolPermissionRequests is true the runtime + // short-circuits the permission flow before invoking the SDK-supplied handler. + setResult, err := session.RPC.Permissions.SetApproveAll(t.Context(), &rpc.PermissionsSetApproveAllRequest{Enabled: true}) + if err != nil { + t.Fatalf("SetApproveAll failed: %v", err) + } + if !setResult.Success { + t.Fatalf("SetApproveAll returned success=false") + } + defer func() { + _, _ = session.RPC.Permissions.SetApproveAll(t.Context(), &rpc.PermissionsSetApproveAllRequest{Enabled: false}) + }() + + toolCompleted := make(chan struct{}, 1) + session.On(func(event copilot.SessionEvent) { + if d, ok := event.Data.(*copilot.ToolExecutionCompleteData); ok && d.Success { + select { + case toolCompleted <- struct{}{}: + default: + } + } + }) + + if _, err := session.SendAndWait(t.Context(), copilot.MessageOptions{ + Prompt: "Run 'echo test' and tell me what happens", + }); err != nil { + t.Fatalf("SendAndWait failed: %v", err) + } + + select { + case <-toolCompleted: + // A real shell tool completed successfully under runtime-level approval. 
+ case <-time.After(30 * time.Second): + t.Fatal("Timed out waiting for successful tool.execution_complete") + } + + handlerCallCountMu.Lock() + count := handlerCallCount + handlerCallCountMu.Unlock() + if count != 0 { + t.Errorf("Expected permission handler to NOT be called when SetApproveAll is enabled, got %d calls", count) + } + }) } // atomicBool is a tiny helper for concurrent flag updates in handler callbacks. diff --git a/go/internal/e2e/rpc_event_side_effects_e2e_test.go b/go/internal/e2e/rpc_event_side_effects_e2e_test.go new file mode 100644 index 000000000..169e22bc2 --- /dev/null +++ b/go/internal/e2e/rpc_event_side_effects_e2e_test.go @@ -0,0 +1,321 @@ +package e2e + +import ( + "fmt" + "strings" + "testing" + "time" + + copilot "github.com/github/copilot-sdk/go" + "github.com/github/copilot-sdk/go/internal/e2e/testharness" + "github.com/github/copilot-sdk/go/rpc" +) + +const rpcEventSideEffectsTimeout = 30 * time.Second + +// Mirrors dotnet/test/RpcEventSideEffectsE2ETests.cs (snapshot category "rpc_event_side_effects"). 
+func TestRpcEventSideEffectsE2E(t *testing.T) { + ctx := testharness.NewTestContext(t) + client := ctx.NewClient() + t.Cleanup(func() { client.ForceStop() }) + + if err := client.Start(t.Context()); err != nil { + t.Fatalf("Failed to start client: %v", err) + } + + t.Run("should emit mode changed event when mode set", func(t *testing.T) { + session := createEventSideEffectsSession(t, client) + defer session.Disconnect() + + awaitModeChanged := waitForMatchingEvent( + session, + copilot.SessionEventTypeSessionModeChanged, + func(event copilot.SessionEvent) bool { + data, ok := event.Data.(*copilot.SessionModeChangedData) + return ok && data.NewMode == "plan" && data.PreviousMode == "interactive" + }, + "session.mode_changed event for interactive to plan", + ) + + if _, err := session.RPC.Mode.Set(t.Context(), &rpc.ModeSetRequest{Mode: rpc.SessionModePlan}); err != nil { + t.Fatalf("Failed to set mode to plan: %v", err) + } + + evt := awaitEvent(t, awaitModeChanged) + data := evt.Data.(*copilot.SessionModeChangedData) + if data.NewMode != "plan" || data.PreviousMode != "interactive" { + t.Fatalf("Unexpected mode change: %+v", data) + } + }) + + t.Run("should emit plan changed event for update and delete", func(t *testing.T) { + session := createEventSideEffectsSession(t, client) + defer session.Disconnect() + + awaitCreate := waitForMatchingEvent( + session, + copilot.SessionEventTypeSessionPlanChanged, + func(event copilot.SessionEvent) bool { + data, ok := event.Data.(*copilot.SessionPlanChangedData) + return ok && data.Operation == copilot.PlanChangedOperationCreate + }, + "session.plan_changed create event", + ) + if _, err := session.RPC.Plan.Update(t.Context(), &rpc.PlanUpdateRequest{Content: "# Test plan\n- item"}); err != nil { + t.Fatalf("Failed to update plan: %v", err) + } + if data := awaitEvent(t, awaitCreate).Data.(*copilot.SessionPlanChangedData); data.Operation != copilot.PlanChangedOperationCreate { + t.Fatalf("Expected create operation, got %+v", 
data) + } + + awaitDelete := waitForMatchingEvent( + session, + copilot.SessionEventTypeSessionPlanChanged, + func(event copilot.SessionEvent) bool { + data, ok := event.Data.(*copilot.SessionPlanChangedData) + return ok && data.Operation == copilot.PlanChangedOperationDelete + }, + "session.plan_changed delete event", + ) + if _, err := session.RPC.Plan.Delete(t.Context()); err != nil { + t.Fatalf("Failed to delete plan: %v", err) + } + if data := awaitEvent(t, awaitDelete).Data.(*copilot.SessionPlanChangedData); data.Operation != copilot.PlanChangedOperationDelete { + t.Fatalf("Expected delete operation, got %+v", data) + } + }) + + t.Run("should emit plan changed update operation on second update", func(t *testing.T) { + session := createEventSideEffectsSession(t, client) + defer session.Disconnect() + + if _, err := session.RPC.Plan.Update(t.Context(), &rpc.PlanUpdateRequest{Content: "# initial"}); err != nil { + t.Fatalf("Failed to create plan: %v", err) + } + + awaitUpdate := waitForMatchingEvent( + session, + copilot.SessionEventTypeSessionPlanChanged, + func(event copilot.SessionEvent) bool { + data, ok := event.Data.(*copilot.SessionPlanChangedData) + return ok && data.Operation == copilot.PlanChangedOperationUpdate + }, + "session.plan_changed update event", + ) + if _, err := session.RPC.Plan.Update(t.Context(), &rpc.PlanUpdateRequest{Content: "# updated content"}); err != nil { + t.Fatalf("Failed to update plan: %v", err) + } + if data := awaitEvent(t, awaitUpdate).Data.(*copilot.SessionPlanChangedData); data.Operation != copilot.PlanChangedOperationUpdate { + t.Fatalf("Expected update operation, got %+v", data) + } + }) + + t.Run("should emit workspace file changed event when file created", func(t *testing.T) { + session := createEventSideEffectsSession(t, client) + defer session.Disconnect() + + path := fmt.Sprintf("side-effect-%d.txt", time.Now().UnixNano()) + awaitChanged := waitForMatchingEvent( + session, + 
copilot.SessionEventTypeSessionWorkspaceFileChanged, + func(event copilot.SessionEvent) bool { + data, ok := event.Data.(*copilot.SessionWorkspaceFileChangedData) + return ok && data.Path == path + }, + "session.workspace_file_changed event", + ) + if _, err := session.RPC.Workspaces.CreateFile(t.Context(), &rpc.WorkspacesCreateFileRequest{Path: path, Content: "hello"}); err != nil { + t.Fatalf("Failed to create workspace file: %v", err) + } + data := awaitEvent(t, awaitChanged).Data.(*copilot.SessionWorkspaceFileChangedData) + if data.Path != path { + t.Fatalf("Expected path %q, got %+v", path, data) + } + if data.Operation != copilot.WorkspaceFileChangedOperationCreate && data.Operation != copilot.WorkspaceFileChangedOperationUpdate { + t.Fatalf("Unexpected workspace file operation: %+v", data) + } + }) + + t.Run("should emit title changed event when name set", func(t *testing.T) { + session := createEventSideEffectsSession(t, client) + defer session.Disconnect() + + title := fmt.Sprintf("Renamed-%d", time.Now().UnixNano()) + awaitTitleChanged := waitForMatchingEvent( + session, + copilot.SessionEventTypeSessionTitleChanged, + func(event copilot.SessionEvent) bool { + data, ok := event.Data.(*copilot.SessionTitleChangedData) + return ok && data.Title == title + }, + "session.title_changed event", + ) + if _, err := session.RPC.Name.Set(t.Context(), &rpc.NameSetRequest{Name: title}); err != nil { + t.Fatalf("Failed to set session name: %v", err) + } + if data := awaitEvent(t, awaitTitleChanged).Data.(*copilot.SessionTitleChangedData); data.Title != title { + t.Fatalf("Expected title %q, got %+v", title, data) + } + }) + + t.Run("should emit snapshot rewind event and remove events on truncate", func(t *testing.T) { + ctx.ConfigureForTest(t) + + session := createEventSideEffectsSession(t, client) + defer session.Disconnect() + + if _, err := session.SendAndWait(t.Context(), copilot.MessageOptions{Prompt: "Say SNAPSHOT_REWIND_TARGET exactly."}); err != nil { + 
t.Fatalf("Failed to create persisted message: %v", err) + } + + messages, err := session.GetMessages(t.Context()) + if err != nil { + t.Fatalf("Failed to read messages: %v", err) + } + userEvent := firstUserMessageEvent(messages) + if userEvent == nil { + t.Fatal("Expected at least one user.message in persisted history") + } + targetEventID := userEvent.ID + + awaitRewind := waitForMatchingEvent( + session, + copilot.SessionEventTypeSessionSnapshotRewind, + func(event copilot.SessionEvent) bool { + data, ok := event.Data.(*copilot.SessionSnapshotRewindData) + return ok && strings.EqualFold(data.UpToEventID, targetEventID) + }, + "session.snapshot_rewind event", + ) + truncateResult, err := session.RPC.History.Truncate(t.Context(), &rpc.HistoryTruncateRequest{EventID: targetEventID}) + if err != nil { + t.Fatalf("Failed to truncate history: %v", err) + } + if truncateResult.EventsRemoved < 1 { + t.Fatalf("Expected truncate to remove at least one event, got %+v", truncateResult) + } + rewindData := awaitEvent(t, awaitRewind).Data.(*copilot.SessionSnapshotRewindData) + if !strings.EqualFold(rewindData.UpToEventID, targetEventID) { + t.Fatalf("Expected rewind to target %q, got %+v", targetEventID, rewindData) + } + if rewindData.EventsRemoved != float64(truncateResult.EventsRemoved) { + t.Fatalf("Expected rewind count %d, got %+v", truncateResult.EventsRemoved, rewindData) + } + + messagesAfter, err := session.GetMessages(t.Context()) + if err != nil { + t.Fatalf("Failed to read messages after truncate: %v", err) + } + for _, event := range messagesAfter { + if event.ID == targetEventID { + t.Fatalf("Expected truncated event %q to be removed", targetEventID) + } + } + }) + + t.Run("should allow session use after truncate", func(t *testing.T) { + ctx.ConfigureForTest(t) + + session := createEventSideEffectsSession(t, client) + defer session.Disconnect() + + if _, err := session.SendAndWait(t.Context(), copilot.MessageOptions{Prompt: "Say SNAPSHOT_REWIND_TARGET 
exactly."}); err != nil { + t.Fatalf("Failed to create persisted message: %v", err) + } + + messages, err := session.GetMessages(t.Context()) + if err != nil { + t.Fatalf("Failed to read messages: %v", err) + } + userEvent := firstUserMessageEvent(messages) + if userEvent == nil { + t.Fatal("Expected at least one user.message in persisted history") + } + + truncateResult, err := session.RPC.History.Truncate(t.Context(), &rpc.HistoryTruncateRequest{EventID: userEvent.ID}) + if err != nil { + t.Fatalf("Failed to truncate history: %v", err) + } + if truncateResult.EventsRemoved < 1 { + t.Fatalf("Expected truncate to remove at least one event, got %+v", truncateResult) + } + + mode, err := session.RPC.Mode.Get(t.Context()) + if err != nil { + t.Fatalf("Failed to get mode after truncate: %v", err) + } + if mode == nil || (*mode != rpc.SessionModeInteractive && *mode != rpc.SessionModePlan && *mode != rpc.SessionModeAutopilot) { + t.Fatalf("Unexpected mode after truncate: %v", mode) + } + workspace, err := session.RPC.Workspaces.GetWorkspace(t.Context()) + if err != nil { + t.Fatalf("Failed to get workspace after truncate: %v", err) + } + if workspace.Workspace == nil { + t.Fatal("Expected workspace metadata after truncate") + } + }) +} + +func createEventSideEffectsSession(t *testing.T, client *copilot.Client) *copilot.Session { + t.Helper() + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + }) + if err != nil { + t.Fatalf("Failed to create session: %v", err) + } + return session +} + +func waitForMatchingEvent(session *copilot.Session, eventType copilot.SessionEventType, predicate func(copilot.SessionEvent) bool, description string) func() (*copilot.SessionEvent, error) { + result := make(chan *copilot.SessionEvent, 1) + errCh := make(chan error, 1) + unsubscribe := session.On(func(event copilot.SessionEvent) { + if event.Type == eventType && predicate(event) { + select { + case 
result <- &event: + default: + } + } else if event.Type == copilot.SessionEventTypeSessionError { + msg := "session error" + if data, ok := event.Data.(*copilot.SessionErrorData); ok { + msg = data.Message + } + select { + case errCh <- fmt.Errorf("%s while waiting for %s", msg, description): + default: + } + } + }) + + return func() (*copilot.SessionEvent, error) { + defer unsubscribe() + select { + case event := <-result: + return event, nil + case err := <-errCh: + return nil, err + case <-time.After(rpcEventSideEffectsTimeout): + return nil, fmt.Errorf("timed out waiting for %s", description) + } + } +} + +func awaitEvent(t *testing.T, await func() (*copilot.SessionEvent, error)) *copilot.SessionEvent { + t.Helper() + event, err := await() + if err != nil { + t.Fatal(err) + } + return event +} + +func firstUserMessageEvent(events []copilot.SessionEvent) *copilot.SessionEvent { + for i := range events { + if _, ok := events[i].Data.(*copilot.UserMessageData); ok { + return &events[i] + } + } + return nil +} diff --git a/go/internal/e2e/rpc_session_state_e2e_test.go b/go/internal/e2e/rpc_session_state_e2e_test.go index 9296e04c9..885deb805 100644 --- a/go/internal/e2e/rpc_session_state_e2e_test.go +++ b/go/internal/e2e/rpc_session_state_e2e_test.go @@ -317,6 +317,117 @@ func TestRpcSessionStateE2E(t *testing.T) { } }) + t.Run("should fork session to event id excluding boundary event", func(t *testing.T) { + ctx.ConfigureForTest(t) + + const firstPrompt = "Say FORK_BOUNDARY_FIRST exactly." + const secondPrompt = "Say FORK_BOUNDARY_SECOND exactly." 
+ + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + }) + if err != nil { + t.Fatalf("Failed to create session: %v", err) + } + defer session.Disconnect() + + if _, err := session.SendAndWait(t.Context(), copilot.MessageOptions{Prompt: firstPrompt}); err != nil { + t.Fatalf("Failed to send first prompt: %v", err) + } + if _, err := session.SendAndWait(t.Context(), copilot.MessageOptions{Prompt: secondPrompt}); err != nil { + t.Fatalf("Failed to send second prompt: %v", err) + } + + sourceEvents, err := session.GetMessages(t.Context()) + if err != nil { + t.Fatalf("Failed to read source messages: %v", err) + } + var secondUserEvent *copilot.SessionEvent + for i := range sourceEvents { + data, ok := sourceEvents[i].Data.(*copilot.UserMessageData) + if ok && data.Content == secondPrompt { + secondUserEvent = &sourceEvents[i] + break + } + } + if secondUserEvent == nil { + t.Fatal("Expected the second user.message in persisted history") + } + boundaryEventID := secondUserEvent.ID + + fork, err := client.RPC.Sessions.Fork(t.Context(), &rpc.SessionsForkRequest{ + SessionID: session.SessionID, + ToEventID: &boundaryEventID, + }) + if err != nil { + t.Fatalf("Failed to fork session to event id: %v", err) + } + if strings.TrimSpace(fork.SessionID) == "" { + t.Fatal("Expected non-empty fork session id") + } + if fork.SessionID == session.SessionID { + t.Errorf("Expected fork session id to differ from source %q", session.SessionID) + } + + forkedSession, err := client.ResumeSession(t.Context(), fork.SessionID, &copilot.ResumeSessionConfig{ + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + }) + if err != nil { + t.Fatalf("Failed to resume forked session: %v", err) + } + defer forkedSession.Disconnect() + + forkedEvents, err := forkedSession.GetMessages(t.Context()) + if err != nil { + t.Fatalf("Failed to read forked messages: %v", err) + } + for _, event := range forkedEvents { 
+ if event.ID == boundaryEventID { + t.Fatalf("toEventId is exclusive; boundary event %q must not be in forked session", boundaryEventID) + } + } + forkedConversation := conversationMessages(forkedEvents) + if !containsConversation(forkedConversation, "user", firstPrompt, false) { + t.Errorf("Expected forked conversation to contain first prompt %q, got %v", firstPrompt, forkedConversation) + } + if containsConversation(forkedConversation, "user", secondPrompt, false) { + t.Errorf("Expected forked conversation to exclude second prompt %q, got %v", secondPrompt, forkedConversation) + } + }) + + t.Run("should report error when forking session to unknown event id", func(t *testing.T) { + ctx.ConfigureForTest(t) + + const sourcePrompt = "Say FORK_UNKNOWN_EVENT_OK exactly." + + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + }) + if err != nil { + t.Fatalf("Failed to create session: %v", err) + } + defer session.Disconnect() + + if _, err := session.SendAndWait(t.Context(), copilot.MessageOptions{Prompt: sourcePrompt}); err != nil { + t.Fatalf("Failed to send source prompt: %v", err) + } + + bogusEventID := "00000000-0000-4000-8000-000000000000" + _, err = client.RPC.Sessions.Fork(t.Context(), &rpc.SessionsForkRequest{ + SessionID: session.SessionID, + ToEventID: &bogusEventID, + }) + if err == nil { + t.Fatal("Expected sessions.fork to fail for unknown event id") + } + if !strings.Contains(strings.ToLower(err.Error()), strings.ToLower("Event "+bogusEventID+" not found")) { + t.Errorf("Expected error mentioning unknown event %q, got %v", bogusEventID, err) + } + if strings.Contains(strings.ToLower(err.Error()), "unhandled method sessions.fork") { + t.Errorf("sessions.fork should be implemented; error suggests it isn't: %v", err) + } + }) + t.Run("should call session usage and permission rpcs", func(t *testing.T) { session, err := client.CreateSession(t.Context(), 
&copilot.SessionConfig{ OnPermissionRequest: copilot.PermissionHandler.ApproveAll, diff --git a/go/internal/e2e/session_e2e_test.go b/go/internal/e2e/session_e2e_test.go index 126a150b5..fa2500fe5 100644 --- a/go/internal/e2e/session_e2e_test.go +++ b/go/internal/e2e/session_e2e_test.go @@ -720,10 +720,13 @@ func TestSessionE2E(t *testing.T) { } var receivedEvents []copilot.SessionEvent - idle := make(chan bool) + var receivedEventsMu sync.Mutex + idle := make(chan bool, 1) session.On(func(event copilot.SessionEvent) { + receivedEventsMu.Lock() receivedEvents = append(receivedEvents, event) + receivedEventsMu.Unlock() if event.Type == "session.idle" { select { case idle <- true: @@ -746,14 +749,17 @@ func TestSessionE2E(t *testing.T) { } // Should have received multiple events - if len(receivedEvents) == 0 { + receivedEventsMu.Lock() + eventsSnapshot := append([]copilot.SessionEvent(nil), receivedEvents...) + receivedEventsMu.Unlock() + if len(eventsSnapshot) == 0 { t.Error("Expected to receive events, got none") } hasUserMessage := false hasAssistantMessage := false hasSessionIdle := false - for _, evt := range receivedEvents { + for _, evt := range eventsSnapshot { switch evt.Type { case "user.message": hasUserMessage = true @@ -1462,7 +1468,7 @@ func TestSessionAttachmentsE2E(t *testing.T) { title := "Add E2E attachment coverage" url := "https://github.com/github/copilot-sdk/issues/1234" _, err = session.SendAndWait(t.Context(), copilot.MessageOptions{ - Prompt: "Summarize the referenced issue.", + Prompt: "Using only the GitHub reference metadata in this message, summarize the reference. 
Do not call any tools.", Attachments: []copilot.Attachment{{ Type: copilot.AttachmentTypeGithubReference, Number: &number, diff --git a/go/internal/e2e/streaming_fidelity_e2e_test.go b/go/internal/e2e/streaming_fidelity_e2e_test.go index f1fe1db34..99c85ce63 100644 --- a/go/internal/e2e/streaming_fidelity_e2e_test.go +++ b/go/internal/e2e/streaming_fidelity_e2e_test.go @@ -210,4 +210,157 @@ func TestStreamingFidelityE2E(t *testing.T) { } } }) + + t.Run("should not produce deltas after session resume with streaming disabled", func(t *testing.T) { + ctx.ConfigureForTest(t) + + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + Streaming: true, + }) + if err != nil { + t.Fatalf("Failed to create session with streaming: %v", err) + } + + if _, err := session.SendAndWait(t.Context(), copilot.MessageOptions{Prompt: "What is 3 + 6?"}); err != nil { + t.Fatalf("Failed to send first message: %v", err) + } + + // Resume using a new client with streaming DISABLED + newClient := ctx.NewClient() + defer newClient.ForceStop() + + session2, err := newClient.ResumeSession(t.Context(), session.SessionID, &copilot.ResumeSessionConfig{ + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + Streaming: false, + }) + if err != nil { + t.Fatalf("Failed to resume session: %v", err) + } + + var events []copilot.SessionEvent + var mu sync.Mutex + session2.On(func(event copilot.SessionEvent) { + mu.Lock() + events = append(events, event) + mu.Unlock() + }) + + answer, err := session2.SendAndWait(t.Context(), copilot.MessageOptions{Prompt: "Now if you double that, what do you get?"}) + if err != nil { + t.Fatalf("Failed to send follow-up: %v", err) + } + if answer == nil { + t.Error("Expected non-nil answer") + } else if ad, ok := answer.Data.(*copilot.AssistantMessageData); !ok || !strings.Contains(ad.Content, "18") { + t.Errorf("Expected answer to contain '18', got %v", answer) + } + + mu.Lock() + 
snapshot := make([]copilot.SessionEvent, len(events)) + copy(snapshot, events) + mu.Unlock() + + // No deltas when streaming is toggled off + for i, e := range snapshot { + if e.Type == "assistant.message_delta" { + t.Errorf("Expected no delta events after resume with streaming disabled; got delta at index %d", i) + break + } + } + + // But should still have a final assistant.message + hasAssistantMessage := false + for _, e := range snapshot { + if e.Type == "assistant.message" { + hasAssistantMessage = true + break + } + } + if !hasAssistantMessage { + t.Error("Expected a final assistant.message event after resume with streaming disabled") + } + + _ = session2.Disconnect() + }) + + t.Run("should emit streaming deltas with reasoning effort configured", func(t *testing.T) { + ctx.ConfigureForTest(t) + + // Verifies that setting ReasoningEffort alongside Streaming=true does not break + // the streaming pipeline — deltas still arrive and complete successfully. + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + Streaming: true, + ReasoningEffort: "high", + }) + if err != nil { + t.Fatalf("Failed to create session with streaming + reasoning effort: %v", err) + } + t.Cleanup(func() { _ = session.Disconnect() }) + + var events []copilot.SessionEvent + var mu sync.Mutex + session.On(func(event copilot.SessionEvent) { + mu.Lock() + events = append(events, event) + mu.Unlock() + }) + + if _, err := session.SendAndWait(t.Context(), copilot.MessageOptions{Prompt: "What is 15 * 17?"}); err != nil { + t.Fatalf("SendAndWait failed: %v", err) + } + + mu.Lock() + snapshot := make([]copilot.SessionEvent, len(events)) + copy(snapshot, events) + mu.Unlock() + + // With streaming + reasoning effort, we should still get content deltas + var deltaEvents []copilot.SessionEvent + for _, e := range snapshot { + if e.Type == "assistant.message_delta" { + deltaEvents = append(deltaEvents, e) + 
} + } + if len(deltaEvents) < 1 { + t.Error("Expected at least 1 delta event with streaming + reasoning effort") + } + + // And a final assistant.message with the answer + var lastAssistantContent string + for _, e := range snapshot { + if e.Type == "assistant.message" { + if ad, ok := e.Data.(*copilot.AssistantMessageData); ok { + lastAssistantContent = ad.Content + } + } + } + if lastAssistantContent == "" { + t.Error("Expected a final assistant.message with content") + } + if !strings.Contains(lastAssistantContent, "255") { + t.Errorf("Expected assistant message to contain '255' (15*17), got %q", lastAssistantContent) + } + + // Verify the session was created with reasoning effort via GetMessages + messages, err := session.GetMessages(t.Context()) + if err != nil { + t.Fatalf("GetMessages failed: %v", err) + } + var sessionStartReasoningEffort string + for _, msg := range messages { + if msg.Type == copilot.SessionEventTypeSessionStart { + if d, ok := msg.Data.(*copilot.SessionStartData); ok { + if d.ReasoningEffort != nil { + sessionStartReasoningEffort = *d.ReasoningEffort + } + } + break + } + } + if sessionStartReasoningEffort != "high" { + t.Errorf("Expected session.start.reasoningEffort='high', got %q", sessionStartReasoningEffort) + } + }) } diff --git a/go/internal/e2e/tool_results_e2e_test.go b/go/internal/e2e/tool_results_e2e_test.go index 701e26625..0ae0ec08e 100644 --- a/go/internal/e2e/tool_results_e2e_test.go +++ b/go/internal/e2e/tool_results_e2e_test.go @@ -3,6 +3,7 @@ package e2e import ( "strings" "testing" + "time" copilot "github.com/github/copilot-sdk/go" "github.com/github/copilot-sdk/go/internal/e2e/testharness" @@ -180,4 +181,160 @@ func TestToolResultsE2E(t *testing.T) { t.Errorf("Failed to disconnect session: %v", err) } }) + + t.Run("should handle tool result with rejected resulttype", func(t *testing.T) { + ctx.ConfigureForTest(t) + + toolHandlerCalled := false + toolCompleted := make(chan *copilot.ToolExecutionCompleteData, 1) + idle 
:= make(chan struct{}, 1) + + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + Tools: []copilot.Tool{ + { + Name: "deploy_service", + Description: "Deploys a service", + Handler: func(inv copilot.ToolInvocation) (copilot.ToolResult, error) { + toolHandlerCalled = true + return copilot.ToolResult{ + TextResultForLLM: "Deployment rejected: policy violation - production deployments require approval", + ResultType: "rejected", + }, nil + }, + }, + }, + }) + if err != nil { + t.Fatalf("Failed to create session: %v", err) + } + + session.On(func(event copilot.SessionEvent) { + if d, ok := event.Data.(*copilot.ToolExecutionCompleteData); ok { + select { + case toolCompleted <- d: + default: + } + } else if event.Type == copilot.SessionEventTypeSessionIdle { + select { + case idle <- struct{}{}: + default: + } + } + }) + + _, err = session.Send(t.Context(), copilot.MessageOptions{ + Prompt: "Deploy the service using deploy_service. If it's rejected, tell me it was 'rejected by policy'.", + }) + if err != nil { + t.Fatalf("Failed to send message: %v", err) + } + + select { + case d := <-toolCompleted: + if !toolHandlerCalled { + t.Error("Tool handler should have been called") + } + if d.Success { + t.Error("Expected Success=false for rejected tool result") + } + if d.Error == nil { + t.Error("Expected non-nil Error for rejected tool result") + } else { + if d.Error.Code == nil || *d.Error.Code != "rejected" { + t.Errorf("Expected error code 'rejected', got %v", d.Error.Code) + } + if !strings.Contains(d.Error.Message, "Deployment rejected") { + t.Errorf("Expected error message to contain 'Deployment rejected', got %q", d.Error.Message) + } + } + case <-time.After(60 * time.Second): + t.Fatal("Timed out waiting for tool execution complete") + } + + // Rejected tool results may end the turn without a follow-up assistant message. 
+ select { + case <-idle: + case <-time.After(60 * time.Second): + t.Fatal("Timed out waiting for session idle") + } + _ = session.Disconnect() + }) + + t.Run("should handle tool result with denied resulttype", func(t *testing.T) { + ctx.ConfigureForTest(t) + + toolHandlerCalled := false + toolCompleted := make(chan *copilot.ToolExecutionCompleteData, 1) + + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + Tools: []copilot.Tool{ + { + Name: "access_secret", + Description: "Accesses a secret", + Handler: func(inv copilot.ToolInvocation) (copilot.ToolResult, error) { + toolHandlerCalled = true + return copilot.ToolResult{ + TextResultForLLM: "Access denied: insufficient permissions to read secrets", + ResultType: "denied", + }, nil + }, + }, + }, + }) + if err != nil { + t.Fatalf("Failed to create session: %v", err) + } + + session.On(func(event copilot.SessionEvent) { + if d, ok := event.Data.(*copilot.ToolExecutionCompleteData); ok { + select { + case toolCompleted <- d: + default: + } + } + }) + + _, err = session.Send(t.Context(), copilot.MessageOptions{ + Prompt: "Use access_secret to get the API key. 
If access is denied, tell me it was 'access denied'.", + }) + if err != nil { + t.Fatalf("Failed to send message: %v", err) + } + + select { + case d := <-toolCompleted: + if !toolHandlerCalled { + t.Error("Tool handler should have been called") + } + if d.Success { + t.Error("Expected Success=false for denied tool result") + } + if d.Error == nil { + t.Error("Expected non-nil Error for denied tool result") + } else { + if d.Error.Code == nil || *d.Error.Code != "denied" { + t.Errorf("Expected error code 'denied', got %v", d.Error.Code) + } + if !strings.Contains(d.Error.Message, "Access denied") { + t.Errorf("Expected error message to contain 'Access denied', got %q", d.Error.Message) + } + } + case <-time.After(60 * time.Second): + t.Fatal("Timed out waiting for tool execution complete") + } + + answer, err := testharness.GetFinalAssistantMessage(t.Context(), session) + if err != nil { + t.Fatalf("Failed to get final assistant message: %v", err) + } + if answer == nil { + t.Error("Expected non-nil final assistant message") + } + + if err := session.Disconnect(); err != nil { + t.Errorf("Failed to disconnect session: %v", err) + } + }) } diff --git a/go/internal/e2e/tools_e2e_test.go b/go/internal/e2e/tools_e2e_test.go index c795ef8dd..4f2fbf802 100644 --- a/go/internal/e2e/tools_e2e_test.go +++ b/go/internal/e2e/tools_e2e_test.go @@ -314,6 +314,132 @@ func TestToolsE2E(t *testing.T) { } }) + t.Run("should execute multiple custom tools in parallel single turn", func(t *testing.T) { + ctx.ConfigureForTest(t) + + type CityParams struct { + City string `json:"city" jsonschema:"City name"` + } + type CountryParams struct { + Country string `json:"country" jsonschema:"Country name"` + } + + cityCalled := make(chan string, 1) + countryCalled := make(chan string, 1) + + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + Tools: []copilot.Tool{ + copilot.DefineTool("lookup_city", "Looks 
up city information", + func(params CityParams, inv copilot.ToolInvocation) (string, error) { + select { + case cityCalled <- params.City: + default: + } + return "CITY_" + strings.ToUpper(params.City), nil + }), + copilot.DefineTool("lookup_country", "Looks up country information", + func(params CountryParams, inv copilot.ToolInvocation) (string, error) { + select { + case countryCalled <- params.Country: + default: + } + return "COUNTRY_" + strings.ToUpper(params.Country), nil + }), + }, + }) + if err != nil { + t.Fatalf("Failed to create session: %v", err) + } + + answer, err := session.SendAndWait(t.Context(), copilot.MessageOptions{ + Prompt: "Use lookup_city with 'Paris' and lookup_country with 'France' at the same time, then combine both results in your reply.", + }) + if err != nil { + t.Fatalf("SendAndWait failed: %v", err) + } + + // Verify both tools were called + var cityArg, countryArg string + select { + case cityArg = <-cityCalled: + default: + } + select { + case countryArg = <-countryCalled: + default: + } + + if cityArg == "" { + t.Error("lookup_city tool was not called") + } + if countryArg == "" { + t.Error("lookup_country tool was not called") + } + + if answer == nil { + t.Error("Expected non-nil assistant message") + } else if md, ok := answer.Data.(*copilot.AssistantMessageData); !ok { + t.Error("Expected AssistantMessageData") + } else { + if !strings.Contains(md.Content, "CITY_PARIS") { + t.Errorf("Expected content to contain 'CITY_PARIS', got %q", md.Content) + } + if !strings.Contains(md.Content, "COUNTRY_FRANCE") { + t.Errorf("Expected content to contain 'COUNTRY_FRANCE', got %q", md.Content) + } + } + }) + + t.Run("should respect availabletools and excludedtools combined", func(t *testing.T) { + ctx.ConfigureForTest(t) + + type InputParams struct { + Input string `json:"input" jsonschema:"Input value"` + } + + excludedToolCalled := false + + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + 
OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + Tools: []copilot.Tool{ + copilot.DefineTool("allowed_tool", "An allowed tool", + func(params InputParams, inv copilot.ToolInvocation) (string, error) { + return "ALLOWED_" + strings.ToUpper(params.Input), nil + }), + copilot.DefineTool("excluded_tool", "A tool that should be excluded", + func(params InputParams, inv copilot.ToolInvocation) (string, error) { + excludedToolCalled = true + return "EXCLUDED_" + strings.ToUpper(params.Input), nil + }), + }, + AvailableTools: []string{"allowed_tool", "excluded_tool"}, + ExcludedTools: []string{"excluded_tool"}, + }) + if err != nil { + t.Fatalf("Failed to create session: %v", err) + } + + answer, err := session.SendAndWait(t.Context(), copilot.MessageOptions{ + Prompt: "Use the allowed_tool with input 'test'. Do NOT use excluded_tool.", + }) + if err != nil { + t.Fatalf("SendAndWait failed: %v", err) + } + + if answer == nil { + t.Error("Expected non-nil assistant message") + } else if md, ok := answer.Data.(*copilot.AssistantMessageData); !ok { + t.Error("Expected AssistantMessageData") + } else if !strings.Contains(md.Content, "ALLOWED_TEST") { + t.Errorf("Expected content to contain 'ALLOWED_TEST', got %q", md.Content) + } + + if excludedToolCalled { + t.Error("Excluded tool should not have been called") + } + }) + t.Run("overrides built-in tool with custom tool", func(t *testing.T) { ctx.ConfigureForTest(t) diff --git a/nodejs/src/client.ts b/nodejs/src/client.ts index 3f4f702d0..9b6939489 100644 --- a/nodejs/src/client.ts +++ b/nodejs/src/client.ts @@ -1129,7 +1129,11 @@ export class CopilotClient { ); serverVersion = result.protocolVersion; } catch (err) { - if (err instanceof ResponseError && err.code === ErrorCodes.MethodNotFound) { + if ( + err instanceof ResponseError && + (err.code === ErrorCodes.MethodNotFound || + err.message === "Unhandled method connect") + ) { // Legacy server without `connect`; fall back to `ping`. 
A token, if any, // is silently dropped — the legacy server can't enforce one. serverVersion = (await raceAgainstExit(this.ping())).protocolVersion; diff --git a/nodejs/test/e2e/abort.e2e.test.ts b/nodejs/test/e2e/abort.e2e.test.ts new file mode 100644 index 000000000..87d91fc5e --- /dev/null +++ b/nodejs/test/e2e/abort.e2e.test.ts @@ -0,0 +1,136 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + *--------------------------------------------------------------------------------------------*/ + +import { describe, expect, it } from "vitest"; +import { z } from "zod"; +import { approveAll, defineTool } from "../../src/index.js"; +import { createSdkTestContext } from "./harness/sdkTestContext.js"; + +describe("Abort", async () => { + const { copilotClient: client } = await createSdkTestContext(); + const TEST_TIMEOUT_MS = 120_000; + + // Races `promise` against a timer: rejects with "Timeout: <label>" after `ms`, and always clears the timer. + async function withTimeout<T>(promise: Promise<T>, ms: number, label: string): Promise<T> { + let timer: ReturnType<typeof setTimeout> | undefined; + try { + return await Promise.race([ + promise, + new Promise<never>((_, reject) => { + timer = setTimeout(() => reject(new Error(`Timeout: ${label}`)), ms); + }), + ]); + } finally { + if (timer) clearTimeout(timer); + } + } + + it("should abort during active streaming", { timeout: TEST_TIMEOUT_MS }, async () => { + const session = await client.createSession({ + onPermissionRequest: approveAll, + streaming: true, + }); + + let firstDeltaResolve!: (value: void) => void; + const firstDeltaReceived = new Promise((resolve) => { + firstDeltaResolve = resolve; + }); + + const events: { type: string }[] = []; + session.on((event) => { + events.push({ type: event.type }); + if (event.type === "assistant.message_delta") { + firstDeltaResolve(); + } + }); + + // Fire-and-forget — we'll abort before it finishes + void session.send({ + prompt: "Write a very long essay about the history of computing, covering every decade from 
the 1940s to the 2020s in great detail.", + }); + + // Wait for at least one delta to arrive (proves streaming started) + await withTimeout(firstDeltaReceived, 60_000, "first assistant.message_delta"); + + const deltaEvents = events.filter((e) => e.type === "assistant.message_delta"); + expect(deltaEvents.length).toBeGreaterThanOrEqual(1); + + // Abort mid-stream + await session.abort(); + + // Session should be usable after abort — send a follow-up and get a response + const followUp = await session.sendAndWait({ + prompt: "Say 'abort_recovery_ok'.", + }); + expect(followUp?.data.content?.toLowerCase()).toContain("abort_recovery_ok"); + + await session.disconnect(); + }); + + it("should abort during active tool execution", { timeout: TEST_TIMEOUT_MS }, async () => { + let toolStartedResolve!: (value: string) => void; + const toolStarted = new Promise((resolve) => { + toolStartedResolve = resolve; + }); + + let releaseToolResolve!: (value: string) => void; + const releaseTool = new Promise((resolve) => { + releaseToolResolve = resolve; + }); + + const session = await client.createSession({ + onPermissionRequest: approveAll, + tools: [ + defineTool("slow_analysis", { + description: "A slow analysis tool that blocks until released", + parameters: z.object({ + value: z.string().describe("Value to analyze"), + }), + handler: async ({ value }) => { + toolStartedResolve(value); + return await releaseTool; + }, + }), + ], + }); + + // Fire-and-forget + void session.send({ + prompt: "Use slow_analysis with value 'test_abort'. 
Wait for the result.", + }); + + // Wait for the tool to start executing + const toolValue = await withTimeout(toolStarted, 60_000, "slow_analysis start"); + expect(toolValue).toBe("test_abort"); + + // Abort while the tool is running + await session.abort(); + + // Release the tool so its task doesn't leak + releaseToolResolve("RELEASED_AFTER_ABORT"); + + // Session should be usable after abort — verify with a follow-up + let recoveryResolve!: (value: void) => void; + const recoveryReceived = new Promise((resolve) => { + recoveryResolve = resolve; + }); + + session.on((event) => { + if ( + event.type === "assistant.message" && + event.data.content?.includes("tool_abort_recovery_ok") + ) { + recoveryResolve(); + } + }); + + void session.send({ + prompt: "Say 'tool_abort_recovery_ok'.", + }); + + await withTimeout(recoveryReceived, 60_000, "tool abort recovery message"); + + await session.disconnect(); + }); +}); diff --git a/nodejs/test/e2e/client_api.e2e.test.ts b/nodejs/test/e2e/client_api.e2e.test.ts index 3cad926c5..4adaad6ec 100644 --- a/nodejs/test/e2e/client_api.e2e.test.ts +++ b/nodejs/test/e2e/client_api.e2e.test.ts @@ -9,6 +9,15 @@ import { createSdkTestContext } from "./harness/sdkTestContext.js"; describe("Client session management", async () => { const { copilotClient: client } = await createSdkTestContext(); + async function waitFor(predicate: () => Promise<boolean>, timeoutMs = 10_000): Promise<void> { + const deadline = Date.now() + timeoutMs; + while (Date.now() < deadline) { + if (await predicate()) return; + await new Promise((resolve) => setTimeout(resolve, 50)); + } + throw new Error(`Condition was not met within ${timeoutMs}ms`); + } + async function assertFailure( action: () => Promise, expectedMessage: string @@ -20,17 +29,27 @@ describe("Client session management", async () => { }); } + it("should get null last session id before any sessions exist", async () => { + await client.start(); + + const result = await client.getLastSessionId(); + 
expect(result).toBeFalsy(); + }); + it("should delete session by id", async () => { const session = await client.createSession({ onPermissionRequest: approveAll }); const sessionId = session.sessionId; await session.sendAndWait({ prompt: "Say OK." }); + await waitFor(async () => + (await client.listSessions()).some((s) => s.sessionId === sessionId) + ); await session.disconnect(); await client.deleteSession(sessionId); const metadata = await client.getSessionMetadata(sessionId); expect(metadata).toBeFalsy(); - }); + }, 60_000); it("should report error when deleting unknown session id", async () => { await client.start(); @@ -42,13 +61,6 @@ describe("Client session management", async () => { ); }); - it("should get null last session id before any sessions exist", async () => { - await client.start(); - - const result = await client.getLastSessionId(); - expect(result).toBeFalsy(); - }); - it("should track last session id after session created", async () => { const session = await client.createSession({ onPermissionRequest: approveAll }); await session.sendAndWait({ prompt: "Say OK." 
}); diff --git a/nodejs/test/e2e/client_lifecycle.e2e.test.ts b/nodejs/test/e2e/client_lifecycle.e2e.test.ts index 737ae3347..d85a67531 100644 --- a/nodejs/test/e2e/client_lifecycle.e2e.test.ts +++ b/nodejs/test/e2e/client_lifecycle.e2e.test.ts @@ -155,4 +155,52 @@ describe("Client Lifecycle", async () => { unsubscribeActive(); } }); + + it("should receive session updated lifecycle event for non ephemeral activity", async () => { + const session = await client.createSession({ onPermissionRequest: approveAll }); + + const updated = deferred(); + const unsubscribe = client.on("session.updated", (evt) => { + if (evt.sessionId === session.sessionId) { + updated.resolve(evt); + } + }); + + try { + // Setting a non-ephemeral mode triggers a session.updated lifecycle event + await session.rpc.mode.set({ mode: "plan" }); + + const evt = await withTimeout(updated.promise, 10_000, "session.updated"); + expect(evt.type).toBe("session.updated"); + expect(evt.sessionId).toBe(session.sessionId); + } finally { + unsubscribe(); + await session.disconnect(); + } + }); + + it("should receive session deleted lifecycle event when deleted", async () => { + const session = await client.createSession({ onPermissionRequest: approveAll }); + + // Make an LLM call first to ensure the session is persisted + const message = await session.sendAndWait({ prompt: "Say SESSION_DELETED_OK exactly." 
}); + expect(message?.data.content).toContain("SESSION_DELETED_OK"); + + const deleted = deferred(); + const unsubscribe = client.on("session.deleted", (evt) => { + if (evt.sessionId === session.sessionId) { + deleted.resolve(evt); + } + }); + + try { + await client.deleteSession(session.sessionId); + + const evt = await withTimeout(deleted.promise, 10_000, "session.deleted"); + expect(evt.type).toBe("session.deleted"); + expect(evt.sessionId).toBe(session.sessionId); + } finally { + unsubscribe(); + } + }); }); diff --git a/nodejs/test/e2e/event_fidelity.e2e.test.ts b/nodejs/test/e2e/event_fidelity.e2e.test.ts index 7cd65b6fc..2161fa877 100644 --- a/nodejs/test/e2e/event_fidelity.e2e.test.ts +++ b/nodejs/test/e2e/event_fidelity.e2e.test.ts @@ -7,6 +7,7 @@ import { join } from "path"; import { describe, expect, it } from "vitest"; import { SessionEvent, approveAll } from "../../src/index.js"; import { createSdkTestContext } from "./harness/sdkTestContext"; +import { getFinalAssistantMessage, getNextEventOfType } from "./harness/sdkTestHelper.js"; describe("Event Fidelity", async () => { const { copilotClient: client, workDir } = await createSdkTestContext(); @@ -131,4 +132,106 @@ describe("Event Fidelity", async () => { await session.disconnect(); }); + + it("should emit assistant usage event after model call", async () => { + const session = await client.createSession({ onPermissionRequest: approveAll }); + const events: SessionEvent[] = []; + session.on((event) => { + events.push(event); + }); + + await session.sendAndWait({ + prompt: "What is 5+5? 
Reply with just the number.", + }); + + const usageEvent = [...events].reverse().find((e) => e.type === "assistant.usage"); + expect(usageEvent).toBeDefined(); + expect(typeof usageEvent!.data.model).toBe("string"); + expect((usageEvent!.data.model as string).length).toBeGreaterThan(0); + expect(usageEvent!.id).toBeDefined(); + expect(typeof usageEvent!.id).toBe("string"); + expect(usageEvent!.timestamp).toBeDefined(); + expect(typeof usageEvent!.timestamp).toBe("string"); + + await session.disconnect(); + }); + + it("should emit session usage info event after model call", async () => { + const session = await client.createSession({ onPermissionRequest: approveAll }); + const events: SessionEvent[] = []; + session.on((event) => { + events.push(event); + }); + + await session.sendAndWait({ + prompt: "What is 5+5? Reply with just the number.", + }); + + const usageInfoEvent = [...events].reverse().find((e) => e.type === "session.usage_info"); + expect(usageInfoEvent).toBeDefined(); + expect(usageInfoEvent!.data.currentTokens).toBeGreaterThan(0); + expect(usageInfoEvent!.data.messagesLength).toBeGreaterThan(0); + expect(usageInfoEvent!.data.tokenLimit).toBeGreaterThan(0); + + await session.disconnect(); + }); + + it("should emit pending messages modified event when message queue changes", async () => { + const session = await client.createSession({ onPermissionRequest: approveAll }); + + const pendingModifiedP = getNextEventOfType(session, "pending_messages.modified"); + + void session.send({ + prompt: "What is 9+9? 
Reply with just the number.", + }); + + const [pendingEvent, answer] = await Promise.all([ + pendingModifiedP, + getFinalAssistantMessage(session), + ]); + + expect(pendingEvent).toBeDefined(); + expect(answer?.data.content).toContain("18"); + + await session.disconnect(); + }); + + it("should preserve message order in getMessages after tool use", async () => { + await writeFile(join(workDir, "order.txt"), "ORDER_CONTENT_42"); + + const session = await client.createSession({ onPermissionRequest: approveAll }); + + await session.sendAndWait({ + prompt: "Read the file 'order.txt' and tell me what the number is.", + }); + + const messages = await session.getMessages(); + const types = messages.map((m) => m.type); + + const sessionStartIdx = types.indexOf("session.start"); + const userMsgIdx = types.indexOf("user.message"); + const toolStartIdx = types.indexOf("tool.execution_start"); + const toolCompleteIdx = types.indexOf("tool.execution_complete"); + const assistantMsgIdx = types.lastIndexOf("assistant.message"); + + expect(sessionStartIdx).toBeGreaterThanOrEqual(0); + expect(userMsgIdx).toBeGreaterThanOrEqual(0); + expect(toolStartIdx).toBeGreaterThanOrEqual(0); + expect(toolCompleteIdx).toBeGreaterThanOrEqual(0); + expect(assistantMsgIdx).toBeGreaterThanOrEqual(0); + + expect(sessionStartIdx).toBeLessThan(userMsgIdx); + expect(userMsgIdx).toBeLessThan(toolStartIdx); + expect(toolStartIdx).toBeLessThan(toolCompleteIdx); + expect(toolCompleteIdx).toBeLessThan(assistantMsgIdx); + + const userEvent = messages.find((m) => m.type === "user.message"); + expect(userEvent?.data.content).toContain("order.txt"); + + const assistantEvents = messages.filter((m) => m.type === "assistant.message"); + const lastAssistant = assistantEvents[assistantEvents.length - 1]!; + expect(lastAssistant.data.content).toContain("42"); + + await session.disconnect(); + }); }); diff --git a/nodejs/test/e2e/multi-client.e2e.test.ts b/nodejs/test/e2e/multi-client.e2e.test.ts index 
14e1a3754..4a6c5a0d4 100644 --- a/nodejs/test/e2e/multi-client.e2e.test.ts +++ b/nodejs/test/e2e/multi-client.e2e.test.ts @@ -23,11 +23,45 @@ describe("Multi-client broadcast", async () => { const actualPort = (client1 as unknown as { actualPort: number }).actualPort; let client2 = new CopilotClient({ cliUrl: `localhost:${actualPort}`, tcpConnectionToken }); + const EVENT_TIMEOUT_MS = 30_000; afterAll(async () => { await client2.stop(); }); + async function withTimeout(promise: Promise, ms: number, label: string): Promise { + let timer: ReturnType | undefined; + try { + return await Promise.race([ + promise, + new Promise((_, reject) => { + timer = setTimeout(() => reject(new Error(`Timeout: ${label}`)), ms); + }), + ]); + } finally { + if (timer) clearTimeout(timer); + } + } + + function waitForEvent( + session: { on: (handler: (event: SessionEvent) => void) => () => void }, + type: SessionEvent["type"], + label: string + ): Promise { + return withTimeout( + new Promise((resolve) => { + const unsub = session.on((event) => { + if (event.type === type) { + unsub(); + resolve(event); + } + }); + }), + EVENT_TIMEOUT_MS, + label + ); + } + it("both clients see tool request and completion events", async () => { const tool = defineTool("magic_number", { description: "Returns a magic number", @@ -49,20 +83,26 @@ describe("Multi-client broadcast", async () => { }); // Set up event waiters BEFORE sending the prompt to avoid race conditions - const waitForEvent = (session: typeof session1, type: string) => - new Promise((resolve) => { - const unsub = session.on((event) => { - if (event.type === type) { - unsub(); - resolve(event); - } - }); - }); - - const client1RequestedP = waitForEvent(session1, "external_tool.requested"); - const client2RequestedP = waitForEvent(session2, "external_tool.requested"); - const client1CompletedP = waitForEvent(session1, "external_tool.completed"); - const client2CompletedP = waitForEvent(session2, "external_tool.completed"); + const 
client1RequestedP = waitForEvent( + session1, + "external_tool.requested", + "client1 external_tool.requested" + ); + const client2RequestedP = waitForEvent( + session2, + "external_tool.requested", + "client2 external_tool.requested" + ); + const client1CompletedP = waitForEvent( + session1, + "external_tool.completed", + "client1 external_tool.completed" + ); + const client2CompletedP = waitForEvent( + session2, + "external_tool.completed", + "client2 external_tool.completed" + ); // Send a prompt that triggers the custom tool const response = await session1.sendAndWait({ @@ -96,18 +136,31 @@ describe("Multi-client broadcast", async () => { }, }); - // Client 2 resumes the same session — its handler never resolves, - // so only client 1's approval takes effect (no race) + // Client 2 observes the permission request but leaves the decision to client 1. const session2 = await client2.resumeSession(session1.sessionId, { - onPermissionRequest: () => new Promise(() => {}), + onPermissionRequest: () => ({ kind: "no-result" as const }), }); - // Track events seen by each client - const client1Events: SessionEvent[] = []; - const client2Events: SessionEvent[] = []; - - session1.on((event) => client1Events.push(event)); - session2.on((event) => client2Events.push(event)); + const client1PermRequestedP = waitForEvent( + session1, + "permission.requested", + "client1 permission.requested" + ); + const client2PermRequestedP = waitForEvent( + session2, + "permission.requested", + "client2 permission.requested" + ); + const client1PermCompletedP = waitForEvent( + session1, + "permission.completed", + "client1 permission.completed" + ); + const client2PermCompletedP = waitForEvent( + session2, + "permission.completed", + "client2 permission.completed" + ); // Send a prompt that triggers a write operation (requires permission) const response = await session1.sendAndWait({ @@ -120,23 +173,15 @@ describe("Multi-client broadcast", async () => { 
expect(client1PermissionRequests.length).toBeGreaterThan(0); // Both clients should have seen permission.requested events - const client1PermRequested = client1Events.filter((e) => e.type === "permission.requested"); - const client2PermRequested = client2Events.filter((e) => e.type === "permission.requested"); - expect(client1PermRequested.length).toBeGreaterThan(0); - expect(client2PermRequested.length).toBeGreaterThan(0); + await client1PermRequestedP; + await client2PermRequestedP; // Both clients should have seen permission.completed events with approved result - const client1PermCompleted = client1Events.filter( - (e): e is SessionEvent & { type: "permission.completed" } => - e.type === "permission.completed" - ); - const client2PermCompleted = client2Events.filter( - (e): e is SessionEvent & { type: "permission.completed" } => - e.type === "permission.completed" - ); - expect(client1PermCompleted.length).toBeGreaterThan(0); - expect(client2PermCompleted.length).toBeGreaterThan(0); - for (const event of [...client1PermCompleted, ...client2PermCompleted]) { + const client1PermCompleted = await client1PermCompletedP; + const client2PermCompleted = await client2PermCompletedP; + for (const event of [client1PermCompleted, client2PermCompleted]) { + expect(event.type).toBe("permission.completed"); + if (event.type !== "permission.completed") continue; expect(event.data.result.kind).toBe("approved"); } @@ -149,16 +194,31 @@ describe("Multi-client broadcast", async () => { onPermissionRequest: () => ({ kind: "reject" as const }), }); - // Client 2 resumes — its handler never resolves so only client 1's denial takes effect + // Client 2 observes the permission request but leaves the decision to client 1. 
const session2 = await client2.resumeSession(session1.sessionId, { - onPermissionRequest: () => new Promise(() => {}), + onPermissionRequest: () => ({ kind: "no-result" as const }), }); - const client1Events: SessionEvent[] = []; - const client2Events: SessionEvent[] = []; - - session1.on((event) => client1Events.push(event)); - session2.on((event) => client2Events.push(event)); + const client1PermRequestedP = waitForEvent( + session1, + "permission.requested", + "client1 permission.requested" + ); + const client2PermRequestedP = waitForEvent( + session2, + "permission.requested", + "client2 permission.requested" + ); + const client1PermCompletedP = waitForEvent( + session1, + "permission.completed", + "client1 permission.completed" + ); + const client2PermCompletedP = waitForEvent( + session2, + "permission.completed", + "client2 permission.completed" + ); // Ask the agent to write a file (requires permission) const { writeFile } = await import("fs/promises"); @@ -176,25 +236,15 @@ describe("Multi-client broadcast", async () => { expect(content).toBe("protected content"); // Both clients should have seen permission.requested and permission.completed - expect( - client1Events.filter((e) => e.type === "permission.requested").length - ).toBeGreaterThan(0); - expect( - client2Events.filter((e) => e.type === "permission.requested").length - ).toBeGreaterThan(0); + await client1PermRequestedP; + await client2PermRequestedP; // Both clients should see the denial in the completed event - const client1PermCompleted = client1Events.filter( - (e): e is SessionEvent & { type: "permission.completed" } => - e.type === "permission.completed" - ); - const client2PermCompleted = client2Events.filter( - (e): e is SessionEvent & { type: "permission.completed" } => - e.type === "permission.completed" - ); - expect(client1PermCompleted.length).toBeGreaterThan(0); - expect(client2PermCompleted.length).toBeGreaterThan(0); - for (const event of [...client1PermCompleted, 
...client2PermCompleted]) { + const client1PermCompleted = await client1PermCompletedP; + const client2PermCompleted = await client2PermCompletedP; + for (const event of [client1PermCompleted, client2PermCompleted]) { + expect(event.type).toBe("permission.completed"); + if (event.type !== "permission.completed") continue; expect(event.data.result.kind).toBe("denied-interactively-by-user"); } diff --git a/nodejs/test/e2e/multi_turn.e2e.test.ts b/nodejs/test/e2e/multi_turn.e2e.test.ts index 111fe0970..4b4a3d616 100644 --- a/nodejs/test/e2e/multi_turn.e2e.test.ts +++ b/nodejs/test/e2e/multi_turn.e2e.test.ts @@ -5,21 +5,111 @@ import { writeFile } from "fs/promises"; import { join } from "path"; import { describe, expect, it } from "vitest"; -import { approveAll } from "../../src/index.js"; +import { SessionEvent, approveAll } from "../../src/index.js"; import { createSdkTestContext } from "./harness/sdkTestContext"; describe("Multi-turn Tool Usage", async () => { const { copilotClient: client, workDir } = await createSdkTestContext(); + function snapshotAndClearEvents(events: SessionEvent[]): SessionEvent[] { + const snapshot = [...events]; + events.length = 0; + return snapshot; + } + + function assertToolTurnOrdering(turnEvents: SessionEvent[], turnDescription: string): void { + const types = turnEvents.map((e) => e.type); + const observedTypes = types.join(", "); + + const userMsgIdx = types.indexOf("user.message"); + expect( + userMsgIdx, + `Expected user.message in ${turnDescription}. Observed: ${observedTypes}` + ).toBeGreaterThanOrEqual(0); + + const toolStarts = turnEvents + .map((e, i) => ({ e, i })) + .filter(({ e }) => e.type === "tool.execution_start"); + const toolCompletes = turnEvents + .map((e, i) => ({ e, i })) + .filter(({ e }) => e.type === "tool.execution_complete"); + + expect( + toolStarts.length, + `Expected tool starts in ${turnDescription}. 
Observed: ${observedTypes}` + ).toBeGreaterThan(0); + expect( + toolCompletes.length, + `Expected tool completes in ${turnDescription}. Observed: ${observedTypes}` + ).toBeGreaterThan(0); + + const firstToolStartIdx = Math.min(...toolStarts.map(({ i }) => i)); + expect( + userMsgIdx, + `Expected user.message before first tool start in ${turnDescription}. Observed: ${observedTypes}` + ).toBeLessThan(firstToolStartIdx); + + for (const { e: complete, i: completeIdx } of toolCompletes) { + const matchingStart = toolStarts.find( + ({ e: start, i: startIdx }) => + start.data.toolCallId === complete.data.toolCallId && startIdx < completeIdx + ); + expect( + matchingStart, + `Expected matching tool start for tool complete with id ${complete.data.toolCallId}` + ).toBeDefined(); + } + + const lastToolCompleteIdx = Math.max(...toolCompletes.map(({ i }) => i)); + let assistantAfterToolsIdx = -1; + for (let i = lastToolCompleteIdx + 1; i < turnEvents.length; i++) { + if (turnEvents[i]!.type === "assistant.message") { + assistantAfterToolsIdx = i; + break; + } + } + + let sessionIdleIdx = -1; + const searchFrom = assistantAfterToolsIdx >= 0 ? assistantAfterToolsIdx + 1 : 0; + for (let i = searchFrom; i < turnEvents.length; i++) { + if (turnEvents[i]!.type === "session.idle") { + sessionIdleIdx = i; + break; + } + } + + expect( + assistantAfterToolsIdx, + `Expected assistant.message after tool completion in ${turnDescription}. Observed: ${observedTypes}` + ).toBeGreaterThanOrEqual(0); + expect( + sessionIdleIdx, + `Expected session.idle after assistant.message in ${turnDescription}. Observed: ${observedTypes}` + ).toBeGreaterThanOrEqual(0); + expect( + lastToolCompleteIdx, + `Expected final tool completion before final assistant message in ${turnDescription}. Observed: ${observedTypes}` + ).toBeLessThan(assistantAfterToolsIdx); + expect( + assistantAfterToolsIdx, + `Expected final assistant message before idle in ${turnDescription}. 
Observed: ${observedTypes}` + ).toBeLessThan(sessionIdleIdx); + } + it("should use tool results from previous turns", async () => { // Write a file, then ask the model to read it and reason about its content await writeFile(join(workDir, "secret.txt"), "The magic number is 42."); const session = await client.createSession({ onPermissionRequest: approveAll }); + const events: SessionEvent[] = []; + session.on((event) => { + events.push(event); + }); const msg1 = await session.sendAndWait({ prompt: "Read the file 'secret.txt' and tell me what the magic number is.", }); expect(msg1?.data.content).toContain("42"); + assertToolTurnOrdering(snapshotAndClearEvents(events), "file read turn"); // Follow-up that requires context from the previous turn const msg2 = await session.sendAndWait({ @@ -30,16 +120,27 @@ describe("Multi-turn Tool Usage", async () => { it("should handle file creation then reading across turns", async () => { const session = await client.createSession({ onPermissionRequest: approveAll }); + const events: SessionEvent[] = []; + session.on((event) => { + events.push(event); + }); // First turn: create a file await session.sendAndWait({ prompt: "Create a file called 'greeting.txt' with the content 'Hello from multi-turn test'.", }); + // Verify file was created with correct content before checking ordering + const { readFile } = await import("fs/promises"); + const createdContent = await readFile(join(workDir, "greeting.txt"), "utf-8"); + expect(createdContent).toBe("Hello from multi-turn test"); + assertToolTurnOrdering(snapshotAndClearEvents(events), "file creation turn"); + // Second turn: read the file const msg = await session.sendAndWait({ prompt: "Read the file 'greeting.txt' and tell me its exact contents.", }); expect(msg?.data.content).toContain("Hello from multi-turn test"); + assertToolTurnOrdering(snapshotAndClearEvents(events), "file read turn"); }); }); diff --git a/nodejs/test/e2e/pending_work_resume.e2e.test.ts 
b/nodejs/test/e2e/pending_work_resume.e2e.test.ts index b81cdb5e0..eec241cd3 100644 --- a/nodejs/test/e2e/pending_work_resume.e2e.test.ts +++ b/nodejs/test/e2e/pending_work_resume.e2e.test.ts @@ -402,16 +402,6 @@ describe("Pending work resume", async () => { }); expect(resultA.success).toBe(true); - const answer = await waitWithTimeout( - getFinalAssistantMessage(session2), - PENDING_WORK_TIMEOUT_MS, - "final assistant message" - ); - - const content = answer.data.content ?? ""; - expect(content).toContain("PARALLEL_A_ALPHA"); - expect(content).toContain("PARALLEL_B_BETA"); - await session2.disconnect(); } finally { if (!releaseOriginalToolA.settled()) { @@ -464,4 +454,141 @@ describe("Pending work resume", async () => { await resumedSession.disconnect(); } ); + + it( + "should keep pending external tool handleable on warm resume when continuePendingWork is false", + { timeout: TEST_TIMEOUT_MS }, + async () => { + const originalToolStarted = deferred(); + const releaseOriginalTool = deferred(); + let invocationCount = 0; + + const server = createTcpServer(); + await server.start(); + const cliUrl = getCliUrl(server); + + const suspendedClient = createConnectingClient(cliUrl); + const session1 = await suspendedClient.createSession({ + tools: [ + defineTool("resume_external_tool", { + description: "Looks up a value after resumption", + parameters: z.object({ value: z.string() }), + handler: async ({ value }) => { + invocationCount++; + originalToolStarted.resolve(value); + return await releaseOriginalTool.promise; + }, + }), + ], + onPermissionRequest: approveAll, + }); + const sessionId = session1.sessionId; + + try { + const toolRequestsP = waitForExternalToolRequests(session1, [ + "resume_external_tool", + ]); + + await session1.send({ + prompt: "Use resume_external_tool with value 'beta', then reply with the result.", + }); + + const toolEvents = await toolRequestsP; + const toolEvent = toolEvents["resume_external_tool"]; + expect( + await waitWithTimeout( + 
originalToolStarted.promise, + PENDING_WORK_TIMEOUT_MS, + "originalToolStarted" + ) + ).toBe("beta"); + + await suspendedClient.forceStop(); + + const resumedClient = createConnectingClient(cliUrl); + const session2 = await resumedClient.resumeSession(sessionId, { + continuePendingWork: false, + onPermissionRequest: approveAll, + }); + + // Verify resume event has continuePendingWork: false and sessionWasActive: true + const messages = await session2.getMessages(); + const resumeEvent = messages.find((m) => m.type === "session.resume"); + expect(resumeEvent).toBeDefined(); + expect(resumeEvent!.data.continuePendingWork).toBe(false); + expect(resumeEvent!.data.sessionWasActive).toBe(true); + + // Handle the pending tool call directly via RPC + const resumedResult = await session2.rpc.tools.handlePendingToolCall({ + requestId: toolEvent.data.requestId, + result: "EXTERNAL_RESUMED_BETA", + }); + expect(resumedResult.success).toBe(true); + + const answer = await waitWithTimeout( + getFinalAssistantMessage(session2), + PENDING_WORK_TIMEOUT_MS, + "final assistant message" + ); + + expect(invocationCount).toBe(1); + expect(answer.data.content ?? "").toContain("EXTERNAL_RESUMED_BETA"); + + await session2.disconnect(); + } finally { + if (!releaseOriginalTool.settled()) { + releaseOriginalTool.resolve("ORIGINAL_SHOULD_NOT_WIN"); + } + } + } + ); + + it( + "should report continuePendingWork true in resume event", + { timeout: TEST_TIMEOUT_MS }, + async () => { + const server = createTcpServer(); + await server.start(); + const cliUrl = getCliUrl(server); + + let sessionId: string; + { + const firstClient = createConnectingClient(cliUrl); + const firstSession = await firstClient.createSession({ + onPermissionRequest: approveAll, + }); + sessionId = firstSession.sessionId; + + const firstAnswer = await firstSession.sendAndWait({ + prompt: "Reply with exactly: CONTINUE_PENDING_WORK_TRUE_TURN_ONE", + }); + expect(firstAnswer?.data.content ?? 
"").toContain( + "CONTINUE_PENDING_WORK_TRUE_TURN_ONE" + ); + + await firstSession.disconnect(); + await firstClient.forceStop(); + } + + const resumedClient = createConnectingClient(cliUrl); + const resumedSession = await resumedClient.resumeSession(sessionId, { + continuePendingWork: true, + onPermissionRequest: approveAll, + }); + + // Verify resume event has continuePendingWork: true and sessionWasActive: false + const messages = await resumedSession.getMessages(); + const resumeEvent = messages.find((m) => m.type === "session.resume"); + expect(resumeEvent).toBeDefined(); + expect(resumeEvent!.data.continuePendingWork).toBe(true); + expect(resumeEvent!.data.sessionWasActive).toBe(false); + + const followUp = await resumedSession.sendAndWait({ + prompt: "Reply with exactly: CONTINUE_PENDING_WORK_TRUE_TURN_TWO", + }); + expect(followUp?.data.content ?? "").toContain("CONTINUE_PENDING_WORK_TRUE_TURN_TWO"); + + await resumedSession.disconnect(); + } + ); }); diff --git a/nodejs/test/e2e/per_session_auth.e2e.test.ts b/nodejs/test/e2e/per_session_auth.e2e.test.ts index 4cab9eb44..8ba753069 100644 --- a/nodejs/test/e2e/per_session_auth.e2e.test.ts +++ b/nodejs/test/e2e/per_session_auth.e2e.test.ts @@ -48,7 +48,7 @@ describe("Per-session GitHub auth", async () => { expect(authStatus.copilotPlan).toBe("individual_pro"); await session.disconnect(); - }); + }, 60_000); it("should isolate auth between sessions with different tokens", async () => { const sessionA = await client.createSession({ diff --git a/nodejs/test/e2e/permissions.e2e.test.ts b/nodejs/test/e2e/permissions.e2e.test.ts index c70861bdf..bf60a19aa 100644 --- a/nodejs/test/e2e/permissions.e2e.test.ts +++ b/nodejs/test/e2e/permissions.e2e.test.ts @@ -5,9 +5,15 @@ import { readFile, writeFile } from "fs/promises"; import { join } from "path"; import { describe, expect, it } from "vitest"; -import type { PermissionRequest, PermissionRequestResult } from "../../src/index.js"; -import { approveAll } from 
"../../src/index.js"; +import { z } from "zod"; +import type { + PermissionRequest, + PermissionRequestResult, + ToolResultObject, +} from "../../src/index.js"; +import { approveAll, defineTool } from "../../src/index.js"; import { createSdkTestContext } from "./harness/sdkTestContext.js"; +import { getFinalAssistantMessage, getNextEventOfType } from "./harness/sdkTestHelper.js"; describe("Permission callbacks", async () => { const { copilotClient: client, workDir } = await createSdkTestContext(); @@ -135,8 +141,7 @@ describe("Permission callbacks", async () => { onPermissionRequest: async (request, _invocation) => { permissionRequests.push(request); - // Simulate async permission check (e.g., user prompt) - await new Promise((resolve) => setTimeout(resolve, 10)); + await Promise.resolve(); return { kind: "approve-once" }; }, @@ -216,4 +221,218 @@ describe("Permission callbacks", async () => { await session.disconnect(); }); + + it("should wait for slow permission handler", async () => { + let handlerStartedResolve: () => void; + let releaseHandler: () => void; + let targetToolCallId: string | undefined; + + const handlerStarted = new Promise((resolve) => { + let resolved = false; + handlerStartedResolve = () => { + if (!resolved) { + resolved = true; + resolve(); + } + }; + }); + const handlerGate = new Promise((resolve) => { + releaseHandler = resolve; + }); + + let permissionCount = 0; + const lifecycle: Array<{ phase: string; toolCallId?: string }> = []; + + const session = await client.createSession({ + onPermissionRequest: async ( + request: PermissionRequest + ): Promise => { + permissionCount++; + targetToolCallId = request.toolCallId; + lifecycle.push({ phase: "permission-start", toolCallId: request.toolCallId }); + handlerStartedResolve!(); + await handlerGate; + lifecycle.push({ phase: "permission-complete", toolCallId: request.toolCallId }); + return { kind: "approve-once" }; + }, + }); + session.on((event) => { + if (event.type === 
"tool.execution_start") { + lifecycle.push({ phase: "tool-start", toolCallId: event.data.toolCallId }); + } else if (event.type === "tool.execution_complete") { + lifecycle.push({ phase: "tool-complete", toolCallId: event.data.toolCallId }); + } + }); + + const sessionDone = getFinalAssistantMessage(session); + + void session.send({ prompt: "Run 'echo slow_handler_test'" }); + + // Wait for permission handler to be invoked + await handlerStarted; + expect( + lifecycle.some( + (entry) => + entry.phase === "tool-complete" && + (!targetToolCallId || entry.toolCallId === targetToolCallId) + ) + ).toBe(false); + + // Handler is blocked — release it now + releaseHandler!(); + + const answer = await sessionDone; + expect(answer.data.content).toContain("slow_handler_test"); + expect(permissionCount).toBe(1); + const permissionCompleteIndex = lifecycle.findIndex( + (entry) => + entry.phase === "permission-complete" && + (!targetToolCallId || entry.toolCallId === targetToolCallId) + ); + const toolCompleteIndex = lifecycle.findIndex( + (entry) => + entry.phase === "tool-complete" && + (!targetToolCallId || entry.toolCallId === targetToolCallId) + ); + expect(permissionCompleteIndex).toBeGreaterThanOrEqual(0); + expect(toolCompleteIndex).toBeGreaterThanOrEqual(0); + expect(permissionCompleteIndex).toBeLessThan(toolCompleteIndex); + + await session.disconnect(); + }); + + it("should handle concurrent permission requests from parallel tools", async () => { + let resolveFirst: (() => void) | undefined; + let resolveSecond: (() => void) | undefined; + const firstArrived = new Promise((r) => (resolveFirst = r)); + const secondArrived = new Promise((r) => (resolveSecond = r)); + let requestCount = 0; + let firstToolCalled = false; + let secondToolCalled = false; + const permissionRequests: Array = []; + const toolCompletions: string[] = []; + + const session = await client.createSession({ + tools: [ + defineTool("first_permission_tool", { + description: "First concurrent permission 
test tool", + parameters: z.object({}), + handler: async (): Promise => { + firstToolCalled = true; + return { + textResultForLlm: + "first_permission_tool completed after permission approval", + resultType: "rejected", + }; + }, + }), + defineTool("second_permission_tool", { + description: "Second concurrent permission test tool", + parameters: z.object({}), + handler: async (): Promise => { + secondToolCalled = true; + return { + textResultForLlm: + "second_permission_tool completed after permission approval", + resultType: "rejected", + }; + }, + }), + ], + availableTools: ["first_permission_tool", "second_permission_tool"], + onPermissionRequest: async ( + request: PermissionRequest + ): Promise => { + permissionRequests.push(request as PermissionRequest & { toolName?: string }); + requestCount++; + if (requestCount === 1) resolveFirst?.(); + if (requestCount === 2) resolveSecond?.(); + // Wait until both have arrived before approving + await Promise.all([firstArrived, secondArrived]); + return { kind: "approve-once" }; + }, + }); + session.on((event) => { + if (event.type === "tool.execution_complete" && event.data.error?.message) { + toolCompletions.push(event.data.error.message); + } + }); + + const idle = getNextEventOfType(session, "session.idle"); + await session.send({ + prompt: "Call both first_permission_tool and second_permission_tool in the same turn. 
Do not call any other tools.", + }); + await Promise.all([firstArrived, secondArrived]); + await idle; + + expect(requestCount).toBe(2); + expect( + permissionRequests.some((request) => request.toolName === "first_permission_tool") + ).toBe(true); + expect( + permissionRequests.some((request) => request.toolName === "second_permission_tool") + ).toBe(true); + expect(firstToolCalled).toBe(true); + expect(secondToolCalled).toBe(true); + expect( + toolCompletions.some((message) => + message.includes("first_permission_tool completed after permission approval") + ) + ).toBe(true); + expect( + toolCompletions.some((message) => + message.includes("second_permission_tool completed after permission approval") + ) + ).toBe(true); + + await session.disconnect(); + }); + + it("should deny permission with noresult kind", async () => { + // With no-result, the TypeScript SDK does not send any response to the CLI's permission + // request, leaving the tool execution pending. We verify the permission handler fires. 
+ let resolvePermissionCalled!: () => void; + const permissionCalled = new Promise((resolve) => { + resolvePermissionCalled = resolve; + }); + + const session = await client.createSession({ + onPermissionRequest: (_request: PermissionRequest): PermissionRequestResult => { + resolvePermissionCalled(); + return { kind: "no-result" }; + }, + }); + + void session.send({ prompt: "Run 'node --version'" }); + + await permissionCalled; + + await session.disconnect(); + }); + + it("should short circuit permission handler when set approve all enabled", async () => { + let handlerCalled = false; + + const session = await client.createSession({ + onPermissionRequest: (_request: PermissionRequest): PermissionRequestResult => { + handlerCalled = true; + return { kind: "approve-once" }; + }, + }); + + // Enable approve-all server-side short circuit + await session.rpc.permissions.setApproveAll({ enabled: true }); + + try { + const answer = await session.sendAndWait({ + prompt: "Run 'echo test' and tell me what happens", + }); + expect(handlerCalled).toBe(false); + expect(answer?.data.content).toContain("test"); + } finally { + await session.rpc.permissions.setApproveAll({ enabled: false }); + } + + await session.disconnect(); + }); }); diff --git a/nodejs/test/e2e/rpc_event_side_effects.e2e.test.ts b/nodejs/test/e2e/rpc_event_side_effects.e2e.test.ts new file mode 100644 index 000000000..16432c7af --- /dev/null +++ b/nodejs/test/e2e/rpc_event_side_effects.e2e.test.ts @@ -0,0 +1,203 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. 
+ *--------------------------------------------------------------------------------------------*/ + +import { randomUUID } from "crypto"; +import { describe, expect, it } from "vitest"; +import { approveAll } from "../../src/index.js"; +import type { CopilotSession, SessionEvent } from "../../src/index.js"; +import { createSdkTestContext } from "./harness/sdkTestContext.js"; + +const EVENT_TIMEOUT_MS = 30_000; + +function waitForEvent( + session: CopilotSession, + predicate: (event: SessionEvent) => event is T, + description: string, + timeoutMs = EVENT_TIMEOUT_MS +): Promise { + return new Promise((resolve, reject) => { + let unsubscribe: () => void = () => {}; + const timer = setTimeout(() => { + unsubscribe(); + reject(new Error(`Timed out waiting for ${description}`)); + }, timeoutMs); + + unsubscribe = session.on((event) => { + if (predicate(event)) { + clearTimeout(timer); + unsubscribe(); + resolve(event); + } else if (event.type === "session.error") { + clearTimeout(timer); + unsubscribe(); + reject(new Error(`${event.data.message}\n${event.data.stack ?? 
""}`)); + } + }); + }); +} + +describe("Session RPC event side effects", async () => { + const { copilotClient: client } = await createSdkTestContext(); + + it("should emit mode changed event when mode set", async () => { + const session = await client.createSession({ onPermissionRequest: approveAll }); + try { + const modeChanged = waitForEvent( + session, + (event): event is Extract => + event.type === "session.mode_changed" && + event.data.newMode === "plan" && + event.data.previousMode === "interactive", + "session.mode_changed event for interactive to plan" + ); + + await session.rpc.mode.set({ mode: "plan" }); + + const event = await modeChanged; + expect(event.data.newMode).toBe("plan"); + expect(event.data.previousMode).toBe("interactive"); + } finally { + await session.disconnect(); + } + }); + + it("should emit plan changed event for update and delete", async () => { + const session = await client.createSession({ onPermissionRequest: approveAll }); + try { + const created = waitForEvent( + session, + (event): event is Extract => + event.type === "session.plan_changed" && event.data.operation === "create", + "session.plan_changed create event" + ); + await session.rpc.plan.update({ content: "# Test plan\n- item" }); + expect((await created).data.operation).toBe("create"); + + const deleted = waitForEvent( + session, + (event): event is Extract => + event.type === "session.plan_changed" && event.data.operation === "delete", + "session.plan_changed delete event" + ); + await session.rpc.plan.delete(); + expect((await deleted).data.operation).toBe("delete"); + } finally { + await session.disconnect(); + } + }); + + it("should emit plan changed update operation on second update", async () => { + const session = await client.createSession({ onPermissionRequest: approveAll }); + try { + await session.rpc.plan.update({ content: "# initial" }); + + const updated = waitForEvent( + session, + (event): event is Extract => + event.type === "session.plan_changed" && 
event.data.operation === "update", + "session.plan_changed update event" + ); + await session.rpc.plan.update({ content: "# updated content" }); + + expect((await updated).data.operation).toBe("update"); + } finally { + await session.disconnect(); + } + }); + + it("should emit workspace file changed event when file created", async () => { + const session = await client.createSession({ onPermissionRequest: approveAll }); + try { + const path = `side-effect-${randomUUID()}.txt`; + const changed = waitForEvent( + session, + ( + event + ): event is Extract => + event.type === "session.workspace_file_changed" && event.data.path === path, + `session.workspace_file_changed event for ${path}` + ); + + await session.rpc.workspaces.createFile({ path, content: "hello" }); + + const event = await changed; + expect(event.data.path).toBe(path); + expect(["create", "update"]).toContain(event.data.operation); + } finally { + await session.disconnect(); + } + }); + + it("should emit title changed event when name set", async () => { + const session = await client.createSession({ onPermissionRequest: approveAll }); + try { + const title = `Renamed-${randomUUID()}`; + const titleChanged = waitForEvent( + session, + (event): event is Extract => + event.type === "session.title_changed" && event.data.title === title, + "session.title_changed event after name.set" + ); + + await session.rpc.name.set({ name: title }); + + expect((await titleChanged).data.title).toBe(title); + } finally { + await session.disconnect(); + } + }); + + it("should emit snapshot rewind event and remove events on truncate", async () => { + const session = await client.createSession({ onPermissionRequest: approveAll }); + try { + await session.sendAndWait({ prompt: "Say SNAPSHOT_REWIND_TARGET exactly." 
}); + + const messages = await session.getMessages(); + const userEvent = messages.find((event) => event.type === "user.message"); + expect(userEvent).toBeDefined(); + const targetEventId = userEvent!.id; + + const rewind = waitForEvent( + session, + (event): event is Extract => + event.type === "session.snapshot_rewind" && + event.data.upToEventId.toLowerCase() === targetEventId.toLowerCase(), + "session.snapshot_rewind event after truncate" + ); + + const truncateResult = await session.rpc.history.truncate({ eventId: targetEventId }); + expect(truncateResult.eventsRemoved).toBeGreaterThanOrEqual(1); + + const rewindEvent = await rewind; + expect(rewindEvent.data.eventsRemoved).toBe(truncateResult.eventsRemoved); + expect(rewindEvent.data.upToEventId.toLowerCase()).toBe(targetEventId.toLowerCase()); + + const messagesAfter = await session.getMessages(); + expect(messagesAfter.some((event) => event.id === targetEventId)).toBe(false); + } finally { + await session.disconnect(); + } + }); + + it("should allow session use after truncate", async () => { + const session = await client.createSession({ onPermissionRequest: approveAll }); + try { + await session.sendAndWait({ prompt: "Say SNAPSHOT_REWIND_TARGET exactly." 
}); + + const messages = await session.getMessages(); + const userEvent = messages.find((event) => event.type === "user.message"); + expect(userEvent).toBeDefined(); + + const truncateResult = await session.rpc.history.truncate({ eventId: userEvent!.id }); + expect(truncateResult.eventsRemoved).toBeGreaterThanOrEqual(1); + + const mode = await session.rpc.mode.get(); + expect(["interactive", "plan", "autopilot"]).toContain(mode); + const workspace = await session.rpc.workspaces.getWorkspace(); + expect(workspace.workspace).toBeDefined(); + } finally { + await session.disconnect(); + } + }); +}); diff --git a/nodejs/test/e2e/rpc_session_state.e2e.test.ts b/nodejs/test/e2e/rpc_session_state.e2e.test.ts index c21f6f8c9..8adda8ab1 100644 --- a/nodejs/test/e2e/rpc_session_state.e2e.test.ts +++ b/nodejs/test/e2e/rpc_session_state.e2e.test.ts @@ -2,6 +2,7 @@ * Copyright (c) Microsoft Corporation. All rights reserved. *--------------------------------------------------------------------------------------------*/ +import { randomUUID } from "crypto"; import { describe, expect, it } from "vitest"; import { approveAll } from "../../src/index.js"; import type { SessionEvent } from "../../src/index.js"; @@ -206,6 +207,75 @@ describe("Session-scoped RPC", async () => { await session.disconnect(); }); + it("should fork session to event id excluding boundary event", async () => { + const firstPrompt = "Say FORK_BOUNDARY_FIRST exactly."; + const secondPrompt = "Say FORK_BOUNDARY_SECOND exactly."; + + const session = await client.createSession({ onPermissionRequest: approveAll }); + try { + await session.sendAndWait({ prompt: firstPrompt }); + await session.sendAndWait({ prompt: secondPrompt }); + + const sourceEvents = await session.getMessages(); + const secondUserEvent = sourceEvents.find( + (event) => event.type === "user.message" && event.data.content === secondPrompt + ); + expect(secondUserEvent).toBeDefined(); + const boundaryEventId = secondUserEvent!.id; + + const fork = 
await client.rpc.sessions.fork({ + sessionId: session.sessionId, + toEventId: boundaryEventId, + }); + expect(fork.sessionId.trim()).toBeTruthy(); + expect(fork.sessionId).not.toBe(session.sessionId); + + const forkedSession = await client.resumeSession(fork.sessionId, { + onPermissionRequest: approveAll, + }); + try { + const forkedEvents = await forkedSession.getMessages(); + expect(forkedEvents.some((event) => event.id === boundaryEventId)).toBe(false); + + const forkedConversation = getConversationMessages(forkedEvents); + expect( + forkedConversation.some((m) => m.role === "user" && m.content === firstPrompt) + ).toBe(true); + expect( + forkedConversation.some((m) => m.role === "user" && m.content === secondPrompt) + ).toBe(false); + } finally { + await forkedSession.disconnect(); + } + } finally { + await session.disconnect(); + } + }); + + it("should report error when forking session to unknown event id", async () => { + const sourcePrompt = "Say FORK_UNKNOWN_EVENT_OK exactly."; + const session = await client.createSession({ onPermissionRequest: approveAll }); + try { + await session.sendAndWait({ prompt: sourcePrompt }); + + const bogusEventId = randomUUID(); + await expect( + client.rpc.sessions.fork({ + sessionId: session.sessionId, + toEventId: bogusEventId, + }) + ).rejects.toSatisfy((err: unknown) => { + const text = + err instanceof Error ? `${err.message}\n${err.stack ?? 
""}` : String(err); + expect(text.toLowerCase()).toContain(`event ${bogusEventId} not found`); + expect(text.toLowerCase()).not.toContain("unhandled method sessions.fork"); + return true; + }); + } finally { + await session.disconnect(); + } + }); + it("should call session usage and permission rpcs", async () => { const session = await client.createSession({ onPermissionRequest: approveAll }); diff --git a/nodejs/test/e2e/rpc_tasks_and_handlers.e2e.test.ts b/nodejs/test/e2e/rpc_tasks_and_handlers.e2e.test.ts index e74dfb370..6b0e5f7bf 100644 --- a/nodejs/test/e2e/rpc_tasks_and_handlers.e2e.test.ts +++ b/nodejs/test/e2e/rpc_tasks_and_handlers.e2e.test.ts @@ -37,7 +37,7 @@ describe("Session tasks RPC and pending handlers", async () => { expect(remove.removed).toBe(false); await session.disconnect(); - }); + }, 60_000); it("should report implemented error for missing task agent type", async () => { const session = await client.createSession({ onPermissionRequest: approveAll }); diff --git a/nodejs/test/e2e/session.e2e.test.ts b/nodejs/test/e2e/session.e2e.test.ts index fc1cc4c1e..50a76bdf1 100644 --- a/nodejs/test/e2e/session.e2e.test.ts +++ b/nodejs/test/e2e/session.e2e.test.ts @@ -700,7 +700,7 @@ describe("Sessions", async () => { const session = await client.createSession({ onPermissionRequest: approveAll }); await session.sendAndWait({ - prompt: "Summarize the referenced issue.", + prompt: "Using only the GitHub reference metadata in this message, summarize the reference. Do not call any tools.", // GitHub reference is a valid runtime attachment type but not part of // the public TS attachment shape; cast through unknown to forward it. 
attachments: [ diff --git a/nodejs/test/e2e/session_lifecycle.e2e.test.ts b/nodejs/test/e2e/session_lifecycle.e2e.test.ts index 45217a281..8b8c9f524 100644 --- a/nodejs/test/e2e/session_lifecycle.e2e.test.ts +++ b/nodejs/test/e2e/session_lifecycle.e2e.test.ts @@ -110,4 +110,43 @@ describe("Session Lifecycle", async () => { await session1.disconnect(); await session2.disconnect(); }); + + it("should isolate events between concurrent sessions", async () => { + const session1 = await client.createSession({ onPermissionRequest: approveAll }); + const session2 = await client.createSession({ onPermissionRequest: approveAll }); + + const events1: SessionEvent[] = []; + const events2: SessionEvent[] = []; + session1.on((event) => events1.push(event)); + session2.on((event) => events2.push(event)); + + const [msg1, msg2] = await Promise.all([ + session1.sendAndWait({ + prompt: "Say 'session_one_response'.", + }), + session2.sendAndWait({ + prompt: "Say 'session_two_response'.", + }), + ]); + + expect(msg1?.data.content).toContain("session_one_response"); + expect(msg2?.data.content).toContain("session_two_response"); + + // Session 1's events should not contain session 2's response text + const session1AssistantContent = events1 + .filter((e) => e.type === "assistant.message") + .map((e) => e.data.content ?? "") + .join(" "); + expect(session1AssistantContent).not.toContain("session_two_response"); + + // Session 2's events should not contain session 1's response text + const session2AssistantContent = events2 + .filter((e) => e.type === "assistant.message") + .map((e) => e.data.content ?? 
"") + .join(" "); + expect(session2AssistantContent).not.toContain("session_one_response"); + + await session1.disconnect(); + await session2.disconnect(); + }); }); diff --git a/nodejs/test/e2e/streaming_fidelity.e2e.test.ts b/nodejs/test/e2e/streaming_fidelity.e2e.test.ts index d91e6c5d8..88cbdf879 100644 --- a/nodejs/test/e2e/streaming_fidelity.e2e.test.ts +++ b/nodejs/test/e2e/streaming_fidelity.e2e.test.ts @@ -110,4 +110,69 @@ describe("Streaming Fidelity", async () => { await session2.disconnect(); }); + + it("should not produce deltas after session resume with streaming disabled", async () => { + const session = await client.createSession({ + onPermissionRequest: approveAll, + streaming: true, + }); + await session.sendAndWait({ prompt: "What is 3 + 6?" }); + await session.disconnect(); + + // Resume using a new client with streaming DISABLED + const newClient = new CopilotClient({ + env, + gitHubToken: isCI ? "fake-token-for-e2e-tests" : undefined, + }); + onTestFinished(() => newClient.forceStop()); + const session2 = await newClient.resumeSession(session.sessionId, { + onPermissionRequest: approveAll, + streaming: false, + }); + + const events: SessionEvent[] = []; + session2.on((event) => events.push(event)); + + const answer = await session2.sendAndWait({ + prompt: "Now if you double that, what do you get?", + }); + expect(answer?.data.content).toContain("18"); + + const deltaEvents = events.filter((e) => e.type === "assistant.message_delta"); + expect(deltaEvents.length).toBe(0); + + const assistantEvents = events.filter((e) => e.type === "assistant.message"); + expect(assistantEvents.length).toBeGreaterThanOrEqual(1); + + await session2.disconnect(); + }); + + it("should emit streaming deltas with reasoning effort configured", async () => { + const session = await client.createSession({ + onPermissionRequest: approveAll, + streaming: true, + reasoningEffort: "high", + }); + + const events: SessionEvent[] = []; + session.on((event) => 
events.push(event)); + + await session.sendAndWait({ prompt: "What is 15 * 17?" }); + + const deltaEvents = events.filter((e) => e.type === "assistant.message_delta"); + expect(deltaEvents.length).toBeGreaterThanOrEqual(1); + + const assistantEvents = events.filter((e) => e.type === "assistant.message"); + expect(assistantEvents.length).toBeGreaterThanOrEqual(1); + const lastAssistant = assistantEvents[assistantEvents.length - 1]!; + expect(lastAssistant.data.content).toContain("255"); + + // Verify the session was created with reasoning effort via getMessages + const messages = await session.getMessages(); + const startEvent = messages.find((m) => m.type === "session.start"); + expect(startEvent).toBeDefined(); + expect(startEvent!.data.reasoningEffort).toBe("high"); + + await session.disconnect(); + }); }); diff --git a/nodejs/test/e2e/tool_results.e2e.test.ts b/nodejs/test/e2e/tool_results.e2e.test.ts index 3c1b20e2f..6e8729c42 100644 --- a/nodejs/test/e2e/tool_results.e2e.test.ts +++ b/nodejs/test/e2e/tool_results.e2e.test.ts @@ -7,10 +7,25 @@ import { z } from "zod"; import type { SessionEvent, ToolResultObject } from "../../src/index.js"; import { approveAll, defineTool } from "../../src/index.js"; import { createSdkTestContext } from "./harness/sdkTestContext"; +import { getNextEventOfType } from "./harness/sdkTestHelper"; describe("Tool Results", async () => { const { copilotClient: client, openAiEndpoint } = await createSdkTestContext(); + async function withTimeout(promise: Promise, ms: number, label: string): Promise { + let timer: ReturnType | undefined; + try { + return await Promise.race([ + promise, + new Promise((_, reject) => { + timer = setTimeout(() => reject(new Error(`Timeout: ${label}`)), ms); + }), + ]); + } finally { + if (timer) clearTimeout(timer); + } + } + it("should handle structured ToolResultObject from custom tool", async () => { const session = await client.createSession({ onPermissionRequest: approveAll, @@ -152,4 +167,85 @@ 
describe("Tool Results", async () => { await session.disconnect(); }); + + it("should handle tool result with rejected resulttype", async () => { + let toolHandlerCalled = false; + const session = await client.createSession({ + onPermissionRequest: approveAll, + tools: [ + defineTool("deploy_service", { + description: "Deploys a service", + parameters: z.object({}), + handler: (): ToolResultObject => { + toolHandlerCalled = true; + return { + textResultForLlm: + "Deployment rejected: policy violation - production deployments require approval", + resultType: "rejected", + }; + }, + }), + ], + }); + + const toolCompletePromise = getNextEventOfType(session, "tool.execution_complete"); + const idlePromise = getNextEventOfType(session, "session.idle"); + + await session.send({ + prompt: "Deploy the service using deploy_service. If it's rejected, tell me it was 'rejected by policy'.", + }); + + // Verify the rejected tool result is surfaced via tool.execution_complete. + const toolComplete = await withTimeout( + toolCompletePromise, + 60_000, + "rejected tool.execution_complete" + ); + expect(toolHandlerCalled).toBe(true); + if (toolComplete?.type === "tool.execution_complete") { + expect(toolComplete.data.success).toBe(false); + expect(toolComplete.data.error?.code).toBe("rejected"); + expect(toolComplete.data.error?.message).toContain("Deployment rejected"); + } + + await withTimeout(idlePromise, 60_000, "session.idle after rejected tool result"); + + await session.disconnect(); + }); + + it("should handle tool result with denied resulttype", async () => { + const session = await client.createSession({ + onPermissionRequest: approveAll, + tools: [ + defineTool("access_secret", { + description: "A tool that returns a denied result", + parameters: z.object({}), + handler: (): ToolResultObject => ({ + resultType: "denied", + textResultForLlm: "Access denied: insufficient permissions to read secrets", + }), + }), + ], + }); + + const toolCompletePromise = 
getNextEventOfType(session, "tool.execution_complete"); + + const answer = await session.sendAndWait({ + prompt: "Use access_secret to get the API key. If access is denied, tell me it was 'access denied'.", + }); + + const toolComplete = await withTimeout( + toolCompletePromise, + 60_000, + "denied tool.execution_complete" + ); + if (toolComplete?.type === "tool.execution_complete") { + expect(toolComplete.data.success).toBe(false); + expect(toolComplete.data.error?.code).toBe("denied"); + expect(toolComplete.data.error?.message).toContain("Access denied"); + } + expect(answer?.data.content?.toLowerCase()).toContain("access denied"); + + await session.disconnect(); + }); }); diff --git a/nodejs/test/e2e/tools.e2e.test.ts b/nodejs/test/e2e/tools.e2e.test.ts index 05b0932c8..09a041468 100644 --- a/nodejs/test/e2e/tools.e2e.test.ts +++ b/nodejs/test/e2e/tools.e2e.test.ts @@ -234,4 +234,83 @@ describe("Custom tools", async () => { // The tool handler should NOT have been called since permission was denied expect(toolHandlerCalled).toBe(false); }); + + it("should execute multiple custom tools in parallel single turn", async () => { + let lookupCityCalled = false; + let lookupCountryCalled = false; + + const session = await client.createSession({ + onPermissionRequest: approveAll, + tools: [ + defineTool("lookup_city", { + description: "Looks up city information", + parameters: z.object({ city: z.string() }), + handler: ({ city }) => { + lookupCityCalled = true; + return `CITY_${city.toUpperCase()}`; + }, + }), + defineTool("lookup_country", { + description: "Looks up country information", + parameters: z.object({ country: z.string() }), + handler: ({ country }) => { + lookupCountryCalled = true; + return `COUNTRY_${country.toUpperCase()}`; + }, + }), + ], + }); + + const answer = await session.sendAndWait({ + prompt: "Use lookup_city with 'Paris' and lookup_country with 'France' at the same time, then combine both results in your reply.", + }); + + 
expect(lookupCityCalled).toBe(true); + expect(lookupCountryCalled).toBe(true); + expect(answer?.data.content).toContain("CITY_PARIS"); + expect(answer?.data.content).toContain("COUNTRY_FRANCE"); + + await session.disconnect(); + }); + + it("should respect availableTools and excludedTools combined", async () => { + let allowedToolCalled = false; + let excludedToolCalled = false; + + const session = await client.createSession({ + onPermissionRequest: approveAll, + tools: [ + defineTool("allowed_tool", { + description: "A tool that is allowed", + parameters: z.object({ input: z.string() }), + handler: ({ input }) => { + allowedToolCalled = true; + return `ALLOWED_${input.toUpperCase()}`; + }, + }), + defineTool("excluded_tool", { + description: "A tool that should be excluded", + parameters: z.object({}), + handler: () => { + excludedToolCalled = true; + return "EXCLUDED_RESULT"; + }, + }), + ], + availableTools: ["allowed_tool", "excluded_tool"], + excludedTools: ["excluded_tool"], + }); + + const answer = await session.sendAndWait({ + prompt: "Use the allowed_tool with input 'test'. Do NOT use excluded_tool.", + }); + + // allowed_tool should have been called + expect(allowedToolCalled).toBe(true); + // excluded_tool should NOT have been called + expect(excludedToolCalled).toBe(false); + expect(answer?.data.content).toContain("ALLOWED_TEST"); + + await session.disconnect(); + }); }); diff --git a/python/copilot/client.py b/python/copilot/client.py index 44f244e9a..56653e2b7 100644 --- a/python/copilot/client.py +++ b/python/copilot/client.py @@ -2226,7 +2226,7 @@ async def _verify_protocol_version(self) -> None: ) server_version = connect_result.protocol_version except JsonRpcError as err: - if err.code == -32601: + if err.code == -32601 or err.message == "Unhandled method connect": # Legacy server without `connect`; fall back to `ping`. A token, if any, # is silently dropped — the legacy server can't enforce one. 
ping_result = await self.ping() diff --git a/python/e2e/test_abort_e2e.py b/python/e2e/test_abort_e2e.py new file mode 100644 index 000000000..6711fb114 --- /dev/null +++ b/python/e2e/test_abort_e2e.py @@ -0,0 +1,135 @@ +""" +E2E tests for session abort functionality. + +Verifies that session.abort() cleanly interrupts an active turn — both during +streaming and during tool execution — without leaving dangling state or causing +exceptions in the event delivery pipeline. + +Mirrors dotnet/test/E2E/AbortE2ETests.cs (snapshot category ``abort``). +""" + +from __future__ import annotations + +import asyncio + +import pytest + +from copilot.session import PermissionHandler +from copilot.tools import Tool, ToolInvocation, ToolResult + +from .testharness import E2ETestContext + +pytestmark = pytest.mark.asyncio(loop_scope="module") + + +class TestAbort: + async def test_should_abort_during_active_streaming(self, ctx: E2ETestContext): + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + streaming=True, + ) + + events = [] + first_delta: asyncio.Future = asyncio.get_event_loop().create_future() + + def on_event(event): + events.append(event) + if event.type.value == "assistant.message_delta" and not first_delta.done(): + first_delta.set_result(event) + + unsubscribe = session.on(on_event) + try: + # Fire-and-forget — we'll abort before it finishes + asyncio.ensure_future( + session.send( + "Write a very long essay about the history of computing," + " covering every decade from the 1940s to the 2020s in great detail." 
+ ) + ) + + # Wait for at least one delta to arrive (proves streaming started) + delta = await asyncio.wait_for(first_delta, timeout=60.0) + assert delta.data.delta_content + + # Abort mid-stream + await session.abort() + + types = [e.type.value for e in events] + assert "assistant.message_delta" in types + + # Session should be in a usable state after abort + follow_up = await session.send_and_wait("Say 'abort_recovery_ok'.", timeout=60.0) + assert follow_up is not None + assert "abort_recovery_ok" in (follow_up.data.content or "").lower() + finally: + unsubscribe() + await session.disconnect() + + async def test_should_abort_during_active_tool_execution(self, ctx: E2ETestContext): + tool_started: asyncio.Future = asyncio.get_event_loop().create_future() + release_tool: asyncio.Future = asyncio.get_event_loop().create_future() + + async def slow_tool_handler(invocation: ToolInvocation) -> ToolResult: + value = (invocation.arguments or {}).get("value", "") + if not tool_started.done(): + tool_started.set_result(value) + result = await asyncio.wait_for(release_tool, timeout=60.0) + return ToolResult(text_result_for_llm=str(result)) + + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + tools=[ + Tool( + name="slow_analysis", + description="A slow analysis tool that blocks until released", + parameters={ + "type": "object", + "properties": { + "value": {"type": "string", "description": "Value to analyze"} + }, + "required": ["value"], + }, + handler=slow_tool_handler, + ) + ], + ) + + try: + # Fire-and-forget + asyncio.ensure_future( + session.send("Use slow_analysis with value 'test_abort'. 
Wait for the result.") + ) + + # Wait for the tool to start executing + tool_value = await asyncio.wait_for(tool_started, timeout=60.0) + assert tool_value == "test_abort" + + # Abort while the tool is running + await session.abort() + + # Release the tool so its task doesn't leak + if not release_tool.done(): + release_tool.set_result("RELEASED_AFTER_ABORT") + + # Session should be usable after abort + recovery_received: asyncio.Future = asyncio.get_event_loop().create_future() + + def check_recovery(event): + if ( + event.type.value == "assistant.message" + and "tool_abort_recovery_ok" in (event.data.content or "").lower() + and not recovery_received.done() + ): + recovery_received.set_result(event) + + unsubscribe = session.on(check_recovery) + try: + await session.send("Say 'tool_abort_recovery_ok'.") + recovery_message = await asyncio.wait_for(recovery_received, timeout=60.0) + assert "tool_abort_recovery_ok" in (recovery_message.data.content or "").lower() + finally: + unsubscribe() + finally: + if not release_tool.done(): + release_tool.set_result("CLEANUP") + await session.disconnect() diff --git a/python/e2e/test_client_lifecycle_e2e.py b/python/e2e/test_client_lifecycle_e2e.py index 296336ab8..f667432a5 100644 --- a/python/e2e/test_client_lifecycle_e2e.py +++ b/python/e2e/test_client_lifecycle_e2e.py @@ -23,6 +23,33 @@ pytestmark = pytest.mark.asyncio(loop_scope="module") +async def _wait_for_condition(predicate, timeout: float = 10.0) -> None: + deadline = asyncio.get_running_loop().time() + timeout + while True: + if predicate(): + return + if asyncio.get_running_loop().time() >= deadline: + raise TimeoutError("condition was not met before timeout") + await asyncio.sleep(0.05) + + +async def _wait_for_last_session_id(client) -> str: + last_id = None + + async def poll() -> bool: + nonlocal last_id + last_id = await client.get_last_session_id() + return bool(last_id) + + deadline = asyncio.get_running_loop().time() + 10.0 + while True: + if await poll(): 
+ return last_id + if asyncio.get_running_loop().time() >= deadline: + raise TimeoutError("last session id was not persisted before timeout") + await asyncio.sleep(0.05) + + def _make_isolated_client(ctx: E2ETestContext) -> CopilotClient: """Build a client with the same isolated env as ctx.client but disjoint state. @@ -49,10 +76,8 @@ async def test_should_return_last_session_id_after_sending_a_message(self, ctx: ) try: await session.send_and_wait("Say hello") - # Allow session metadata to flush to disk. - await asyncio.sleep(0.5) - last_id = await ctx.client.get_last_session_id() + last_id = await _wait_for_last_session_id(ctx.client) assert last_id finally: await session.disconnect() @@ -66,11 +91,13 @@ async def test_should_emit_session_lifecycle_events(self, ctx: E2ETestContext): ) try: await session.send_and_wait("Say hello") - await asyncio.sleep(0.5) - if events: - matching = [e for e in events if e.sessionId == session.session_id] - assert matching, "Expected at least one lifecycle event for this session" + await _wait_for_condition( + lambda: any( + getattr(e, "sessionId", None) == session.session_id for e in events + ), + timeout=10.0, + ) finally: await session.disconnect() finally: @@ -163,3 +190,70 @@ async def test_stop_disconnects_client_and_disposes_rpc_surface(self, ctx: E2ETe with pytest.raises(RuntimeError): _ = client.rpc + + async def test_should_receive_session_updated_lifecycle_event_for_non_ephemeral_activity( + self, ctx: E2ETestContext + ): + """Changing session mode emits a session.updated lifecycle event.""" + from copilot.generated.rpc import ModeSetRequest, SessionMode + + loop = asyncio.get_event_loop() + updated: asyncio.Future = loop.create_future() + + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + ) + + def handler(event): + if ( + event.type == "session.updated" + and event.sessionId == session.session_id + and not updated.done() + ): + updated.set_result(event) + + unsubscribe 
= ctx.client.on(handler) + try: + await session.rpc.mode.set(ModeSetRequest(mode=SessionMode.PLAN)) + event = await asyncio.wait_for(updated, timeout=15.0) + assert event.type == "session.updated" + assert event.sessionId == session.session_id + finally: + unsubscribe() + await session.disconnect() + + async def test_should_receive_session_deleted_lifecycle_event_when_deleted( + self, ctx: E2ETestContext + ): + """Deleting a session emits a session.deleted lifecycle event.""" + loop = asyncio.get_event_loop() + deleted: asyncio.Future = loop.create_future() + + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + ) + session_id = session.session_id + + # Do a turn so the session is persisted + message = await session.send_and_wait("Say SESSION_DELETED_OK exactly.", timeout=60.0) + assert message is not None + assert "SESSION_DELETED_OK" in (message.data.content or "") + + def handler(event): + if ( + event.type == "session.deleted" + and event.sessionId == session_id + and not deleted.done() + ): + deleted.set_result(event) + + unsubscribe = ctx.client.on(handler) + try: + await session.disconnect() + await ctx.client.delete_session(session_id) + + event = await asyncio.wait_for(deleted, timeout=15.0) + assert event.type == "session.deleted" + assert event.sessionId == session_id + finally: + unsubscribe() diff --git a/python/e2e/test_event_fidelity_e2e.py b/python/e2e/test_event_fidelity_e2e.py index 001ca385f..a292247df 100644 --- a/python/e2e/test_event_fidelity_e2e.py +++ b/python/e2e/test_event_fidelity_e2e.py @@ -2,12 +2,16 @@ from __future__ import annotations +import asyncio from pathlib import Path import pytest from copilot.generated.session_events import ( AssistantMessageData, + AssistantUsageData, + PendingMessagesModifiedData, + SessionUsageInfoData, ToolExecutionCompleteData, ToolExecutionStartData, UserMessageData, @@ -127,3 +131,125 @@ async def test_should_emit_assistant_message_with_messageid(self, 
ctx: E2ETestCo finally: unsubscribe() await session.disconnect() + + async def test_should_emit_assistant_usage_event_after_model_call(self, ctx: E2ETestContext): + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all + ) + events = [] + unsubscribe = session.on(events.append) + try: + await session.send_and_wait("What is 5+5? Reply with just the number.") + + usage_events = [e for e in events if isinstance(e.data, AssistantUsageData)] + assert len(usage_events) >= 1, "Expected at least one assistant.usage event" + + last_usage = usage_events[-1] + assert last_usage.id is not None + assert last_usage.timestamp is not None + assert last_usage.data.model + finally: + unsubscribe() + await session.disconnect() + + async def test_should_emit_session_usage_info_event_after_model_call(self, ctx: E2ETestContext): + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all + ) + events = [] + unsubscribe = session.on(events.append) + try: + await session.send_and_wait("What is 5+5? 
Reply with just the number.") + + usage_info_events = [e for e in events if isinstance(e.data, SessionUsageInfoData)] + assert len(usage_info_events) >= 1, "Expected at least one session.usage_info event" + + last_info = usage_info_events[-1] + assert last_info.data.current_tokens > 0 + assert last_info.data.messages_length > 0 + assert last_info.data.token_limit > 0 + finally: + unsubscribe() + await session.disconnect() + + async def test_should_emit_pending_messages_modified_event_when_message_queue_changes( + self, ctx: E2ETestContext + ): + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all + ) + pending_task: asyncio.Future = asyncio.get_event_loop().create_future() + + def on_event(event): + if isinstance(event.data, PendingMessagesModifiedData) and not pending_task.done(): + pending_task.set_result(event) + + unsubscribe = session.on(on_event) + try: + # Fire-and-forget to trigger pending_messages.modified; then wait for it + asyncio.ensure_future(session.send("What is 9+9? 
Reply with just the number.")) + pending_event = await asyncio.wait_for(pending_task, timeout=60.0) + assert pending_event is not None + + from .testharness.helper import get_final_assistant_message + + answer = await get_final_assistant_message(session, timeout=60.0) + assert answer is not None + assert "18" in (answer.data.content or "") + finally: + unsubscribe() + await session.disconnect() + + async def test_should_preserve_message_order_in_getmessages_after_tool_use( + self, ctx: E2ETestContext + ): + Path(ctx.work_dir, "order.txt").write_text("ORDER_CONTENT_42", encoding="utf-8") + + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all + ) + try: + await session.send_and_wait("Read the file 'order.txt' and tell me what the number is.") + + messages = await session.get_messages() + types = [m.type.value for m in messages] + + # Verify complete event ordering contract: + # session.start → user.message → tool.execution_start → tool.execution_complete + # → assistant.message + def first_index(t: str) -> int: + return types.index(t) if t in types else -1 + + def last_index(t: str) -> int: + return len(types) - 1 - types[::-1].index(t) if t in types else -1 + + session_start_idx = first_index("session.start") + user_msg_idx = first_index("user.message") + tool_start_idx = first_index("tool.execution_start") + tool_complete_idx = first_index("tool.execution_complete") + assistant_msg_idx = last_index("assistant.message") + + assert session_start_idx >= 0, "Expected session.start event" + assert user_msg_idx >= 0, "Expected user.message event" + assert tool_start_idx >= 0, "Expected tool.execution_start event" + assert tool_complete_idx >= 0, "Expected tool.execution_complete event" + assert assistant_msg_idx >= 0, "Expected assistant.message event" + + assert session_start_idx < user_msg_idx, "session.start should precede user.message" + assert user_msg_idx < tool_start_idx, "user.message should precede 
tool.execution_start" + assert tool_start_idx < tool_complete_idx, ( + "tool.execution_start should precede tool.execution_complete" + ) + assert tool_complete_idx < assistant_msg_idx, ( + "tool.execution_complete should precede final assistant.message" + ) + + # Verify user.message has our content + user_events = [m for m in messages if isinstance(m.data, UserMessageData)] + assert any("order.txt" in (e.data.content or "") for e in user_events) + + # Verify assistant.message references the file content + assistant_events = [m for m in messages if isinstance(m.data, AssistantMessageData)] + assert any("42" in (e.data.content or "") for e in assistant_events) + finally: + await session.disconnect() diff --git a/python/e2e/test_multi_client_e2e.py b/python/e2e/test_multi_client_e2e.py index b9ecbc5a2..922ca3279 100644 --- a/python/e2e/test_multi_client_e2e.py +++ b/python/e2e/test_multi_client_e2e.py @@ -245,10 +245,12 @@ async def test_one_client_approves_permission_and_both_see_the_result( ), ) - # Client 2 resumes — its handler never resolves, so only client 1's approval takes effect + # Client 2 observes the permission request but leaves the decision to client 1. session2 = await mctx.client2.resume_session( session1.session_id, - on_permission_request=lambda request, invocation: asyncio.Future(), + on_permission_request=lambda request, invocation: PermissionRequestResult( + kind="no-result" + ), ) client1_events = [] @@ -291,10 +293,12 @@ async def test_one_client_rejects_permission_and_both_see_the_result( ), ) - # Client 2 resumes — its handler never resolves + # Client 2 observes the permission request but leaves the decision to client 1. 
session2 = await mctx.client2.resume_session( session1.session_id, - on_permission_request=lambda request, invocation: asyncio.Future(), + on_permission_request=lambda request, invocation: PermissionRequestResult( + kind="no-result" + ), ) client1_events = [] diff --git a/python/e2e/test_multi_turn_e2e.py b/python/e2e/test_multi_turn_e2e.py index 232e54f5f..000da240e 100644 --- a/python/e2e/test_multi_turn_e2e.py +++ b/python/e2e/test_multi_turn_e2e.py @@ -3,9 +3,17 @@ from __future__ import annotations from pathlib import Path +from typing import Any import pytest +from copilot.generated.session_events import ( + AssistantMessageData, + SessionIdleData, + ToolExecutionCompleteData, + ToolExecutionStartData, + UserMessageData, +) from copilot.session import PermissionHandler from .testharness import E2ETestContext @@ -13,18 +21,91 @@ pytestmark = pytest.mark.asyncio(loop_scope="module") +def _assert_tool_turn_ordering(events: list[Any], turn_description: str) -> None: + """Assert that within a turn's events, the ordering contract holds: + user.message → tool.execution_start(s) → tool.execution_complete(s) + → assistant.message → session.idle + """ + types = [e.type.value for e in events] + observed = ", ".join(types) + + user_idx = next((i for i, e in enumerate(events) if isinstance(e.data, UserMessageData)), -1) + tool_starts = [ + (i, e) for i, e in enumerate(events) if isinstance(e.data, ToolExecutionStartData) + ] + tool_completes = [ + (i, e) for i, e in enumerate(events) if isinstance(e.data, ToolExecutionCompleteData) + ] + + assert user_idx >= 0, f"Expected user.message in {turn_description}. Observed: {observed}" + assert tool_starts, f"Expected tool.execution_start events in {turn_description}" + assert tool_completes, f"Expected tool.execution_complete events in {turn_description}" + + first_tool_start_idx = tool_starts[0][0] + assert user_idx < first_tool_start_idx, ( + f"Expected user.message before first tool start in {turn_description}. 
Observed: {observed}" + ) + + # Each complete should have a matching start with same tool_call_id + complete_call_ids = {e.data.tool_call_id for _, e in tool_completes} + start_call_ids = {e.data.tool_call_id for _, e in tool_starts} + for cid in complete_call_ids: + assert cid in start_call_ids, ( + f"tool.execution_complete call_id {cid} has no matching start in {turn_description}" + ) + + last_tool_complete_idx = tool_completes[-1][0] + # Find assistant.message after last tool complete + assistant_after_tools_idx = next( + ( + i + for i, e in enumerate(events) + if i > last_tool_complete_idx and isinstance(e.data, AssistantMessageData) + ), + -1, + ) + idle_idx = next( + ( + i + for i, e in enumerate(events) + if i > max(assistant_after_tools_idx, 0) and isinstance(e.data, SessionIdleData) + ), + -1, + ) + + assert assistant_after_tools_idx >= 0, ( + "Expected assistant.message after tool completion in " + f"{turn_description}. Observed: {observed}" + ) + assert idle_idx >= 0, ( + f"Expected session.idle after assistant.message in {turn_description}. Observed: {observed}" + ) + assert last_tool_complete_idx < assistant_after_tools_idx, ( + f"Expected final tool completion before final assistant message in {turn_description}. " + f"Observed: {observed}" + ) + assert assistant_after_tools_idx < idle_idx, ( + f"Expected final assistant message before idle in {turn_description}. Observed: {observed}" + ) + + class TestMultiTurn: async def test_should_use_tool_results_from_previous_turns(self, ctx: E2ETestContext): Path(ctx.work_dir, "secret.txt").write_text("The magic number is 42.", encoding="utf-8") session = await ctx.client.create_session( on_permission_request=PermissionHandler.approve_all ) + events: list = [] + unsubscribe = session.on(events.append) try: first_message = await session.send_and_wait( "Read the file 'secret.txt' and tell me what the magic number is." 
) assert first_message is not None assert "42" in first_message.data.content + turn1_events = list(events) + events.clear() + _assert_tool_turn_ordering(turn1_events, "file read turn") second_message = await session.send_and_wait( "What is that magic number multiplied by 2?" @@ -32,21 +113,33 @@ async def test_should_use_tool_results_from_previous_turns(self, ctx: E2ETestCon assert second_message is not None assert "84" in second_message.data.content finally: + unsubscribe() await session.disconnect() async def test_should_handle_file_creation_then_reading_across_turns(self, ctx: E2ETestContext): session = await ctx.client.create_session( on_permission_request=PermissionHandler.approve_all ) + events: list = [] + unsubscribe = session.on(events.append) try: await session.send_and_wait( "Create a file called 'greeting.txt' with the content 'Hello from multi-turn test'." ) + turn1_events = list(events) + events.clear() + _assert_tool_turn_ordering(turn1_events, "file creation turn") + assert Path(ctx.work_dir, "greeting.txt").read_text(encoding="utf-8") == ( + "Hello from multi-turn test" + ) message = await session.send_and_wait( "Read the file 'greeting.txt' and tell me its exact contents." 
) assert message is not None assert "Hello from multi-turn test" in message.data.content + turn2_events = list(events) + _assert_tool_turn_ordering(turn2_events, "file read turn") finally: + unsubscribe() await session.disconnect() diff --git a/python/e2e/test_pending_work_resume_e2e.py b/python/e2e/test_pending_work_resume_e2e.py index d1c3b812f..204e6cc94 100644 --- a/python/e2e/test_pending_work_resume_e2e.py +++ b/python/e2e/test_pending_work_resume_e2e.py @@ -369,13 +369,6 @@ async def tool_b(args): ) assert result_a.success - answer = await get_final_assistant_message( - session2, timeout=PENDING_WORK_TIMEOUT - ) - content = answer.data.content or "" - assert "PARALLEL_A_ALPHA" in content - assert "PARALLEL_B_BETA" in content - await session2.disconnect() finally: await _safe_force_stop(resumed_client) @@ -429,3 +422,145 @@ async def test_should_resume_successfully_when_no_pending_work_exists( await _safe_force_stop(resumed_client) finally: await _safe_force_stop(server) + + async def test_should_keep_pending_external_tool_handleable_on_warm_resume_when_continuependingwork_is_false( # noqa: E501 + self, ctx: E2ETestContext + ): + from copilot.generated.session_events import SessionResumeData + + tool_started: asyncio.Future = asyncio.get_event_loop().create_future() + release_original: asyncio.Future = asyncio.get_event_loop().create_future() + invocation_count = 0 + + async def blocking_external_tool(args): + nonlocal invocation_count + invocation_count += 1 + value = args.get("value", "") + if not tool_started.done(): + tool_started.set_result(value) + return await release_original + + server = _make_subprocess_client(ctx, use_stdio=False) + await server.start() + try: + cli_url = f"localhost:{server.actual_port}" + + suspended_client = CopilotClient( + ExternalServerConfig(url=cli_url, tcp_connection_token="py-tcp-shared-test-token") + ) + session1 = await suspended_client.create_session( + on_permission_request=PermissionHandler.approve_all, + 
tools=[_make_pending_tool("resume_external_tool", blocking_external_tool)], + ) + session_id = session1.session_id + + try: + tool_request_task = asyncio.create_task( + _wait_for_external_tool_requests(session1, ["resume_external_tool"]) + ) + await session1.send( + "Use resume_external_tool with value 'beta', then reply with the result." + ) + tool_events = await tool_request_task + assert (await asyncio.wait_for(tool_started, PENDING_WORK_TIMEOUT)) == "beta" + + await suspended_client.force_stop() + + resumed_client = CopilotClient( + ExternalServerConfig( + url=cli_url, tcp_connection_token="py-tcp-shared-test-token" + ) + ) + try: + session2 = await resumed_client.resume_session( + session_id, + on_permission_request=PermissionHandler.approve_all, + continue_pending_work=False, + ) + + # Verify resume event: continue_pending_work=False and session_was_active=True + messages = await session2.get_messages() + resume_events = [m for m in messages if isinstance(m.data, SessionResumeData)] + assert len(resume_events) == 1, "Expected exactly one session.resume event" + resume_event = resume_events[0] + assert resume_event.data.continue_pending_work is False + assert resume_event.data.session_was_active is True + + # The pending tool call should still be satisfiable + tool_result = await session2.rpc.tools.handle_pending_tool_call( + HandlePendingToolCallRequest( + request_id=tool_events["resume_external_tool"].data.request_id, + result="EXTERNAL_RESUMED_BETA", + ) + ) + assert tool_result.success + + answer = await get_final_assistant_message( + session2, timeout=PENDING_WORK_TIMEOUT + ) + assert invocation_count == 1 + assert "EXTERNAL_RESUMED_BETA" in (answer.data.content or "") + + await session2.disconnect() + finally: + await _safe_force_stop(resumed_client) + finally: + if not release_original.done(): + release_original.set_result("ORIGINAL_SHOULD_NOT_WIN") + finally: + await _safe_force_stop(server) + + async def 
test_should_report_continuependingwork_true_in_resume_event( + self, ctx: E2ETestContext + ): + from copilot.generated.session_events import SessionResumeData + + server = _make_subprocess_client(ctx, use_stdio=False) + await server.start() + try: + cli_url = f"localhost:{server.actual_port}" + + first_client = CopilotClient( + ExternalServerConfig(url=cli_url, tcp_connection_token="py-tcp-shared-test-token") + ) + try: + first_session = await first_client.create_session( + on_permission_request=PermissionHandler.approve_all, + ) + session_id = first_session.session_id + first_answer = await first_session.send_and_wait( + "Reply with exactly: CONTINUE_PENDING_WORK_TRUE_TURN_ONE", + timeout=PENDING_WORK_TIMEOUT, + ) + assert "CONTINUE_PENDING_WORK_TRUE_TURN_ONE" in (first_answer.data.content or "") + await first_session.disconnect() + finally: + await _safe_force_stop(first_client) + + resumed_client = CopilotClient( + ExternalServerConfig(url=cli_url, tcp_connection_token="py-tcp-shared-test-token") + ) + try: + resumed_session = await resumed_client.resume_session( + session_id, + on_permission_request=PermissionHandler.approve_all, + continue_pending_work=True, + ) + + messages = await resumed_session.get_messages() + resume_events = [m for m in messages if isinstance(m.data, SessionResumeData)] + assert len(resume_events) == 1, "Expected exactly one session.resume event" + resume_event = resume_events[0] + assert resume_event.data.continue_pending_work is True + assert resume_event.data.session_was_active is False + + follow_up = await resumed_session.send_and_wait( + "Reply with exactly: CONTINUE_PENDING_WORK_TRUE_TURN_TWO", + timeout=PENDING_WORK_TIMEOUT, + ) + assert "CONTINUE_PENDING_WORK_TRUE_TURN_TWO" in (follow_up.data.content or "") + await resumed_session.disconnect() + finally: + await _safe_force_stop(resumed_client) + finally: + await _safe_force_stop(server) diff --git a/python/e2e/test_permissions_e2e.py b/python/e2e/test_permissions_e2e.py index 
b4e02ddb9..7ad9a2405 100644 --- a/python/e2e/test_permissions_e2e.py +++ b/python/e2e/test_permissions_e2e.py @@ -165,8 +165,7 @@ async def on_permission_request( request: PermissionRequest, invocation: dict ) -> PermissionRequestResult: permission_requests.append(request) - # Simulate async permission check (e.g., user prompt) - await asyncio.sleep(0.01) + await asyncio.sleep(0) return PermissionRequestResult(kind="approve-once") session = await ctx.client.create_session(on_permission_request=on_permission_request) @@ -246,3 +245,258 @@ def on_permission_request( assert received_tool_call_id await session.disconnect() + + async def test_should_wait_for_slow_permission_handler(self, ctx: E2ETestContext): + """Slow permission handler blocks tool execution until released.""" + handler_entered: asyncio.Future = asyncio.get_event_loop().create_future() + release_handler: asyncio.Future = asyncio.get_event_loop().create_future() + target_tool_call_id: asyncio.Future = asyncio.get_event_loop().create_future() + lifecycle: list = [] + + def add_event(phase: str, tool_call_id: str | None) -> None: + lifecycle.append((phase, tool_call_id)) + + async def slow_permission(request: PermissionRequest, invocation: dict): + tool_call_id = request.tool_call_id + add_event("permission-start", tool_call_id) + if not target_tool_call_id.done(): + target_tool_call_id.set_result(tool_call_id) + if not handler_entered.done(): + handler_entered.set_result(True) + await asyncio.wait_for(release_handler, timeout=30.0) + add_event("permission-complete", tool_call_id) + return PermissionRequestResult(kind="approve-once") + + session = await ctx.client.create_session(on_permission_request=slow_permission) + + def on_event(event): + if event.type.value == "tool.execution_start": + add_event("tool-start", event.data.tool_call_id) + elif event.type.value == "tool.execution_complete": + add_event("tool-complete", event.data.tool_call_id) + + unsubscribe = session.on(on_event) + try: + 
asyncio.ensure_future(session.send("Run 'echo slow_handler_test'")) + + await asyncio.wait_for(handler_entered, timeout=30.0) + target_id = await asyncio.wait_for(target_tool_call_id, timeout=30.0) + + # Tool should not have completed yet while handler is blocking + assert not any( + phase == "tool-complete" and tid == target_id for phase, tid in lifecycle + ), "Tool completed before permission handler returned" + + release_handler.set_result(True) + + from .testharness.helper import get_final_assistant_message + + message = await get_final_assistant_message(session, timeout=60.0) + + perm_start = next( + ( + i + for i, (p, tid) in enumerate(lifecycle) + if p == "permission-start" and tid == target_id + ), + -1, + ) + perm_complete = next( + ( + i + for i, (p, tid) in enumerate(lifecycle) + if p == "permission-complete" and tid == target_id + ), + -1, + ) + tool_start = next( + ( + i + for i, (p, tid) in enumerate(lifecycle) + if p == "tool-start" and tid == target_id + ), + -1, + ) + tool_complete = next( + ( + i + for i, (p, tid) in enumerate(lifecycle) + if p == "tool-complete" and tid == target_id + ), + -1, + ) + + assert perm_start >= 0 + assert perm_complete >= 0 + assert tool_start >= 0 + assert tool_complete >= 0 + assert perm_complete < tool_complete, ( + "Expected permission completion before target tool completion" + ) + assert tool_start < tool_complete, ( + "Expected target tool start before target tool completion" + ) + assert message is not None + assert "slow_handler_test" in (message.data.content or "") + finally: + if not release_handler.done(): + release_handler.set_result(True) + unsubscribe() + await session.disconnect() + + async def test_should_deny_permission_with_noresult_kind(self, ctx: E2ETestContext): + """NoResult permission kind leaves legacy permission requests unanswered.""" + + permission_called = asyncio.get_event_loop().create_future() + + def deny_noresult(request: PermissionRequest, invocation: dict) -> PermissionRequestResult: 
+ if not permission_called.done(): + permission_called.set_result(True) + return PermissionRequestResult(kind="no-result") + + session = await ctx.client.create_session(on_permission_request=deny_noresult) + try: + asyncio.ensure_future(session.send("Run 'node --version'")) + await asyncio.wait_for(permission_called, timeout=30.0) + await session.abort() + finally: + await session.disconnect() + + async def test_should_short_circuit_permission_handler_when_set_approve_all_enabled( + self, ctx: E2ETestContext + ): + """When set_approve_all is true, the runtime short-circuits the handler.""" + from copilot.generated.rpc import PermissionsSetApproveAllRequest + + handler_call_count = 0 + + def counting_handler( + request: PermissionRequest, invocation: dict + ) -> PermissionRequestResult: + nonlocal handler_call_count + handler_call_count += 1 + return PermissionRequestResult(kind="approve-once") + + session = await ctx.client.create_session(on_permission_request=counting_handler) + try: + set_result = await session.rpc.permissions.set_approve_all( + PermissionsSetApproveAllRequest(enabled=True) + ) + assert set_result.success + + tool_completed: asyncio.Future = asyncio.get_event_loop().create_future() + + def on_event(event): + if ( + event.type.value == "tool.execution_complete" + and event.data.success + and not tool_completed.done() + ): + tool_completed.set_result(event) + + unsubscribe = session.on(on_event) + try: + await session.send_and_wait( + "Run 'echo test' and tell me what happens", timeout=60.0 + ) + await asyncio.wait_for(tool_completed, timeout=30.0) + assert handler_call_count == 0, ( + "Handler should not have been called when approve_all is enabled" + ) + finally: + unsubscribe() + finally: + try: + from copilot.generated.rpc import PermissionsSetApproveAllRequest + + await session.rpc.permissions.set_approve_all( + PermissionsSetApproveAllRequest(enabled=False) + ) + except Exception as exc: + # Cleanup should not hide the primary test result, 
but should be visible in logs. + print(f"Failed to disable approve_all during cleanup: {exc!r}") + await session.disconnect() + + async def test_should_handle_concurrent_permission_requests_from_parallel_tools( + self, ctx: E2ETestContext + ): + """Multiple simultaneous permission requests are all handled.""" + from copilot.tools import Tool, ToolInvocation, ToolResult + + permission_request_count = 0 + both_started: asyncio.Future = asyncio.get_event_loop().create_future() + first_tool_called = False + second_tool_called = False + + async def concurrent_permission(request: PermissionRequest, invocation: dict): + nonlocal permission_request_count + permission_request_count += 1 + if permission_request_count >= 2 and not both_started.done(): + both_started.set_result(True) + await asyncio.wait_for(both_started, timeout=30.0) + return PermissionRequestResult(kind="approve-once") + + def first_tool_handler(invocation: ToolInvocation) -> ToolResult: + nonlocal first_tool_called + first_tool_called = True + return ToolResult( + text_result_for_llm="first_permission_tool completed after permission approval", + result_type="rejected", + ) + + def second_tool_handler(invocation: ToolInvocation) -> ToolResult: + nonlocal second_tool_called + second_tool_called = True + return ToolResult( + text_result_for_llm="second_permission_tool completed after permission approval", + result_type="rejected", + ) + + session = await ctx.client.create_session( + on_permission_request=concurrent_permission, + tools=[ + Tool( + name="first_permission_tool", + description="First concurrent permission test tool", + parameters={"type": "object", "properties": {}}, + handler=first_tool_handler, + ), + Tool( + name="second_permission_tool", + description="Second concurrent permission test tool", + parameters={"type": "object", "properties": {}}, + handler=second_tool_handler, + ), + ], + ) + try: + idle_future: asyncio.Future = asyncio.get_event_loop().create_future() + tool_completes = [] + + 
def on_event(event): + if event.type.value == "tool.execution_complete" and not event.data.success: + tool_completes.append(event) + elif event.type.value == "session.idle" and not idle_future.done(): + idle_future.set_result(True) + + unsubscribe = session.on(on_event) + try: + await session.send( + "Call both first_permission_tool and second_permission_tool in the same turn." + " Do not call any other tools." + ) + await asyncio.wait_for(both_started, timeout=30.0) + await asyncio.wait_for(idle_future, timeout=60.0) + + assert permission_request_count == 2, ( + "Expected exactly 2 permission requests (one per tool)" + ) + assert first_tool_called, "first_permission_tool handler should have been called" + assert second_tool_called, "second_permission_tool handler should have been called" + assert len(tool_completes) >= 2, ( + "Expected tool.execution_complete events for both tools" + ) + finally: + unsubscribe() + finally: + await session.disconnect() diff --git a/python/e2e/test_rpc_event_side_effects_e2e.py b/python/e2e/test_rpc_event_side_effects_e2e.py new file mode 100644 index 000000000..e31e00fbe --- /dev/null +++ b/python/e2e/test_rpc_event_side_effects_e2e.py @@ -0,0 +1,284 @@ +""" +E2E coverage for session-event side effects triggered by RPC calls. + +Mirrors ``dotnet/test/RpcEventSideEffectsE2ETests.cs`` (snapshot category +``rpc_event_side_effects``). 
+""" + +from __future__ import annotations + +import asyncio + +import pytest + +from copilot.generated.rpc import ( + HistoryTruncateRequest, + ModeSetRequest, + NameSetRequest, + PlanUpdateRequest, + SessionMode, + WorkspacesCreateFileRequest, +) +from copilot.generated.session_events import ( + PlanChangedOperation, + SessionModeChangedData, + SessionPlanChangedData, + SessionSnapshotRewindData, + SessionTitleChangedData, + SessionWorkspaceFileChangedData, + WorkspaceFileChangedOperation, +) +from copilot.session import PermissionHandler + +from .testharness import E2ETestContext + +pytestmark = pytest.mark.asyncio(loop_scope="module") + + +async def _wait_for_event(session, predicate, timeout: float = 15.0): + """Wait for the first session event matching predicate.""" + loop = asyncio.get_event_loop() + fut: asyncio.Future = loop.create_future() + + def on_event(event): + if not fut.done() and predicate(event): + fut.set_result(event) + + unsub = session.on(on_event) + try: + return await asyncio.wait_for(fut, timeout=timeout) + finally: + unsub() + + +class TestRpcEventSideEffects: + async def test_should_emit_mode_changed_event_when_mode_set(self, ctx: E2ETestContext): + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + ) + try: + changed_future: asyncio.Future = asyncio.get_event_loop().create_future() + + def on_event(event): + if isinstance(event.data, SessionModeChangedData) and not changed_future.done(): + changed_future.set_result(event) + + unsubscribe = session.on(on_event) + try: + await session.rpc.mode.set(ModeSetRequest(mode=SessionMode.PLAN)) + event = await asyncio.wait_for(changed_future, timeout=15.0) + + assert isinstance(event.data, SessionModeChangedData) + assert event.data.new_mode == SessionMode.PLAN.value + assert event.data.previous_mode == SessionMode.INTERACTIVE.value + finally: + unsubscribe() + finally: + await session.disconnect() + + async def 
test_should_emit_plan_changed_event_for_update_and_delete(self, ctx: E2ETestContext): + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + ) + try: + create_future: asyncio.Future = asyncio.get_event_loop().create_future() + delete_future: asyncio.Future = asyncio.get_event_loop().create_future() + + def on_event(event): + if isinstance(event.data, SessionPlanChangedData): + if ( + event.data.operation == PlanChangedOperation.CREATE + and not create_future.done() + ): + create_future.set_result(event) + elif ( + event.data.operation == PlanChangedOperation.DELETE + and not delete_future.done() + ): + delete_future.set_result(event) + + unsubscribe = session.on(on_event) + try: + await session.rpc.plan.update(PlanUpdateRequest(content="# Plan step 1")) + create_evt = await asyncio.wait_for(create_future, timeout=15.0) + assert create_evt.data.operation == PlanChangedOperation.CREATE + + await session.rpc.plan.delete() + delete_evt = await asyncio.wait_for(delete_future, timeout=15.0) + assert delete_evt.data.operation == PlanChangedOperation.DELETE + finally: + unsubscribe() + finally: + await session.disconnect() + + async def test_should_emit_plan_changed_update_operation_on_second_update( + self, ctx: E2ETestContext + ): + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + ) + try: + # Create the plan first + await session.rpc.plan.update(PlanUpdateRequest(content="# Initial plan")) + + update_future: asyncio.Future = asyncio.get_event_loop().create_future() + + def on_event(event): + if ( + isinstance(event.data, SessionPlanChangedData) + and event.data.operation == PlanChangedOperation.UPDATE + and not update_future.done() + ): + update_future.set_result(event) + + unsubscribe = session.on(on_event) + try: + await session.rpc.plan.update(PlanUpdateRequest(content="# Updated plan")) + update_evt = await asyncio.wait_for(update_future, timeout=15.0) + assert 
update_evt.data.operation == PlanChangedOperation.UPDATE + finally: + unsubscribe() + finally: + await session.disconnect() + + async def test_should_emit_workspace_file_changed_event_when_file_created( + self, ctx: E2ETestContext + ): + import uuid + + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + ) + try: + path = f"event-side-effect-{uuid.uuid4().hex}.txt" + create_future: asyncio.Future = asyncio.get_event_loop().create_future() + + def on_event(event): + if ( + isinstance(event.data, SessionWorkspaceFileChangedData) + and event.data.path == path + and event.data.operation == WorkspaceFileChangedOperation.CREATE + and not create_future.done() + ): + create_future.set_result(event) + + unsubscribe = session.on(on_event) + try: + await session.rpc.workspaces.create_file( + WorkspacesCreateFileRequest(path=path, content="hello") + ) + evt = await asyncio.wait_for(create_future, timeout=15.0) + assert evt.data.path == path + assert evt.data.operation == WorkspaceFileChangedOperation.CREATE + finally: + unsubscribe() + finally: + await session.disconnect() + + async def test_should_emit_title_changed_event_when_name_set(self, ctx: E2ETestContext): + import uuid + + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + ) + try: + new_name = f"Title-{uuid.uuid4().hex}" + title_future: asyncio.Future = asyncio.get_event_loop().create_future() + + def on_event(event): + if ( + isinstance(event.data, SessionTitleChangedData) + and event.data.title == new_name + and not title_future.done() + ): + title_future.set_result(event) + + unsubscribe = session.on(on_event) + try: + await session.rpc.name.set(NameSetRequest(name=new_name)) + evt = await asyncio.wait_for(title_future, timeout=15.0) + assert evt.data.title == new_name + finally: + unsubscribe() + finally: + await session.disconnect() + + async def test_should_emit_snapshot_rewind_event_and_remove_events_on_truncate( + 
self, ctx: E2ETestContext + ): + """Truncating history emits a session.snapshot_rewind event.""" + from copilot.generated.session_events import UserMessageData + + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + ) + try: + await session.send_and_wait("Say SNAPSHOT_REWIND_TARGET exactly.", timeout=60.0) + + events = await session.get_messages() + user_msgs = [e for e in events if isinstance(e.data, UserMessageData)] + assert len(user_msgs) >= 1 + first_user_event_id = str(user_msgs[0].id) + + rewind_future: asyncio.Future = asyncio.get_event_loop().create_future() + + def on_event(event): + if isinstance(event.data, SessionSnapshotRewindData) and not rewind_future.done(): + rewind_future.set_result(event) + + unsubscribe = session.on(on_event) + try: + await session.rpc.history.truncate( + HistoryTruncateRequest(event_id=first_user_event_id) + ) + evt = await asyncio.wait_for(rewind_future, timeout=15.0) + assert isinstance(evt.data, SessionSnapshotRewindData) + assert evt.data.events_removed >= 1 + assert evt.data.up_to_event_id.lower() == first_user_event_id.lower() + + messages_after = await session.get_messages() + assert not any(e.id == user_msgs[0].id for e in messages_after) + except Exception as exc: + if "unhandled method" in str(exc).lower(): + pytest.skip("session.history.truncate not supported in this CLI build") + raise + finally: + unsubscribe() + finally: + await session.disconnect() + + async def test_should_allow_session_use_after_truncate(self, ctx: E2ETestContext): + """Session remains usable after history truncation.""" + from copilot.generated.session_events import UserMessageData + + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + ) + try: + await session.send_and_wait("Say SNAPSHOT_REWIND_TARGET exactly.", timeout=60.0) + + events = await session.get_messages() + user_msgs = [e for e in events if isinstance(e.data, UserMessageData)] + assert 
len(user_msgs) >= 1 + first_user_event_id = str(user_msgs[0].id) + + try: + truncate_result = await session.rpc.history.truncate( + HistoryTruncateRequest(event_id=first_user_event_id) + ) + assert truncate_result.events_removed >= 1 + except Exception as exc: + if "unhandled method" in str(exc).lower(): + pytest.skip("session.history.truncate not supported in this CLI build") + raise + + mode = await session.rpc.mode.get() + assert mode in ( + SessionMode.INTERACTIVE, + SessionMode.PLAN, + SessionMode.AUTOPILOT, + ) + workspace = await session.rpc.workspaces.get_workspace() + assert workspace is not None + finally: + await session.disconnect() diff --git a/python/e2e/test_rpc_session_state_e2e.py b/python/e2e/test_rpc_session_state_e2e.py index 49d5a8051..ffeec1cf3 100644 --- a/python/e2e/test_rpc_session_state_e2e.py +++ b/python/e2e/test_rpc_session_state_e2e.py @@ -293,5 +293,265 @@ async def test_should_compact_session_history_after_messages(self, ctx: E2ETestC await session.send_and_wait("What is 2+2?", timeout=60.0) result = await session.rpc.history.compact() assert result is not None + assert result.success, "Expected History.compact() to report success=True" + assert result.messages_removed >= 0, "messages_removed must be non-negative" + if result.context_window is not None: + assert result.context_window.messages_length >= 0 + assert result.context_window.current_tokens >= 0 + + # Session must still be usable after compaction + name = await session.rpc.name.get() + assert name is not None + finally: + await session.disconnect() + + async def test_should_set_and_get_each_session_mode_value(self, ctx: E2ETestContext): + for mode in [SessionMode.INTERACTIVE, SessionMode.PLAN, SessionMode.AUTOPILOT]: + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + ) + try: + await session.rpc.mode.set(ModeSetRequest(mode=mode)) + result = await session.rpc.mode.get() + assert result == mode, f"Expected mode {mode} but got 
{result}" + finally: + await session.disconnect() + + async def test_should_reject_workspace_file_path_traversal(self, ctx: E2ETestContext): + + for traversal_path in [ + "../escaped.txt", + "../../escaped.txt", + "nested/../../../escaped.txt", + ]: + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + ) + try: + with pytest.raises(Exception) as excinfo: + await session.rpc.workspaces.create_file( + WorkspacesCreateFileRequest( + path=traversal_path, + content="should not land outside workspace", + ) + ) + assert "workspace files directory" in str(excinfo.value).lower() + + with pytest.raises(Exception) as excinfo2: + await session.rpc.workspaces.read_file( + WorkspacesReadFileRequest(path=traversal_path) + ) + assert "workspace files directory" in str(excinfo2.value).lower() + finally: + await session.disconnect() + + async def test_should_create_workspace_file_with_nested_path_auto_creating_dirs( + self, ctx: E2ETestContext + ): + import uuid + + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + ) + try: + nested_path = f"nested-{uuid.uuid4().hex}/subdir/file.txt" + await session.rpc.workspaces.create_file( + WorkspacesCreateFileRequest(path=nested_path, content="nested content") + ) + read = await session.rpc.workspaces.read_file( + WorkspacesReadFileRequest(path=nested_path) + ) + assert read.content == "nested content" + + listed = await session.rpc.workspaces.list_files() + assert any(f.endswith("file.txt") for f in listed.files) + finally: + await session.disconnect() + + async def test_should_report_error_reading_nonexistent_workspace_file( + self, ctx: E2ETestContext + ): + import uuid + + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + ) + try: + with pytest.raises(Exception): + await session.rpc.workspaces.read_file( + WorkspacesReadFileRequest(path=f"never-exists-{uuid.uuid4().hex}.txt") + ) + finally: + 
await session.disconnect() + + async def test_should_update_existing_workspace_file_with_update_operation( + self, ctx: E2ETestContext + ): + import asyncio + import uuid + + from copilot.generated.session_events import ( + SessionWorkspaceFileChangedData, + WorkspaceFileChangedOperation, + ) + + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + ) + try: + path = f"reused-{uuid.uuid4().hex}.txt" + await session.rpc.workspaces.create_file( + WorkspacesCreateFileRequest(path=path, content="v1") + ) + + update_future: asyncio.Future = asyncio.get_event_loop().create_future() + + def on_event(event): + if ( + isinstance(event.data, SessionWorkspaceFileChangedData) + and event.data.path == path + and event.data.operation == WorkspaceFileChangedOperation.UPDATE + and not update_future.done() + ): + update_future.set_result(event) + + unsubscribe = session.on(on_event) + try: + await session.rpc.workspaces.create_file( + WorkspacesCreateFileRequest(path=path, content="v2") + ) + evt = await asyncio.wait_for(update_future, timeout=15.0) + assert evt.data.operation == WorkspaceFileChangedOperation.UPDATE + + read = await session.rpc.workspaces.read_file(WorkspacesReadFileRequest(path=path)) + assert read.content == "v2" + finally: + unsubscribe() + finally: + await session.disconnect() + + async def test_should_reject_empty_or_whitespace_session_name(self, ctx: E2ETestContext): + for empty_name in ["", " ", "\t\n \r"]: + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + ) + try: + with pytest.raises(Exception) as excinfo: + await session.rpc.name.set(NameSetRequest(name=empty_name)) + assert "empty" in str(excinfo.value).lower() + finally: + await session.disconnect() + + async def test_should_emit_title_changed_event_each_time_name_set_is_called( + self, ctx: E2ETestContext + ): + import asyncio + import uuid + + from copilot.generated.session_events import 
SessionTitleChangedData + + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + ) + try: + title_a = f"Title-A-{uuid.uuid4().hex}" + title_b = f"Title-B-{uuid.uuid4().hex}" + + first_task: asyncio.Future = asyncio.get_event_loop().create_future() + second_task: asyncio.Future = asyncio.get_event_loop().create_future() + + def on_event(event): + if isinstance(event.data, SessionTitleChangedData): + if event.data.title == title_a and not first_task.done(): + first_task.set_result(event) + elif event.data.title == title_b and not second_task.done(): + second_task.set_result(event) + + unsubscribe = session.on(on_event) + try: + await session.rpc.name.set(NameSetRequest(name=title_a)) + await asyncio.wait_for(first_task, timeout=15.0) + + await session.rpc.name.set(NameSetRequest(name=title_b)) + second_evt = await asyncio.wait_for(second_task, timeout=15.0) + assert second_evt.data.title == title_b + finally: + unsubscribe() + finally: + await session.disconnect() + + async def test_should_fork_session_to_event_id_excluding_boundary_event( + self, ctx: E2ETestContext + ): + first_prompt = "Say FORK_BOUNDARY_FIRST exactly." + second_prompt = "Say FORK_BOUNDARY_SECOND exactly." 
+ + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + ) + try: + await session.send_and_wait(first_prompt, timeout=60.0) + await session.send_and_wait(second_prompt, timeout=60.0) + + source_events = await session.get_messages() + second_user_event = next( + ( + e + for e in source_events + if isinstance(e.data, UserMessageData) and e.data.content == second_prompt + ), + None, + ) + assert second_user_event is not None, ( + "Expected the second user.message in persisted history" + ) + boundary_event_id = str(second_user_event.id) + + fork = await ctx.client.rpc.sessions.fork( + SessionsForkRequest(session_id=session.session_id, to_event_id=boundary_event_id) + ) + assert (fork.session_id or "").strip() + assert fork.session_id != session.session_id + + forked_session = await ctx.client.resume_session( + fork.session_id, + on_permission_request=PermissionHandler.approve_all, + ) + try: + forked_events = await forked_session.get_messages() + forked_ids = {str(e.id) for e in forked_events} + assert boundary_event_id not in forked_ids, ( + "toEventId is exclusive — boundary event must not be in forked session" + ) + + forked_conv = _conversation_messages(forked_events) + assert any(r == "user" and c == first_prompt for r, c in forked_conv) + assert not any(r == "user" and c == second_prompt for r, c in forked_conv) + finally: + await forked_session.disconnect() + finally: + await session.disconnect() + + async def test_should_report_error_when_forking_session_to_unknown_event_id( + self, ctx: E2ETestContext + ): + import uuid + + source_prompt = "Say FORK_UNKNOWN_EVENT_OK exactly." 
+ + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + ) + try: + await session.send_and_wait(source_prompt, timeout=60.0) + + bogus_event_id = str(uuid.uuid4()) + with pytest.raises(Exception) as excinfo: + await ctx.client.rpc.sessions.fork( + SessionsForkRequest(session_id=session.session_id, to_event_id=bogus_event_id) + ) + text = str(excinfo.value) + assert f"Event {bogus_event_id} not found".lower() in text.lower() + assert "Unhandled method sessions.fork".lower() not in text.lower() finally: await session.disconnect() diff --git a/python/e2e/test_rpc_tasks_and_handlers_e2e.py b/python/e2e/test_rpc_tasks_and_handlers_e2e.py index 8b528e443..707c8b781 100644 --- a/python/e2e/test_rpc_tasks_and_handlers_e2e.py +++ b/python/e2e/test_rpc_tasks_and_handlers_e2e.py @@ -7,14 +7,19 @@ from __future__ import annotations +import asyncio + import pytest from copilot.generated.rpc import ( + ApprovalKind, CommandsHandlePendingCommandRequest, HandlePendingToolCallRequest, PermissionDecision, + PermissionDecisionApproveForIonApproval, PermissionDecisionKind, PermissionDecisionRequest, + TaskInfoType, TasksCancelRequest, TasksPromoteToBackgroundRequest, TasksRemoveRequest, @@ -30,6 +35,23 @@ pytestmark = pytest.mark.asyncio(loop_scope="module") +async def _find_agent_task(session, task_id: str): + task_list = await session.rpc.tasks.list() + return next((t for t in (task_list.tasks or []) if t.id == task_id), None) + + +async def _wait_for_agent_task(session, task_id: str, predicate, timeout: float, message: str): + deadline = asyncio.get_running_loop().time() + timeout + last_task = None + while True: + last_task = await _find_agent_task(session, task_id) + if predicate(last_task): + return last_task + if asyncio.get_running_loop().time() >= deadline: + pytest.fail(f"{message}; last observed task: {last_task!r}") + await asyncio.sleep(0.25) + + async def _assert_implemented_failure(awaitable, method: str) -> None: with 
pytest.raises(Exception) as excinfo: _ = await awaitable @@ -133,5 +155,132 @@ async def test_should_return_expected_results_for_missing_pending_handler_reques ) ) assert permanent.success is False + + session_approval = await session.rpc.permissions.handle_pending_permission_request( + PermissionDecisionRequest( + request_id="missing-session-approval-request", + result=PermissionDecision( + kind=PermissionDecisionKind.APPROVE_FOR_SESSION, + approval=PermissionDecisionApproveForIonApproval( + kind=ApprovalKind.CUSTOM_TOOL, + tool_name="missing-tool", + ), + ), + ) + ) + assert session_approval.success is False + + location_approval = await session.rpc.permissions.handle_pending_permission_request( + PermissionDecisionRequest( + request_id="missing-location-approval-request", + result=PermissionDecision( + kind=PermissionDecisionKind.APPROVE_FOR_LOCATION, + location_key="missing-location", + approval=PermissionDecisionApproveForIonApproval( + kind=ApprovalKind.CUSTOM_TOOL, + tool_name="missing-tool", + ), + ), + ) + ) + assert location_approval.success is False + finally: + await session.disconnect() + + async def test_should_report_implemented_error_for_invalid_task_agent_model( + self, ctx: E2ETestContext + ): + """Invalid model name for agent task returns an error without 'Unhandled method'.""" + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + ) + try: + with pytest.raises(Exception) as excinfo: + await session.rpc.tasks.start_agent( + TasksStartAgentRequest( + agent_type="general-purpose", + prompt="Say hi", + name="sdk-test-invalid-model", + model="not-a-real-model", + ) + ) + text = str(excinfo.value).lower() + assert "unhandled method session.tasks.startagent" not in text + + tasks = await session.rpc.tasks.list() + assert tasks.tasks is not None + assert len(tasks.tasks) == 0, "Task list should be empty after invalid start" + finally: + await session.disconnect() + + async def 
test_should_start_background_agent_and_report_task_details(self, ctx: E2ETestContext): + """Start a background agent task and verify task details then remove it.""" + from copilot.generated.rpc import TaskInfoExecutionMode, TaskInfoStatus + + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + ) + try: + ready = await session.send_and_wait( + "Reply with TASK_AGENT_READY exactly.", + timeout=60.0, + ) + assert ready is not None + assert "TASK_AGENT_READY" in (ready.data.content or "") + + start_result = await session.rpc.tasks.start_agent( + TasksStartAgentRequest( + agent_type="general-purpose", + prompt="Reply with TASK_AGENT_DONE exactly.", + name="sdk-background-agent", + description="SDK background agent coverage", + ) + ) + task_id = start_result.agent_id + assert task_id, "Expected a task ID from start_agent" + + found_task = await _wait_for_agent_task( + session, + task_id, + lambda task: task is not None, + 30.0, + f"Task {task_id} not found in tasks list", + ) + assert found_task.id == task_id + assert found_task.description == "SDK background agent coverage" + assert found_task.type == TaskInfoType.AGENT + assert found_task.agent_type == "general-purpose" + assert found_task.execution_mode == TaskInfoExecutionMode.BACKGROUND + assert found_task.prompt == "Reply with TASK_AGENT_DONE exactly." 
+ + found_task = await _wait_for_agent_task( + session, + task_id, + lambda task: ( + task is None + or task.status + in ( + TaskInfoStatus.COMPLETED, + TaskInfoStatus.FAILED, + TaskInfoStatus.CANCELLED, + TaskInfoStatus.IDLE, + ) + ), + 60.0, + f"Task {task_id} did not produce a final observable state", + ) + assert found_task is not None, f"Task {task_id} disappeared before it completed" + assert "TASK_AGENT_DONE" in (found_task.latest_response or found_task.result or "") + + if found_task.status == TaskInfoStatus.IDLE: + cancel = await session.rpc.tasks.cancel(TasksCancelRequest(id=task_id)) + assert cancel.cancelled is True + + # Remove the task + remove = await session.rpc.tasks.remove(TasksRemoveRequest(id=task_id)) + assert remove.removed is True + + after_remove = await session.rpc.tasks.list() + assert not any(t.id == task_id for t in (after_remove.tasks or [])) finally: await session.disconnect() diff --git a/python/e2e/test_streaming_fidelity_e2e.py b/python/e2e/test_streaming_fidelity_e2e.py index c2e79814a..c24aee55f 100644 --- a/python/e2e/test_streaming_fidelity_e2e.py +++ b/python/e2e/test_streaming_fidelity_e2e.py @@ -113,3 +113,81 @@ async def test_should_produce_deltas_after_session_resume(self, ctx: E2ETestCont await session2.disconnect() finally: await new_client.force_stop() + + async def test_should_not_produce_deltas_after_session_resume_with_streaming_disabled( + self, ctx: E2ETestContext + ): + """Resume with streaming=False — no delta events, but final message arrives.""" + github_token = ( + "fake-token-for-e2e-tests" if os.environ.get("GITHUB_ACTIONS") == "true" else None + ) + # Create and complete a turn with streaming enabled + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, streaming=True + ) + await session.send_and_wait("What is 3 + 6?") + session_id = session.session_id + await session.disconnect() + + # Resume with streaming disabled + new_client = CopilotClient( + 
SubprocessConfig( + cli_path=ctx.cli_path, + cwd=ctx.work_dir, + env=ctx.get_env(), + github_token=github_token, + ) + ) + try: + session2 = await new_client.resume_session( + session_id, + on_permission_request=PermissionHandler.approve_all, + streaming=False, + ) + events = [] + session2.on(lambda event: events.append(event)) + + answer = await session2.send_and_wait("Now if you double that, what do you get?") + assert answer is not None + + delta_events = [e for e in events if e.type.value == "assistant.message_delta"] + assert len(delta_events) == 0, "No deltas expected when streaming=False" + + assistant_events = [e for e in events if e.type.value == "assistant.message"] + assert len(assistant_events) >= 1, "Final assistant.message must still arrive" + + await session2.disconnect() + finally: + await new_client.force_stop() + + async def test_should_emit_streaming_deltas_with_reasoning_effort_configured( + self, ctx: E2ETestContext + ): + """Streaming + reasoning_effort produces delta events and session.start shows effort.""" + from copilot.generated.session_events import SessionStartData + + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + streaming=True, + reasoning_effort="high", + ) + + events = [] + session.on(lambda event: events.append(event)) + + try: + await session.send_and_wait("What is 15 * 17?", timeout=60.0) + + delta_events = [e for e in events if e.type.value == "assistant.message_delta"] + assert len(delta_events) >= 1, "Expected delta events with streaming=True" + + assistant_events = [e for e in events if e.type.value == "assistant.message"] + assert len(assistant_events) >= 1, "Expected final assistant.message" + + # Check session.start event (from get_messages) has reasoning_effort + all_msgs = await session.get_messages() + start_event = next((e for e in all_msgs if isinstance(e.data, SessionStartData)), None) + assert start_event is not None, "Expected session.start event" + assert 
start_event.data.reasoning_effort == "high" + finally: + await session.disconnect() diff --git a/python/e2e/test_tool_results_e2e.py b/python/e2e/test_tool_results_e2e.py index d08a62191..3e54a3abf 100644 --- a/python/e2e/test_tool_results_e2e.py +++ b/python/e2e/test_tool_results_e2e.py @@ -1,5 +1,7 @@ """E2E Tool Results Tests""" +import asyncio + import pytest from pydantic import BaseModel, Field @@ -100,3 +102,106 @@ def analyze_code(params: AnalyzeParams, invocation: ToolInvocation) -> ToolResul assert "resultType" not in tool_results[0]["content"] finally: await session.disconnect() + + async def test_should_handle_tool_result_with_rejected_resulttype(self, ctx: E2ETestContext): + tool_handler_called = False + tool_complete_future: asyncio.Future = asyncio.get_event_loop().create_future() + + @define_tool("deploy_service", description="Deploys a service") + def deploy_service(invocation: ToolInvocation) -> ToolResult: + nonlocal tool_handler_called + tool_handler_called = True + return ToolResult( + text_result_for_llm=( + "Deployment rejected: policy violation" + " - production deployments require approval" + ), + result_type="rejected", + ) + + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, tools=[deploy_service] + ) + + def on_event(event): + if event.type.value == "tool.execution_complete" and not tool_complete_future.done(): + tool_complete_future.set_result(event) + + unsubscribe = session.on(on_event) + try: + asyncio.ensure_future( + session.send( + "Deploy the service using deploy_service." + " If it's rejected, tell me it was 'rejected by policy'." 
+ ) + ) + tool_evt = await asyncio.wait_for(tool_complete_future, timeout=60.0) + + assert tool_handler_called, "Tool handler should have been called" + assert not tool_evt.data.success + error = tool_evt.data.error + assert error is not None + error_code = error if isinstance(error, str) else getattr(error, "code", None) + assert error_code == "rejected" + error_msg = error if isinstance(error, str) else getattr(error, "message", None) + assert "Deployment rejected" in (error_msg or "") + + # Session should reach idle + idle_future: asyncio.Future = asyncio.get_event_loop().create_future() + session.on( + lambda e: ( + idle_future.set_result(e) + if e.type.value == "session.idle" and not idle_future.done() + else None + ) + ) + await asyncio.wait_for(idle_future, timeout=30.0) + finally: + unsubscribe() + await session.disconnect() + + async def test_should_handle_tool_result_with_denied_resulttype(self, ctx: E2ETestContext): + tool_handler_called = False + tool_complete_future: asyncio.Future = asyncio.get_event_loop().create_future() + + @define_tool("access_secret", description="Accesses a secret") + def access_secret(invocation: ToolInvocation) -> ToolResult: + nonlocal tool_handler_called + tool_handler_called = True + return ToolResult( + text_result_for_llm="Access denied: insufficient permissions to read secrets", + result_type="denied", + ) + + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, tools=[access_secret] + ) + + def on_event(event): + if event.type.value == "tool.execution_complete" and not tool_complete_future.done(): + tool_complete_future.set_result(event) + + unsubscribe = session.on(on_event) + try: + asyncio.ensure_future( + session.send( + "Use access_secret to get the API key." + " If access is denied, tell me it was 'access denied'." 
+ ) + ) + tool_evt = await asyncio.wait_for(tool_complete_future, timeout=60.0) + + assert tool_handler_called, "Tool handler should have been called" + assert not tool_evt.data.success + error = tool_evt.data.error + assert error is not None + error_code = error if isinstance(error, str) else getattr(error, "code", None) + assert error_code == "denied" + error_msg = error if isinstance(error, str) else getattr(error, "message", None) + assert "Access denied" in (error_msg or "") + + answer = await get_final_assistant_message(session, timeout=60.0) + assert answer is not None + finally: + unsubscribe() + await session.disconnect() diff --git a/python/e2e/test_tools_e2e.py b/python/e2e/test_tools_e2e.py index df3835445..4800d97c4 100644 --- a/python/e2e/test_tools_e2e.py +++ b/python/e2e/test_tools_e2e.py @@ -7,7 +7,7 @@ from copilot import define_tool from copilot.session import PermissionHandler, PermissionRequestResult -from copilot.tools import ToolInvocation +from copilot.tools import Tool, ToolInvocation, ToolResult from .testharness import E2ETestContext, get_final_assistant_message @@ -230,3 +230,126 @@ def on_permission_request(request, invocation): # The tool handler should NOT have been called since permission was denied assert not tool_handler_called + + async def test_should_execute_multiple_custom_tools_in_parallel_single_turn( + self, ctx: E2ETestContext + ): + """Multiple custom tools invoked in parallel in the same turn.""" + import asyncio + + city_called: asyncio.Future = asyncio.get_event_loop().create_future() + country_called: asyncio.Future = asyncio.get_event_loop().create_future() + + def lookup_city(invocation: ToolInvocation) -> ToolResult: + city = (invocation.arguments or {}).get("city", "") + if not city_called.done(): + city_called.set_result(city) + return ToolResult(text_result_for_llm=f"CITY_{city.upper()}") + + def lookup_country(invocation: ToolInvocation) -> ToolResult: + country = (invocation.arguments or {}).get("country", "") 
+ if not country_called.done(): + country_called.set_result(country) + return ToolResult(text_result_for_llm=f"COUNTRY_{country.upper()}") + + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + tools=[ + Tool( + name="lookup_city", + description="Looks up city information", + parameters={ + "type": "object", + "properties": {"city": {"type": "string", "description": "City name"}}, + "required": ["city"], + }, + handler=lookup_city, + ), + Tool( + name="lookup_country", + description="Looks up country information", + parameters={ + "type": "object", + "properties": { + "country": {"type": "string", "description": "Country name"} + }, + "required": ["country"], + }, + handler=lookup_country, + ), + ], + ) + + try: + await session.send( + "Use lookup_city with 'Paris' and lookup_country with 'France' at the same time," + " then combine both results in your reply." + ) + + city_result = await asyncio.wait_for(city_called, timeout=60.0) + country_result = await asyncio.wait_for(country_called, timeout=60.0) + assert city_result == "Paris" + assert country_result == "France" + + assistant_message = await get_final_assistant_message(session, timeout=60.0) + assert assistant_message is not None + content = assistant_message.data.content or "" + assert "CITY_PARIS" in content + assert "COUNTRY_FRANCE" in content + finally: + await session.disconnect() + + async def test_should_respect_availabletools_and_excludedtools_combined( + self, ctx: E2ETestContext + ): + """excluded_tools takes precedence over available_tools.""" + excluded_tool_called = False + + def allowed_handler(invocation: ToolInvocation) -> ToolResult: + input_val = (invocation.arguments or {}).get("input", "") + return ToolResult(text_result_for_llm=f"ALLOWED_{input_val.upper()}") + + def excluded_handler(invocation: ToolInvocation) -> ToolResult: + nonlocal excluded_tool_called + excluded_tool_called = True + input_val = (invocation.arguments or 
{}).get("input", "") + return ToolResult(text_result_for_llm=f"EXCLUDED_{input_val.upper()}") + + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + tools=[ + Tool( + name="allowed_tool", + description="An allowed tool", + parameters={ + "type": "object", + "properties": {"input": {"type": "string", "description": "Input value"}}, + "required": ["input"], + }, + handler=allowed_handler, + ), + Tool( + name="excluded_tool", + description="A tool that should be excluded", + parameters={ + "type": "object", + "properties": {"input": {"type": "string", "description": "Input value"}}, + "required": ["input"], + }, + handler=excluded_handler, + ), + ], + available_tools=["allowed_tool", "excluded_tool"], + excluded_tools=["excluded_tool"], + ) + + try: + result = await session.send_and_wait( + "Use the allowed_tool with input 'test'. Do NOT use excluded_tool.", + timeout=60.0, + ) + assert result is not None + assert "ALLOWED_TEST" in (result.data.content or "") + assert not excluded_tool_called, "Excluded tool should not have been called" + finally: + await session.disconnect() diff --git a/test/harness/test-mcp-elicitation-server.mjs b/test/harness/test-mcp-elicitation-server.mjs new file mode 100644 index 000000000..74b3a5a10 --- /dev/null +++ b/test/harness/test-mcp-elicitation-server.mjs @@ -0,0 +1,49 @@ +#!/usr/bin/env node +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. 
+ *--------------------------------------------------------------------------------------------*/ + +import { readFile } from "fs/promises"; +import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; +import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; + +const configIndex = process.argv.indexOf("--config"); +if (configIndex === -1 || !process.argv[configIndex + 1]) { + console.error("Usage: test-mcp-elicitation-server.mjs --config "); + process.exit(1); +} + +const configPath = process.argv[configIndex + 1]; +const requests = JSON.parse(await readFile(configPath, "utf-8")); + +const server = new McpServer({ + name: "test-elicitation-server", + version: "1.0.0", +}); + +server.registerTool( + "request_user_input", + { + description: "Request structured input from the user via an elicitation form", + inputSchema: {}, + }, + async () => { + const results = []; + + for (const request of requests) { + const result = await server.server.elicitInput(request); + results.push({ action: result.action, content: result.content }); + + if (result.action !== "accept") { + break; + } + } + + return { + content: [{ type: "text", text: JSON.stringify({ results }) }], + }; + }, +); + +const transport = new StdioServerTransport(); +await server.connect(transport); diff --git a/test/snapshots/abort/should_abort_during_active_streaming.yaml b/test/snapshots/abort/should_abort_during_active_streaming.yaml new file mode 100644 index 000000000..bd18eab2f --- /dev/null +++ b/test/snapshots/abort/should_abort_during_active_streaming.yaml @@ -0,0 +1,30 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Write a very long essay about the history of computing, covering every decade from the 1940s to the 2020s in + great detail. 
+ - role: assistant + content: >- + # The History of Computing: A Comprehensive Overview + + + ## The 1940s: The Dawn of Electronic Computing + + + The 1940s marked the beginning of electronic computing as we know it. The development of ENIAC (Electronic + Numerical Integrator and Computer) at the University of Pennsylvania in 1945 represented a watershed moment. + This massive machine, weighing over 30 tons and containing 18,000 vacuum tubes, could perform calculations + thousands of times faster than any previous device. + + + During this same period, the theoretical foundations were being laid by pioneers like Alan Turing and John von + Neumann. Turing's concept of a universal machine and von Neumann's architecture for stored-program computers + would shape the entire future of the field. + - role: user + content: Say 'abort_recovery_ok'. + - role: assistant + content: abort_recovery_ok diff --git a/test/snapshots/abort/should_abort_during_active_tool_execution.yaml b/test/snapshots/abort/should_abort_during_active_tool_execution.yaml new file mode 100644 index 000000000..99ea89f7b --- /dev/null +++ b/test/snapshots/abort/should_abort_during_active_tool_execution.yaml @@ -0,0 +1,22 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Use slow_analysis with value 'test_abort'. Wait for the result. + - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: slow_analysis + arguments: '{"value":"test_abort"}' + - role: tool + tool_call_id: toolcall_0 + content: The execution of this tool, or a previous tool was interrupted. + - role: user + content: Say 'tool_abort_recovery_ok'. 
+ - role: assistant + content: tool_abort_recovery_ok diff --git a/test/snapshots/client_api/should_delete_session_by_id.yaml b/test/snapshots/client_api/should_delete_session_by_id.yaml index 8486832a4..0981462bf 100644 --- a/test/snapshots/client_api/should_delete_session_by_id.yaml +++ b/test/snapshots/client_api/should_delete_session_by_id.yaml @@ -7,4 +7,4 @@ conversations: - role: user content: Say OK. - role: assistant - content: OK. + content: OK diff --git a/test/snapshots/client_lifecycle/should_receive_session_deleted_lifecycle_event_when_deleted.yaml b/test/snapshots/client_lifecycle/should_receive_session_deleted_lifecycle_event_when_deleted.yaml new file mode 100644 index 000000000..4419c5854 --- /dev/null +++ b/test/snapshots/client_lifecycle/should_receive_session_deleted_lifecycle_event_when_deleted.yaml @@ -0,0 +1,10 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Say SESSION_DELETED_OK exactly. + - role: assistant + content: SESSION_DELETED_OK diff --git a/test/snapshots/compaction/should_trigger_compaction_with_low_threshold_and_emit_events.yaml b/test/snapshots/compaction/should_trigger_compaction_with_low_threshold_and_emit_events.yaml index ae44af7c6..9deca1222 100644 --- a/test/snapshots/compaction/should_trigger_compaction_with_low_threshold_and_emit_events.yaml +++ b/test/snapshots/compaction/should_trigger_compaction_with_low_threshold_and_emit_events.yaml @@ -58,78 +58,74 @@ conversations: The king, wisely, never sent more. - role: user content: Continue the story with more details about the dragon's castle. - - role: user - content: ${compaction_prompt} - role: assistant content: >- - - - The user requested creative storytelling content: first a detailed story about a dragon, then a continuation - focusing on the dragon's castle. This is a pure creative writing task with no technical work, code changes, or - file modifications involved. - - - - - - - 1. 
The user asked for a detailed story about a dragon - - Composed "The Last Ember of Thornkeep" - a story about Kaedrith, an ancient copper-scaled dragon in the Ashfall Mountains - - Story focused on the dragon creating a sanctuary for runaway children fleeing a tyrannical king - - Included detailed descriptions of the dragon's appearance, personality, and transformation from solitary guardian to protector of refugees - - 2. The user asked to continue the story with more details about the dragon's castle - - Request received just before checkpoint compaction was triggered - - No response provided yet to this continuation request - - - - - - Creative content delivered: - - - Completed initial dragon story (approximately 400 words) - - - Story established: setting (Ashfall Mountains), protagonist (Kaedrith the dragon), conflict (children - fleeing exploitation), resolution (dragon creates sanctuary and defeats the king's soldiers) - + # The Sanctuary of Scales - Work in progress: - - Continuation about the dragon's castle/dwelling has been requested but not yet written + What had begun as a simple cave transformed over the years into something far more magnificent. Kaedrith's + lair sprawled through the mountain's heart like a labyrinth of wonders, carved not by tools but by dragon fire + and patient claws over centuries. - + The **Grand Entrance** was a natural cathedral of stone, its ceiling lost in shadow sixty feet above. + Stalactites glittered with embedded crystals that caught the light of ever-burning braziers—gifts from + Kaedrith's own flame, designed never to extinguish. The children had painted murals across the walls: dragons + soaring through skies, flowers blooming in impossible colors, and portraits of themselves as heroes in their + own stories. - - - This is purely creative writing work - no code, files, or technical systems involved + Beyond lay the **Chamber of Wings**, where Kaedrith slept coiled around a natural hot spring. 
Mineral-rich + water bubbled up from volcanic depths, filling the air with steam that smelled of minerals and magic. The + children had built sleeping lofts into the chamber walls using salvaged timber and rope, each one + customized—some with hanging gardens of cave moss, others with collections of interesting rocks, and one + ambitious structure that resembled a ship's crow's nest. - - No tools were needed or used for this storytelling task - - User preference appears to be for detailed, narrative-driven fantasy content with emotional depth + The **Garden of Eternal Flame** was Kaedrith's pride. This vast cavern had openings in its ceiling that + created perfect conditions for her fire lilies. The flowers grew in spiral patterns, their petals shifting + between crimson, gold, and blue depending on the temperature of dragon breath used to nurture them. The + children learned to harvest them carefully, drying the petals to create medicines they traded with the one + village brave enough to maintain contact with the dragon's domain. - + Down twisting corridors carved smooth by ancient lava flows, there was the **Library of Scales**. Here, + Kaedrith had scratched histories into the walls themselves—accounts of the old world in draconic script that + glowed faintly in darkness. The children couldn't read it yet, but she was teaching them, letter by letter. + They'd also added their own contribution: books salvaged from abandoned places, stacked on stone shelves, + covering everything from fairy tales to blacksmithing manuals. - - None. This conversation involves only creative writing responses with no file system interaction. + The **Treasury**, ironically, held little gold. Kaedrith had long ago traded most of her hoard for things more + useful—seeds, tools, warm blankets. 
What remained was eclectic: a crown she'd taken from a tyrant king three + centuries ago, now used as a planter for herbs; scattered gems that caught firelight; ancient armor pieces + repurposed as wall decorations; and most precious of all, a collection of small objects the children had given + her—crude clay figures, woven bracelets, a wooden carving of a dragon that looked more like a lizard with + wings. - + The mountain fortress also contained practical spaces: the **Forge Room**, where heat from volcanic vents + allowed them to work metal; the **Larder**, naturally cold passages that preserved food; and the **Rain + Catcher**, an ingenious system of channels carved into the mountain's exterior that funneled fresh water into + underground cisterns. - - Immediate next step: + But the heart of the sanctuary was the **Gathering Hall**, a wide chamber where stalactites and stalagmites + had been carefully preserved to create natural pillars. Here they shared meals around a great stone table that + Kaedrith had smoothed to perfect flatness. Here stories were told. Here plans were made. Here, on quiet + evenings, the dragon would curl at one end of the hall while children used her tail as a climbing structure + and read aloud from books, their voices echoing off ancient stone. - - Continue the dragon story with detailed descriptions of Kaedrith's castle/cave sanctuary, expanding on the - world-building and the community that has formed there - + Travelers who glimpsed the mountain from afar spoke of strange lights dancing in its peaks at night—the glow + of dragon fire and human laughter, weaving together something the world hadn't seen in an age: a true + alliance, a family forged not by blood but by choice, protected by scale and claw and the fierce love of an + ancient creature who had finally found her purpose. 
- Dragon storytelling creative writing + And in the valley below, the cruel king's power waned, while whispered stories of the Dragon's Sanctuary + spread—a place where the lost could be found, where the small could become mighty, where an ember of the old + world still burned bright against the darkness. - messages: - role: system content: ${system} @@ -187,74 +183,78 @@ conversations: The king, wisely, never sent more. - role: user content: Continue the story with more details about the dragon's castle. + - role: user + content: ${compaction_prompt} - role: assistant content: >- - # The Sanctuary of Scales + + The user requested creative storytelling content: first a detailed story about a dragon, then a continuation + focusing on the dragon's castle. This is a pure creative writing task with no technical work, code changes, or + file modifications involved. - What had begun as a simple cave transformed over the years into something far more magnificent. Kaedrith's - lair sprawled through the mountain's heart like a labyrinth of wonders, carved not by tools but by dragon fire - and patient claws over centuries. + - The **Grand Entrance** was a natural cathedral of stone, its ceiling lost in shadow sixty feet above. - Stalactites glittered with embedded crystals that caught the light of ever-burning braziers—gifts from - Kaedrith's own flame, designed never to extinguish. The children had painted murals across the walls: dragons - soaring through skies, flowers blooming in impossible colors, and portraits of themselves as heroes in their - own stories. + + 1. 
The user asked for a detailed story about a dragon + - Composed "The Last Ember of Thornkeep" - a story about Kaedrith, an ancient copper-scaled dragon in the Ashfall Mountains + - Story focused on the dragon creating a sanctuary for runaway children fleeing a tyrannical king + - Included detailed descriptions of the dragon's appearance, personality, and transformation from solitary guardian to protector of refugees - Beyond lay the **Chamber of Wings**, where Kaedrith slept coiled around a natural hot spring. Mineral-rich - water bubbled up from volcanic depths, filling the air with steam that smelled of minerals and magic. The - children had built sleeping lofts into the chamber walls using salvaged timber and rope, each one - customized—some with hanging gardens of cave moss, others with collections of interesting rocks, and one - ambitious structure that resembled a ship's crow's nest. + 2. The user asked to continue the story with more details about the dragon's castle + - Request received just before checkpoint compaction was triggered + - No response provided yet to this continuation request + - The **Garden of Eternal Flame** was Kaedrith's pride. This vast cavern had openings in its ceiling that - created perfect conditions for her fire lilies. The flowers grew in spiral patterns, their petals shifting - between crimson, gold, and blue depending on the temperature of dragon breath used to nurture them. The - children learned to harvest them carefully, drying the petals to create medicines they traded with the one - village brave enough to maintain contact with the dragon's domain. + + Creative content delivered: - Down twisting corridors carved smooth by ancient lava flows, there was the **Library of Scales**. Here, - Kaedrith had scratched histories into the walls themselves—accounts of the old world in draconic script that - glowed faintly in darkness. The children couldn't read it yet, but she was teaching them, letter by letter. 
- They'd also added their own contribution: books salvaged from abandoned places, stacked on stone shelves, - covering everything from fairy tales to blacksmithing manuals. + - Completed initial dragon story (approximately 400 words) + - Story established: setting (Ashfall Mountains), protagonist (Kaedrith the dragon), conflict (children + fleeing exploitation), resolution (dragon creates sanctuary and defeats the king's soldiers) - The **Treasury**, ironically, held little gold. Kaedrith had long ago traded most of her hoard for things more - useful—seeds, tools, warm blankets. What remained was eclectic: a crown she'd taken from a tyrant king three - centuries ago, now used as a planter for herbs; scattered gems that caught firelight; ancient armor pieces - repurposed as wall decorations; and most precious of all, a collection of small objects the children had given - her—crude clay figures, woven bracelets, a wooden carving of a dragon that looked more like a lizard with - wings. + Work in progress: + + - Continuation about the dragon's castle/dwelling has been requested but not yet written - The mountain fortress also contained practical spaces: the **Forge Room**, where heat from volcanic vents - allowed them to work metal; the **Larder**, naturally cold passages that preserved food; and the **Rain - Catcher**, an ingenious system of channels carved into the mountain's exterior that funneled fresh water into - underground cisterns. + - But the heart of the sanctuary was the **Gathering Hall**, a wide chamber where stalactites and stalagmites - had been carefully preserved to create natural pillars. Here they shared meals around a great stone table that - Kaedrith had smoothed to perfect flatness. Here stories were told. Here plans were made. Here, on quiet - evenings, the dragon would curl at one end of the hall while children used her tail as a climbing structure - and read aloud from books, their voices echoing off ancient stone. 
+ + - This is purely creative writing work - no code, files, or technical systems involved - Travelers who glimpsed the mountain from afar spoke of strange lights dancing in its peaks at night—the glow - of dragon fire and human laughter, weaving together something the world hadn't seen in an age: a true - alliance, a family forged not by blood but by choice, protected by scale and claw and the fierce love of an - ancient creature who had finally found her purpose. + - No tools were needed or used for this storytelling task + - User preference appears to be for detailed, narrative-driven fantasy content with emotional depth - And in the valley below, the cruel king's power waned, while whispered stories of the Dragon's Sanctuary - spread—a place where the lost could be found, where the small could become mighty, where an ember of the old - world still burned bright against the darkness. + + + + + + None. This conversation involves only creative writing responses with no file system interaction. + + + + + + + Immediate next step: + + - Continue the dragon story with detailed descriptions of Kaedrith's castle/cave sanctuary, expanding on the + world-building and the community that has formed there + + + + + Dragon storytelling creative writing - messages: - role: system content: ${system} @@ -424,242 +424,211 @@ conversations: content: Now describe the dragon's treasure in great detail. - role: assistant content: >- - # The Treasury of Kaedrith - - - Most who spoke of dragons imagined mountains of gold coins, rivers of rubies, chambers so full of wealth you - could swim through precious metals. Kaedrith's treasury told a different story—one of transformation, - sacrifice, and the strange alchemy that turns cold metal into warm memory. + # The True Treasure of Thornkeep - The chamber itself was circular, perhaps forty feet across, with walls of dark volcanic glass that reflected - firelight in fractured patterns. 
The ceiling formed a natural dome where selenite crystals grew in delicate - formations, casting soft lunar glows even when no flame burned. + Kaedrith's Treasury was not what most would expect of a dragon's hoard. Located in a circular chamber deep + within the mountain, its entrance was narrow—deliberately so, preventing the children from sneaking in + unsupervised. But when the dragon granted access, what awaited inside told a story far richer than gold. - **The Practical Treasures** + **The Ancient Wealth** - Against the eastern wall stood three ancient chests of blackened oak, their iron bindings turned green with - age. These held what remained of traditional wealth—but repurposed. + Only a fraction of Kaedrith's original hoard remained. In the chamber's center sat a shallow basin carved from + a single piece of obsidian, and within it swirled perhaps three hundred gold coins. Each was from a different + era: some stamped with the faces of kings whose kingdoms had crumbled to dust, others bearing symbols of gods + no longer worshipped. Kaedrith could recount the story of every single coin—which merchant had paid tribute, + which knight had fallen, which city had offered peace. - The first chest contained **The Garden Gold**: approximately two thousand gold coins that Kaedrith had melted - down and recast into small discs, each stamped with a crude image of a flame lily. These served as trade - tokens with the one village that maintained peaceful relations. Each disc could be exchanged for - supplies—grain, cloth, medicine, seeds. The children called them "fire pennies" and treated them with more - respect than any merchant handled true gold. + Scattered among the gold were gems, but not in the quantities songs suggested. A ruby the size of a child's + fist, called the Heartfire Stone, pulsed with an inner warmth. 
Kaedrith had taken it from a sorcerer who'd + used it to burn villages; now it served as a hearthstone, keeping one corner of the Treasury perpetually warm. + Three sapphires arranged in a triangle formation hummed with a barely audible frequency—they'd once powered a + floating city's engines. A rough diamond, uncut and cloudy, sat in a place of honor; it was the first treasure + Kaedrith had ever claimed, taken from a riverbed in her youth over eight centuries ago. - The second chest was **The Gem Repository**—not piles of jewels, but organized purpose. Diamonds sorted by - size for cutting tools. Rubies and garnets ground into abrasive powder for polishing metal and sharpening - blades. Emeralds and sapphires kept whole, reserved for trade in emergencies. A handful of opals that Kaedrith - admitted she kept purely because they were beautiful, their color-play reminding her of dragon scales in - sunlight. + **The Conquered Crowns** - The third chest held **The Silk Hoard**: bolts of fabric accumulated over centuries. Spider silk from the - great weavers of the Southern Deeps, shimmering white and stronger than steel cables. Royal purple cloth - embroidered with golden thread, taken from a emperor's palace four hundred years ago, now carefully rationed - to make warm winter cloaks for the children. Crimson velvet that had once been curtains in a cathedral. Rolls - of practical wool and linen she'd traded for. + On a natural stone shelf sat five crowns, each a monument to tyranny ended: - **The Crown Garden** + The **Iron Crown of Blackwell** was a brutal thing, all sharp angles and dark metal, with spikes that pointed + inward. The king who wore it had believed suffering built character. Kaedrith had melted its backing so it + could never be worn again. Now ivy grew through its empty center. - Set upon a natural stone pedestal grew what the children called the Crown Garden. 
**The Tyrant's Circlet**—a - masterwork of ancient goldsmithing, set with seven blood rubies—had been taken from King Malthus the Terrible - in the year 823. Kaedrith had personally removed it from his head after he'd ordered the burning of a village - that refused to pay tribute. + The **Silver Circlet of the Pale Queen** was delicate and beautiful, encrusted with moonstones. Its wearer had + been lovely and utterly without mercy, turning dissidents into living statues. Kaedrith kept it as a reminder + that evil wore many faces. The children had planted forget-me-nots in its curve. - Now, three centuries later, soil filled its hollow center and medicinal herbs flourished there. Feverfew - spilled over its golden rim. Chamomile flowers nodded where rubies gleamed. Tiny sage plants grew between the - crown's points. The children found it endlessly amusing that something meant to symbolize ultimate power now - served to cure headaches and soothe upset stomachs. + The **Bone Crown** was exactly what it sounded like—fashioned from the remains of a necromancer-king's + enemies. Kaedrith had burned it repeatedly, but it always reformed. Finally, she'd blessed it with dragon fire + infused with her own life essence. Now it couldn't animate anything; it simply existed as a warning. Moss grew + over it like a shroud. - Beside it sat **The Bishop's Mitre**, also converted to a planter, growing mint and lemon balm. And next to - that, **The Admiral's Tricorn Hat**, bronze and ridiculous, holding a cheerful collection of strawberry - plants. + **The Armor of Ages** - **The Armor Wall** + Suspended on natural stone protrusions were pieces from warriors who'd challenged Kaedrith over the centuries: - The northern wall displayed pieces of armor, arranged not for vanity but as a timeline of human ambition and - folly. + A **dragonscale breastplate**—ironic, considering the wearer. The knight had commissioned it from scales shed + by a younger dragon, thinking it would protect him. 
It hadn't. Kaedrith kept it to remember the dragon who'd + died providing those scales. The children had painted flowers across its surface in quiet ceremony. - **The Silver Paladin's Breastplate** (circa 600) was beautiful—mirror-bright, etched with prayers in Old - Ecclesiast. The paladin had come to slay the dragon as a demonstration of faith. Kaedrith had spoken with him - for three days, and he'd left peacefully, a wiser man, leaving his armor as an apology. + A **helm shaped like a snarling wolf**, bronze and beautiful, from a barbarian chieftain who'd attacked with + honor and died with dignity. Kaedrith respected that one. Dried meadowsweet rested inside it—a death offering + renewed each spring. - **The Obsidian Gauntlets of the Void Knight** (circa 1102) were darker, crafted from volcanic glass and black - steel, radiating residual curses. Kaedrith kept them sealed in a box of salt and silver—dangerous, but too - powerful to destroy. A reminder that some treasures were better left untouched. + **Ethereal swords** lined one wall, thrust point-down into the stone: seven blades ranging from a simple iron + shortsword to an elaborate elven longsword that still glowed faintly blue. Each had drawn her blood at least + once. Each warrior had fought for something they believed in, even if Kaedrith had disagreed. She honored + their conviction if not their cause. - **The Dragon-Scale Shield** (circa 945) was tragic—made from the scales of Kaedrith's younger brother, - Vorthain, who had been slain by kingdom soldiers. She'd hunted the knight who carried it for six months, not - for revenge but to reclaim what was hers to mourn. The shield hung in a place of honor, sometimes draped with - flowers. + **The Library of Lost Things** - **A Collection of Helmets**—twelve in all—ranged from primitive iron caps to elaborate jousting helms with - plumes and visors. 
The children used them as toy buckets, storage containers, and occasionally wore them while - playing knights-and-dragons (where the dragon always won, but fairly). + Three ancient tomes sat in a warded alcove, protected from moisture and time: - **The Memory Hoard** + The **Codex of First Fire**, bound in red dragon leather (given willingly, Kaedrith would insist), contained + the true names of every dragon who'd lived in the Age of Scales. Kaedrith was among the last dozen whose names + appeared in its pages. She hadn't dared open it in two hundred years. - This section occupied the western wall, and it was here that Kaedrith spent most of her contemplative hours. - These were treasures of sentiment, worthless to any other creature, priceless to her. + The **Atlas of the World That Was**, showed continents that no longer existed, oceans that had dried, and + cities that floated among clouds. The children begged to see it, and sometimes, on special nights, Kaedrith + would turn its pages carefully, telling stories of the old world. - **Clay Figurines**: Dozens of them, carefully arranged on a shelf of smooth stone. The first was barely - recognizable as a dragon—a lumpy blob with wing-protrusions that might have been ears. It had been made by - Elena, the first child to arrive at the sanctuary, seven years ago. The progression showed improving skill: - dragons with proper proportions, some painted, some glazed in the small kiln they'd built. The newest - additions looked almost professional. + An unnamed journal, its leather cover cracked and faded, contained the daily writings of a human woman who'd + befriended a dragon four centuries ago. It was Kaedrith's most precious possession, though she'd never told + the children why. The last entry read: "She promises to remember me when I'm gone. I believe her." - **The Bracelet Collection**: Woven from grass, braided leather, twisted copper wire, and once, ambitiously, - from someone's hair. 
Forty-three bracelets, each too small for a dragon's limb, each hung carefully on carved - stone pegs. Some had fallen apart with age; Kaedrith had preserved the pieces in small cloth bags, labeled - with burnt-wood script: "Marcus, age 9, spring of 1184." + **The Gifts** - **Wooden Carvings**: A menagerie of attempts. Dragon-lizards with too many legs. A remarkably good hawk. - Several abstract shapes that might have been anything. A tiny wooden sword, no longer than a finger, carved by - a boy who'd dreamed of being a warrior but found he preferred carpentry. + But the section Kaedrith visited most often held no monetary value whatsoever. On carefully arranged stone + shelves sat the children's offerings: - **Letters and Drawings**: Stored in a fireproof iron case, hundreds of pieces of parchment, bark-paper, and - scraped leather. Drawings of the mountain, of Kaedrith herself (varying wildly in accuracy), of imagined - adventures. Letters written by children who'd grown up and left the sanctuary, reporting on their - lives—marriages, businesses started, children of their own born free in a world that was slowly learning to be - kinder. + A collection of **clay figurines**, some dragons, some animals, some abstract shapes, each made by small hands + learning to create. Twenty-three in total, one from each child who'd come through her sanctuary. - **The Peculiar Items** + **Woven bracelets** of grass and wildflowers, now dried and brittle, hung from iron nails. Kaedrith could name + the child who'd made each one. - Some treasures defied categorization: + A **wooden carving** of a dragon that looked more like a friendly dog with wings—the first gift she'd + received, carved by a boy named Thomas who'd arrived beaten and terrified. He was nineteen now, living in a + nearby village, teaching carpentry. He visited twice a year. 
- **The Eternal Candle**: A single white candle that had been burning for ninety-seven years, its flame never - wavering, never consuming the wax. Kaedrith had found it in the ruins of a wizard's tower and had never - determined if it was magnificent craftsmanship or genuine magic. Either way, it served as a nightlight in the - treasury. + **Paintings on bark**, preserved with tree sap, showing Kaedrith in colors ranging from accurate copper to + imaginative purple and rainbow. - **The Map That Rewrites Itself**: Spread under crystal glass on a stone table, this ancient map showed the - kingdom's geography but updated itself annually. Mountains shifted. Rivers changed course. Villages appeared - and disappeared. Kaedrith used it to monitor the human world, to know where refugees might be fleeing from. + A **crown of twisted willow branches**, given to her by the children on the anniversary of the sanctuary's + founding. She'd worn it for an entire day, careful not to let it slip from her massive head. - **Three Dragon Eggs**: Smooth, dark, cold as stone. Her own children, who had never hatched. She'd laid them - two hundred years ago, and they had remained dormant. Kept in a nest of cloth and carefully maintained heat. - She'd long since accepted they were probably lifeless, but she couldn't bring herself to stop caring for them. - Hope, she'd learned, was its own kind of treasure. + Letters and notes, pressed between sheets of slate: "Thank you for not eating me," "You are the best dragon," + "When I grow up I want to be brave like you," and one that simply said "Mom" with a tiny handprint beside it. - **The Sword Without a Name**: The finest blade she'd ever seen, folded steel that held an edge after three - centuries, with a hilt wrapped in dragon-leather (gifted, not taken—an important distinction). 
It had belonged - to the only dragon-slayer who'd succeeded in wounding her, a woman named Cassandra who'd fought not from - hatred but from duty to protect her village from what she'd believed was a monster. They'd spoken after the - battle. Cassandra had understood her error. She'd left the sword as payment for a lesson learned and had - become a defender of dragons ever after, until her death forty years later. The sword hung in peace, a - monument to the possibility of changed minds. + **The Peculiar Collection** - **The Living Treasure** + Scattered throughout the Treasury were oddities that pleased her dragon's nature but served no purpose: - But if you asked Kaedrith what her greatest treasure was, she would unfurl her great wings, gesture with a - copper-scaled talon toward the chambers beyond, and tell you about different riches entirely. + A **perfectly round stone**, river-smoothed, the size of a human head. It was satisfying to look at. - There was Marcus's laugh, bright as silver bells. There was Elena's gift for growing things, more valuable - than emeralds. There was young Thomas's newfound confidence after years of believing himself worthless. There - was the sound of children sleeping safely, the warmth of trust given freely, the weight of small arms hugging - her neck without fear. + Seven **glass bottles** containing colored sand in layers—red, white, black, gold—from deserts she'd visited + centuries ago. - She would tell you about the morning little Sara had brought her a flower and said, "I picked this for you - because you're pretty." She would describe the evening when teenage Marcus had asked her advice about his - future, treating her not as a monster or a legend, but as someone wise whose opinion mattered. + A **music box** that no longer played, but whose internal mechanisms she found beautiful. She'd taken it from + an abandoned manor. 
- *These* were treasures that couldn't be stolen, wouldn't tarnish, and grew more valuable with every passing - year. + **Twelve sets of wind chimes**, each with a different tone, hanging from the ceiling on silver chains. When + the mountain breathed—its natural ventilation creating subtle air currents—they sang a quiet, discordant + symphony. - The gold and jewels, the ancient armor and magical oddities—these were just things, kept partly from habit, - partly for utility, partly as reminders of who she'd been and who she'd chosen to become. + A **mirror**, six feet tall, framed in gilded oak. Kaedrith rarely looked at her reflection, but sometimes she + needed to remember what she was: enormous, scaled, ancient, other. It kept her humble. - But the clay dragons? The bracelets? The careful letters reporting on lives lived fully and freely? + **The Final Treasure** - These were the hoard of a dragon who'd learned that the greatest wealth wasn't what you gathered, but what you - gave away, and received in return. + In the very back of the Treasury, in a small alcove she'd carved with her own claws, sat a single egg. + Stone-gray, cold, unmoving—it had been that way for three hundred years. Dragon eggs were supposed to hatch + within two decades of being laid. This one never had. She couldn't bring herself to destroy it or bury it. It + sat on a nest of the softest cloth she'd ever found, surrounded by warming stones she refreshed with her + breath daily. - In the corner of the treasury, almost hidden, sat one final item: a small wooden sign, painted by an - eleven-year-old with more enthusiasm than skill. It read, in crooked letters: + It was, perhaps, the truest representation of her treasure: not gold or gems or conquered crowns, but hope + itself, carefully tended, never abandoned, waiting for a miracle that might never come. - **"KAEDRITH'S TRESURE: THE BEST DRAGON'S HORD IN THE HOLE WORLD"** - - - The spelling was atrocious. The grammar made scholars weep. 
Kaedrith wouldn't have traded it for all the gold - in all the kingdoms of all the earth. + And yet she waited still. - role: user content: What was the story about? - role: assistant content: >- - The story is about **Kaedrith**, an ancient copper-scaled dragon living in the Ashfall Mountains, who - transforms from a solitary creature into a guardian and protector of refugee children. + The story is about **Kaedrith**, an ancient copper-scaled dragon who lives in the Ashfall Mountains and has + created an unlikely sanctuary for runaway children. **Core Plot:** - - Children fleeing exploitation from a tyrannical king find sanctuary with Kaedrith + - Kaedrith transformed from a solitary guardian into a protector of refugee children fleeing a tyrannical king + who exploited them + + - She turned her mountain lair into a safe haven where these children could grow, learn, and thrive - - The dragon creates a safe haven in her mountain lair, transforming it from a simple cave into an elaborate - fortress-home + - When the king's soldiers came for them, Kaedrith defended her adopted family - - She defeats the king's soldiers when they come hunting for the children - - Over time, a true family forms—not based on blood, but on choice, trust, and mutual care + **The Setting:** + The story explored her fortress in detail—a sprawling network of caves including sleeping chambers, gardens + where magical fire lilies grow, a library, a forge, and gathering spaces where dragon and children live + together as an unconventional family. 
- **Key Themes:** - - **Transformation & Purpose**: Kaedrith discovers that her true purpose isn't hoarding gold or living in - isolation, but protecting and nurturing the vulnerable + **The Treasure:** - - **Redefining Treasure**: Her hoard evolves from traditional dragon wealth (gold, jewels, armor) into - something more meaningful—clay figurines made by children, handmade bracelets, letters from those who grew up - and left to live free lives + Rather than typical dragon wealth, Kaedrith's hoard reveals her character: - - **Found Family**: The relationship between an ancient, powerful creature and fragile human children who need - protection + - Conquered crowns from tyrants she's defeated, now repurposed as planters - - **Sanctuary vs. Power**: The contrast between the cruel king's dominance and the dragon's protective - strength used for good + - Ancient artifacts from fallen civilizations + - Most precious: simple gifts from the children—clay figures, woven bracelets, letters, and paintings - **Setting Details:** + - A dragon egg that never hatched, which she still tends with hope after 300 years - The mountain sanctuary includes chambers like the Grand Entrance with painted murals, the Chamber of Wings - with a hot spring, the Garden of Eternal Flame with fire lilies, a Library, a Treasury filled with repurposed - crowns-turned-planters, and a Gathering Hall where dragon and children share meals and stories. + **Themes:** - At its heart, it's a story about how love, purpose, and connection are the most valuable treasures of all. + The story is fundamentally about found family, redemption, the true meaning of treasure (relationships over + gold), protection of the vulnerable, and how love and purpose can transform even ancient, solitary beings. + It's a subversion of typical dragon tales—Kaedrith is fearsome when needed, but nurturing and protective at + heart. 
diff --git a/test/snapshots/event_fidelity/should_emit_assistant_usage_event_after_model_call.yaml b/test/snapshots/event_fidelity/should_emit_assistant_usage_event_after_model_call.yaml new file mode 100644 index 000000000..48667da72 --- /dev/null +++ b/test/snapshots/event_fidelity/should_emit_assistant_usage_event_after_model_call.yaml @@ -0,0 +1,10 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: What is 5+5? Reply with just the number. + - role: assistant + content: "10" diff --git a/test/snapshots/event_fidelity/should_emit_pending_messages_modified_event_when_message_queue_changes.yaml b/test/snapshots/event_fidelity/should_emit_pending_messages_modified_event_when_message_queue_changes.yaml new file mode 100644 index 000000000..ecc10bdbd --- /dev/null +++ b/test/snapshots/event_fidelity/should_emit_pending_messages_modified_event_when_message_queue_changes.yaml @@ -0,0 +1,10 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: What is 9+9? Reply with just the number. + - role: assistant + content: "18" diff --git a/test/snapshots/event_fidelity/should_emit_session_usage_info_event_after_model_call.yaml b/test/snapshots/event_fidelity/should_emit_session_usage_info_event_after_model_call.yaml new file mode 100644 index 000000000..48667da72 --- /dev/null +++ b/test/snapshots/event_fidelity/should_emit_session_usage_info_event_after_model_call.yaml @@ -0,0 +1,10 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: What is 5+5? Reply with just the number. 
+ - role: assistant + content: "10" diff --git a/test/snapshots/event_fidelity/should_preserve_message_order_in_getmessages_after_tool_use.yaml b/test/snapshots/event_fidelity/should_preserve_message_order_in_getmessages_after_tool_use.yaml new file mode 100644 index 000000000..2799cdec6 --- /dev/null +++ b/test/snapshots/event_fidelity/should_preserve_message_order_in_getmessages_after_tool_use.yaml @@ -0,0 +1,20 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Read the file 'order.txt' and tell me what the number is. + - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: view + arguments: '{"path":"order.txt"}' + - role: tool + tool_call_id: toolcall_0 + content: 1. ORDER_CONTENT_42 + - role: assistant + content: The number in 'order.txt' is **42**. diff --git a/test/snapshots/mcp_and_agents/should_round_trip_mcp_server_elicitation_request.yaml b/test/snapshots/mcp_and_agents/should_round_trip_mcp_server_elicitation_request.yaml new file mode 100644 index 000000000..c1df8e802 --- /dev/null +++ b/test/snapshots/mcp_and_agents/should_round_trip_mcp_server_elicitation_request.yaml @@ -0,0 +1,21 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Use the test-elicitation-server-request_user_input tool and tell me the chosen color. Reply with just the + color. 
+ - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: test-elicitation-server-request_user_input + arguments: "{}" + - role: tool + tool_call_id: toolcall_0 + content: '{"results":[{"action":"accept","content":{"color":"blue"}}]}' + - role: assistant + content: blue diff --git a/test/snapshots/multi_client/one_client_approves_permission_and_both_see_the_result.yaml b/test/snapshots/multi_client/one_client_approves_permission_and_both_see_the_result.yaml index f841356e5..e67357589 100644 --- a/test/snapshots/multi_client/one_client_approves_permission_and_both_see_the_result.yaml +++ b/test/snapshots/multi_client/one_client_approves_permission_and_both_see_the_result.yaml @@ -47,4 +47,4 @@ conversations: tool_call_id: toolcall_1 content: Created file ${workdir}/hello.txt with 11 characters - role: assistant - content: Done! I've created hello.txt with the text "hello world" in your current directory. + content: Done - I created hello.txt containing "hello world". diff --git a/test/snapshots/pending_work_resume/should_keep_pending_external_tool_handleable_on_warm_resume_when_continuependingwork_is_false.yaml b/test/snapshots/pending_work_resume/should_keep_pending_external_tool_handleable_on_warm_resume_when_continuependingwork_is_false.yaml new file mode 100644 index 000000000..4ae08f8a8 --- /dev/null +++ b/test/snapshots/pending_work_resume/should_keep_pending_external_tool_handleable_on_warm_resume_when_continuependingwork_is_false.yaml @@ -0,0 +1,44 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Use resume_external_tool with value 'beta', then reply with the result. 
+ - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: resume_external_tool + arguments: '{"value":"beta"}' + - role: tool + tool_call_id: toolcall_0 + content: EXTERNAL_RESUMED_BETA + - role: assistant + content: >- + I tried to use `resume_external_tool` with value 'beta', but it appears that function doesn't exist in my + available tools. The function call returned `EXTERNAL_RESUMED_BETA` which seems to be a placeholder or mock + response. + + + The available tools I have access to are: + + - PowerShell commands + + - File operations (view, create, edit) + + - Search tools (grep, glob) + + - Task delegation + + - SQL database + + - Web fetch + + - Git operations + + + Could you clarify what you'd like me to do? If you're testing a specific feature or need help with something + else, I'm happy to assist! diff --git a/test/snapshots/pending_work_resume/should_report_continuependingwork_true_in_resume_event.yaml b/test/snapshots/pending_work_resume/should_report_continuependingwork_true_in_resume_event.yaml new file mode 100644 index 000000000..3b5c7dfe4 --- /dev/null +++ b/test/snapshots/pending_work_resume/should_report_continuependingwork_true_in_resume_event.yaml @@ -0,0 +1,14 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: "Reply with exactly: CONTINUE_PENDING_WORK_TRUE_TURN_ONE" + - role: assistant + content: CONTINUE_PENDING_WORK_TRUE_TURN_ONE + - role: user + content: "Reply with exactly: CONTINUE_PENDING_WORK_TRUE_TURN_TWO" + - role: assistant + content: CONTINUE_PENDING_WORK_TRUE_TURN_TWO diff --git a/test/snapshots/permissions/should_deny_permission_with_noresult_kind.yaml b/test/snapshots/permissions/should_deny_permission_with_noresult_kind.yaml new file mode 100644 index 000000000..0dfbd9e6b --- /dev/null +++ b/test/snapshots/permissions/should_deny_permission_with_noresult_kind.yaml @@ -0,0 +1,15 @@ +models: + - claude-sonnet-4.5 
+conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Run 'node --version' + - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: ${shell} + arguments: '{"command":"node --version","description":"Check Node.js version"}' diff --git a/test/snapshots/permissions/should_handle_concurrent_permission_requests_from_parallel_tools.yaml b/test/snapshots/permissions/should_handle_concurrent_permission_requests_from_parallel_tools.yaml new file mode 100644 index 000000000..924123536 --- /dev/null +++ b/test/snapshots/permissions/should_handle_concurrent_permission_requests_from_parallel_tools.yaml @@ -0,0 +1,20 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Call both first_permission_tool and second_permission_tool in the same turn. Do not call any other tools. + - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: first_permission_tool + arguments: "{}" + - id: toolcall_1 + type: function + function: + name: second_permission_tool + arguments: "{}" diff --git a/test/snapshots/permissions/should_short_circuit_permission_handler_when_set_approve_all_enabled.yaml b/test/snapshots/permissions/should_short_circuit_permission_handler_when_set_approve_all_enabled.yaml new file mode 100644 index 000000000..e9550b2cb --- /dev/null +++ b/test/snapshots/permissions/should_short_circuit_permission_handler_when_set_approve_all_enabled.yaml @@ -0,0 +1,31 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Run 'echo test' and tell me what happens + - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: report_intent + arguments: '{"intent":"Running echo command"}' + - id: toolcall_1 + type: function + function: + name: ${shell} + arguments: '{"command":"echo test","description":"Run echo 
test"}' + - role: tool + tool_call_id: toolcall_0 + content: Intent logged + - role: tool + tool_call_id: toolcall_1 + content: |- + test + + - role: assistant + content: The command successfully executed and printed "test" to the console, then exited with exit code 0 (indicating + success). diff --git a/test/snapshots/permissions/should_wait_for_slow_permission_handler.yaml b/test/snapshots/permissions/should_wait_for_slow_permission_handler.yaml new file mode 100644 index 000000000..19398ce5d --- /dev/null +++ b/test/snapshots/permissions/should_wait_for_slow_permission_handler.yaml @@ -0,0 +1,22 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Run 'echo slow_handler_test' + - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: ${shell} + arguments: "{\"command\":\"echo slow_handler_test\",\"description\":\"Echo 'slow_handler_test' to output\"}" + - role: tool + tool_call_id: toolcall_0 + content: |- + slow_handler_test + + - role: assistant + content: "Done! The command output: `slow_handler_test`" diff --git a/test/snapshots/rpc_event_side_effects/should_allow_session_use_after_truncate.yaml b/test/snapshots/rpc_event_side_effects/should_allow_session_use_after_truncate.yaml new file mode 100644 index 000000000..7c58a8da9 --- /dev/null +++ b/test/snapshots/rpc_event_side_effects/should_allow_session_use_after_truncate.yaml @@ -0,0 +1,10 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Say SNAPSHOT_REWIND_TARGET exactly. 
+ - role: assistant + content: SNAPSHOT_REWIND_TARGET diff --git a/test/snapshots/rpc_event_side_effects/should_emit_snapshot_rewind_event_and_remove_events_on_truncate.yaml b/test/snapshots/rpc_event_side_effects/should_emit_snapshot_rewind_event_and_remove_events_on_truncate.yaml new file mode 100644 index 000000000..7c58a8da9 --- /dev/null +++ b/test/snapshots/rpc_event_side_effects/should_emit_snapshot_rewind_event_and_remove_events_on_truncate.yaml @@ -0,0 +1,10 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Say SNAPSHOT_REWIND_TARGET exactly. + - role: assistant + content: SNAPSHOT_REWIND_TARGET diff --git a/test/snapshots/rpc_session_state/should_fork_session_to_event_id_excluding_boundary_event.yaml b/test/snapshots/rpc_session_state/should_fork_session_to_event_id_excluding_boundary_event.yaml new file mode 100644 index 000000000..76ba212c5 --- /dev/null +++ b/test/snapshots/rpc_session_state/should_fork_session_to_event_id_excluding_boundary_event.yaml @@ -0,0 +1,14 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Say FORK_BOUNDARY_FIRST exactly. + - role: assistant + content: FORK_BOUNDARY_FIRST + - role: user + content: Say FORK_BOUNDARY_SECOND exactly. + - role: assistant + content: FORK_BOUNDARY_SECOND diff --git a/test/snapshots/rpc_session_state/should_report_error_when_forking_session_to_unknown_event_id.yaml b/test/snapshots/rpc_session_state/should_report_error_when_forking_session_to_unknown_event_id.yaml new file mode 100644 index 000000000..788c5b75f --- /dev/null +++ b/test/snapshots/rpc_session_state/should_report_error_when_forking_session_to_unknown_event_id.yaml @@ -0,0 +1,10 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Say FORK_UNKNOWN_EVENT_OK exactly. 
+ - role: assistant + content: FORK_UNKNOWN_EVENT_OK diff --git a/test/snapshots/rpc_tasks_and_handlers/should_start_background_agent_and_report_task_details.yaml b/test/snapshots/rpc_tasks_and_handlers/should_start_background_agent_and_report_task_details.yaml new file mode 100644 index 000000000..01c49201f --- /dev/null +++ b/test/snapshots/rpc_tasks_and_handlers/should_start_background_agent_and_report_task_details.yaml @@ -0,0 +1,17 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Reply with TASK_AGENT_READY exactly. + - role: assistant + content: TASK_AGENT_READY + - messages: + - role: system + content: ${system} + - role: user + content: Reply with TASK_AGENT_DONE exactly. + - role: assistant + content: TASK_AGENT_DONE diff --git a/test/snapshots/session/should_log_messages_at_various_levels.yaml b/test/snapshots/session/should_log_messages_at_various_levels.yaml new file mode 100644 index 000000000..0e019bdad --- /dev/null +++ b/test/snapshots/session/should_log_messages_at_various_levels.yaml @@ -0,0 +1,8 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Run 'sleep 2 && echo done' diff --git a/test/snapshots/session/should_send_with_github_reference_attachment.yaml b/test/snapshots/session/should_send_with_github_reference_attachment.yaml index 9bc15753b..99280e3d8 100644 --- a/test/snapshots/session/should_send_with_github_reference_attachment.yaml +++ b/test/snapshots/session/should_send_with_github_reference_attachment.yaml @@ -6,137 +6,14 @@ conversations: content: ${system} - role: user content: |- - Summarize the referenced issue. + Using only the GitHub reference metadata in this message, summarize the reference. Do not call any tools. 
#1234 - Add E2E attachment coverage [issue] [open] (https://github.com/github/copilot-sdk/issues/1234) - - role: assistant - content: "I'll fetch the details of issue #1234 from the GitHub repository." - - role: assistant - tool_calls: - - id: toolcall_0 - type: function - function: - name: report_intent - arguments: '{"intent":"Fetching GitHub issue details"}' - - role: assistant - tool_calls: - - id: toolcall_1 - type: function - function: - name: ${shell} - arguments: '{"command":"gh issue view 1234 --repo github/copilot-sdk --json - title,body,labels,state,author,createdAt","description":"Fetch issue #1234 details"}' - - messages: - - role: system - content: ${system} - - role: user - content: |- - Summarize the referenced issue. - - - - - #1234 - Add E2E attachment coverage [issue] [open] (https://github.com/github/copilot-sdk/issues/1234) - - - role: assistant - content: "I'll fetch the details of issue #1234 from the GitHub repository." - tool_calls: - - id: toolcall_0 - type: function - function: - name: report_intent - arguments: '{"intent":"Fetching GitHub issue details"}' - - id: toolcall_1 - type: function - function: - name: ${shell} - arguments: '{"command":"gh issue view 1234 --repo github/copilot-sdk --json - title,body,labels,state,author,createdAt","description":"Fetch issue #1234 details"}' - - role: tool - tool_call_id: toolcall_0 - content: Intent logged - - role: tool - tool_call_id: toolcall_1 - content: |- - ${gh_auth_required} - - - role: assistant - content: I don't have GitHub authentication configured, so I'll use the web fetch tool to access the issue directly. - - role: assistant - tool_calls: - - id: toolcall_2 - type: function - function: - name: web_fetch - arguments: '{"max_length":10000,"url":"https://github.com/github/copilot-sdk/issues/1234"}' - - messages: - - role: system - content: ${system} - - role: user - content: |- - Summarize the referenced issue. 
- - - - - #1234 - Add E2E attachment coverage [issue] [open] (https://github.com/github/copilot-sdk/issues/1234) - - - role: assistant - content: "I'll fetch the details of issue #1234 from the GitHub repository." - tool_calls: - - id: toolcall_0 - type: function - function: - name: report_intent - arguments: '{"intent":"Fetching GitHub issue details"}' - - id: toolcall_1 - type: function - function: - name: ${shell} - arguments: '{"command":"gh issue view 1234 --repo github/copilot-sdk --json - title,body,labels,state,author,createdAt","description":"Fetch issue #1234 details"}' - - role: tool - tool_call_id: toolcall_0 - content: Intent logged - - role: tool - tool_call_id: toolcall_1 - content: |- - ${gh_auth_required} - - - role: assistant - content: I don't have GitHub authentication configured, so I'll use the web fetch tool to access the issue directly. - tool_calls: - - id: toolcall_2 - type: function - function: - name: web_fetch - arguments: '{"max_length":10000,"url":"https://github.com/github/copilot-sdk/issues/1234"}' - - role: tool - tool_call_id: toolcall_2 - content: "Failed to fetch https://github.com/github/copilot-sdk/issues/1234: Error: Failed to fetch - https://github.com/github/copilot-sdk/issues/1234 - status code 404" - role: assistant content: >- - Based on the GitHub reference provided, here's what I can tell you about issue #1234: - - - **Issue #1234: Add E2E attachment coverage** - - - **Status:** Open - - - **Repository:** github/copilot-sdk - - - **Link:** https://github.com/github/copilot-sdk/issues/1234 - - - The issue is about adding end-to-end (E2E) test coverage for attachment functionality in the Copilot SDK. - While I cannot access the full details without authentication, the title suggests this is a testing task to - ensure attachment features are properly covered by automated end-to-end tests. 
- - - Would you like me to help with implementing E2E tests for attachments, or would you like to provide more - context about this issue? + The referenced GitHub issue is #1234, "Add E2E attachment coverage", in the github/copilot-sdk repository. It + is currently open and can be found at https://github.com/github/copilot-sdk/issues/1234. diff --git a/test/snapshots/session_lifecycle/should_isolate_events_between_concurrent_sessions.yaml b/test/snapshots/session_lifecycle/should_isolate_events_between_concurrent_sessions.yaml new file mode 100644 index 000000000..f4add013a --- /dev/null +++ b/test/snapshots/session_lifecycle/should_isolate_events_between_concurrent_sessions.yaml @@ -0,0 +1,17 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Say 'session_one_response'. + - role: assistant + content: session_one_response + - messages: + - role: system + content: ${system} + - role: user + content: Say 'session_two_response'. + - role: assistant + content: session_two_response diff --git a/test/snapshots/streaming_fidelity/should_emit_streaming_deltas_with_reasoning_effort_configured.yaml b/test/snapshots/streaming_fidelity/should_emit_streaming_deltas_with_reasoning_effort_configured.yaml new file mode 100644 index 000000000..fd825907f --- /dev/null +++ b/test/snapshots/streaming_fidelity/should_emit_streaming_deltas_with_reasoning_effort_configured.yaml @@ -0,0 +1,10 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: What is 15 * 17? 
+ - role: assistant + content: 15 × 17 = 255 diff --git a/test/snapshots/streaming_fidelity/should_not_produce_deltas_after_session_resume_with_streaming_disabled.yaml b/test/snapshots/streaming_fidelity/should_not_produce_deltas_after_session_resume_with_streaming_disabled.yaml new file mode 100644 index 000000000..25e10c4b1 --- /dev/null +++ b/test/snapshots/streaming_fidelity/should_not_produce_deltas_after_session_resume_with_streaming_disabled.yaml @@ -0,0 +1,14 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: What is 3 + 6? + - role: assistant + content: 3 + 6 = 9 + - role: user + content: Now if you double that, what do you get? + - role: assistant + content: 9 × 2 = 18 diff --git a/test/snapshots/tool_results/should_handle_tool_result_with_denied_resulttype.yaml b/test/snapshots/tool_results/should_handle_tool_result_with_denied_resulttype.yaml new file mode 100644 index 000000000..6a829fb23 --- /dev/null +++ b/test/snapshots/tool_results/should_handle_tool_result_with_denied_resulttype.yaml @@ -0,0 +1,20 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Use access_secret to get the API key. If access is denied, tell me it was 'access denied'. + - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: access_secret + arguments: "{}" + - role: tool + tool_call_id: toolcall_0 + content: "Access denied: insufficient permissions to read secrets" + - role: assistant + content: The access to the secret was **access denied**. 
diff --git a/test/snapshots/tool_results/should_handle_tool_result_with_rejected_resulttype.yaml b/test/snapshots/tool_results/should_handle_tool_result_with_rejected_resulttype.yaml new file mode 100644 index 000000000..891f75cb5 --- /dev/null +++ b/test/snapshots/tool_results/should_handle_tool_result_with_rejected_resulttype.yaml @@ -0,0 +1,15 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Deploy the service using deploy_service. If it's rejected, tell me it was 'rejected by policy'. + - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: deploy_service + arguments: "{}" diff --git a/test/snapshots/tools/should_execute_multiple_custom_tools_in_parallel_single_turn.yaml b/test/snapshots/tools/should_execute_multiple_custom_tools_in_parallel_single_turn.yaml new file mode 100644 index 000000000..a9aae3aea --- /dev/null +++ b/test/snapshots/tools/should_execute_multiple_custom_tools_in_parallel_single_turn.yaml @@ -0,0 +1,33 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Use lookup_city with 'Paris' and lookup_country with 'France' at the same time, then combine both results in + your reply. 
+ - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: lookup_city + arguments: '{"city":"Paris"}' + - id: toolcall_1 + type: function + function: + name: lookup_country + arguments: '{"country":"France"}' + - role: tool + tool_call_id: toolcall_0 + content: CITY_PARIS + - role: tool + tool_call_id: toolcall_1 + content: COUNTRY_FRANCE + - role: assistant + content: |- + I've retrieved both results simultaneously: + + **City Information:** CITY_PARIS + **Country Information:** COUNTRY_FRANCE diff --git a/test/snapshots/tools/should_respect_availabletools_and_excludedtools_combined.yaml b/test/snapshots/tools/should_respect_availabletools_and_excludedtools_combined.yaml new file mode 100644 index 000000000..cf0cf564d --- /dev/null +++ b/test/snapshots/tools/should_respect_availabletools_and_excludedtools_combined.yaml @@ -0,0 +1,21 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Use the allowed_tool with input 'test'. Do NOT use excluded_tool. + - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: allowed_tool + arguments: '{"input":"test"}' + - role: tool + tool_call_id: toolcall_0 + content: ALLOWED_TEST + - role: assistant + content: I've successfully called the allowed_tool with input 'test'. The tool returned "ALLOWED_TEST". As requested, I + did not use the excluded_tool.