From a62d6f6fa18119189049921f62465fe3f8b7f1ef Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Mon, 1 Jun 2026 16:21:00 +0200 Subject: [PATCH 1/2] fix(mcp): latch user-declined OAuth so the dialog does not re-pop The MCP SDK fires several initialize-stage RPCs concurrently. Each one that hits a 401 queues on oauthFlowMu and runs the OAuth flow. When the user clicks Cancel, the goroutine holding the mutex returns the decline and releases the mutex; the next queued goroutine sees no valid token and starts a fresh OAuth flow, re-emitting the elicitation the user just dismissed. The single-flight added in 8bc1fa90e coalesces concurrent attempts but does not latch a decline. Two changes to fix the loop without altering the happy-path flow: 1. New sentinel *OAuthDeclinedError in pkg/tools/mcp/interactive.go (plus IsOAuthDeclined helper), distinct from the existing AuthorizationRequiredError so callers can distinguish a deliberate user dismissal from a non-interactive deferral. 2. Sticky decline state on oauthTransport (lastOAuthDeclined): - authorizeOnce sets it under oauthFlowMu before the deferred Unlock fires, so any queued goroutine observes the decline before it can fire a fresh flow. - authorizeOnce short-circuits to OAuthDeclinedError at the top when the flag is set, avoiding handleOAuthFlow entirely. The MCP SDK wraps transport errors with fmt.Errorf("%w: %v", ...), dropping the original error from the unwrap chain (same reason AuthorizationRequiredError uses the lastAuthRequired flag pattern). enrichConnectError reads the new oauthDeclined() accessor and reconstitutes a clean *OAuthDeclinedError sentinel for callers. Regression tests: - TestUnmanagedOAuthFlow_ElicitationDeclineReturnsSentinel: a declined elicitation surfaces as IsOAuthDeclined, carries the server URL, and does not hit the token endpoint. - TestUnmanagedOAuthFlow_ElicitationDeclineLatchesAgainstConcurrentRoundTrips: 4 concurrent RoundTrips produce exactly ONE elicitation; every queued caller observes the latched decline and short-circuits. --- pkg/tools/mcp/interactive.go | 36 ++++++++++++ pkg/tools/mcp/oauth.go | 81 ++++++++++++++++++++++++- pkg/tools/mcp/oauth_test.go | 111 +++++++++++++++++++++++++++++++++++ pkg/tools/mcp/remote.go | 6 ++ 4 files changed, 232 insertions(+), 2 deletions(-) diff --git a/pkg/tools/mcp/interactive.go b/pkg/tools/mcp/interactive.go index 4dd145c94..1578f0f38 100644 --- a/pkg/tools/mcp/interactive.go +++ b/pkg/tools/mcp/interactive.go @@ -58,3 +58,39 @@ func IsAuthorizationRequired(err error) bool { var target *AuthorizationRequiredError return errors.As(err, &target) } + +// OAuthDeclinedError is returned by the transport when the user explicitly +// declines or cancels an interactive OAuth authorization flow (e.g. by +// clicking "Cancel" on the host's Authentication Request dialog). +// +// It is intentionally distinct from AuthorizationRequiredError: the latter +// is a silent deferral pending an interactive context, while a decline is +// a deliberate user action. Callers MUST NOT immediately retry the OAuth +// flow on a decline — that would re-emit the dialog the user just +// dismissed, which is exactly the bug this sentinel exists to prevent. +// +// Typical handling is for the caller (e.g. the MCP catalog toolset) to +// remove the server from its active set so subsequent tool enumerations +// don't kick off a fresh OAuth flow. Re-enabling the server (e.g. via +// the catalog's enable meta-tool) is the natural way for the user to +// say "actually, please retry". +type OAuthDeclinedError struct { + URL string +} + +func (e *OAuthDeclinedError) Error() string { + if e.URL == "" { + return "OAuth authorization was declined or cancelled by the user" + } + return "OAuth authorization to " + e.URL + " was declined or cancelled by the user" +} + +// IsOAuthDeclined reports whether err (or any error wrapped by it) signals +// that the user explicitly declined or cancelled an interactive OAuth +// authorization flow. Callers use this to break the +// "Tools() -> Start() -> OAuth elicitation" retry loop so the dismissed +// dialog does not immediately re-appear on the next loop iteration. +func IsOAuthDeclined(err error) bool { + var target *OAuthDeclinedError + return errors.As(err, &target) +} diff --git a/pkg/tools/mcp/oauth.go b/pkg/tools/mcp/oauth.go index 0358e6115..8be665e8c 100644 --- a/pkg/tools/mcp/oauth.go +++ b/pkg/tools/mcp/oauth.go @@ -302,6 +302,15 @@ type oauthTransport struct { // instead of unwrapping to know that OAuth was deferred rather than // failed for some other reason. lastAuthRequired bool + // lastOAuthDeclined records when the user explicitly declined or + // cancelled an interactive OAuth flow (clicked "Cancel" on the host's + // Authentication Request dialog). Same rationale as lastAuthRequired: + // the MCP SDK wraps transport errors with %v before they reach our + // callers, so errors.As cannot reliably recover the underlying + // *OAuthDeclinedError. enrichConnectError reads this flag to + // reconstitute a clean sentinel for the catalog's retry-loop short + // circuit (see mcpcatalog.Toolset.Tools / disableAfterDecline). + lastOAuthDeclined bool } func (t *oauthTransport) RoundTrip(req *http.Request) (*http.Response, error) { @@ -323,7 +332,41 @@ func (t *oauthTransport) authorizeOnce(ctx context.Context, authServer, wwwAuth return nil } - return t.handleOAuthFlow(ctx, authServer, wwwAuth) + // Sticky decline: the MCP SDK's Connect() runs several + // initialize-stage RPCs concurrently. Each one that gets a 401 + // queues here on oauthFlowMu. Without this short-circuit, the + // goroutine that wins the mutex runs the full OAuth flow; when + // the user clicks Cancel, this goroutine returns OAuthDeclinedError + // and releases the mutex — at which point the NEXT queued + // goroutine sees no valid token and fires a fresh OAuth flow, + // re-popping the dialog the user just dismissed. Latching the + // decline state under the same mutex makes the queued callers + // see the prior decline before they can start a new flow. + t.mu.Lock() + declined := t.lastOAuthDeclined + t.mu.Unlock() + if declined { + slog.DebugContext(ctx, "OAuth flow short-circuited: user already declined on this transport", "url", t.baseURL) + return &OAuthDeclinedError{URL: t.baseURL} + } + + err := t.handleOAuthFlow(ctx, authServer, wwwAuth) + if err != nil { + // Latch the decline state BEFORE the deferred Unlock fires so + // any goroutine queued on oauthFlowMu observes it on its next + // iteration of the getValidToken / declined / handleOAuthFlow + // dance. Setting this in roundTrip (after we return) would + // race: the queued goroutine would acquire the mutex first + // and start a fresh flow while we are still bubbling the + // error up the stack. + var declinedErr *OAuthDeclinedError + if errors.As(err, &declinedErr) { + t.mu.Lock() + t.lastOAuthDeclined = true + t.mu.Unlock() + } + } + return err } func (t *oauthTransport) roundTrip(req *http.Request, isRetry bool) (*http.Response, error) { @@ -392,6 +435,24 @@ func (t *oauthTransport) roundTrip(req *http.Request, isRetry bool) (*http.Respo t.mu.Unlock() return nil, authErr } + // User-driven decline of the host's Authentication + // Request dialog. Flag it explicitly so callers can + // recover the sentinel through enrichConnectError — + // the SDK's transport-error wrapping (fmt.Errorf + // "%w: %v") otherwise destroys the unwrap chain. See + // remote.go enrichConnectError + the lastAuthRequired + // pattern this mirrors. + var declinedErr *OAuthDeclinedError + if errors.As(err, &declinedErr) { + slog.Debug("OAuth flow declined by user", "url", t.baseURL) + if declinedErr.URL == "" { + declinedErr.URL = t.baseURL + } + t.mu.Lock() + t.lastOAuthDeclined = true + t.mu.Unlock() + return nil, declinedErr + } return nil, fmt.Errorf("OAuth flow failed: %w", err) } @@ -498,6 +559,17 @@ func (t *oauthTransport) authorizationRequired() bool { return t.lastAuthRequired } +// oauthDeclined reports whether the user explicitly declined or cancelled +// the most recent interactive OAuth flow handled by this transport. +// Mirrors authorizationRequired: callers use this to recognise the +// user-cancelled case despite the MCP SDK's %v-wrapping destroying the +// underlying *OAuthDeclinedError sentinel in the unwrap chain. +func (t *oauthTransport) oauthDeclined() bool { + t.mu.Lock() + defer t.mu.Unlock() + return t.lastOAuthDeclined +} + // extractServerMessage extracts a short, human-readable message from a // server response body. It tries, in order: // @@ -1106,7 +1178,12 @@ func (t *oauthTransport) handleUnmanagedOAuthFlow(ctx context.Context, authServe } slog.DebugContext(ctx, "Received elicitation response from client", "action", er.result.Action) if er.result.Action != tools.ElicitationActionAccept { - return errors.New("OAuth flow declined or cancelled by client") + // Surface a recognisable sentinel so callers (notably the + // MCP catalog toolset) can distinguish "user dismissed the + // dialog" from a generic transport/server failure and skip + // the otherwise-infinite Tools() -> Start() -> elicitation + // retry that would re-pop the dialog the user just closed. + return &OAuthDeclinedError{URL: t.baseURL} } if er.result.Content == nil { return errors.New("no payload received from client") diff --git a/pkg/tools/mcp/oauth_test.go b/pkg/tools/mcp/oauth_test.go index 6663c1d10..ef410c54e 100644 --- a/pkg/tools/mcp/oauth_test.go +++ b/pkg/tools/mcp/oauth_test.go @@ -1406,6 +1406,117 @@ func TestUnmanagedOAuthFlow_DriveFlow_ExchangesCodeForToken(t *testing.T) { assert.Equal(t, srv.URL, tok.AuthServer) } +// TestUnmanagedOAuthFlow_ElicitationDeclineReturnsSentinel verifies that +// when the user dismisses the host's Authentication Request dialog +// (action=decline), the OAuth round-trip surfaces a recognisable +// *OAuthDeclinedError, NOT a generic error. +// +// This sentinel is the contract that lets the mcp catalog toolset break +// the "Tools() -> Start() -> elicitation" retry loop: a generic error +// looks like a transient failure and gets retried on the next agent +// loop iteration, which is exactly what would cause the dismissed +// dialog to re-appear immediately. +func TestUnmanagedOAuthFlow_ElicitationDeclineReturnsSentinel(t *testing.T) { + srv := newUnmanagedOAuthTestServer(t) + defer srv.Close() + + const redirectURI = "https://example.test/oauth/cb" + capture := &elicitCaptured{} + capture.replyFn = func(_ *gomcp.ElicitParams) tools.ElicitationResult { + return tools.ElicitationResult{Action: tools.ElicitationActionDecline} + } + transport, _ := newUnmanagedTestTransport(t, srv.URL, redirectURI, capture) + + req, err := http.NewRequestWithContext(t.Context(), http.MethodPost, srv.URL, strings.NewReader("{}")) + require.NoError(t, err) + req.Header.Set("Content-Type", "application/json") + + resp, rtErr := transport.RoundTrip(req) + if resp != nil { + _ = resp.Body.Close() + } + require.Error(t, rtErr) + assert.True(t, IsOAuthDeclined(rtErr), + "declined elicitation must surface as IsOAuthDeclined so the catalog can short-circuit the retry loop; got: %v", rtErr) + + var declined *OAuthDeclinedError + require.ErrorAs(t, rtErr, &declined) + assert.Equal(t, srv.URL, declined.URL, + "the sentinel must carry the server URL so callers can correlate it back to a catalog entry") + + assert.Equal(t, int32(0), srv.tokenCalls.Load(), + "token endpoint must NOT be hit when the user declined the authorization") +} + +// TestUnmanagedOAuthFlow_ElicitationDeclineLatchesAgainstConcurrentRoundTrips +// is the regression test for the "Cancel re-pops the dialog" bug. The MCP +// SDK's Connect() fires several initialize-stage RPCs in parallel; each +// gets a 401 and queues on oauthFlowMu. Without the sticky-decline latch +// inside authorizeOnce, the FIRST queued goroutine returns +// OAuthDeclinedError on user cancel and releases the mutex; the NEXT +// queued goroutine then sees no valid token and starts a fresh OAuth +// flow, re-emitting the elicitation the user just dismissed. +// +// This test fires N concurrent RoundTrips. The elicitation handler +// declines on the first invocation. The expected behaviour: exactly +// ONE elicitation is sent (no re-pop), and every concurrent roundtrip +// returns OAuthDeclinedError. +func TestUnmanagedOAuthFlow_ElicitationDeclineLatchesAgainstConcurrentRoundTrips(t *testing.T) { + srv := newUnmanagedOAuthTestServer(t) + defer srv.Close() + + const redirectURI = "https://example.test/oauth/cb" + const concurrentRoundTrips = 4 + + var elicitationCount atomic.Int32 + capture := &elicitCaptured{} + capture.replyFn = func(_ *gomcp.ElicitParams) tools.ElicitationResult { + elicitationCount.Add(1) + return tools.ElicitationResult{Action: tools.ElicitationActionDecline} + } + transport, _ := newUnmanagedTestTransport(t, srv.URL, redirectURI, capture) + + type rtOut struct { + resp *http.Response + err error + } + results := make(chan rtOut, concurrentRoundTrips) + for range concurrentRoundTrips { + go func() { + req, err := http.NewRequestWithContext(t.Context(), http.MethodPost, srv.URL, strings.NewReader("{}")) + if err != nil { + results <- rtOut{nil, err} + return + } + req.Header.Set("Content-Type", "application/json") + resp, err := transport.RoundTrip(req) + // Close the body in the goroutine that owns the + // response (bodyclose linter cannot see the receiver + // side closing it through a channel send). + if resp != nil { + _ = resp.Body.Close() + } + results <- rtOut{resp, err} + }() + } + + for range concurrentRoundTrips { + select { + case out := <-results: + require.Error(t, out.err) + assert.True(t, IsOAuthDeclined(out.err), + "every concurrent roundtrip must surface IsOAuthDeclined after the user declined; got: %v", out.err) + case <-time.After(10 * time.Second): + t.Fatal("timeout waiting for concurrent OAuth round-trips to complete") + } + } + + assert.Equal(t, int32(1), elicitationCount.Load(), + "exactly one elicitation must be sent: subsequent queued roundtrips must observe the latched decline state and short-circuit before opening a new dialog") + assert.Equal(t, int32(0), srv.tokenCalls.Load(), + "token endpoint must NOT be hit when the user declined") +} + // TestUnmanagedOAuthFlow_DriveFlow_RejectsStateMismatch verifies the CSRF // check: if the client returns a `state` value that doesn't match what // docker-agent generated and embedded in the authorize URL, the flow diff --git a/pkg/tools/mcp/remote.go b/pkg/tools/mcp/remote.go index 3304d0dc1..f4c5d7f54 100644 --- a/pkg/tools/mcp/remote.go +++ b/pkg/tools/mcp/remote.go @@ -127,6 +127,12 @@ func (c *remoteMCPClient) Initialize(ctx context.Context, _ *gomcp.InitializeReq // // Pre: err != nil and t != nil; only called from the Connect failure path. func enrichConnectError(err error, t *oauthTransport) error { + // Order matters: a decline implies the interactive OAuth flow + // actually ran, so lastOAuthDeclined wins over lastAuthRequired in + // the (in practice impossible) case that both flags are set. + if t.oauthDeclined() { + return &OAuthDeclinedError{URL: t.baseURL} + } if t.authorizationRequired() { return &AuthorizationRequiredError{URL: t.baseURL} } From 95a26196279cd8496dd2c6aa4dca84f0558093c2 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Mon, 1 Jun 2026 16:21:20 +0200 Subject: [PATCH 2/2] fix(mcpcatalog): remove server on OAuth decline and stop lying to the model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After the transport-layer latch lands, the user-declined OAuth surfaces as an *OAuthDeclinedError out of Toolset.Start. Two follow-up issues remained: 1. The catalogs Tools() retried Start() on the same enabled wrapper on every agent loop iteration (mcp.IsAuthorizationRequired was the only short-circuit), so even with the dialog gone the catalog kept reporting "needs auth" forever and the meta-tools never re-collapsed. 2. handleEnable returned an unconditional "enabled X — proceed with the users original request; the host handles any required connection setup", added in 438058b82 to stop the model from stalling on YAML OAuth servers. With OAuth servers from the catalog the message is a flat promise the runtime cannot keep when the user dismisses the dialog: the model parrots it back ("On my next turn Ill have access to Notion tools") and either hallucinates results or asks for workspace details to call tools that no longer exist. Changes: - Tools(): on mcp.IsOAuthDeclined(err), call disableAfterDecline. disableAfterDecline removes the server from t.enabled, stops the inner toolset (so any partially-initialised session is cleaned up), and fires toolsChangedHandler exactly once — only when WE removed the entry, so a concurrent handleDisable / handleResetAuth / re-enable race does not double-notify. - handleEnable wording is now conditional and instructive instead of promissory: it tells the model to verify _ tools are present on the next turn, and gives explicit guidance for the dismissed-dialog branch (do not claim connected, do not hallucinate tools, tell the user honestly; on re-ask treat it as implicit retry and call enable_remote_mcp_server again). Regression tests: - TestToolsOAuthDeclineRemovesServer: Start() returning OAuthDeclinedError removes the server from t.enabled, stops the inner toolset, fires tools-changed exactly once, and a second Tools() call does NOT call Start() again — this is the property that broke the "dialog re-appears every loop iteration" loop. - TestToolsOAuthDeclineNoNotifyWhenAlreadyDisabled: when the entry has been superseded by a fresh enable between the failing Start() and the cleanup, disableAfterDecline still stops the stale wrapper but does NOT evict the new entry or double-notify. - Existing TestEnableDisableLifecycle / TestEnableAPIKeyEnvPresent updated for the new wording (asserts the self-check + dismissed-dialog branch are surfaced to the model). --- pkg/tools/builtin/mcpcatalog/mcpcatalog.go | 61 +++++- .../builtin/mcpcatalog/mcpcatalog_test.go | 184 +++++++++++++++++- 2 files changed, 235 insertions(+), 10 deletions(-) diff --git a/pkg/tools/builtin/mcpcatalog/mcpcatalog.go b/pkg/tools/builtin/mcpcatalog/mcpcatalog.go index 9e66b1656..13d51aa7d 100644 --- a/pkg/tools/builtin/mcpcatalog/mcpcatalog.go +++ b/pkg/tools/builtin/mcpcatalog/mcpcatalog.go @@ -480,6 +480,14 @@ func (t *Toolset) Tools(ctx context.Context) ([]tools.Tool, error) { if !e.ts.IsStarted() { if err := e.ts.Start(ctx); err != nil { + // Diagnostic breadcrumb so the deferred-vs-declined + // classification is visible in --debug logs without + // having to instrument the OAuth transport itself. + slog.DebugContext(ctx, "Enabled remote MCP server failed to start", + "id", e.id, + "auth_required", mcp.IsAuthorizationRequired(err), + "oauth_declined", mcp.IsOAuthDeclined(err), + "error", err) // Auth-required is an *expected* deferral when probing // with a non-interactive context (startup tool count) or // when the elicitation bridge is not yet ready. Silent @@ -490,6 +498,18 @@ func (t *Toolset) Tools(ctx context.Context) ([]tools.Tool, error) { "id", e.id) continue } + // User explicitly dismissed the host's authorization + // dialog (clicked "Cancel" / "Decline"). Treat that as + // "deactivate this server" so the very next Tools() + // call does not re-fire the OAuth flow and re-pop the + // dialog the user just closed. The model can re-enable + // the server explicitly if the user changes their mind. + if mcp.IsOAuthDeclined(err) { + slog.InfoContext(ctx, "Remote MCP server OAuth declined by user; removing from enabled set", + "id", e.id) + t.disableAfterDecline(ctx, e.id, e.ts) + continue + } // Real failure: log once per streak (StartableToolSet // dedupes) so a misbehaving server doesn't flood logs. if e.ts.ShouldReportFailure() { @@ -703,7 +723,9 @@ func (t *Toolset) handleEnable(ctx context.Context, args EnableArgs) (*tools.Too } msg := strings.Builder{} - fmt.Fprintf(&msg, "enabled %q (%s) — the server's tools will appear on your next turn. Proceed with the user's original request; the host handles any required connection setup.\n", id, server.Title) + fmt.Fprintf(&msg, "enable requested for %q (%s). The host will surface any required authorization dialog. On your next turn:\n", id, server.Title) + fmt.Fprintf(&msg, " - if tools whose names start with %q appear in your available tools, proceed with the user's original request using them.\n", id+"_") + fmt.Fprintf(&msg, " - if NO such tools appear, the user dismissed the authorization dialog. Do NOT claim the server is connected and do NOT ask the user for workspace details to use \"the available tools\" (there are none). Tell the user the request needs them to authorize the connection. If the user then re-asks for the same thing (or says \"yes\", \"retry\", \"try again\", etc.), call %s for this id again to surface a fresh authorization dialog.\n", ToolNameEnable) fmt.Fprintf(&msg, "endpoint: %s\n", server.URL) // Only surface unresolved env vars — the model can act on those by // asking the user to set the variable and retrying. Every other auth @@ -796,6 +818,43 @@ type DisableArgs struct { ID string `json:"id" jsonschema:"Catalog id of the server to disable."` } +// disableAfterDecline removes a server from the enabled set after the user +// declined its OAuth flow, mirroring handleDisable's lifecycle invariants +// (delete from t.enabled, stop the inner toolset, notify the runtime that +// the tool surface changed) but without producing a model-facing tool +// result — the decline happens asynchronously inside Tools(), not in +// response to a model tool call. +// +// The guard (current == wrapped) protects against a concurrent +// handleDisable / handleResetAuth / handleEnable that already swapped or +// removed the entry between the failing Start() and this cleanup. +func (t *Toolset) disableAfterDecline(ctx context.Context, id string, wrapped *tools.StartableToolSet) { + t.mu.Lock() + current, stillEnabled := t.enabled[id] + if stillEnabled && current == wrapped { + delete(t.enabled, id) + } else { + // Already gone (or replaced by a fresh enable) — let whoever + // owns the live wrapper handle its lifecycle. We still want + // to stop OUR reference because Start() may have left the + // session in a partially-initialised state. + stillEnabled = false + } + notify := t.toolsChangedHandler + t.mu.Unlock() + + if err := wrapped.Stop(ctx); err != nil && !errors.Is(err, context.Canceled) { + slog.DebugContext(ctx, "Failed to stop declined-OAuth remote MCP toolset", + "id", id, "error", err) + } + + // Only notify when WE removed the entry; if a concurrent caller + // already mutated t.enabled, they will have notified themselves. + if stillEnabled && notify != nil { + notify() + } +} + func (t *Toolset) handleDisable(ctx context.Context, args DisableArgs) (*tools.ToolCallResult, error) { id := strings.TrimSpace(args.ID) diff --git a/pkg/tools/builtin/mcpcatalog/mcpcatalog_test.go b/pkg/tools/builtin/mcpcatalog/mcpcatalog_test.go index 355e31c0a..bbbe120d0 100644 --- a/pkg/tools/builtin/mcpcatalog/mcpcatalog_test.go +++ b/pkg/tools/builtin/mcpcatalog/mcpcatalog_test.go @@ -122,14 +122,20 @@ func TestEnableDisableLifecycle(t *testing.T) { res, err := ts.handleEnable(ctx, EnableArgs{ID: oauthID}) require.NoError(t, err) require.False(t, res.IsError, "enable failed: %s", res.Output) - assert.Contains(t, res.Output, "enabled") - // The OAuth-branch wording was intentionally removed: the model has - // no agency over the OAuth flow, so the tool result no longer mentions - // "OAuth" or "authorization" — the previous "elicited on the next - // turn" wording caused the model to stop and ask the user to repeat - // themselves. See handleEnable for the rationale. - assert.NotContains(t, res.Output, "OAuth", "tool result must not leak OAuth details to the model") - assert.NotContains(t, res.Output, "authorization", "tool result must not leak OAuth details to the model") + assert.Contains(t, res.Output, "enable requested", + "tool result must record the enable request so the model has something to ground its next turn on") + // The tool result MUST set up a self-check for the next turn: if no + // `_` tools appear, the user dismissed the authorization dialog + // and the model must NOT pretend the server is connected. This was + // the regression that motivated the wording change — the previous + // "the server's tools will appear on your next turn. Proceed with + // the user's original request" was a flat promise the runtime cannot + // actually keep when OAuth is required, and the model parroted it + // even after the user clicked Cancel. + assert.Contains(t, res.Output, oauthID+"_", + "tool result must reference the tool-name prefix so the model can verify whether the server actually came online") + assert.Contains(t, res.Output, "dismissed", + "tool result must explicitly cover the user-dismissed-the-dialog branch so the model does not hallucinate access") assert.Equal(t, int32(1), changes.Load(), "enable should fire tools-changed handler exactly once") ts.mu.RLock() @@ -395,7 +401,7 @@ func TestEnableAPIKeyEnvPresent(t *testing.T) { res, err := ts.handleEnable(t.Context(), EnableArgs{ID: apiKeyID}) require.NoError(t, err) require.False(t, res.IsError) - assert.Contains(t, res.Output, "enabled") + assert.Contains(t, res.Output, "enable requested") // With every required env var present, no WARNING line is emitted — // the tool result is intentionally terse so the model proceeds to the // user's original request rather than narrating setup. @@ -821,6 +827,166 @@ func TestDisableAndResetAuthGatedOnEnabledServers(t *testing.T) { assert.NotContains(t, names, ToolNameResetAuth, "reset_auth must be hidden again once no server is enabled") } +// declineOnStartToolSet is a minimal ToolSet+Startable whose Start returns +// the OAuthDeclinedError sentinel on every call. It exists so the catalog +// can be tested for "user dismissed the auth dialog -> server is removed +// from the enabled set, no further retries" without driving a real OAuth +// handshake. +type declineOnStartToolSet struct { + url string + startCalls atomic.Int32 + stopCalls atomic.Int32 +} + +func (d *declineOnStartToolSet) Tools(context.Context) ([]tools.Tool, error) { + // Tools() must never be reachable: Start() always errors, so + // StartableToolSet.IsStarted stays false and Tools() in the catalog + // short-circuits the entry on every iteration. + return nil, errors.New("declineOnStartToolSet.Tools should never be called") +} + +func (d *declineOnStartToolSet) Start(context.Context) error { + d.startCalls.Add(1) + return &mcptools.OAuthDeclinedError{URL: d.url} +} + +func (d *declineOnStartToolSet) Stop(context.Context) error { + d.stopCalls.Add(1) + return nil +} + +// TestToolsOAuthDeclineRemovesServer is the regression test for the +// "Cancel doesn't dismiss the Authentication Request" bug: a Start() +// returning OAuthDeclinedError must cause the catalog to (a) drop the +// server from t.enabled, (b) Stop the inner toolset, (c) notify the +// runtime that the tool surface changed, and (d) not call Start() a +// second time on the next Tools() iteration — which is what previously +// caused the dialog to re-appear in the host on every agent loop turn. +func TestToolsOAuthDeclineRemovesServer(t *testing.T) { + ts := New(stubEnv{vars: map[string]string{}}) + const id = "decline-server" + server := Server{ + ID: id, + Title: "Decline", + URL: "https://example.test/mcp", + Transport: "streamable-http", + Auth: Auth{Type: "oauth"}, + } + ts.catalog.Servers = append(ts.catalog.Servers, server) + ts.byID[id] = server + + fake := &declineOnStartToolSet{url: server.URL} + wrapped := tools.NewStartable(fake) + + var changes atomic.Int32 + ts.SetToolsChangedHandler(func() { changes.Add(1) }) + + // Inject directly — handleEnable would build a real *mcp.Toolset + // that we cannot easily steer into a decline without a full OAuth + // fake server. + ts.mu.Lock() + ts.enabled[id] = wrapped + ts.mu.Unlock() + + ctx := t.Context() + + // First Tools() call: Start() returns OAuthDeclinedError. The + // catalog must swallow it cleanly (no error to the runtime) and + // remove the server. Meta-tool gating (disable / reset_auth) is + // computed from a snapshot taken BEFORE we iterate, so those tools + // are still expected on this first call; we assert the gating + // behaviour on the second call below where the mutation has + // already taken effect. + list, err := ts.Tools(ctx) + require.NoError(t, err, "Tools() must not propagate a user decline as an error") + + names := toolNames(list) + for _, meta := range []string{ToolNameSearch, ToolNameList, ToolNameEnable} { + assert.Contains(t, names, meta, "meta tools must still be present after a decline") + } + + ts.mu.RLock() + _, stillEnabled := ts.enabled[id] + ts.mu.RUnlock() + assert.False(t, stillEnabled, + "declined server must be removed from t.enabled so the next Tools() call does not re-trigger OAuth") + + assert.Equal(t, int32(1), fake.startCalls.Load(), + "Start() must be called exactly once before the decline removes the entry") + assert.Equal(t, int32(1), fake.stopCalls.Load(), + "the declined toolset must be Stop()'d so any partially-initialised session is cleaned up") + assert.Equal(t, int32(1), changes.Load(), + "tools-changed must fire so the runtime / UI sees the server is no longer enabled") + + // Second Tools() call: the entry is gone, so the fake must not be + // touched again. This is the property that breaks the + // "dialog re-appears on every loop iteration" loop. + list, err = ts.Tools(ctx) + require.NoError(t, err) + assert.Equal(t, int32(1), fake.startCalls.Load(), + "Start() must NOT be called again after the decline: the server is no longer enabled") + assert.Equal(t, int32(1), changes.Load(), + "tools-changed must fire exactly once for a single decline") + + names = toolNames(list) + assert.NotContains(t, names, ToolNameDisable, + "disable must be hidden once the declined server has been removed from the enabled set") + assert.NotContains(t, names, ToolNameResetAuth, + "reset_auth must be hidden once the declined server has been removed from the enabled set") +} + +// TestToolsOAuthDeclineNoNotifyWhenAlreadyDisabled covers the race where +// the model called disable_remote_mcp_server (or reset_remote_mcp_server_auth) +// between the OAuth flow being initiated and the user declining it. In that +// case the entry has already been removed and notify() has already fired; +// disableAfterDecline must not double-notify. +func TestToolsOAuthDeclineNoNotifyWhenAlreadyDisabled(t *testing.T) { + ts := New(stubEnv{vars: map[string]string{}}) + const id = "decline-server-concurrent" + server := Server{ + ID: id, + Title: "Decline", + URL: "https://example.test/mcp", + Transport: "streamable-http", + Auth: Auth{Type: "oauth"}, + } + ts.catalog.Servers = append(ts.catalog.Servers, server) + ts.byID[id] = server + + fake := &declineOnStartToolSet{url: server.URL} + wrapped := tools.NewStartable(fake) + + var changes atomic.Int32 + ts.SetToolsChangedHandler(func() { changes.Add(1) }) + + // Simulate "fresh enable replaced our entry": Tools() captures the + // snapshot under RLock, releases it, then we mutate t.enabled. When + // Start() fails on the captured wrapper, disableAfterDecline must + // notice current != wrapped and skip the notify. + ts.mu.Lock() + ts.enabled[id] = wrapped + ts.mu.Unlock() + + // Take a snapshot the same way Tools() does, then swap the entry. + ts.mu.Lock() + supersede := tools.NewStartable(&declineOnStartToolSet{url: server.URL}) + ts.enabled[id] = supersede + ts.mu.Unlock() + + ts.disableAfterDecline(t.Context(), id, wrapped) + + ts.mu.RLock() + current, stillEnabled := ts.enabled[id] + ts.mu.RUnlock() + require.True(t, stillEnabled, "the superseding entry must remain enabled") + require.Same(t, supersede, current, "the superseding entry must not be evicted by a stale decline cleanup") + + assert.Equal(t, int32(1), fake.stopCalls.Load(), + "the stale wrapper must still be Stop()'d to clean its partially-initialised session") + assert.Zero(t, changes.Load(), + "no notification must fire when the entry has already been replaced — the replacing call notified") +} + func mustTools(t *testing.T, ctx context.Context, ts *Toolset) []tools.Tool { t.Helper() list, err := ts.Tools(ctx)