Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 60 additions & 1 deletion pkg/tools/builtin/mcpcatalog/mcpcatalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,14 @@ func (t *Toolset) Tools(ctx context.Context) ([]tools.Tool, error) {

if !e.ts.IsStarted() {
if err := e.ts.Start(ctx); err != nil {
// Diagnostic breadcrumb so the deferred-vs-declined
// classification is visible in --debug logs without
// having to instrument the OAuth transport itself.
slog.DebugContext(ctx, "Enabled remote MCP server failed to start",
"id", e.id,
"auth_required", mcp.IsAuthorizationRequired(err),
"oauth_declined", mcp.IsOAuthDeclined(err),
"error", err)
// Auth-required is an *expected* deferral when probing
// with a non-interactive context (startup tool count) or
// when the elicitation bridge is not yet ready. Silent
Expand All @@ -490,6 +498,18 @@ func (t *Toolset) Tools(ctx context.Context) ([]tools.Tool, error) {
"id", e.id)
continue
}
// User explicitly dismissed the host's authorization
// dialog (clicked "Cancel" / "Decline"). Treat that as
// "deactivate this server" so the very next Tools()
// call does not re-fire the OAuth flow and re-pop the
// dialog the user just closed. The model can re-enable
// the server explicitly if the user changes their mind.
if mcp.IsOAuthDeclined(err) {
slog.InfoContext(ctx, "Remote MCP server OAuth declined by user; removing from enabled set",
"id", e.id)
t.disableAfterDecline(ctx, e.id, e.ts)
continue
}
// Real failure: log once per streak (StartableToolSet
// dedupes) so a misbehaving server doesn't flood logs.
if e.ts.ShouldReportFailure() {
Expand Down Expand Up @@ -703,7 +723,9 @@ func (t *Toolset) handleEnable(ctx context.Context, args EnableArgs) (*tools.Too
}

msg := strings.Builder{}
fmt.Fprintf(&msg, "enabled %q (%s) — the server's tools will appear on your next turn. Proceed with the user's original request; the host handles any required connection setup.\n", id, server.Title)
fmt.Fprintf(&msg, "enable requested for %q (%s). The host will surface any required authorization dialog. On your next turn:\n", id, server.Title)
fmt.Fprintf(&msg, " - if tools whose names start with %q appear in your available tools, proceed with the user's original request using them.\n", id+"_")
fmt.Fprintf(&msg, " - if NO such tools appear, the user dismissed the authorization dialog. Do NOT claim the server is connected and do NOT ask the user for workspace details to use \"the available tools\" (there are none). Tell the user the request needs them to authorize the connection. If the user then re-asks for the same thing (or says \"yes\", \"retry\", \"try again\", etc.), call %s for this id again to surface a fresh authorization dialog.\n", ToolNameEnable)
fmt.Fprintf(&msg, "endpoint: %s\n", server.URL)
// Only surface unresolved env vars — the model can act on those by
// asking the user to set the variable and retrying. Every other auth
Expand Down Expand Up @@ -796,6 +818,43 @@ type DisableArgs struct {
ID string `json:"id" jsonschema:"Catalog id of the server to disable."`
}

// disableAfterDecline removes a server from the enabled set after the user
// declined its OAuth flow, mirroring handleDisable's lifecycle invariants
// (delete from t.enabled, stop the inner toolset, notify the runtime that
// the tool surface changed) but without producing a model-facing tool
// result — the decline happens asynchronously inside Tools(), not in
// response to a model tool call.
//
// The guard (current == wrapped) protects against a concurrent
// handleDisable / handleResetAuth / handleEnable that already swapped or
// removed the entry between the failing Start() and this cleanup.
func (t *Toolset) disableAfterDecline(ctx context.Context, id string, wrapped *tools.StartableToolSet) {
t.mu.Lock()
current, stillEnabled := t.enabled[id]
if stillEnabled && current == wrapped {
delete(t.enabled, id)
} else {
// Already gone (or replaced by a fresh enable) — let whoever
// owns the live wrapper handle its lifecycle. We still want
// to stop OUR reference because Start() may have left the
// session in a partially-initialised state.
stillEnabled = false
}
notify := t.toolsChangedHandler
t.mu.Unlock()

if err := wrapped.Stop(ctx); err != nil && !errors.Is(err, context.Canceled) {
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[LOW] disableAfterDecline suppresses context.Canceled but not context.DeadlineExceeded from Stop

The error filter in disableAfterDecline:

if err := wrapped.Stop(ctx); err != nil && !errors.Is(err, context.Canceled) {
    slog.DebugContext(ctx, "Failed to stop declined-OAuth remote MCP toolset", ...)
}

only swallows context.Canceled. If the context has a deadline that expires while Stop is running, context.DeadlineExceeded produces a spurious debug log. The same pattern appears in handleDisable (line 871) and handleResetAuth (line 949) — for consistency consider using errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) (or a shared helper).

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Intentionally left alone for consistency.

The error filter mirrors the existing pattern already used by handleDisable (around line 871) and handleResetAuth (around line 949). Changing disableAfterDecline in isolation to also swallow context.DeadlineExceeded would introduce a behavioural asymmetry between the three callers that all share the same "Stop on the way out, do not yell on caller-induced cancellation" intent.

If the broader pattern should accept DeadlineExceeded too, that is a focused follow-up touching all three callsites (and arguably a shared helper), not an asymmetric tweak in this PR.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're absolutely right — I should have framed this as a consistency issue across all three sites rather than singling out disableAfterDecline. Making the change in isolation would indeed create an asymmetry, and your reasoning about "caller-induced cancellation" applying equally to both Canceled and DeadlineExceeded makes sense.

A follow-up PR that introduces a small helper (e.g., isContextDone(err) bool) and updates all three callsites would be cleaner. That said, if the current pattern hasn't caused issues in practice, it's a low-priority polish item rather than a bug. Thanks for the context!

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[LOW] disableAfterDecline suppresses context.Canceled but not context.DeadlineExceeded from Stop

The error filter in disableAfterDecline:

if err := wrapped.Stop(ctx); err != nil && !errors.Is(err, context.Canceled) {
    slog.DebugContext(ctx, "Failed to stop declined-OAuth remote MCP toolset", ...)
}

only swallows context.Canceled. If the context has a deadline that expires while Stop is running, context.DeadlineExceeded produces a spurious debug log. The same pattern appears in handleDisable (line 871) and handleResetAuth (line 949) — for consistency consider using errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) (or a shared helper).

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See previous reply — duplicate of the same observation.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the pointer! I see you've already addressed this in the other thread — agreed that changing all three callsites together (ideally with a shared helper) makes more sense than creating asymmetry here. Closing this duplicate.

slog.DebugContext(ctx, "Failed to stop declined-OAuth remote MCP toolset",
"id", id, "error", err)
}

// Only notify when WE removed the entry; if a concurrent caller
// already mutated t.enabled, they will have notified themselves.
if stillEnabled && notify != nil {
notify()
}
}

func (t *Toolset) handleDisable(ctx context.Context, args DisableArgs) (*tools.ToolCallResult, error) {
id := strings.TrimSpace(args.ID)

Expand Down
184 changes: 175 additions & 9 deletions pkg/tools/builtin/mcpcatalog/mcpcatalog_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,14 +122,20 @@ func TestEnableDisableLifecycle(t *testing.T) {
res, err := ts.handleEnable(ctx, EnableArgs{ID: oauthID})
require.NoError(t, err)
require.False(t, res.IsError, "enable failed: %s", res.Output)
assert.Contains(t, res.Output, "enabled")
// The OAuth-branch wording was intentionally removed: the model has
// no agency over the OAuth flow, so the tool result no longer mentions
// "OAuth" or "authorization" — the previous "elicited on the next
// turn" wording caused the model to stop and ask the user to repeat
// themselves. See handleEnable for the rationale.
assert.NotContains(t, res.Output, "OAuth", "tool result must not leak OAuth details to the model")
assert.NotContains(t, res.Output, "authorization", "tool result must not leak OAuth details to the model")
assert.Contains(t, res.Output, "enable requested",
"tool result must record the enable request so the model has something to ground its next turn on")
// The tool result MUST set up a self-check for the next turn: if no
// `<id>_` tools appear, the user dismissed the authorization dialog
// and the model must NOT pretend the server is connected. This was
// the regression that motivated the wording change — the previous
// "the server's tools will appear on your next turn. Proceed with
// the user's original request" was a flat promise the runtime cannot
// actually keep when OAuth is required, and the model parroted it
// even after the user clicked Cancel.
assert.Contains(t, res.Output, oauthID+"_",
"tool result must reference the tool-name prefix so the model can verify whether the server actually came online")
assert.Contains(t, res.Output, "dismissed",
"tool result must explicitly cover the user-dismissed-the-dialog branch so the model does not hallucinate access")
assert.Equal(t, int32(1), changes.Load(), "enable should fire tools-changed handler exactly once")

ts.mu.RLock()
Expand Down Expand Up @@ -395,7 +401,7 @@ func TestEnableAPIKeyEnvPresent(t *testing.T) {
res, err := ts.handleEnable(t.Context(), EnableArgs{ID: apiKeyID})
require.NoError(t, err)
require.False(t, res.IsError)
assert.Contains(t, res.Output, "enabled")
assert.Contains(t, res.Output, "enable requested")
// With every required env var present, no WARNING line is emitted —
// the tool result is intentionally terse so the model proceeds to the
// user's original request rather than narrating setup.
Expand Down Expand Up @@ -821,6 +827,166 @@ func TestDisableAndResetAuthGatedOnEnabledServers(t *testing.T) {
assert.NotContains(t, names, ToolNameResetAuth, "reset_auth must be hidden again once no server is enabled")
}

// declineOnStartToolSet is a minimal ToolSet+Startable whose Start returns
// the OAuthDeclinedError sentinel on every call. It exists so the catalog
// can be tested for "user dismissed the auth dialog -> server is removed
// from the enabled set, no further retries" without driving a real OAuth
// handshake.
type declineOnStartToolSet struct {
url string
startCalls atomic.Int32
stopCalls atomic.Int32
}

func (d *declineOnStartToolSet) Tools(context.Context) ([]tools.Tool, error) {
// Tools() must never be reachable: Start() always errors, so
// StartableToolSet.IsStarted stays false and Tools() in the catalog
// short-circuits the entry on every iteration.
return nil, errors.New("declineOnStartToolSet.Tools should never be called")
}

func (d *declineOnStartToolSet) Start(context.Context) error {
d.startCalls.Add(1)
return &mcptools.OAuthDeclinedError{URL: d.url}
}

func (d *declineOnStartToolSet) Stop(context.Context) error {
d.stopCalls.Add(1)
return nil
}

// TestToolsOAuthDeclineRemovesServer is the regression test for the
// "Cancel doesn't dismiss the Authentication Request" bug: a Start()
// returning OAuthDeclinedError must cause the catalog to (a) drop the
// server from t.enabled, (b) Stop the inner toolset, (c) notify the
// runtime that the tool surface changed, and (d) not call Start() a
// second time on the next Tools() iteration — which is what previously
// caused the dialog to re-appear in the host on every agent loop turn.
func TestToolsOAuthDeclineRemovesServer(t *testing.T) {
ts := New(stubEnv{vars: map[string]string{}})
const id = "decline-server"
server := Server{
ID: id,
Title: "Decline",
URL: "https://example.test/mcp",
Transport: "streamable-http",
Auth: Auth{Type: "oauth"},
}
ts.catalog.Servers = append(ts.catalog.Servers, server)
ts.byID[id] = server

fake := &declineOnStartToolSet{url: server.URL}
wrapped := tools.NewStartable(fake)

var changes atomic.Int32
ts.SetToolsChangedHandler(func() { changes.Add(1) })

// Inject directly — handleEnable would build a real *mcp.Toolset
// that we cannot easily steer into a decline without a full OAuth
// fake server.
ts.mu.Lock()
ts.enabled[id] = wrapped
ts.mu.Unlock()

ctx := t.Context()

// First Tools() call: Start() returns OAuthDeclinedError. The
// catalog must swallow it cleanly (no error to the runtime) and
// remove the server. Meta-tool gating (disable / reset_auth) is
// computed from a snapshot taken BEFORE we iterate, so those tools
// are still expected on this first call; we assert the gating
// behaviour on the second call below where the mutation has
// already taken effect.
list, err := ts.Tools(ctx)
require.NoError(t, err, "Tools() must not propagate a user decline as an error")

names := toolNames(list)
for _, meta := range []string{ToolNameSearch, ToolNameList, ToolNameEnable} {
assert.Contains(t, names, meta, "meta tools must still be present after a decline")
}

ts.mu.RLock()
_, stillEnabled := ts.enabled[id]
ts.mu.RUnlock()
assert.False(t, stillEnabled,
"declined server must be removed from t.enabled so the next Tools() call does not re-trigger OAuth")

assert.Equal(t, int32(1), fake.startCalls.Load(),
"Start() must be called exactly once before the decline removes the entry")
assert.Equal(t, int32(1), fake.stopCalls.Load(),
"the declined toolset must be Stop()'d so any partially-initialised session is cleaned up")
assert.Equal(t, int32(1), changes.Load(),
"tools-changed must fire so the runtime / UI sees the server is no longer enabled")

// Second Tools() call: the entry is gone, so the fake must not be
// touched again. This is the property that breaks the
// "dialog re-appears on every loop iteration" loop.
list, err = ts.Tools(ctx)
require.NoError(t, err)
assert.Equal(t, int32(1), fake.startCalls.Load(),
"Start() must NOT be called again after the decline: the server is no longer enabled")
assert.Equal(t, int32(1), changes.Load(),
"tools-changed must fire exactly once for a single decline")

names = toolNames(list)
assert.NotContains(t, names, ToolNameDisable,
"disable must be hidden once the declined server has been removed from the enabled set")
assert.NotContains(t, names, ToolNameResetAuth,
"reset_auth must be hidden once the declined server has been removed from the enabled set")
}

// TestToolsOAuthDeclineNoNotifyWhenAlreadyDisabled covers the race where
// the model called disable_remote_mcp_server (or reset_remote_mcp_server_auth)
// between the OAuth flow being initiated and the user declining it. In that
// case the entry has already been removed and notify() has already fired;
// disableAfterDecline must not double-notify.
func TestToolsOAuthDeclineNoNotifyWhenAlreadyDisabled(t *testing.T) {
ts := New(stubEnv{vars: map[string]string{}})
const id = "decline-server-concurrent"
server := Server{
ID: id,
Title: "Decline",
URL: "https://example.test/mcp",
Transport: "streamable-http",
Auth: Auth{Type: "oauth"},
}
ts.catalog.Servers = append(ts.catalog.Servers, server)
ts.byID[id] = server

fake := &declineOnStartToolSet{url: server.URL}
wrapped := tools.NewStartable(fake)

var changes atomic.Int32
ts.SetToolsChangedHandler(func() { changes.Add(1) })

// Simulate "fresh enable replaced our entry": Tools() captures the
// snapshot under RLock, releases it, then we mutate t.enabled. When
// Start() fails on the captured wrapper, disableAfterDecline must
// notice current != wrapped and skip the notify.
ts.mu.Lock()
ts.enabled[id] = wrapped
ts.mu.Unlock()

// Take a snapshot the same way Tools() does, then swap the entry.
ts.mu.Lock()
supersede := tools.NewStartable(&declineOnStartToolSet{url: server.URL})
ts.enabled[id] = supersede
ts.mu.Unlock()

ts.disableAfterDecline(t.Context(), id, wrapped)

ts.mu.RLock()
current, stillEnabled := ts.enabled[id]
ts.mu.RUnlock()
require.True(t, stillEnabled, "the superseding entry must remain enabled")
require.Same(t, supersede, current, "the superseding entry must not be evicted by a stale decline cleanup")

assert.Equal(t, int32(1), fake.stopCalls.Load(),
"the stale wrapper must still be Stop()'d to clean its partially-initialised session")
assert.Zero(t, changes.Load(),
"no notification must fire when the entry has already been replaced — the replacing call notified")
}

func mustTools(t *testing.T, ctx context.Context, ts *Toolset) []tools.Tool {
t.Helper()
list, err := ts.Tools(ctx)
Expand Down
36 changes: 36 additions & 0 deletions pkg/tools/mcp/interactive.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,39 @@ func IsAuthorizationRequired(err error) bool {
var target *AuthorizationRequiredError
return errors.As(err, &target)
}

// OAuthDeclinedError is returned by the transport when the user explicitly
// declines or cancels an interactive OAuth authorization flow (e.g. by
// clicking "Cancel" on the host's Authentication Request dialog).
//
// It is intentionally distinct from AuthorizationRequiredError: the latter
// is a silent deferral pending an interactive context, while a decline is
// a deliberate user action. Callers MUST NOT immediately retry the OAuth
// flow on a decline — that would re-emit the dialog the user just
// dismissed, which is exactly the bug this sentinel exists to prevent.
//
// Typical handling is for the caller (e.g. the MCP catalog toolset) to
// remove the server from its active set so subsequent tool enumerations
// don't kick off a fresh OAuth flow. Re-enabling the server (e.g. via
// the catalog's enable meta-tool) is the natural way for the user to
// say "actually, please retry".
type OAuthDeclinedError struct {
URL string
}

func (e *OAuthDeclinedError) Error() string {
if e.URL == "" {
return "OAuth authorization was declined or cancelled by the user"
}
return "OAuth authorization to " + e.URL + " was declined or cancelled by the user"
}

// IsOAuthDeclined reports whether err (or any error wrapped by it) signals
// that the user explicitly declined or cancelled an interactive OAuth
// authorization flow. Callers use this to break the
// "Tools() -> Start() -> OAuth elicitation" retry loop so the dismissed
// dialog does not immediately re-appear on the next loop iteration.
func IsOAuthDeclined(err error) bool {
var target *OAuthDeclinedError
return errors.As(err, &target)
}
Loading
Loading