Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 102 additions & 0 deletions e2e/tui/chat_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
// Package tui_test contains end-to-end tests for the docker-agent terminal UI.
//
// These tests drive the real top-level TUI model through the tuitest harness
// (pkg/tui/tuitest) against a replaying VCR proxy, so a whole user journey —
// type a prompt, submit it, watch the agent stream its answer — runs offline
// and deterministically. They are the regression net for the finished
// product: a visual or behavioral change in a covered screen surfaces as a
// failed matcher or a golden diff.
package tui_test

import (
"testing"
"time"

tea "charm.land/bubbletea/v2"

"github.com/docker/docker-agent/pkg/tui"
"github.com/docker/docker-agent/pkg/tui/tuitest"
"github.com/docker/docker-agent/pkg/tui/types"
)

// TestChat_BasicMath walks the simplest chat journey: the question is typed
// and submitted, its echo shows up on screen, and then the agent's answer
// appears. The answer is not produced live; it is replayed from
// testdata/cassettes/TestChat_BasicMath.yaml.
func TestChat_BasicMath(t *testing.T) {
	const (
		question = "What's 2+2?"
		answer   = "2 + 2 equals 4."
	)

	ui := newTUI(t, "testdata/basic.yaml", 120, 40)

	// Wait for the user's own message to echo in the transcript first, then
	// for the replayed reply.
	ui.Type(question).Enter().
		WaitFor(tuitest.Contains(question)).
		WaitFor(tuitest.Contains(answer))
}

// TestChat_PromptIsEditable checks the two halves of composing a message:
// text typed into the prompt is visible on screen before anything is sent,
// and pressing Enter afterwards submits it so that the agent's reply shows
// up. It is a basic but easy-to-regress piece of the finished product.
func TestChat_PromptIsEditable(t *testing.T) {
	const (
		draft = "What's 2+2?"
		reply = "2 + 2 equals 4."
	)

	ui := newTUI(t, "testdata/basic.yaml", 120, 40)

	// Nothing has been submitted yet, so the text on screen is the draft.
	ui.Type(draft).WaitFor(tuitest.Contains(draft))

	// Submitting sends the draft; the agent's reply then streams in.
	ui.Enter().WaitFor(tuitest.Contains(reply))
}

// TestChat_CopyAssistantMessageToClipboard covers copying an assistant reply
// with the mouse: once the answer is on screen, the pointer is moved onto it,
// the copy label is expected to appear, and clicking that label must put the
// answer text on the clipboard. The sidebar is hidden for this scenario.
func TestChat_CopyAssistantMessageToClipboard(t *testing.T) {
	const answer = "2 + 2 equals 4."

	ui := newTUI(t, "testdata/basic.yaml", 120, 40, tui.WithHideSidebar())

	// Get an assistant message onto the screen first.
	ui.Type("What's 2+2?").Enter().
		WaitFor(tuitest.Contains(answer))

	// Move onto the message, wait for the copy label, click it, and check
	// what landed on the clipboard.
	ui.MoveMouseToText(answer).
		WaitFor(tuitest.Contains(types.AssistantMessageCopyLabel)).
		ClickText(types.AssistantMessageCopyLabel).
		WaitForClipboard(answer)
}

// TestCommandPalette_Opens covers a UI-only interaction: Ctrl+K must bring up
// the command palette overlay and Esc must dismiss it again. No agent
// response is involved, so the test runs against an empty cassette.
func TestCommandPalette_Opens(t *testing.T) {
	// Placeholder text of the palette's search box; seeing it on screen is
	// how the test recognizes that the overlay is open.
	const searchHint = "Type to search commands"

	ui := newTUI(t, "testdata/basic.yaml", 120, 40)

	// Let the initial render finish before sending any keys.
	ui.WaitFor(tuitest.Not(tuitest.Contains("Loading")))

	// Precondition: the palette starts closed.
	ui.Assert(tuitest.Absent(searchHint))

	// Ctrl+K opens the palette...
	ui.Press('k', tea.ModCtrl).WaitFor(tuitest.Contains(searchHint))

	// ...and Esc closes it again.
	ui.Press(tea.KeyEscape).WaitFor(tuitest.Absent(searchHint))
}

// TestGolden_Chat_BasicMath compares the complete frame, taken once the agent
// has answered and the screen has settled, against a stored golden snapshot.
// Any unintended visual drift in the chat surface therefore shows up as a
// diff. After an intentional UI change, regenerate the snapshot with:
//
//	go test ./e2e/tui/ -run TestGolden_Chat_BasicMath -tuitest.update
func TestGolden_Chat_BasicMath(t *testing.T) {
	const (
		answer     = "2 + 2 equals 4."
		settleTime = 200 * time.Millisecond
		goldenName = "chat_basic_math"
	)

	// Two options keep the golden portable across machines and builds:
	//   - the sidebar is hidden, because it would capture the
	//     machine-specific working directory and git branch;
	//   - the version is pinned, because a release build's ldflags-injected
	//     version would otherwise change the status bar.
	ui := newTUI(t, "testdata/basic.yaml", 120, 40,
		tui.WithHideSidebar(),
		tui.WithVersion("test"),
	)

	ui.Type("What's 2+2?").Enter().
		WaitFor(tuitest.Contains(answer)).
		WaitForStable(settleTime).
		AssertGolden(goldenName)
}
135 changes: 135 additions & 0 deletions e2e/tui/setup_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
package tui_test

import (
"context"
"net/http"
"os"
"path/filepath"
"testing"

"github.com/stretchr/testify/require"
"gopkg.in/dnaeon/go-vcr.v4/pkg/recorder"

"github.com/docker/docker-agent/pkg/app"
"github.com/docker/docker-agent/pkg/config"
"github.com/docker/docker-agent/pkg/environment"
"github.com/docker/docker-agent/pkg/fake"
"github.com/docker/docker-agent/pkg/paths"
"github.com/docker/docker-agent/pkg/runtime"
"github.com/docker/docker-agent/pkg/session"
"github.com/docker/docker-agent/pkg/teamloader"
loaderdefaults "github.com/docker/docker-agent/pkg/teamloader/defaults"
"github.com/docker/docker-agent/pkg/tui"
"github.com/docker/docker-agent/pkg/tui/tuitest"
)

// newTUI builds the real top-level TUI model for agentFile, wired to a
// replaying VCR proxy so the agent's responses are deterministic and offline.
// It returns a tuitest.Driver created for the given terminal dimensions.
//
// Parameters:
//   - agentFile: path of the agent config to load. It is a deliberate seam:
//     new scenarios point the harness at other agent configs without touching
//     the helper.
//   - width, height: terminal size handed to the driver.
//   - tuiOpts: extra options forwarded unchanged to tui.New.
//
// State directories (data/config) are redirected to a temp dir so the test
// never touches the developer's real state, and the SQLite session store
// lives under t.TempDir() too. Any setup error fails the test immediately.
// The session store and the runtime are closed through t.Cleanup.
//
//nolint:unparam // agentFile is intentionally parameterized for future scenarios.
func newTUI(t *testing.T, agentFile string, width, height int, tuiOpts ...tui.Option) *tuitest.Driver {
	t.Helper()

	// Redirect persistent state first, before anything below is constructed.
	isolateState(t)

	runConfig := startReplayProxy(t)

	ctx := t.Context()
	agentSource, err := config.Resolve(agentFile, runConfig.EnvProvider())
	require.NoError(t, err)

	loadResult, err := teamloader.LoadWithConfig(ctx, agentSource, runConfig, loaderdefaults.Opts()...)
	require.NoError(t, err)

	team := loadResult.Team
	agent, err := team.AgentOrDefault("")
	require.NoError(t, err)

	store, err := session.NewSQLiteSessionStore(filepath.Join(t.TempDir(), "session.db"))
	require.NoError(t, err)
	t.Cleanup(func() { _ = store.Close() }) // best-effort: a close error must not fail the test

	rt, err := runtime.New(team,
		runtime.WithSessionStore(store),
		runtime.WithCurrentAgent(agent.Name()),
		runtime.WithModelSwitcherConfig(&runtime.ModelSwitcherConfig{
			Models:             loadResult.Models,
			Providers:          loadResult.Providers,
			ModelsGateway:      runConfig.ModelsGateway,
			EnvProvider:        runConfig.EnvProvider(),
			ProviderRegistry:   loadResult.ProviderRegistry,
			AgentDefaultModels: loadResult.AgentDefaultModels,
		}),
	)
	require.NoError(t, err)
	t.Cleanup(func() { _ = rt.Close() }) // best-effort, same as the store

	// Only wire a title generator when the runtime actually provides one.
	var appOpts []app.Opt
	if gen := rt.TitleGenerator(); gen != nil {
		appOpts = append(appOpts, app.WithTitleGenerator(gen))
	}
	application := app.New(ctx, rt, session.New(), appOpts...)

	// Fail loudly rather than silently handing the TUI an empty working
	// directory when the current directory cannot be determined.
	wd, err := os.Getwd()
	require.NoError(t, err)
	model := tui.New(ctx, nil /* no spawner: single tab */, application, wd, func() {}, tuiOpts...)

	return tuitest.New(t, model, width, height)
}

// isolateState points docker-agent's data and config directories at
// subdirectories ("data" and "config") of a fresh per-test temp dir, so the
// TUI's persistent state (tab store, user settings) never touches the real
// home directory. When the test finishes, both overrides are reset to the
// empty string.
//
// NOTE(review): the paths.Set* calls look like process-wide overrides, so
// tests using this helper presumably must not run in parallel — confirm.
func isolateState(t *testing.T) {
	t.Helper()

	root := t.TempDir()
	dataDir := filepath.Join(root, "data")
	configDir := filepath.Join(root, "config")

	paths.SetDataDir(dataDir)
	paths.SetConfigDir(configDir)

	// Undo both overrides once the test is over.
	t.Cleanup(func() {
		paths.SetDataDir("")
		paths.SetConfigDir("")
	})
}

// startReplayProxy starts a VCR proxy in replay-only mode against the cassette
// named after the current test, and returns a RuntimeConfig pointed at it.
// Recordings live in testdata/cassettes/<TestName>.yaml.
//
// The returned config uses the proxy URL as its models gateway and a static
// env provider that only defines the Docker Desktop token (a dummy value).
// The proxy is shut down through t.Cleanup, and a shutdown error fails the
// test.
func startReplayProxy(t *testing.T) *config.RuntimeConfig {
	t.Helper()

	cassettePath := filepath.Join("testdata", "cassettes", t.Name())

	// Report matcher errors with t.Errorf rather than require.NoError: the
	// callback is presumably invoked while the proxy serves a request, i.e.
	// off the test goroutine, where FailNow (which require uses) must not be
	// called. Errorf is safe from any goroutine and still fails the test.
	matcher := fake.DefaultMatcher(func(err error) {
		if err != nil {
			t.Errorf("replay proxy matcher: %v", err)
		}
	})

	proxyURL, cleanup, err := fake.StartProxyWithOptions(
		cassettePath,
		recorder.ModeReplayOnly,
		matcher,
		func(string, *http.Request) {}, // no API keys needed for replay
		nil,
	)
	require.NoError(t, err)
	t.Cleanup(func() { require.NoError(t, cleanup()) })

	return &config.RuntimeConfig{
		Config: config.Config{ModelsGateway: proxyURL},
		EnvProviderForTests: &mapEnvProvider{
			environment.DockerDesktopTokenEnv: "DUMMY",
		},
	}
}

// mapEnvProvider is a static environment.Provider for tests: lookups are
// answered from a fixed in-memory map of variable name to value.
type mapEnvProvider map[string]string

// Get returns the value stored under name and whether such an entry exists.
// The context argument is ignored.
func (m *mapEnvProvider) Get(_ context.Context, name string) (string, bool) {
	value, found := (*m)[name]
	return value, found
}
9 changes: 9 additions & 0 deletions e2e/tui/testdata/basic.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
version: "2"

agents:
root:
model: openai/gpt-3.5-turbo
description: A helpful AI assistant
instruction: |
You are a knowledgeable assistant that helps users with various tasks.
Be helpful, accurate, and concise in your responses.
95 changes: 95 additions & 0 deletions e2e/tui/testdata/cassettes/TestChat_BasicMath.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
---
version: 2
interactions:
- id: 0
request:
proto: HTTP/1.1
proto_major: 1
proto_minor: 1
content_length: 0
host: api.openai.com
body: '{"messages":[{"content":"You are a knowledgeable assistant that helps users with various tasks.\nBe helpful, accurate, and concise in your responses.\n","role":"system"},{"content":"What''s 2+2?","role":"user"}],"model":"gpt-3.5-turbo","stream_options":{"include_usage":true},"stream":true}'
url: https://api.openai.com/v1/chat/completions
method: POST
response:
proto: HTTP/2.0
proto_major: 2
proto_minor: 0
content_length: -1
body: |+
data: {"id":"chatcmpl-Cn4xGpdnsbgLIIeH49VdYprjDjc6P","object":"chat.completion.chunk","created":1765813154,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"31MAoT3T"}

data: {"id":"chatcmpl-Cn4xGpdnsbgLIIeH49VdYprjDjc6P","object":"chat.completion.chunk","created":1765813154,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"2"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"f989QXFJX"}

data: {"id":"chatcmpl-Cn4xGpdnsbgLIIeH49VdYprjDjc6P","object":"chat.completion.chunk","created":1765813154,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" +"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"LjplBv32"}

data: {"id":"chatcmpl-Cn4xGpdnsbgLIIeH49VdYprjDjc6P","object":"chat.completion.chunk","created":1765813154,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"OS4QsUws6"}

data: {"id":"chatcmpl-Cn4xGpdnsbgLIIeH49VdYprjDjc6P","object":"chat.completion.chunk","created":1765813154,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"2"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"TW89LuVuk"}

data: {"id":"chatcmpl-Cn4xGpdnsbgLIIeH49VdYprjDjc6P","object":"chat.completion.chunk","created":1765813154,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" equals"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"vrL"}

data: {"id":"chatcmpl-Cn4xGpdnsbgLIIeH49VdYprjDjc6P","object":"chat.completion.chunk","created":1765813154,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"T8WY0RY3s"}

data: {"id":"chatcmpl-Cn4xGpdnsbgLIIeH49VdYprjDjc6P","object":"chat.completion.chunk","created":1765813154,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"4"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"0kTGMpnIE"}

data: {"id":"chatcmpl-Cn4xGpdnsbgLIIeH49VdYprjDjc6P","object":"chat.completion.chunk","created":1765813154,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"1pmLrWtWp"}

data: {"id":"chatcmpl-Cn4xGpdnsbgLIIeH49VdYprjDjc6P","object":"chat.completion.chunk","created":1765813154,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"Rsle"}

data: {"id":"chatcmpl-Cn4xGpdnsbgLIIeH49VdYprjDjc6P","object":"chat.completion.chunk","created":1765813154,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":41,"completion_tokens":8,"total_tokens":49,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"FdScpa1UzO"}

data: [DONE]

headers: {}
status: 200 OK
code: 200
duration: 376.601459ms
- id: 1
request:
proto: HTTP/1.1
proto_major: 1
proto_minor: 1
content_length: 0
host: api.openai.com
body: '{"messages":[{"content":"You are a helpful AI assistant that generates concise, descriptive titles for conversations. You will be given up to 2 recent user messages and asked to create a single-line title that captures the main topic. Never use newlines or line breaks in your response.","role":"system"},{"content":"Based on the following recent user messages from a conversation with an AI assistant, generate a short, descriptive title (maximum 50 characters) that captures the main topic or purpose of the conversation. Return ONLY the title text on a single line, nothing else. Do not include any newlines, explanations, or formatting.\n\nRecent user messages:\n1. What''s 2+2?\n\n\n","role":"user"}],"model":"gpt-3.5-turbo","stream_options":{"include_usage":true},"stream":true}'
url: https://api.openai.com/v1/chat/completions
method: POST
response:
proto: HTTP/2.0
proto_major: 2
proto_minor: 0
content_length: -1
body: |+
data: {"id":"chatcmpl-Cn4xGduSFgxtSBIWwGDKiENMSp67u","object":"chat.completion.chunk","created":1765813154,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"3praRFP3"}

data: {"id":"chatcmpl-Cn4xGduSFgxtSBIWwGDKiENMSp67u","object":"chat.completion.chunk","created":1765813154,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Simple"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"0Qwz"}

data: {"id":"chatcmpl-Cn4xGduSFgxtSBIWwGDKiENMSp67u","object":"chat.completion.chunk","created":1765813154,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" Math"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"j2ifs"}

data: {"id":"chatcmpl-Cn4xGduSFgxtSBIWwGDKiENMSp67u","object":"chat.completion.chunk","created":1765813154,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":":"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"NiTsH3kiT"}

data: {"id":"chatcmpl-Cn4xGduSFgxtSBIWwGDKiENMSp67u","object":"chat.completion.chunk","created":1765813154,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" Addition"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"x"}

data: {"id":"chatcmpl-Cn4xGduSFgxtSBIWwGDKiENMSp67u","object":"chat.completion.chunk","created":1765813154,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" of"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"eHDT8Y6"}

data: {"id":"chatcmpl-Cn4xGduSFgxtSBIWwGDKiENMSp67u","object":"chat.completion.chunk","created":1765813154,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"ImGlkQ1UD"}

data: {"id":"chatcmpl-Cn4xGduSFgxtSBIWwGDKiENMSp67u","object":"chat.completion.chunk","created":1765813154,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"2"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"r0pbd1luX"}

data: {"id":"chatcmpl-Cn4xGduSFgxtSBIWwGDKiENMSp67u","object":"chat.completion.chunk","created":1765813154,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" and"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"fbgjGd"}

data: {"id":"chatcmpl-Cn4xGduSFgxtSBIWwGDKiENMSp67u","object":"chat.completion.chunk","created":1765813154,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"zJqaxx5ri"}

data: {"id":"chatcmpl-Cn4xGduSFgxtSBIWwGDKiENMSp67u","object":"chat.completion.chunk","created":1765813154,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"2"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"irCJfvsMd"}

data: {"id":"chatcmpl-Cn4xGduSFgxtSBIWwGDKiENMSp67u","object":"chat.completion.chunk","created":1765813154,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"psUW"}

data: {"id":"chatcmpl-Cn4xGduSFgxtSBIWwGDKiENMSp67u","object":"chat.completion.chunk","created":1765813154,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":100,"completion_tokens":10,"total_tokens":110,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"3lP94vJ"}

data: [DONE]

headers: {}
status: 200 OK
code: 200
duration: 443.446208ms
Loading
Loading