Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 63 additions & 16 deletions pkg/config/auto.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@ import (
"errors"
"fmt"
"log/slog"
"maps"
"slices"
"strings"

"github.com/docker/docker-agent/pkg/config/latest"
"github.com/docker/docker-agent/pkg/environment"
"github.com/docker/docker-agent/pkg/model/provider"
)

// DMRModelLister returns the IDs of the models currently available to Docker
Expand All @@ -24,6 +26,14 @@ type providerConfig struct {
name string // provider name (e.g., "anthropic")
envVars []string // env vars to check - provider is available if ANY is set
hint string // description for error messages
// apiKeyEnvVar is the single secret API-key env var that authenticates this
// provider and is safe to forward into an isolated environment (e.g. an eval
// container). It is empty when the provider has no forwardable single secret
// (e.g. amazon-bedrock's multi-variable AWS credentials, or DMR which needs
// none). It intentionally differs from envVars, which also contains non-secret
// detection/mode flags (e.g. GOOGLE_GENAI_USE_VERTEXAI) that must never be
// forwarded as credentials.
apiKeyEnvVar string
}

// cloudProviders defines the available cloud providers in priority order.
Expand All @@ -35,29 +45,29 @@ type providerConfig struct {
// should set the provider explicitly (e.g. `--model opencode-go/...`) rather than
// relying on auto; see docs/providers/opencode-go for details.
var cloudProviders = []providerConfig{
{"anthropic", []string{"ANTHROPIC_API_KEY"}, "ANTHROPIC_API_KEY"},
{"openai", []string{"OPENAI_API_KEY"}, "OPENAI_API_KEY"},
{"anthropic", []string{"ANTHROPIC_API_KEY"}, "ANTHROPIC_API_KEY", "ANTHROPIC_API_KEY"},
{"openai", []string{"OPENAI_API_KEY"}, "OPENAI_API_KEY", "OPENAI_API_KEY"},
{"google", []string{
"GOOGLE_API_KEY",
"GEMINI_API_KEY",
"GOOGLE_GENAI_USE_VERTEXAI",
}, "GOOGLE_API_KEY (or GEMINI_API_KEY, GOOGLE_GENAI_USE_VERTEXAI)"},
{"mistral", []string{"MISTRAL_API_KEY"}, "MISTRAL_API_KEY"},
{"openrouter", []string{"OPENROUTER_API_KEY"}, "OPENROUTER_API_KEY"},
{"baseten", []string{"BASETEN_API_KEY"}, "BASETEN_API_KEY"},
{"ovhcloud", []string{"OVH_AI_ENDPOINTS_ACCESS_TOKEN"}, "OVH_AI_ENDPOINTS_ACCESS_TOKEN"},
{"groq", []string{"GROQ_API_KEY"}, "GROQ_API_KEY"},
{"fireworks", []string{"FIREWORKS_API_KEY"}, "FIREWORKS_API_KEY"},
{"deepseek", []string{"DEEPSEEK_API_KEY"}, "DEEPSEEK_API_KEY"},
{"cerebras", []string{"CEREBRAS_API_KEY"}, "CEREBRAS_API_KEY"},
}, "GOOGLE_API_KEY (or GEMINI_API_KEY, GOOGLE_GENAI_USE_VERTEXAI)", "GOOGLE_API_KEY"},
{"mistral", []string{"MISTRAL_API_KEY"}, "MISTRAL_API_KEY", "MISTRAL_API_KEY"},
{"openrouter", []string{"OPENROUTER_API_KEY"}, "OPENROUTER_API_KEY", "OPENROUTER_API_KEY"},
{"baseten", []string{"BASETEN_API_KEY"}, "BASETEN_API_KEY", "BASETEN_API_KEY"},
{"ovhcloud", []string{"OVH_AI_ENDPOINTS_ACCESS_TOKEN"}, "OVH_AI_ENDPOINTS_ACCESS_TOKEN", "OVH_AI_ENDPOINTS_ACCESS_TOKEN"},
{"groq", []string{"GROQ_API_KEY"}, "GROQ_API_KEY", "GROQ_API_KEY"},
{"fireworks", []string{"FIREWORKS_API_KEY"}, "FIREWORKS_API_KEY", "FIREWORKS_API_KEY"},
{"deepseek", []string{"DEEPSEEK_API_KEY"}, "DEEPSEEK_API_KEY", "DEEPSEEK_API_KEY"},
{"cerebras", []string{"CEREBRAS_API_KEY"}, "CEREBRAS_API_KEY", "CEREBRAS_API_KEY"},
{"amazon-bedrock", []string{
"AWS_BEARER_TOKEN_BEDROCK",
"AWS_ACCESS_KEY_ID",
"AWS_PROFILE",
"AWS_ROLE_ARN",
}, "AWS_ACCESS_KEY_ID (or AWS_PROFILE, AWS_ROLE_ARN, AWS_BEARER_TOKEN_BEDROCK)"},
{"opencode-zen", []string{"OPENCODE_API_KEY"}, "OPENCODE_API_KEY"},
{"opencode-go", []string{"OPENCODE_API_KEY"}, "OPENCODE_API_KEY"},
}, "AWS_ACCESS_KEY_ID (or AWS_PROFILE, AWS_ROLE_ARN, AWS_BEARER_TOKEN_BEDROCK)", ""},
{"opencode-zen", []string{"OPENCODE_API_KEY"}, "OPENCODE_API_KEY", "OPENCODE_API_KEY"},
{"opencode-go", []string{"OPENCODE_API_KEY"}, "OPENCODE_API_KEY", "OPENCODE_API_KEY"},
}

// AutoModelFallbackError is returned when auto model selection fails because
Expand Down Expand Up @@ -124,6 +134,43 @@ var DefaultModels = map[string]string{
"opencode-zen": "deepseek-v4-flash-free",
}

// nonForwardableTokenEnvVars is the deny-list of provider token env vars that
// must not be passed along as model credentials into an isolated environment
// (such as an eval container), even when a provider alias authenticates with
// them.
//
// GITHUB_TOKEN is listed because it is a broad, general-purpose GitHub
// credential (git, gh, CI, packages) that the github-copilot alias merely
// reuses; forwarding it would expose far more access than a dedicated model
// API key would.
var nonForwardableTokenEnvVars = map[string]bool{"GITHUB_TOKEN": true}

// ProviderAPIKeyEnvVars reports the names of the environment variables that
// carry a dedicated model-provider API key, deduplicated and in sorted order.
//
// Use it wherever provider credentials have to be forwarded (for example into
// a container) rather than maintaining a hand-written list of key names; the
// result is derived from the provider tables, so it follows them as providers
// are added.
//
// Names are gathered from the apiKeyEnvVar of every cloudProviders entry and
// from the TokenEnvVar of every alias yielded by provider.EachAlias. Only
// single-secret API keys are reported. Deliberately left out are:
//   - non-secret detection/mode flags (e.g. GOOGLE_GENAI_USE_VERTEXAI),
//   - multi-variable credential sets that cannot be forwarded as one secret
//     (e.g. AWS/Bedrock), which have an empty apiKeyEnvVar,
//   - broad general-purpose tokens listed in nonForwardableTokenEnvVars.
//
// Providers that rely on any of those must be handed credentials explicitly.
func ProviderAPIKeyEnvVars() []string {
	names := make(map[string]bool)

	// record keeps a candidate unless it is empty (the provider has no
	// forwardable key) or explicitly deny-listed.
	record := func(envVar string) {
		if envVar == "" || nonForwardableTokenEnvVars[envVar] {
			return
		}
		names[envVar] = true
	}

	for i := range cloudProviders {
		record(cloudProviders[i].apiKeyEnvVar)
	}
	for _, a := range provider.EachAlias() {
		record(a.TokenEnvVar)
	}

	// Map iteration order is random; sorting makes the output reproducible.
	return slices.Sorted(maps.Keys(names))
}

func AvailableProviders(ctx context.Context, modelsGateway string, env environment.Provider) []string {
if modelsGateway != "" {
// Default to anthropic when using a gateway
Expand Down Expand Up @@ -275,9 +322,9 @@ func looksLikeEmbeddingModel(modelID string) bool {
return strings.Contains(strings.ToLower(modelID), "embed")
}

func PreferredMaxTokens(provider string) *int64 {
func PreferredMaxTokens(providerName string) *int64 {
var mt int64 = 32000
if provider == "dmr" {
if providerName == "dmr" {
mt = 16000
}
return &mt
Expand Down
40 changes: 40 additions & 0 deletions pkg/config/auto_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package config
import (
"context"
"errors"
"slices"
"strings"
"testing"

Expand Down Expand Up @@ -942,3 +943,42 @@ type stubPullError struct {
func (e *stubPullError) Error() string { return e.fullDetail }

func (e *stubPullError) ModelPullErrorSummary() string { return e.summary }

// TestProviderAPIKeyEnvVars checks the shape and contents of the forwardable
// API-key list: it must be sorted and free of duplicates, contain the
// dedicated single-secret provider keys, and omit mode flags, multi-variable
// credential sets and broad general-purpose tokens.
func TestProviderAPIKeyEnvVars(t *testing.T) {
	t.Parallel()

	// Dedicated single-secret model API keys that have to be reported.
	expected := []string{
		"OPENAI_API_KEY",
		"ANTHROPIC_API_KEY",
		"GOOGLE_API_KEY",
		"MISTRAL_API_KEY",
		"OPENROUTER_API_KEY",
		"XAI_API_KEY",
		"NEBIUS_API_KEY",
	}
	// Non-secret detection/mode flags and multi-variable credential sets that
	// must never show up as forwardable API keys.
	forbidden := []string{
		"GOOGLE_GENAI_USE_VERTEXAI",
		"GEMINI_API_KEY",
		"AWS_ACCESS_KEY_ID",
		"AWS_PROFILE",
		"AWS_ROLE_ARN",
		"AWS_BEARER_TOKEN_BEDROCK",
	}

	vars := ProviderAPIKeyEnvVars()

	// Reproducible output: sorted, and compacting a sorted copy changes nothing.
	assert.True(t, slices.IsSorted(vars), "env vars must be sorted, got %v", vars)
	deduped := slices.Compact(slices.Clone(vars))
	assert.Equal(t, deduped, vars, "env vars must be deduplicated")

	for _, want := range expected {
		assert.Contains(t, vars, want)
	}
	for _, banned := range forbidden {
		assert.NotContains(t, vars, banned)
	}

	// A broad, general-purpose token is not a model credential.
	assert.NotContains(t, vars, "GITHUB_TOKEN")
}
42 changes: 16 additions & 26 deletions pkg/evaluation/eval.go
Original file line number Diff line number Diff line change
Expand Up @@ -257,11 +257,8 @@ func (r *Runner) preBuildImages(ctx context.Context, out io.Writer, evals []Inpu
// Count unique images to report an accurate number.
unique := make(map[imageKey]struct{})
for _, eval := range evals {
var key imageKey
if eval.Evals != nil {
key = imageKey{workingDir: eval.Evals.WorkingDir, image: eval.Evals.Image}
}
unique[key] = struct{}{}
criteria := eval.criteria()
unique[imageKey{workingDir: criteria.WorkingDir, image: criteria.Image}] = struct{}{}
}

fmt.Fprintf(out, "Pre-building %d Docker image(s)...\n", len(unique))
Expand Down Expand Up @@ -289,10 +286,7 @@ func (r *Runner) preBuildImages(ctx context.Context, out io.Writer, evals []Inpu
continue
}

criteria := eval.Evals
if criteria == nil {
criteria = &session.EvalCriteria{}
}
criteria := eval.criteria()

_, err := r.getOrBuildImage(ctx, criteria)
results <- buildResult{title: eval.Title, err: err}
Expand Down Expand Up @@ -325,12 +319,7 @@ func (r *Runner) runSingleEval(ctx context.Context, evalSess *InputSession) (Res

slog.DebugContext(ctx, "Starting evaluation", "title", title)

var evals *session.EvalCriteria
if evalSess.Evals != nil {
evals = evalSess.Evals
} else {
evals = &session.EvalCriteria{}
}
evals := evalSess.criteria()

userMessages := getUserMessages(evalSess.Session)

Expand Down Expand Up @@ -418,20 +407,23 @@ func (r *Runner) runDockerAgentInContainer(ctx context.Context, imageID string,
)

var env []string
// addEnv forwards a variable to the container: "-e NAME" tells docker to
// pass it through, and NAME=VALUE sets it on the docker process.
addEnv := func(name, value string) {
args = append(args, "-e", name)
env = append(env, name+"="+value)
}

if r.runConfig.ModelsGateway != "" {
args = append(args, "-e", "DOCKER_AGENT_MODELS_GATEWAY")
env = append(env, "DOCKER_AGENT_MODELS_GATEWAY="+r.runConfig.ModelsGateway)
addEnv("DOCKER_AGENT_MODELS_GATEWAY", r.runConfig.ModelsGateway)

if token, ok := r.runConfig.EnvProvider().Get(ctx, environment.DockerDesktopTokenEnv); ok && token != "" {
args = append(args, "-e", environment.DockerDesktopTokenEnv)
env = append(env, environment.DockerDesktopTokenEnv+"="+token)
addEnv(environment.DockerDesktopTokenEnv, token)
}
} else {
for _, name := range []string{"OPENAI_API_KEY", "ANTHROPIC_API_KEY", "GOOGLE_API_KEY", "MISTRAL_API_KEY", "OPENROUTER_API_KEY", "XAI_API_KEY", "NEBIUS_API_KEY"} {
for _, name := range config.ProviderAPIKeyEnvVars() {
if val, ok := r.runConfig.EnvProvider().Get(ctx, name); ok && val != "" {
args = append(args, "-e", name)
env = append(env, name+"="+val)
addEnv(name, val)
}
}
}
Expand All @@ -440,11 +432,9 @@ func (r *Runner) runDockerAgentInContainer(ctx context.Context, imageID string,
// Format: KEY or KEY=VALUE
for _, entry := range r.EnvVars {
if key, val, hasValue := strings.Cut(entry, "="); hasValue && key != "" {
args = append(args, "-e", key)
env = append(env, key+"="+val)
addEnv(key, val)
} else if val, ok := r.runConfig.EnvProvider().Get(ctx, entry); ok && entry != "" {
args = append(args, "-e", entry)
env = append(env, entry+"="+val)
addEnv(entry, val)
}
}

Expand Down
9 changes: 9 additions & 0 deletions pkg/evaluation/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,15 @@ func (s *InputSession) displayTitle() string {
return s.Title
}

// criteria gives callers a never-nil *session.EvalCriteria for this session:
// the session's own Evals when it declares some, otherwise a freshly
// allocated zero-value EvalCriteria.
func (s *InputSession) criteria() *session.EvalCriteria {
	if s.Evals == nil {
		return &session.EvalCriteria{}
	}
	return s.Evals
}

// Result contains the evaluation results for a single test case.
type Result struct {
InputPath string `json:"input_path"`
Expand Down
Loading