diff --git a/feature/dockerfile.go b/feature/dockerfile.go index 7eb0ae4..8b26940 100644 --- a/feature/dockerfile.go +++ b/feature/dockerfile.go @@ -108,7 +108,6 @@ func GenerateBuildContext(plan BuildPlan, dst string) error { // generateDockerfile returns the contents of the Dockerfile that layers // the configured features on top of plan.BaseImage. Structure: // -// # syntax=docker/dockerfile:1.4 // ARG _DEV_CONTAINERS_BASE_IMAGE=... // FROM $_DEV_CONTAINERS_BASE_IMAGE AS devcontainer_target // USER root @@ -119,9 +118,14 @@ func GenerateBuildContext(plan BuildPlan, dst string) error { // LABEL devcontainer.metadata='[...]' // ARG _DEV_CONTAINERS_IMAGE_USER=root // USER $_DEV_CONTAINERS_IMAGE_USER +// +// Intentionally no `# syntax=docker/dockerfile:X` directive: nothing +// in the emitted file needs a non-builtin frontend, and declaring one +// forces buildkit to pull `docker/dockerfile:*` from a registry — +// which requires a session to forward credentials (we don't open +// one) and hangs indefinitely behind broken registry mirrors. func generateDockerfile(plan BuildPlan) (string, error) { var b strings.Builder - b.WriteString("# syntax=docker/dockerfile:1.4\n") fmt.Fprintf(&b, "ARG _DEV_CONTAINERS_BASE_IMAGE=%s\n", plan.BaseImage) b.WriteString("FROM $_DEV_CONTAINERS_BASE_IMAGE AS devcontainer_target\n\n") diff --git a/feature/dockerfile_test.go b/feature/dockerfile_test.go index c602a3e..28ca8d6 100644 --- a/feature/dockerfile_test.go +++ b/feature/dockerfile_test.go @@ -58,7 +58,6 @@ func TestGenerateBuildContext_WritesExpectedLayout(t *testing.T) { } wantSubstrings := []string{ - "# syntax=docker/dockerfile:1.4", "ARG _DEV_CONTAINERS_BASE_IMAGE=alpine:3.20", "FROM $_DEV_CONTAINERS_BASE_IMAGE", "COPY ./build-context/ /tmp/dc-features/", @@ -76,6 +75,12 @@ func TestGenerateBuildContext_WritesExpectedLayout(t *testing.T) { t.Errorf("Dockerfile missing %q", want) } } + // Frontend directive must not be reintroduced: buildkit treats it as + // a hard frontend-pull requirement, which we don't supply credentials + // for (no session) and which hangs behind broken registry mirrors. + if strings.Contains(string(df), "# syntax=") { + t.Errorf("Dockerfile must not declare a syntax= frontend; built-in is sufficient") + } // Per-feature dirs populated with run.sh, feature.env, install.sh. for _, idx := range []string{"0", "1"} { diff --git a/runtime/docker/build.go b/runtime/docker/build.go index 8b8b349..0b56473 100644 --- a/runtime/docker/build.go +++ b/runtime/docker/build.go @@ -11,6 +11,7 @@ import ( "path/filepath" "strings" + "github.com/moby/moby/api/types/build" "github.com/moby/moby/client" "github.com/crunchloop/devcontainer/runtime" @@ -22,6 +23,15 @@ import ( // Streaming progress messages are mapped onto the events channel as // runtime.BuildEvents (drop-on-full). Build failures surface as a // non-nil error including any structured error returned by the daemon. +// +// BuildKit is required. The classic builder synthesizes one +// intermediate container per Dockerfile step and routes every +// container API through the daemon's authorization pipeline, which +// — behind an authz plugin — turns a sub-second build into a +// multi-minute one (~140× slowdown observed in production). BuildKit +// uses a single streaming session and is unaffected. Docker Engine +// has shipped with BuildKit enabled by default since 23.0 (Feb 2023); +// requiring it here is in line with the lib's modern-spec stance. func (r *Runtime) BuildImage(ctx context.Context, spec runtime.BuildSpec, events chan<- runtime.BuildEvent) (runtime.ImageRef, error) { if spec.ContextPath == "" { return runtime.ImageRef{}, fmt.Errorf("BuildImage: spec.ContextPath required") @@ -49,6 +59,16 @@ func (r *Runtime) BuildImage(ctx context.Context, spec runtime.BuildSpec, events args[k] = &v } + // Pre-pull base images so BuildKit can resolve them from the local + // image store. BuildKit refuses remote metadata resolution without + // an active session ("no active sessions" — the session is the + // callback channel for registry credentials, mandatory even for + // anonymous public pulls). Pulling via the classic ImagePull API + // (which is unaffected by the session requirement) seeds the local + // store; BuildKit then picks up the cached manifest instead of + // reaching for the registry. + prePullBaseImages(ctx, r, spec.ContextPath, dockerfile, spec.Args, events) + res, err := r.api.ImageBuild(ctx, pr, client.ImageBuildOptions{ Dockerfile: dockerfile, Tags: []string{spec.Tag}, @@ -57,6 +77,7 @@ func (r *Runtime) BuildImage(ctx context.Context, spec runtime.BuildSpec, events CacheFrom: spec.CacheFrom, NoCache: spec.NoCache, Remove: true, + Version: build.BuilderBuildKit, }) if err != nil { return runtime.ImageRef{}, fmt.Errorf("ImageBuild: %w", err) @@ -81,6 +102,22 @@ func (r *Runtime) BuildImage(ctx context.Context, spec runtime.BuildSpec, events // streamBuildOutput parses the JSON-line stream from ImageBuild, // emitting BuildEvents and returning a non-nil error if the daemon // reports a build failure. Closes body before returning. +// +// Two response shapes are handled: +// +// - Classic-builder-style records with `stream` (log lines) and +// `status` (layer/pull progress) fields. Pre-BuildKit format. +// +// - BuildKit records of the form `{"id":"moby.buildkit.trace", +// "aux":""}` for per-step progress and +// `{"id":"moby.image.id","aux":{"ID":"sha256:..."}}` for the +// final image. The aux protobuf is buildkit's `SolveStatus` — +// decoding requires the buildkit module. We intentionally don't +// pull that dep in: per-step progress events are silently dropped +// under BuildKit; BuildStart / BuildCompleted (emitted by the +// caller and at the end of BuildImage) still fire correctly, and +// errors still propagate via `errorDetail` / `error` fields. +// A future PR can revisit if vertex-level progress is needed. func streamBuildOutput(ctx context.Context, body io.ReadCloser, events chan<- runtime.BuildEvent) error { defer body.Close() @@ -129,12 +166,15 @@ func streamBuildOutput(ctx context.Context, body io.ReadCloser, events chan<- ru } // tarDirectory writes the contents of dir (recursively) into w as a -// non-gzipped tar archive. Symlinks are followed (their targets are -// included as regular files) — the build daemon doesn't need our -// engineering tmp dirs to preserve link semantics. +// non-gzipped tar archive. Symlinks are preserved as tar TypeSymlink +// entries with their original target text; the daemon-side BuildKit +// frontend handles the resolution. // -// Empty / unreadable files are best-effort logged via the returned -// error rather than silently dropped. +// Previously this passed an empty link argument to tar.FileInfoHeader +// for symlinks, producing tar entries with TypeSymlink + empty +// Linkname. Some downstream tar readers reject those as malformed and +// abort the build mid-stream — common in compose-primary contexts +// containing node_modules/.bin/* or similar bin-symlinks. func tarDirectory(dir string, w io.Writer) error { tw := tar.NewWriter(w) defer func() { _ = tw.Close() }() @@ -157,7 +197,16 @@ func tarDirectory(dir string, w io.Writer) error { if err != nil { return err } - hdr, err := tar.FileInfoHeader(info, "") + + var link string + isSymlink := info.Mode()&os.ModeSymlink != 0 + if isSymlink { + link, err = os.Readlink(path) + if err != nil { + return fmt.Errorf("readlink %s: %w", path, err) + } + } + hdr, err := tar.FileInfoHeader(info, link) if err != nil { return fmt.Errorf("tar header for %s: %w", path, err) } @@ -169,7 +218,7 @@ func tarDirectory(dir string, w io.Writer) error { if err := tw.WriteHeader(hdr); err != nil { return err } - if d.IsDir() || (info.Mode()&os.ModeSymlink != 0 && hdr.Typeflag != tar.TypeReg) { + if d.IsDir() || isSymlink { return nil } f, err := os.Open(path) @@ -181,3 +230,180 @@ func tarDirectory(dir string, w io.Writer) error { return err }) } + +// prePullBaseImages reads the Dockerfile at contextPath/dockerfile, +// extracts FROM image references (resolving simple $ARG substitutions +// against ARG defaults in the file and the spec's BuildArgs +// overrides), and pulls each that isn't already in the local image +// store. Best-effort: any pull error is silently ignored — the +// subsequent ImageBuild surfaces a clearer "failed to resolve" error +// if the image really can't be obtained, and images that exist only +// locally (e.g. dc-go-base-*) correctly skip the pull attempt because +// ImageInspect finds them. +func prePullBaseImages(ctx context.Context, r *Runtime, contextPath, dockerfile string, buildArgs map[string]string, events chan<- runtime.BuildEvent) { + df, err := os.ReadFile(filepath.Join(contextPath, dockerfile)) + if err != nil { + return + } + for _, ref := range extractBaseImages(string(df), buildArgs) { + if _, err := r.api.ImageInspect(ctx, ref); err == nil { + continue + } + _, _ = r.PullImage(ctx, ref, events) + } +} + +// extractBaseImages does a naive line-based parse of Dockerfile +// content, returning the de-duplicated list of resolved FROM image +// references. Supports: +// +// - Literal FROM lines (`FROM alpine:3.20`, with optional +// `AS ` and `--platform=...` flags). +// - $VAR / ${VAR} substitution against the file's `ARG VAR=default` +// lines, with overrides from buildArgs taking precedence. +// +// FROMs whose ref still contains an unresolved `$` after substitution +// are dropped — we'd rather miss a pre-pull (and let BuildKit surface +// its native error) than guess wrong. Stage references (`FROM +// previous-stage AS …`) are also dropped since they're not image +// refs; we detect them by skipping refs that match a prior AS name. +func extractBaseImages(content string, buildArgs map[string]string) []string { + args := map[string]string{} + for k, v := range buildArgs { + args[k] = v + } + stages := map[string]bool{} + seen := map[string]bool{} + var out []string + + for _, raw := range strings.Split(content, "\n") { + line := strings.TrimSpace(raw) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + fields := strings.Fields(line) + if len(fields) < 2 { + continue + } + kw := strings.ToUpper(fields[0]) + switch kw { + case "ARG": + rest := strings.TrimSpace(line[len("ARG"):]) + if eq := strings.IndexByte(rest, '='); eq > 0 { + name := strings.TrimSpace(rest[:eq]) + val := strings.TrimSpace(rest[eq+1:]) + // Only set if not already overridden by buildArgs. + if _, override := args[name]; !override { + args[name] = val + } + } + case "FROM": + rest := strings.TrimSpace(line[len("FROM"):]) + // Drop leading flags (--platform=..., --link, ...). + for strings.HasPrefix(rest, "--") { + if sp := strings.IndexByte(rest, ' '); sp > 0 { + rest = strings.TrimSpace(rest[sp+1:]) + } else { + rest = "" + } + } + // Trim "AS " suffix, recording the stage name. + ref := rest + if idx := indexFoldWord(rest, "AS"); idx > 0 { + ref = strings.TrimSpace(rest[:idx]) + stageName := strings.TrimSpace(rest[idx+len("AS"):]) + if stageName != "" { + stages[stageName] = true + } + } + ref = substituteArgs(ref, args) + if ref == "" || strings.Contains(ref, "$") { + continue + } + if stages[ref] { + continue + } + if !seen[ref] { + seen[ref] = true + out = append(out, ref) + } + } + } + return out +} + +// indexFoldWord returns the byte index of the first occurrence of +// `word` in s as a standalone whitespace-delimited token, matching +// case-insensitively. Returns -1 if not found. +func indexFoldWord(s, word string) int { + wlen := len(word) + for i := 0; i+wlen <= len(s); i++ { + if !strings.EqualFold(s[i:i+wlen], word) { + continue + } + // Must be at start or preceded by whitespace. + if i > 0 && s[i-1] != ' ' && s[i-1] != '\t' { + continue + } + // Must be at end or followed by whitespace. + if i+wlen < len(s) && s[i+wlen] != ' ' && s[i+wlen] != '\t' { + continue + } + return i + } + return -1 +} + +// substituteArgs replaces $VAR and ${VAR} occurrences in ref with the +// corresponding value from args. Leaves unknown vars unsubstituted so +// the caller can detect and skip refs with unresolved vars. +func substituteArgs(ref string, args map[string]string) string { + var b strings.Builder + i := 0 + for i < len(ref) { + if ref[i] != '$' { + b.WriteByte(ref[i]) + i++ + continue + } + // $ at end of string — keep as-is. + if i+1 >= len(ref) { + b.WriteByte('$') + i++ + continue + } + var name string + var consumed int + if ref[i+1] == '{' { + end := strings.IndexByte(ref[i+2:], '}') + if end < 0 { + b.WriteByte('$') + i++ + continue + } + name = ref[i+2 : i+2+end] + consumed = 2 + end + 1 + } else { + j := i + 1 + for j < len(ref) && (isIdentByte(ref[j])) { + j++ + } + name = ref[i+1 : j] + consumed = j - i + } + if val, ok := args[name]; ok { + b.WriteString(val) + } else { + b.WriteString(ref[i : i+consumed]) + } + i += consumed + } + return b.String() +} + +func isIdentByte(b byte) bool { + return b == '_' || + (b >= 'a' && b <= 'z') || + (b >= 'A' && b <= 'Z') || + (b >= '0' && b <= '9') +} diff --git a/runtime/docker/build_test.go b/runtime/docker/build_test.go new file mode 100644 index 0000000..3e9f7a2 --- /dev/null +++ b/runtime/docker/build_test.go @@ -0,0 +1,114 @@ +package docker + +import ( + "reflect" + "testing" +) + +func TestExtractBaseImages(t *testing.T) { + cases := []struct { + name string + dockerfile string + buildArgs map[string]string + want []string + }{ + { + name: "literal FROM", + dockerfile: `FROM alpine:3.20 +RUN echo hi`, + want: []string{"alpine:3.20"}, + }, + { + name: "ARG default substituted into FROM", + dockerfile: `ARG BASE=alpine:3.20 +FROM $BASE +RUN echo hi`, + want: []string{"alpine:3.20"}, + }, + { + name: "buildArgs override ARG default", + dockerfile: `ARG BASE=alpine:3.20 +FROM ${BASE}`, + buildArgs: map[string]string{"BASE": "debian:bookworm-slim"}, + want: []string{"debian:bookworm-slim"}, + }, + { + name: "FROM AS stage; subsequent FROM refers to stage", + dockerfile: `FROM alpine:3.20 AS builder +RUN echo a > /a +FROM builder +RUN cat /a`, + want: []string{"alpine:3.20"}, + }, + { + name: "multi-stage with distinct base images", + dockerfile: `FROM golang:1.25 AS build +FROM alpine:3.20`, + want: []string{"golang:1.25", "alpine:3.20"}, + }, + { + name: "FROM with --platform flag", + dockerfile: `FROM --platform=linux/amd64 alpine:3.20`, + want: []string{"alpine:3.20"}, + }, + { + name: "unresolved ARG dropped", + dockerfile: `FROM $UNDEFINED_BASE +FROM alpine:3.20`, + want: []string{"alpine:3.20"}, + }, + { + name: "comments and blank lines ignored", + dockerfile: `# comment + # indented comment + +FROM alpine:3.20`, + want: []string{"alpine:3.20"}, + }, + { + name: "lowercase from is recognized", + dockerfile: `from alpine:3.20`, + want: []string{"alpine:3.20"}, + }, + { + name: "empty input", + dockerfile: "", + want: nil, + }, + { + name: "dedup repeated FROMs", + dockerfile: `FROM alpine:3.20 AS a +FROM alpine:3.20 AS b`, + want: []string{"alpine:3.20"}, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := extractBaseImages(tc.dockerfile, tc.buildArgs) + if !reflect.DeepEqual(got, tc.want) { + t.Errorf("extractBaseImages = %v, want %v", got, tc.want) + } + }) + } +} + +func TestSubstituteArgs(t *testing.T) { + args := map[string]string{"X": "alpine", "Y": "3.20"} + cases := []struct { + in, want string + }{ + {"$X:$Y", "alpine:3.20"}, + {"${X}:${Y}", "alpine:3.20"}, + {"prefix-$X-suffix", "prefix-alpine-suffix"}, + {"$UNKNOWN", "$UNKNOWN"}, + {"${UNKNOWN}", "${UNKNOWN}"}, + {"$", "$"}, + {"no-vars-here", "no-vars-here"}, + } + for _, tc := range cases { + got := substituteArgs(tc.in, args) + if got != tc.want { + t.Errorf("substituteArgs(%q) = %q, want %q", tc.in, got, tc.want) + } + } +} diff --git a/test/integration/build_context_symlink_test.go b/test/integration/build_context_symlink_test.go new file mode 100644 index 0000000..1d6e4c8 --- /dev/null +++ b/test/integration/build_context_symlink_test.go @@ -0,0 +1,82 @@ +//go:build integration + +package integration + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + "time" + + devcontainer "github.com/crunchloop/devcontainer" +) + +// TestBuildContext_SurvivesSymlinks builds a Dockerfile-source workspace +// whose context directory contains a symlink that is not referenced by +// any COPY instruction. Regression guard for a tar-packing bug where +// symlink entries were written with TypeSymlink + empty Linkname; some +// tar readers (including dockerd's older path) reject those as +// malformed and abort the build with an opaque tar error. +// +// The presence of a symlink in node_modules/.bin/* or vendored +// dependencies is the canonical real-world case — Dockerfile usually +// only COPYs ./pkg/, but the daemon still has to stream the whole +// context tar past its reader before it can prune. +func TestBuildContext_SurvivesSymlinks(t *testing.T) { + if testing.Short() { + t.Skip("integration tests skipped with -short") + } + + eng, rt := newEngine(t) + defer rt.Close() + + dir := t.TempDir() + + // Real file in the context that the symlink will point at. + mustWrite(t, filepath.Join(dir, "target.txt"), "real-content\n") + + // Relative symlink target.txt → ./target.txt, mimicking + // node_modules/.bin/* link layout. + if err := os.Symlink("target.txt", filepath.Join(dir, "link.txt")); err != nil { + t.Fatalf("symlink: %v", err) + } + + // Trivial Dockerfile that only COPYs the regular file. The symlink + // is unreferenced — the test is that the tar stream survives, not + // that COPY of the symlink works. + mustWrite(t, filepath.Join(dir, "Dockerfile"), ` +FROM alpine:3.20 +COPY target.txt /etc/target.txt +RUN echo built > /etc/symlink-build-marker +`) + + mustWrite(t, filepath.Join(dir, ".devcontainer", "devcontainer.json"), `{ + "build": { "dockerfile": "Dockerfile", "context": ".." } + }`) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + defer cancel() + + wsObj, err := eng.Up(ctx, devcontainer.UpOptions{ + LocalWorkspaceFolder: dir, + Recreate: true, + SkipLifecycle: true, + }) + if err != nil { + t.Fatalf("Up with symlinked context: %v", err) + } + defer func() { _ = eng.Down(context.Background(), wsObj, devcontainer.DownOptions{Remove: true}) }() + + res, err := eng.Exec(ctx, wsObj, devcontainer.ExecOptions{ + Cmd: []string{"cat", "/etc/symlink-build-marker"}, + }) + if err != nil { + t.Fatalf("Exec marker: %v", err) + } + if res.ExitCode != 0 || !strings.Contains(res.Stdout, "built") { + t.Errorf("marker not present (build did not complete?): exit=%d stdout=%q stderr=%q", + res.ExitCode, res.Stdout, res.Stderr) + } +}