Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Acb/with docker caching #4064

Merged
merged 2 commits into from
May 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ast/commandflag/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ type WithDockerOpts struct {
Pulls []string `long:"pull" description:"An image which is pulled and made available in the docker cache"`
AllowPrivileged bool `long:"allow-privileged" description:"Allow targets referenced by load to assume privileged mode"`
PassArgs bool `long:"pass-args" description:"Pass arguments to external targets"`
CacheID string `long:"cache-id" description:"When specified, layer data will be persisted to specified cache"`
}

type DoOpts struct {
Expand Down
54 changes: 48 additions & 6 deletions buildkitd/dockerd-wrapper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ fi
# This host is used to pull images from the embedded BuildKit Docker registry.
buildkit_docker_registry='172.30.0.1:8371'

# used to prefix images that are persisted to the WITH DOCKER cache
earthly_cached_docker_image_prefix="earthly_cached_"

detect_docker_compose_cmd() {
if command -v docker-compose >/dev/null; then
echo "docker-compose"
Expand Down Expand Up @@ -95,8 +98,23 @@ execute() {
fi
done

if [ "$EARTHLY_DOCKERD_CACHE_DATA" = "true" ]; then
# rename existing tags, so we can track which ones get re-tagged
for img in $(docker images -q); do
docker tag "$img" "${earthly_cached_docker_image_prefix}${img}"
done
docker images -a --format '{{.Repository}}:{{.Tag}}' | grep -v "^$earthly_cached_docker_image_prefix" | xargs --no-run-if-empty docker rmi
fi

load_file_images
load_registry_images

# delete cached images (which weren't re-tagged via the pull)
if [ "$EARTHLY_DOCKERD_CACHE_DATA" = "true" ]; then
docker images -f reference=$earthly_cached_docker_image_prefix'*' --format '{{.Repository}}:{{.Tag}}' | xargs --no-run-if-empty docker rmi
docker images -f "dangling=true" -q | xargs --no-run-if-empty docker rmi
fi

if [ "$EARTHLY_START_COMPOSE" = "true" ]; then
# shellcheck disable=SC2086
docker_compose_cmd up -d $EARTHLY_COMPOSE_SERVICES
Expand All @@ -117,7 +135,11 @@ execute() {
}

start_dockerd() {
data_root=$(TMPDIR="$EARTHLY_DOCKERD_DATA_ROOT/" mktemp -d)
if [ "$EARTHLY_DOCKERD_CACHE_DATA" = "true" ]; then
data_root="$EARTHLY_DOCKERD_DATA_ROOT"
else
data_root=$(TMPDIR="$EARTHLY_DOCKERD_DATA_ROOT/" mktemp -d)
fi
echo "Starting dockerd with data root $data_root"

if uname -a | grep microsoft-standard-WSL >/dev/null; then
Expand Down Expand Up @@ -230,6 +252,9 @@ stop_dockerd() {
}

wipe_data_root() {
if [ "$EARTHLY_DOCKERD_CACHE_DATA" = "true" ]; then
return 0
fi
if ! rm -rf "$1" 2>/dev/null >&2 && [ -n "$(ls -A "$1")" ]; then
# We have some issues about failing to delete files.
# If we fail, list the processes keeping it open for results.
Expand Down Expand Up @@ -262,12 +287,29 @@ load_file_images() {
fi
}

# get_current_time_ns prints the current time, in nanoseconds since the Unix
# epoch, on stdout.
#
# busybox does not support `date +%s%N`, so the time is taken from the
# modification time of a freshly created temp file: whole seconds come from
# field 13 of `stat -t`, and the fractional nanoseconds come from the
# "Modify:" line of `stat`.
#
# Returns non-zero, with a message on stderr, if the seconds cannot be read.
get_current_time_ns() {
    f="$(mktemp)"
    current_time="$(stat -t "$f" | awk '{print $13}')"
    # Leading zeros are stripped because shell arithmetic would otherwise parse
    # the fraction as an octal constant. sed is used instead of
    # `grep -o '[1-9].*'` because it exits 0 even when the fraction is all
    # zeros and nothing is left, so the pipeline never reports a failure.
    current_time_ns="$(stat "$f" | grep Modify | awk '{print $3}' | awk -F . '{print $2}' | sed 's/^0*//')"
    rm -f "$f"

    # An all-zero (or absent) fraction strips down to "", which means 0 ns.
    if [ -z "$current_time_ns" ]; then
        current_time_ns=0
    fi

    # The diagnostic goes to stderr so that callers capturing stdout never
    # mistake it for a timestamp, and `return` (rather than `exit` inside a
    # subshell) actually stops the function before the arithmetic below.
    if [ -z "$current_time" ]; then
        echo "current_time is empty" >&2
        return 1
    fi

    echo "$((current_time*1000000000+current_time_ns))"
}

load_registry_images() {
EARTHLY_DOCKER_LOAD_REGISTRY=${EARTHLY_DOCKER_LOAD_REGISTRY:-''}
if [ -n "$EARTHLY_DOCKER_LOAD_REGISTRY" ]; then
echo "Loading images from BuildKit via embedded registry..."

start_time=$(date +%s%N | cut -b1-13)
start_time="$(get_current_time_ns)"
bg_processes="" # Initialize the background processes variable

for img in $EARTHLY_DOCKER_LOAD_REGISTRY; do
Expand Down Expand Up @@ -299,10 +341,10 @@ load_registry_images() {
exit 1
}
done
end_time=$(date +%s%N | cut -b1-13)

elapsed=$((end_time-start_time))
echo "Loading images done in ${elapsed} ms"
end_time="$(get_current_time_ns)"
elapsed_ns="$((end_time - start_time))"
elapsed_ms="$((elapsed_ns/1000000))"
echo "Loading images done in ${elapsed_ms} ms"
fi
}

Expand Down
5 changes: 3 additions & 2 deletions buildkitd/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,9 @@ if [ ! -e "/sbin/$IP_TABLES" ]; then
fi
ln -sf "/sbin/$IP_TABLES" /sbin/iptables

# clear any leftovers in the dind dir
rm -rf "$EARTHLY_TMP_DIR/dind"
# Clear any leftovers in the dind dir, keeping entries that are explicitly
# cached (directory names prefixed with "cache_").
#
# The directory is created first so that find never runs against a missing
# path. The location is derived from $EARTHLY_TMP_DIR rather than a hard-coded
# /tmp/earthly so that the cleanup and the mkdir always refer to the same
# directory. The "cache_" match is applied to the entry name only (not the
# whole path), and removal is done by find itself so that names containing
# whitespace are handled correctly.
mkdir -p "$EARTHLY_TMP_DIR/dind"
find "$EARTHLY_TMP_DIR/dind" -mindepth 1 -maxdepth 1 ! -name 'cache_*' -exec rm -rf {} \;

# setup git credentials and config
Expand Down
7 changes: 7 additions & 0 deletions earthfile2llb/interpreter.go
Original file line number Diff line number Diff line change
Expand Up @@ -780,6 +780,7 @@ func (i *Interpreter) handleRun(ctx context.Context, cmd spec.Command) error {
i.withDocker.NoCache = opts.NoCache
i.withDocker.Interactive = opts.Interactive
i.withDocker.interactiveKeep = opts.InteractiveKeep

// TODO: Could this be allowed in the future, if dynamic build args
// are expanded ahead of time?
allowParallel := true
Expand Down Expand Up @@ -1806,6 +1807,12 @@ func (i *Interpreter) handleWithDocker(ctx context.Context, cmd spec.Command) er
PassArgs: opts.PassArgs,
})
}
// --cache-id is an unreleased option gated behind the VERSION --docker-cache
// feature flag (Features.DockerCache); reject it when that feature is off.
if opts.CacheID != "" {
	if !i.converter.ftrs.DockerCache {
		return i.errorf(cmd.SourceLocation, "the WITH DOCKER --cache-id flag must be enabled with the VERSION --docker-cache feature flag.")
	}
	i.withDocker.CacheID = opts.CacheID
}
return nil
}

Expand Down
3 changes: 3 additions & 0 deletions earthfile2llb/with_docker_run_base.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ type WithDockerOpt struct {
ComposeServices []string
TryCatchSaveArtifacts []debuggercommon.SaveFilesSettings
extraRunOpts []llb.RunOption
CacheID string
}

type withDockerRunBase struct {
Expand Down Expand Up @@ -182,9 +183,11 @@ func (w *withDockerRunBase) getComposeConfig(ctx context.Context, opt WithDocker
}

func makeWithDockerdWrapFun(dindID string, tarPaths []string, imgsWithDigests []string, opt WithDockerOpt) shellWrapFun {
cacheDataRoot := strings.HasPrefix(dindID, "cache_")
dockerRoot := path.Join("/var/earthly/dind", dindID)
params := []string{
fmt.Sprintf("EARTHLY_DOCKERD_DATA_ROOT=\"%s\"", dockerRoot),
fmt.Sprintf("EARTHLY_DOCKERD_CACHE_DATA=\"%v\"", cacheDataRoot),
fmt.Sprintf("EARTHLY_DOCKER_LOAD_FILES=\"%s\"", strings.Join(tarPaths, " ")),
// This is not actually used, but it is needed in order to bust the cache
// in case an image is updated.
Expand Down
12 changes: 9 additions & 3 deletions earthfile2llb/with_docker_run_reg.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,9 +205,15 @@ func (w *withDockerRunRegistry) Run(ctx context.Context, args []string, opt With
dockerdWrapperPath, pllb.Scratch(), llb.HostBind(), llb.SourcePath(dockerdWrapperPath)))
crOpts.extraRunOpts = append(crOpts.extraRunOpts, opt.extraRunOpts...)

dindID, err := w.c.mts.Final.TargetInput().Hash()
if err != nil {
return errors.Wrap(err, "make dind ID")
var dindID string
if opt.CacheID == "" {
dindID, err = w.c.mts.Final.TargetInput().Hash()
if err != nil {
return errors.Wrap(err, "make dind ID")
}
} else {
// Note that the "cache_" prefix here is used to prevent auto-cleanup
dindID = "cache_" + opt.CacheID
}
// We will pass along the variable EARTHLY_DOCKER_LOAD_REGISTRY via a secret
// to prevent busting the cache, as the intermediate image names are
Expand Down
4 changes: 4 additions & 0 deletions features/features.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ type Features struct {
// Never enabled by default
NoUseRegistryForWithDocker bool `long:"no-use-registry-for-with-docker" description:"disable use-registry-for-with-docker"` // escape hatch for disabling WITH DOCKER registry, e.g. used by eine-based tests
EarthlyCIRunnerArg bool `long:"earthly-ci-runner-arg" description:"includes EARTHLY_CI_RUNNER ARG"` // earthly CI was discontinued, no reason to enable this by default

// VERSION 0.5
ExecAfterParallel bool `long:"exec-after-parallel" enabled_in_version:"0.5" description:"force execution after parallel conversion"`
ParallelLoad bool `long:"parallel-load" enabled_in_version:"0.5" description:"perform parallel loading of images into WITH DOCKER"`
Expand Down Expand Up @@ -65,6 +66,7 @@ type Features struct {
GitRefs bool `long:"git-refs" enabled_in_version:"0.8" description:"includes EARTHLY_GIT_REFS ARG"`
UseVisitedUpfrontHashCollection bool `long:"use-visited-upfront-hash-collection" enabled_in_version:"0.8" description:"Uses a new target visitor implementation that computes upfront the hash of the visited targets and adds support for running all targets with the same name but different args in parallel"`
UseFunctionKeyword bool `long:"use-function-keyword" enabled_in_version:"0.8" description:"Use the FUNCTION key word instead of COMMAND"`

// unreleased
TryFinally bool `long:"try" description:"allow the use of the TRY/FINALLY commands"`
WildcardBuilds bool `long:"wildcard-builds" description:"allow for the expansion of wildcard (glob) paths for BUILD commands"`
Expand All @@ -74,7 +76,9 @@ type Features struct {
WildcardCopy bool `long:"wildcard-copy" description:"allow for the expansion of wildcard (glob) paths for COPY commands"`
RawOutput bool `long:"raw-output" description:"allow for --raw-output on RUN commands"`
GitAuthorEmailNameArgs bool `long:"git-author-email-name-args" description:"includes EARTHLY_GIT_AUTHOR_EMAIL and EARTHLY_GIT_AUTHOR_NAME builtin ARGs"`
DockerCache bool `long:"docker-cache" description:"enable the WITH DOCKER --cache-id option"`

// version numbers
Major int
Minor int
}
Expand Down
1 change: 1 addition & 0 deletions tests/Earthfile
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@
ga-no-qemu-slow:
BUILD +server
BUILD --pass-args ./with-docker+all
BUILD --pass-args ./with-docker-cache+test
# this has been moved to a separate target until we get the flaky "tell me who you are" bug
# fixed; see https://github.com/earthly/earthly/issues/2567
#BUILD --pass-args ./git-metadata+test
Expand Down Expand Up @@ -1885,6 +1886,6 @@
fi
fi
" >/tmp/earthly-script
RUN --privileged \

Check failure on line 1889 in tests/Earthfile

View workflow job for this annotation

GitHub Actions / podman-tests-no-qemu-group4 / +testing-gha-ubuntu-latest-podman

Error

The command RUN --privileged /bin/sh /tmp/earthly-script did not complete successfully. Exit code 1
--mount=type=tmpfs,target=/tmp/earthly-tmpfs \
/bin/sh /tmp/earthly-script
36 changes: 36 additions & 0 deletions tests/with-docker-cache/Earthfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
VERSION --docker-cache 0.8

# img builds an alpine:3.18 based image that contains one file of random data
# (two 16M blocks read from /dev/urandom) and saves it under the image name "img".
img:
FROM alpine:3.18
RUN dd if=/dev/urandom of=large-file0 bs=16M count=2 # adjust as needed to test performance
SAVE IMAGE img

# debug-dind extends the earthly/dind image with an /etc/docker/daemon.json
# that sets "debug": true.
# NOTE(review): presumably this is what makes dockerd log the "Calling POST"
# lines that +wd greps for in /var/log/docker.log — confirm.
debug-dind:
FROM earthly/dind:alpine-3.19-docker-25.0.5-r0
RUN mkdir /etc/docker && echo '{"debug": true}' > /etc/docker/daemon.json

# wd runs a WITH DOCKER block using --cache-id=yupyup, loading +img as "myimg".
#
# Required args:
#   CHECK_CACHED_TAG_EXISTS - when "true", the RUN greps /var/log/docker.log
#       for a "Calling POST" entry that mentions an earthly_cached_<hex> tag,
#       and fails with exit code 1 if no such entry is found; any other value
#       skips the check.
#   CACHE_BUSTER - only echoed by this target.
#       NOTE(review): presumably passed so that a fresh value forces the
#       target to re-run instead of being served from cache — confirm.
wd:
FROM +debug-dind
ARG --required CHECK_CACHED_TAG_EXISTS
ARG --required CACHE_BUSTER
RUN echo "CACHE_BUSTER=$CACHE_BUSTER"
WITH DOCKER --cache-id=yupyup --load myimg=+img
RUN \
if [ "$CHECK_CACHED_TAG_EXISTS" = "true" ]; then \
echo "performing check due to CHECK_CACHED_TAG_EXISTS=$CHECK_CACHED_TAG_EXISTS"; \
grep 'Calling POST.*earthly_cached_[0-9a-f]\+' /var/log/docker.log || (echo "no reference to earthly_cached_[0-9a-f]+; which indicates no docker image data exists in the cache" && exit 1); \
echo "found instance of earthly_cached_ tag; check passed"; \
else \
echo "skipping check due to CHECK_CACHED_TAG_EXISTS=$CHECK_CACHED_TAG_EXISTS"; \
fi
END

# test writes the current epoch seconds to /cache-buster (with --no-cache) and
# then builds +wd twice, each inside its own WAIT block: first with
# CHECK_CACHED_TAG_EXISTS=false, then with CHECK_CACHED_TAG_EXISTS=true.
# Both builds receive the contents of /cache-buster as CACHE_BUSTER.
# NOTE(review): presumably the first build populates the --cache-id cache and
# the second verifies that the cached image data was found — confirm.
test:
FROM alpine:3.18
RUN --no-cache date +%s > /cache-buster
WAIT
BUILD +wd --CHECK_CACHED_TAG_EXISTS=false --CACHE_BUSTER="$(cat /cache-buster)"
END
WAIT
BUILD +wd --CHECK_CACHED_TAG_EXISTS=true --CACHE_BUSTER="$(cat /cache-buster)"
END
Loading