From 15f2ab512eb02effebb0cbf8bb30dfe248919565 Mon Sep 17 00:00:00 2001 From: Jon Edvald Date: Tue, 19 Jan 2021 21:30:51 +0100 Subject: [PATCH] feat(k8s): add cluster-buildkit buildMode This adds a new `cluster-buildkit` buildMode to the kubernetes provider, which uses the [buildkit](https://github.com/moby/buildkit) project. It also works differently to the existing `cluster-docker` and `kaniko` build modes, in that it doesn't require any cluster-wide components, such as the ever-frustrating NFS provisioner. When configured, a `garden-buildkit` Deployment with an ephemeral volume is deployed on-demand to _the project namespace_. This greatly simplifies administration and operation, while still offering a performance benefit compared to `kaniko`, and it still uses the deployment registry (in-cluster or otherwise) as a shared cache between users/namespaces. It can also be configured to run in "rootless" mode, i.e. run without elevated privileges. This has some caveats (see the buildkit docs for details) but offers a much improved security profile compared to the privileged Docker and Kaniko builders. --- .circleci/config.yml | 4 +- core/package.json | 2 +- core/src/plugins/container/build.ts | 30 +- core/src/plugins/kubernetes/api.ts | 28 ++ .../commands/cleanup-cluster-registry.ts | 4 +- .../plugins/kubernetes/commands/pull-image.ts | 128 ++---- core/src/plugins/kubernetes/config.ts | 55 ++- core/src/plugins/kubernetes/constants.ts | 6 +- .../kubernetes/container/build/build.ts | 17 +- .../kubernetes/container/build/buildkit.ts | 435 ++++++++++++++++++ .../kubernetes/container/build/common.ts | 120 ++++- .../kubernetes/container/build/kaniko.ts | 82 +--- .../plugins/kubernetes/hot-reload/helpers.ts | 62 ++- .../kubernetes/hot-reload/hot-reload.ts | 1 - core/src/plugins/kubernetes/init.ts | 17 +- core/src/plugins/kubernetes/kubernetes.ts | 21 +- core/src/plugins/kubernetes/run.ts | 7 +- core/src/plugins/kubernetes/status/status.ts | 4 +- core/src/plugins/kubernetes/util.ts | 10 +- .../data/test-projects/container/garden.yml | 15 + core/test/e2e/garden.yml | 2 +- .../plugins/kubernetes/commands/pull-image.ts | 71 ++- .../plugins/kubernetes/container/.gitignore | 1 + .../kubernetes/container/{ => build}/build.ts | 233 +++++++++- .../kubernetes/container/build/buildkit.ts | 161 +++++++ .../plugins/kubernetes/container/.gitignore | 1 + .../container/{build.ts => build/kaniko.ts} | 2 +- .../unit/src/plugins/kubernetes/hot-reload.ts | 5 +- core/test/unit/src/plugins/kubernetes/init.ts | 5 +- docs/guides/cloud-provider-setup.md | 4 +- docs/guides/in-cluster-building.md | 333 ++++++++++---- examples/gke/README.md | 13 +- examples/gke/garden.yml | 29 +- examples/hot-reload/garden.yml | 2 +- examples/kubernetes-secrets/garden.yml | 2 +- examples/project-variables/garden.yml | 2 +- examples/vote-helm/garden.yml | 2 +- examples/vote/garden.yml | 4 +- images/README.md | 2 + images/buildkit/Dockerfile | 20 + images/buildkit/garden.yml | 21 + images/k8s-util/Dockerfile | 14 + images/k8s-util/garden.yml | 6 + images/k8s-util/rsync-server.sh | 24 + images/support-images.garden.yml | 6 + 45 files changed, 1626 insertions(+), 387 deletions(-) create mode 100644 core/src/plugins/kubernetes/container/build/buildkit.ts create mode 100644 core/test/integ/src/plugins/kubernetes/container/.gitignore rename core/test/integ/src/plugins/kubernetes/container/{ => build}/build.ts (66%) create mode 100644 core/test/integ/src/plugins/kubernetes/container/build/buildkit.ts create mode 100644 
core/test/unit/src/plugins/kubernetes/container/.gitignore rename core/test/unit/src/plugins/kubernetes/container/{build.ts => build/kaniko.ts} (97%) create mode 100644 images/buildkit/Dockerfile create mode 100644 images/buildkit/garden.yml create mode 100644 images/k8s-util/Dockerfile create mode 100644 images/k8s-util/garden.yml create mode 100755 images/k8s-util/rsync-server.sh create mode 100644 images/support-images.garden.yml diff --git a/.circleci/config.yml b/.circleci/config.yml index ab299674a8..daed1b21e3 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -595,6 +595,7 @@ jobs: machine: image: 'ubuntu-1604:202004-01' docker_layer_caching: true + resource_class: large parameters: minikubeVersion: description: The Minikube version to use @@ -639,11 +640,12 @@ jobs: --extra-config=kubeadm.ignore-preflight-errors=RSRC_INSUFFICIENT_CORES \ --kubernetes-version=$K8S_VERSION \ --vm-driver=none \ - --cpus 2 \ + --cpus 3 \ --memory 4096 sudo chown -R circleci:circleci /home/circleci/.kube /home/circleci/.minikube /etc/kubernetes # Work around annoying issue on recent minikubes where namespaces take a long time to generate default service account kubectl create namespace container-default + sleep 10 - run: name: Integ tests # Note: We skip tests that only work for remote environments diff --git a/core/package.json b/core/package.json index 1f3ecc3ff5..aecaaf6045 100644 --- a/core/package.json +++ b/core/package.json @@ -241,7 +241,7 @@ "lint": "tslint -p .", "migration:generate": "typeorm migration:generate --config ormconfig.js -n", "integ": "mocha --opts test/mocha.integ.opts", - "integ-kind": "GARDEN_INTEG_TEST_MODE=local GARDEN_SKIP_TESTS=\"cluster-docker kaniko remote-only\" mocha --opts test/mocha.integ.opts", + "integ-kind": "GARDEN_INTEG_TEST_MODE=local GARDEN_SKIP_TESTS=\"cluster-docker cluster-buildkit cluster-buildkit-rootless kaniko remote-only\" mocha --opts test/mocha.integ.opts", "integ-local": "GARDEN_INTEG_TEST_MODE=local GARDEN_SKIP_TESTS=remote-only mocha --opts test/mocha.integ.opts", "integ-remote": "GARDEN_INTEG_TEST_MODE=remote GARDEN_SKIP_TESTS=local-only mocha --opts test/mocha.integ.opts", "e2e": "cd test/e2e && ../../../bin/garden test", diff --git a/core/src/plugins/container/build.ts b/core/src/plugins/container/build.ts index d6244841f1..477bf129c0 100644 --- a/core/src/plugins/container/build.ts +++ b/core/src/plugins/container/build.ts @@ -82,27 +82,33 @@ export async function buildContainerModule({ ctx, module, log }: BuildModulePara export function getDockerBuildFlags(module: ContainerModule) { const args: string[] = [] + for (const arg of getDockerBuildArgs(module)) { + args.push("--build-arg", arg) + } + + if (module.spec.build.targetImage) { + args.push("--target", module.spec.build.targetImage) + } + + args.push(...(module.spec.extraFlags || [])) + + return args +} + +export function getDockerBuildArgs(module: ContainerModule) { const buildArgs: PrimitiveMap = { GARDEN_MODULE_VERSION: module.version.versionString, ...module.spec.buildArgs, } - for (const [key, value] of Object.entries(buildArgs)) { + return Object.entries(buildArgs).map(([key, value]) => { // 0 is falsy if (value || value === 0) { - args.push("--build-arg", `${key}=${value}`) + return `${key}=${value}` } else { // If the value of a build-arg is null, Docker pulls it from // the environment: https://docs.docker.com/engine/reference/commandline/build/ - args.push("--build-arg", `${key}`) + return key } - } - - if (module.spec.build.targetImage) { - args.push("--target", 
module.spec.build.targetImage) - } - - args.push(...(module.spec.extraFlags || [])) - - return args + }) } diff --git a/core/src/plugins/kubernetes/api.ts b/core/src/plugins/kubernetes/api.ts index 2e1e147571..17de3809e6 100644 --- a/core/src/plugins/kubernetes/api.ts +++ b/core/src/plugins/kubernetes/api.ts @@ -28,6 +28,7 @@ import { V1Status, Exec, Attach, + V1Deployment, } from "@kubernetes/client-node" import AsyncLock = require("async-lock") import request = require("request-promise") @@ -106,6 +107,14 @@ const crudMap = { replace: "replaceNamespacedSecret", delete: "deleteNamespacedSecret", }, + Deployment: { + cls: new V1Deployment(), + group: "apps", + read: "readNamespacedDeployment", + create: "createNamespacedDeployment", + replace: "replaceNamespacedDeployment", + delete: "deleteNamespacedDeployment", + }, } type CrudMap = typeof crudMap @@ -323,6 +332,9 @@ export class KubeApi { } } + /** + * Given a manifest, attempt to read the matching resource from the cluster. + */ async readBySpec({ log, namespace, manifest }: { log: LogEntry; namespace: string; manifest: KubernetesResource }) { log.silly(`Fetching Kubernetes resource ${manifest.apiVersion}/${manifest.kind}/${manifest.metadata.name}`) @@ -332,6 +344,22 @@ export class KubeApi { return res.body } + /** + * Same as readBySpec() but returns null if the resource is missing. + */ + async readOrNull(params: { log: LogEntry; namespace: string; manifest: KubernetesResource }) { + try { + const resource = await this.readBySpec(params) + return resource + } catch (err) { + if (err.statusCode === 404) { + return null + } else { + throw err + } + } + } + async listResources({ log, apiVersion, diff --git a/core/src/plugins/kubernetes/commands/cleanup-cluster-registry.ts b/core/src/plugins/kubernetes/commands/cleanup-cluster-registry.ts index 302b3bc584..06609b2798 100644 --- a/core/src/plugins/kubernetes/commands/cleanup-cluster-registry.ts +++ b/core/src/plugins/kubernetes/commands/cleanup-cluster-registry.ts @@ -25,7 +25,7 @@ import { apply } from "../kubectl" import { waitForResources } from "../status/status" import { execInWorkload } from "../container/exec" import { dedent, deline } from "../../../util/string" -import { buildSyncDeploymentName } from "../container/build/common" +import { sharedBuildSyncDeploymentName } from "../container/build/common" import { getDeploymentPod } from "../util" import { getSystemNamespace } from "../namespace" import { PluginContext } from "../../../plugin-context" @@ -427,7 +427,7 @@ async function cleanupBuildSyncVolume({ status: "active", }) - const pod = await getDeploymentPod({ api, deploymentName: buildSyncDeploymentName, namespace: systemNamespace }) + const pod = await getDeploymentPod({ api, deploymentName: sharedBuildSyncDeploymentName, namespace: systemNamespace }) const runner = new PodRunner({ api, diff --git a/core/src/plugins/kubernetes/commands/pull-image.ts b/core/src/plugins/kubernetes/commands/pull-image.ts index bc9c8463dd..04aa2055ac 100644 --- a/core/src/plugins/kubernetes/commands/pull-image.ts +++ b/core/src/plugins/kubernetes/commands/pull-image.ts @@ -8,7 +8,7 @@ import fs from "fs" import tmp from "tmp-promise" -import { KubernetesPluginContext, KubernetesProvider } from "../config" +import { KubernetesPluginContext } from "../config" import { PluginError, ParameterError } from "../../../exceptions" import { PluginCommand } from "../../../types/plugin/command" import chalk from "chalk" @@ -20,12 +20,12 @@ import { LogEntry } from "../../../logger/log-entry" import { 
containerHelpers } from "../../container/helpers" import { RuntimeError } from "../../../exceptions" import { PodRunner } from "../run" -import { inClusterRegistryHostname } from "../constants" +import { inClusterRegistryHostname, gardenUtilDaemonDeploymentName } from "../constants" import { getAppNamespace, getSystemNamespace } from "../namespace" -import { makePodName, getSkopeoContainer, getDockerAuthVolume } from "../util" +import { getDeploymentPod } from "../util" import { getRegistryPortForward } from "../container/util" import { PluginContext } from "../../../plugin-context" -import { KubernetesPod } from "../types" +import { buildkitDeploymentName } from "../container/build/buildkit" export const pullImage: PluginCommand = { name: "pull-image", @@ -140,11 +140,21 @@ async function pullFromExternalRegistry( localId: string ) { const api = await KubeApi.factory(log, ctx, ctx.provider) - const namespace = await getAppNamespace(ctx, log, ctx.provider) - const podName = makePodName("skopeo", namespace, module.name) - const systemNamespace = await getSystemNamespace(ctx, ctx.provider, log) + const buildMode = ctx.provider.config.buildMode + + let namespace: string + let deploymentName: string + + if (buildMode === "cluster-buildkit") { + namespace = await getAppNamespace(ctx, log, ctx.provider) + deploymentName = buildkitDeploymentName + } else { + namespace = await getSystemNamespace(ctx, ctx.provider, log) + deploymentName = gardenUtilDaemonDeploymentName + } + const imageId = containerHelpers.getDeploymentImageId(module, module.version, ctx.provider.config.deploymentRegistry) - const tarName = `${module.name}-${module.version.versionString}` + const tarName = `/tmp/${module.name}-${module.version.versionString}` const skopeoCommand = [ "skopeo", @@ -155,18 +165,29 @@ async function pullFromExternalRegistry( `docker-archive:${tarName}`, ] - const runner = await launchSkopeoContainer({ + const pod = await getDeploymentPod({ + api, + deploymentName, + namespace, + }) + const runner = new PodRunner({ + api, ctx, provider: ctx.provider, - api, - podName, - systemNamespace, + namespace, + pod, + }) + + await runner.exec({ + command: ["sh", "-c", skopeoCommand.join(" ")], + containerName: "util", log, + timeoutSec: 60 * 1000 * 5, // 5 minutes, }) try { - await pullImageFromRegistry(runner, skopeoCommand.join(" "), log) await importImage({ module, runner, tarName, imageId, log, ctx }) + await containerHelpers.dockerCli({ cwd: module.buildPath, args: ["tag", imageId, localId], log, ctx }) await containerHelpers.dockerCli({ cwd: module.buildPath, args: ["rmi", imageId], log, ctx }) } catch (err) { @@ -175,7 +196,15 @@ async function pullFromExternalRegistry( imageId, }) } finally { - await runner.stop() + try { + await runner.exec({ + command: ["rm", "-rf", tarName], + containerName: "util", + log, + }) + } catch (err) { + log.warn("Failed cleaning up temporary file: " + err.message) + } } } @@ -194,14 +223,14 @@ async function importImage({ log: LogEntry ctx: PluginContext }) { - const sourcePath = `/${tarName}` - const getOutputCommand = ["cat", sourcePath] + const getOutputCommand = ["cat", tarName] + await tmp.withFile(async ({ path }) => { let writeStream = fs.createWriteStream(path) await runner.exec({ command: getOutputCommand, - containerName: "skopeo", + containerName: "util", log, stdout: writeStream, }) @@ -210,68 +239,3 @@ async function importImage({ await containerHelpers.dockerCli({ cwd: module.buildPath, args, log, ctx }) }) } - -async function pullImageFromRegistry(runner: 
PodRunner, command: string, log: LogEntry) { - // TODO: make this timeout configurable - await runner.exec({ - command: ["sh", "-c", command], - containerName: "skopeo", - log, - timeoutSec: 60 * 1000 * 5, // 5 minutes, - }) -} - -async function launchSkopeoContainer({ - ctx, - provider, - api, - podName, - systemNamespace, - log, -}: { - ctx: PluginContext - provider: KubernetesProvider - api: KubeApi - podName: string - systemNamespace: string - log: LogEntry -}): Promise { - const sleepCommand = "sleep 86400" - - const pod: KubernetesPod = { - apiVersion: "v1", - kind: "Pod", - metadata: { - name: podName, - namespace: systemNamespace, - }, - spec: { - shareProcessNamespace: true, - volumes: [ - // Mount the docker auth secret, so skopeo can inspect private registries. - getDockerAuthVolume(), - ], - containers: [getSkopeoContainer(sleepCommand)], - }, - } - - const runner = new PodRunner({ - ctx, - api, - pod, - provider, - namespace: systemNamespace, - }) - - const { status } = await runner.start({ - log, - }) - - if (status.state !== "ready") { - throw new RuntimeError("Failed to start skopeo container", { - status, - }) - } - - return runner -} diff --git a/core/src/plugins/kubernetes/config.ts b/core/src/plugins/kubernetes/config.ts index cb913ed8e4..2019d8e6a1 100644 --- a/core/src/plugins/kubernetes/config.ts +++ b/core/src/plugins/kubernetes/config.ts @@ -84,13 +84,16 @@ interface KubernetesStorage { sync: KubernetesStorageSpec } -export type ContainerBuildMode = "local-docker" | "cluster-docker" | "kaniko" +export type ContainerBuildMode = "local-docker" | "cluster-docker" | "kaniko" | "cluster-buildkit" export type DefaultDeploymentStrategy = "rolling" export type DeploymentStrategy = DefaultDeploymentStrategy | "blue-green" export interface KubernetesConfig extends GenericProviderConfig { buildMode: ContainerBuildMode + clusterBuildkit?: { + rootless?: boolean + } clusterDocker?: { enableBuildKit?: boolean } @@ -291,27 +294,30 @@ export const kubernetesConfigBase = () => providerConfigBaseSchema().keys({ buildMode: joi .string() - .allow("local-docker", "cluster-docker", "kaniko") + .allow("local-docker", "cluster-docker", "kaniko", "cluster-buildkit") .default("local-docker") .description( dedent` - Choose the mechanism for building container images before deploying. By default it uses the local Docker - daemon, but you can set it to \`cluster-docker\` or \`kaniko\` to sync files to a remote Docker daemon, - installed in the cluster, and build container images there. This removes the need to run Docker or - Kubernetes locally, and allows you to share layer and image caches between multiple developers, as well - as between your development and CI workflows. - - This is currently experimental and sometimes not desired, so it's not enabled by default. For example when using - the \`local-kubernetes\` provider with Docker for Desktop and Minikube, we directly use the in-cluster docker - daemon when building. You might also be deploying to a remote cluster that isn't intended as a development - environment, so you'd want your builds to happen elsewhere. - - Functionally, both \`cluster-docker\` and \`kaniko\` do the same thing, but use different underlying mechanisms - to build. The former uses a normal Docker daemon in the cluster. Because this has to run in privileged mode, - this is less secure than Kaniko, but in turn it is generally faster. See the - [Kaniko docs](https://github.com/GoogleContainerTools/kaniko) for more information on Kaniko. 
- ` + Choose the mechanism for building container images before deploying. By default your local Docker daemon is used, but you can set it to \`cluster-buildkit\`, \`cluster-docker\` or \`kaniko\` to sync files to the cluster, and build container images there. This removes the need to run Docker locally, and allows you to share layer and image caches between multiple developers, as well as between your development and CI workflows. + + For more details on all the different options and what makes sense to use for your setup, please check out the [in-cluster building guide](https://docs.garden.io/guides/in-cluster-building). + ` ), + clusterBuildkit: joi + .object() + .keys({ + rootless: joi + .boolean() + .default(false) + .description( + dedent` + Enable rootless mode for the cluster-buildkit daemon, which runs the daemon with decreased privileges. + Please see [the buildkit docs](https://github.com/moby/buildkit/blob/master/docs/rootless.md) for caveats when using this mode. + ` + ), + }) + .default(() => {}) + .description("Configuration options for the `cluster-buildkit` build mode."), clusterDocker: joi .object() .keys({ @@ -385,14 +391,13 @@ export const kubernetesConfigBase = () => .object() .keys({ builder: resourceSchema(defaultResources.builder).description(dedent` - Resource requests and limits for the in-cluster builder. + Resource requests and limits for the in-cluster builder. It's important to consider which build mode you're using when configuring this. + + When \`buildMode\` is \`kaniko\`, this refers to _each Kaniko pod_, i.e. each individual build, so you'll want to consider the requirements for your individual image builds, with your most expensive/heavy images in mind. - When \`buildMode\` is \`cluster-docker\`, this refers to the Docker Daemon that is installed and run - cluster-wide. This is shared across all users and builds, so it should be resourced accordingly, factoring - in how many concurrent builds you expect and how heavy your builds tend to be. + When \`buildMode\` is \`cluster-buildkit\`, this applies to the BuildKit deployment created in _each project namespace_. So think of this as the resource spec for each individual user or project namespace. - When \`buildMode\` is \`kaniko\`, this refers to _each instance_ of Kaniko, so you'd generally use lower - limits/requests, but you should evaluate based on your needs. + When \`buildMode\` is \`cluster-docker\`, this applies to the single Docker Daemon that is installed and run cluster-wide. This is shared across all users and builds in the cluster, so it should be resourced accordingly, factoring in how many concurrent builds you expect and how heavy your builds tend to be. `), registry: resourceSchema(defaultResources.registry).description(dedent` Resource requests and limits for the in-cluster image registry. Built images are pushed to this registry, @@ -550,7 +555,7 @@ export const configSchema = () => .keys({ name: joiProviderName("kubernetes"), context: k8sContextSchema().required(), - deploymentRegistry: containerRegistryConfigSchema(), + deploymentRegistry: containerRegistryConfigSchema().allow(null), ingressClass: joi.string().description(dedent` The ingress class to use on configured Ingresses (via the \`kubernetes.io/ingress.class\` annotation) when deploying \`container\` services. Use this if you have multiple ingress controllers in your cluster. 
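For reference, a `kubernetes` provider entry using the new build mode might look roughly like the following (a sketch based on the schema additions above; the context and registry values are placeholders, and `deploymentRegistry` can be omitted to use the in-cluster registry):

    providers:
      - name: kubernetes
        environments: [dev]
        context: my-cluster-context        # placeholder kubectl context
        buildMode: cluster-buildkit
        clusterBuildkit:
          rootless: true                   # optional; see the buildkit rootless docs for caveats
        deploymentRegistry:
          hostname: registry.example.com   # placeholder external registry
          namespace: my-project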
diff --git a/core/src/plugins/kubernetes/constants.ts b/core/src/plugins/kubernetes/constants.ts index 313f09050d..7ac28ae294 100644 --- a/core/src/plugins/kubernetes/constants.ts +++ b/core/src/plugins/kubernetes/constants.ts @@ -6,7 +6,10 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -export const RSYNC_PORT = 873 +export const rsyncPort = 873 +export const rsyncPortName = "garden-rsync" +export const buildSyncVolumeName = `garden-sync` + export const CLUSTER_REGISTRY_PORT = 5000 export const CLUSTER_REGISTRY_DEPLOYMENT_NAME = "garden-docker-registry" export const MAX_CONFIGMAP_DATA_SIZE = 1024 * 1024 // max ConfigMap data size is 1MB @@ -21,5 +24,6 @@ export const inClusterRegistryHostname = "127.0.0.1:5000" export const gardenUtilDaemonDeploymentName = "garden-util-daemon" export const dockerDaemonDeploymentName = "garden-docker-daemon" +export const k8sUtilImageName = "gardendev/k8s-util:0.3.5" export const dockerDaemonContainerName = "docker-daemon" export const skopeoDaemonContainerName = "util" diff --git a/core/src/plugins/kubernetes/container/build/build.ts b/core/src/plugins/kubernetes/container/build/build.ts index 963ee852d6..3ad83cead4 100644 --- a/core/src/plugins/kubernetes/container/build/build.ts +++ b/core/src/plugins/kubernetes/container/build/build.ts @@ -12,7 +12,7 @@ import { containerHelpers } from "../../../container/helpers" import { getDockerBuildFlags } from "../../../container/build" import { GetBuildStatusParams, BuildStatus } from "../../../../types/plugin/module/getBuildStatus" import { BuildModuleParams, BuildResult } from "../../../../types/plugin/module/build" -import { inClusterRegistryHostname, dockerDaemonContainerName } from "../../constants" +import { inClusterRegistryHostname, dockerDaemonContainerName, rsyncPort } from "../../constants" import { posix } from "path" import { KubeApi } from "../../api" import { KubernetesProvider, ContainerBuildMode } from "../../config" @@ -24,7 +24,8 @@ import chalk = require("chalk") import { getKanikoBuildStatus, runKaniko, kanikoBuildFailed, getKanikoFlags } from "./kaniko" import { getClusterDockerBuildStatus, getDockerDaemonPodRunner } from "./cluster-docker" import { getLocalBuildStatus, localBuild } from "./local" -import { BuildStatusHandler, BuildHandler, syncToSharedBuildSync } from "./common" +import { BuildStatusHandler, BuildHandler, syncToBuildSync, sharedBuildSyncDeploymentName } from "./common" +import { buildkitBuildHandler, getBuildkitBuildStatus } from "./buildkit" export async function k8sGetContainerBuildStatus(params: GetBuildStatusParams): Promise { const { ctx, module } = params @@ -56,8 +57,7 @@ export async function k8sBuildContainer(params: BuildModuleParams { ) const dockerfile = module.spec.dockerfile || "Dockerfile" - const { contextPath } = await syncToSharedBuildSync({ ...params, api, systemNamespace }) + const { contextPath } = await syncToBuildSync({ + ...params, + api, + namespace: systemNamespace, + deploymentName: sharedBuildSyncDeploymentName, + rsyncPort, + }) log.setState(`Building image ${localId}...`) @@ -191,6 +197,7 @@ const remoteBuild: BuildHandler = async (params) => { const buildHandlers: { [mode in ContainerBuildMode]: BuildHandler } = { "local-docker": localBuild, + "cluster-buildkit": buildkitBuildHandler, "cluster-docker": remoteBuild, "kaniko": remoteBuild, } diff --git a/core/src/plugins/kubernetes/container/build/buildkit.ts b/core/src/plugins/kubernetes/container/build/buildkit.ts new file mode 100644 index 0000000000..5415648c13 --- 
/dev/null +++ b/core/src/plugins/kubernetes/container/build/buildkit.ts @@ -0,0 +1,435 @@ +/* + * Copyright (C) 2018-2020 Garden Technologies, Inc. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +import AsyncLock from "async-lock" +import chalk from "chalk" +import split2 = require("split2") +import { cloneDeep } from "lodash" +import { + buildSyncVolumeName, + dockerAuthSecretKey, + inClusterRegistryHostname, + k8sUtilImageName, + rsyncPortName, +} from "../../constants" +import { KubeApi } from "../../api" +import { KubernetesDeployment } from "../../types" +import { LogEntry } from "../../../../logger/log-entry" +import { waitForResources, compareDeployedResources } from "../../status/status" +import { KubernetesProvider, KubernetesPluginContext } from "../../config" +import { PluginContext } from "../../../../plugin-context" +import { prepareDockerAuth, getRegistryHostname } from "../../init" +import { BuildStatusHandler, skopeoBuildStatus, BuildHandler, syncToBuildSync, getSocatContainer } from "./common" +import { getNamespace } from "../../namespace" +import { containerHelpers } from "../../../container/helpers" +import { LogLevel } from "../../../../logger/log-node" +import { renderOutputStream, sleep } from "../../../../util/util" +import { ContainerModule } from "../../../container/config" +import { getDockerBuildArgs } from "../../../container/build" +import { getDeploymentPod, millicpuToString, megabytesToString } from "../../util" +import { PodRunner } from "../../run" +import { V1Container } from "@kubernetes/client-node" + +export const buildkitImageName = "gardendev/buildkit:v0.8.1-4" +export const buildkitDeploymentName = "garden-buildkit" +export const buildkitAuthSecretName = "garden-docker-auth" +const buildkitContainerName = "buildkitd" +const utilRsyncPort = 8730 + +const deployLock = new AsyncLock() + +export const getBuildkitBuildStatus: BuildStatusHandler = async (params) => { + const { ctx, module, log } = params + const k8sCtx = ctx as KubernetesPluginContext + const provider = k8sCtx.provider + + const api = await KubeApi.factory(log, ctx, provider) + const namespace = await getNamespace({ log, ctx, provider }) + + await ensureBuildkit({ + ctx, + provider, + log, + api, + namespace, + }) + + return skopeoBuildStatus({ + namespace, + deploymentName: buildkitDeploymentName, + containerName: utilContainer.name, + log, + api, + ctx, + provider, + module, + }) +} + +export const buildkitBuildHandler: BuildHandler = async (params) => { + const { ctx, module, log } = params + const provider = ctx.provider + const api = await KubeApi.factory(log, ctx, provider) + const namespace = await getNamespace({ log, ctx, provider }) + + await ensureBuildkit({ + ctx, + provider, + log, + api, + namespace, + }) + + const localId = containerHelpers.getLocalImageId(module, module.version) + const deploymentImageName = containerHelpers.getDeploymentImageName(module, provider.config.deploymentRegistry) + const deploymentImageId = containerHelpers.getDeploymentImageId( + module, + module.version, + provider.config.deploymentRegistry + ) + const dockerfile = module.spec.dockerfile || "Dockerfile" + + const { contextPath } = await syncToBuildSync({ + ...params, + api, + namespace, + deploymentName: buildkitDeploymentName, + rsyncPort: utilRsyncPort, + }) + + log.setState(`Building image ${localId}...`) + + let buildLog = 
"" + + // Stream debug log to a status line + const stdout = split2() + const statusLine = log.placeholder({ level: LogLevel.verbose }) + + stdout.on("error", () => {}) + stdout.on("data", (line: Buffer) => { + statusLine.setState(renderOutputStream(line.toString())) + }) + + // Prepare the build command (this thing, while an otherwise excellent piece of software, is clearly is not meant for + // everyday human usage) + let outputSpec = `type=image,name=${deploymentImageId},push=true` + + if (provider.config.deploymentRegistry?.hostname === inClusterRegistryHostname) { + // The in-cluster registry is not exposed, so we don't configure TLS on it. + outputSpec += ",registry.insecure=true" + } + + const command = [ + "buildctl", + "build", + "--frontend=dockerfile.v0", + "--local", + "context=" + contextPath, + "--local", + "dockerfile=" + contextPath, + "--opt", + "filename=" + dockerfile, + "--output", + outputSpec, + "--export-cache", + "type=inline", + "--import-cache", + `type=registry,ref=${deploymentImageName}`, + ...getDockerBuildFlags(module), + ] + + // Execute the build + const buildTimeout = module.spec.build.timeout + + const pod = await getDeploymentPod({ api, deploymentName: buildkitDeploymentName, namespace }) + + const runner = new PodRunner({ + api, + ctx, + provider, + namespace, + pod, + }) + + const buildRes = await runner.exec({ + log, + command, + timeoutSec: buildTimeout, + containerName: buildkitContainerName, + stdout, + }) + + buildLog = buildRes.log + + log.silly(buildLog) + + return { + buildLog, + fetched: false, + fresh: true, + version: module.version.versionString, + } +} + +export async function ensureBuildkit({ + ctx, + provider, + log, + api, + namespace, +}: { + ctx: PluginContext + provider: KubernetesProvider + log: LogEntry + api: KubeApi + namespace: string +}) { + return deployLock.acquire("deploy", async () => { + const deployLog = log.placeholder() + + // Check status of the buildkit deployment + const manifest = getBuildkitDeployment(provider) + const status = await compareDeployedResources(ctx as KubernetesPluginContext, api, namespace, [manifest], deployLog) + + // Ensure docker auth secret is available and up-to-date in the namespace + const authSecret = await prepareDockerAuth(api, provider, namespace) + authSecret.metadata.name = buildkitAuthSecretName + const existingSecret = await api.readOrNull({ log: deployLog, namespace, manifest: authSecret }) + + if (!existingSecret || authSecret.data?.[dockerAuthSecretKey] !== existingSecret.data?.[dockerAuthSecretKey]) { + deployLog.setState(chalk.gray(`-> Updating Docker auth secret in namespace ${namespace}`)) + await api.upsert({ kind: "Secret", namespace, log: deployLog, obj: authSecret }) + // Need to wait a little to ensure the secret is updated in the buildkit deployment + if (status.state === "ready") { + await sleep(5) + } + } + + if (status.state === "ready") { + return false + } + + // Deploy the buildkit daemon + deployLog.setState( + chalk.gray(`-> Deploying ${buildkitDeploymentName} daemon in ${namespace} namespace (was ${status.state})`) + ) + + await api.upsert({ kind: "Deployment", namespace, log: deployLog, obj: manifest }) + + await waitForResources({ + namespace, + ctx, + provider, + serviceName: "garden-buildkit", + resources: [manifest], + log: deployLog, + timeoutSec: 600, + }) + + deployLog.setState({ append: true, msg: "Done!" 
}) + return true + }) +} + +function getDockerBuildFlags(module: ContainerModule) { + const args: string[] = [] + + for (const arg of getDockerBuildArgs(module)) { + args.push("--opt", "build-arg:" + arg) + } + + if (module.spec.build.targetImage) { + args.push("--opt", "target:" + module.spec.build.targetImage) + } + + args.push(...(module.spec.extraFlags || [])) + + return args +} + +function getBuildkitDeployment(provider: KubernetesProvider) { + const deployment = cloneDeep(baseBuildkitDeployment) + const buildkitContainer = deployment.spec!.template.spec!.containers[0] + + // Optionally run buildkit in rootless mode + if (!!provider.config.clusterBuildkit?.rootless) { + deployment.spec!.template.metadata!.annotations = { + "container.apparmor.security.beta.kubernetes.io/buildkitd": "unconfined", + "container.seccomp.security.alpha.kubernetes.io/buildkitd": "unconfined", + } + buildkitContainer.image += "-rootless" + buildkitContainer.args = [ + "--addr", + "unix:///run/user/1000/buildkit/buildkitd.sock", + "--oci-worker-no-process-sandbox", + ] + buildkitContainer.securityContext = { + runAsUser: 1000, + runAsGroup: 1000, + } + } + + buildkitContainer.resources = { + limits: { + cpu: millicpuToString(provider.config.resources.builder.limits.cpu), + memory: megabytesToString(provider.config.resources.builder.limits.memory), + }, + requests: { + cpu: millicpuToString(provider.config.resources.builder.requests.cpu), + memory: megabytesToString(provider.config.resources.builder.requests.memory), + }, + } + + // We need a proxy sidecar to be able to reach the in-cluster registry from the Pod + const registryHostname = getRegistryHostname(provider.config) + deployment.spec!.template.spec!.containers.push(getSocatContainer(registryHostname)) + + return deployment +} + +const utilContainer: V1Container = { + name: "util", + image: k8sUtilImageName, + imagePullPolicy: "IfNotPresent", + command: ["/rsync-server.sh"], + env: [ + // This makes sure the server is accessible on any IP address, because CIDRs can be different across clusters. + // K8s can be trusted to secure the port. 
- JE + { name: "ALLOW", value: "0.0.0.0/0" }, + { + name: "RSYNC_PORT", + value: "" + utilRsyncPort, + }, + ], + volumeMounts: [ + { + name: buildkitAuthSecretName, + mountPath: "/home/user/.docker", + readOnly: true, + }, + { + name: buildSyncVolumeName, + mountPath: "/data", + }, + ], + ports: [ + { + name: rsyncPortName, + protocol: "TCP", + containerPort: utilRsyncPort, + }, + ], + readinessProbe: { + initialDelaySeconds: 1, + periodSeconds: 1, + timeoutSeconds: 3, + successThreshold: 2, + failureThreshold: 5, + tcpSocket: { port: (rsyncPortName) }, + }, + resources: { + // This should be ample + limits: { + cpu: "256m", + memory: "512Mi", + }, + }, + securityContext: { + runAsUser: 1000, + runAsGroup: 1000, + }, +} + +const baseBuildkitDeployment: KubernetesDeployment = { + apiVersion: "apps/v1", + kind: "Deployment", + metadata: { + labels: { + app: buildkitDeploymentName, + }, + name: buildkitDeploymentName, + }, + spec: { + replicas: 1, + selector: { + matchLabels: { + app: buildkitDeploymentName, + }, + }, + template: { + metadata: { + labels: { + app: buildkitDeploymentName, + }, + }, + spec: { + containers: [ + { + name: buildkitContainerName, + image: buildkitImageName, + args: ["--addr", "unix:///run/buildkit/buildkitd.sock"], + readinessProbe: { + exec: { + command: ["buildctl", "debug", "workers"], + }, + initialDelaySeconds: 3, + periodSeconds: 5, + }, + livenessProbe: { + exec: { + command: ["buildctl", "debug", "workers"], + }, + initialDelaySeconds: 5, + periodSeconds: 30, + }, + securityContext: { + privileged: true, + }, + volumeMounts: [ + { + name: buildkitAuthSecretName, + mountPath: "/.docker", + readOnly: true, + }, + { + name: buildSyncVolumeName, + mountPath: "/garden-build", + }, + ], + env: [ + { + name: "DOCKER_CONFIG", + value: "/.docker", + }, + ], + }, + // Attach a util container for the rsync server and to use skopeo + utilContainer, + ], + volumes: [ + { + name: buildkitAuthSecretName, + secret: { + secretName: buildkitAuthSecretName, + items: [ + { + key: dockerAuthSecretKey, + path: "config.json", + }, + ], + }, + }, + { + name: buildSyncVolumeName, + emptyDir: {}, + }, + ], + }, + }, + }, +} diff --git a/core/src/plugins/kubernetes/container/build/common.ts b/core/src/plugins/kubernetes/container/build/common.ts index 22022b7941..51760318fc 100644 --- a/core/src/plugins/kubernetes/container/build/common.ts +++ b/core/src/plugins/kubernetes/container/build/common.ts @@ -12,7 +12,7 @@ import { containerHelpers } from "../../../container/helpers" import { GetBuildStatusParams, BuildStatus } from "../../../../types/plugin/module/getBuildStatus" import { BuildModuleParams, BuildResult } from "../../../../types/plugin/module/build" import { getDeploymentPod } from "../../util" -import { gardenUtilDaemonDeploymentName, RSYNC_PORT } from "../../constants" +import { gardenUtilDaemonDeploymentName, inClusterRegistryHostname } from "../../constants" import { KubeApi } from "../../api" import { KubernetesProvider } from "../../config" import { PodRunner } from "../../run" @@ -21,24 +21,30 @@ import { resolve } from "path" import { getPortForward } from "../../port-forward" import { normalizeLocalRsyncPath } from "../../../../util/fs" import { exec } from "../../../../util/util" +import { InternalError, RuntimeError } from "../../../../exceptions" +import { LogEntry } from "../../../../logger/log-entry" -export const buildSyncDeploymentName = "garden-build-sync" +const inClusterRegistryPort = 5000 + +export const sharedBuildSyncDeploymentName = 
"garden-build-sync" export type BuildStatusHandler = (params: GetBuildStatusParams) => Promise export type BuildHandler = (params: BuildModuleParams) => Promise interface SyncToSharedBuildSyncParams extends BuildModuleParams { api: KubeApi - systemNamespace: string + namespace: string + deploymentName: string + rsyncPort: number } -export async function syncToSharedBuildSync(params: SyncToSharedBuildSyncParams) { - const { ctx, module, log, api, systemNamespace } = params +export async function syncToBuildSync(params: SyncToSharedBuildSyncParams) { + const { ctx, module, log, api, namespace, deploymentName, rsyncPort } = params const buildSyncPod = await getDeploymentPod({ api, - deploymentName: buildSyncDeploymentName, - namespace: systemNamespace, + deploymentName, + namespace, }) // Sync the build context to the remote sync service // -> Get a tunnel to the service @@ -46,9 +52,9 @@ export async function syncToSharedBuildSync(params: SyncToSharedBuildSyncParams) const syncFwd = await getPortForward({ ctx, log, - namespace: systemNamespace, + namespace, targetResource: `Pod/${buildSyncPod.metadata.name}`, - port: RSYNC_PORT, + port: rsyncPort, }) // -> Run rsync @@ -87,6 +93,84 @@ export async function syncToSharedBuildSync(params: SyncToSharedBuildSyncParams) return { contextPath } } +/** + * Checks if the module has been built by exec-ing skopeo in a deployed pod in the cluster. + */ +export async function skopeoBuildStatus({ + namespace, + deploymentName, + containerName, + log, + api, + ctx, + provider, + module, +}: { + namespace: string + deploymentName: string + containerName: string + log: LogEntry + api: KubeApi + ctx: PluginContext + provider: KubernetesProvider + module: ContainerModule +}) { + const deploymentRegistry = provider.config.deploymentRegistry + + if (!deploymentRegistry) { + // This is validated in the provider configure handler, so this is an internal error if it happens + throw new InternalError(`Expected configured deploymentRegistry for remote build`, { config: provider.config }) + } + const remoteId = containerHelpers.getDeploymentImageId(module, module.version, deploymentRegistry) + const inClusterRegistry = deploymentRegistry?.hostname === inClusterRegistryHostname + const skopeoCommand = ["skopeo", "--command-timeout=30s", "inspect", "--raw", "--authfile", "/.docker/config.json"] + if (inClusterRegistry) { + // The in-cluster registry is not exposed, so we don't configure TLS on it. 
+ skopeoCommand.push("--tls-verify=false") + } + + skopeoCommand.push(`docker://${remoteId}`) + + const podCommand = ["sh", "-c", skopeoCommand.join(" ")] + + const pod = await getDeploymentPod({ + api, + deploymentName, + namespace, + }) + + const runner = new PodRunner({ + api, + ctx, + provider, + namespace, + pod, + }) + + try { + await runner.exec({ + log, + command: podCommand, + timeoutSec: 300, + containerName, + }) + return { ready: true } + } catch (err) { + const res = err.detail?.result || {} + + // Non-zero exit code can both mean the manifest is not found, and any other unexpected error + if (res.exitCode !== 0 && !res.stderr.includes("manifest unknown")) { + const output = res.allLogs || err.message + + throw new RuntimeError(`Unable to query registry for image status: ${output}`, { + command: skopeoCommand, + output, + }) + } + return { ready: false } + } +} + export async function getUtilDaemonPodRunner({ api, systemNamespace, @@ -113,6 +197,24 @@ export async function getUtilDaemonPodRunner({ }) } +export function getSocatContainer(registryHostname: string) { + return { + name: "proxy", + image: "gardendev/socat:0.1.0", + command: ["/bin/sh", "-c", `socat TCP-LISTEN:5000,fork TCP:${registryHostname}:${inClusterRegistryPort} || exit 0`], + ports: [ + { + name: "proxy", + containerPort: inClusterRegistryPort, + protocol: "TCP", + }, + ], + readinessProbe: { + tcpSocket: { port: inClusterRegistryPort }, + }, + } +} + export async function getManifestInspectArgs(module: ContainerModule, deploymentRegistry: ContainerRegistryConfig) { const remoteId = containerHelpers.getDeploymentImageId(module, module.version, deploymentRegistry) diff --git a/core/src/plugins/kubernetes/container/build/kaniko.ts b/core/src/plugins/kubernetes/container/build/kaniko.ts index 9d3014f81c..8a4742c873 100644 --- a/core/src/plugins/kubernetes/container/build/kaniko.ts +++ b/core/src/plugins/kubernetes/container/build/kaniko.ts @@ -8,14 +8,18 @@ import { V1PodSpec } from "@kubernetes/client-node" import { ContainerModule } from "../../../container/config" -import { containerHelpers } from "../../../container/helpers" import { millicpuToString, megabytesToString, makePodName } from "../../util" -import { dockerAuthSecretName, inClusterRegistryHostname, skopeoDaemonContainerName } from "../../constants" +import { + dockerAuthSecretName, + inClusterRegistryHostname, + skopeoDaemonContainerName, + gardenUtilDaemonDeploymentName, +} from "../../constants" import { KubeApi } from "../../api" import { LogEntry } from "../../../../logger/log-entry" import { getDockerAuthVolume } from "../../util" import { KubernetesProvider, KubernetesPluginContext, DEFAULT_KANIKO_IMAGE } from "../../config" -import { InternalError, RuntimeError, ConfigurationError } from "../../../../exceptions" +import { ConfigurationError } from "../../../../exceptions" import { PodRunner } from "../../run" import { getRegistryHostname, getKubernetesSystemVariables } from "../../init" import { Writable } from "stream" @@ -24,59 +28,27 @@ import { dedent } from "../../../../util/string" import { RunResult } from "../../../../types/plugin/base" import { PluginContext } from "../../../../plugin-context" import { KubernetesPod } from "../../types" -import { getUtilDaemonPodRunner, BuildStatusHandler } from "./common" +import { BuildStatusHandler, skopeoBuildStatus, getSocatContainer } from "./common" import { differenceBy } from "lodash" -const registryPort = 5000 - export const getKanikoBuildStatus: BuildStatusHandler = async (params) => { 
const { ctx, module, log } = params const k8sCtx = ctx as KubernetesPluginContext const provider = k8sCtx.provider - const deploymentRegistry = provider.config.deploymentRegistry - - if (!deploymentRegistry) { - // This is validated in the provider configure handler, so this is an internal error if it happens - throw new InternalError(`Expected configured deploymentRegistry for remote build`, { config: provider.config }) - } - - const remoteId = containerHelpers.getDeploymentImageId(module, module.version, deploymentRegistry) - const inClusterRegistry = deploymentRegistry?.hostname === inClusterRegistryHostname - const skopeoCommand = ["skopeo", "--command-timeout=30s", "inspect", "--raw"] - if (inClusterRegistry) { - // The in-cluster registry is not exposed, so we don't configure TLS on it. - skopeoCommand.push("--tls-verify=false") - } - - skopeoCommand.push(`docker://${remoteId}`) - const podCommand = ["sh", "-c", skopeoCommand.join(" ")] const api = await KubeApi.factory(log, ctx, provider) const systemNamespace = await getSystemNamespace(ctx, provider, log) - const runner = await getUtilDaemonPodRunner({ api, systemNamespace, ctx, provider }) - try { - await runner.exec({ - log, - command: podCommand, - timeoutSec: 300, - containerName: skopeoDaemonContainerName, - }) - return { ready: true } - } catch (err) { - const res = err.detail?.result || {} - - // Non-zero exit code can both mean the manifest is not found, and any other unexpected error - if (res.exitCode !== 0 && !res.stderr.includes("manifest unknown")) { - const output = res.allLogs || err.message - - throw new RuntimeError(`Unable to query registry for image status: ${output}`, { - command: skopeoCommand, - output, - }) - } - return { ready: false } - } + return skopeoBuildStatus({ + namespace: systemNamespace, + deploymentName: gardenUtilDaemonDeploymentName, + containerName: skopeoDaemonContainerName, + log, + api, + ctx, + provider, + module, + }) } export const DEFAULT_KANIKO_FLAGS = ["--cache=true"] @@ -283,21 +255,3 @@ export async function runKaniko({ version: module.version.versionString, } } - -function getSocatContainer(registryHostname: string) { - return { - name: "proxy", - image: "gardendev/socat:0.1.0", - command: ["/bin/sh", "-c", `socat TCP-LISTEN:5000,fork TCP:${registryHostname}:5000 || exit 0`], - ports: [ - { - name: "proxy", - containerPort: registryPort, - protocol: "TCP", - }, - ], - readinessProbe: { - tcpSocket: { port: registryPort }, - }, - } -} diff --git a/core/src/plugins/kubernetes/hot-reload/helpers.ts b/core/src/plugins/kubernetes/hot-reload/helpers.ts index ab2e952cf7..94b454ab9d 100644 --- a/core/src/plugins/kubernetes/hot-reload/helpers.ts +++ b/core/src/plugins/kubernetes/hot-reload/helpers.ts @@ -6,7 +6,6 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ -import { V1Container } from "@kubernetes/client-node" import { ContainerHotReloadSpec } from "../../container/config" import { RuntimeError, ConfigurationError } from "../../../exceptions" import { resolve as resolvePath, dirname, posix } from "path" @@ -17,7 +16,7 @@ import { LogEntry } from "../../../logger/log-entry" import { getResourceContainer, getServiceResourceSpec } from "../util" import { execInWorkload } from "../container/exec" import { getPortForward, killPortForward } from "../port-forward" -import { RSYNC_PORT } from "../constants" +import { rsyncPort, buildSyncVolumeName, rsyncPortName } from "../constants" import { KubernetesPluginContext } from "../config" import { KubernetesWorkload } from "../types" import { normalizeLocalRsyncPath, normalizeRelativePath } from "../../../util/fs" @@ -26,7 +25,7 @@ import { GardenModule } from "../../../types/module" import { getBaseModule } from "../helm/common" import { HelmModule, HelmService } from "../helm/config" import { KubernetesModule, KubernetesService } from "../kubernetes-module/config" -import { HotReloadableKind, HotReloadableResource, RSYNC_PORT_NAME } from "./hot-reload" +import { HotReloadableKind, HotReloadableResource } from "./hot-reload" import Bluebird from "bluebird" import normalizePath from "normalize-path" @@ -54,7 +53,6 @@ export function configureHotReload({ const kind = target.kind set(target, ["metadata", "annotations", gardenAnnotationKey("hot-reload")], "true") const mainContainer = getResourceContainer(target, containerName) - const syncVolumeName = `garden-sync` // We're copying the target folder, not just its contents const syncConfig = hotReloadSpec.sync @@ -69,7 +67,7 @@ export function configureHotReload({ imagePullPolicy: "IfNotPresent", volumeMounts: [ { - name: syncVolumeName, + name: buildSyncVolumeName, mountPath: "/.garden/hot_reload", }, ], @@ -77,7 +75,7 @@ export function configureHotReload({ const syncMounts = targets.map((t) => { return { - name: syncVolumeName, + name: buildSyncVolumeName, mountPath: t, // Need to prefix the target with "root" because we need a "tmp" folder next to it while syncing subPath: posix.join("root", rsyncTargetPath(t)), @@ -94,11 +92,11 @@ export function configureHotReload({ mainContainer.ports = [] } - if (mainContainer.ports.find((p) => p.containerPort === RSYNC_PORT)) { + if (mainContainer.ports.find((p) => p.containerPort === rsyncPort)) { throw new Error(deline` ${kind} ${target.metadata.name} is configured for hot reload, but one of its containers uses - port ${RSYNC_PORT}, which is reserved for internal use while hot reload is active. Please remove - ${RSYNC_PORT} from your services' port config.`) + port ${rsyncPort}, which is reserved for internal use while hot reload is active. Please remove + ${rsyncPort} from your services' port config.`) } if (hotReloadCommand) { @@ -109,7 +107,22 @@ export function configureHotReload({ mainContainer.args = hotReloadArgs } - const rsyncContainer: V1Container = { + // These any casts are necessary because of flaws in the TS definitions in the client library. 
+ if (!target.spec.template.spec!.volumes) { + target.spec.template.spec!.volumes = [] + } + + target.spec.template.spec!.volumes.push({ + name: buildSyncVolumeName, + emptyDir: {}, + }) + + if (!target.spec.template.spec!.initContainers) { + target.spec.template.spec!.initContainers = [] + } + target.spec.template.spec!.initContainers.push(initContainer) + + target.spec.template.spec!.containers.push({ name: "garden-rsync", image: "gardendev/rsync:0.2.0", imagePullPolicy: "IfNotPresent", @@ -120,7 +133,7 @@ export function configureHotReload({ ], volumeMounts: [ { - name: syncVolumeName, + name: buildSyncVolumeName, /** * We mount at /data because the rsync image we're currently using is configured * to use that path. @@ -130,9 +143,9 @@ export function configureHotReload({ ], ports: [ { - name: RSYNC_PORT_NAME, + name: rsyncPortName, protocol: "TCP", - containerPort: RSYNC_PORT, + containerPort: rsyncPort, }, ], readinessProbe: { @@ -141,26 +154,9 @@ export function configureHotReload({ timeoutSeconds: 3, successThreshold: 1, failureThreshold: 5, - tcpSocket: { port: (RSYNC_PORT_NAME) }, + tcpSocket: { port: (rsyncPortName) }, }, - } - - // These any casts are necessary because of flaws in the TS definitions in the client library. - if (!target.spec.template.spec!.volumes) { - target.spec.template.spec!.volumes = [] - } - - target.spec.template.spec!.volumes.push({ - name: syncVolumeName, - emptyDir: {}, }) - - if (!target.spec.template.spec!.initContainers) { - target.spec.template.spec!.initContainers = [] - } - target.spec.template.spec!.initContainers.push(initContainer) - - target.spec.template.spec!.containers.push(rsyncContainer) } export function getHotReloadSpec(service: KubernetesService | HelmService) { @@ -284,7 +280,7 @@ export async function syncToService({ ctx, service, hotReloadSpec, namespace, wo const targetResource = `${workload.kind.toLowerCase()}/${workload.metadata.name}` const doSync = async () => { - const portForward = await getPortForward({ ctx, log, namespace, targetResource, port: RSYNC_PORT }) + const portForward = await getPortForward({ ctx, log, namespace, targetResource, port: rsyncPort }) const syncResult = await Bluebird.map(hotReloadSpec.sync, ({ source, target }) => { const sourcePath = rsyncSourcePath(service.sourceModule.path, source) @@ -345,7 +341,7 @@ export async function syncToService({ ctx, service, hotReloadSpec, namespace, wo } catch (error) { if (error.message.includes("did not see server greeting") || error.message.includes("Connection reset by peer")) { log.debug(`Port-forward to ${targetResource} disconnected. 
Retrying.`) - killPortForward(targetResource, RSYNC_PORT) + killPortForward(targetResource, rsyncPort) await doSync() } else { throw error diff --git a/core/src/plugins/kubernetes/hot-reload/hot-reload.ts b/core/src/plugins/kubernetes/hot-reload/hot-reload.ts index fb8399957b..d34b4ae471 100644 --- a/core/src/plugins/kubernetes/hot-reload/hot-reload.ts +++ b/core/src/plugins/kubernetes/hot-reload/hot-reload.ts @@ -31,7 +31,6 @@ import { getHotReloadSpec, syncToService } from "./helpers" export type HotReloadableResource = KubernetesResource export type HotReloadableKind = "Deployment" | "DaemonSet" | "StatefulSet" -export const RSYNC_PORT_NAME = "garden-rsync" export const hotReloadableKinds: HotReloadableKind[] = ["Deployment", "DaemonSet", "StatefulSet"] /** diff --git a/core/src/plugins/kubernetes/init.ts b/core/src/plugins/kubernetes/init.ts index c2cf933c89..3b23f2569d 100644 --- a/core/src/plugins/kubernetes/init.ts +++ b/core/src/plugins/kubernetes/init.ts @@ -37,8 +37,6 @@ import { V1Secret, V1Toleration } from "@kubernetes/client-node" import { KubernetesResource } from "./types" import { compareDeployedResources } from "./status/status" import { PrimitiveMap } from "../../config/common" -import { LogEntry } from "../../logger/log-entry" -import { PluginContext } from "../../plugin-context" // Note: We need to increment a version number here if we ever make breaking changes to the NFS provisioner StatefulSet const nfsStorageClassVersion = 2 @@ -157,7 +155,7 @@ export async function getEnvironmentStatus({ let secretsUpToDate = true if (provider.config.buildMode !== "local-docker") { - const authSecret = await prepareDockerAuth(api, ctx, provider, log) + const authSecret = await prepareDockerAuth(api, provider, systemNamespace) const comparison = await compareDeployedResources(k8sCtx, api, systemNamespace, [authSecret], log) secretsUpToDate = comparison.state === "ready" } @@ -283,7 +281,7 @@ export async function prepareSystem({ // Set auth secret for in-cluster builder if (provider.config.buildMode !== "local-docker") { log.info("Updating builder auth secret") - const authSecret = await prepareDockerAuth(sysApi, ctx, sysProvider, log) + const authSecret = await prepareDockerAuth(sysApi, sysProvider, systemNamespace) await sysApi.upsert({ kind: "Secret", namespace: systemNamespace, obj: authSecret, log }) } @@ -479,7 +477,7 @@ export async function buildDockerAuthConfig( throw new ConfigurationError( dedent` Could not parse configured imagePullSecret '${secret.metadata.name}' as a valid docker authentication file, - because it is missing an "auths", "credHelpers" key. + because it is missing an "auths" or "credHelpers" key. ${dockerAuthDocsLink} `, { secretRef } @@ -497,23 +495,18 @@ export async function buildDockerAuthConfig( export async function prepareDockerAuth( api: KubeApi, - ctx: PluginContext, provider: KubernetesProvider, - log: LogEntry + namespace: string ): Promise> { // Read all configured imagePullSecrets and combine into a docker config file to use in the in-cluster builders. 
const config = await buildDockerAuthConfig(provider.config.imagePullSecrets, api) - // Enabling experimental features, in order to support advanced registry querying - // Store the config as a Secret (overwriting if necessary) - const systemNamespace = await getSystemNamespace(ctx, provider, log, api) - return { apiVersion: "v1", kind: "Secret", metadata: { name: dockerAuthSecretName, - namespace: systemNamespace, + namespace, }, data: { [dockerAuthSecretKey]: Buffer.from(JSON.stringify(config)).toString("base64"), diff --git a/core/src/plugins/kubernetes/kubernetes.ts b/core/src/plugins/kubernetes/kubernetes.ts index 94ccd0fd4f..6ee4be67a7 100644 --- a/core/src/plugins/kubernetes/kubernetes.ts +++ b/core/src/plugins/kubernetes/kubernetes.ts @@ -61,8 +61,13 @@ export async function configureProvider({ } } - if (config.buildMode === "cluster-docker" || config.buildMode === "kaniko") { - config._systemServices.push("build-sync", "util") + const buildMode = config.buildMode + + // TODO: clean this up, this is getting confusing here + if (buildMode !== "local-docker") { + if (buildMode !== "cluster-buildkit") { + config._systemServices.push("build-sync", "util") + } const usingInClusterRegistry = !config.deploymentRegistry || config.deploymentRegistry.hostname === inClusterRegistryHostname @@ -79,18 +84,20 @@ export async function configureProvider({ } config._systemServices.push("docker-registry", "registry-proxy") } - if (!usingInClusterRegistry || config.buildMode === "kaniko") { - // If using an external registry we need the util service + + if (buildMode !== "cluster-buildkit" && (!usingInClusterRegistry || buildMode === "kaniko")) { + // If using an external registry and kaniko or cluster-docker, we need the util service // Also the kaniko buildMode needs the util service even if using an in-cluster registry config._systemServices.push("util") } - if (config.buildMode === "cluster-docker") { + if (buildMode === "cluster-docker") { config._systemServices.push("docker-daemon") } - // Set up an NFS provisioner if the user doesn't explicitly set a storage class for the shared sync volume - if (!config.storage.sync.storageClass) { + // Set up an NFS provisioner if not using cluster-buildkit, and the user doesn't explicitly set a storage class for + // the shared sync volume + if (buildMode !== "cluster-buildkit" && !config.storage.sync.storageClass) { config._systemServices.push("nfs-provisioner") } } else if (config.name !== "local-kubernetes" && !config.deploymentRegistry) { diff --git a/core/src/plugins/kubernetes/run.ts b/core/src/plugins/kubernetes/run.ts index b1d75b3299..a435bdd95c 100644 --- a/core/src/plugins/kubernetes/run.ts +++ b/core/src/plugins/kubernetes/run.ts @@ -852,7 +852,7 @@ export class PodRunner extends PodRunnerParams { * Executes a command in the running Pod. Must be called after `start()`. 
*/ async exec(params: ExecParams) { - const { command, containerName: container, timeoutSec, tty = false } = params + const { command, containerName: container, timeoutSec, tty = false, log } = params let { stdout, stderr, stdin } = params if (tty) { @@ -866,11 +866,14 @@ export class PodRunner extends PodRunnerParams { } const startedAt = new Date() + const containerName = container || this.pod.spec.containers[0].name + + log.debug(`Execing command in ${this.namespace}/Pod/${this.podName}/${containerName}: ${command.join(" ")}`) const result = await this.api.execInPod({ namespace: this.namespace, podName: this.podName, - containerName: container || this.pod.spec.containers[0].name, + containerName, command, stdout, stderr, diff --git a/core/src/plugins/kubernetes/status/status.ts b/core/src/plugins/kubernetes/status/status.ts index 7c275a32bc..93d286535b 100644 --- a/core/src/plugins/kubernetes/status/status.ts +++ b/core/src/plugins/kubernetes/status/status.ts @@ -316,9 +316,9 @@ export async function compareDeployedResources( log.silly( dedent` - Resource(s) with non-ready status found in the cluster: + Resource(s) with non-ready status found in the cluster: - ${descriptions}` + "\n" + ${descriptions}` + "\n" ) result.state = combineStates(deployedStates) diff --git a/core/src/plugins/kubernetes/util.ts b/core/src/plugins/kubernetes/util.ts index 99f1d90cff..fb9f89f101 100644 --- a/core/src/plugins/kubernetes/util.ts +++ b/core/src/plugins/kubernetes/util.ts @@ -187,8 +187,8 @@ export async function getPods( }) .filter( (pod) => - // Filter out failed pods - !(pod.status && pod.status.phase === "Failed") && + // Filter out failed and terminating pods + !(pod.status && (pod.status.phase === "Failed" || pod.status.phase === "Terminating")) && // Filter out evicted pods !(pod.status && pod.status.reason && pod.status.reason.includes("Evicted")) ) @@ -405,11 +405,11 @@ export async function getDeploymentPod({ deploymentName: string namespace: string }) { - const status = await api.apps.readNamespacedDeployment(deploymentName, namespace) - const pods = await getPods(api, namespace, status.spec.selector?.matchLabels || {}) + const resource = await api.apps.readNamespacedDeployment(deploymentName, namespace) + const pods = await getWorkloadPods(api, namespace, resource) const pod = sample(pods) if (!pod) { - throw new PluginError(`Could not a running pod in a deployment: ${deploymentName}`, { + throw new PluginError(`Could not find a running pod in deployment ${deploymentName}`, { deploymentName, namespace, }) diff --git a/core/test/data/test-projects/container/garden.yml b/core/test/data/test-projects/container/garden.yml index fefdd312cd..cf7e5e8e4a 100644 --- a/core/test/data/test-projects/container/garden.yml +++ b/core/test/data/test-projects/container/garden.yml @@ -10,6 +10,9 @@ environments: - name: kaniko - name: kaniko-image-override - name: kaniko-remote-registry + - name: cluster-buildkit + - name: cluster-buildkit-rootless + - name: cluster-buildkit-remote-registry providers: - name: local-kubernetes environments: [local] @@ -48,3 +51,15 @@ providers: kaniko: image: gcr.io/kaniko-project/executor:debug-perf deploymentRegistry: *deploymentRegistry + - <<: *clusterDocker + environments: [cluster-buildkit] + buildMode: cluster-buildkit + - <<: *clusterDocker + environments: [cluster-buildkit-rootless] + buildMode: cluster-buildkit + clusterBuildkit: + rootless: true + - <<: *clusterDocker + environments: [cluster-buildkit-remote-registry] + buildMode: cluster-buildkit + 
deploymentRegistry: *deploymentRegistry diff --git a/core/test/e2e/garden.yml b/core/test/e2e/garden.yml index 46c12c8115..442033fc38 100644 --- a/core/test/e2e/garden.yml +++ b/core/test/e2e/garden.yml @@ -12,7 +12,7 @@ providers: context: gke_garden-dev-200012_europe-west1-b_garden-dev-1 namespace: e2e-tests defaultHostname: dev-1.sys.garden - buildMode: cluster-docker + buildMode: cluster-buildkit setupIngressController: nginx - name: local-kubernetes environments: [local] diff --git a/core/test/integ/src/plugins/kubernetes/commands/pull-image.ts b/core/test/integ/src/plugins/kubernetes/commands/pull-image.ts index 3a15aea64d..adda0af6a9 100644 --- a/core/test/integ/src/plugins/kubernetes/commands/pull-image.ts +++ b/core/test/integ/src/plugins/kubernetes/commands/pull-image.ts @@ -16,7 +16,6 @@ import { KubernetesProvider, KubernetesPluginContext } from "../../../../../../s import { GardenModule } from "../../../../../../src/types/module" import { containerHelpers } from "../../../../../../src/plugins/container/helpers" import { expect } from "chai" -import { LogEntry } from "../../../../../../src/logger/log-entry" import { grouped } from "../../../../../helpers" describe("pull-image plugin command", () => { @@ -38,12 +37,22 @@ describe("pull-image plugin command", () => { ctx = await garden.getPluginContext(provider) } - async function ensureImagePulled(module: GardenModule, log: LogEntry) { + async function removeImage(module: GardenModule) { + const imageId = containerHelpers.getLocalImageId(module, module.version) + await containerHelpers.dockerCli({ + cwd: "/tmp", + args: ["rmi", imageId], + log: garden.log, + ctx, + }) + } + + async function ensureImagePulled(module: GardenModule) { const imageId = containerHelpers.getLocalImageId(module, module.version) const imageHash = await containerHelpers.dockerCli({ cwd: module.buildPath, args: ["images", "-q", imageId], - log, + log: garden.log, ctx, }) @@ -69,8 +78,9 @@ describe("pull-image plugin command", () => { }) it("should pull the image", async () => { + await removeImage(module) await pullModule(ctx as KubernetesPluginContext, module, garden.log) - await ensureImagePulled(module, garden.log) + await ensureImagePulled(module) }) }) @@ -93,8 +103,59 @@ describe("pull-image plugin command", () => { }) it("should pull the image", async () => { + await removeImage(module) + await pullModule(ctx as KubernetesPluginContext, module, garden.log) + await ensureImagePulled(module) + }) + }) + + grouped("cluster-buildkit", "remote-only").context("using an external cluster registry", () => { + let module: GardenModule + + before(async () => { + await init("cluster-buildkit-remote-registry") + + module = graph.getModule("remote-registry-test") + + // build the image + await garden.buildStaging.syncFromSrc(module, garden.log) + + await k8sBuildContainer({ + ctx, + log: garden.log, + module, + }) + }) + + it("should pull the image", async () => { + await removeImage(module) + await pullModule(ctx as KubernetesPluginContext, module, garden.log) + await ensureImagePulled(module) + }) + }) + + grouped("cluster-buildkit").context("using the in cluster registry", () => { + let module: GardenModule + + before(async () => { + await init("cluster-buildkit") + + module = graph.getModule("simple-service") + + // build the image + await garden.buildStaging.syncFromSrc(module, garden.log) + + await k8sBuildContainer({ + ctx, + log: garden.log, + module, + }) + }) + + it("should pull the image", async () => { + await removeImage(module) await pullModule(ctx 
as KubernetesPluginContext, module, garden.log) - await ensureImagePulled(module, garden.log) + await ensureImagePulled(module) }) }) }) diff --git a/core/test/integ/src/plugins/kubernetes/container/.gitignore b/core/test/integ/src/plugins/kubernetes/container/.gitignore new file mode 100644 index 0000000000..684ae5d80c --- /dev/null +++ b/core/test/integ/src/plugins/kubernetes/container/.gitignore @@ -0,0 +1 @@ +!build \ No newline at end of file diff --git a/core/test/integ/src/plugins/kubernetes/container/build.ts b/core/test/integ/src/plugins/kubernetes/container/build/build.ts similarity index 66% rename from core/test/integ/src/plugins/kubernetes/container/build.ts rename to core/test/integ/src/plugins/kubernetes/container/build/build.ts index 4a6729836d..fa59821afe 100644 --- a/core/test/integ/src/plugins/kubernetes/container/build.ts +++ b/core/test/integ/src/plugins/kubernetes/container/build/build.ts @@ -6,22 +6,22 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -import { expectError, grouped } from "../../../../../helpers" -import { Garden } from "../../../../../../src/garden" -import { ConfigGraph } from "../../../../../../src/config-graph" +import { expectError, grouped } from "../../../../../../helpers" +import { Garden } from "../../../../../../../src/garden" +import { ConfigGraph } from "../../../../../../../src/config-graph" import { k8sBuildContainer, k8sGetContainerBuildStatus, -} from "../../../../../../src/plugins/kubernetes/container/build/build" -import { PluginContext } from "../../../../../../src/plugin-context" -import { KubernetesProvider } from "../../../../../../src/plugins/kubernetes/config" +} from "../../../../../../../src/plugins/kubernetes/container/build/build" +import { PluginContext } from "../../../../../../../src/plugin-context" +import { KubernetesProvider } from "../../../../../../../src/plugins/kubernetes/config" import { expect } from "chai" -import { getContainerTestGarden } from "./container" -import { containerHelpers } from "../../../../../../src/plugins/container/helpers" -import { dockerDaemonContainerName } from "../../../../../../src/plugins/kubernetes/constants" -import { KubeApi } from "../../../../../../src/plugins/kubernetes/api" -import { getSystemNamespace } from "../../../../../../src/plugins/kubernetes/namespace" -import { getDockerDaemonPodRunner } from "../../../../../../src/plugins/kubernetes/container/build/cluster-docker" +import { getContainerTestGarden } from "../container" +import { containerHelpers } from "../../../../../../../src/plugins/container/helpers" +import { dockerDaemonContainerName } from "../../../../../../../src/plugins/kubernetes/constants" +import { KubeApi } from "../../../../../../../src/plugins/kubernetes/api" +import { getSystemNamespace } from "../../../../../../../src/plugins/kubernetes/namespace" +import { getDockerDaemonPodRunner } from "../../../../../../../src/plugins/kubernetes/container/build/cluster-docker" describe("kubernetes build flow", () => { let garden: Garden @@ -467,4 +467,213 @@ describe("kubernetes build flow", () => { }) }) }) + + grouped("cluster-buildkit").context("cluster-buildkit mode", () => { + before(async () => { + await init("cluster-buildkit") + }) + + it("should build a simple container", async () => { + const module = graph.getModule("simple-service") + await garden.buildStaging.syncFromSrc(module, garden.log) + + await k8sBuildContainer({ + ctx, + log: garden.log, + module, + }) + }) + + it("should get the build status from the registry", async () => { + 
const module = graph.getModule("simple-service") + await garden.buildStaging.syncFromSrc(module, garden.log) + + await k8sBuildContainer({ + ctx, + log: garden.log, + module, + }) + + const status = await k8sGetContainerBuildStatus({ + ctx, + log: garden.log, + module, + }) + + expect(status.ready).to.be.true + }) + + grouped("remote-only").it("should support pulling from private registries", async () => { + const module = graph.getModule("private-base") + await garden.buildStaging.syncFromSrc(module, garden.log) + + await k8sBuildContainer({ + ctx, + log: garden.log, + module, + }) + }) + + it("should return ready=false status when image doesn't exist in registry", async () => { + const module = graph.getModule("simple-service") + await garden.buildStaging.syncFromSrc(module, garden.log) + + module.spec.image = "skee-ba-dee-skoop" + + const status = await k8sGetContainerBuildStatus({ + ctx, + log: garden.log, + module, + }) + + expect(status.ready).to.be.false + }) + + it("should throw if attempting to pull from private registry without access", async () => { + const module = graph.getModule("inaccessible-base") + await garden.buildStaging.syncFromSrc(module, garden.log) + + await expectError( + () => + k8sBuildContainer({ + ctx, + log: garden.log, + module, + }), + (err) => { + expect(err.message).to.include("authorization failed") + } + ) + }) + }) + + grouped("cluster-buildkit").context("cluster-buildkit-rootless mode", () => { + before(async () => { + await init("cluster-buildkit-rootless") + }) + + it("should build a simple container", async () => { + const module = graph.getModule("simple-service") + await garden.buildStaging.syncFromSrc(module, garden.log) + + await k8sBuildContainer({ + ctx, + log: garden.log, + module, + }) + }) + + it("should get the build status from the registry", async () => { + const module = graph.getModule("simple-service") + await garden.buildStaging.syncFromSrc(module, garden.log) + + await k8sBuildContainer({ + ctx, + log: garden.log, + module, + }) + + const status = await k8sGetContainerBuildStatus({ + ctx, + log: garden.log, + module, + }) + + expect(status.ready).to.be.true + }) + + grouped("remote-only").it("should support pulling from private registries", async () => { + const module = graph.getModule("private-base") + await garden.buildStaging.syncFromSrc(module, garden.log) + + await k8sBuildContainer({ + ctx, + log: garden.log, + module, + }) + }) + + it("should return ready=false status when image doesn't exist in registry", async () => { + const module = graph.getModule("simple-service") + await garden.buildStaging.syncFromSrc(module, garden.log) + + module.spec.image = "skee-ba-dee-skoop" + + const status = await k8sGetContainerBuildStatus({ + ctx, + log: garden.log, + module, + }) + + expect(status.ready).to.be.false + }) + + it("should throw if attempting to pull from private registry without access", async () => { + const module = graph.getModule("inaccessible-base") + await garden.buildStaging.syncFromSrc(module, garden.log) + + await expectError( + () => + k8sBuildContainer({ + ctx, + log: garden.log, + module, + }), + (err) => { + expect(err.message).to.include("authorization failed") + } + ) + }) + }) + + grouped("cluster-buildkit", "remote-only").context("cluster-buildkit-remote-registry mode", () => { + before(async () => { + await init("cluster-buildkit-remote-registry") + }) + + it("should push to configured deploymentRegistry if specified", async () => { + const module = graph.getModule("remote-registry-test") + await 
garden.buildStaging.syncFromSrc(module, garden.log) + + await k8sBuildContainer({ + ctx, + log: garden.log, + module, + }) + }) + + it("should get the build status from the registry", async () => { + const module = graph.getModule("remote-registry-test") + await garden.buildStaging.syncFromSrc(module, garden.log) + + await k8sBuildContainer({ + ctx, + log: garden.log, + module, + }) + + const status = await k8sGetContainerBuildStatus({ + ctx, + log: garden.log, + module, + }) + + expect(status.ready).to.be.true + }) + + it("should return ready=false status when image doesn't exist in registry", async () => { + const module = graph.getModule("remote-registry-test") + await garden.buildStaging.syncFromSrc(module, garden.log) + + module.version.versionString = "v-0000000000" + + const status = await k8sGetContainerBuildStatus({ + ctx, + log: garden.log, + module, + }) + + expect(status.ready).to.be.false + }) + }) }) diff --git a/core/test/integ/src/plugins/kubernetes/container/build/buildkit.ts b/core/test/integ/src/plugins/kubernetes/container/build/buildkit.ts new file mode 100644 index 0000000000..b204ad9cc7 --- /dev/null +++ b/core/test/integ/src/plugins/kubernetes/container/build/buildkit.ts @@ -0,0 +1,161 @@ +/* + * Copyright (C) 2018-2020 Garden Technologies, Inc. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +import { getContainerTestGarden } from "../container" +import { KubernetesProvider } from "../../../../../../../src/plugins/kubernetes/config" +import { Garden } from "../../../../../../../src" +import { PluginContext } from "../../../../../../../src/plugin-context" +import { + ensureBuildkit, + buildkitDeploymentName, + buildkitAuthSecretName, +} from "../../../../../../../src/plugins/kubernetes/container/build/buildkit" +import { KubeApi } from "../../../../../../../src/plugins/kubernetes/api" +import { getNamespace } from "../../../../../../../src/plugins/kubernetes/namespace" +import { expect } from "chai" +import { cloneDeep } from "lodash" +import { buildDockerAuthConfig } from "../../../../../../../src/plugins/kubernetes/init" +import { dockerAuthSecretKey } from "../../../../../../../src/plugins/kubernetes/constants" +import { grouped } from "../../../../../../helpers" + +describe("ensureBuildkit", () => { + let garden: Garden + let provider: KubernetesProvider + let ctx: PluginContext + let api: KubeApi + let namespace: string + + before(async () => { + garden = await getContainerTestGarden("cluster-buildkit") + }) + + beforeEach(async () => { + provider = await garden.resolveProvider(garden.log, "local-kubernetes") + ctx = await garden.getPluginContext(provider) + api = await KubeApi.factory(garden.log, ctx, provider) + namespace = await getNamespace({ log: garden.log, ctx, provider }) + }) + + after(async () => { + if (garden) { + await garden.close() + } + }) + + grouped("cluster-buildkit").context("cluster-buildkit mode", () => { + it("deploys buildkit if it isn't already in the namespace", async () => { + try { + await api.apps.deleteNamespacedDeployment(buildkitDeploymentName, namespace) + } catch {} + + const deployed = await ensureBuildkit({ + ctx, + provider, + log: garden.log, + api, + namespace, + }) + + // Make sure deployment is there + await api.apps.readNamespacedDeployment(buildkitDeploymentName, namespace) + + expect(deployed).to.be.true + }) + + it("creates a docker auth secret from 
configured imagePullSecrets", async () => { + await ensureBuildkit({ + ctx, + provider, + log: garden.log, + api, + namespace, + }) + await api.core.readNamespacedSecret(buildkitAuthSecretName, namespace) + }) + + it("creates an empty docker auth secret if there are no imagePullSecrets", async () => { + const _provider = cloneDeep(provider) + _provider.config.imagePullSecrets = [] + + await ensureBuildkit({ + ctx, + provider: _provider, + log: garden.log, + api, + namespace, + }) + + const secret = await api.core.readNamespacedSecret(buildkitAuthSecretName, namespace) + const expectedConfig = await buildDockerAuthConfig([], api) + + const decoded = JSON.parse(Buffer.from(secret.data![dockerAuthSecretKey], "base64").toString()) + expect(decoded).to.eql(expectedConfig) + }) + + it("returns false if buildkit is already deployed", async () => { + await ensureBuildkit({ + ctx, + provider, + log: garden.log, + api, + namespace, + }) + const deployed = await ensureBuildkit({ + ctx, + provider, + log: garden.log, + api, + namespace, + }) + expect(deployed).to.be.false + }) + }) + + grouped("cluster-buildkit-rootless").context("cluster-buildkit-rootless mode", () => { + it("deploys in rootless mode", async () => { + try { + await api.apps.deleteNamespacedDeployment(buildkitDeploymentName, namespace) + } catch {} + + provider.config.clusterBuildkit = { rootless: true } + + await ensureBuildkit({ + ctx, + provider, + log: garden.log, + api, + namespace, + }) + + const deployment = await api.apps.readNamespacedDeployment(buildkitDeploymentName, namespace) + + expect(deployment.spec.template.spec?.containers[0].securityContext?.runAsUser).to.equal(1000) + }) + + it("deploys again if switching from normal to rootless mode", async () => { + await ensureBuildkit({ + ctx, + provider, + log: garden.log, + api, + namespace, + }) + + provider.config.clusterBuildkit = { rootless: true } + + const deployed = await ensureBuildkit({ + ctx, + provider, + log: garden.log, + api, + namespace, + }) + expect(deployed).to.be.true + }) + }) +}) diff --git a/core/test/unit/src/plugins/kubernetes/container/.gitignore b/core/test/unit/src/plugins/kubernetes/container/.gitignore new file mode 100644 index 0000000000..684ae5d80c --- /dev/null +++ b/core/test/unit/src/plugins/kubernetes/container/.gitignore @@ -0,0 +1 @@ +!build \ No newline at end of file diff --git a/core/test/unit/src/plugins/kubernetes/container/build.ts b/core/test/unit/src/plugins/kubernetes/container/build/kaniko.ts similarity index 97% rename from core/test/unit/src/plugins/kubernetes/container/build.ts rename to core/test/unit/src/plugins/kubernetes/container/build/kaniko.ts index 9874dbddc8..dc061b37e3 100644 --- a/core/test/unit/src/plugins/kubernetes/container/build.ts +++ b/core/test/unit/src/plugins/kubernetes/container/build/kaniko.ts @@ -10,7 +10,7 @@ import { kanikoBuildFailed, getKanikoFlags, DEFAULT_KANIKO_FLAGS, -} from "../../../../../../src/plugins/kubernetes/container/build/kaniko" +} from "../../../../../../../src/plugins/kubernetes/container/build/kaniko" import { expect } from "chai" describe("kaniko build", () => { diff --git a/core/test/unit/src/plugins/kubernetes/hot-reload.ts b/core/test/unit/src/plugins/kubernetes/hot-reload.ts index 787b787caa..97097b361a 100644 --- a/core/test/unit/src/plugins/kubernetes/hot-reload.ts +++ b/core/test/unit/src/plugins/kubernetes/hot-reload.ts @@ -9,7 +9,7 @@ import { platform } from "os" import { expect } from "chai" import td from "testdouble" -import { HotReloadableResource, RSYNC_PORT_NAME } 
from "../../../../../src/plugins/kubernetes/hot-reload/hot-reload" +import { HotReloadableResource } from "../../../../../src/plugins/kubernetes/hot-reload/hot-reload" import { setPlatform, makeTestGarden, TestGarden, getDataDir } from "../../../../helpers" import { ConfigGraph } from "../../../../../src/config-graph" @@ -21,6 +21,7 @@ import { makeCopyCommand, filesForSync, } from "../../../../../src/plugins/kubernetes/hot-reload/helpers" +import { rsyncPortName } from "../../../../../src/plugins/kubernetes/constants" describe("configureHotReload", () => { it("should correctly augment a resource manifest with containers and volume for hot reloading", async () => { @@ -99,7 +100,7 @@ describe("configureHotReload", () => { timeoutSeconds: 3, successThreshold: 1, failureThreshold: 5, - tcpSocket: { port: (RSYNC_PORT_NAME) }, + tcpSocket: { port: (rsyncPortName) }, }, volumeMounts: [ { diff --git a/core/test/unit/src/plugins/kubernetes/init.ts b/core/test/unit/src/plugins/kubernetes/init.ts index 90313fd02e..2851925e40 100644 --- a/core/test/unit/src/plugins/kubernetes/init.ts +++ b/core/test/unit/src/plugins/kubernetes/init.ts @@ -142,7 +142,7 @@ describe("prepareDockerAuth", () => { td.replace(api, "upsert") }) it("should merge both", async () => { - const res = await prepareDockerAuth(api, ctx, basicProvider, garden.log) + const res = await prepareDockerAuth(api, basicProvider, "default") const dockerAuth = jsonLoadBase64(res.data![dockerAuthSecretKey]) expect(dockerAuth).to.haveOwnProperty("auths") expect(dockerAuth.auths.myDockerRepo).to.equal("simple-auth") @@ -187,9 +187,10 @@ describe("prepareDockerAuth", () => { ) td.replace(api, "upsert") }) + it("should fail when both are missing", async () => { await expectError( - () => prepareDockerAuth(api, ctx, basicProvider, garden.log), + () => prepareDockerAuth(api, basicProvider, "default"), (e) => expect(e).to.be.instanceof(ConfigurationError) ) }) diff --git a/docs/guides/cloud-provider-setup.md b/docs/guides/cloud-provider-setup.md index 2661a8ff75..58a1cd01fa 100644 --- a/docs/guides/cloud-provider-setup.md +++ b/docs/guides/cloud-provider-setup.md @@ -145,13 +145,13 @@ Then, run garden --env=eks plugins kubernetes cluster-init ``` -and +and finally ```sh garden --env=eks deploy ``` -Now you should be good to go. +In order to set up in-cluster building with an ECR registry, please refer to the [In-cluster Building](./in-cluster-building.md) guide, and specifically the section on [using in-cluster building with ECR](./in-cluster-building.md#using-in-cluster-building-with-ecr). Note: In order to dynamically provision EBS/EFS volumes using `persistenvolumeclaim` modules, consult the [storage classes documentation](https://docs.aws.amazon.com/eks/latest/userguide/storage-classes.html) provided by AWS. diff --git a/docs/guides/in-cluster-building.md b/docs/guides/in-cluster-building.md index 256615faa0..8f159e8243 100644 --- a/docs/guides/in-cluster-building.md +++ b/docs/guides/in-cluster-building.md @@ -11,61 +11,155 @@ This guide assumes you've already read through the [Remote Kubernetes](./remote- ## Security considerations -First off, you should only use in-cluster building in development clusters! Production clusters should not run the -builder services for multiple reasons, both to do with resource and security concerns. +First off, you should only use in-cluster building in development and testing clusters! Production clusters should not run the builder services for multiple reasons, both to do with resource and security concerns. 
-You should also avoid using in-cluster building in clusters where you don't control/trust all the code being deployed, -i.e. multi-tenant setups (where tenants are external, or otherwise not fully trusted). +You should also avoid using in-cluster building in clusters where you don't control/trust all the code being deployed, i.e. multi-tenant setups (where tenants are external, or otherwise not fully trusted). -## Requirements +## General requirements -In-cluster building works with _most_ Kubernetes clusters, provided they have enough resources allocated. We have -tested on GKE, AKS, EKS, DigitalOcean, and some custom installations. +In-cluster building works with _most_ Kubernetes clusters, provided they have enough resources allocated and meet some basic requirements. We have tested it on GKE, AKS, EKS, DigitalOcean, and various other custom installations. -Specifically, the clusters need the following: +The specific requirements vary by the [_build mode_](#build-modes) used, and whether you're using the optional in-cluster registry or not. -- Support for `hostPort`, and for reaching `hostPort`s from the node/Kubelet. This should work out-of-the-box in most standard setups, but clusters using Cilium for networking may need to configure this specifically, for example. -- At least 2GB of RAM _on top of your own service requirements_. More RAM is strongly recommended if you have many concurrent developers or CI builds. -- Support for `PersistentVolumeClaim`s and enough disk space for layer caches and the in-cluster image registry. +In all cases you'll need at least 2GB of RAM _on top of your own service requirements_. More RAM is strongly recommended if you have many concurrent developers or CI builds. + +For the [`cluster-docker`](#cluster-docker) and [`kaniko`](#kaniko) modes, and the (optional) in-cluster image registry, support for `PersistentVolumeClaim`s is required, with enough disk space for layer caches and built images. The in-cluster registry also requires support for `hostPort`, and for reaching `hostPort`s from the node/Kubelet. This should work out-of-the-box in most standard setups, but clusters using Cilium for networking may need to configure this specifically, for example. You can—_and should_—adjust the allocated resources and storage in the provider configuration, under [resources](../reference/providers/kubernetes.md#providersresources) and [storage](../reference/providers/kubernetes.md#providersstorage). See the individual modes below as well for more information on how to allocate resources appropriately. +We also strongly recommend a separate image registry to use for built images. Garden can also—and does by default—deploy an in-cluster registry. The latter is convenient to test things out and may be fine for individual users or small teams. However, we generally recommend using managed container registries (such as ECR, GCR etc.) since they tend to perform better, they scale more easily, and don't need to be operated by your team. See the [Configuring a deployment registry](#configuring-a-deployment-registry) section for more details. + ## Build modes Garden supports multiple methods for building images and making them available to the cluster: -1. Cluster Docker -2. Kaniko -3. Local Docker +1. [**`kaniko`**](#kaniko) — Individual [Kaniko](https://github.com/GoogleContainerTools/kaniko) pods created for each build in the `garden-system` namespace. +2. 
[**`cluster-buildkit`**](#cluster-buildkit) _(experimental)_— A [BuildKit](https://github.com/moby/buildkit) deployment created for each project namespace. +3. [**`cluster-docker`**](#cluster-docker) — A single Docker daemon installed in the `garden-system` namespace and shared between users/deployments. +4. `local-docker` — Build using the local Docker daemon on the developer/CI machine before pushing to the cluster/registry. + +The `local-docker` mode is set by default. You should definitely use that when using _Docker for Desktop_, _Minikube_ and most other local development clusters. + +The other modes—which are why you're reading this guide—all build your images inside your development/testing cluster, so you don't need to run Docker on your machine, and avoid having to build locally and push build artifacts over the wire to the cluster for every change to your code. + +The remote building options each have some pros and cons. You'll find more details below but **here are our general recommendations** at the moment: + +- [**`kaniko`**](#kaniko) is a solid choice for most cases and is _currently our first recommendation_. It is battle-tested among Garden's most demanding users (including the Garden team itself). It also scales horizontally, since individual Pods are created for each build. +- [**`cluster-buildkit`**](#cluster-buildkit) is a new addition and is for now considered experimental, **but** we are hoping to make that the default in the future. Unlike the other options, which deploy cluster-wide services in the `garden-system` namespace, a [BuildKit](https://github.com/moby/buildkit) Deployment is dynamically created in each project namespace and requires no other cluster-wide services. This mode also offers a _rootless_ option, which runs without any elevated privileges, in clusters that support it. +- [**`cluster-docker`**](#cluster-docker) was the first implementation included with Garden. It's pretty quick and efficient for small team setups, but relies on a single Docker daemon for all users of a cluster, and also requires supporting services in `garden-system` and some operations to keep it from filling its data volume. It is *no longer recommended* and we may deprecate it in future releases. + +Let's look at how each mode works in more detail, and how you configure them: + +### kaniko + +This mode uses an individual [Kaniko](https://github.com/GoogleContainerTools/kaniko) Pod for each image build. + +The Kaniko project provides a compelling alternative to the standard Docker daemon because it can run without special privileges on the cluster, and is thus more secure. It may also scale better because it doesn't rely on a single daemon shared across users, so builds are executed in individual Pods and don't share the same resources of a single Pod. This also removes the need to provision another persistent volume, which the Docker daemon needs for its layer cache. + +In this mode, builds are executed as follows: + +1. Your code (build context) is synchronized to a sync service in the cluster, making it available to Kaniko pods. +2. A Kaniko pod is created for the build in the `garden-system` namespace. +3. Kaniko pulls caches from the [deployment registry](#configuring-a-deployment-registry), builds the image, and then pushes the built image back to the registry, which makes it available to the cluster. 
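+
+The configuration details are covered under "Configuration and requirements" further below, but as a rough sketch, enabling this mode in your project configuration might look something like the following (the context, hostname and namespace values here are illustrative placeholders, not defaults):
+
+```yaml
+kind: Project
+name: my-project
+...
+providers:
+  - name: kubernetes
+    context: my-dev-cluster               # <- placeholder, your cluster's kubectl context
+    buildMode: kaniko
+    deploymentRegistry:
+      hostname: my-registry.example.com   # <- placeholder
+      namespace: my-project               # <- placeholder
+```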
+ +#### Comparison + +The trade-off compared to the [`cluster-docker`](#cluster-docker) is generally in performance, partly because it relies only on the Docker registry to cache layers, and has no local cache. There are also some occasional issues and incompatibilities, so your mileage may vary. + +Compared to [`cluster-buildkit`](#cluster-buildkit), Kaniko may be a bit slower because it has no local cache. It also requires cluster-wide services to be installed and operated, and for each user to have access to those services in the `garden-system` namespace, which can be a problem in some environments. It is however currently considered more "battle-tested", since the [`cluster-buildkit`](#cluster-buildkit) mode is a recent addition. + +#### Configuration and requirements + +Enable this by setting `buildMode: kaniko` in your `kubernetes` provider configuration, and running `garden plugins kubernetes cluster-init --env=` to install required cluster-wide service. -The _Cluster Docker_ and _Kaniko_ modes build container images inside your development cluster, so you don't need to -run Docker on your machine, and avoid having to build locally and push build artifacts over the wire to the cluster -for every change to your code. +By default, Garden will install an NFS volume provisioner into `garden-system` in order to be able to efficiently synchronize build sources to the cluster and then attaching those to the Kaniko pods. You can also [specify a storageClass](../reference/providers/kubernetes.md#providersstoragesyncstorageclass) to provide another _ReadWriteMany_ capable storage class to use instead of NFS. This may be advisable if your cloud provider provides a good alternative, or if you already have such a provisioner installed. -The _Local Docker_ mode is the default. You should definitely use that when using _Docker for Desktop_, _Minikube_ -and most other local development clusters, and also if you're using Garden to deploy to staging/production clusters -(more on [security considerations](#security-considerations) above). +Note the difference in how resources for the builder are allocated between Kaniko and the other modes. For this mode, the resource configuration applies to _each Kaniko pod_. See the [builder resources](../reference/providers/kubernetes.md#providersresourcesbuilder) reference for details. -Let's look at how each mode works, and how you configure them: +{% hint style="info" %} +If you're using ECR on AWS, you may need to create a cache repository manually for Kaniko to store caches. + +That is, if you have a repository like, `my-org/my-image`, you need to manually create a repository next to it called `my-org/my-image/cache`. + +You can also select a different name for the cache repository and pass the path to Kaniko via the `--cache-repo` flag, which you can set on the [`extraFlags`](../reference/providers/kubernetes.md#providerskanikoextraFlags) field. See [this GitHub comment](https://github.com/GoogleContainerTools/kaniko/issues/410#issuecomment-433229841) in the Kaniko repo for more details. + +This does not appear to be an issue for GCR on GCP. We haven't tested this on other container repositories. +{% endhint %} + +You can provide extra arguments to Kaniko via the [`extraFlags`](../reference/providers/kubernetes.md#providerskanikoextraFlags) field. 
Users with projects with a large number of files should take a look at the `--snapshotMode=redo` and `--use-new-run` options as these can provide [significant performance improvements](https://github.com/GoogleContainerTools/kaniko/releases/tag/v1.0.0). Please refer to the [official docs](https://github.com/GoogleContainerTools/kaniko#additional-flags) for the full list of available flags.
-### Cluster Docker
-The Cluster Docker mode installs a standalone Docker daemon into your cluster, that is then used for builds across
-all users of the clusters, along with a handful of other supporting services. Enable this mode by setting
-`buildMode: cluster-docker` in your `kubernetes` provider configuration.
+### cluster-buildkit
+
+With this mode, a [BuildKit](https://github.com/moby/buildkit) Deployment is dynamically created in each project namespace to perform in-cluster builds.
+
+In this mode, builds are executed as follows:
+
+1. BuildKit is automatically deployed to the project namespace, if it hasn't already been deployed there.
+2. Your code (build context) is synchronized directly to the BuildKit deployment.
+3. BuildKit imports caches from the [deployment registry](#configuring-a-deployment-registry), builds the image, and then pushes the built image and caches back to the registry.
+
+#### Comparison
+
+_This mode is a recent addition and is still considered experimental_. **However**, the general plan is for this to become the recommended approach, because it has several benefits compared to the alternatives.
+
+- It requires **no cluster-wide services or permissions** to be managed, and thus no permissions outside of a single namespace for each user/project.
+- By extension, operators/users **don't need to run a cluster initialization command** ahead of building and deploying projects. The BuildKit deployment is automatically installed and updated ahead of builds, as needed.
+- It **does not rely on persistent volumes**. Other modes need to either install an NFS provisioner, or have a ReadWriteMany storage class provided and configured by the user.
+- BuildKit offers a [rootless](https://github.com/moby/buildkit/blob/master/docs/rootless.md) mode (see below for how to enable it and some caveats). If it's supported on your cluster, this, coupled with the per-namespace isolation, makes `cluster-buildkit` by far the most secure option.
+- BuildKit is a very efficient builder, and uses a combination of local and registry-based caching, so it **should perform better than [`kaniko`](#kaniko)** in most cases, and for long-running namespaces as good as [`cluster-docker`](#cluster-docker).
+
+Beyond being less tested in the wild (for the moment), there are a couple of drawbacks to consider:
+
+- It doesn't scale quite as horizontally as Kaniko, since there is a single deployment per project namespace, instead of a pod for every single build.
+- The local cache is ephemeral, and local to each project namespace. This means users only share a cache at the registry level, much like with Kaniko. The [`cluster-docker`](#cluster-docker) daemon has a persistent local cache that is shared across a cluster (but in turn needs to be maintained and [cleaned up](#cleaning-up-cached-images)). The effect of this is most pronounced for short-lived namespaces, e.g. ones created in CI runs, where the local cache won't exist ahead of the builds.
+
+#### Configuration and requirements
+
+Enable this mode by setting `buildMode: cluster-buildkit` in your `kubernetes` provider configuration.
Unlike other remote building modes, no further cluster-wide installation or initialization is required.
+
+In order to enable [rootless](https://github.com/moby/buildkit/blob/master/docs/rootless.md) mode, add the following to your `kubernetes` provider configuration:
+
+```yaml
+clusterBuildkit:
+  rootless: true
+```
+
+*Note that not all clusters can currently support rootless operation, and that you may need to configure your cluster with this in mind. Please see the [BuildKit docs](https://github.com/moby/buildkit/blob/master/docs/rootless.md) for details.*
+
+You should also set the builder resource requests/limits. For this mode, the resource configuration applies to _each BuildKit deployment_, i.e. for _each project namespace_. See the [builder resources](../reference/providers/kubernetes.md#providersresourcesbuilder) reference for details.
+
+### cluster-docker
+
+The `cluster-docker` mode installs a standalone Docker daemon into your cluster, which is then used for builds across all users of the cluster, along with a handful of other supporting services.
+
+{% hint style="warning" %}
+The `cluster-docker` build mode may be deprecated in an upcoming release.
+{% endhint %}
 In this mode, builds are executed as follows:
 1. Your code (build context) is synchronized to a sync service in the cluster, making it available to the Docker daemon.
 2. A build is triggered in the Docker daemon.
-3. The built image is pushed to an in-cluster registry (which is automatically installed), which makes it available to the cluster.
+3. The built image is pushed to the [deployment registry](#configuring-a-deployment-registry), which makes it available to the cluster.
+
+#### Comparison
+
+The Docker daemon is of course tried and tested, and is an efficient builder. However, it's not designed with multi-tenancy in mind and is a slightly awkward fit for the context of building images in a shared cluster. It also requires a fair bit of operation and several supporting services deployed alongside it in the `garden-system` namespace.
+
+*As of now, we only recommend this option for certain scenarios, e.g. clusters serving individuals, small teams or other low-load setups.*
-After enabling this mode (we currently still default to the `local-docker` mode), you will need to run `garden plugins kubernetes cluster-init --env=` for each applicable environment, in order to install the required cluster-wide services. Those services include the Docker daemon itself, as well as an image registry, a sync service for receiving build contexts, two persistent volumes, an NFS volume provisioner for one of those volumes, and a couple of small utility services.
+#### Configuration and requirements
-Optionally, you can also enable [BuildKit](https://github.com/moby/buildkit). In most cases, this should work well and be more performant, but remains optional for now. If you have `cluster-docker` set as your `buildMode` you can enable BuildKit for an environment as follows:
+Enable this mode by setting `buildMode: cluster-docker` in your `kubernetes` provider configuration.
+
+After enabling this mode, you will need to run `garden plugins kubernetes cluster-init --env=` for each applicable environment, in order to install the required cluster-wide services. Those services include the Docker daemon itself, as well as an image registry, a sync service for receiving build contexts, two persistent volumes, an NFS volume provisioner for one of those volumes, and a couple of small utility services.
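+
+For example, if your project defines an environment named `dev` (an illustrative name, substitute one of your own environments), the initialization command would be:
+
+```sh
+# Installs the cluster-wide services required by the cluster-docker build mode
+garden plugins kubernetes cluster-init --env=dev
+```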
+ +By default, Garden will install an NFS volume provisioner into `garden-system` in order to be able to efficiently synchronize build sources to the cluster and then attaching those to the Kaniko pods. You can also [specify a storageClass](../reference/providers/kubernetes.md#providersstoragesyncstorageclass) to provide another _ReadWriteMany_ capable storage class to use instead of NFS. This may be advisable if your cloud provider provides a good alternative, or if you already have such a provisioner installed. + +Optionally, you can also enable [BuildKit](https://github.com/moby/buildkit) to be used by the Docker daemon. _This is not to be confused with the [`cluster-buildkit`](#cluster-buildkit) build mode, which doesn't use Docker at all._ In most cases, this should work well and offer a bit of added performance, but it remains optional for now. If you have `cluster-docker` set as your `buildMode` you can enable BuildKit for an environment by adding the following to your `kubernetes` provider configuration: ```yaml clusterDocker: @@ -76,41 +170,145 @@ Make sure your cluster has enough resources and storage to support the required services are shared across all users of the cluster. Please look at the [resources](../reference/providers/kubernetes.md#providersresources) and [storage](../reference/providers/kubernetes.md#providersstorage) sections in the provider reference for details. -### Kaniko +### Local Docker -This mode works _mostly_ the same way as Cluster Docker, but replaces the Docker daemon with [Kaniko](https://github.com/GoogleContainerTools/kaniko). Enable this by setting `buildMode: kaniko` in your `kubernetes` provider configuration, and running `garden plugins kubernetes cluster-init --env=` to install required cluster-wide service. +This is the default mode. It is the least efficient one for remote clusters, but requires no additional configuration or services to be deployed to the cluster. For remote clusters, you do however need to explicitly configure a [deployment registry](#configuring-a-deployment-registry), and obviously you'll need to have Docker running locally. -You can provide extra arguments to Kaniko via the [`extraFlags`](../reference/providers/kubernetes.md#providerskanikoextraFlags) field. Users with projects with a large number of files should take a look at the `--snapshoteMode=redo` and `--use-new-run` options as these can provide [significant performance improvements](https://github.com/GoogleContainerTools/kaniko/releases/tag/v1.0.0). Please refer to the [official docs](https://github.com/GoogleContainerTools/kaniko#additional-flags) for the full list of available flags. +See the [Local Docker builds](./remote-kubernetes.md) section in the Remote Clusters guide for details. -{% hint style="info" %} -If you're using ECR on AWS, you may need to create the cache repository manually. +## Configuring a deployment registry -That is, if you have a repository like, `my-org/my-image`, you need to manually create a repository next to it called `my-org/my-image/cache`. +To deploy a built image to a remote Kubernetes cluster, the image first needs to be pushed to a container registry that is accessible to the cluster. We refer to this as a _deployment registry_. Garden offers two options to handle this process: -You can also select a different name for the cache repository and pass the path to Kaniko via the `--cache-repo` flag, which you can set on the [`extraFlags`](../reference/providers/kubernetes.md#providerskanikoextraFlags) field. 
See [this GitHub comment](https://github.com/GoogleContainerTools/kaniko/issues/410#issuecomment-433229841) in the Kaniko repo for more details. +1. An in-cluster registry. +2. An external registry, e.g. a cloud provider managed registry like ECR or GCR. **(recommended)** -This did not appear to be the case for GCR on GCP. We haven't tested this on other container repositories. +The in-cluster registry is a simple way to get started with Garden that requires no configuration. To set it up, leave the `deploymentRegistry` field on the `kubernetes` provider config undefined, and run `garden plugins kubernetes cluster-init --env=` to install the registry. This is nice and convenient, but is _not a particularly good approach for clusters with many users or lots of builds_. When using the in-cluster registry you need to take care of [cleaning it up routinely](#cleaning-up-cached-images), and it may become a performance and redundancy bottleneck with many users and frequent (or heavy) builds. + +So, **for any scenario with a non-trivial amount of users and builds, we strongly suggest configuring a separate registry outside of your cluster.** If your cloud provider offers a managed option, that's usually a good choice. + +To configure a deployment registry, you need to specify at least the `deploymentRegistry` field on your `kubernetes` provider, and in many cases you also need to provide a Secret in order to authenticate with the registry via the `imagePullSecrets` field: + +```yaml +kind: Project +name: my-project +... +providers: + - name: kubernetes + ... + deploymentRegistry: + hostname: my-private-registry.com # <--- the hostname of your registry + namespace: my-project # <--- the namespace to use within your registry + imagePullSecrets: + - name: my-deployment-registry-secret # <--- the name and namespace of a valid Kubernetes imagePullSecret + namespace: default +``` + +Now say, if you specify `hostname: my-registry.com` and `namespace: my-project-id` for the `deploymentRegistry` field, and you have a container module named `some-module` in your project, it will be tagged and pushed to `my-registry.com/my-project-id/some-module:v:` after building. That image ID will be then used in Kubernetes manifests when running containers. + +For this to work, you in most cases also need to provide the authentication necessary for both the cluster to read the image and for the builder to push to the registry. We use the same format and mechanisms as Kubernetes _imagePullSecrets_ for this. See [this guide](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/) for how to create the secret, **but keep in mind that for this context, the authentication provided must have write privileges to the configured registry and namespace.** + +See below for specific instruction for working with ECR. + +{% hint style="warning" %} +Note: If you're using the [`kaniko`](#kaniko) or [`cluster-docker`](#cluster-docker) build mode, you need to re-run `garden plugins kubernetes cluster-init` any time you add or modify imagePullSecrets, for them to work. {% endhint %} -The Kaniko project is still improving, but it provides a -compelling alternative to the standard Docker daemon because it can run without special privileges on the cluster, -and is thus more secure. It may also scale better because it doesn't rely on a single daemon shared across users, so -builds are executed in individual Pods and don't share the same resources of a single Pod. 
This also removes the need
-to provision another persistent volume, which the Docker daemon needs for its layer cache.
+### Using in-cluster building with ECR
-The trade-off is generally in performance, at least for the moment, partly because it relies on the Docker registry to
-cache layers. There are also some known issues and incompatibilities, so your mileage may vary.
+For AWS ECR (Elastic Container Registry), you need to enable the ECR credential helper once for the repository by adding an `imagePullSecret` for your ECR repository.
-Note the difference in how resources for the builder are allocated. See the
-[builder resources](../reference/providers/kubernetes.md#providersresourcesbuilder) reference for details.
+First create a `config.json` somewhere with the following contents (`` and `` are placeholders that you need to replace for your repo):
-### Local Docker
+```json
+{
+  "credHelpers": {
+    ".dkr.ecr..amazonaws.com": "ecr-login"
+  }
+}
+```
-This is the default mode. It is the least efficient one for remote clusters, but requires no additional services to be
-deployed to the cluster. For remote clusters, you do however need to explicitly configure a _deployment registry_, and
-to have Docker running locally.
+Next create the _imagePullSecret_ in your cluster (feel free to replace the default namespace, just make sure it's correctly referenced in the config below):
-See the [Local Docker builds](./remote-kubernetes.md) section in the Remote Clusters guide for details.
+```sh
+kubectl --namespace default create secret generic ecr-config \
+  --from-file=.dockerconfigjson=./config.json \
+  --type=kubernetes.io/dockerconfigjson
+```
+
+Finally, add the secret reference to your `kubernetes` provider configuration:
+
+```yaml
+kind: Project
+name: my-project
+...
+providers:
+  - name: kubernetes
+    ...
+    imagePullSecrets:
+      - name: ecr-config
+        namespace: default
+```
+
+### Using in-cluster building with GCR
+
+To use in-cluster building with GCR (Google Container Registry) you need to set up authentication, with the following steps:
+
+1. Create a Google Service Account (GSA).
+2. Give the GSA the appropriate permissions.
+3. Create a JSON key for the account.
+4. Create an _imagePullSecret_ for using the JSON key.
+5. Add a reference to the imagePullSecret in your Garden project configuration.
+
+First, create a Google Service Account:
+
+```sh
+# You can replace the gcr-access name of course, but make sure you also replace it in the commands below
+gcloud iam service-accounts create gcr-access --project ${PROJECT_ID}
+```
+
+Then, to grant the Google Service Account the right permissions to push to GCR, run the following gcloud commands:
+
+```sh
+# Create a role with the required permissions
+gcloud iam roles create gcrAccess \
+  --project ${PROJECT_ID} \
+  --permissions=storage.objects.get,storage.objects.create,storage.objects.list,storage.objects.update,storage.objects.delete,storage.buckets.create,storage.buckets.get
+
+# Attach the role to the newly created Google Service Account
+gcloud projects add-iam-policy-binding ${PROJECT_ID} \
+  --member=serviceAccount:gcr-access@${PROJECT_ID}.iam.gserviceaccount.com \
+  --role=projects/${PROJECT_ID}/roles/gcrAccess
+```
+
+Next create a JSON key file for the GSA:
+
+```sh
+gcloud iam service-accounts keys create keyfile.json --iam-account gcr-access@${PROJECT_ID}.iam.gserviceaccount.com
+```
+
+Then prepare the _imagePullSecret_ in your Kubernetes cluster.
Run the following command, replacing `gcr.io` with the correct registry hostname if appropriate (e.g. `eu.gcr.io` or `asia.gcr.io`):
+
+```sh
+kubectl --namespace default create secret docker-registry gcr-config \
+  --docker-server=gcr.io \
+  --docker-username=_json_key \
+  --docker-password="$(cat keyfile.json)"
+```
+
+Finally, add the created _imagePullSecret_ to your `kubernetes` provider configuration:
+
+```yaml
+kind: Project
+name: my-project
+...
+providers:
+  - name: kubernetes
+    ...
+    imagePullSecrets:
+      - name: gcr-config
+        namespace: default
+```
## Publishing images
@@ -132,7 +330,7 @@ image: my-repo/my-image:v1.2.3 # <- omit the tag here if you'd like to use the
## Cleaning up cached images
-In order to avoid disk-space issues in the cluster, the `kubernetes` provider exposes a utility command:
+In order to avoid disk-space issues in the cluster when using the in-cluster registry and/or either of the [`kaniko`](#kaniko) or [`cluster-docker`](#cluster-docker) build modes, the `kubernetes` provider exposes a utility command:
```sh
garden --env= plugins kubernetes cleanup-cluster-registry
@@ -140,15 +338,17 @@ The command does the following:
-1. Looks through all Pods in the cluster to see which images/tags are in use, and flags all other images as deleted in the in-cluster registry.
+1. Looks through all Pods in the cluster to see which images/tags are in use, and flags all other images as deleted in the in-cluster registry.
 2. Restarts the registry in read-only mode.
 3. Runs the registry garbage collection.
 4. Restarts the registry again without the read-only mode.
-5. When using the `cluster-docker` build mode, we additionally untag in the Docker daemon all images that are no longer in the registry, and then clean up the dangling image layers by running `docker image prune`.
+5. When using the [`cluster-docker`](#cluster-docker) build mode, we additionally untag in the Docker daemon all images that are no longer in the registry, and then clean up the dangling image layers by running `docker image prune`.
There are plans to do this automatically when disk-space runs low, but for now you can run this manually or set up your own cron jobs.
+**You can avoid this entirely by using a remote [deployment registry](#configuring-a-deployment-registry) and the [`cluster-buildkit`](#cluster-buildkit) build mode.**
+
## Pulling base images from private registries
The in-cluster builder may need to be able to pull base images from a private registry, e.g. if your Dockerfile starts with something like this:
@@ -177,27 +377,6 @@ providers:
This registry auth secret will then be copied and passed to the in-cluster builder. You can specify as many as you like, and they will be merged together.
-> Note: Any time you add or modify imagePullSecrets after first initializing your cluster, you need to run `garden plugins kubernetes cluster-init` again for them to work when pulling base images!
-
-## Using private registries for deployments
-
-You can also use your private registry to store images after building and for deployment. If you've completed the steps above for configuring your `imagePullSecrets`, you can also configure a `deploymentRegistry` in your provider configuration:
-
-```yaml
-kind: Project
-name: my-project
-...
-providers:
-  - name: kubernetes
-    ...
- imagePullSecrets: - - name: my-registry-secret - namespace: default - deploymentRegistry: - hostname: my-private-registry.com - namespace: my-project # <--- make sure your configured imagePullSecrets can write to repos in this namespace -``` - -This is often more scalable than using the default in-cluster registry, and may fit better with existing deployment pipelines. Just make sure the configured `imagePullSecrets` have the privileges to push to repos in the configured namespace. - -For GKE, take a look at the [gke example project](https://github.com/garden-io/garden/tree/0.12.16/examples/gke)) to see the additional steps required to set up in-cluster building on GKE with Kaniko and GCR as a deployment registry. +{% hint style="warning" %} +Note: If you're using the [`kaniko`](#kaniko) or [`cluster-docker`](#cluster-docker) build mode, you need to re-run `garden plugins kubernetes cluster-init` any time you add or modify imagePullSecrets, for them to work when pulling base images! +{% endhint %} diff --git a/examples/gke/README.md b/examples/gke/README.md index 087a0c495c..d81e44cd89 100644 --- a/examples/gke/README.md +++ b/examples/gke/README.md @@ -1,8 +1,8 @@ # gke project -A variant on the `demo-project` example, with an example configuration for GKE with in-cluster building with Kaniko. +A variant on the `demo-project` example, with an example configuration for GKE with in-cluster building with Kaniko or BuildKit. -Two environments are configured, `gke-kaniko` and `gke-kaniko-gcr`. Both use Kaniko for in-cluster builds, but the latter uses GCR as a deployment registry (which is often preferable to deploying an in-cluster registry). +A few environments are configured, `gke-kaniko`, `gke-kaniko-gcr`, `gke-buildkit` and `gke-buildkit-gcr`. The first two use Kaniko for in-cluster builds, the last two use BuildKit. The ones with the `-gcr` suffix use GCR as the deployment registry, and the other ones use the basic in-cluster registry (which is simpler to set up but won't scale as well as using GCR). ## Setup @@ -28,6 +28,7 @@ gcloud alpha billing projects link $PROJECT_ID --billing-account= # Enable the required APIs (this can sometimes take a while). gcloud services enable compute.googleapis.com container.googleapis.com servicemanagement.googleapis.com --project $PROJECT_ID ``` + ### Step 2 - Create a GKE cluster (if you don't already have one) See the general GKE instructions [here](https://cloud.google.com/kubernetes-engine/docs/how-to/creating-a-zonal-cluster). @@ -80,14 +81,16 @@ You'll need to replace the values under the `variables` keys in the `garden.yml` You can optionally set up an ingress controller in the cluster and point a DNS hostname to it, and set that under `variables.default-hostname`. 
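+
+As a rough illustration, the relevant part of the project `garden.yml` might end up looking something like this (all of these values are placeholders, substitute your own project ID, cluster location and hostname):
+
+```yaml
+variables:
+  gkeContext: gke_my-gcp-project_europe-west1-b_my-cluster  # <- the kubectl context for your GKE cluster
+  defaultHostname: my-team.example.com                      # <- optional ingress hostname
+  namespace: gke-testing-${local.username}                  # <- the namespace to deploy to
+```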
-### Step 5 - Initialize the cluster +### Step 6 - Initialize the cluster (Kaniko only) -Install the cluster-wide services Garden needs by running: +When using Kaniko, you need to install the cluster-wide services Garden requires by running: ```sh garden plugins kubernetes cluster-init --env= ``` +_This is not necessary when using BuildKit._ + ## Usage ### Deploy your services @@ -96,5 +99,5 @@ Finally, to build and deploy your services to your new GKE cluster, run: ```sh # Choose which environment to deploy with the --env parameter -garden deploy --env= +garden deploy --env= ``` diff --git a/examples/gke/garden.yml b/examples/gke/garden.yml index 25ee66b22f..df3ff006af 100644 --- a/examples/gke/garden.yml +++ b/examples/gke/garden.yml @@ -4,25 +4,28 @@ environments: - name: gke-kaniko variables: buildMode: kaniko + # Use an in-cluster registry instead of GCR + deploymentRegistry: null imagePullSecrets: [] - name: gke-kaniko-gcr variables: buildMode: kaniko - deploymentRegistry: - # Replace these values as appropriate - hostname: eu.gcr.io # <- set this according to the region your cluster runs in - namespace: garden-dev-200012 # <- set this to the project ID of the target cluster - imagePullSecrets: - # Make sure this matches the name and namespace of the secret you created - - name: gcr-config - namespace: default + - name: gke-buildkit + variables: + buildMode: cluster-buildkit + # Use an in-cluster registry instead of GCR + deploymentRegistry: null + imagePullSecrets: [] + - name: gke-buildkit-gcr + variables: + buildMode: cluster-buildkit providers: - name: kubernetes context: ${var.gkeContext} namespace: ${var.namespace} defaultHostname: ${var.defaultHostname} buildMode: ${var.buildMode} - deploymentRegistry: ${var.deploymentRegistry}? # <- note the ? 
suffix, which allows this to be undefined + deploymentRegistry: ${var.deploymentRegistry} imagePullSecrets: ${var.imagePullSecrets} variables: # Replace these values as appropriate @@ -32,3 +35,11 @@ variables: defaultHostname: ${local.env.CIRCLE_BUILD_NUM || local.username}-gke.dev-1.sys.garden # > the namespace to deploy to in the cluster namespace: gke-testing-${local.env.CIRCLE_BUILD_NUM || local.username} + deploymentRegistry: + # Replace these values as appropriate + hostname: eu.gcr.io # <- set this according to the region your cluster runs in + namespace: garden-dev-200012 # <- set this to the project ID of the target cluster + imagePullSecrets: + # Make sure this matches the name and namespace of the imagePullSecret you've created + - name: gcr-config + namespace: default diff --git a/examples/hot-reload/garden.yml b/examples/hot-reload/garden.yml index 2480899bae..5685796dc8 100644 --- a/examples/hot-reload/garden.yml +++ b/examples/hot-reload/garden.yml @@ -11,4 +11,4 @@ providers: environments: [testing] context: gke_garden-dev-200012_europe-west1-b_garden-dev-1 defaultHostname: ${environment.namespace}.dev-1.sys.garden - buildMode: cluster-docker \ No newline at end of file + buildMode: cluster-buildkit \ No newline at end of file diff --git a/examples/kubernetes-secrets/garden.yml b/examples/kubernetes-secrets/garden.yml index fc9ea6d9a0..aef8fc71c6 100644 --- a/examples/kubernetes-secrets/garden.yml +++ b/examples/kubernetes-secrets/garden.yml @@ -11,4 +11,4 @@ providers: environments: [testing] context: gke_garden-dev-200012_europe-west1-b_garden-dev-1 defaultHostname: ${environment.namespace}.dev-1.sys.garden - buildMode: cluster-docker + buildMode: kaniko diff --git a/examples/project-variables/garden.yml b/examples/project-variables/garden.yml index 33695448ab..67b18cd850 100644 --- a/examples/project-variables/garden.yml +++ b/examples/project-variables/garden.yml @@ -17,4 +17,4 @@ providers: environments: [testing] context: gke_garden-dev-200012_europe-west1-b_garden-dev-1 defaultHostname: ${environment.namespace}.dev-1.sys.garden - buildMode: cluster-docker + buildMode: kaniko diff --git a/examples/vote-helm/garden.yml b/examples/vote-helm/garden.yml index df4275a261..55a78a04cf 100644 --- a/examples/vote-helm/garden.yml +++ b/examples/vote-helm/garden.yml @@ -14,6 +14,6 @@ providers: - name: kubernetes environments: [testing] context: gke_garden-dev-200012_europe-west1-b_garden-dev-1 - buildMode: cluster-docker + buildMode: kaniko variables: userId: ${local.env.CIRCLE_BUILD_NUM || local.username} diff --git a/examples/vote/garden.yml b/examples/vote/garden.yml index c7942de22c..ddd671ed64 100644 --- a/examples/vote/garden.yml +++ b/examples/vote/garden.yml @@ -23,9 +23,7 @@ providers: - name: kubernetes environments: [testing] context: ${var.remoteContext} - buildMode: cluster-docker - clusterDocker: - enableBuildKit: true + buildMode: cluster-buildkit variables: userId: ${local.env.CIRCLE_BUILD_NUM || local.username} remoteContext: gke_garden-dev-200012_europe-west1-b_garden-dev-1 diff --git a/images/README.md b/images/README.md index 38341521e2..353bb69783 100644 --- a/images/README.md +++ b/images/README.md @@ -1,3 +1,5 @@ # images Here we place container images that we maintain and reference when using Garden. + +To build, just use `garden build`, and to publish, `garden publish`. 
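For example, refreshing these support images might look roughly like this, assuming you run the commands from this directory and have push access to the configured `gardendev` repositories:

```sh
cd images
garden build    # builds every module in this project, e.g. buildkit, buildkit-rootless and k8s-util
garden publish  # publishes the built images to the repository configured in each module's `image` field
```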
diff --git a/images/buildkit/Dockerfile b/images/buildkit/Dockerfile new file mode 100644 index 0000000000..29987ffab9 --- /dev/null +++ b/images/buildkit/Dockerfile @@ -0,0 +1,20 @@ +ARG BASE_IMAGE_SUFFIX +ARG BUILDKIT_VERSION=v0.8.1 +FROM moby/buildkit:${BUILDKIT_VERSION} as deps + +RUN apk add --no-cache curl + +# ECR credential helper +RUN cd /tmp && \ + curl -O https://amazon-ecr-credential-helper-releases.s3.us-east-2.amazonaws.com/0.4.0/linux-amd64/docker-credential-ecr-login && \ + chmod +x docker-credential-ecr-login + +# GCR credential helper +RUN curl -fsSL "https://github.com/GoogleCloudPlatform/docker-credential-gcr/releases/download/v2.0.1/docker-credential-gcr_linux_amd64-2.0.1.tar.gz" \ + | tar xz --to-stdout ./docker-credential-gcr \ + > /tmp/docker-credential-gcr && chmod +x /tmp/docker-credential-gcr + +FROM moby/buildkit:${BUILDKIT_VERSION}${BASE_IMAGE_SUFFIX} as output + +COPY --from=deps /tmp/docker-credential-ecr-login /usr/local/bin/docker-credential-ecr-login +COPY --from=deps /tmp/docker-credential-gcr /usr/local/bin/docker-credential-gcr diff --git a/images/buildkit/garden.yml b/images/buildkit/garden.yml new file mode 100644 index 0000000000..593a6ca670 --- /dev/null +++ b/images/buildkit/garden.yml @@ -0,0 +1,21 @@ +kind: Module +type: container +name: buildkit +description: Used for the cluster-buildkit build mode in the kubernetes provider +image: gardendev/buildkit:v0.8.1-4 +dockerfile: Dockerfile +build: + targetImage: output +--- +kind: Module +type: container +name: buildkit-rootless +description: Used for the cluster-buildkit build mode in the kubernetes provider, rootless variant +image: gardendev/buildkit:v0.8.1-4-rootless +dockerfile: Dockerfile +build: + dependencies: + - buildkit + targetImage: output +buildArgs: + BASE_IMAGE_SUFFIX: "-rootless" diff --git a/images/k8s-util/Dockerfile b/images/k8s-util/Dockerfile new file mode 100644 index 0000000000..d48d86e398 --- /dev/null +++ b/images/k8s-util/Dockerfile @@ -0,0 +1,14 @@ +FROM danifernandezs/skopeo:1.41.0-alpine3.10.3 + +RUN apk add --no-cache curl rsync +RUN cd /usr/local/bin && \ + curl -O https://amazon-ecr-credential-helper-releases.s3.us-east-2.amazonaws.com/0.4.0/linux-amd64/docker-credential-ecr-login && \ + chmod +x docker-credential-ecr-login + +RUN adduser -g 1000 -D user && \ + mkdir -p /data && \ + chown -R user:user /data + +USER user + +ADD rsync-server.sh / diff --git a/images/k8s-util/garden.yml b/images/k8s-util/garden.yml new file mode 100644 index 0000000000..02d39ef4bd --- /dev/null +++ b/images/k8s-util/garden.yml @@ -0,0 +1,6 @@ +kind: Module +type: container +name: k8s-util +description: Used by the kubernetes provider for build-related activities +image: gardendev/k8s-util:0.3.1 +dockerfile: Dockerfile diff --git a/images/k8s-util/rsync-server.sh b/images/k8s-util/rsync-server.sh new file mode 100755 index 0000000000..cda0f066dc --- /dev/null +++ b/images/k8s-util/rsync-server.sh @@ -0,0 +1,24 @@ +#!/bin/sh +PORT=${RSYNC_PORT:-"873"} +VOLUME=${VOLUME:-/data} +ALLOW=${ALLOW:-192.168.0.0/16 172.16.0.0/12} + +mkdir -p ${VOLUME} +mkdir -p ${VOLUME}/tmp + +cat <<EOF > /home/user/rsyncd.conf +uid = 1000 +#gid = 1000 +use chroot = no +log file = /dev/stdout +reverse lookup = no +munge symlinks = yes +[volume] + hosts deny = * + hosts allow = ${ALLOW} + read only = false + path = ${VOLUME} + comment = build context volume +EOF + +exec /usr/bin/rsync --no-detach --port=${PORT} --daemon --config /home/user/rsyncd.conf diff --git a/images/support-images.garden.yml 
b/images/support-images.garden.yml new file mode 100644 index 0000000000..01b60a3d80 --- /dev/null +++ b/images/support-images.garden.yml @@ -0,0 +1,6 @@ +kind: Project +name: garden-support-images +environments: + - name: local +providers: + - name: container
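For a project consuming this feature, switching over should only require a small provider tweak. A minimal sketch, using a made-up context and registry and following the example configs above:

```yaml
kind: Project
name: my-project
environments:
  - name: dev
providers:
  - name: kubernetes
    environments: [dev]
    context: my-kube-context        # <- hypothetical kubeconfig context
    buildMode: cluster-buildkit     # <- the new build mode
    # Optional: push built images to an external registry instead of the in-cluster one
    deploymentRegistry:
      hostname: eu.gcr.io           # <- replace with your registry hostname
      namespace: my-gcp-project     # <- replace with your registry namespace / project ID
```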