fix(k8s): issues with GCR auth when running in-cluster builds on GKE
This adds a new `gke` example project, with instructions on how to set
up GCR auth with Kaniko. The procedure is unfortunately a bit involved,
but it does follow the suggested best practice in the Kaniko docs:
https://github.com/GoogleContainerTools/kaniko#pushing-to-gcr-using-workload-identity
edvald authored and thsig committed Feb 4, 2021
1 parent e81b7b4 commit 1d01ed6
Showing 28 changed files with 372 additions and 17 deletions.
9 changes: 7 additions & 2 deletions .circleci/config.yml
@@ -788,6 +788,7 @@ workflows:
<<: *only-internal-prs
context: docker
requires: [build-dist]

- test-docker-gcloud:
<<: *only-internal-prs
context: docker
@@ -799,12 +800,10 @@
<<: *only-internal-prs
requires: [build-dist]
- test-windows:
# Don't attempt to run dist tests for external PRs (they won't have access to the required keys)
<<: *only-internal-prs
requires: [build-dist]

- e2e-project:
# Don't attempt to run e2e tests for external PRs (they won't have access to the required keys)
<<: *only-internal-prs
name: e2e-demo-project
project: demo-project
@@ -815,6 +814,12 @@
name: e2e-deployment-strategies
project: deployment-strategies
requires: [build]
- e2e-project:
<<: *only-internal-prs
name: e2e-gke-kaniko-gcr
project: gke
environment: gke-kaniko-gcr
requires: [build]
- e2e-project:
<<: *only-internal-prs
name: e2e-hot-reload
6 changes: 3 additions & 3 deletions core/src/plugins/kubernetes/api.ts
@@ -103,7 +103,7 @@ const crudMap = {
group: "core",
read: "readNamespacedSecret",
create: "createNamespacedSecret",
patch: "patchNamespacedSecret",
replace: "replaceNamespacedSecret",
delete: "deleteNamespacedSecret",
},
}
@@ -485,7 +485,7 @@ export class KubeApi {

try {
await api[crudMap[kind].read](name, namespace)
await api[crudMap[kind].patch](name, namespace, obj)
await api[crudMap[kind].replace](name, namespace, obj)
log.debug(`Patched ${kind} ${namespace}/${name}`)
} catch (err) {
if (err.statusCode === 404) {
@@ -495,7 +495,7 @@ } catch (err) {
} catch (err) {
if (err.statusCode === 409) {
log.debug(`Patched ${kind} ${namespace}/${name}`)
await api[crudMap[kind].patch](name, namespace, obj)
await api[crudMap[kind].replace](name, namespace, obj)
} else {
throw err
}
11 changes: 7 additions & 4 deletions core/src/plugins/kubernetes/container/build.ts
@@ -140,7 +140,7 @@ const buildStatusHandlers: { [mode in ContainerBuildMode]: BuildStatusHandler }
})
return { ready: true }
} catch (err) {
const res = err.detail.result
const res = err.detail?.result

// Non-zero exit code can both mean the manifest is not found, and any other unexpected error
if (res.exitCode !== 0 && !res.stderr.includes("no such manifest")) {
@@ -187,12 +187,15 @@ const buildStatusHandlers: { [mode in ContainerBuildMode]: BuildStatusHandler }
})
return { ready: true }
} catch (err) {
const res = err.detail.result
const res = err.detail?.result || {}

// Non-zero exit code can both mean the manifest is not found, and any other unexpected error
if (res.exitCode !== 0 && !res.stderr.includes("manifest unknown")) {
throw new RuntimeError(`Unable to query registry for image status: ${res.all}`, {
const output = res.allLogs || err.message

throw new RuntimeError(`Unable to query registry for image status: ${output}`, {
command: skopeoCommand,
output: res.all,
output,
})
}
return { ready: false }
14 changes: 13 additions & 1 deletion core/src/plugins/kubernetes/init.ts
@@ -49,6 +49,17 @@ See https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-
a registry auth secret.
`

// Used to automatically support GCR auth on GKE.
// Users can override by setting other values for any of these keys in any of their imagePullSecrets.
const defaultCredHelpers = {
"asia.gcr.io": "gcr",
"eu.gcr.io": "gcr",
"gcr.io": "gcr",
"marketplace.gcr.io": "gcr",
"staging-k8s.gcr.io": "gcr",
"us.gcr.io": "gcr",
}

interface KubernetesProviderOutputs extends PrimitiveMap {
"app-namespace": string
"metadata-namespace": string
@@ -282,6 +293,7 @@ export async function prepareSystem({

// Set auth secret for in-cluster builder
if (provider.config.buildMode !== "local-docker") {
log.info("Updating builder auth secret")
const authSecret = await prepareDockerAuth(sysApi, ctx, sysProvider, log)
await sysApi.upsert({ kind: "Secret", namespace: systemNamespace, obj: authSecret, log })
}
@@ -490,7 +502,7 @@ export async function buildDockerAuthConfig(
credHelpers: { ...accumulator.credHelpers, ...decoded.credHelpers },
}
},
{ experimental: "enabled", auths: {}, credHelpers: {} }
{ experimental: "enabled", auths: {}, credHelpers: defaultCredHelpers }
)
}
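
With `defaultCredHelpers` seeding the reduce above, the Docker config Garden generates for the in-cluster builder now routes the standard GCR hostnames through the `gcr` credential helper, unless a user's `imagePullSecrets` override those keys. A sketch of the default merged config, before any user-provided `auths` or `credHelpers` are folded in (illustrative only, derived from the keys in the diff above):

```sh
# Illustrative only: prints the default shape of the generated Docker config.
cat <<'EOF'
{
  "experimental": "enabled",
  "auths": {},
  "credHelpers": {
    "asia.gcr.io": "gcr",
    "eu.gcr.io": "gcr",
    "gcr.io": "gcr",
    "marketplace.gcr.io": "gcr",
    "staging-k8s.gcr.io": "gcr",
    "us.gcr.io": "gcr"
  }
}
EOF
```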

2 changes: 1 addition & 1 deletion core/src/plugins/kubernetes/kubernetes.ts
@@ -62,7 +62,7 @@ export async function configureProvider({
}

if (config.buildMode === "cluster-docker" || config.buildMode === "kaniko") {
config._systemServices.push("build-sync")
config._systemServices.push("build-sync", "util")

const usingInClusterRegistry =
!config.deploymentRegistry || config.deploymentRegistry.hostname === inClusterRegistryHostname
2 changes: 1 addition & 1 deletion core/src/plugins/kubernetes/util.ts
@@ -29,7 +29,7 @@ import { getChartPath, renderHelmTemplateString } from "./helm/common"
import { HotReloadableResource } from "./hot-reload/hot-reload"
import { ProviderMap } from "../../config/provider"

export const skopeoImage = "gardendev/skopeo:1.41.0-1"
export const skopeoImage = "gardendev/skopeo:1.41.0-2"

const STATIC_LABEL_REGEX = /[0-9]/g
export const workloadTypes = ["Deployment", "DaemonSet", "ReplicaSet", "StatefulSet"]
4 changes: 4 additions & 0 deletions docs/guides/cloud-provider-setup.md
@@ -50,6 +50,10 @@ name: your-project

Run `garden --env=remote plugins kubernetes cluster-init`, then `garden dev --env=remote`. Now you should be good to go.

### Optional: Use in-cluster building with GCR and Kaniko

Take a look at the [gke example project](https://github.com/garden-io/garden/tree/master/examples/gke) to see the additional steps required to set up in-cluster building on GKE with Kaniko and GCR as a deployment registry.

### Optional: Configure DNS

First, get the public IP address of the ingress controller you set up in the previous step. If you configured Garden to set up _nginx_, run: `kubectl describe service --namespace=garden-system garden-nginx-ingress-nginx-controller | grep 'LoadBalancer Ingress'` and make note of the returned IP address.
2 changes: 2 additions & 0 deletions docs/guides/in-cluster-building.md
@@ -199,3 +199,5 @@ providers:
```

This is often more scalable than using the default in-cluster registry, and may fit better with existing deployment pipelines. Just make sure the configured `imagePullSecrets` have the privileges to push to repos in the configured namespace.

For GKE, take a look at the [gke example project](https://github.com/garden-io/garden/tree/master/examples/gke) to see the additional steps required to set up in-cluster building on GKE with Kaniko and GCR as a deployment registry.
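
As a concrete sketch of the `imagePullSecrets` approach for GCR, a registry auth secret can be created from a service account JSON key (the secret name `gcr-auth`, the `default` namespace, and the `key.json` path below are placeholders, not values from this repo):

```sh
# Placeholders: gcr-auth (secret name), key.json (path to a GSA key file).
# _json_key is the documented username for GCR JSON-key auth.
kubectl create secret docker-registry gcr-auth \
  --namespace default \
  --docker-server=gcr.io \
  --docker-username=_json_key \
  --docker-password="$(cat key.json)"
```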
102 changes: 102 additions & 0 deletions examples/gke/README.md
@@ -0,0 +1,102 @@
# gke project

A variant of the `demo-project` example, configured for GKE with in-cluster building using Kaniko.

Two environments are configured, `gke-kaniko` and `gke-kaniko-gcr`. Both use Kaniko for in-cluster builds, but the latter uses GCR as a deployment registry (which is often preferable to deploying an in-cluster registry).

## Setup

### Step 1 - Install the Google Cloud SDK and authenticate

If you haven't already, follow the instructions [here](https://cloud.google.com/sdk/docs/quickstarts) to install the `gcloud` tool, and authenticate with GCP:

```sh
gcloud auth application-default login
```

### Step 2 - Set up a GCP project

Choose a project ID for the demo project and run the following (skip individual steps as appropriate):

```sh
export PROJECT_ID=<id>
# (Skip if you already have a project)
gcloud projects create $PROJECT_ID
# If you haven't already, enable billing for the project (required for the APIs below).
# You need an account ID (of the form 0X0X0X-0X0X0X-0X0X0X) to use for billing.
gcloud alpha billing projects link $PROJECT_ID --billing-account=<account ID>
# Enable the required APIs (this can sometimes take a while).
gcloud services enable compute.googleapis.com container.googleapis.com servicemanagement.googleapis.com --project $PROJECT_ID
```
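
To confirm the APIs are enabled before moving on, you can optionally list them:

```sh
gcloud services list --enabled --project $PROJECT_ID | grep -E 'compute|container'
```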

### Step 3 - Create a GKE cluster (if you don't already have one)

If you don't already have a GKE cluster to work with, you can create one like this:

```sh
# Replace the cluster name as you see fit, of course.
# The --workload-pool flag makes sure Workload Identity is enabled for the cluster.
gcloud container clusters create garden-gke-example --workload-pool=${PROJECT_ID}.svc.id.goog
```

You can also create the cluster via the GKE console, or add any number of configuration parameters on the command line; **just make sure _Workload Identity_ is enabled when you create the cluster** (note the `--workload-pool` flag in the above example). See the general GKE instructions [here](https://cloud.google.com/kubernetes-engine/docs/how-to/creating-a-zonal-cluster).
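
If you're working with an existing cluster and aren't sure whether Workload Identity is already enabled, one way to check (add `--zone` or `--region` flags to match your cluster):

```sh
# Prints the workload pool (e.g. <project>.svc.id.goog) if Workload Identity
# is enabled, and nothing otherwise.
gcloud container clusters describe garden-gke-example \
  --format="value(workloadIdentityConfig.workloadPool)"
```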

### Step 4 - Configure Workload Identity

For Kaniko to be able to seamlessly authenticate with your GCR registry, you need to use _Workload Identity_ and grant the service account in the `garden-system` namespace access to the GCR registry through it.

To quote the [Kaniko docs](https://github.com/GoogleContainerTools/kaniko#pushing-to-gcr-using-workload-identity) on the subject:

> To authenticate using workload identity you need to run the kaniko pod using a Kubernetes Service Account (KSA) bound to Google Service Account (GSA) which has Storage.Admin permissions to push images to Google Container registry.

In our case, we will use the existing `default` service account in the `garden-system` namespace.

Follow these steps to set all this up:

#### Make sure Workload Identity is enabled for your cluster

If you're using an existing cluster, please see the GKE docs for how to [enable Workload Identity](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity#enable_on_cluster).
You'll need the cluster to have Workload Identity enabled, and for your node pools to have it enabled as well.
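
For reference, here's a sketch of what enabling it on an existing cluster looks like (the cluster and node pool names are examples; the linked docs are the authoritative source):

```sh
# Enable Workload Identity on the cluster.
gcloud container clusters update garden-gke-example \
  --workload-pool=${PROJECT_ID}.svc.id.goog

# Enable the GKE metadata server on each existing node pool.
gcloud container node-pools update default-pool \
  --cluster=garden-gke-example \
  --workload-metadata=GKE_METADATA
```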

#### Create and configure a Google Service Account (GSA)

Please follow the detailed steps [here](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity#authenticating_to) to create a Google Service Account, along with an IAM policy binding that allows the Kubernetes service account to act as the GSA **(note that you should skip creating a new Kubernetes service account and instead attach to the existing `default` service account in the `garden-system` namespace)**.
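
Condensed, those steps look roughly like this, assuming a GSA named `gcr-example` (the name is arbitrary) and the `default` KSA in `garden-system` (which must exist before you can annotate it):

```sh
# Create the Google Service Account.
gcloud iam service-accounts create gcr-example --project ${PROJECT_ID}

# Allow the KSA to impersonate the GSA via Workload Identity.
gcloud iam service-accounts add-iam-policy-binding \
  gcr-example@${PROJECT_ID}.iam.gserviceaccount.com \
  --role roles/iam.workloadIdentityUser \
  --member "serviceAccount:${PROJECT_ID}.svc.id.goog[garden-system/default]"

# Annotate the KSA with the GSA it should act as.
kubectl annotate serviceaccount default \
  --namespace garden-system \
  iam.gke.io/gcp-service-account=gcr-example@${PROJECT_ID}.iam.gserviceaccount.com
```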

Then, to grant the Google Service Account the permissions it needs to push to GCR, run the following `gcloud` commands (replacing `[google-service-account-name]` with your new GSA name):

```sh
# Create a role with the required permissions
gcloud iam roles create gcrAccess \
--project ${PROJECT_ID} \
--permissions=storage.objects.get,storage.objects.create,storage.objects.list,storage.objects.update,storage.objects.delete,storage.buckets.create,storage.buckets.get

# Attach the role to the newly created Google Service Account
gcloud projects add-iam-policy-binding ${PROJECT_ID} \
--member=serviceAccount:[google-service-account-name]@${PROJECT_ID}.iam.gserviceaccount.com \
--role=projects/${PROJECT_ID}/roles/gcrAccess
```
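
You can optionally verify that the binding took effect:

```sh
gcloud projects get-iam-policy ${PROJECT_ID} \
  --flatten="bindings[].members" \
  --filter="bindings.members:[google-service-account-name]@${PROJECT_ID}.iam.gserviceaccount.com" \
  --format="value(bindings.role)"
```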

### Step 5 - Set the variables in the project config

Replace the values under the `variables` key in the project `garden.yml` file, as instructed in the comments in the file.

You can optionally set up an ingress controller in the cluster and point a DNS hostname to it, then set that hostname under `variables.default-hostname`.

### Step 6 - Initialize the cluster

Install the cluster-wide services Garden needs by running:

```sh
garden plugins kubernetes cluster-init --env=<gke-kaniko|gke-kaniko-gcr>
```
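
Once that completes, a quick (optional) sanity check that the system services are up:

```sh
kubectl get pods --namespace garden-system
```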

## Usage

### Deploy your services

Finally, to build and deploy your services to your new GKE cluster, run:

```sh
# Choose which environment to deploy with the --env parameter
garden deploy --env=<gke-kaniko|gke-kaniko-gcr>
```
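
If you've set `variables.default-hostname` and have an ingress controller running, you should then be able to reach the example services (replace the hostname with your own; the paths match the handlers in the example services):

```sh
curl http://<your-hostname>/hello-backend
curl http://<your-hostname>/hello-frontend
```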
4 changes: 4 additions & 0 deletions examples/gke/backend/.dockerignore
@@ -0,0 +1,4 @@
node_modules
Dockerfile
garden.yml
app.yaml
27 changes: 27 additions & 0 deletions examples/gke/backend/.gitignore
@@ -0,0 +1,27 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so

# Folders
_obj
_test

# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out

*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*

_testmain.go

*.exe
*.test
*.prof

.vscode/settings.json
webserver/*server*
11 changes: 11 additions & 0 deletions examples/gke/backend/Dockerfile
@@ -0,0 +1,11 @@
FROM golang:1.8.3-alpine

ENV PORT=8080
EXPOSE ${PORT}
WORKDIR /app

COPY main.go .

RUN go build -o main .

ENTRYPOINT ["./main"]
17 changes: 17 additions & 0 deletions examples/gke/backend/garden.yml
@@ -0,0 +1,17 @@
kind: Module
name: backend
description: Backend service container
type: container
services:
- name: backend
ports:
- name: http
containerPort: 8080
# Maps service:80 -> container:8080
servicePort: 80
ingresses:
- path: /hello-backend
port: http
tasks:
- name: test
command: ["sh", "-c", "echo task output"]
17 changes: 17 additions & 0 deletions examples/gke/backend/main.go
@@ -0,0 +1,17 @@
package main

import (
"fmt"
"net/http"
)

func handler(w http.ResponseWriter, r *http.Request) {
fmt.Fprint(w, "Hello from Go!")
}

func main() {
http.HandleFunc("/hello-backend", handler)
fmt.Println("Server running...")

http.ListenAndServe(":8080", nil)
}
4 changes: 4 additions & 0 deletions examples/gke/frontend/.dockerignore
@@ -0,0 +1,4 @@
node_modules
Dockerfile
garden.yml
app.yaml
12 changes: 12 additions & 0 deletions examples/gke/frontend/Dockerfile
@@ -0,0 +1,12 @@
FROM node:9-alpine

ENV PORT=8080
EXPOSE ${PORT}
WORKDIR /app

COPY package.json /app
RUN npm install

COPY . /app

CMD ["npm", "start"]
27 changes: 27 additions & 0 deletions examples/gke/frontend/app.js
@@ -0,0 +1,27 @@
const express = require('express');
const request = require('request-promise')
const app = express();

const backendServiceEndpoint = `http://backend/hello-backend`

app.get('/hello-frontend', (req, res) => res.send('Hello from the frontend!'));

app.get('/call-backend', (req, res) => {
// Query the backend and return the response
request.get(backendServiceEndpoint)
.then(message => {
message = `Backend says: '${message}'`
res.json({
message,
})
})
.catch(err => {
res.statusCode = 500
res.json({
error: err,
message: "Unable to reach service at " + backendServiceEndpoint,
})
});
});

module.exports = { app }
