Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,17 @@ docker-build: test ## Build docker image with the manager.
docker-push: ## Push docker image with the manager.
docker push ${IMG}

# also generates a placeholder cert for the webhook - this cert is not intended to be valid
.PHONY: build-deploy
build-deploy: ## Create a deployment file that can be applied with `kubectl apply -f deploy.yaml`
cd config/manager && kustomize edit set image controller=${ECRIMAGES}
kustomize build config/default > deploy.yaml
openssl req -x509 -nodes -days 1 -newkey rsa:2048 -keyout tls.key -out tls.crt -subj "/CN=not-a-real-cn/O=not-a-real-o" > /dev/null 2>&1
$(eval export KEY_B64 := $(shell cat tls.key | base64))
$(eval export CERT_B64 := $(shell cat tls.crt | base64))
yq -i e '(.[] as $$item | select(.metadata.name == "webhook-cert" and .kind == "Secret") | .data."tls.crt") = env(CERT_B64)' deploy.yaml 2>&1
yq -i e '(.[] as $$item | select(.metadata.name == "webhook-cert" and .kind == "Secret") | .data."tls.key") = env(KEY_B64)' deploy.yaml 2>&1
rm tls.key tls.crt

.PHONY: manifest
manifest: ## Generate CRD manifest
Expand Down Expand Up @@ -144,3 +151,20 @@ api-reference: ## Update documentation in docs/api-reference.md
docs:
mkdir -p site
mkdocs build

# NB webhook tests can only run if the controller is deployed to the cluster
webhook-e2e-test-namespace := "webhook-e2e-test"

.PHONY: webhook-e2e-test
webhook-e2e-test:
@kubectl create namespace $(webhook-e2e-test-namespace) > /dev/null 2>&1 || true # ignore already exists error
LOG_LEVEL=debug
cd test && go test \
-p 1 \
-count 1 \
-timeout 10m \
-v \
./suites/webhook/... \
--ginkgo.focus="${FOCUS}" \
--ginkgo.skip="${SKIP}" \
--ginkgo.v
31 changes: 25 additions & 6 deletions cmd/aws-application-networking-k8s/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@ package main

import (
"flag"
"os"

"github.com/aws/aws-application-networking-k8s/pkg/webhook"
"github.com/go-logr/zapr"
"go.uber.org/zap/zapcore"
"os"
k8swebhook "sigs.k8s.io/controller-runtime/pkg/webhook"

"github.com/aws/aws-application-networking-k8s/pkg/aws"
"github.com/aws/aws-application-networking-k8s/pkg/utils/gwlog"
Expand Down Expand Up @@ -49,7 +50,6 @@ import (
"github.com/aws/aws-application-networking-k8s/pkg/config"
"github.com/aws/aws-application-networking-k8s/pkg/k8s"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
"sigs.k8s.io/controller-runtime/pkg/webhook"
)

var (
Expand Down Expand Up @@ -128,14 +128,24 @@ func main() {
setupLog.Fatal("cloud client setup failed: %s", err)
}

// do not create the webhook server when running locally
var webhookServer k8swebhook.Server
isLocalDev := config.DevMode != ""
if !isLocalDev {
webhookServer = k8swebhook.NewServer(k8swebhook.Options{
Port: 9443,
CertDir: "/etc/webhook-cert/",
CertName: "tls.crt",
KeyName: "tls.key",
})
}
Copy link
Contributor

@zijun726911 zijun726911 Mar 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

~~Where this dns name come from webhook-service.{namespace}.svc ? Does k8s defined that or we define that? do we need to pass it when start the webhookServer? ~~

oh, nvm, I saw this part:

kind: Service
metadata:
  name: webhook-service
  namespace: aws-application-networking-system
spec:
  ports:
    - port: 443
      targetPort: 9443
  selector:
    control-plane: gateway-api-controller


Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When using make run, DevMode will be non-empty, so locally running the controller will skip this webhook configuration. Otherwise, the controller will only start if the webhook cert files are present (at /etc/webhook-cert/tls.crt and tls.key).

mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
Scheme: scheme,
Metrics: metricsserver.Options{
BindAddress: metricsAddr,
},
WebhookServer: webhook.NewServer(webhook.Options{
Port: 9443,
}),
WebhookServer: webhookServer,
HealthProbeBindAddress: probeAddr,
LeaderElection: enableLeaderElection,
LeaderElectionID: "amazon-vpc-lattice.io",
Expand All @@ -144,6 +154,15 @@ func main() {
setupLog.Fatal("manager setup failed:", err)
}

if !isLocalDev {
// register webhook handlers
readinessGateInjector := webhook.NewPodReadinessGateInjector(
mgr.GetClient(),
log.Named("pod-readiness-gate-injector"),
)
webhook.NewPodMutator(scheme, readinessGateInjector).SetupWithManager(mgr)
}

finalizerManager := k8s.NewDefaultFinalizerManager(mgr.GetClient())

// parent logging scope for all controllers
Expand Down
63 changes: 4 additions & 59 deletions config/default/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -1,26 +1,10 @@
# Adds namespace to all resources.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Much of this commented code came from the aws-load-balancer-controller. I've tried to remove what we don't need. One notable difference is that the gateway controller enables the webhook configuration by default, even though you still have to opt into it for any given namespace.

#namespace: code-aws-application-networking-system

# Value of this field is prepended to the
# names of all resources, e.g. a deployment named
# "wordpress" becomes "alices-wordpress".
# Note that it should also match with the prefix (text before '-') of the namespace
# field above.
#namePrefix: code-

# Labels to add to all resources and selectors.
#commonLabels:
# someName: someValue

bases:
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../crds
- ../rbac
- ../manager
# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in
# crd/kustomization.yaml
#- ../webhook
# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required.
#- ../certmanager
- ../webhook
# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'.
#- ../prometheus

Expand All @@ -33,42 +17,3 @@ patchesStrategicMerge:
# Mount the controller config file for loading manager configurations
# through a ComponentConfig type
#- manager_config_patch.yaml

# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in
# crd/kustomization.yaml
#- manager_webhook_patch.yaml

# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'.
# Uncomment 'CERTMANAGER' sections in crd/kustomization.yaml to enable the CA injection in the admission webhooks.
# 'CERTMANAGER' needs to be enabled to use ca injection
#- webhookcainjection_patch.yaml

# the following config is for teaching kustomize how to do var substitution
vars:
# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix.
#- name: CERTIFICATE_NAMESPACE # namespace of the certificate CR
# objref:
# kind: Certificate
# group: cert-manager.io
# version: v1
# name: serving-cert # this name should match the one in certificate.yaml
# fieldref:
# fieldpath: metadata.namespace
#- name: CERTIFICATE_NAME
# objref:
# kind: Certificate
# group: cert-manager.io
# version: v1
# name: serving-cert # this name should match the one in certificate.yaml
#- name: SERVICE_NAMESPACE # namespace of the service
# objref:
# kind: Service
# version: v1
# name: webhook-service
# fieldref:
# fieldpath: metadata.namespace
#- name: SERVICE_NAME
# objref:
# kind: Service
# version: v1
# name: webhook-service
21 changes: 21 additions & 0 deletions config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,5 +60,26 @@ spec:
requests:
cpu: 10m
memory: 64Mi
volumeMounts:
- mountPath: /etc/webhook-cert
name: webhook-cert
readOnly: true
serviceAccountName: gateway-api-controller
terminationGracePeriodSeconds: 10
volumes:
- name: webhook-cert
secret:
defaultMode: 420
secretName: webhook-cert
---
Copy link
Contributor Author

@erikfuller erikfuller Feb 27, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This placeholder certificate is generated during make build-deploy:

When calling a webhook, the API server performs the following validations:

  1. Cert validity window has not passed
  2. SAN matches DNS (e.g. webhook-service.aws-application-networking-system.svc)
  3. Cert is signed by a known authority OR matches the ca field on the client configuration for the webhook (either signer or direct match)

The placeholder cert is valid for 1 day (will pass check 1 for 1 day) but will fail checks 2 and 3 with this message (from kubectl get events):

 Error creating: Internal error occurred: failed calling webhook "mpod.gwc.k8s.aws": failed to call webhook: Post "https://webhook-service.aws-application-networking-system.svc:443/mutate-pod?timeout=10s": tls: failed to verify certificate: x509: certificate is not valid for any names, but wanted to match webhook-service.aws-application-networking-system.svc

Having a placeholder here is important because the controller's webhook server needs this value to start up successfully. The webhook server does not perform strict validation.

Once the secret is patched with a valid cert, Kubernetes will distribute the value and the webhook server will be notified when the files are updated on the volume mount. See controller-runtime/pkg/webhook/server.go for more details.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the webhook validation don't pass, does the controller still work?(i.e., for only the placeholder cert, can it still enter the (r *routeReconciler) Reconcile() function for a incomming HTTPRoute resource? )

# placeholder secret so volume can mount successfully and controller can start
# populated during make-deploy. Will not pass validations (no CA, expires after 1 day, wrong DNS names)
apiVersion: v1
kind: Secret
metadata:
name: webhook-cert
namespace: aws-application-networking-system
type: kubernetes.io/tls
data:
tls.crt: Cg==
tls.key: Cg==
2 changes: 2 additions & 0 deletions config/prometheus/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- monitor.yaml
2 changes: 2 additions & 0 deletions config/rbac/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
# All RBAC will be applied under this service account in
# the deployment namespace. You may comment out this resource
Expand Down
4 changes: 4 additions & 0 deletions config/webhook/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- manifests.yaml
50 changes: 50 additions & 0 deletions config/webhook/manifests.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
---
apiVersion: admissionregistration.k8s.io/v1
kind: MutatingWebhookConfiguration
metadata:
creationTimestamp: null
name: aws-appnet-gwc-mutating-webhook
webhooks:
- admissionReviewVersions:
- v1
clientConfig:
service:
name: webhook-service
namespace: aws-application-networking-system
path: /mutate-pod
failurePolicy: Fail
name: mpod.gwc.k8s.aws
rules:
- apiGroups:
- ""
apiVersions:
- v1
operations:
- CREATE
resources:
- pods
sideEffects: None
namespaceSelector:
matchExpressions:
- key: application-networking.k8s.aws/pod-readiness-gate-inject
operator: In
values:
- enabled
objectSelector:
matchExpressions:
- key: app.kubernetes.io/name
operator: NotIn
values:
- gateway-api-controller
---
apiVersion: v1
kind: Service
metadata:
name: webhook-service
namespace: aws-application-networking-system
spec:
ports:
- port: 443
targetPort: 9443
selector:
control-plane: gateway-api-controller
Loading