diff --git a/common/ambassador/base/service.yaml b/common/ambassador/base/service.yaml
index 1125d583826..440fe38e46f 100644
--- a/common/ambassador/base/service.yaml
+++ b/common/ambassador/base/service.yaml
@@ -17,6 +17,9 @@ spec:
 apiVersion: v1
 kind: Service
 metadata:
+  annotations:
+    # Ambassador is only used on GCP with basic auth.
+    beta.cloud.google.com/backend-config: '{"ports": {"ambassador":"basicauth-backendconfig"}}'
   labels:
     service: ambassador
   name: ambassador
diff --git a/gcp/basic-auth-ingress/base/backend-config.yaml b/gcp/basic-auth-ingress/base/backend-config.yaml
new file mode 100644
index 00000000000..998daddd244
--- /dev/null
+++ b/gcp/basic-auth-ingress/base/backend-config.yaml
@@ -0,0 +1,7 @@
+apiVersion: cloud.google.com/v1beta1
+kind: BackendConfig
+metadata:
+  name: basicauth-backendconfig
+spec:
+  # Jupyter uses websockets so we want to increase the timeout.
+  timeoutSec: 3600
\ No newline at end of file
diff --git a/gcp/basic-auth-ingress/base/config-map.yaml b/gcp/basic-auth-ingress/base/config-map.yaml
index 5f89e15b2a8..1dc9146cef1 100644
--- a/gcp/basic-auth-ingress/base/config-map.yaml
+++ b/gcp/basic-auth-ingress/base/config-map.yaml
@@ -3,8 +3,8 @@ data:
   update_backend.sh: |
     #!/bin/bash
    #
-    # A simple shell script to configure the backend timeouts and health checks by using gcloud.
-
+    # A simple shell script to configure the health checks by using gcloud.
+    set -x
     [ -z ${NAMESPACE} ] && echo Error NAMESPACE must be set && exit 1
     [ -z ${SERVICE} ] && echo Error SERVICE must be set && exit 1
     [ -z ${INGRESS_NAME} ] && echo Error INGRESS_NAME must be set && exit 1
@@ -15,58 +15,67 @@ data:
       exit 1
     fi

-    # Activate the service account, allow 5 retries
-    for i in {1..5}; do gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} && break || sleep 10; done
-
-    NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[0].nodePort}')
-    echo node port is ${NODE_PORT}
-
-    while [[ -z ${BACKEND_NAME} ]]; do
-      BACKENDS=$(kubectl --namespace=${NAMESPACE} get ingress ${INGRESS_NAME} -o jsonpath='{.metadata.annotations.ingress\.kubernetes\.io/backends}')
-      echo "fetching backends info with ${INGRESS_NAME}: ${BACKENDS}"
-      BACKEND_NAME=$(echo $BACKENDS | grep -o "k8s-be-${NODE_PORT}--[0-9a-z]\+")
-      echo "backend name is ${BACKEND_NAME}"
-      sleep 2
-    done
-
-    while [[ -z ${BACKEND_SERVICE} ]];
-      do BACKEND_SERVICE=$(gcloud --project=${PROJECT} compute backend-services list --filter=name~k8s-be-${NODE_PORT}- --uri);
-      echo "Waiting for the backend-services resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}...";
-      sleep 2;
-    done
-
-    while [[ -z ${HEALTH_CHECK_URI} ]];
-      do HEALTH_CHECK_URI=$(gcloud compute --project=${PROJECT} health-checks list --filter=name~${BACKEND_NAME} --uri);
-      echo "Waiting for the healthcheck resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}...";
-      sleep 2;
+    set_health_check() {
+      # Activate the service account, allow 5 retries
+      if [[ ! -z "${GOOGLE_APPLICATION_CREDENTIALS}" ]]; then
+        # TODO(jlewi): As of 0.7 we should always be using workload identity. We can remove it post 0.7.0 once we have workload identity
+        # fully working
+        # Activate the service account, allow 5 retries
+        for i in {1..5}; do gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} && break || sleep 10; done
+      fi
+
+      # For debugging print out what account we are using
+      gcloud auth list
+
+      NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[0].nodePort}')
+      echo node port is ${NODE_PORT}
+
+      while [[ -z ${BACKEND_NAME} ]]; do
+        BACKENDS=$(kubectl --namespace=${NAMESPACE} get ingress ${INGRESS_NAME} -o jsonpath='{.metadata.annotations.ingress\.kubernetes\.io/backends}')
+        echo "fetching backends info with ${INGRESS_NAME}: ${BACKENDS}"
+        BACKEND_NAME=$(echo $BACKENDS | grep -o "k8s-be-${NODE_PORT}--[0-9a-z]\+")
+        echo "backend name is ${BACKEND_NAME}"
+        sleep 2
+      done
+
+      while [[ -z ${BACKEND_SERVICE} ]];
+        do BACKEND_SERVICE=$(gcloud --project=${PROJECT} compute backend-services list --filter=name~k8s-be-${NODE_PORT}- --uri);
+        echo "Waiting for the backend-services resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}...";
+        sleep 2;
+      done
+
+      while [[ -z ${HEALTH_CHECK_URI} ]];
+        do HEALTH_CHECK_URI=$(gcloud compute --project=${PROJECT} health-checks list --filter=name~${BACKEND_NAME} --uri);
+        echo "Waiting for the healthcheck resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}...";
+        sleep 2;
+      done
+
+      echo health check URI is ${HEALTH_CHECK_URI}
+
+      # Since we create the envoy-ingress ingress object before creating the envoy
+      # deployment object, healthcheck will not be configured correctly in the GCP
+      # load balancer. It will default the healthcheck request path to a value of
+      # / instead of the intended /healthz.
+      # Manually update the healthcheck request path to /healthz
+      if [[ ${HEALTHCHECK_PATH} ]]; then
+        echo Running health checks update ${HEALTH_CHECK_URI} with ${HEALTHCHECK_PATH}
+        gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=${HEALTHCHECK_PATH}
+      else
+        echo Running health checks update ${HEALTH_CHECK_URI} with /healthz
+        gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=/healthz
+      fi
+
+      if [[ ${USE_ISTIO} ]]; then
+        # Create the route so healthcheck can pass
+        kubectl apply -f /var/envoy-config/healthcheck_route.yaml
+      fi
+    }
+
+    while true; do
+      set_health_check
+      echo "Backend updated successfully. Waiting 1 hour before updating again."
+      sleep 3600
     done
-
-    echo health check URI is ${HEALTH_CHECK_URI}
-
-    # Since we create the envoy-ingress ingress object before creating the envoy
-    # deployment object, healthcheck will not be configured correctly in the GCP
-    # load balancer. It will default the healthcheck request path to a value of
-    # / instead of the intended /healthz.
-    # Manually update the healthcheck request path to /healthz
-    if [[ ${HEALTHCHECK_PATH} ]]; then
-      echo Running health checks update ${HEALTH_CHECK_URI} with ${HEALTHCHECK_PATH}
-      gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=${HEALTHCHECK_PATH}
-    else
-      echo Running health checks update ${HEALTH_CHECK_URI} with /healthz
-      gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=/healthz
-    fi
-
-    if [[ ${USE_ISTIO} ]]; then
-      # Create the route so healthcheck can pass
-      kubectl apply -f /var/envoy-config/healthcheck_route.yaml
-    fi
-
-    # Since JupyterHub uses websockets we want to increase the backend timeout
-    echo Increasing backend timeout for JupyterHub
-    gcloud --project=${PROJECT} compute backend-services update --global ${BACKEND_SERVICE} --timeout=3600
-
-    echo "Backend updated successfully. Waiting 1 hour before updating again."
-    sleep 3600
 kind: ConfigMap
 metadata:
   name: envoy-config
diff --git a/gcp/basic-auth-ingress/base/kustomization.yaml b/gcp/basic-auth-ingress/base/kustomization.yaml
index 69f99c16a81..5f0cdf32321 100644
--- a/gcp/basic-auth-ingress/base/kustomization.yaml
+++ b/gcp/basic-auth-ingress/base/kustomization.yaml
@@ -1,6 +1,7 @@
 apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 resources:
+- backend-config.yaml
 - cloud-endpoint.yaml
 - cluster-role-binding.yaml
 - cluster-role.yaml
diff --git a/gcp/iap-ingress/base/backend-config.yaml b/gcp/iap-ingress/base/backend-config.yaml
index 4d6c981946c..42cba627b7b 100644
--- a/gcp/iap-ingress/base/backend-config.yaml
+++ b/gcp/iap-ingress/base/backend-config.yaml
@@ -3,6 +3,8 @@ kind: BackendConfig
 metadata:
   name: iap-backendconfig
 spec:
+  # Jupyter uses websockets so we want to increase the timeout.
+  timeoutSec: 3600
   iap:
     enabled: true
     oauthclientCredentials:
diff --git a/gcp/iap-ingress/base/config-map.yaml b/gcp/iap-ingress/base/config-map.yaml
index 5e181c9b322..a6716a934a6 100644
--- a/gcp/iap-ingress/base/config-map.yaml
+++ b/gcp/iap-ingress/base/config-map.yaml
@@ -48,7 +48,8 @@ data:
   setup_backend.sh: |
     #!/usr/bin/env bash
     #
-    # A simple shell script to configure the backend timeouts and health checks by using gcloud.
+    # A simple shell script to configure the JWT audience used with ISTIO
+    set -x
     [ -z ${NAMESPACE} ] && echo Error NAMESPACE must be set && exit 1
     [ -z ${SERVICE} ] && echo Error SERVICE must be set && exit 1
     [ -z ${INGRESS_NAME} ] && echo Error INGRESS_NAME must be set && exit 1
@@ -66,9 +67,15 @@ data:
     fi

     # Activate the service account
-    gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS}
+    if [ ! -z "${GOOGLE_APPLICATION_CREDENTIALS}" ]; then
+      # As of 0.7.0 we should be using workload identity and never setting GOOGLE_APPLICATION_CREDENTIALS.
+      # We keep this for backwards compatibility but can remove it later.
+      gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS}
+    fi
+    # Print out the config for debugging
     gcloud config list
+    gcloud auth list

     NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}')
     echo "node port is ${NODE_PORT}"
@@ -110,30 +117,15 @@ data:
     echo "Clearing lock on service annotation"
     kubectl patch svc "${SERVICE}" -p "{\"metadata\": { \"annotations\": {\"backendlock\": \"\" }}}"

-    checkBackend() {
-      # created by init container.
-      . /var/shared/healthz.env
-
-      # If node port or backend id change, so does the JWT audience.
-      CURR_NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}')
-      read -ra toks <<<"$(gcloud compute --project=${PROJECT} backend-services list --filter=name~k8s-be-${CURR_NODE_PORT}- --format='value(id,timeoutSec)')"
-      CURR_BACKEND_ID="${toks[0]}"
-      CURR_BACKEND_TIMEOUT="${toks[1]}"
-      [[ "$BACKEND_ID" == "$CURR_BACKEND_ID" && "${CURR_BACKEND_TIMEOUT}" -eq 3600 ]]
-    }
-
-    # Verify configuration every 10 seconds.
+    # Loop forever; we don't want to exit because restarting the container leads users to think there might be a problem.
     while true; do
-      if ! checkBackend; then
-        echo "$(date) WARN: Backend check failed, restarting container."
-        exit 1
-      fi
-      sleep 10
+      sleep 3600
     done
   update_backend.sh: |
     #!/bin/bash
     #
-    # A simple shell script to configure the backend timeouts and health checks by using gcloud.
+    # A simple shell script to configure the health checks by using gcloud.
+    set -x

     [ -z ${NAMESPACE} ] && echo Error NAMESPACE must be set && exit 1
     [ -z ${SERVICE} ] && echo Error SERVICE must be set && exit 1
@@ -145,58 +137,63 @@ data:
       exit 1
     fi

-    # Activate the service account, allow 5 retries
-    for i in {1..5}; do gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} && break || sleep 10; done
-
-    NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}')
-    echo node port is ${NODE_PORT}
-
-    while [[ -z ${BACKEND_NAME} ]]; do
-      BACKENDS=$(kubectl --namespace=${NAMESPACE} get ingress ${INGRESS_NAME} -o jsonpath='{.metadata.annotations.ingress\.kubernetes\.io/backends}')
-      echo "fetching backends info with ${INGRESS_NAME}: ${BACKENDS}"
-      BACKEND_NAME=$(echo $BACKENDS | grep -o "k8s-be-${NODE_PORT}--[0-9a-z]\+")
-      echo "backend name is ${BACKEND_NAME}"
-      sleep 2
-    done
-
-    while [[ -z ${BACKEND_SERVICE} ]];
-      do BACKEND_SERVICE=$(gcloud --project=${PROJECT} compute backend-services list --filter=name~k8s-be-${NODE_PORT}- --uri);
-      echo "Waiting for the backend-services resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}...";
-      sleep 2;
-    done
+    if [[ ! -z "${GOOGLE_APPLICATION_CREDENTIALS}" ]]; then
+      # TODO(jlewi): As of 0.7 we should always be using workload identity. We can remove it post 0.7.0 once we have workload identity
+      # fully working
+      # Activate the service account, allow 5 retries
+      for i in {1..5}; do gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} && break || sleep 10; done
+    fi
+
+    set_health_check () {
+      NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}')
+      echo node port is ${NODE_PORT}
+
+      while [[ -z ${BACKEND_NAME} ]]; do
+        BACKENDS=$(kubectl --namespace=${NAMESPACE} get ingress ${INGRESS_NAME} -o jsonpath='{.metadata.annotations.ingress\.kubernetes\.io/backends}')
+        echo "fetching backends info with ${INGRESS_NAME}: ${BACKENDS}"
+        BACKEND_NAME=$(echo $BACKENDS | grep -o "k8s-be-${NODE_PORT}--[0-9a-z]\+")
+        echo "backend name is ${BACKEND_NAME}"
+        sleep 2
+      done
+
+      while [[ -z ${BACKEND_SERVICE} ]];
+        do BACKEND_SERVICE=$(gcloud --project=${PROJECT} compute backend-services list --filter=name~k8s-be-${NODE_PORT}- --uri);
+        echo "Waiting for the backend-services resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}...";
+        sleep 2;
+      done
+
+      while [[ -z ${HEALTH_CHECK_URI} ]];
+        do HEALTH_CHECK_URI=$(gcloud compute --project=${PROJECT} health-checks list --filter=name~${BACKEND_NAME} --uri);
+        echo "Waiting for the healthcheck resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}...";
+        sleep 2;
+      done
+
+      echo health check URI is ${HEALTH_CHECK_URI}
+
+      # Since we create the envoy-ingress ingress object before creating the envoy
+      # deployment object, healthcheck will not be configured correctly in the GCP
+      # load balancer. It will default the healthcheck request path to a value of
+      # / instead of the intended /healthz.
+      # Manually update the healthcheck request path to /healthz
+      if [[ ${HEALTHCHECK_PATH} ]]; then
+        # This is basic auth
+        echo Running health checks update ${HEALTH_CHECK_URI} with ${HEALTHCHECK_PATH}
+        gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=${HEALTHCHECK_PATH}
+      else
+        # /healthz/ready is the health check path for istio-ingressgateway
+        echo Running health checks update ${HEALTH_CHECK_URI} with /healthz/ready
+        gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=/healthz/ready
+        # We need the nodeport for istio-ingressgateway status-port
+        STATUS_NODE_PORT=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.ports[?(@.name=="status-port")].nodePort}')
+        gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --port=${STATUS_NODE_PORT}
+      fi
+    }
-    while [[ -z ${HEALTH_CHECK_URI} ]];
-      do HEALTH_CHECK_URI=$(gcloud compute --project=${PROJECT} health-checks list --filter=name~${BACKEND_NAME} --uri);
-      echo "Waiting for the healthcheck resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}...";
-      sleep 2;
+    while true; do
+      set_health_check
+      echo "Backend updated successfully. Waiting 1 hour before updating again."
+      sleep 3600
     done
-
-    echo health check URI is ${HEALTH_CHECK_URI}
-
-    # Since we create the envoy-ingress ingress object before creating the envoy
-    # deployment object, healthcheck will not be configured correctly in the GCP
-    # load balancer. It will default the healthcheck request path to a value of
-    # / instead of the intended /healthz.
-    # Manually update the healthcheck request path to /healthz
-    if [[ ${HEALTHCHECK_PATH} ]]; then
-      # This is basic auth
-      echo Running health checks update ${HEALTH_CHECK_URI} with ${HEALTHCHECK_PATH}
-      gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=${HEALTHCHECK_PATH}
-    else
-      # /healthz/ready is the health check path for istio-ingressgateway
-      echo Running health checks update ${HEALTH_CHECK_URI} with /healthz/ready
-      gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=/healthz/ready
-      # We need the nodeport for istio-ingressgateway status-port
-      STATUS_NODE_PORT=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.ports[?(@.name=="status-port")].nodePort}')
-      gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --port=${STATUS_NODE_PORT}
-    fi
-
-    # Since JupyterHub uses websockets we want to increase the backend timeout
-    echo Increasing backend timeout for JupyterHub
-    gcloud --project=${PROJECT} compute backend-services update --global ${BACKEND_SERVICE} --timeout=3600
-
-    echo "Backend updated successfully. Waiting 1 hour before updating again."
-    sleep 3600
 kind: ConfigMap
 metadata:
   name: envoy-config
diff --git a/tests/ambassador-base_test.go b/tests/ambassador-base_test.go
index d039c9dc8cf..cfb07756344 100644
--- a/tests/ambassador-base_test.go
+++ b/tests/ambassador-base_test.go
@@ -129,6 +129,9 @@ spec:
 apiVersion: v1
 kind: Service
 metadata:
+  annotations:
+    # Ambassador is only used on GCP with basic auth.
+    beta.cloud.google.com/backend-config: '{"ports": {"ambassador":"basicauth-backendconfig"}}'
   labels:
     service: ambassador
   name: ambassador
diff --git a/tests/basic-auth-ingress-base_test.go b/tests/basic-auth-ingress-base_test.go
index b89f8307a65..25b0b0d9b39 100644
--- a/tests/basic-auth-ingress-base_test.go
+++ b/tests/basic-auth-ingress-base_test.go
@@ -14,6 +14,14 @@ import (
 )

 func writeBasicAuthIngressBase(th *KustTestHarness) {
+	th.writeF("/manifests/gcp/basic-auth-ingress/base/backend-config.yaml", `
+apiVersion: cloud.google.com/v1beta1
+kind: BackendConfig
+metadata:
+  name: basicauth-backendconfig
+spec:
+  # Jupyter uses websockets so we want to increase the timeout.
+  timeoutSec: 3600`)
 	th.writeF("/manifests/gcp/basic-auth-ingress/base/cloud-endpoint.yaml", `
 apiVersion: ctl.isla.solutions/v1
 kind: CloudEndpoint
@@ -73,8 +81,8 @@ data:
   update_backend.sh: |
     #!/bin/bash
     #
-    # A simple shell script to configure the backend timeouts and health checks by using gcloud.
-
+    # A simple shell script to configure the health checks by using gcloud.
+    set -x
     [ -z ${NAMESPACE} ] && echo Error NAMESPACE must be set && exit 1
     [ -z ${SERVICE} ] && echo Error SERVICE must be set && exit 1
     [ -z ${INGRESS_NAME} ] && echo Error INGRESS_NAME must be set && exit 1
@@ -85,58 +93,67 @@ data:
       exit 1
     fi

-    # Activate the service account, allow 5 retries
-    for i in {1..5}; do gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} && break || sleep 10; done
+    set_health_check() {
+      # Activate the service account, allow 5 retries
+      if [[ ! -z "${GOOGLE_APPLICATION_CREDENTIALS}" ]]; then
+        # TODO(jlewi): As of 0.7 we should always be using workload identity. We can remove it post 0.7.0 once we have workload identity
+        # fully working
+        # Activate the service account, allow 5 retries
+        for i in {1..5}; do gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} && break || sleep 10; done
+      fi

-    NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[0].nodePort}')
-    echo node port is ${NODE_PORT}
+      # For debugging print out what account we are using
+      gcloud auth list

-    while [[ -z ${BACKEND_NAME} ]]; do
-      BACKENDS=$(kubectl --namespace=${NAMESPACE} get ingress ${INGRESS_NAME} -o jsonpath='{.metadata.annotations.ingress\.kubernetes\.io/backends}')
-      echo "fetching backends info with ${INGRESS_NAME}: ${BACKENDS}"
-      BACKEND_NAME=$(echo $BACKENDS | grep -o "k8s-be-${NODE_PORT}--[0-9a-z]\+")
-      echo "backend name is ${BACKEND_NAME}"
-      sleep 2
-    done
+      NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[0].nodePort}')
+      echo node port is ${NODE_PORT}

-    while [[ -z ${BACKEND_SERVICE} ]];
-      do BACKEND_SERVICE=$(gcloud --project=${PROJECT} compute backend-services list --filter=name~k8s-be-${NODE_PORT}- --uri);
-      echo "Waiting for the backend-services resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}...";
-      sleep 2;
-    done
+      while [[ -z ${BACKEND_NAME} ]]; do
+        BACKENDS=$(kubectl --namespace=${NAMESPACE} get ingress ${INGRESS_NAME} -o jsonpath='{.metadata.annotations.ingress\.kubernetes\.io/backends}')
+        echo "fetching backends info with ${INGRESS_NAME}: ${BACKENDS}"
+        BACKEND_NAME=$(echo $BACKENDS | grep -o "k8s-be-${NODE_PORT}--[0-9a-z]\+")
+        echo "backend name is ${BACKEND_NAME}"
+        sleep 2
+      done

-    while [[ -z ${HEALTH_CHECK_URI} ]];
-      do HEALTH_CHECK_URI=$(gcloud compute --project=${PROJECT} health-checks list --filter=name~${BACKEND_NAME} --uri);
-      echo "Waiting for the healthcheck resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}...";
-      sleep 2;
-    done
+      while [[ -z ${BACKEND_SERVICE} ]];
+        do BACKEND_SERVICE=$(gcloud --project=${PROJECT} compute backend-services list --filter=name~k8s-be-${NODE_PORT}- --uri);
+        echo "Waiting for the backend-services resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}...";
+        sleep 2;
+      done

-    echo health check URI is ${HEALTH_CHECK_URI}
+      while [[ -z ${HEALTH_CHECK_URI} ]];
+        do HEALTH_CHECK_URI=$(gcloud compute --project=${PROJECT} health-checks list --filter=name~${BACKEND_NAME} --uri);
+        echo "Waiting for the healthcheck resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}...";
+        sleep 2;
+      done

-    # Since we create the envoy-ingress ingress object before creating the envoy
-    # deployment object, healthcheck will not be configured correctly in the GCP
-    # load balancer. It will default the healthcheck request path to a value of
-    # / instead of the intended /healthz.
-    # Manually update the healthcheck request path to /healthz
-    if [[ ${HEALTHCHECK_PATH} ]]; then
-      echo Running health checks update ${HEALTH_CHECK_URI} with ${HEALTHCHECK_PATH}
-      gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=${HEALTHCHECK_PATH}
-    else
-      echo Running health checks update ${HEALTH_CHECK_URI} with /healthz
-      gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=/healthz
-    fi
+      echo health check URI is ${HEALTH_CHECK_URI}

-    if [[ ${USE_ISTIO} ]]; then
-      # Create the route so healthcheck can pass
-      kubectl apply -f /var/envoy-config/healthcheck_route.yaml
-    fi
+      # Since we create the envoy-ingress ingress object before creating the envoy
+      # deployment object, healthcheck will not be configured correctly in the GCP
+      # load balancer. It will default the healthcheck request path to a value of
+      # / instead of the intended /healthz.
+      # Manually update the healthcheck request path to /healthz
+      if [[ ${HEALTHCHECK_PATH} ]]; then
+        echo Running health checks update ${HEALTH_CHECK_URI} with ${HEALTHCHECK_PATH}
+        gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=${HEALTHCHECK_PATH}
+      else
+        echo Running health checks update ${HEALTH_CHECK_URI} with /healthz
+        gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=/healthz
+      fi

-    # Since JupyterHub uses websockets we want to increase the backend timeout
-    echo Increasing backend timeout for JupyterHub
-    gcloud --project=${PROJECT} compute backend-services update --global ${BACKEND_SERVICE} --timeout=3600
+      if [[ ${USE_ISTIO} ]]; then
+        # Create the route so healthcheck can pass
+        kubectl apply -f /var/envoy-config/healthcheck_route.yaml
+      fi
+    }

-    echo "Backend updated successfully. Waiting 1 hour before updating again."
-    sleep 3600
+    while true; do
+      set_health_check
+      echo "Backend updated successfully. Waiting 1 hour before updating again."
+      sleep 3600
+    done
 kind: ConfigMap
 metadata:
   name: envoy-config
@@ -376,6 +393,7 @@ istioNamespace=istio-system
 apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 resources:
+- backend-config.yaml
 - cloud-endpoint.yaml
 - cluster-role-binding.yaml
 - cluster-role.yaml
diff --git a/tests/basic-auth-ingress-overlays-application_test.go b/tests/basic-auth-ingress-overlays-application_test.go
index caa5af1d88d..e2ddb0f935e 100644
--- a/tests/basic-auth-ingress-overlays-application_test.go
+++ b/tests/basic-auth-ingress-overlays-application_test.go
@@ -1,20 +1,20 @@
 package tests_test

 import (
-  "sigs.k8s.io/kustomize/v3/k8sdeps/kunstruct"
-  "sigs.k8s.io/kustomize/v3/k8sdeps/transformer"
-  "sigs.k8s.io/kustomize/v3/pkg/fs"
-  "sigs.k8s.io/kustomize/v3/pkg/loader"
-  "sigs.k8s.io/kustomize/v3/pkg/plugins"
-  "sigs.k8s.io/kustomize/v3/pkg/resmap"
-  "sigs.k8s.io/kustomize/v3/pkg/resource"
-  "sigs.k8s.io/kustomize/v3/pkg/target"
-  "sigs.k8s.io/kustomize/v3/pkg/validators"
-  "testing"
+	"sigs.k8s.io/kustomize/v3/k8sdeps/kunstruct"
+	"sigs.k8s.io/kustomize/v3/k8sdeps/transformer"
+	"sigs.k8s.io/kustomize/v3/pkg/fs"
+	"sigs.k8s.io/kustomize/v3/pkg/loader"
+	"sigs.k8s.io/kustomize/v3/pkg/plugins"
+	"sigs.k8s.io/kustomize/v3/pkg/resmap"
+	"sigs.k8s.io/kustomize/v3/pkg/resource"
+	"sigs.k8s.io/kustomize/v3/pkg/target"
+	"sigs.k8s.io/kustomize/v3/pkg/validators"
+	"testing"
 )

 func writeBasicAuthIngressOverlaysApplication(th *KustTestHarness) {
-  th.writeF("/manifests/gcp/basic-auth-ingress/overlays/application/application.yaml", `
+	th.writeF("/manifests/gcp/basic-auth-ingress/overlays/application/application.yaml", `
 apiVersion: app.k8s.io/v1beta1
 kind: Application
 metadata:
@@ -47,7 +47,7 @@ spec:
     url: ""
   addOwnerRef: true
 `)
-  th.writeK("/manifests/gcp/basic-auth-ingress/overlays/application", `
+	th.writeK("/manifests/gcp/basic-auth-ingress/overlays/application", `
 apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 bases:
@@ -62,7 +62,15 @@ commonLabels:
   app.kubernetes.io/part-of: kubeflow
   app.kubernetes.io/version: v0.7.0
 `)
-  th.writeF("/manifests/gcp/basic-auth-ingress/base/cloud-endpoint.yaml", `
+	th.writeF("/manifests/gcp/basic-auth-ingress/base/backend-config.yaml", `
+apiVersion: cloud.google.com/v1beta1
+kind: BackendConfig
+metadata:
+  name: basicauth-backendconfig
+spec:
+  # Jupyter uses websockets so we want to increase the timeout.
+  timeoutSec: 3600`)
+	th.writeF("/manifests/gcp/basic-auth-ingress/base/cloud-endpoint.yaml", `
 apiVersion: ctl.isla.solutions/v1
 kind: CloudEndpoint
 metadata:
@@ -73,7 +81,7 @@ spec:
   name: $(ingressName)
   namespace: $(istioNamespace)
 `)
-  th.writeF("/manifests/gcp/basic-auth-ingress/base/cluster-role-binding.yaml", `
+	th.writeF("/manifests/gcp/basic-auth-ingress/base/cluster-role-binding.yaml", `
 apiVersion: rbac.authorization.k8s.io/v1beta1
 kind: ClusterRoleBinding
 metadata:
@@ -87,7 +95,7 @@ subjects:
   name: kf-admin
   namespace: kubeflow
 `)
-  th.writeF("/manifests/gcp/basic-auth-ingress/base/cluster-role.yaml", `
+	th.writeF("/manifests/gcp/basic-auth-ingress/base/cluster-role.yaml", `
 apiVersion: rbac.authorization.k8s.io/v1beta1
 kind: ClusterRole
 metadata:
@@ -115,14 +123,14 @@ rules:
   - update
   - patch
 `)
-  th.writeF("/manifests/gcp/basic-auth-ingress/base/config-map.yaml", `
+	th.writeF("/manifests/gcp/basic-auth-ingress/base/config-map.yaml", `
 apiVersion: v1
 data:
   update_backend.sh: |
     #!/bin/bash
     #
-    # A simple shell script to configure the backend timeouts and health checks by using gcloud.
-
+    # A simple shell script to configure the health checks by using gcloud.
+    set -x
     [ -z ${NAMESPACE} ] && echo Error NAMESPACE must be set && exit 1
     [ -z ${SERVICE} ] && echo Error SERVICE must be set && exit 1
     [ -z ${INGRESS_NAME} ] && echo Error INGRESS_NAME must be set && exit 1
@@ -133,58 +141,67 @@ data:
       exit 1
     fi

-    # Activate the service account, allow 5 retries
-    for i in {1..5}; do gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} && break || sleep 10; done
+    set_health_check() {
+      # Activate the service account, allow 5 retries
+      if [[ ! -z "${GOOGLE_APPLICATION_CREDENTIALS}" ]]; then
+        # TODO(jlewi): As of 0.7 we should always be using workload identity. We can remove it post 0.7.0 once we have workload identity
+        # fully working
+        # Activate the service account, allow 5 retries
+        for i in {1..5}; do gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} && break || sleep 10; done
+      fi

-    NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[0].nodePort}')
-    echo node port is ${NODE_PORT}
+      # For debugging print out what account we are using
+      gcloud auth list

-    while [[ -z ${BACKEND_NAME} ]]; do
-      BACKENDS=$(kubectl --namespace=${NAMESPACE} get ingress ${INGRESS_NAME} -o jsonpath='{.metadata.annotations.ingress\.kubernetes\.io/backends}')
-      echo "fetching backends info with ${INGRESS_NAME}: ${BACKENDS}"
-      BACKEND_NAME=$(echo $BACKENDS | grep -o "k8s-be-${NODE_PORT}--[0-9a-z]\+")
-      echo "backend name is ${BACKEND_NAME}"
-      sleep 2
-    done
+      NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[0].nodePort}')
+      echo node port is ${NODE_PORT}

-    while [[ -z ${BACKEND_SERVICE} ]];
-      do BACKEND_SERVICE=$(gcloud --project=${PROJECT} compute backend-services list --filter=name~k8s-be-${NODE_PORT}- --uri);
-      echo "Waiting for the backend-services resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}...";
-      sleep 2;
-    done
+      while [[ -z ${BACKEND_NAME} ]]; do
+        BACKENDS=$(kubectl --namespace=${NAMESPACE} get ingress ${INGRESS_NAME} -o jsonpath='{.metadata.annotations.ingress\.kubernetes\.io/backends}')
+        echo "fetching backends info with ${INGRESS_NAME}: ${BACKENDS}"
+        BACKEND_NAME=$(echo $BACKENDS | grep -o "k8s-be-${NODE_PORT}--[0-9a-z]\+")
+        echo "backend name is ${BACKEND_NAME}"
+        sleep 2
+      done

-    while [[ -z ${HEALTH_CHECK_URI} ]];
-      do HEALTH_CHECK_URI=$(gcloud compute --project=${PROJECT} health-checks list --filter=name~${BACKEND_NAME} --uri);
-      echo "Waiting for the healthcheck resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}...";
-      sleep 2;
-    done
+      while [[ -z ${BACKEND_SERVICE} ]];
+        do BACKEND_SERVICE=$(gcloud --project=${PROJECT} compute backend-services list --filter=name~k8s-be-${NODE_PORT}- --uri);
+        echo "Waiting for the backend-services resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}...";
+        sleep 2;
+      done

-    echo health check URI is ${HEALTH_CHECK_URI}
+      while [[ -z ${HEALTH_CHECK_URI} ]];
+        do HEALTH_CHECK_URI=$(gcloud compute --project=${PROJECT} health-checks list --filter=name~${BACKEND_NAME} --uri);
+        echo "Waiting for the healthcheck resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}...";
+        sleep 2;
+      done

-    # Since we create the envoy-ingress ingress object before creating the envoy
-    # deployment object, healthcheck will not be configured correctly in the GCP
-    # load balancer. It will default the healthcheck request path to a value of
-    # / instead of the intended /healthz.
-    # Manually update the healthcheck request path to /healthz
-    if [[ ${HEALTHCHECK_PATH} ]]; then
-      echo Running health checks update ${HEALTH_CHECK_URI} with ${HEALTHCHECK_PATH}
-      gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=${HEALTHCHECK_PATH}
-    else
-      echo Running health checks update ${HEALTH_CHECK_URI} with /healthz
-      gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=/healthz
-    fi
+      echo health check URI is ${HEALTH_CHECK_URI}

-    if [[ ${USE_ISTIO} ]]; then
-      # Create the route so healthcheck can pass
-      kubectl apply -f /var/envoy-config/healthcheck_route.yaml
-    fi
+      # Since we create the envoy-ingress ingress object before creating the envoy
+      # deployment object, healthcheck will not be configured correctly in the GCP
+      # load balancer. It will default the healthcheck request path to a value of
+      # / instead of the intended /healthz.
+      # Manually update the healthcheck request path to /healthz
+      if [[ ${HEALTHCHECK_PATH} ]]; then
+        echo Running health checks update ${HEALTH_CHECK_URI} with ${HEALTHCHECK_PATH}
+        gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=${HEALTHCHECK_PATH}
+      else
+        echo Running health checks update ${HEALTH_CHECK_URI} with /healthz
+        gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=/healthz
+      fi

-    # Since JupyterHub uses websockets we want to increase the backend timeout
-    echo Increasing backend timeout for JupyterHub
-    gcloud --project=${PROJECT} compute backend-services update --global ${BACKEND_SERVICE} --timeout=3600
+      if [[ ${USE_ISTIO} ]]; then
+        # Create the route so healthcheck can pass
+        kubectl apply -f /var/envoy-config/healthcheck_route.yaml
+      fi
+    }

-    echo "Backend updated successfully. Waiting 1 hour before updating again."
-    sleep 3600
+    while true; do
+      set_health_check
+      echo "Backend updated successfully. Waiting 1 hour before updating again."
+      sleep 3600
+    done
 kind: ConfigMap
 metadata:
   name: envoy-config
@@ -220,7 +237,7 @@ metadata:
   name: ingress-bootstrap-config
 ---
 `)
-  th.writeF("/manifests/gcp/basic-auth-ingress/base/deployment.yaml", `
+	th.writeF("/manifests/gcp/basic-auth-ingress/base/deployment.yaml", `
 apiVersion: apps/v1
 kind: Deployment
 metadata:
@@ -250,7 +267,7 @@ spec:
           successThreshold: 1
           timeoutSeconds: 5
 `)
-  th.writeF("/manifests/gcp/basic-auth-ingress/base/ingress.yaml", `
+	th.writeF("/manifests/gcp/basic-auth-ingress/base/ingress.yaml", `
 apiVersion: extensions/v1beta1 # networking.k8s.io/v1beta1
 kind: Ingress
 metadata:
@@ -271,7 +288,7 @@ spec:
           servicePort: 80
         path: /*
 `)
-  th.writeF("/manifests/gcp/basic-auth-ingress/base/istio-mapping-svc.yaml", `
+	th.writeF("/manifests/gcp/basic-auth-ingress/base/istio-mapping-svc.yaml", `
 apiVersion: v1
 kind: Service
 metadata:
@@ -300,13 +317,13 @@ spec:
     app: istioMappingSvc
   type: ClusterIP
 `)
-  th.writeF("/manifests/gcp/basic-auth-ingress/base/service-account.yaml", `
+	th.writeF("/manifests/gcp/basic-auth-ingress/base/service-account.yaml", `
 apiVersion: v1
 kind: ServiceAccount
 metadata:
   name: kf-admin
 `)
-  th.writeF("/manifests/gcp/basic-auth-ingress/base/service.yaml", `
+	th.writeF("/manifests/gcp/basic-auth-ingress/base/service.yaml", `
 apiVersion: v1
 kind: Service
 metadata:
@@ -330,7 +347,7 @@ spec:
     app: whoami
   type: ClusterIP
 `)
-  th.writeF("/manifests/gcp/basic-auth-ingress/base/stateful-set.yaml", `
+	th.writeF("/manifests/gcp/basic-auth-ingress/base/stateful-set.yaml", `
 apiVersion: apps/v1
 kind: StatefulSet
 metadata:
@@ -373,7 +390,7 @@ spec:
   # Workaround for https://github.com/kubernetes-sigs/kustomize/issues/677
   volumeClaimTemplates: []
 `)
-  th.writeF("/manifests/gcp/basic-auth-ingress/base/params.yaml", `
+	th.writeF("/manifests/gcp/basic-auth-ingress/base/params.yaml", `
 varReference:
 - path: metadata/name
   kind: Certificate
@@ -408,7 +425,7 @@ varReference:
 - path: spec/domains
   kind: ManagedCertificate
 `)
-  th.writeF("/manifests/gcp/basic-auth-ingress/base/params.env", `
+	th.writeF("/manifests/gcp/basic-auth-ingress/base/params.env", `
 appName=kubeflow
 namespace=kubeflow
 hostname=
@@ -420,10 +437,11 @@ ingressName=envoy-ingress
 issuer=letsencrypt-prod
 istioNamespace=istio-system
 `)
-  th.writeK("/manifests/gcp/basic-auth-ingress/base", `
+	th.writeK("/manifests/gcp/basic-auth-ingress/base", `
 apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 resources:
+- backend-config.yaml
 - cloud-endpoint.yaml
 - cluster-role-binding.yaml
 - cluster-role.yaml
@@ -519,32 +537,32 @@ configurations:
 }

 func TestBasicAuthIngressOverlaysApplication(t *testing.T) {
-  th := NewKustTestHarness(t, "/manifests/gcp/basic-auth-ingress/overlays/application")
-  writeBasicAuthIngressOverlaysApplication(th)
-  m, err := th.makeKustTarget().MakeCustomizedResMap()
-  if err != nil {
-    t.Fatalf("Err: %v", err)
-  }
-  expected, err := m.AsYaml()
-  if err != nil {
-    t.Fatalf("Err: %v", err)
-  }
-  targetPath := "../gcp/basic-auth-ingress/overlays/application"
-  fsys := fs.MakeRealFS()
-  lrc := loader.RestrictionRootOnly
-  _loader, loaderErr := loader.NewLoader(lrc, validators.MakeFakeValidator(), targetPath, fsys)
-  if loaderErr != nil {
-    t.Fatalf("could not load kustomize loader: %v", loaderErr)
-  }
-  rf := resmap.NewFactory(resource.NewFactory(kunstruct.NewKunstructuredFactoryImpl()), transformer.NewFactoryImpl())
-  pc := plugins.DefaultPluginConfig()
-  kt, err := target.NewKustTarget(_loader, rf, transformer.NewFactoryImpl(), plugins.NewLoader(pc, rf))
-  if err != nil {
-    th.t.Fatalf("Unexpected construction error %v", err)
-  }
-  actual, err := kt.MakeCustomizedResMap()
-  if err != nil {
-    t.Fatalf("Err: %v", err)
-  }
-  th.assertActualEqualsExpected(actual, string(expected))
+	th := NewKustTestHarness(t, "/manifests/gcp/basic-auth-ingress/overlays/application")
+	writeBasicAuthIngressOverlaysApplication(th)
+	m, err := th.makeKustTarget().MakeCustomizedResMap()
+	if err != nil {
+		t.Fatalf("Err: %v", err)
+	}
+	expected, err := m.AsYaml()
+	if err != nil {
+		t.Fatalf("Err: %v", err)
+	}
+	targetPath := "../gcp/basic-auth-ingress/overlays/application"
+	fsys := fs.MakeRealFS()
+	lrc := loader.RestrictionRootOnly
+	_loader, loaderErr := loader.NewLoader(lrc, validators.MakeFakeValidator(), targetPath, fsys)
+	if loaderErr != nil {
+		t.Fatalf("could not load kustomize loader: %v", loaderErr)
+	}
+	rf := resmap.NewFactory(resource.NewFactory(kunstruct.NewKunstructuredFactoryImpl()), transformer.NewFactoryImpl())
+	pc := plugins.DefaultPluginConfig()
+	kt, err := target.NewKustTarget(_loader, rf, transformer.NewFactoryImpl(), plugins.NewLoader(pc, rf))
+	if err != nil {
+		th.t.Fatalf("Unexpected construction error %v", err)
+	}
+	actual, err := kt.MakeCustomizedResMap()
+	if err != nil {
+		t.Fatalf("Err: %v", err)
+	}
+	th.assertActualEqualsExpected(actual, string(expected))
 }
diff --git a/tests/basic-auth-ingress-overlays-certmanager_test.go b/tests/basic-auth-ingress-overlays-certmanager_test.go
index a15a9ca67ea..2b6cc2a2c19 100644
--- a/tests/basic-auth-ingress-overlays-certmanager_test.go
+++ b/tests/basic-auth-ingress-overlays-certmanager_test.go
@@ -83,6 +83,14 @@ images:
   newName: gcr.io/kubeflow-images-public/ingress-setup
   newTag: latest
 `)
+	th.writeF("/manifests/gcp/basic-auth-ingress/base/backend-config.yaml", `
+apiVersion: cloud.google.com/v1beta1
+kind: BackendConfig
+metadata:
+  name: basicauth-backendconfig
+spec:
+  # Jupyter uses websockets so we want to increase the timeout.
+  timeoutSec: 3600`)
 	th.writeF("/manifests/gcp/basic-auth-ingress/base/cloud-endpoint.yaml", `
 apiVersion: ctl.isla.solutions/v1
 kind: CloudEndpoint
@@ -142,8 +150,8 @@ data:
   update_backend.sh: |
     #!/bin/bash
     #
-    # A simple shell script to configure the backend timeouts and health checks by using gcloud.
-
+    # A simple shell script to configure the health checks by using gcloud.
+    set -x
     [ -z ${NAMESPACE} ] && echo Error NAMESPACE must be set && exit 1
     [ -z ${SERVICE} ] && echo Error SERVICE must be set && exit 1
     [ -z ${INGRESS_NAME} ] && echo Error INGRESS_NAME must be set && exit 1
@@ -154,58 +162,67 @@ data:
       exit 1
     fi

-    # Activate the service account, allow 5 retries
-    for i in {1..5}; do gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} && break || sleep 10; done
+    set_health_check() {
+      # Activate the service account, allow 5 retries
+      if [[ ! -z "${GOOGLE_APPLICATION_CREDENTIALS}" ]]; then
+        # TODO(jlewi): As of 0.7 we should always be using workload identity. We can remove it post 0.7.0 once we have workload identity
+        # fully working
+        # Activate the service account, allow 5 retries
+        for i in {1..5}; do gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} && break || sleep 10; done
+      fi

-    NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[0].nodePort}')
-    echo node port is ${NODE_PORT}
+      # For debugging print out what account we are using
+      gcloud auth list

-    while [[ -z ${BACKEND_NAME} ]]; do
-      BACKENDS=$(kubectl --namespace=${NAMESPACE} get ingress ${INGRESS_NAME} -o jsonpath='{.metadata.annotations.ingress\.kubernetes\.io/backends}')
-      echo "fetching backends info with ${INGRESS_NAME}: ${BACKENDS}"
-      BACKEND_NAME=$(echo $BACKENDS | grep -o "k8s-be-${NODE_PORT}--[0-9a-z]\+")
-      echo "backend name is ${BACKEND_NAME}"
-      sleep 2
-    done
+      NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[0].nodePort}')
+      echo node port is ${NODE_PORT}

-    while [[ -z ${BACKEND_SERVICE} ]];
-      do BACKEND_SERVICE=$(gcloud --project=${PROJECT} compute backend-services list --filter=name~k8s-be-${NODE_PORT}- --uri);
-      echo "Waiting for the backend-services resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}...";
-      sleep 2;
-    done
+      while [[ -z ${BACKEND_NAME} ]]; do
+        BACKENDS=$(kubectl --namespace=${NAMESPACE} get ingress ${INGRESS_NAME} -o jsonpath='{.metadata.annotations.ingress\.kubernetes\.io/backends}')
+        echo "fetching backends info with ${INGRESS_NAME}: ${BACKENDS}"
+        BACKEND_NAME=$(echo $BACKENDS | grep -o "k8s-be-${NODE_PORT}--[0-9a-z]\+")
+        echo "backend name is ${BACKEND_NAME}"
+        sleep 2
+      done

-    while [[ -z ${HEALTH_CHECK_URI} ]];
-      do HEALTH_CHECK_URI=$(gcloud compute --project=${PROJECT} health-checks list --filter=name~${BACKEND_NAME} --uri);
-      echo "Waiting for the healthcheck resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}...";
-      sleep 2;
-    done
+      while [[ -z ${BACKEND_SERVICE} ]];
+        do BACKEND_SERVICE=$(gcloud --project=${PROJECT} compute backend-services list --filter=name~k8s-be-${NODE_PORT}- --uri);
+        echo "Waiting for the backend-services resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}...";
+        sleep 2;
+      done

-    echo health check URI is ${HEALTH_CHECK_URI}
+      while [[ -z ${HEALTH_CHECK_URI} ]];
+        do HEALTH_CHECK_URI=$(gcloud compute --project=${PROJECT} health-checks list --filter=name~${BACKEND_NAME} --uri);
+        echo "Waiting for the healthcheck resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}...";
+        sleep 2;
+      done

-    # Since we create the envoy-ingress ingress object before creating the envoy
-    # deployment object, healthcheck will not be configured correctly in the GCP
-    # load balancer. It will default the healthcheck request path to a value of
-    # / instead of the intended /healthz.
-    # Manually update the healthcheck request path to /healthz
-    if [[ ${HEALTHCHECK_PATH} ]]; then
-      echo Running health checks update ${HEALTH_CHECK_URI} with ${HEALTHCHECK_PATH}
-      gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=${HEALTHCHECK_PATH}
-    else
-      echo Running health checks update ${HEALTH_CHECK_URI} with /healthz
-      gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=/healthz
-    fi
+      echo health check URI is ${HEALTH_CHECK_URI}

-    if [[ ${USE_ISTIO} ]]; then
-      # Create the route so healthcheck can pass
-      kubectl apply -f /var/envoy-config/healthcheck_route.yaml
-    fi
+      # Since we create the envoy-ingress ingress object before creating the envoy
+      # deployment object, healthcheck will not be configured correctly in the GCP
+      # load balancer. It will default the healthcheck request path to a value of
+      # / instead of the intended /healthz.
+      # Manually update the healthcheck request path to /healthz
+      if [[ ${HEALTHCHECK_PATH} ]]; then
+        echo Running health checks update ${HEALTH_CHECK_URI} with ${HEALTHCHECK_PATH}
+        gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=${HEALTHCHECK_PATH}
+      else
+        echo Running health checks update ${HEALTH_CHECK_URI} with /healthz
+        gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=/healthz
+      fi

-    # Since JupyterHub uses websockets we want to increase the backend timeout
-    echo Increasing backend timeout for JupyterHub
-    gcloud --project=${PROJECT} compute backend-services update --global ${BACKEND_SERVICE} --timeout=3600
+      if [[ ${USE_ISTIO} ]]; then
+        # Create the route so healthcheck can pass
+        kubectl apply -f /var/envoy-config/healthcheck_route.yaml
+      fi
+    }

-    echo "Backend updated successfully. Waiting 1 hour before updating again."
-    sleep 3600
+    while true; do
+      set_health_check
+      echo "Backend updated successfully. Waiting 1 hour before updating again."
+      sleep 3600
+    done
 kind: ConfigMap
 metadata:
   name: envoy-config
@@ -445,6 +462,7 @@ istioNamespace=istio-system
 apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 resources:
+- backend-config.yaml
 - cloud-endpoint.yaml
 - cluster-role-binding.yaml
 - cluster-role.yaml
diff --git a/tests/basic-auth-ingress-overlays-gcp-credentials_test.go b/tests/basic-auth-ingress-overlays-gcp-credentials_test.go
index 7e926040035..2217d1f3c7d 100644
--- a/tests/basic-auth-ingress-overlays-gcp-credentials_test.go
+++ b/tests/basic-auth-ingress-overlays-gcp-credentials_test.go
@@ -45,6 +45,14 @@ bases:
 patchesStrategicMerge:
 - gcp-credentials-patch.yaml
 `)
+	th.writeF("/manifests/gcp/basic-auth-ingress/base/backend-config.yaml", `
+apiVersion: cloud.google.com/v1beta1
+kind: BackendConfig
+metadata:
+  name: basicauth-backendconfig
+spec:
+  # Jupyter uses websockets so we want to increase the timeout.
+  timeoutSec: 3600`)
 	th.writeF("/manifests/gcp/basic-auth-ingress/base/cloud-endpoint.yaml", `
 apiVersion: ctl.isla.solutions/v1
 kind: CloudEndpoint
@@ -104,8 +112,8 @@ data:
   update_backend.sh: |
     #!/bin/bash
     #
-    # A simple shell script to configure the backend timeouts and health checks by using gcloud.
-
+    # A simple shell script to configure the health checks by using gcloud.
+    set -x
     [ -z ${NAMESPACE} ] && echo Error NAMESPACE must be set && exit 1
     [ -z ${SERVICE} ] && echo Error SERVICE must be set && exit 1
     [ -z ${INGRESS_NAME} ] && echo Error INGRESS_NAME must be set && exit 1
@@ -116,58 +124,67 @@ data:
       exit 1
     fi

-    # Activate the service account, allow 5 retries
-    for i in {1..5}; do gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} && break || sleep 10; done
+    set_health_check() {
+      # Activate the service account, allow 5 retries
+      if [[ ! -z "${GOOGLE_APPLICATION_CREDENTIALS}" ]]; then
+        # TODO(jlewi): As of 0.7 we should always be using workload identity. We can remove it post 0.7.0 once we have workload identity
+        # fully working
+        # Activate the service account, allow 5 retries
+        for i in {1..5}; do gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} && break || sleep 10; done
+      fi

-    NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[0].nodePort}')
-    echo node port is ${NODE_PORT}
+      # For debugging print out what account we are using
+      gcloud auth list

-    while [[ -z ${BACKEND_NAME} ]]; do
-      BACKENDS=$(kubectl --namespace=${NAMESPACE} get ingress ${INGRESS_NAME} -o jsonpath='{.metadata.annotations.ingress\.kubernetes\.io/backends}')
-      echo "fetching backends info with ${INGRESS_NAME}: ${BACKENDS}"
-      BACKEND_NAME=$(echo $BACKENDS | grep -o "k8s-be-${NODE_PORT}--[0-9a-z]\+")
-      echo "backend name is ${BACKEND_NAME}"
-      sleep 2
-    done
+      NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[0].nodePort}')
+      echo node port is ${NODE_PORT}

-    while [[ -z ${BACKEND_SERVICE} ]];
-      do BACKEND_SERVICE=$(gcloud --project=${PROJECT} compute backend-services list --filter=name~k8s-be-${NODE_PORT}- --uri);
-      echo "Waiting for the backend-services resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}...";
-      sleep 2;
-    done
+      while [[ -z ${BACKEND_NAME} ]]; do
+        BACKENDS=$(kubectl --namespace=${NAMESPACE} get ingress ${INGRESS_NAME} -o jsonpath='{.metadata.annotations.ingress\.kubernetes\.io/backends}')
+        echo "fetching backends info with ${INGRESS_NAME}: ${BACKENDS}"
+        BACKEND_NAME=$(echo $BACKENDS | grep -o "k8s-be-${NODE_PORT}--[0-9a-z]\+")
+        echo "backend name is ${BACKEND_NAME}"
+        sleep 2
+      done

-    while [[ -z ${HEALTH_CHECK_URI} ]];
-      do HEALTH_CHECK_URI=$(gcloud compute --project=${PROJECT} health-checks list --filter=name~${BACKEND_NAME} --uri);
-      echo "Waiting for the healthcheck resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}...";
-      sleep 2;
-    done
+      while [[ -z ${BACKEND_SERVICE} ]];
+        do BACKEND_SERVICE=$(gcloud --project=${PROJECT} compute backend-services list --filter=name~k8s-be-${NODE_PORT}- --uri);
+        echo "Waiting for the backend-services resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}...";
+        sleep 2;
+      done

-    echo health check URI is ${HEALTH_CHECK_URI}
+      while [[ -z ${HEALTH_CHECK_URI} ]];
+        do HEALTH_CHECK_URI=$(gcloud compute --project=${PROJECT} health-checks list --filter=name~${BACKEND_NAME} --uri);
+        echo "Waiting for the healthcheck resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}...";
+        sleep 2;
+      done

-    # Since we create the envoy-ingress ingress object before creating the envoy
-    # deployment object, healthcheck will not be configured correctly in the GCP
-    # load balancer. It will default the healthcheck request path to a value of
-    # / instead of the intended /healthz.
-    # Manually update the healthcheck request path to /healthz
-    if [[ ${HEALTHCHECK_PATH} ]]; then
-      echo Running health checks update ${HEALTH_CHECK_URI} with ${HEALTHCHECK_PATH}
-      gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=${HEALTHCHECK_PATH}
-    else
-      echo Running health checks update ${HEALTH_CHECK_URI} with /healthz
-      gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=/healthz
-    fi
+      echo health check URI is ${HEALTH_CHECK_URI}

-    if [[ ${USE_ISTIO} ]]; then
-      # Create the route so healthcheck can pass
-      kubectl apply -f /var/envoy-config/healthcheck_route.yaml
-    fi
+      # Since we create the envoy-ingress ingress object before creating the envoy
+      # deployment object, healthcheck will not be configured correctly in the GCP
+      # load balancer. It will default the healthcheck request path to a value of
+      # / instead of the intended /healthz.
+      # Manually update the healthcheck request path to /healthz
+      if [[ ${HEALTHCHECK_PATH} ]]; then
+        echo Running health checks update ${HEALTH_CHECK_URI} with ${HEALTHCHECK_PATH}
+        gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=${HEALTHCHECK_PATH}
+      else
+        echo Running health checks update ${HEALTH_CHECK_URI} with /healthz
+        gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=/healthz
+      fi

-    # Since JupyterHub uses websockets we want to increase the backend timeout
-    echo Increasing backend timeout for JupyterHub
-    gcloud --project=${PROJECT} compute backend-services update --global ${BACKEND_SERVICE} --timeout=3600
+      if [[ ${USE_ISTIO} ]]; then
+        # Create the route so healthcheck can pass
+        kubectl apply -f /var/envoy-config/healthcheck_route.yaml
+      fi
+    }

-    echo "Backend updated successfully. Waiting 1 hour before updating again."
-    sleep 3600
+    while true; do
+      set_health_check
+      echo "Backend updated successfully. Waiting 1 hour before updating again."
+      sleep 3600
+    done
 kind: ConfigMap
 metadata:
   name: envoy-config
@@ -407,6 +424,7 @@ istioNamespace=istio-system
 apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 resources:
+- backend-config.yaml
 - cloud-endpoint.yaml
 - cluster-role-binding.yaml
 - cluster-role.yaml
diff --git a/tests/basic-auth-ingress-overlays-managed-cert_test.go b/tests/basic-auth-ingress-overlays-managed-cert_test.go
index 8db1a2419a3..7b48fcf5ecb 100644
--- a/tests/basic-auth-ingress-overlays-managed-cert_test.go
+++ b/tests/basic-auth-ingress-overlays-managed-cert_test.go
@@ -21,8 +21,7 @@ metadata:
   name: gke-certificate
 spec:
   domains:
-  - $(hostname)
-`)
+  - $(hostname)`)
 	th.writeK("/manifests/gcp/basic-auth-ingress/overlays/managed-cert", `
 apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
@@ -34,6 +33,14 @@ namespace: kubeflow
 commonLabels:
   kustomize.component: basic-auth-ingress
 `)
+	th.writeF("/manifests/gcp/basic-auth-ingress/base/backend-config.yaml", `
+apiVersion: cloud.google.com/v1beta1
+kind: BackendConfig
+metadata:
+  name: basicauth-backendconfig
+spec:
+  # Jupyter uses websockets so we want to increase the timeout.
+  timeoutSec: 3600`)
 	th.writeF("/manifests/gcp/basic-auth-ingress/base/cloud-endpoint.yaml", `
 apiVersion: ctl.isla.solutions/v1
 kind: CloudEndpoint
@@ -93,8 +100,8 @@ data:
   update_backend.sh: |
     #!/bin/bash
     #
-    # A simple shell script to configure the backend timeouts and health checks by using gcloud.
-
+    # A simple shell script to configure the health checks by using gcloud.
+    set -x
     [ -z ${NAMESPACE} ] && echo Error NAMESPACE must be set && exit 1
     [ -z ${SERVICE} ] && echo Error SERVICE must be set && exit 1
     [ -z ${INGRESS_NAME} ] && echo Error INGRESS_NAME must be set && exit 1
@@ -105,58 +112,67 @@ data:
       exit 1
     fi

-    # Activate the service account, allow 5 retries
-    for i in {1..5}; do gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} && break || sleep 10; done
+    set_health_check() {
+      # Activate the service account, allow 5 retries
+      if [[ ! -z "${GOOGLE_APPLICATION_CREDENTIALS}" ]]; then
+        # TODO(jlewi): As of 0.7 we should always be using workload identity. We can remove it post 0.7.0 once we have workload identity
+        # fully working
+        # Activate the service account, allow 5 retries
+        for i in {1..5}; do gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} && break || sleep 10; done
+      fi

-    NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[0].nodePort}')
-    echo node port is ${NODE_PORT}
+      # For debugging print out what account we are using
+      gcloud auth list

-    while [[ -z ${BACKEND_NAME} ]]; do
-      BACKENDS=$(kubectl --namespace=${NAMESPACE} get ingress ${INGRESS_NAME} -o jsonpath='{.metadata.annotations.ingress\.kubernetes\.io/backends}')
-      echo "fetching backends info with ${INGRESS_NAME}: ${BACKENDS}"
-      BACKEND_NAME=$(echo $BACKENDS | grep -o "k8s-be-${NODE_PORT}--[0-9a-z]\+")
-      echo "backend name is ${BACKEND_NAME}"
-      sleep 2
-    done
+      NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[0].nodePort}')
+      echo node port is ${NODE_PORT}

-    while [[ -z ${BACKEND_SERVICE} ]];
-      do BACKEND_SERVICE=$(gcloud --project=${PROJECT} compute backend-services list --filter=name~k8s-be-${NODE_PORT}- --uri);
-      echo "Waiting for the backend-services resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}...";
-      sleep 2;
-    done
+      while [[ -z ${BACKEND_NAME} ]]; do
+        BACKENDS=$(kubectl --namespace=${NAMESPACE} get ingress ${INGRESS_NAME} -o jsonpath='{.metadata.annotations.ingress\.kubernetes\.io/backends}')
+        echo "fetching backends info with ${INGRESS_NAME}: ${BACKENDS}"
+        BACKEND_NAME=$(echo $BACKENDS | grep -o "k8s-be-${NODE_PORT}--[0-9a-z]\+")
+        echo "backend name is ${BACKEND_NAME}"
+        sleep 2
+      done

-    while [[ -z ${HEALTH_CHECK_URI} ]];
-      do HEALTH_CHECK_URI=$(gcloud compute --project=${PROJECT} health-checks list --filter=name~${BACKEND_NAME} --uri);
-      echo "Waiting for the healthcheck resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}...";
-      sleep 2;
-    done
+      while [[ -z ${BACKEND_SERVICE} ]];
+        do BACKEND_SERVICE=$(gcloud --project=${PROJECT} compute backend-services list --filter=name~k8s-be-${NODE_PORT}- --uri);
+        echo "Waiting for the backend-services resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}...";
+        sleep 2;
+      done

-    echo health check URI is ${HEALTH_CHECK_URI}
+      while [[ -z ${HEALTH_CHECK_URI} ]];
+        do HEALTH_CHECK_URI=$(gcloud compute --project=${PROJECT} health-checks list --filter=name~${BACKEND_NAME} --uri);
+        echo "Waiting for the healthcheck resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}...";
+        sleep 2;
+      done

-    # Since we create the envoy-ingress ingress object before creating the envoy
-    # deployment object, healthcheck will not be configured correctly in the GCP
-    # load balancer. It will default the healthcheck request path to a value of
-    # / instead of the intended /healthz.
-    # Manually update the healthcheck request path to /healthz
-    if [[ ${HEALTHCHECK_PATH} ]]; then
-      echo Running health checks update ${HEALTH_CHECK_URI} with ${HEALTHCHECK_PATH}
-      gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=${HEALTHCHECK_PATH}
-    else
-      echo Running health checks update ${HEALTH_CHECK_URI} with /healthz
-      gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=/healthz
-    fi
+      echo health check URI is ${HEALTH_CHECK_URI}

-    if [[ ${USE_ISTIO} ]]; then
-      # Create the route so healthcheck can pass
-      kubectl apply -f /var/envoy-config/healthcheck_route.yaml
-    fi
+      # Since we create the envoy-ingress ingress object before creating the envoy
+      # deployment object, healthcheck will not be configured correctly in the GCP
+      # load balancer. It will default the healthcheck request path to a value of
+      # / instead of the intended /healthz.
+      # Manually update the healthcheck request path to /healthz
+      if [[ ${HEALTHCHECK_PATH} ]]; then
+        echo Running health checks update ${HEALTH_CHECK_URI} with ${HEALTHCHECK_PATH}
+        gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=${HEALTHCHECK_PATH}
+      else
+        echo Running health checks update ${HEALTH_CHECK_URI} with /healthz
+        gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=/healthz
+      fi

-    # Since JupyterHub uses websockets we want to increase the backend timeout
-    echo Increasing backend timeout for JupyterHub
-    gcloud --project=${PROJECT} compute backend-services update --global ${BACKEND_SERVICE} --timeout=3600
+      if [[ ${USE_ISTIO} ]]; then
+        # Create the route so healthcheck can pass
+        kubectl apply -f /var/envoy-config/healthcheck_route.yaml
+      fi
+    }

-    echo "Backend updated successfully. Waiting 1 hour before updating again."
-    sleep 3600
+    while true; do
+      set_health_check
+      echo "Backend updated successfully. Waiting 1 hour before updating again."
+      sleep 3600
+    done
 kind: ConfigMap
 metadata:
   name: envoy-config
@@ -396,6 +412,7 @@ istioNamespace=istio-system
 apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 resources:
+- backend-config.yaml
 - cloud-endpoint.yaml
 - cluster-role-binding.yaml
 - cluster-role.yaml
diff --git a/tests/iap-ingress-base_test.go b/tests/iap-ingress-base_test.go
index bada3a7e486..e2557864232 100644
--- a/tests/iap-ingress-base_test.go
+++ b/tests/iap-ingress-base_test.go
@@ -13,7 +13,6 @@ import (
 	"testing"
 )

-
 func writeIapIngressBase(th *KustTestHarness) {
 	th.writeF("/manifests/gcp/iap-ingress/base/backend-config.yaml", `
 apiVersion: cloud.google.com/v1beta1
 kind: BackendConfig
 metadata:
   name: iap-backendconfig
 spec:
+  # Jupyter uses websockets so we want to increase the timeout.
+  timeoutSec: 3600
   iap:
     enabled: true
     oauthclientCredentials:
@@ -142,7 +143,8 @@ data:
   setup_backend.sh: |
     #!/usr/bin/env bash
     #
-    # A simple shell script to configure the backend timeouts and health checks by using gcloud.
+    # A simple shell script to configure the JWT audience used with ISTIO
+    set -x
     [ -z ${NAMESPACE} ] && echo Error NAMESPACE must be set && exit 1
     [ -z ${SERVICE} ] && echo Error SERVICE must be set && exit 1
     [ -z ${INGRESS_NAME} ] && echo Error INGRESS_NAME must be set && exit 1
@@ -160,9 +162,15 @@ data:
     fi

     # Activate the service account
-    gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS}
+    if [ ! -z "${GOOGLE_APPLICATION_CREDENTIALS}" ]; then
-z "${GOOGLE_APPLICATION_CREDENTIALS}" ]; then + # As of 0.7.0 we should be using workload identity and never setting GOOGLE_APPLICATION_CREDENTIALS. + # But we kept this for backwards compatibility but can remove later. + gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} + fi + # Print out the config for debugging gcloud config list + gcloud auth list NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}') echo "node port is ${NODE_PORT}" @@ -204,30 +212,15 @@ data: echo "Clearing lock on service annotation" kubectl patch svc "${SERVICE}" -p "{\"metadata\": { \"annotations\": {\"backendlock\": \"\" }}}" - checkBackend() { - # created by init container. - . /var/shared/healthz.env - - # If node port or backend id change, so does the JWT audience. - CURR_NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}') - read -ra toks <<<"$(gcloud compute --project=${PROJECT} backend-services list --filter=name~k8s-be-${CURR_NODE_PORT}- --format='value(id,timeoutSec)')" - CURR_BACKEND_ID="${toks[0]}" - CURR_BACKEND_TIMEOUT="${toks[1]}" - [[ "$BACKEND_ID" == "$CURR_BACKEND_ID" && "${CURR_BACKEND_TIMEOUT}" -eq 3600 ]] - } - - # Verify configuration every 10 seconds. + # Loop for ever; we don't want to exit because restarting the container leads users to think there might be a problem while true; do - if ! checkBackend; then - echo "$(date) WARN: Backend check failed, restarting container." - exit 1 - fi - sleep 10 + sleep 3600 done update_backend.sh: | #!/bin/bash # - # A simple shell script to configure the backend timeouts and health checks by using gcloud. + # A simple shell script to configure the health checks by using gcloud. + set -x [ -z ${NAMESPACE} ] && echo Error NAMESPACE must be set && exit 1 [ -z ${SERVICE} ] && echo Error SERVICE must be set && exit 1 @@ -239,58 +232,63 @@ data: exit 1 fi - # Activate the service account, allow 5 retries - for i in {1..5}; do gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} && break || sleep 10; done - - NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}') - echo node port is ${NODE_PORT} - - while [[ -z ${BACKEND_NAME} ]]; do - BACKENDS=$(kubectl --namespace=${NAMESPACE} get ingress ${INGRESS_NAME} -o jsonpath='{.metadata.annotations.ingress\.kubernetes\.io/backends}') - echo "fetching backends info with ${INGRESS_NAME}: ${BACKENDS}" - BACKEND_NAME=$(echo $BACKENDS | grep -o "k8s-be-${NODE_PORT}--[0-9a-z]\+") - echo "backend name is ${BACKEND_NAME}" - sleep 2 - done - - while [[ -z ${BACKEND_SERVICE} ]]; - do BACKEND_SERVICE=$(gcloud --project=${PROJECT} compute backend-services list --filter=name~k8s-be-${NODE_PORT}- --uri); - echo "Waiting for the backend-services resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}..."; - sleep 2; - done + if [[ ! -z "${GOOGLE_APPLICATION_CREDENTIALS}" ]]; then + # TODO(jlewi): As of 0.7 we should always be using workload identity. 
We can remove it post 0.7.0 once we have workload identity + # fully working + # Activate the service account, allow 5 retries + for i in {1..5}; do gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} && break || sleep 10; done + fi + + set_health_check () { + NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}') + echo node port is ${NODE_PORT} + + while [[ -z ${BACKEND_NAME} ]]; do + BACKENDS=$(kubectl --namespace=${NAMESPACE} get ingress ${INGRESS_NAME} -o jsonpath='{.metadata.annotations.ingress\.kubernetes\.io/backends}') + echo "fetching backends info with ${INGRESS_NAME}: ${BACKENDS}" + BACKEND_NAME=$(echo $BACKENDS | grep -o "k8s-be-${NODE_PORT}--[0-9a-z]\+") + echo "backend name is ${BACKEND_NAME}" + sleep 2 + done + + while [[ -z ${BACKEND_SERVICE} ]]; + do BACKEND_SERVICE=$(gcloud --project=${PROJECT} compute backend-services list --filter=name~k8s-be-${NODE_PORT}- --uri); + echo "Waiting for the backend-services resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}..."; + sleep 2; + done + + while [[ -z ${HEALTH_CHECK_URI} ]]; + do HEALTH_CHECK_URI=$(gcloud compute --project=${PROJECT} health-checks list --filter=name~${BACKEND_NAME} --uri); + echo "Waiting for the healthcheck resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}..."; + sleep 2; + done + + echo health check URI is ${HEALTH_CHECK_URI} + + # Since we create the envoy-ingress ingress object before creating the envoy + # deployment object, healthcheck will not be configured correctly in the GCP + # load balancer. It will default the healthcheck request path to a value of + # / instead of the intended /healthz. + # Manually update the healthcheck request path to /healthz + if [[ ${HEALTHCHECK_PATH} ]]; then + # This is basic auth + echo Running health checks update ${HEALTH_CHECK_URI} with ${HEALTHCHECK_PATH} + gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=${HEALTHCHECK_PATH} + else + # /healthz/ready is the health check path for istio-ingressgateway + echo Running health checks update ${HEALTH_CHECK_URI} with /healthz/ready + gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=/healthz/ready + # We need the nodeport for istio-ingressgateway status-port + STATUS_NODE_PORT=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.ports[?(@.name=="status-port")].nodePort}') + gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --port=${STATUS_NODE_PORT} + fi + } - while [[ -z ${HEALTH_CHECK_URI} ]]; - do HEALTH_CHECK_URI=$(gcloud compute --project=${PROJECT} health-checks list --filter=name~${BACKEND_NAME} --uri); - echo "Waiting for the healthcheck resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}..."; - sleep 2; + while true; do + set_health_check + echo "Backend updated successfully. Waiting 1 hour before updating again." + sleep 3600 done - - echo health check URI is ${HEALTH_CHECK_URI} - - # Since we create the envoy-ingress ingress object before creating the envoy - # deployment object, healthcheck will not be configured correctly in the GCP - # load balancer. It will default the healthcheck request path to a value of - # / instead of the intended /healthz. 
- # Manually update the healthcheck request path to /healthz - if [[ ${HEALTHCHECK_PATH} ]]; then - # This is basic auth - echo Running health checks update ${HEALTH_CHECK_URI} with ${HEALTHCHECK_PATH} - gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=${HEALTHCHECK_PATH} - else - # /healthz/ready is the health check path for istio-ingressgateway - echo Running health checks update ${HEALTH_CHECK_URI} with /healthz/ready - gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=/healthz/ready - # We need the nodeport for istio-ingressgateway status-port - STATUS_NODE_PORT=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.ports[?(@.name=="status-port")].nodePort}') - gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --port=${STATUS_NODE_PORT} - fi - - # Since JupyterHub uses websockets we want to increase the backend timeout - echo Increasing backend timeout for JupyterHub - gcloud --project=${PROJECT} compute backend-services update --global ${BACKEND_SERVICE} --timeout=3600 - - echo "Backend updated successfully. Waiting 1 hour before updating again." - sleep 3600 kind: ConfigMap metadata: name: envoy-config @@ -546,8 +544,7 @@ varReference: - path: data/healthcheck_route.yaml kind: ConfigMap - path: spec/domains - kind: ManagedCertificate -`) + kind: ManagedCertificate`) th.writeF("/manifests/gcp/iap-ingress/base/params.env", ` namespace=kubeflow appName=kubeflow @@ -559,8 +556,7 @@ oauthSecretName=kubeflow-oauth project= adminSaSecretName=admin-gcp-sa tlsSecretName=envoy-ingress-tls -istioNamespace=istio-system -`) +istioNamespace=istio-system`) th.writeK("/manifests/gcp/iap-ingress/base", ` apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization diff --git a/tests/iap-ingress-overlays-application_test.go b/tests/iap-ingress-overlays-application_test.go index b96dac56052..3fb00a46162 100644 --- a/tests/iap-ingress-overlays-application_test.go +++ b/tests/iap-ingress-overlays-application_test.go @@ -1,20 +1,20 @@ package tests_test import ( - "sigs.k8s.io/kustomize/v3/k8sdeps/kunstruct" - "sigs.k8s.io/kustomize/v3/k8sdeps/transformer" - "sigs.k8s.io/kustomize/v3/pkg/fs" - "sigs.k8s.io/kustomize/v3/pkg/loader" - "sigs.k8s.io/kustomize/v3/pkg/plugins" - "sigs.k8s.io/kustomize/v3/pkg/resmap" - "sigs.k8s.io/kustomize/v3/pkg/resource" - "sigs.k8s.io/kustomize/v3/pkg/target" - "sigs.k8s.io/kustomize/v3/pkg/validators" - "testing" + "sigs.k8s.io/kustomize/v3/k8sdeps/kunstruct" + "sigs.k8s.io/kustomize/v3/k8sdeps/transformer" + "sigs.k8s.io/kustomize/v3/pkg/fs" + "sigs.k8s.io/kustomize/v3/pkg/loader" + "sigs.k8s.io/kustomize/v3/pkg/plugins" + "sigs.k8s.io/kustomize/v3/pkg/resmap" + "sigs.k8s.io/kustomize/v3/pkg/resource" + "sigs.k8s.io/kustomize/v3/pkg/target" + "sigs.k8s.io/kustomize/v3/pkg/validators" + "testing" ) func writeIapIngressOverlaysApplication(th *KustTestHarness) { - th.writeF("/manifests/gcp/iap-ingress/overlays/application/application.yaml", ` + th.writeF("/manifests/gcp/iap-ingress/overlays/application/application.yaml", ` apiVersion: app.k8s.io/v1beta1 kind: Application metadata: @@ -47,7 +47,7 @@ spec: url: "" addOwnerRef: true `) - th.writeK("/manifests/gcp/iap-ingress/overlays/application", ` + th.writeK("/manifests/gcp/iap-ingress/overlays/application", ` apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization bases: @@ -62,18 +62,20 @@ commonLabels: app.kubernetes.io/part-of: kubeflow 
app.kubernetes.io/version: v0.7.0 `) - th.writeF("/manifests/gcp/iap-ingress/base/backend-config.yaml", ` + th.writeF("/manifests/gcp/iap-ingress/base/backend-config.yaml", ` apiVersion: cloud.google.com/v1beta1 kind: BackendConfig metadata: name: iap-backendconfig spec: + # Jupyter uses websockets so we want to increase the timeout. + timeoutSec: 3600 iap: enabled: true oauthclientCredentials: secretName: $(oauthSecretName) `) - th.writeF("/manifests/gcp/iap-ingress/base/cloud-endpoint.yaml", ` + th.writeF("/manifests/gcp/iap-ingress/base/cloud-endpoint.yaml", ` apiVersion: ctl.isla.solutions/v1 kind: CloudEndpoint metadata: @@ -84,7 +86,7 @@ spec: name: $(ingressName) namespace: $(istioNamespace) `) - th.writeF("/manifests/gcp/iap-ingress/base/cluster-role-binding.yaml", ` + th.writeF("/manifests/gcp/iap-ingress/base/cluster-role-binding.yaml", ` apiVersion: rbac.authorization.k8s.io/v1beta1 kind: ClusterRoleBinding metadata: @@ -97,7 +99,7 @@ subjects: - kind: ServiceAccount name: kf-admin `) - th.writeF("/manifests/gcp/iap-ingress/base/cluster-role.yaml", ` + th.writeF("/manifests/gcp/iap-ingress/base/cluster-role.yaml", ` apiVersion: rbac.authorization.k8s.io/v1beta1 kind: ClusterRole metadata: @@ -138,7 +140,7 @@ rules: verbs: - '*' `) - th.writeF("/manifests/gcp/iap-ingress/base/config-map.yaml", ` + th.writeF("/manifests/gcp/iap-ingress/base/config-map.yaml", ` --- apiVersion: v1 data: @@ -189,7 +191,8 @@ data: setup_backend.sh: | #!/usr/bin/env bash # - # A simple shell script to configure the backend timeouts and health checks by using gcloud. + # A simple shell script to configure the JWT audience used with ISTIO + set -x [ -z ${NAMESPACE} ] && echo Error NAMESPACE must be set && exit 1 [ -z ${SERVICE} ] && echo Error SERVICE must be set && exit 1 [ -z ${INGRESS_NAME} ] && echo Error INGRESS_NAME must be set && exit 1 @@ -207,9 +210,15 @@ data: fi # Activate the service account - gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} + if [ ! -z "${GOOGLE_APPLICATION_CREDENTIALS}" ]; then + # As of 0.7.0 we should be using workload identity and never setting GOOGLE_APPLICATION_CREDENTIALS. + # We keep this for backwards compatibility, but it can be removed later. + gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} + fi + # Print out the config for debugging gcloud config list + gcloud auth list NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}') echo "node port is ${NODE_PORT}" @@ -251,30 +260,15 @@ data: echo "Clearing lock on service annotation" kubectl patch svc "${SERVICE}" -p "{\"metadata\": { \"annotations\": {\"backendlock\": \"\" }}}" - checkBackend() { - # created by init container. - . /var/shared/healthz.env - - # If node port or backend id change, so does the JWT audience. - CURR_NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}') - read -ra toks <<<"$(gcloud compute --project=${PROJECT} backend-services list --filter=name~k8s-be-${CURR_NODE_PORT}- --format='value(id,timeoutSec)')" - CURR_BACKEND_ID="${toks[0]}" - CURR_BACKEND_TIMEOUT="${toks[1]}" - [[ "$BACKEND_ID" == "$CURR_BACKEND_ID" && "${CURR_BACKEND_TIMEOUT}" -eq 3600 ]] - } - - # Verify configuration every 10 seconds. + # Loop for ever; we don't want to exit because restarting the container leads users to think there might be a problem while true; do - if !
checkBackend; then - echo "$(date) WARN: Backend check failed, restarting container." - exit 1 - fi - sleep 10 + sleep 3600 done update_backend.sh: | #!/bin/bash # - # A simple shell script to configure the backend timeouts and health checks by using gcloud. + # A simple shell script to configure the health checks by using gcloud. + set -x [ -z ${NAMESPACE} ] && echo Error NAMESPACE must be set && exit 1 [ -z ${SERVICE} ] && echo Error SERVICE must be set && exit 1 @@ -286,58 +280,63 @@ data: exit 1 fi - # Activate the service account, allow 5 retries - for i in {1..5}; do gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} && break || sleep 10; done - - NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}') - echo node port is ${NODE_PORT} - - while [[ -z ${BACKEND_NAME} ]]; do - BACKENDS=$(kubectl --namespace=${NAMESPACE} get ingress ${INGRESS_NAME} -o jsonpath='{.metadata.annotations.ingress\.kubernetes\.io/backends}') - echo "fetching backends info with ${INGRESS_NAME}: ${BACKENDS}" - BACKEND_NAME=$(echo $BACKENDS | grep -o "k8s-be-${NODE_PORT}--[0-9a-z]\+") - echo "backend name is ${BACKEND_NAME}" - sleep 2 - done - - while [[ -z ${BACKEND_SERVICE} ]]; - do BACKEND_SERVICE=$(gcloud --project=${PROJECT} compute backend-services list --filter=name~k8s-be-${NODE_PORT}- --uri); - echo "Waiting for the backend-services resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}..."; - sleep 2; - done + if [[ ! -z "${GOOGLE_APPLICATION_CREDENTIALS}" ]]; then + # TODO(jlewi): As of 0.7 we should always be using workload identity. We can remove it post 0.7.0 once we have workload identity + # fully working + # Activate the service account, allow 5 retries + for i in {1..5}; do gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} && break || sleep 10; done + fi + + set_health_check () { + NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}') + echo node port is ${NODE_PORT} + + while [[ -z ${BACKEND_NAME} ]]; do + BACKENDS=$(kubectl --namespace=${NAMESPACE} get ingress ${INGRESS_NAME} -o jsonpath='{.metadata.annotations.ingress\.kubernetes\.io/backends}') + echo "fetching backends info with ${INGRESS_NAME}: ${BACKENDS}" + BACKEND_NAME=$(echo $BACKENDS | grep -o "k8s-be-${NODE_PORT}--[0-9a-z]\+") + echo "backend name is ${BACKEND_NAME}" + sleep 2 + done + + while [[ -z ${BACKEND_SERVICE} ]]; + do BACKEND_SERVICE=$(gcloud --project=${PROJECT} compute backend-services list --filter=name~k8s-be-${NODE_PORT}- --uri); + echo "Waiting for the backend-services resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}..."; + sleep 2; + done + + while [[ -z ${HEALTH_CHECK_URI} ]]; + do HEALTH_CHECK_URI=$(gcloud compute --project=${PROJECT} health-checks list --filter=name~${BACKEND_NAME} --uri); + echo "Waiting for the healthcheck resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}..."; + sleep 2; + done + + echo health check URI is ${HEALTH_CHECK_URI} + + # Since we create the envoy-ingress ingress object before creating the envoy + # deployment object, healthcheck will not be configured correctly in the GCP + # load balancer. It will default the healthcheck request path to a value of + # / instead of the intended /healthz. 
+ # Manually update the healthcheck request path to /healthz + if [[ ${HEALTHCHECK_PATH} ]]; then + # This is basic auth + echo Running health checks update ${HEALTH_CHECK_URI} with ${HEALTHCHECK_PATH} + gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=${HEALTHCHECK_PATH} + else + # /healthz/ready is the health check path for istio-ingressgateway + echo Running health checks update ${HEALTH_CHECK_URI} with /healthz/ready + gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=/healthz/ready + # We need the nodeport for istio-ingressgateway status-port + STATUS_NODE_PORT=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.ports[?(@.name=="status-port")].nodePort}') + gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --port=${STATUS_NODE_PORT} + fi + } - while [[ -z ${HEALTH_CHECK_URI} ]]; - do HEALTH_CHECK_URI=$(gcloud compute --project=${PROJECT} health-checks list --filter=name~${BACKEND_NAME} --uri); - echo "Waiting for the healthcheck resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}..."; - sleep 2; + while true; do + set_health_check + echo "Backend updated successfully. Waiting 1 hour before updating again." + sleep 3600 done - - echo health check URI is ${HEALTH_CHECK_URI} - - # Since we create the envoy-ingress ingress object before creating the envoy - # deployment object, healthcheck will not be configured correctly in the GCP - # load balancer. It will default the healthcheck request path to a value of - # / instead of the intended /healthz. - # Manually update the healthcheck request path to /healthz - if [[ ${HEALTHCHECK_PATH} ]]; then - # This is basic auth - echo Running health checks update ${HEALTH_CHECK_URI} with ${HEALTHCHECK_PATH} - gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=${HEALTHCHECK_PATH} - else - # /healthz/ready is the health check path for istio-ingressgateway - echo Running health checks update ${HEALTH_CHECK_URI} with /healthz/ready - gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=/healthz/ready - # We need the nodeport for istio-ingressgateway status-port - STATUS_NODE_PORT=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.ports[?(@.name=="status-port")].nodePort}') - gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --port=${STATUS_NODE_PORT} - fi - - # Since JupyterHub uses websockets we want to increase the backend timeout - echo Increasing backend timeout for JupyterHub - gcloud --project=${PROJECT} compute backend-services update --global ${BACKEND_SERVICE} --timeout=3600 - - echo "Backend updated successfully. Waiting 1 hour before updating again." 
- sleep 3600 kind: ConfigMap metadata: name: envoy-config @@ -371,7 +370,7 @@ metadata: name: ingress-bootstrap-config --- `) - th.writeF("/manifests/gcp/iap-ingress/base/deployment.yaml", ` + th.writeF("/manifests/gcp/iap-ingress/base/deployment.yaml", ` --- apiVersion: apps/v1 kind: Deployment @@ -440,7 +439,7 @@ spec: name: envoy-config name: config-volume `) - th.writeF("/manifests/gcp/iap-ingress/base/ingress.yaml", ` + th.writeF("/manifests/gcp/iap-ingress/base/ingress.yaml", ` apiVersion: extensions/v1beta1 # networking.k8s.io/v1beta1 kind: Ingress metadata: @@ -461,7 +460,7 @@ spec: servicePort: 80 path: /* `) - th.writeF("/manifests/gcp/iap-ingress/base/policy.yaml", ` + th.writeF("/manifests/gcp/iap-ingress/base/policy.yaml", ` apiVersion: authentication.istio.io/v1alpha1 kind: Policy metadata: @@ -485,13 +484,13 @@ spec: ports: - number: 80 `) - th.writeF("/manifests/gcp/iap-ingress/base/service-account.yaml", ` + th.writeF("/manifests/gcp/iap-ingress/base/service-account.yaml", ` apiVersion: v1 kind: ServiceAccount metadata: name: kf-admin `) - th.writeF("/manifests/gcp/iap-ingress/base/service.yaml", ` + th.writeF("/manifests/gcp/iap-ingress/base/service.yaml", ` apiVersion: v1 kind: Service metadata: @@ -506,7 +505,7 @@ spec: app: whoami type: ClusterIP `) - th.writeF("/manifests/gcp/iap-ingress/base/stateful-set.yaml", ` + th.writeF("/manifests/gcp/iap-ingress/base/stateful-set.yaml", ` apiVersion: apps/v1 kind: StatefulSet metadata: @@ -548,7 +547,7 @@ spec: name: config-volume volumeClaimTemplates: [] `) - th.writeF("/manifests/gcp/iap-ingress/base/params.yaml", ` + th.writeF("/manifests/gcp/iap-ingress/base/params.yaml", ` varReference: - path: metadata/name kind: Certificate @@ -593,9 +592,8 @@ varReference: - path: data/healthcheck_route.yaml kind: ConfigMap - path: spec/domains - kind: ManagedCertificate -`) - th.writeF("/manifests/gcp/iap-ingress/base/params.env", ` + kind: ManagedCertificate`) + th.writeF("/manifests/gcp/iap-ingress/base/params.env", ` namespace=kubeflow appName=kubeflow hostname= @@ -606,9 +604,8 @@ oauthSecretName=kubeflow-oauth project= adminSaSecretName=admin-gcp-sa tlsSecretName=envoy-ingress-tls -istioNamespace=istio-system -`) - th.writeK("/manifests/gcp/iap-ingress/base", ` +istioNamespace=istio-system`) + th.writeK("/manifests/gcp/iap-ingress/base", ` apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: @@ -725,32 +722,32 @@ configurations: } func TestIapIngressOverlaysApplication(t *testing.T) { - th := NewKustTestHarness(t, "/manifests/gcp/iap-ingress/overlays/application") - writeIapIngressOverlaysApplication(th) - m, err := th.makeKustTarget().MakeCustomizedResMap() - if err != nil { - t.Fatalf("Err: %v", err) - } - expected, err := m.AsYaml() - if err != nil { - t.Fatalf("Err: %v", err) - } - targetPath := "../gcp/iap-ingress/overlays/application" - fsys := fs.MakeRealFS() - lrc := loader.RestrictionRootOnly - _loader, loaderErr := loader.NewLoader(lrc, validators.MakeFakeValidator(), targetPath, fsys) - if loaderErr != nil { - t.Fatalf("could not load kustomize loader: %v", loaderErr) - } - rf := resmap.NewFactory(resource.NewFactory(kunstruct.NewKunstructuredFactoryImpl()), transformer.NewFactoryImpl()) - pc := plugins.DefaultPluginConfig() - kt, err := target.NewKustTarget(_loader, rf, transformer.NewFactoryImpl(), plugins.NewLoader(pc, rf)) - if err != nil { - th.t.Fatalf("Unexpected construction error %v", err) - } - actual, err := kt.MakeCustomizedResMap() - if err != nil { - t.Fatalf("Err: %v", err) - } - 
th.assertActualEqualsExpected(actual, string(expected)) + th := NewKustTestHarness(t, "/manifests/gcp/iap-ingress/overlays/application") + writeIapIngressOverlaysApplication(th) + m, err := th.makeKustTarget().MakeCustomizedResMap() + if err != nil { + t.Fatalf("Err: %v", err) + } + expected, err := m.AsYaml() + if err != nil { + t.Fatalf("Err: %v", err) + } + targetPath := "../gcp/iap-ingress/overlays/application" + fsys := fs.MakeRealFS() + lrc := loader.RestrictionRootOnly + _loader, loaderErr := loader.NewLoader(lrc, validators.MakeFakeValidator(), targetPath, fsys) + if loaderErr != nil { + t.Fatalf("could not load kustomize loader: %v", loaderErr) + } + rf := resmap.NewFactory(resource.NewFactory(kunstruct.NewKunstructuredFactoryImpl()), transformer.NewFactoryImpl()) + pc := plugins.DefaultPluginConfig() + kt, err := target.NewKustTarget(_loader, rf, transformer.NewFactoryImpl(), plugins.NewLoader(pc, rf)) + if err != nil { + th.t.Fatalf("Unexpected construction error %v", err) + } + actual, err := kt.MakeCustomizedResMap() + if err != nil { + t.Fatalf("Err: %v", err) + } + th.assertActualEqualsExpected(actual, string(expected)) } diff --git a/tests/iap-ingress-overlays-certmanager_test.go b/tests/iap-ingress-overlays-certmanager_test.go index 3521288a209..2921e88a4eb 100644 --- a/tests/iap-ingress-overlays-certmanager_test.go +++ b/tests/iap-ingress-overlays-certmanager_test.go @@ -101,6 +101,8 @@ kind: BackendConfig metadata: name: iap-backendconfig spec: + # Jupyter uses websockets so we want to increase the timeout. + timeoutSec: 3600 iap: enabled: true oauthclientCredentials: @@ -222,7 +224,8 @@ data: setup_backend.sh: | #!/usr/bin/env bash # - # A simple shell script to configure the backend timeouts and health checks by using gcloud. + # A simple shell script to configure the JWT audience used with ISTIO + set -x [ -z ${NAMESPACE} ] && echo Error NAMESPACE must be set && exit 1 [ -z ${SERVICE} ] && echo Error SERVICE must be set && exit 1 [ -z ${INGRESS_NAME} ] && echo Error INGRESS_NAME must be set && exit 1 @@ -240,9 +243,15 @@ data: fi # Activate the service account - gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} + if [ ! -z "${GOOGLE_APPLICATION_CREDENTIALS}" ]; then + # As of 0.7.0 we should be using workload identity and never setting GOOGLE_APPLICATION_CREDENTIALS. + # We keep this for backwards compatibility, but it can be removed later. + gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} + fi + # Print out the config for debugging gcloud config list + gcloud auth list NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}') echo "node port is ${NODE_PORT}" @@ -284,30 +293,15 @@ data: echo "Clearing lock on service annotation" kubectl patch svc "${SERVICE}" -p "{\"metadata\": { \"annotations\": {\"backendlock\": \"\" }}}" - checkBackend() { - # created by init container. - . /var/shared/healthz.env - - # If node port or backend id change, so does the JWT audience. - CURR_NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}') - read -ra toks <<<"$(gcloud compute --project=${PROJECT} backend-services list --filter=name~k8s-be-${CURR_NODE_PORT}- --format='value(id,timeoutSec)')" - CURR_BACKEND_ID="${toks[0]}" - CURR_BACKEND_TIMEOUT="${toks[1]}" - [[ "$BACKEND_ID" == "$CURR_BACKEND_ID" && "${CURR_BACKEND_TIMEOUT}" -eq 3600 ]] - } - - # Verify configuration every 10 seconds.
+ # Loop for ever; we don't want to exit because restarting the container leads users to think there might be a problem while true; do - if ! checkBackend; then - echo "$(date) WARN: Backend check failed, restarting container." - exit 1 - fi - sleep 10 + sleep 3600 done update_backend.sh: | #!/bin/bash # - # A simple shell script to configure the backend timeouts and health checks by using gcloud. + # A simple shell script to configure the health checks by using gcloud. + set -x [ -z ${NAMESPACE} ] && echo Error NAMESPACE must be set && exit 1 [ -z ${SERVICE} ] && echo Error SERVICE must be set && exit 1 @@ -319,58 +313,63 @@ data: exit 1 fi - # Activate the service account, allow 5 retries - for i in {1..5}; do gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} && break || sleep 10; done - - NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}') - echo node port is ${NODE_PORT} - - while [[ -z ${BACKEND_NAME} ]]; do - BACKENDS=$(kubectl --namespace=${NAMESPACE} get ingress ${INGRESS_NAME} -o jsonpath='{.metadata.annotations.ingress\.kubernetes\.io/backends}') - echo "fetching backends info with ${INGRESS_NAME}: ${BACKENDS}" - BACKEND_NAME=$(echo $BACKENDS | grep -o "k8s-be-${NODE_PORT}--[0-9a-z]\+") - echo "backend name is ${BACKEND_NAME}" - sleep 2 - done - - while [[ -z ${BACKEND_SERVICE} ]]; - do BACKEND_SERVICE=$(gcloud --project=${PROJECT} compute backend-services list --filter=name~k8s-be-${NODE_PORT}- --uri); - echo "Waiting for the backend-services resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}..."; - sleep 2; - done + if [[ ! -z "${GOOGLE_APPLICATION_CREDENTIALS}" ]]; then + # TODO(jlewi): As of 0.7 we should always be using workload identity. We can remove it post 0.7.0 once we have workload identity + # fully working + # Activate the service account, allow 5 retries + for i in {1..5}; do gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} && break || sleep 10; done + fi + + set_health_check () { + NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}') + echo node port is ${NODE_PORT} + + while [[ -z ${BACKEND_NAME} ]]; do + BACKENDS=$(kubectl --namespace=${NAMESPACE} get ingress ${INGRESS_NAME} -o jsonpath='{.metadata.annotations.ingress\.kubernetes\.io/backends}') + echo "fetching backends info with ${INGRESS_NAME}: ${BACKENDS}" + BACKEND_NAME=$(echo $BACKENDS | grep -o "k8s-be-${NODE_PORT}--[0-9a-z]\+") + echo "backend name is ${BACKEND_NAME}" + sleep 2 + done + + while [[ -z ${BACKEND_SERVICE} ]]; + do BACKEND_SERVICE=$(gcloud --project=${PROJECT} compute backend-services list --filter=name~k8s-be-${NODE_PORT}- --uri); + echo "Waiting for the backend-services resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}..."; + sleep 2; + done + + while [[ -z ${HEALTH_CHECK_URI} ]]; + do HEALTH_CHECK_URI=$(gcloud compute --project=${PROJECT} health-checks list --filter=name~${BACKEND_NAME} --uri); + echo "Waiting for the healthcheck resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}..."; + sleep 2; + done + + echo health check URI is ${HEALTH_CHECK_URI} + + # Since we create the envoy-ingress ingress object before creating the envoy + # deployment object, healthcheck will not be configured correctly in the GCP + # load balancer. It will default the healthcheck request path to a value of + # / instead of the intended /healthz. 
+ # Manually update the healthcheck request path to /healthz + if [[ ${HEALTHCHECK_PATH} ]]; then + # This is basic auth + echo Running health checks update ${HEALTH_CHECK_URI} with ${HEALTHCHECK_PATH} + gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=${HEALTHCHECK_PATH} + else + # /healthz/ready is the health check path for istio-ingressgateway + echo Running health checks update ${HEALTH_CHECK_URI} with /healthz/ready + gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=/healthz/ready + # We need the nodeport for istio-ingressgateway status-port + STATUS_NODE_PORT=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.ports[?(@.name=="status-port")].nodePort}') + gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --port=${STATUS_NODE_PORT} + fi + } - while [[ -z ${HEALTH_CHECK_URI} ]]; - do HEALTH_CHECK_URI=$(gcloud compute --project=${PROJECT} health-checks list --filter=name~${BACKEND_NAME} --uri); - echo "Waiting for the healthcheck resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}..."; - sleep 2; + while true; do + set_health_check + echo "Backend updated successfully. Waiting 1 hour before updating again." + sleep 3600 done - - echo health check URI is ${HEALTH_CHECK_URI} - - # Since we create the envoy-ingress ingress object before creating the envoy - # deployment object, healthcheck will not be configured correctly in the GCP - # load balancer. It will default the healthcheck request path to a value of - # / instead of the intended /healthz. - # Manually update the healthcheck request path to /healthz - if [[ ${HEALTHCHECK_PATH} ]]; then - # This is basic auth - echo Running health checks update ${HEALTH_CHECK_URI} with ${HEALTHCHECK_PATH} - gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=${HEALTHCHECK_PATH} - else - # /healthz/ready is the health check path for istio-ingressgateway - echo Running health checks update ${HEALTH_CHECK_URI} with /healthz/ready - gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=/healthz/ready - # We need the nodeport for istio-ingressgateway status-port - STATUS_NODE_PORT=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.ports[?(@.name=="status-port")].nodePort}') - gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --port=${STATUS_NODE_PORT} - fi - - # Since JupyterHub uses websockets we want to increase the backend timeout - echo Increasing backend timeout for JupyterHub - gcloud --project=${PROJECT} compute backend-services update --global ${BACKEND_SERVICE} --timeout=3600 - - echo "Backend updated successfully. Waiting 1 hour before updating again." 
- sleep 3600 kind: ConfigMap metadata: name: envoy-config @@ -626,8 +625,7 @@ varReference: - path: data/healthcheck_route.yaml kind: ConfigMap - path: spec/domains - kind: ManagedCertificate -`) + kind: ManagedCertificate`) th.writeF("/manifests/gcp/iap-ingress/base/params.env", ` namespace=kubeflow appName=kubeflow @@ -639,8 +637,7 @@ oauthSecretName=kubeflow-oauth project= adminSaSecretName=admin-gcp-sa tlsSecretName=envoy-ingress-tls -istioNamespace=istio-system -`) +istioNamespace=istio-system`) th.writeK("/manifests/gcp/iap-ingress/base", ` apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization diff --git a/tests/iap-ingress-overlays-gcp-credentials_test.go b/tests/iap-ingress-overlays-gcp-credentials_test.go index 0cf3ae49f47..7ae41845156 100644 --- a/tests/iap-ingress-overlays-gcp-credentials_test.go +++ b/tests/iap-ingress-overlays-gcp-credentials_test.go @@ -75,6 +75,8 @@ kind: BackendConfig metadata: name: iap-backendconfig spec: + # Jupyter uses websockets so we want to increase the timeout. + timeoutSec: 3600 iap: enabled: true oauthclientCredentials: @@ -196,7 +198,8 @@ data: setup_backend.sh: | #!/usr/bin/env bash # - # A simple shell script to configure the backend timeouts and health checks by using gcloud. + # A simple shell script to configure the JWT audience used with ISTIO + set -x [ -z ${NAMESPACE} ] && echo Error NAMESPACE must be set && exit 1 [ -z ${SERVICE} ] && echo Error SERVICE must be set && exit 1 [ -z ${INGRESS_NAME} ] && echo Error INGRESS_NAME must be set && exit 1 @@ -214,9 +217,15 @@ data: fi # Activate the service account - gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} + if [ ! -z "${GOOGLE_APPLICATION_CREDENTIALS}" ]; then + # As of 0.7.0 we should be using workload identity and never setting GOOGLE_APPLICATION_CREDENTIALS. + # We keep this for backwards compatibility, but it can be removed later. + gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} + fi + # Print out the config for debugging gcloud config list + gcloud auth list NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}') echo "node port is ${NODE_PORT}" @@ -258,30 +267,15 @@ data: echo "Clearing lock on service annotation" kubectl patch svc "${SERVICE}" -p "{\"metadata\": { \"annotations\": {\"backendlock\": \"\" }}}" - checkBackend() { - # created by init container. - . /var/shared/healthz.env - - # If node port or backend id change, so does the JWT audience. - CURR_NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}') - read -ra toks <<<"$(gcloud compute --project=${PROJECT} backend-services list --filter=name~k8s-be-${CURR_NODE_PORT}- --format='value(id,timeoutSec)')" - CURR_BACKEND_ID="${toks[0]}" - CURR_BACKEND_TIMEOUT="${toks[1]}" - [[ "$BACKEND_ID" == "$CURR_BACKEND_ID" && "${CURR_BACKEND_TIMEOUT}" -eq 3600 ]] - } - - # Verify configuration every 10 seconds. + # Loop for ever; we don't want to exit because restarting the container leads users to think there might be a problem while true; do - if ! checkBackend; then - echo "$(date) WARN: Backend check failed, restarting container." - exit 1 - fi - sleep 10 + sleep 3600 done update_backend.sh: | #!/bin/bash # - # A simple shell script to configure the backend timeouts and health checks by using gcloud. + # A simple shell script to configure the health checks by using gcloud.
+ set -x [ -z ${NAMESPACE} ] && echo Error NAMESPACE must be set && exit 1 [ -z ${SERVICE} ] && echo Error SERVICE must be set && exit 1 @@ -293,58 +287,63 @@ data: exit 1 fi - # Activate the service account, allow 5 retries - for i in {1..5}; do gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} && break || sleep 10; done - - NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}') - echo node port is ${NODE_PORT} - - while [[ -z ${BACKEND_NAME} ]]; do - BACKENDS=$(kubectl --namespace=${NAMESPACE} get ingress ${INGRESS_NAME} -o jsonpath='{.metadata.annotations.ingress\.kubernetes\.io/backends}') - echo "fetching backends info with ${INGRESS_NAME}: ${BACKENDS}" - BACKEND_NAME=$(echo $BACKENDS | grep -o "k8s-be-${NODE_PORT}--[0-9a-z]\+") - echo "backend name is ${BACKEND_NAME}" - sleep 2 - done - - while [[ -z ${BACKEND_SERVICE} ]]; - do BACKEND_SERVICE=$(gcloud --project=${PROJECT} compute backend-services list --filter=name~k8s-be-${NODE_PORT}- --uri); - echo "Waiting for the backend-services resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}..."; - sleep 2; - done + if [[ ! -z "${GOOGLE_APPLICATION_CREDENTIALS}" ]]; then + # TODO(jlewi): As of 0.7 we should always be using workload identity. We can remove it post 0.7.0 once we have workload identity + # fully working + # Activate the service account, allow 5 retries + for i in {1..5}; do gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} && break || sleep 10; done + fi + + set_health_check () { + NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}') + echo node port is ${NODE_PORT} + + while [[ -z ${BACKEND_NAME} ]]; do + BACKENDS=$(kubectl --namespace=${NAMESPACE} get ingress ${INGRESS_NAME} -o jsonpath='{.metadata.annotations.ingress\.kubernetes\.io/backends}') + echo "fetching backends info with ${INGRESS_NAME}: ${BACKENDS}" + BACKEND_NAME=$(echo $BACKENDS | grep -o "k8s-be-${NODE_PORT}--[0-9a-z]\+") + echo "backend name is ${BACKEND_NAME}" + sleep 2 + done + + while [[ -z ${BACKEND_SERVICE} ]]; + do BACKEND_SERVICE=$(gcloud --project=${PROJECT} compute backend-services list --filter=name~k8s-be-${NODE_PORT}- --uri); + echo "Waiting for the backend-services resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}..."; + sleep 2; + done + + while [[ -z ${HEALTH_CHECK_URI} ]]; + do HEALTH_CHECK_URI=$(gcloud compute --project=${PROJECT} health-checks list --filter=name~${BACKEND_NAME} --uri); + echo "Waiting for the healthcheck resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}..."; + sleep 2; + done + + echo health check URI is ${HEALTH_CHECK_URI} + + # Since we create the envoy-ingress ingress object before creating the envoy + # deployment object, healthcheck will not be configured correctly in the GCP + # load balancer. It will default the healthcheck request path to a value of + # / instead of the intended /healthz. 
+ # Manually update the healthcheck request path to /healthz + if [[ ${HEALTHCHECK_PATH} ]]; then + # This is basic auth + echo Running health checks update ${HEALTH_CHECK_URI} with ${HEALTHCHECK_PATH} + gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=${HEALTHCHECK_PATH} + else + # /healthz/ready is the health check path for istio-ingressgateway + echo Running health checks update ${HEALTH_CHECK_URI} with /healthz/ready + gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=/healthz/ready + # We need the nodeport for istio-ingressgateway status-port + STATUS_NODE_PORT=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.ports[?(@.name=="status-port")].nodePort}') + gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --port=${STATUS_NODE_PORT} + fi + } - while [[ -z ${HEALTH_CHECK_URI} ]]; - do HEALTH_CHECK_URI=$(gcloud compute --project=${PROJECT} health-checks list --filter=name~${BACKEND_NAME} --uri); - echo "Waiting for the healthcheck resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}..."; - sleep 2; + while true; do + set_health_check + echo "Backend updated successfully. Waiting 1 hour before updating again." + sleep 3600 done - - echo health check URI is ${HEALTH_CHECK_URI} - - # Since we create the envoy-ingress ingress object before creating the envoy - # deployment object, healthcheck will not be configured correctly in the GCP - # load balancer. It will default the healthcheck request path to a value of - # / instead of the intended /healthz. - # Manually update the healthcheck request path to /healthz - if [[ ${HEALTHCHECK_PATH} ]]; then - # This is basic auth - echo Running health checks update ${HEALTH_CHECK_URI} with ${HEALTHCHECK_PATH} - gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=${HEALTHCHECK_PATH} - else - # /healthz/ready is the health check path for istio-ingressgateway - echo Running health checks update ${HEALTH_CHECK_URI} with /healthz/ready - gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=/healthz/ready - # We need the nodeport for istio-ingressgateway status-port - STATUS_NODE_PORT=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.ports[?(@.name=="status-port")].nodePort}') - gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --port=${STATUS_NODE_PORT} - fi - - # Since JupyterHub uses websockets we want to increase the backend timeout - echo Increasing backend timeout for JupyterHub - gcloud --project=${PROJECT} compute backend-services update --global ${BACKEND_SERVICE} --timeout=3600 - - echo "Backend updated successfully. Waiting 1 hour before updating again." 
- sleep 3600 kind: ConfigMap metadata: name: envoy-config @@ -600,8 +599,7 @@ varReference: - path: data/healthcheck_route.yaml kind: ConfigMap - path: spec/domains - kind: ManagedCertificate -`) + kind: ManagedCertificate`) th.writeF("/manifests/gcp/iap-ingress/base/params.env", ` namespace=kubeflow appName=kubeflow @@ -613,8 +611,7 @@ oauthSecretName=kubeflow-oauth project= adminSaSecretName=admin-gcp-sa tlsSecretName=envoy-ingress-tls -istioNamespace=istio-system -`) +istioNamespace=istio-system`) th.writeK("/manifests/gcp/iap-ingress/base", ` apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization diff --git a/tests/iap-ingress-overlays-managed-cert_test.go b/tests/iap-ingress-overlays-managed-cert_test.go index 451a69a13ec..98010d55492 100644 --- a/tests/iap-ingress-overlays-managed-cert_test.go +++ b/tests/iap-ingress-overlays-managed-cert_test.go @@ -40,6 +40,8 @@ kind: BackendConfig metadata: name: iap-backendconfig spec: + # Jupyter uses websockets so we want to increase the timeout. + timeoutSec: 3600 iap: enabled: true oauthclientCredentials: @@ -161,7 +163,8 @@ data: setup_backend.sh: | #!/usr/bin/env bash # - # A simple shell script to configure the backend timeouts and health checks by using gcloud. + # A simple shell script to configure the JWT audience used with ISTIO + set -x [ -z ${NAMESPACE} ] && echo Error NAMESPACE must be set && exit 1 [ -z ${SERVICE} ] && echo Error SERVICE must be set && exit 1 [ -z ${INGRESS_NAME} ] && echo Error INGRESS_NAME must be set && exit 1 @@ -179,9 +182,15 @@ data: fi # Activate the service account - gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} + if [ ! -z "${GOOGLE_APPLICATION_CREDENTIALS}" ]; then + # As of 0.7.0 we should be using workload identity and never setting GOOGLE_APPLICATION_CREDENTIALS. + # We keep this for backwards compatibility, but it can be removed later. + gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} + fi + # Print out the config for debugging gcloud config list + gcloud auth list NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}') echo "node port is ${NODE_PORT}" @@ -223,30 +232,15 @@ data: echo "Clearing lock on service annotation" kubectl patch svc "${SERVICE}" -p "{\"metadata\": { \"annotations\": {\"backendlock\": \"\" }}}" - checkBackend() { - # created by init container. - . /var/shared/healthz.env - - # If node port or backend id change, so does the JWT audience. - CURR_NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}') - read -ra toks <<<"$(gcloud compute --project=${PROJECT} backend-services list --filter=name~k8s-be-${CURR_NODE_PORT}- --format='value(id,timeoutSec)')" - CURR_BACKEND_ID="${toks[0]}" - CURR_BACKEND_TIMEOUT="${toks[1]}" - [[ "$BACKEND_ID" == "$CURR_BACKEND_ID" && "${CURR_BACKEND_TIMEOUT}" -eq 3600 ]] - } - - # Verify configuration every 10 seconds. + # Loop for ever; we don't want to exit because restarting the container leads users to think there might be a problem while true; do - if ! checkBackend; then - echo "$(date) WARN: Backend check failed, restarting container." - exit 1 - fi - sleep 10 + sleep 3600 done update_backend.sh: | #!/bin/bash # - # A simple shell script to configure the backend timeouts and health checks by using gcloud. + # A simple shell script to configure the health checks by using gcloud.
+ set -x [ -z ${NAMESPACE} ] && echo Error NAMESPACE must be set && exit 1 [ -z ${SERVICE} ] && echo Error SERVICE must be set && exit 1 @@ -258,58 +252,63 @@ data: exit 1 fi - # Activate the service account, allow 5 retries - for i in {1..5}; do gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} && break || sleep 10; done - - NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}') - echo node port is ${NODE_PORT} - - while [[ -z ${BACKEND_NAME} ]]; do - BACKENDS=$(kubectl --namespace=${NAMESPACE} get ingress ${INGRESS_NAME} -o jsonpath='{.metadata.annotations.ingress\.kubernetes\.io/backends}') - echo "fetching backends info with ${INGRESS_NAME}: ${BACKENDS}" - BACKEND_NAME=$(echo $BACKENDS | grep -o "k8s-be-${NODE_PORT}--[0-9a-z]\+") - echo "backend name is ${BACKEND_NAME}" - sleep 2 - done - - while [[ -z ${BACKEND_SERVICE} ]]; - do BACKEND_SERVICE=$(gcloud --project=${PROJECT} compute backend-services list --filter=name~k8s-be-${NODE_PORT}- --uri); - echo "Waiting for the backend-services resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}..."; - sleep 2; - done + if [[ ! -z "${GOOGLE_APPLICATION_CREDENTIALS}" ]]; then + # TODO(jlewi): As of 0.7 we should always be using workload identity. We can remove it post 0.7.0 once we have workload identity + # fully working + # Activate the service account, allow 5 retries + for i in {1..5}; do gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} && break || sleep 10; done + fi + + set_health_check () { + NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}') + echo node port is ${NODE_PORT} + + while [[ -z ${BACKEND_NAME} ]]; do + BACKENDS=$(kubectl --namespace=${NAMESPACE} get ingress ${INGRESS_NAME} -o jsonpath='{.metadata.annotations.ingress\.kubernetes\.io/backends}') + echo "fetching backends info with ${INGRESS_NAME}: ${BACKENDS}" + BACKEND_NAME=$(echo $BACKENDS | grep -o "k8s-be-${NODE_PORT}--[0-9a-z]\+") + echo "backend name is ${BACKEND_NAME}" + sleep 2 + done + + while [[ -z ${BACKEND_SERVICE} ]]; + do BACKEND_SERVICE=$(gcloud --project=${PROJECT} compute backend-services list --filter=name~k8s-be-${NODE_PORT}- --uri); + echo "Waiting for the backend-services resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}..."; + sleep 2; + done + + while [[ -z ${HEALTH_CHECK_URI} ]]; + do HEALTH_CHECK_URI=$(gcloud compute --project=${PROJECT} health-checks list --filter=name~${BACKEND_NAME} --uri); + echo "Waiting for the healthcheck resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}..."; + sleep 2; + done + + echo health check URI is ${HEALTH_CHECK_URI} + + # Since we create the envoy-ingress ingress object before creating the envoy + # deployment object, healthcheck will not be configured correctly in the GCP + # load balancer. It will default the healthcheck request path to a value of + # / instead of the intended /healthz. 
+ # Manually update the healthcheck request path to /healthz + if [[ ${HEALTHCHECK_PATH} ]]; then + # This is basic auth + echo Running health checks update ${HEALTH_CHECK_URI} with ${HEALTHCHECK_PATH} + gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=${HEALTHCHECK_PATH} + else + # /healthz/ready is the health check path for istio-ingressgateway + echo Running health checks update ${HEALTH_CHECK_URI} with /healthz/ready + gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=/healthz/ready + # We need the nodeport for istio-ingressgateway status-port + STATUS_NODE_PORT=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.ports[?(@.name=="status-port")].nodePort}') + gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --port=${STATUS_NODE_PORT} + fi + } - while [[ -z ${HEALTH_CHECK_URI} ]]; - do HEALTH_CHECK_URI=$(gcloud compute --project=${PROJECT} health-checks list --filter=name~${BACKEND_NAME} --uri); - echo "Waiting for the healthcheck resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}..."; - sleep 2; + while true; do + set_health_check + echo "Backend updated successfully. Waiting 1 hour before updating again." + sleep 3600 done - - echo health check URI is ${HEALTH_CHECK_URI} - - # Since we create the envoy-ingress ingress object before creating the envoy - # deployment object, healthcheck will not be configured correctly in the GCP - # load balancer. It will default the healthcheck request path to a value of - # / instead of the intended /healthz. - # Manually update the healthcheck request path to /healthz - if [[ ${HEALTHCHECK_PATH} ]]; then - # This is basic auth - echo Running health checks update ${HEALTH_CHECK_URI} with ${HEALTHCHECK_PATH} - gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=${HEALTHCHECK_PATH} - else - # /healthz/ready is the health check path for istio-ingressgateway - echo Running health checks update ${HEALTH_CHECK_URI} with /healthz/ready - gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=/healthz/ready - # We need the nodeport for istio-ingressgateway status-port - STATUS_NODE_PORT=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.ports[?(@.name=="status-port")].nodePort}') - gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --port=${STATUS_NODE_PORT} - fi - - # Since JupyterHub uses websockets we want to increase the backend timeout - echo Increasing backend timeout for JupyterHub - gcloud --project=${PROJECT} compute backend-services update --global ${BACKEND_SERVICE} --timeout=3600 - - echo "Backend updated successfully. Waiting 1 hour before updating again." - sleep 3600 kind: ConfigMap metadata: name: envoy-config @@ -565,8 +564,7 @@ varReference: - path: data/healthcheck_route.yaml kind: ConfigMap - path: spec/domains - kind: ManagedCertificate -`) + kind: ManagedCertificate`) th.writeF("/manifests/gcp/iap-ingress/base/params.env", ` namespace=kubeflow appName=kubeflow @@ -578,8 +576,7 @@ oauthSecretName=kubeflow-oauth project= adminSaSecretName=admin-gcp-sa tlsSecretName=envoy-ingress-tls -istioNamespace=istio-system -`) +istioNamespace=istio-system`) th.writeK("/manifests/gcp/iap-ingress/base", ` apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization
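A note on the timeout change running through this diff: the 3600s websocket timeout that update_backend.sh used to apply with gcloud compute backend-services update now comes declaratively from the BackendConfig objects (timeoutSec: 3600) referenced by the service annotations. A minimal sketch of how one might verify the new path on a live cluster; the resource names come from the manifests above, while the namespace and ${PROJECT} are assumptions about a particular deployment:

    # Inspect the BackendConfig the service annotation points at (namespace may differ per overlay).
    kubectl -n kubeflow get backendconfig iap-backendconfig -o jsonpath='{.spec.timeoutSec}'
    # Confirm the GCLB backend services picked up the 3600s timeout.
    gcloud --project=${PROJECT} compute backend-services list \
      --filter='name~k8s-be-' --format='value(name,timeoutSec)'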
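Similarly, the rewritten update_backend.sh now only manages the health check, pointing it at the istio-ingressgateway status-port with request path /healthz/ready in the Istio case. A rough spot-check, under the same assumptions:

    # NodePort behind the gateway's status-port, as used by set_health_check.
    kubectl -n istio-system get service istio-ingressgateway \
      -o jsonpath='{.spec.ports[?(@.name=="status-port")].nodePort}'
    # The updated health check should report that port and /healthz/ready.
    gcloud --project=${PROJECT} compute health-checks list \
      --format='value(name,httpHealthCheck.port,httpHealthCheck.requestPath)'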
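Finally, the scripts now treat GOOGLE_APPLICATION_CREDENTIALS as a legacy fallback, since 0.7 targets workload identity. A hypothetical sketch of the GKE binding that makes gcloud auth list inside the pod show a Google service account with no key file mounted; ${GSA_NAME} is a placeholder, while kf-admin and the kubeflow namespace come from the manifests above:

    # Let the kf-admin Kubernetes service account impersonate the GSA.
    gcloud iam service-accounts add-iam-policy-binding \
      "${GSA_NAME}@${PROJECT}.iam.gserviceaccount.com" \
      --role roles/iam.workloadIdentityUser \
      --member "serviceAccount:${PROJECT}.svc.id.goog[kubeflow/kf-admin]"
    # Point the KSA at the GSA; pods then authenticate via the GKE metadata server.
    kubectl -n kubeflow annotate serviceaccount kf-admin \
      iam.gke.io/gcp-service-account="${GSA_NAME}@${PROJECT}.iam.gserviceaccount.com"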