From 57647616a4809855f2a10352f65955a3a3dd73f0 Mon Sep 17 00:00:00 2001 From: Emmanuel Mathot Date: Wed, 9 Apr 2025 08:12:22 +0200 Subject: [PATCH 1/5] Refactor pgstacbootstrap job and ConfigMaps to use Helm hooks for execution order --- .../templates/pgstacboostrap/configmap.yaml | 19 +++++++++++++++ .../eoapi/templates/pgstacboostrap/job.yaml | 23 +++++++++++-------- .../eoapi/templates/services/deployment.yaml | 21 +++++------------ 3 files changed, 39 insertions(+), 24 deletions(-) diff --git a/helm-chart/eoapi/templates/pgstacboostrap/configmap.yaml b/helm-chart/eoapi/templates/pgstacboostrap/configmap.yaml index b3f9777b..ff059efc 100644 --- a/helm-chart/eoapi/templates/pgstacboostrap/configmap.yaml +++ b/helm-chart/eoapi/templates/pgstacboostrap/configmap.yaml @@ -1,9 +1,16 @@ {{- if .Values.pgstacBootstrap.enabled }} --- +# These ConfigMaps provide the necessary data and scripts for the pgstacbootstrap job. +# They use Helm hooks with a weight of "-6" (lower than the job's "-5") to ensure +# they are created before the job that depends on them. 
apiVersion: v1 kind: ConfigMap metadata: name: initdb-sql-config-{{ $.Release.Name }} + annotations: + helm.sh/hook: "post-install,post-upgrade" + helm.sh/hook-weight: "-6" + helm.sh/hook-delete-policy: "before-hook-creation,hook-succeeded" data: initdb.sql: | {{- range $path, $bytes := $.Files.Glob "initdb-data/*.sql" -}} @@ -14,6 +21,10 @@ apiVersion: v1 kind: ConfigMap metadata: name: initdb-json-config-{{ $.Release.Name }} + annotations: + helm.sh/hook: "post-install,post-upgrade" + helm.sh/hook-weight: "-6" + helm.sh/hook-delete-policy: "before-hook-creation,hook-succeeded" data: {{- range $path, $bytes := $.Files.Glob "initdb-data/*.json" -}} {{- base $path | nindent 2 -}}: | {{- $.Files.Get $path | nindent 4 -}} @@ -23,6 +34,10 @@ apiVersion: v1 kind: ConfigMap metadata: name: pgstac-setup-config-{{ $.Release.Name }} + annotations: + helm.sh/hook: "post-install,post-upgrade" + helm.sh/hook-weight: "-6" + helm.sh/hook-delete-policy: "before-hook-creation,hook-succeeded" data: pgstac-migrate.py: | {{- range $path, $bytes := $.Files.Glob "initdb-data/*.py" -}} @@ -33,6 +48,10 @@ apiVersion: v1 kind: ConfigMap metadata: name: initdb-sh-config-{{ $.Release.Name }} + annotations: + helm.sh/hook: "post-install,post-upgrade" + helm.sh/hook-weight: "-6" + helm.sh/hook-delete-policy: "before-hook-creation,hook-succeeded" data: run-forever.sh: | #!/bin/bash diff --git a/helm-chart/eoapi/templates/pgstacboostrap/job.yaml b/helm-chart/eoapi/templates/pgstacboostrap/job.yaml index 07827ce4..4ebcf16b 100644 --- a/helm-chart/eoapi/templates/pgstacboostrap/job.yaml +++ b/helm-chart/eoapi/templates/pgstacboostrap/job.yaml @@ -1,11 +1,25 @@ {{- if .Values.pgstacBootstrap.enabled }} --- +# This job initializes the PostgreSQL database with the necessary schema and data. +# Instead of using an initContainer in the deployment to wait for this job to complete, +# we use Helm hooks to control the execution order: +# +# 1. 
The "post-install,post-upgrade" hook runs this job only after all regular (non-hook) +# resources of the release, including the postgres dependency and the deployments, are created +# 2. The "-5" hook-weight ensures this job runs after its ConfigMaps (weight -6) +# 3. The "before-hook-creation,hook-succeeded" delete policy ensures the job is recreated +# on each helm install/upgrade and is cleaned up after successful completion +# apiVersion: batch/v1 kind: Job metadata: name: pgstacbootstrap labels: app: pgstacbootstrap + annotations: + helm.sh/hook: "post-install,post-upgrade" + helm.sh/hook-weight: "-5" + helm.sh/hook-delete-policy: "before-hook-creation,hook-succeeded" spec: template: metadata: @@ -13,15 +27,6 @@ spec: app: pgstacbootstrap spec: restartPolicy: Never - initContainers: - - name: wait-for-db - image: busybox - command: - {{ if .Values.testing }} - ['sh', '-c', 'until nc -z {{ $.Release.Name }}-pgbouncer 5432; do echo waiting for db; sleep 10; done;'] - {{ else }} - ['sh', '-c', 'until nc -z eoapi-pgbouncer 5432; do echo waiting for db; sleep 10; done;'] - {{ end }} containers: - name: pgstacbootstrap image: {{ .Values.pgstacBootstrap.image.name }}:{{ .Values.pgstacBootstrap.image.tag }} diff --git a/helm-chart/eoapi/templates/services/deployment.yaml b/helm-chart/eoapi/templates/services/deployment.yaml index e9eba222..23facb62 100644 --- a/helm-chart/eoapi/templates/services/deployment.yaml +++ b/helm-chart/eoapi/templates/services/deployment.yaml @@ -9,10 +9,14 @@ metadata: app: {{ $serviceName }}-{{ $.Release.Name }} gitsha: {{ $.Values.gitSha }} name: {{ $serviceName }}-{{ $.Release.Name }} - {{- with index $v "annotations" }} annotations: + # These hooks ensure the deployment runs after the pgstacbootstrap job (which has weight -5) + # No initContainer is needed anymore since Helm will handle the execution order + # helm.sh/hook: "post-install,post-upgrade" + # helm.sh/hook-weight: "10" + {{- with index $v "annotations" }} {{- toYaml . 
| nindent 4 }} - {{- end }} + {{- end }} spec: progressDeadlineSeconds: 600 revisionHistoryLimit: 5 @@ -30,19 +34,6 @@ spec: app: {{ $serviceName }}-{{ $.Release.Name }} spec: serviceAccountName: eoapi-sa-{{ $.Release.Name }} - initContainers: - - name: wait-for-pgstacbootstrap - image: bitnami/kubectl:latest - command: - - /bin/sh - - -c - - | - echo "Waiting for pgstacbootstrap job to complete..." - while ! kubectl -n {{ $.Release.Namespace }} wait --for=condition=complete job/pgstacbootstrap --timeout=5s; do - echo "pgstacbootstrap job not completed yet. Checking again in 10 seconds..." - sleep 10 - done - echo "pgstacbootstrap job completed successfully" containers: - image: {{ index $v "image" "name" }}:{{ index $v "image" "tag" }} name: {{ $serviceName }} From 56ac1be38cd0689cf88bf009631a264611caafb0 Mon Sep 17 00:00:00 2001 From: Emmanuel Mathot Date: Wed, 9 Apr 2025 08:14:45 +0200 Subject: [PATCH 2/5] Update PGO_VERSION to 5.7.4 in helm-tests workflow --- .github/workflows/helm-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/helm-tests.yml b/.github/workflows/helm-tests.yml index a7c50b13..079ff983 100644 --- a/.github/workflows/helm-tests.yml +++ b/.github/workflows/helm-tests.yml @@ -9,7 +9,7 @@ on: env: HELM_VERSION: v3.15.2 - PGO_VERSION: 5.5.2 + PGO_VERSION: 5.7.4 jobs: helm-tests: From 919ee36ec7c767c6300e99808861f94d1e72c214 Mon Sep 17 00:00:00 2001 From: Emmanuel Mathot Date: Wed, 9 Apr 2025 08:20:08 +0200 Subject: [PATCH 3/5] Enhance debugging output for failed service boot and test failures in CI workflow --- .github/workflows/helm-tests.yml | 78 +++++++++++++++++++++++++++++--- 1 file changed, 72 insertions(+), 6 deletions(-) diff --git a/.github/workflows/helm-tests.yml b/.github/workflows/helm-tests.yml index 079ff983..837d5f3a 100644 --- a/.github/workflows/helm-tests.yml +++ b/.github/workflows/helm-tests.yml @@ -118,7 +118,40 @@ jobs: - name: cleanup if services fail to boot if: 
steps.watchservices.outcome == 'failure' run: | - echo "The previous step failed or timed out." + echo "The watchservices step failed or timed out. Extracting pod logs for debugging..." + + # Get and display all pods status + echo "===== Pod Status =====" + kubectl get pods + + # Extract logs from database pod + echo "===== Database Pod Logs =====" + kubectl get pod | grep "^db-$RELEASE_NAME" | cut -d' ' -f1 | xargs -I{} kubectl logs pod/{} --tail=100 || echo "Could not get database logs" + + # Extract logs from pgstacbootstrap pod + echo "===== PGSTACBootstrap Pod Logs =====" + kubectl get pod | grep "^pgstacbootstrap-$RELEASE_NAME" | cut -d' ' -f1 | xargs -I{} kubectl logs pod/{} --tail=100 || echo "Could not get pgstacbootstrap logs" + + # Extract logs from raster pod init container (wait-for-pgstacbootstrap) + echo "===== Raster Pod Init Container Logs (wait-for-pgstacbootstrap) =====" + kubectl get pod | grep "^raster-$RELEASE_NAME" | cut -d' ' -f1 | xargs -I{} kubectl logs pod/{} -c wait-for-pgstacbootstrap --tail=100 || echo "Could not get raster init container logs" + + # Extract logs from raster pod main container + echo "===== Raster Pod Main Container Logs =====" + kubectl get pod | grep "^raster-$RELEASE_NAME" | cut -d' ' -f1 | xargs -I{} kubectl logs pod/{} --tail=100 || echo "Could not get raster main container logs" + + # Extract logs from vector pod + echo "===== Vector Pod Logs =====" + kubectl get pod | grep "^vector-$RELEASE_NAME" | cut -d' ' -f1 | xargs -I{} kubectl logs pod/{} --tail=100 || echo "Could not get vector logs" + + # Extract logs from stac pod + echo "===== STAC Pod Logs =====" + kubectl get pod | grep "^stac-$RELEASE_NAME" | cut -d' ' -f1 | xargs -I{} kubectl logs pod/{} --tail=100 || echo "Could not get STAC logs" + + # Check if pods are in pending state or have issues + echo "===== Pod Descriptions for Troubleshooting =====" + kubectl get pod | grep "$RELEASE_NAME" | cut -d' ' -f1 | xargs -I{} kubectl describe pod/{} || echo 
"Could not describe pods" + # force GH action to show failed result exit 128 @@ -145,18 +178,51 @@ jobs: echo $RASTER_ENDPOINT echo '#################################' - pytest .github/workflows/tests/test_vector.py || kubectl logs svc/vector-$RELEASE_NAME - pytest .github/workflows/tests/test_stac.py + pytest .github/workflows/tests/test_vector.py || kubectl logs svc/vector + pytest .github/workflows/tests/test_stac.py || kubectl logs svc/stac # TODO: fix raster tests - #pytest .github/workflows/tests/test_raster.py + #pytest .github/workflows/tests/test_raster.py || kubectl logs svc/raster - name: error if tests failed if: steps.testrunner.outcome == 'failure' run: | - echo "The previous step failed or timed out." + echo "The tests failed. Extracting pod logs for debugging..." + + # Get and display all pods status + echo "===== Pod Status =====" + kubectl get pods + + # Extract logs from database pod + echo "===== Database Pod Logs =====" + kubectl get pod | grep "^db-$RELEASE_NAME" | cut -d' ' -f1 | xargs -I{} kubectl logs pod/{} --tail=100 || echo "Could not get database logs" + + # Extract logs from pgstacbootstrap pod + echo "===== PGSTACBootstrap Pod Logs =====" + kubectl get pod | grep "^pgstacbootstrap-$RELEASE_NAME" | cut -d' ' -f1 | xargs -I{} kubectl logs pod/{} --tail=100 || echo "Could not get pgstacbootstrap logs" + + # Extract logs from raster pod init container (wait-for-pgstacbootstrap) + echo "===== Raster Pod Init Container Logs (wait-for-pgstacbootstrap) =====" + kubectl get pod | grep "^raster-$RELEASE_NAME" | cut -d' ' -f1 | xargs -I{} kubectl logs pod/{} -c wait-for-pgstacbootstrap --tail=100 || echo "Could not get raster init container logs" + + # Extract logs from raster pod main container + echo "===== Raster Pod Main Container Logs =====" + kubectl get pod | grep "^raster-$RELEASE_NAME" | cut -d' ' -f1 | xargs -I{} kubectl logs pod/{} --tail=100 || echo "Could not get raster main container logs" + + # Extract logs from vector pod + echo 
"===== Vector Pod Logs =====" + kubectl get pod | grep "^vector-$RELEASE_NAME" | cut -d' ' -f1 | xargs -I{} kubectl logs pod/{} --tail=100 || echo "Could not get vector logs" + + # Extract logs from stac pod + echo "===== STAC Pod Logs =====" + kubectl get pod | grep "^stac-$RELEASE_NAME" | cut -d' ' -f1 | xargs -I{} kubectl logs pod/{} --tail=100 || echo "Could not get STAC logs" + + # Check if pods are in pending state or have issues + echo "===== Pod Descriptions for Troubleshooting =====" + kubectl get pod | grep "$RELEASE_NAME" | cut -d' ' -f1 | xargs -I{} kubectl describe pod/{} || echo "Could not describe pods" + # force GH action to show failed result exit 128 - name: helm uninstall eoapi templates run: | - helm uninstall $RELEASE_NAME + helm uninstall $RELEASE_NAME \ No newline at end of file From 7581c648fd34f3f3707fac8591f32a7382954aaf Mon Sep 17 00:00:00 2001 From: Emmanuel Mathot Date: Wed, 9 Apr 2025 08:22:53 +0200 Subject: [PATCH 4/5] Remove unnecessary Helm hook annotations for deployment order management --- helm-chart/eoapi/templates/services/deployment.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/helm-chart/eoapi/templates/services/deployment.yaml b/helm-chart/eoapi/templates/services/deployment.yaml index 23facb62..ed8553e0 100644 --- a/helm-chart/eoapi/templates/services/deployment.yaml +++ b/helm-chart/eoapi/templates/services/deployment.yaml @@ -10,10 +10,6 @@ metadata: gitsha: {{ $.Values.gitSha }} name: {{ $serviceName }}-{{ $.Release.Name }} annotations: - # These hooks ensure the deployment runs after the pgstacbootstrap job (which has weight -5) - # No initContainer is needed anymore since Helm will handle the execution order - # helm.sh/hook: "post-install,post-upgrade" - # helm.sh/hook-weight: "10" {{- with index $v "annotations" }} {{- toYaml . 
| nindent 4 }} {{- end }} From 0bd8adee82556488302d644c715f149a4fb81440 Mon Sep 17 00:00:00 2001 From: Emmanuel Mathot Date: Wed, 9 Apr 2025 08:27:08 +0200 Subject: [PATCH 5/5] ensure non empty annotations --- helm-chart/eoapi/templates/services/deployment.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/helm-chart/eoapi/templates/services/deployment.yaml b/helm-chart/eoapi/templates/services/deployment.yaml index ed8553e0..25602129 100644 --- a/helm-chart/eoapi/templates/services/deployment.yaml +++ b/helm-chart/eoapi/templates/services/deployment.yaml @@ -9,10 +9,12 @@ metadata: app: {{ $serviceName }}-{{ $.Release.Name }} gitsha: {{ $.Values.gitSha }} name: {{ $serviceName }}-{{ $.Release.Name }} + {{- if index $v "annotations" }} annotations: {{- with index $v "annotations" }} {{- toYaml . | nindent 4 }} {{- end }} + {{- end }} spec: progressDeadlineSeconds: 600 revisionHistoryLimit: 5