diff --git a/.github/workflows/helm-tests.yml b/.github/workflows/helm-tests.yml
index a7c50b13..837d5f3a 100644
--- a/.github/workflows/helm-tests.yml
+++ b/.github/workflows/helm-tests.yml
@@ -9,7 +9,7 @@ on:
 
 env:
   HELM_VERSION: v3.15.2
-  PGO_VERSION: 5.5.2
+  PGO_VERSION: 5.7.4
 
 jobs:
   helm-tests:
@@ -118,7 +118,36 @@ jobs:
       - name: cleanup if services fail to boot
         if: steps.watchservices.outcome == 'failure'
         run: |
-          echo "The previous step failed or timed out."
+          echo "The watchservices step failed or timed out. Extracting pod logs for debugging..."
+
+          # Get and display all pods status
+          echo "===== Pod Status ====="
+          kubectl get pods
+
+          # Extract logs from database pod
+          echo "===== Database Pod Logs ====="
+          kubectl get pod | grep "^db-$RELEASE_NAME" | cut -d' ' -f1 | xargs -I{} kubectl logs pod/{} --tail=100 || echo "Could not get database logs"
+
+          # Extract logs from pgstacbootstrap job (the job is named "pgstacbootstrap", without a release suffix)
+          echo "===== PGSTACBootstrap Pod Logs ====="
+          kubectl logs job/pgstacbootstrap --tail=100 || echo "Could not get pgstacbootstrap logs"
+
+          # Extract logs from raster pod main container
+          echo "===== Raster Pod Main Container Logs ====="
+          kubectl get pod | grep "^raster-$RELEASE_NAME" | cut -d' ' -f1 | xargs -I{} kubectl logs pod/{} --tail=100 || echo "Could not get raster main container logs"
+
+          # Extract logs from vector pod
+          echo "===== Vector Pod Logs ====="
+          kubectl get pod | grep "^vector-$RELEASE_NAME" | cut -d' ' -f1 | xargs -I{} kubectl logs pod/{} --tail=100 || echo "Could not get vector logs"
+
+          # Extract logs from stac pod
+          echo "===== STAC Pod Logs ====="
+          kubectl get pod | grep "^stac-$RELEASE_NAME" | cut -d' ' -f1 | xargs -I{} kubectl logs pod/{} --tail=100 || echo "Could not get STAC logs"
+
+          # Check if pods are in pending state or have issues
+          echo "===== Pod Descriptions for Troubleshooting ====="
+          kubectl get pod | grep "$RELEASE_NAME" | cut -d' ' -f1 | xargs -I{} kubectl describe pod/{} || echo "Could not describe pods"
+
           # force GH action to show failed result
           exit 128
 
@@ -145,15 +174,44 @@ jobs:
           echo $RASTER_ENDPOINT
           echo '#################################'
 
-          pytest .github/workflows/tests/test_vector.py || kubectl logs svc/vector-$RELEASE_NAME
-          pytest .github/workflows/tests/test_stac.py
+          pytest .github/workflows/tests/test_vector.py || { kubectl logs svc/vector; exit 1; }
+          pytest .github/workflows/tests/test_stac.py || { kubectl logs svc/stac; exit 1; }
           # TODO: fix raster tests
-          #pytest .github/workflows/tests/test_raster.py
+          #pytest .github/workflows/tests/test_raster.py || { kubectl logs svc/raster; exit 1; }
 
       - name: error if tests failed
         if: steps.testrunner.outcome == 'failure'
         run: |
-          echo "The previous step failed or timed out."
+          echo "The tests failed. Extracting pod logs for debugging..."
+
+          # Get and display all pods status
+          echo "===== Pod Status ====="
+          kubectl get pods
+
+          # Extract logs from database pod
+          echo "===== Database Pod Logs ====="
+          kubectl get pod | grep "^db-$RELEASE_NAME" | cut -d' ' -f1 | xargs -I{} kubectl logs pod/{} --tail=100 || echo "Could not get database logs"
+
+          # Extract logs from pgstacbootstrap job (the job is named "pgstacbootstrap", without a release suffix)
+          echo "===== PGSTACBootstrap Pod Logs ====="
+          kubectl logs job/pgstacbootstrap --tail=100 || echo "Could not get pgstacbootstrap logs"
+
+          # Extract logs from raster pod main container
+          echo "===== Raster Pod Main Container Logs ====="
+          kubectl get pod | grep "^raster-$RELEASE_NAME" | cut -d' ' -f1 | xargs -I{} kubectl logs pod/{} --tail=100 || echo "Could not get raster main container logs"
+
+          # Extract logs from vector pod
+          echo "===== Vector Pod Logs ====="
+          kubectl get pod | grep "^vector-$RELEASE_NAME" | cut -d' ' -f1 | xargs -I{} kubectl logs pod/{} --tail=100 || echo "Could not get vector logs"
+
+          # Extract logs from stac pod
+          echo "===== STAC Pod Logs ====="
+          kubectl get pod | grep "^stac-$RELEASE_NAME" | cut -d' ' -f1 | xargs -I{} kubectl logs pod/{} --tail=100 || echo "Could not get STAC logs"
+
+          # Check if pods are in pending state or have issues
+          echo "===== Pod Descriptions for Troubleshooting ====="
+          kubectl get pod | grep "$RELEASE_NAME" | cut -d' ' -f1 | xargs -I{} kubectl describe pod/{} || echo "Could not describe pods"
+
           # force GH action to show failed result
           exit 128
 
diff --git a/helm-chart/eoapi/templates/pgstacboostrap/configmap.yaml b/helm-chart/eoapi/templates/pgstacboostrap/configmap.yaml
index b3f9777b..ff059efc 100644
--- a/helm-chart/eoapi/templates/pgstacboostrap/configmap.yaml
+++ b/helm-chart/eoapi/templates/pgstacboostrap/configmap.yaml
@@ -1,9 +1,16 @@
 {{- if .Values.pgstacBootstrap.enabled }}
 ---
+# These ConfigMaps provide the necessary data and scripts for the pgstacbootstrap job.
+# They use Helm hooks with a weight of "-6" (lower than the job's "-5") to ensure
+# they are created before the job that depends on them.
 apiVersion: v1
 kind: ConfigMap
 metadata:
   name: initdb-sql-config-{{ $.Release.Name }}
+  annotations:
+    helm.sh/hook: "post-install,post-upgrade"
+    helm.sh/hook-weight: "-6"
+    helm.sh/hook-delete-policy: "before-hook-creation,hook-succeeded"
 data:
   initdb.sql: |
     {{- range $path, $bytes := $.Files.Glob "initdb-data/*.sql" -}}
@@ -14,6 +21,10 @@ apiVersion: v1
 kind: ConfigMap
 metadata:
   name: initdb-json-config-{{ $.Release.Name }}
+  annotations:
+    helm.sh/hook: "post-install,post-upgrade"
+    helm.sh/hook-weight: "-6"
+    helm.sh/hook-delete-policy: "before-hook-creation,hook-succeeded"
 data:
   {{- range $path, $bytes := $.Files.Glob "initdb-data/*.json" -}}
   {{- base $path | nindent 2 -}}: | {{- $.Files.Get $path | nindent 4 -}}
@@ -23,6 +34,10 @@ apiVersion: v1
 kind: ConfigMap
 metadata:
   name: pgstac-setup-config-{{ $.Release.Name }}
+  annotations:
+    helm.sh/hook: "post-install,post-upgrade"
+    helm.sh/hook-weight: "-6"
+    helm.sh/hook-delete-policy: "before-hook-creation,hook-succeeded"
 data:
   pgstac-migrate.py: |
     {{- range $path, $bytes := $.Files.Glob "initdb-data/*.py" -}}
@@ -33,6 +48,10 @@ apiVersion: v1
 kind: ConfigMap
 metadata:
   name: initdb-sh-config-{{ $.Release.Name }}
+  annotations:
+    helm.sh/hook: "post-install,post-upgrade"
+    helm.sh/hook-weight: "-6"
+    helm.sh/hook-delete-policy: "before-hook-creation,hook-succeeded"
 data:
   run-forever.sh: |
     #!/bin/bash
diff --git a/helm-chart/eoapi/templates/pgstacboostrap/job.yaml b/helm-chart/eoapi/templates/pgstacboostrap/job.yaml
index 07827ce4..4ebcf16b 100644
--- a/helm-chart/eoapi/templates/pgstacboostrap/job.yaml
+++ b/helm-chart/eoapi/templates/pgstacboostrap/job.yaml
@@ -1,11 +1,25 @@
 {{- if .Values.pgstacBootstrap.enabled }}
 ---
+# This job initializes the PostgreSQL database with the necessary schema and data.
+# Instead of using an initContainer in the deployment to wait for this job to complete,
+# we use Helm hooks to control the execution order:
+#
+# 1. The "post-install,post-upgrade" hook runs this job once all chart resources
+#    (including the deployments) have been loaded into Kubernetes, not before them
+# 2. The "-5" hook-weight ensures this job runs after its ConfigMaps (weight -6)
+# 3. The "before-hook-creation,hook-succeeded" delete policy ensures the job is recreated
+#    on each helm install/upgrade and is cleaned up after successful completion
+#
 apiVersion: batch/v1
 kind: Job
 metadata:
   name: pgstacbootstrap
   labels:
     app: pgstacbootstrap
+  annotations:
+    helm.sh/hook: "post-install,post-upgrade"
+    helm.sh/hook-weight: "-5"
+    helm.sh/hook-delete-policy: "before-hook-creation,hook-succeeded"
 spec:
   template:
     metadata:
@@ -13,15 +27,6 @@ spec:
         app: pgstacbootstrap
     spec:
       restartPolicy: Never
-      initContainers:
-        - name: wait-for-db
-          image: busybox
-          command:
-            {{ if .Values.testing }}
-            ['sh', '-c', 'until nc -z {{ $.Release.Name }}-pgbouncer 5432; do echo waiting for db; sleep 10; done;']
-            {{ else }}
-            ['sh', '-c', 'until nc -z eoapi-pgbouncer 5432; do echo waiting for db; sleep 10; done;']
-            {{ end }}
       containers:
         - name: pgstacbootstrap
           image: {{ .Values.pgstacBootstrap.image.name }}:{{ .Values.pgstacBootstrap.image.tag }}
diff --git a/helm-chart/eoapi/templates/services/deployment.yaml b/helm-chart/eoapi/templates/services/deployment.yaml
index e9eba222..25602129 100644
--- a/helm-chart/eoapi/templates/services/deployment.yaml
+++ b/helm-chart/eoapi/templates/services/deployment.yaml
@@ -9,9 +9,11 @@ metadata:
     app: {{ $serviceName }}-{{ $.Release.Name }}
     gitsha: {{ $.Values.gitSha }}
   name: {{ $serviceName }}-{{ $.Release.Name }}
-  {{- with index $v "annotations" }}
+  {{- if index $v "annotations" }}
   annotations:
+    {{- with index $v "annotations" }}
     {{- toYaml . | nindent 4 }}
+    {{- end }}
   {{- end }}
 spec:
   progressDeadlineSeconds: 600
@@ -30,19 +32,6 @@ spec:
         app: {{ $serviceName }}-{{ $.Release.Name }}
     spec:
       serviceAccountName: eoapi-sa-{{ $.Release.Name }}
-      initContainers:
-      - name: wait-for-pgstacbootstrap
-        image: bitnami/kubectl:latest
-        command:
-        - /bin/sh
-        - -c
-        - |
-          echo "Waiting for pgstacbootstrap job to complete..."
-          while ! kubectl -n {{ $.Release.Namespace }} wait --for=condition=complete job/pgstacbootstrap --timeout=5s; do
-            echo "pgstacbootstrap job not completed yet. Checking again in 10 seconds..."
-            sleep 10
-          done
-          echo "pgstacbootstrap job completed successfully"
       containers:
       - image: {{ index $v "image" "name" }}:{{ index $v "image" "tag" }}
         name: {{ $serviceName }}