# Conformance External Workloads (ci-external-workloads) #3527

name: Conformance External Workloads (ci-external-workloads)

# NOTE: the concurrency group is keyed on the event type, so any trigger
# added or removed here must be mirrored in that group definition.
on:
  # Dispatched on behalf of a pull request.
  workflow_dispatch:
    inputs:
      PR-number:
        required: true
        description: Pull request number.
      context-ref:
        required: true
        description: >-
          Context in which the workflow runs. If PR is from a fork, will be the
          PR target branch (general case). If PR is NOT from a fork, will be the
          PR branch itself (this allows committers to test changes to workflows
          directly from PRs).
      SHA:
        required: true
        description: SHA under test (head of the PR branch).
      extra-args:
        required: false
        default: "{}"
        description: >-
          [JSON object] Arbitrary arguments passed from the trigger comment via
          regex capture group. Parse with 'fromJson(inputs.extra-args).argName'
          in workflow.

  # Every 6 hours, offset to hour 4 (04:00, 10:00, 16:00, 22:00).
  schedule:
    - cron: "0 4/6 * * *"
# Declaring any scope explicitly drops every scope that is not declared to
# 'none', so only the scopes the jobs actually need are listed.
permissions:
  # actions/checkout must be able to read the repository
  contents: read
  # needed to request the JWT from GitHub's OIDC provider
  id-token: write
  # allows retrieving information from the PR API
  pull-requests: read
  # needed to set commit statuses
  statuses: write
concurrency:
  # The group name is assembled from three parts:
  #   1. the workflow name
  #   2. the event type
  #   3. an identifier chosen by event type:
  #        schedule          -> SHA
  #        workflow_dispatch -> PR number
  #
  # Every type of testing thus gets its own group name, and a re-run cancels
  # the previous run belonging to the same group.
  cancel-in-progress: true
  group: |
    ${{ github.workflow }}
    ${{ github.event_name }}
    ${{ (github.event_name == 'schedule' && github.sha) || (github.event_name == 'workflow_dispatch' && github.event.inputs.PR-number) }}
# Workflow-wide environment variables shared by every job.
env:
  # NOTE(review): clusterName and vmName are identical, "-vm" suffix included.
  # Confirm the suffix is really intended for the GKE cluster name.
  clusterName: ${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }}-${{ github.run_attempt }}-vm
  # Base name of the GCP VM; jobs append "-<matrix.vmIndex>" to it.
  vmName: ${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }}-${{ github.run_attempt }}-vm
  # Startup script handed to the VM through --metadata-from-file.
  vmStartupScript: .github/gcp-vm-startup.sh
  # Forwarded as the "ci-version" input of the Cilium CLI install step.
  # Explicit empty string instead of a bare key (which YAML reads as null).
  # NOTE(review): presumably empty means "use the release version" - confirm.
  cilium_cli_ci_version: ""
  # Quoted so the variable is always the literal string "True" rather than
  # depending on how an unquoted YAML boolean gets rendered.
  USE_GKE_GCLOUD_AUTH_PLUGIN: "True"
jobs:
# Publishes the initial commit status for the commit under test.
commit-status-start:
  runs-on: ubuntu-latest
  name: Commit Status Start
  steps:
    - name: Set initial commit status
      uses: myrotvorets/set-commit-status-action@3730c0a348a2ace3c110851bed53331bc6406e9f  # v2.0.1
      with:
        # The SHA input when one was provided, otherwise the triggering commit.
        sha: ${{ inputs.SHA || github.sha }}
# Builds the job matrix from .github/actions/gke/k8s-versions.yaml and exposes
# it as the "matrix" output (compact JSON) for the test job.
generate-matrix:
  name: Generate Matrix
  runs-on: ubuntu-latest
  outputs:
    matrix: ${{ steps.set-matrix.outputs.matrix }}
  steps:
    - name: Checkout context ref (trusted)
      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11  # v4.1.1
      with:
        ref: ${{ inputs.context-ref || github.sha }}
        persist-credentials: false

    - name: Convert YAML to JSON
      run: |
        work_dir=".github/actions/gke"
        destination_directory="/tmp/generated/gke"
        mkdir -p "${destination_directory}"
        yq -o=json "${work_dir}/k8s-versions.yaml" | jq . > "${destination_directory}/gke.json"

    - name: Generate Matrix
      id: set-matrix
      # Workflow inputs reach the script through environment variables rather
      # than being expanded into the script text, so their content can never
      # be interpreted as shell code.
      env:
        EVENT_NAME: ${{ github.event_name }}
        PR_NUMBER: ${{ inputs.PR-number }}
      run: |
        cd /tmp/generated/gke

        # Use complete matrix in case of scheduled run
        # main -> event_name = schedule
        # other stable branches -> PR-number starting with v (e.g. v1.14)
        if [[ "${EVENT_NAME}" == "schedule" || "${PR_NUMBER}" == v* ]]; then
          jq '{ "include": [ .k8s[] ] }' gke.json > /tmp/matrix.json
        else
          # Otherwise keep only the entries flagged as default.
          jq '{ "include": [ .k8s[] | select(.default) ] }' gke.json > /tmp/matrix.json
        fi

        echo "Generated matrix:"
        cat /tmp/matrix.json
        echo "matrix=$(jq -c . < /tmp/matrix.json)" >> "$GITHUB_OUTPUT"
# For every matrix entry: creates a GCP VM and a GKE cluster, installs Cilium
# with cluster mesh and external workloads enabled, joins the VM to the mesh,
# runs the connectivity test, then gathers diagnostics and deletes the cloud
# resources.
installation-and-connectivity:
  name: Installation and Connectivity Test
  needs: generate-matrix
  runs-on: ubuntu-latest
  timeout-minutes: 45
  env:
    job_name: "Installation and Connectivity Test"
    # Empty for scheduled runs; every other run passes --preemptible.
    preemptible: ${{ github.event_name != 'schedule' && '--preemptible' || '' }}
  strategy:
    fail-fast: false
    matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix) }}
  steps:
    - name: Checkout context ref (trusted)
      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11  # v4.1.1
      with:
        ref: ${{ inputs.context-ref || github.sha }}
        persist-credentials: false

    - name: Set Environment Variables
      uses: ./.github/actions/set-env-variables

    - name: Get Cilium's default values
      id: default_vars
      uses: ./.github/actions/helm-default
      with:
        image-tag: ${{ inputs.SHA }}
        # Path inside the "untrusted" checkout performed later in this job.
        chart-dir: ./untrusted/install/kubernetes/cilium

    - name: Set up job variables
      id: vars
      # The PR number and ref name reach the script through environment
      # variables rather than being expanded into the script text, so their
      # content can never be interpreted as shell code.
      env:
        PR_NUMBER: ${{ inputs.PR-number }}
        REF_NAME: ${{ github.ref_name }}
      run: |
        if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
          OWNER="${PR_NUMBER}"
        else
          OWNER="${REF_NAME}"
        fi
        # OWNER becomes a GCP label value, which must not contain '.' or '/'.
        # Replace every occurrence, on both paths: dispatched runs may carry
        # values such as "v1.14" as the PR number.
        OWNER="${OWNER//[.\/]/-}"

        CILIUM_INSTALL_DEFAULTS="${{ steps.default_vars.outputs.cilium_install_defaults }} \
          --helm-set cluster.name=${{ env.clusterName }} \
          --datapath-mode=tunnel \
          --helm-set kubeProxyReplacement=true"

        CONNECTIVITY_TEST_DEFAULTS="--flow-validation=disabled --hubble=false --collect-sysdump-on-failure \
          --external-target google.com --external-cidr 8.0.0.0/8 --external-ip 8.8.4.4 --external-other-ip 8.8.8.8"

        # Explicitly specify LoadBalancer service type since the default type is NodePort in Helm mode.
        # Ref: https://github.com/cilium/cilium-cli/pull/1527#discussion_r1177244379
        #
        # In Helm mode, externalWorkloads.enabled is set to false by default. You need to pass
        # --enable-external-workloads flag to enable it.
        # Ref: https://github.com/cilium/cilium/pull/25259
        CLUSTERMESH_ENABLE_DEFAULTS="--service-type LoadBalancer --enable-external-workloads"

        # The expansions below are left unquoted on purpose: word splitting
        # collapses the multi-line values above into single-line outputs.
        echo cilium_install_defaults=${CILIUM_INSTALL_DEFAULTS} >> "$GITHUB_OUTPUT"
        echo connectivity_test_defaults=${CONNECTIVITY_TEST_DEFAULTS} >> "$GITHUB_OUTPUT"
        echo clustermesh_enable_defaults=${CLUSTERMESH_ENABLE_DEFAULTS} >> "$GITHUB_OUTPUT"
        echo sha=${{ steps.default_vars.outputs.sha }} >> "$GITHUB_OUTPUT"
        echo owner=${OWNER} >> "$GITHUB_OUTPUT"

    - name: Install Cilium CLI
      uses: cilium/cilium-cli@ca0d5a23b842d78ce3b5c79a80d546867493daf7  # v0.16.0
      with:
        repository: ${{ env.CILIUM_CLI_RELEASE_REPO }}
        release-version: ${{ env.CILIUM_CLI_VERSION }}
        ci-version: ${{ env.cilium_cli_ci_version }}

    - name: Set up gcloud credentials
      id: auth
      uses: google-github-actions/auth@55bd3a7c6e2ae7cf1877fd1ccb9d54c0503c457c  # v2.1.2
      with:
        workload_identity_provider: ${{ secrets.GCP_PR_WORKLOAD_IDENTITY_PROVIDER }}
        service_account: ${{ secrets.GCP_PR_SA }}
        create_credentials_file: true
        export_environment_variables: true

    - name: Set up gcloud CLI
      uses: google-github-actions/setup-gcloud@98ddc00a17442e89a24bbf282954a3b65ce6d200  # v2.1.0
      with:
        project_id: ${{ secrets.GCP_PROJECT_ID }}
        version: "405.0.0"

    - name: Install gke-gcloud-auth-plugin
      run: |
        gcloud components install gke-gcloud-auth-plugin

    - name: Display gcloud CLI info
      run: |
        gcloud info

    # VM creation is wrapped in a retry action: up to 10 attempts, each
    # limited to one minute, retried on error.
    - name: Create GCP VM
      uses: nick-invision/retry@7152eba30c6575329ac0576536151aca5a72780e  # v3.0.0
      with:
        retry_on: error
        timeout_minutes: 1
        max_attempts: 10
        command: |
          gcloud compute instances create ${{ env.vmName }}-${{ matrix.vmIndex }} \
            --labels "usage=${{ github.repository_owner }}-${{ github.event.repository.name }},owner=${{ steps.vars.outputs.owner }}" \
            --zone ${{ matrix.zone }} \
            --machine-type e2-custom-2-4096 \
            --boot-disk-type pd-standard \
            --boot-disk-size 10GB \
            ${{ env.preemptible }} \
            --image-project ubuntu-os-cloud \
            --image-family ubuntu-2004-lts \
            --metadata hostname=${{ env.vmName }}-${{ matrix.vmIndex }} \
            --metadata-from-file startup-script=${{ env.vmStartupScript }}

    - name: Create GKE cluster
      run: |
        gcloud container clusters create ${{ env.clusterName }} \
          --labels "usage=${{ github.repository_owner }}-${{ github.event.repository.name }},owner=${{ steps.vars.outputs.owner }}" \
          --zone ${{ matrix.zone }} \
          --cluster-version ${{ matrix.version }} \
          --enable-ip-alias \
          --node-taints node.cilium.io/agent-not-ready=true:NoExecute \
          --cluster-ipv4-cidr="/21" \
          --services-ipv4-cidr="/24" \
          --image-type COS_CONTAINERD \
          --num-nodes 2 \
          --machine-type e2-custom-2-4096 \
          --disk-type pd-standard \
          --disk-size 20GB \
          ${{ env.preemptible }}

    - name: Get cluster credentials
      run: |
        gcloud container clusters get-credentials ${{ env.clusterName }} --zone ${{ matrix.zone }}

    # Warning: since this is a privileged workflow, subsequent workflow job
    # steps must take care not to execute untrusted code.
    - name: Checkout pull request branch (NOT TRUSTED)
      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11  # v4.1.1
      with:
        ref: ${{ steps.vars.outputs.sha }}
        persist-credentials: false
        path: untrusted
        # Only the Helm chart directory is checked out.
        sparse-checkout: |
          install/kubernetes/cilium

    # Polls the registry every 45s until each CI image exists for the SHA.
    - name: Wait for images to be available
      timeout-minutes: 30
      shell: bash
      run: |
        for image in cilium-ci operator-generic-ci hubble-relay-ci clustermesh-apiserver-ci ; do
          until docker manifest inspect quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/$image:${{ steps.vars.outputs.sha }} &> /dev/null; do sleep 45s; done
        done

    - name: Install Cilium in cluster
      id: install-cilium
      run: |
        cilium install ${{ steps.vars.outputs.cilium_install_defaults }}

    - name: Enable cluster mesh
      run: |
        cilium clustermesh enable ${{ steps.vars.outputs.clustermesh_enable_defaults }}

    - name: Wait for cluster mesh status to be ready
      run: |
        cilium clustermesh status --wait

    - name: Add VM to cluster mesh
      run: |
        cilium clustermesh vm create ${{ env.vmName }}-${{ matrix.vmIndex }} -n default --ipv4-alloc-cidr 10.192.1.0/30
        cilium clustermesh vm status

    # Generates the install script locally, copies it to the VM, runs it
    # there, then checks the agent status on the VM.
    - name: Install Cilium on VM
      run: |
        cilium clustermesh vm install install-external-workload.sh --config debug
        gcloud compute scp install-external-workload.sh ${{ env.vmName }}-${{ matrix.vmIndex }}:~/ --zone ${{ matrix.zone }}
        gcloud compute ssh ${{ env.vmName }}-${{ matrix.vmIndex }} --zone ${{ matrix.zone }} \
          --command "~/install-external-workload.sh"
        sleep 5s
        gcloud compute ssh ${{ env.vmName }}-${{ matrix.vmIndex }} --zone ${{ matrix.zone }} \
          --command "sudo cilium-dbg status"

    - name: Verify cluster DNS on VM
      # Limit nslookup to the first (global) DNS server setting
      run: |
        gcloud compute ssh ${{ env.vmName }}-${{ matrix.vmIndex }} --zone ${{ matrix.zone }} \
          --command "nslookup -d2 -retry=10 -timeout=5 -norecurse clustermesh-apiserver.kube-system.svc.cluster.local \$(systemd-resolve --status | grep -m 1 \"Current DNS Server:\" | cut -d':' -f2)"

    - name: Ping clustermesh-apiserver from VM
      run: |
        gcloud compute ssh ${{ env.vmName }}-${{ matrix.vmIndex }} --zone ${{ matrix.zone }} \
          --command "ping -c 3 \$(sudo cilium-dbg service list get -o jsonpath='{[?(@.spec.flags.name==\"clustermesh-apiserver\")].spec.backend-addresses[0].ip}')"

    - name: Make JUnit report directory
      run: |
        mkdir -p cilium-junits

    - name: Run connectivity test (${{ join(matrix.*, ', ') }})
      run: |
        cilium connectivity test ${{ steps.vars.outputs.connectivity_test_defaults }} \
          --junit-file "cilium-junits/${{ env.job_name }} (${{ join(matrix.*, ', ') }}).xml" \
          --junit-property github_job_step="Run connectivity test (${{ join(matrix.*, ', ') }})"

    # Runs only when something failed or was cancelled after the Cilium
    # install step had been reached.
    - name: Post-test information gathering
      if: ${{ !success() && steps.install-cilium.outcome != 'skipped' }}
      run: |
        kubectl get pods --all-namespaces -o wide
        kubectl get cew --all-namespaces -o wide
        kubectl get cep --all-namespaces -o wide
        cilium status
        cilium clustermesh status
        cilium clustermesh vm status
        # Uses cilium-dbg, the same binary name the earlier VM steps invoke.
        gcloud compute ssh ${{ env.vmName }}-${{ matrix.vmIndex }} --zone ${{ matrix.zone }} --command "sudo cilium-dbg status"
        gcloud compute ssh ${{ env.vmName }}-${{ matrix.vmIndex }} --zone ${{ matrix.zone }} --command "sudo docker logs cilium --timestamps"
        cilium sysdump --output-filename cilium-sysdump-final-${{ join(matrix.*, '-') }}
      shell: bash {0}  # Disable default fail-fast behaviour so that all commands run independently

    # Always runs. Waits for in-flight cluster operations to finish before
    # requesting deletion of the cluster (async) and of the VM.
    - name: Clean up GKE cluster and VM
      if: ${{ always() }}
      run: |
        while [ "$(gcloud container operations list --zone ${{ matrix.zone }} --filter="status=RUNNING AND targetLink~${{ env.clusterName }}" --format="value(name)")" ];do
          echo "cluster has an ongoing operation, waiting for all operations to finish"; sleep 15
        done
        gcloud container clusters delete ${{ env.clusterName }} --zone ${{ matrix.zone }} --quiet --async
        gcloud compute instances delete ${{ env.vmName }}-${{ matrix.vmIndex }} --zone ${{ matrix.zone }} --quiet
      shell: bash {0}  # Disable default fail-fast behavior so that all commands run independently

    - name: Upload artifacts
      if: ${{ !success() }}
      uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3  # v4.3.1
      with:
        name: cilium-sysdumps-${{ matrix.vmIndex }}
        path: cilium-sysdump-*.zip

    - name: Upload JUnits [junit]
      if: ${{ always() }}
      uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3  # v4.3.1
      with:
        name: cilium-junits-${{ matrix.vmIndex }}
        path: cilium-junits/*.xml

    - name: Publish Test Results As GitHub Summary
      if: ${{ always() }}
      uses: aanm/junit2md@332ebf0fddd34e91b03a832cfafaa826306558f9  # v0.0.3
      with:
        junit-directory: "cilium-junits"
# Combines the per-matrix-entry artifacts produced by the test job into one
# sysdump artifact and one JUnit artifact.
merge-upload:
  name: Merge and Upload Artifacts
  needs: installation-and-connectivity
  # Runs regardless of how the test job ended.
  if: ${{ always() }}
  runs-on: ubuntu-latest
  steps:
    # Sysdumps are merged only when the test job failed; an error in this
    # step does not fail the job.
    - name: Merge Sysdumps
      if: ${{ needs.installation-and-connectivity.result == 'failure' }}
      continue-on-error: true
      uses: actions/upload-artifact/merge@5d5d22a31266ced268874388b861e4b58bb5c2f3  # v4.3.1
      with:
        pattern: cilium-sysdumps-*
        name: cilium-sysdumps
        delete-merged: true
        retention-days: 5

    - name: Merge JUnits
      uses: actions/upload-artifact/merge@5d5d22a31266ced268874388b861e4b58bb5c2f3  # v4.3.1
      with:
        pattern: cilium-junits-*
        name: cilium-junits
        delete-merged: true
        retention-days: 5
# Reports the test job's result as the final commit status.
commit-status-final:
  name: Commit Status Final
  needs: installation-and-connectivity
  # Runs regardless of how the test job ended.
  if: ${{ always() }}
  runs-on: ubuntu-latest
  steps:
    - name: Set final commit status
      uses: myrotvorets/set-commit-status-action@3730c0a348a2ace3c110851bed53331bc6406e9f  # v2.0.1
      with:
        status: ${{ needs.installation-and-connectivity.result }}
        # The SHA input when one was provided, otherwise the triggering commit.
        sha: ${{ inputs.SHA || github.sha }}