From 2576cd4ade9151722b0fa025e2a09b25a17567bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julian=20T=C3=B6lle?= Date: Tue, 10 Oct 2023 10:00:46 +0200 Subject: [PATCH] test(e2e): add robot server setup This commit adds setup code to initialize & use Robot servers in e2e tests. It uses installimage with autosetup to prepare the server and then uses k3sup similar to our existing setup to join the server into the existing kubernetes cluster. Right now it only supports joining Robot servers as worker nodes, not for the control-plane. Also, it only supports non-Private Network setups. --- .github/workflows/test_e2e.yml | 121 +++++++++++++++++++++- README.md | 9 ++ hack/dev-up.sh | 2 + hack/robot-e2e/ansible.cfg | 6 ++ hack/robot-e2e/autosetup.j2 | 6 ++ hack/robot-e2e/e2e-setup-robot-server.yml | 98 ++++++++++++++++++ hack/robot-e2e/inventory.yml | 8 ++ hack/robot-e2e/requirements.yml | 2 + tests/e2e/cloud_test.go | 2 +- tests/e2e/robot_test.go | 93 +++++++++++++++++ tests/e2e/testing.go | 10 ++ 11 files changed, 353 insertions(+), 4 deletions(-) create mode 100644 hack/robot-e2e/ansible.cfg create mode 100644 hack/robot-e2e/autosetup.j2 create mode 100644 hack/robot-e2e/e2e-setup-robot-server.yml create mode 100644 hack/robot-e2e/inventory.yml create mode 100644 hack/robot-e2e/requirements.yml create mode 100644 tests/e2e/robot_test.go diff --git a/.github/workflows/test_e2e.yml b/.github/workflows/test_e2e.yml index 4e90eed66..fe3c0d8be 100644 --- a/.github/workflows/test_e2e.yml +++ b/.github/workflows/test_e2e.yml @@ -1,11 +1,11 @@ -name: Run e2e tests +name: e2e tests on: pull_request: {} push: branches: [main] jobs: - k3s: - name: k3s ${{ matrix.k3s }} + cloud: + name: Cloud ${{ matrix.k3s }} permissions: id-token: write runs-on: ubuntu-latest @@ -51,6 +51,7 @@ jobs: # make exported env variables available to following jobs echo "KUBECONFIG=$KUBECONFIG" >> "$GITHUB_ENV" echo "SKAFFOLD_DEFAULT_REPO=$SKAFFOLD_DEFAULT_REPO" >> "$GITHUB_ENV" + echo "CONTROL_IP=$CONTROL_IP" 
>> "$GITHUB_ENV" - name: Build and Deploy HCCM run: | @@ -102,3 +103,117 @@ jobs: with: name: debug-logs-${{ env.SCOPE }} path: debug-logs/ + + robot: + name: Robot + permissions: + id-token: write + + # Make sure that only one Job is using the server at a time + concurrency: robot-test-server + environment: e2e-robot + + env: + K3S_CHANNEL: 1.28 + SCOPE: gha-${{ github.run_id }}-${{ github.run_attempt }}-robot + + # Disable routes in dev-env, not supported for Robot. + ROUTES_ENABLED: "false" + ROBOT_ENABLED: "true" + SERVER_NUMBER: ${{ vars.SERVER_NUMBER }} + + runs-on: ubuntu-latest + steps: + - uses: actions/setup-go@v4 + with: + go-version: "1.21" + - uses: actions/checkout@master + - uses: hetznercloud/tps-action@main + with: + token: ${{ secrets.HCLOUD_TOKEN }} + - uses: 3bit/setup-hcloud@v2 + - uses: yokawasa/action-setup-kube-tools@v0.9.3 + with: + setup-tools: | + helm + kubectl + skaffold + helm: v3.11.2 + kubectl: v1.28.1 + skaffold: v2.3.0 + - name: Install k3sup + run: | + curl -sLS https://get.k3sup.dev | sh + + - name: Setup test environment + run: | + source <(hack/dev-up.sh) + + # make exported env variables available to following steps + echo "KUBECONFIG=$KUBECONFIG" >> "$GITHUB_ENV" + echo "SKAFFOLD_DEFAULT_REPO=$SKAFFOLD_DEFAULT_REPO" >> "$GITHUB_ENV" + echo "CONTROL_IP=$CONTROL_IP" >> "$GITHUB_ENV" + + - name: Build and Deploy HCCM + run: | + skaffold build --tag="e2e-${GITHUB_RUN_ID}-${GITHUB_RUN_NUMBER}" + tag=$(skaffold build --tag="e2e-${GITHUB_RUN_ID}-${GITHUB_RUN_NUMBER}" --quiet --output="{{ (index .Builds 0).Tag }}") + skaffold deploy \ + --profile=robot \ + --images=hetznercloud/hcloud-cloud-controller-manager=$tag + + - name: Setup Robot Server + env: + ROBOT_USER: ${{ secrets.ROBOT_USER }} + ROBOT_PASSWORD: ${{ secrets.ROBOT_PASSWORD }} + working-directory: hack/robot-e2e + run: | + ansible-galaxy install -r requirements.yml + echo "::group::ansible-playbook e2e-setup-robot-server.yml" + ansible-playbook e2e-setup-robot-server.yml -e 
scope=$SCOPE -e server_number=$SERVER_NUMBER -vvv + echo "::endgroup::" + + - name: Run tests + run: | + go test ./tests/e2e -tags e2e,robot -v -timeout 60m + + - name: Download logs & events + if: always() + continue-on-error: true + run: | + mkdir debug-logs + kubectl logs \ + --namespace kube-system \ + --selector app.kubernetes.io/name=hcloud-cloud-controller-manager \ + --all-containers \ + --prefix=true \ + --tail=-1 \ + > debug-logs/hccm.log + + kubectl get events \ + --all-namespaces \ + --sort-by=.firstTimestamp \ + --output yaml \ + > debug-logs/events.yaml + + - name: Show HCCM Logs on Failure + if: failure() + continue-on-error: true + run: | + echo "::group::hccm.log" + cat debug-logs/hccm.log + echo "::endgroup::" + + - name: Cleanup test environment + if: always() + continue-on-error: true + run: | + hack/dev-down.sh + + - name: Persist debug artifacts + if: always() + continue-on-error: true + uses: actions/upload-artifact@v3 + with: + name: debug-logs-${{ env.SCOPE }} + path: debug-logs/ diff --git a/README.md b/README.md index 40f79f975..305a604e0 100644 --- a/README.md +++ b/README.md @@ -254,6 +254,15 @@ alias kgp="kubectl get pods" alias kgs="kubectl get services" ``` +The test suite is split into three parts: + +- **General Part**: Sets up the test env & checks if the HCCM Pod is properly running + - Build Tag: `e2e` +- **Cloud Part**: Tests regular functionality against a Cloud-only environment + - Build Tag: `e2e && !robot` +- **Robot Part**: Tests Robot functionality against a Cloud+Robot environment + - Build Tag: `e2e && robot` + ## Local test setup This repository provides [skaffold](https://skaffold.dev/) to easily deploy / debug this controller on demand diff --git a/hack/dev-up.sh b/hack/dev-up.sh index 77ea86dd3..b4596711f 100755 --- a/hack/dev-up.sh +++ b/hack/dev-up.sh @@ -182,8 +182,10 @@ if [[ -n "${DEBUG:-}" ]]; then set -x; fi echo "Success - cluster fully initialized and ready, why not see for yourself?" 
echo '$ kubectl get nodes' kubectl get nodes + export CONTROL_IP=$(hcloud server ip "$scope_name-1") } >&2 echo "export KUBECONFIG=$KUBECONFIG" $SCRIPT_DIR/registry-port-forward.sh echo "export SKAFFOLD_DEFAULT_REPO=localhost:30666" +echo "export CONTROL_IP=$CONTROL_IP" diff --git a/hack/robot-e2e/ansible.cfg b/hack/robot-e2e/ansible.cfg new file mode 100644 index 000000000..fa8ced0c8 --- /dev/null +++ b/hack/robot-e2e/ansible.cfg @@ -0,0 +1,6 @@ +[defaults] +inventory = ${PWD}/inventory.yml +host_key_checking = False + +[ssh_connection] +pipelining = True diff --git a/hack/robot-e2e/autosetup.j2 b/hack/robot-e2e/autosetup.j2 new file mode 100644 index 000000000..6d3e7ae32 --- /dev/null +++ b/hack/robot-e2e/autosetup.j2 @@ -0,0 +1,6 @@ +HOSTNAME {{ server_name }} + +DRIVE1 /dev/sda +PART / ext4 all + +IMAGE /root/.oldroot/nfs/images/Ubuntu-2204-jammy-amd64-base.tar.gz diff --git a/hack/robot-e2e/e2e-setup-robot-server.yml b/hack/robot-e2e/e2e-setup-robot-server.yml new file mode 100644 index 000000000..a65254355 --- /dev/null +++ b/hack/robot-e2e/e2e-setup-robot-server.yml @@ -0,0 +1,98 @@ +--- +- name: Prepare Reinstall + hosts: localhost + connection: local + gather_facts: false + + vars: + scope: dev + # Additional SSH keys to add to the server for debugging. Must already exist in Robot. 
+ authorized_keys: [] + + module_defaults: + group/community.hrobot.robot: + hetzner_user: "{{ lookup('ansible.builtin.env', 'ROBOT_USER') }}" + hetzner_password: "{{ lookup('ansible.builtin.env', 'ROBOT_PASSWORD') }}" + + tasks: + - name: Get Server Info + community.hrobot.server_info: + server_number: "{{ server_number }}" + register: server_info + + - name: Set Server Facts + ansible.builtin.set_fact: + server_ip: "{{ server_info.servers[0].server_ip }}" + server_name: "{{ server_info.servers[0].server_name }}" + + - name: Create SSH Key + community.hrobot.ssh_key: + name: "hccm-{{ scope }}" + public_key: "{{ lookup('file', '../.ssh-{{ scope }}.pub') }}" + state: present + register: ssh_key + + - name: Enable Rescue System + community.hrobot.boot: + server_number: "{{ server_number }}" + rescue: + authorized_keys: "{{ authorized_keys + [ ssh_key.fingerprint ] }}" + os: linux + + - name: Reset Server (to get to Rescue System) + community.hrobot.reset: + server_number: "{{ server_number }}" + reset_type: hardware # only type that does not require a separate reset for starting again + + - name: Wait for SSH + ansible.builtin.wait_for: + host: "{{ server_ip }}" + port: "{{ 22 }}" + search_regex: SSH + +- name: Install OS to Server + hosts: all + gather_facts: false + tasks: + - name: Write autosetup + ansible.builtin.template: + src: autosetup.j2 + dest: /autosetup + vars: + server_name: "{{ hostvars['localhost']['server_name'] }}" + + - name: installimage + # -t => Take over rescue system SSH public keys + ansible.builtin.command: /root/.oldroot/nfs/install/installimage -t yes + + - name: Reboot + ansible.builtin.reboot: + + - name: Create k3s directory + ansible.builtin.file: + path: /etc/rancher/k3s + state: directory + + - name: Prepare Local Registry + ansible.builtin.copy: + src: ../k3s-registries.yaml + dest: /etc/rancher/k3s/registries.yaml + +- name: Join Kubernetes Cluster + hosts: localhost + connection: local + gather_facts: false + vars: + control_ip: 
"{{ lookup('ansible.builtin.env', 'CONTROL_IP') }}" + k3s_channel: stable + scope: dev + + tasks: + - name: k3sup + ansible.builtin.command: >- + k3sup join + --server-ip={{ control_ip | ansible.builtin.mandatory }} + --ip={{ server_ip }} + --k3s-channel={{ k3s_channel }} + --k3s-extra-args="--kubelet-arg cloud-provider=external --node-label instance.hetzner.cloud/is-root-server=true" + --ssh-key ../.ssh-{{ scope }} diff --git a/hack/robot-e2e/inventory.yml b/hack/robot-e2e/inventory.yml new file mode 100644 index 000000000..c3f8c2f02 --- /dev/null +++ b/hack/robot-e2e/inventory.yml @@ -0,0 +1,8 @@ +all: + hosts: + # TODO: Dynamic inventory + hccm-test: + ansible_host: 142.132.203.104 + ansible_user: root + ansible_ssh_private_key_file: ../.ssh-{{ scope }} + ansible_ssh_common_args: '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null' diff --git a/hack/robot-e2e/requirements.yml b/hack/robot-e2e/requirements.yml new file mode 100644 index 000000000..6cc860776 --- /dev/null +++ b/hack/robot-e2e/requirements.yml @@ -0,0 +1,2 @@ +collections: + - name: community.hrobot diff --git a/tests/e2e/cloud_test.go b/tests/e2e/cloud_test.go index d18ab167f..ac2b7fdf6 100644 --- a/tests/e2e/cloud_test.go +++ b/tests/e2e/cloud_test.go @@ -1,4 +1,4 @@ -//go:build e2e +//go:build e2e && !robot package e2e diff --git a/tests/e2e/robot_test.go b/tests/e2e/robot_test.go new file mode 100644 index 000000000..58de11015 --- /dev/null +++ b/tests/e2e/robot_test.go @@ -0,0 +1,93 @@ +//go:build e2e && robot + +package e2e + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/annotation" + "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/providerid" +) + +func TestRobotClientIsAvailable(t *testing.T) { + assert.NotNil(t, testCluster.hrobot) +} + +func 
TestNodeSetCorrectNodeLabelsAndIPAddressesRobot(t *testing.T) { + t.Parallel() + ctx := context.Background() + + // Get a random Robot server from all Nodes in the cluster + nodes, err := testCluster.k8sClient.CoreV1().Nodes().List(ctx, metav1.ListOptions{ + LabelSelector: "instance.hetzner.cloud/is-root-server=true", + }) + assert.NoError(t, err) + assert.GreaterOrEqual(t, len(nodes.Items), 1) + node := nodes.Items[0] + + // Parse the server number from the ProviderID + id, isCloudServer, err := providerid.ToServerID(node.Spec.ProviderID) + assert.NoError(t, err) + assert.False(t, isCloudServer) + + // Get the server from the Robot API to cross-check Labels + server, err := testCluster.hrobot.ServerGet(int(id)) + assert.NoError(t, err) + + labels := node.Labels + expectedLabels := map[string]string{ + "kubernetes.io/hostname": server.Name, + "kubernetes.io/os": "linux", + "kubernetes.io/arch": "amd64", + } + for expectedLabel, expectedValue := range expectedLabels { + assert.Equal(t, expectedValue, labels[expectedLabel], "node does not have expected label %s", expectedLabel) + } + + expectedLabelsSet := []string{ + "node.kubernetes.io/instance-type", + "topology.kubernetes.io/region", + "topology.kubernetes.io/zone", + } + for _, expectedLabel := range expectedLabelsSet { + _, ok := labels[expectedLabel] + assert.True(t, ok, "node is missing expected label %s", expectedLabel) + } + + for _, address := range node.Status.Addresses { + if address.Type == corev1.NodeExternalIP { + expectedIP := server.ServerIP + assert.Equal(t, expectedIP, address.Address, "node has unexpected external ip") + } + } +} + +func TestServiceLoadBalancersRobot(t *testing.T) { + t.Parallel() + + lbTest := lbTestHelper{ + t: t, + K8sClient: testCluster.k8sClient, + podName: "loadbalancer-robot-only", + } + + pod := lbTest.DeployTestPod() + + lbSvc := lbTest.ServiceDefinition(pod, map[string]string{ + string(annotation.LBLocation): "nbg1", + // Only add the Robot server as a Load Balancer 
target + string(annotation.LBNodeSelector): "instance.hetzner.cloud/is-root-server=true", + }) + + lbSvc, err := lbTest.CreateService(lbSvc) + assert.NoError(t, err) + + WaitForHTTPAvailable(t, lbSvc.Status.LoadBalancer.Ingress[0].IP, false) + + lbTest.TearDown() +} diff --git a/tests/e2e/testing.go b/tests/e2e/testing.go index ddf02a4dc..6bc232351 100644 --- a/tests/e2e/testing.go +++ b/tests/e2e/testing.go @@ -14,6 +14,7 @@ import ( "testing" "time" + hrobot "github.com/syself/hrobot-go" corev1 "k8s.io/api/core/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -35,6 +36,7 @@ func init() { type TestCluster struct { hcloud *hcloud.Client + hrobot hrobot.RobotClient k8sClient *kubernetes.Clientset certificates []*hcloud.Certificate scope string @@ -48,6 +50,7 @@ func (tc *TestCluster) Start() error { } tc.scope = scopeButcher.ReplaceAllString(tc.scope, "-") + // Hetzner Cloud Client token := os.Getenv("HCLOUD_TOKEN") if token == "" { buf, err := os.ReadFile(fmt.Sprintf("../../hack/.token-%s", tc.scope)) @@ -68,6 +71,13 @@ func (tc *TestCluster) Start() error { hcloudClient := hcloud.NewClient(opts...) tc.hcloud = hcloudClient + // Hetzner Robot Client + if enabled := os.Getenv("ROBOT_ENABLED"); enabled == "true" { + robotUser := os.Getenv("ROBOT_USER") + robotPassword := os.Getenv("ROBOT_PASSWORD") + tc.hrobot = hrobot.NewBasicAuthClient(robotUser, robotPassword) + } + err := os.Setenv("KUBECONFIG", "../../hack/.kubeconfig-"+tc.scope) if err != nil { return err