diff --git a/.github/workflows/test_e2e.yml b/.github/workflows/test_e2e.yml index 4e90eed66..0379944f5 100644 --- a/.github/workflows/test_e2e.yml +++ b/.github/workflows/test_e2e.yml @@ -1,11 +1,11 @@ -name: Run e2e tests +name: e2e tests on: pull_request: {} push: branches: [main] jobs: - k3s: - name: k3s ${{ matrix.k3s }} + cloud: + name: Cloud ${{ matrix.k3s }} permissions: id-token: write runs-on: ubuntu-latest @@ -51,6 +51,7 @@ jobs: # make exported env variables available to following jobs echo "KUBECONFIG=$KUBECONFIG" >> "$GITHUB_ENV" echo "SKAFFOLD_DEFAULT_REPO=$SKAFFOLD_DEFAULT_REPO" >> "$GITHUB_ENV" + echo "CONTROL_IP=$CONTROL_IP" >> "$GITHUB_ENV" - name: Build and Deploy HCCM run: | @@ -102,3 +103,127 @@ jobs: with: name: debug-logs-${{ env.SCOPE }} path: debug-logs/ + + robot: + name: Robot + permissions: + id-token: write + + # Make sure that only one Job is using the server at a time + concurrency: robot-test-server + environment: e2e-robot + + env: + K3S_CHANNEL: v1.28 + SCOPE: gha-${{ github.run_id }}-${{ github.run_attempt }}-robot + + # Disable routes in dev-env, not supported for Robot. + ROUTES_ENABLED: "false" + ROBOT_ENABLED: "true" + SERVER_NUMBER: ${{ vars.SERVER_NUMBER }} + + runs-on: ubuntu-latest + steps: + - uses: actions/setup-go@v4 + with: + go-version: "1.21" + - uses: actions/checkout@master + - uses: hetznercloud/tps-action@main + with: + token: ${{ secrets.HCLOUD_TOKEN }} + - uses: 3bit/setup-hcloud@v2 + - uses: yokawasa/action-setup-kube-tools@v0.9.3 + with: + setup-tools: | + helm + kubectl + skaffold + helm: v3.11.2 + kubectl: v1.28.1 + skaffold: v2.3.0 + - name: Install k3sup + run: | + curl -sLS https://get.k3sup.dev | sh + + - name: Setup test environment + env: + ROBOT_USER: ${{ secrets.ROBOT_USER }} + ROBOT_PASSWORD: ${{ secrets.ROBOT_PASSWORD }} + run: | + source <(hack/dev-up.sh) + + # make exported env variables available to following jobs + echo "KUBECONFIG=$KUBECONFIG" >> "$GITHUB_ENV" + echo "SKAFFOLD_DEFAULT_REPO=$SKAFFOLD_DEFAULT_REPO" >> "$GITHUB_ENV" + echo "CONTROL_IP=$CONTROL_IP" >> "$GITHUB_ENV" + + - name: Build and Deploy HCCM + run: | + skaffold build --tag="e2e-${GITHUB_RUN_ID}-${GITHUB_RUN_NUMBER}" + tag=$(skaffold build --tag="e2e-${GITHUB_RUN_ID}-${GITHUB_RUN_NUMBER}" --quiet --output="{{ (index .Builds 0).Tag }}") + skaffold deploy \ + --profile=robot \ + --images=hetznercloud/hcloud-cloud-controller-manager=$tag + + - name: Setup Robot Server + env: + ROBOT_USER: ${{ secrets.ROBOT_USER }} + ROBOT_PASSWORD: ${{ secrets.ROBOT_PASSWORD }} + + # Nicer output + PY_COLORS: true + ANSIBLE_FORCE_COLOR: true + working-directory: hack/robot-e2e + run: | + ansible-galaxy install -r requirements.yml + echo "::group::ansible-playbook e2e-setup-robot-server.yml" + ansible-playbook e2e-setup-robot-server.yml -e scope=$SCOPE -e server_number=$SERVER_NUMBER -vvv + echo "::endgroup::" + + - name: Run tests + env: + ROBOT_USER: ${{ secrets.ROBOT_USER }} + ROBOT_PASSWORD: ${{ secrets.ROBOT_PASSWORD }} + run: | + go test ./tests/e2e -tags e2e,robot -v -timeout 60m + + - name: Download logs & events + if: always() + continue-on-error: true + run: | + mkdir debug-logs + kubectl logs \ + --namespace kube-system \ + --selector app.kubernetes.io/name=hcloud-cloud-controller-manager \ + --all-containers \ + --prefix=true \ + --tail=-1 \ + > debug-logs/hccm.log + + kubectl get events \ + --all-namespaces \ + --sort-by=.firstTimestamp \ + --output yaml \ + > debug-logs/events.yaml + + - name: Show HCCM Logs on Failure + if: failure() + continue-on-error: true + run: | + echo "::group::hccm.log" + cat debug-logs/hccm.log + echo "::endgroup::" + + - name: Cleanup test environment + if: always() + continue-on-error: true + run: | + hack/dev-down.sh + + - name: Persist debug artifacts + if: always() + continue-on-error: true + uses: actions/upload-artifact@v3 + with: + name: debug-logs-${{ env.SCOPE }} + path: debug-logs/ diff --git a/README.md b/README.md index 40f79f975..305a604e0 100644 --- a/README.md +++ b/README.md @@ -254,6 +254,15 @@ alias kgp="kubectl get pods" alias kgs="kubectl get services" ``` +The test suite is split in three parts: + +- **General Part**: Sets up the test env & checks if the HCCM Pod is properly running + - Build Tag: `e2e` +- **Cloud Part**: Tests regular functionality against a Cloud-only environment + - Build Tag: `e2e && !robot` +- **Robot Part**: Tests Robot functionality against a Cloud+Robot environment + - Build Tag: `e2e && robot` + ## Local test setup This repository provides [skaffold](https://skaffold.dev/) to easily deploy / debug this controller on demand diff --git a/hack/dev-up.sh b/hack/dev-up.sh index 77ea86dd3..b4596711f 100755 --- a/hack/dev-up.sh +++ b/hack/dev-up.sh @@ -182,8 +182,10 @@ if [[ -n "${DEBUG:-}" ]]; then set -x; fi echo "Success - cluster fully initialized and ready, why not see for yourself?" echo '$ kubectl get nodes' kubectl get nodes + export CONTROL_IP=$(hcloud server ip "$scope_name-1") } >&2 echo "export KUBECONFIG=$KUBECONFIG" $SCRIPT_DIR/registry-port-forward.sh echo "export SKAFFOLD_DEFAULT_REPO=localhost:30666" +echo "export CONTROL_IP=$CONTROL_IP" diff --git a/hack/robot-e2e/ansible.cfg b/hack/robot-e2e/ansible.cfg new file mode 100644 index 000000000..0fdad75d3 --- /dev/null +++ b/hack/robot-e2e/ansible.cfg @@ -0,0 +1,7 @@ +[defaults] +inventory = ${PWD}/inventory.yml +host_key_checking = False +stdout_callback = community.general.yaml + +[ssh_connection] +pipelining = True diff --git a/hack/robot-e2e/autosetup.j2 b/hack/robot-e2e/autosetup.j2 new file mode 100644 index 000000000..3ec7cb285 --- /dev/null +++ b/hack/robot-e2e/autosetup.j2 @@ -0,0 +1,12 @@ +HOSTNAME {{ server_name }} + +DRIVE1 /dev/sda +DRIVE2 /dev/sdb + +SWRAID 1 +# We do not care at all about data consistency/availability, as we reprovision for every test run +SWRAIDLEVEL 0 + +PART / ext4 all + +IMAGE /root/.oldroot/nfs/images/Ubuntu-2204-jammy-amd64-base.tar.gz diff --git a/hack/robot-e2e/e2e-setup-robot-server.yml b/hack/robot-e2e/e2e-setup-robot-server.yml new file mode 100644 index 000000000..4414965d1 --- /dev/null +++ b/hack/robot-e2e/e2e-setup-robot-server.yml @@ -0,0 +1,101 @@ +--- +- name: Prepare Reinstall + hosts: localhost + connection: local + gather_facts: false + + vars: + scope: dev + # Additional SSH keys to add to the server for debugging. Must already exist in Robot. + authorized_keys: [] + + module_defaults: + group/community.hrobot.robot: + hetzner_user: "{{ lookup('ansible.builtin.env', 'ROBOT_USER') }}" + hetzner_password: "{{ lookup('ansible.builtin.env', 'ROBOT_PASSWORD') }}" + + tasks: + - name: Get Server Info + community.hrobot.server_info: + server_number: "{{ server_number }}" + register: server_info + + - name: Set Server Facts + ansible.builtin.set_fact: + server_ip: "{{ server_info.servers[0].server_ip }}" + server_name: "{{ server_info.servers[0].server_name }}" + + - name: Create SSH Key + community.hrobot.ssh_key: + name: "hccm-{{ scope }}" + public_key: "{{ lookup('file', '../.ssh-{{ scope }}.pub') }}" + state: present + register: ssh_key + + - name: Enable Rescue System + community.hrobot.boot: + server_number: "{{ server_number }}" + rescue: + authorized_keys: "{{ authorized_keys + [ ssh_key.fingerprint ] }}" + os: linux + + - name: Reset Server (to get to Rescue System) + community.hrobot.reset: + server_number: "{{ server_number }}" + reset_type: hardware # only type that does not require a separate reset for starting again + + - name: Wait for SSH + ansible.builtin.wait_for: + host: "{{ server_ip }}" + port: "{{ 22 }}" + search_regex: SSH + +- name: Install OS to Server + hosts: all + gather_facts: false + tasks: + - name: Write autosetup + ansible.builtin.template: + src: autosetup.j2 + dest: /autosetup + vars: + server_name: "{{ hostvars['localhost']['server_name'] }}" + + - name: installimage + # -t => Take over rescue system SSH public keys + ansible.builtin.command: /root/.oldroot/nfs/install/installimage -t yes + + - name: Reboot + ansible.builtin.reboot: + # 5 minutes should be enough for a reboot, and in case + # there is some issue, we can abort earlier. + reboot_timeout: 300 + + - name: Create k3s directory + ansible.builtin.file: + path: /etc/rancher/k3s + state: directory + + - name: Prepare Local Registry + ansible.builtin.copy: + src: ../k3s-registries.yaml + dest: /etc/rancher/k3s/registries.yaml + +- name: Join Kubernetes Cluster + hosts: localhost + connection: local + gather_facts: false + vars: + control_ip: "{{ lookup('ansible.builtin.env', 'CONTROL_IP') }}" + k3s_channel: stable + scope: dev + + tasks: + - name: k3sup + ansible.builtin.command: >- + k3sup join + --server-ip={{ control_ip | ansible.builtin.mandatory }} + --ip={{ server_ip }} + --k3s-channel={{ k3s_channel }} + --k3s-extra-args="--kubelet-arg cloud-provider=external --node-label instance.hetzner.cloud/is-root-server=true" + --ssh-key ../.ssh-{{ scope }} diff --git a/hack/robot-e2e/inventory.yml b/hack/robot-e2e/inventory.yml new file mode 100644 index 000000000..c3f8c2f02 --- /dev/null +++ b/hack/robot-e2e/inventory.yml @@ -0,0 +1,8 @@ +all: + hosts: + # TODO: Dynamic inventory + hccm-test: + ansible_host: 142.132.203.104 + ansible_user: root + ansible_ssh_private_key_file: ../.ssh-{{ scope }} + ansible_ssh_common_args: '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null' diff --git a/hack/robot-e2e/requirements.yml b/hack/robot-e2e/requirements.yml new file mode 100644 index 000000000..d2df60aa8 --- /dev/null +++ b/hack/robot-e2e/requirements.yml @@ -0,0 +1,3 @@ +collections: + - name: community.hrobot + - name: community.general \ No newline at end of file diff --git a/tests/e2e/cloud_test.go b/tests/e2e/cloud_test.go index d18ab167f..ac2b7fdf6 100644 --- a/tests/e2e/cloud_test.go +++ b/tests/e2e/cloud_test.go @@ -1,4 +1,4 @@ -//go:build e2e +//go:build e2e && !robot package e2e diff --git a/tests/e2e/robot_test.go b/tests/e2e/robot_test.go new file mode 100644 index 000000000..58de11015 --- /dev/null +++ b/tests/e2e/robot_test.go @@ -0,0 +1,93 @@ +//go:build e2e && robot + +package e2e + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/annotation" + "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/providerid" +) + +func TestRobotClientIsAvailable(t *testing.T) { + assert.NotNil(t, testCluster.hrobot) +} + +func TestNodeSetCorrectNodeLabelsAndIPAddressesRobot(t *testing.T) { + t.Parallel() + ctx := context.Background() + + // Get a random Robot server from all Nodes in the cluster + nodes, err := testCluster.k8sClient.CoreV1().Nodes().List(ctx, metav1.ListOptions{ + LabelSelector: "instance.hetzner.cloud/is-root-server=true", + }) + assert.NoError(t, err) + assert.GreaterOrEqual(t, len(nodes.Items), 1) + node := nodes.Items[0] + + // Parse the server number from the ProviderID + id, isCloudServer, err := providerid.ToServerID(node.Spec.ProviderID) + assert.NoError(t, err) + assert.False(t, isCloudServer) + + // Get the server from the Robot API to cross-check Labels + server, err := testCluster.hrobot.ServerGet(int(id)) + assert.NoError(t, err) + + labels := node.Labels + expectedLabels := map[string]string{ + "kubernetes.io/hostname": server.Name, + "kubernetes.io/os": "linux", + "kubernetes.io/arch": "amd64", + } + for expectedLabel, expectedValue := range expectedLabels { + assert.Equal(t, expectedValue, labels[expectedLabel], "node does not have expected label %s", expectedLabel) + } + + expectedLabelsSet := []string{ + "node.kubernetes.io/instance-type", + "topology.kubernetes.io/region", + "topology.kubernetes.io/zone", + } + for _, expectedLabel := range expectedLabelsSet { + _, ok := labels[expectedLabel] + assert.True(t, ok, "node is missing expected label %s", expectedLabel) + } + + for _, address := range node.Status.Addresses { + if address.Type == corev1.NodeExternalIP { + expectedIP := server.ServerIP + assert.Equal(t, expectedIP, address.Address, "node has unexpected external ip") + } + } +} + +func TestServiceLoadBalancersRobot(t *testing.T) { + t.Parallel() + + lbTest := lbTestHelper{ + t: t, + K8sClient: testCluster.k8sClient, + podName: "loadbalancer-robot-only", + } + + pod := lbTest.DeployTestPod() + + lbSvc := lbTest.ServiceDefinition(pod, map[string]string{ + string(annotation.LBLocation): "nbg1", + // Only add the Robot server as a Load Balancer target + string(annotation.LBNodeSelector): "instance.hetzner.cloud/is-root-server=true", + }) + + lbSvc, err := lbTest.CreateService(lbSvc) + assert.NoError(t, err) + + WaitForHTTPAvailable(t, lbSvc.Status.LoadBalancer.Ingress[0].IP, false) + + lbTest.TearDown() +} diff --git a/tests/e2e/testing.go b/tests/e2e/testing.go index ddf02a4dc..6bc232351 100644 --- a/tests/e2e/testing.go +++ b/tests/e2e/testing.go @@ -14,6 +14,7 @@ import ( "testing" "time" + hrobot "github.com/syself/hrobot-go" corev1 "k8s.io/api/core/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -35,6 +36,7 @@ func init() { type TestCluster struct { hcloud *hcloud.Client + hrobot hrobot.RobotClient k8sClient *kubernetes.Clientset certificates []*hcloud.Certificate scope string @@ -48,6 +50,7 @@ func (tc *TestCluster) Start() error { } tc.scope = scopeButcher.ReplaceAllString(tc.scope, "-") + // Hetzner Cloud Client token := os.Getenv("HCLOUD_TOKEN") if token == "" { buf, err := os.ReadFile(fmt.Sprintf("../../hack/.token-%s", tc.scope)) @@ -68,6 +71,13 @@ func (tc *TestCluster) Start() error { hcloudClient := hcloud.NewClient(opts...) tc.hcloud = hcloudClient + // Hetzner Robot Client + if enabled := os.Getenv("ROBOT_ENABLED"); enabled == "true" { + robotUser := os.Getenv("ROBOT_USER") + robotPassword := os.Getenv("ROBOT_PASSWORD") + tc.hrobot = hrobot.NewBasicAuthClient(robotUser, robotPassword) + } + err := os.Setenv("KUBECONFIG", "../../hack/.kubeconfig-"+tc.scope) if err != nil { return err