Skip to content

Commit

Permalink
test(e2e): add robot server setup
Browse files Browse the repository at this point in the history
This commit adds setup code to initialize & use Robot servers in e2e
tests.

It uses installimage with autosetup to prepare the server and then uses
k3sup similar to our existing setup to join the server into the
existing kubernetes cluster.

Right now it only supports joining Robot servers as worker nodes, not
as control-plane nodes.

Also, it only supports non-Private Network setups.
  • Loading branch information
apricote committed Nov 17, 2023
1 parent d26adcf commit 2576cd4
Show file tree
Hide file tree
Showing 11 changed files with 353 additions and 4 deletions.
121 changes: 118 additions & 3 deletions .github/workflows/test_e2e.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
name: Run e2e tests
name: e2e tests
on:
pull_request: {}
push:
branches: [main]
jobs:
k3s:
name: k3s ${{ matrix.k3s }}
cloud:
name: Cloud ${{ matrix.k3s }}
permissions:
id-token: write
runs-on: ubuntu-latest
Expand Down Expand Up @@ -51,6 +51,7 @@ jobs:
# make exported env variables available to following jobs
echo "KUBECONFIG=$KUBECONFIG" >> "$GITHUB_ENV"
echo "SKAFFOLD_DEFAULT_REPO=$SKAFFOLD_DEFAULT_REPO" >> "$GITHUB_ENV"
echo "CONTROL_IP=$CONTROL_IP" >> "$GITHUB_ENV"
- name: Build and Deploy HCCM
run: |
Expand Down Expand Up @@ -102,3 +103,117 @@ jobs:
with:
name: debug-logs-${{ env.SCOPE }}
path: debug-logs/

# e2e job that runs the Robot part of the test suite: it brings up the dev
# environment, deploys HCCM with the `robot` skaffold profile, reinstalls the
# shared Robot server and joins it to the cluster, then runs the Go e2e tests
# built with the `e2e,robot` tags.
robot:
  name: Robot
  permissions:
    id-token: write

  # Make sure that only one Job is using the server at a time
  concurrency: robot-test-server
  environment: e2e-robot

  env:
    # Quoted so YAML keeps the channel a string; an unquoted value such as
    # 1.30 would be parsed as the float 1.3.
    K3S_CHANNEL: "1.28"
    SCOPE: gha-${{ github.run_id }}-${{ github.run_attempt }}-robot

    # Disable routes in dev-env, not supported for Robot.
    ROUTES_ENABLED: "false"
    ROBOT_ENABLED: "true"
    SERVER_NUMBER: ${{ vars.SERVER_NUMBER }}

  runs-on: ubuntu-latest
  steps:
    - uses: actions/setup-go@v4
      with:
        go-version: "1.21"
    - uses: actions/checkout@master
    - uses: hetznercloud/tps-action@main
      with:
        token: ${{ secrets.HCLOUD_TOKEN }}
    - uses: 3bit/setup-hcloud@v2
    - uses: yokawasa/action-setup-kube-tools@v0.9.3
      with:
        setup-tools: |
          helm
          kubectl
          skaffold
        helm: v3.11.2
        kubectl: v1.28.1
        skaffold: v2.3.0
    - name: Install k3sup
      run: |
        curl -sLS https://get.k3sup.dev | sh
    - name: Setup test environment
      run: |
        source <(hack/dev-up.sh)
        # make exported env variables available to following jobs
        echo "KUBECONFIG=$KUBECONFIG" >> "$GITHUB_ENV"
        echo "SKAFFOLD_DEFAULT_REPO=$SKAFFOLD_DEFAULT_REPO" >> "$GITHUB_ENV"
        echo "CONTROL_IP=$CONTROL_IP" >> "$GITHUB_ENV"
    - name: Build and Deploy HCCM
      run: |
        skaffold build --tag="e2e-${GITHUB_RUN_ID}-${GITHUB_RUN_NUMBER}"
        # Second invocation only prints the tag of the image built above.
        tag=$(skaffold build --tag="e2e-${GITHUB_RUN_ID}-${GITHUB_RUN_NUMBER}" --quiet --output="{{ (index .Builds 0).Tag }}")
        skaffold deploy \
          --profile=robot \
          --images=hetznercloud/hcloud-cloud-controller-manager="$tag"
    - name: Setup Robot Server
      env:
        ROBOT_USER: ${{ secrets.ROBOT_USER }}
        ROBOT_PASSWORD: ${{ secrets.ROBOT_PASSWORD }}
      working-directory: hack/robot-e2e
      run: |
        ansible-galaxy install -r requirements.yml
        echo "::group::ansible-playbook e2e-setup-robot-server.yml"
        # Forward the k3s channel so the Robot node joins with the same
        # channel as this job instead of the playbook's default.
        ansible-playbook e2e-setup-robot-server.yml \
          -e "scope=$SCOPE" \
          -e "server_number=$SERVER_NUMBER" \
          -e "k3s_channel=$K3S_CHANNEL" \
          -vvv
        echo "::endgroup::"
    - name: Run tests
      run: |
        go test ./tests/e2e -tags e2e,robot -v -timeout 60m
    - name: Download logs & events
      if: always()
      continue-on-error: true
      run: |
        # -p: do not abort log collection if the directory already exists
        mkdir -p debug-logs
        kubectl logs \
          --namespace kube-system \
          --selector app.kubernetes.io/name=hcloud-cloud-controller-manager \
          --all-containers \
          --prefix=true \
          --tail=-1 \
          > debug-logs/hccm.log
        kubectl get events \
          --all-namespaces \
          --sort-by=.firstTimestamp \
          --output yaml \
          > debug-logs/events.yaml
    - name: Show HCCM Logs on Failure
      if: failure()
      continue-on-error: true
      run: |
        echo "::group::hccm.log"
        cat debug-logs/hccm.log
        echo "::endgroup::"
    - name: Cleanup test environment
      if: always()
      continue-on-error: true
      run: |
        hack/dev-down.sh
    - name: Persist debug artifacts
      if: always()
      continue-on-error: true
      uses: actions/upload-artifact@v3
      with:
        name: debug-logs-${{ env.SCOPE }}
        path: debug-logs/
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,15 @@ alias kgp="kubectl get pods"
alias kgs="kubectl get services"
```

The test suite is split into three parts:

- **General Part**: Sets up the test env & checks if the HCCM Pod is properly running
- Build Tag: `e2e`
- **Cloud Part**: Tests regular functionality against a Cloud-only environment
- Build Tag: `e2e && !robot`
- **Robot Part**: Tests Robot functionality against a Cloud+Robot environment
- Build Tag: `e2e && robot`

## Local test setup
This repository provides [skaffold](https://skaffold.dev/) to easily deploy / debug this controller on demand

Expand Down
2 changes: 2 additions & 0 deletions hack/dev-up.sh
Original file line number Diff line number Diff line change
Expand Up @@ -182,8 +182,10 @@ if [[ -n "${DEBUG:-}" ]]; then set -x; fi
echo "Success - cluster fully initialized and ready, why not see for yourself?"
echo '$ kubectl get nodes'
kubectl get nodes
export CONTROL_IP=$(hcloud server ip "$scope_name-1")
} >&2

echo "export KUBECONFIG=$KUBECONFIG"
$SCRIPT_DIR/registry-port-forward.sh
echo "export SKAFFOLD_DEFAULT_REPO=localhost:30666"
echo "export CONTROL_IP=$CONTROL_IP"
6 changes: 6 additions & 0 deletions hack/robot-e2e/ansible.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Ansible settings for the Robot e2e setup in this directory.
[defaults]
# Inventory file, addressed through the PWD environment variable
# (presumably the directory ansible is started from — confirm).
inventory = ${PWD}/inventory.yml
# Do not verify SSH host keys of the managed server.
host_key_checking = False

[ssh_connection]
# Enable SSH pipelining for module execution.
pipelining = True
6 changes: 6 additions & 0 deletions hack/robot-e2e/autosetup.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{# Template rendered to /autosetup by the "Write autosetup" task of e2e-setup-robot-server.yml. #}
{# server_name is supplied by that task from the Robot API server info. #}
HOSTNAME {{ server_name }}

{# NOTE(review): presumably a single ext4 root partition spanning the first drive; confirm against the installimage docs. #}
DRIVE1 /dev/sda
PART / ext4 all

{# OS image to install, referenced by its path below /root/.oldroot/nfs/images. #}
IMAGE /root/.oldroot/nfs/images/Ubuntu-2204-jammy-amd64-base.tar.gz
98 changes: 98 additions & 0 deletions hack/robot-e2e/e2e-setup-robot-server.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
---
# Reinstalls a Robot server and joins it to the e2e Kubernetes cluster.
#
# Plays:
#   1. Prepare Reinstall       - boots the server into the rescue system via the Robot API
#   2. Install OS to Server    - runs installimage with an autosetup file and reboots
#   3. Join Kubernetes Cluster - runs `k3sup join` from the local machine
#
# Inputs:
#   server_number (var, required) - Robot server number
#   scope         (var)           - suffix of the SSH key pair ../.ssh-<scope>, default "dev"
#   k3s_channel   (var)           - k3s channel; defaults to $K3S_CHANNEL, then "stable"
#   ROBOT_USER / ROBOT_PASSWORD (env) - Robot API credentials
#   CONTROL_IP    (env, required) - IP of the control-plane node to join
- name: Prepare Reinstall
  hosts: localhost
  connection: local
  gather_facts: false

  vars:
    scope: dev
    # Additional SSH keys to add to the server for debugging. Must already exist in Robot.
    authorized_keys: []

  module_defaults:
    group/community.hrobot.robot:
      hetzner_user: "{{ lookup('ansible.builtin.env', 'ROBOT_USER') }}"
      hetzner_password: "{{ lookup('ansible.builtin.env', 'ROBOT_PASSWORD') }}"

  tasks:
    - name: Get Server Info
      community.hrobot.server_info:
        server_number: "{{ server_number }}"
      register: server_info

    # Facts set on localhost stay available to the later localhost play and,
    # through hostvars['localhost'], to the install play.
    - name: Set Server Facts
      ansible.builtin.set_fact:
        server_ip: "{{ server_info.servers[0].server_ip }}"
        server_name: "{{ server_info.servers[0].server_name }}"

    - name: Create SSH Key
      community.hrobot.ssh_key:
        name: "hccm-{{ scope }}"
        # Build the path by concatenation: "{{ }}" must not be nested inside
        # an expression.
        public_key: "{{ lookup('file', '../.ssh-' ~ scope ~ '.pub') }}"
        state: present
      register: ssh_key

    - name: Enable Rescue System
      community.hrobot.boot:
        server_number: "{{ server_number }}"
        rescue:
          authorized_keys: "{{ authorized_keys + [ ssh_key.fingerprint ] }}"
          os: linux

    - name: Reset Server (to get to Rescue System)
      community.hrobot.reset:
        server_number: "{{ server_number }}"
        reset_type: hardware # only type that does not require a separate reset for starting again

    - name: Wait for SSH
      ansible.builtin.wait_for:
        host: "{{ server_ip }}"
        port: 22
        search_regex: SSH
        # Give the reset time to take effect; otherwise the SSH daemon of the
        # previously running system may still answer and end the wait early.
        delay: 30

- name: Install OS to Server
  hosts: all
  gather_facts: false
  vars:
    # Same default as the other plays; the inventory uses it to locate the SSH private key.
    scope: dev
  tasks:
    - name: Write autosetup
      ansible.builtin.template:
        src: autosetup.j2
        dest: /autosetup
      vars:
        server_name: "{{ hostvars['localhost']['server_name'] }}"

    - name: installimage
      # -t => Take over rescue system SSH public keys
      ansible.builtin.command: /root/.oldroot/nfs/install/installimage -t yes

    - name: Reboot
      ansible.builtin.reboot:

    - name: Create k3s directory
      ansible.builtin.file:
        path: /etc/rancher/k3s
        state: directory

    - name: Prepare Local Registry
      ansible.builtin.copy:
        src: ../k3s-registries.yaml
        dest: /etc/rancher/k3s/registries.yaml

- name: Join Kubernetes Cluster
  hosts: localhost
  connection: local
  gather_facts: false
  vars:
    control_ip: "{{ lookup('ansible.builtin.env', 'CONTROL_IP') }}"
    # Follow the channel selected through the K3S_CHANNEL environment variable;
    # fall back to "stable" when it is unset or empty.
    k3s_channel: "{{ lookup('ansible.builtin.env', 'K3S_CHANNEL') | default('stable', true) }}"
    scope: dev

  tasks:
    # The env lookup yields an empty string for an unset variable, which the
    # `mandatory` filter below does not reject.
    - name: Ensure CONTROL_IP is set
      ansible.builtin.assert:
        that:
          - control_ip | length > 0
        fail_msg: "CONTROL_IP must be set in the environment (hack/dev-up.sh prints an export for it)"

    - name: k3sup
      ansible.builtin.command: >-
        k3sup join
        --server-ip={{ control_ip | ansible.builtin.mandatory }}
        --ip={{ server_ip }}
        --k3s-channel={{ k3s_channel }}
        --k3s-extra-args="--kubelet-arg cloud-provider=external --node-label instance.hetzner.cloud/is-root-server=true"
        --ssh-key ../.ssh-{{ scope }}
8 changes: 8 additions & 0 deletions hack/robot-e2e/inventory.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Static inventory for the Robot e2e setup playbook: a single test server.
all:
hosts:
# TODO: Dynamic inventory
hccm-test:
# Hard-coded address of the test server (see TODO above).
ansible_host: 142.132.203.104
ansible_user: root
# Private key named after the `scope` variable, which must be defined when the playbook runs.
ansible_ssh_private_key_file: ../.ssh-{{ scope }}
# Skip host key verification and do not record host keys
# (presumably because the server is reinstalled on every run — confirm).
ansible_ssh_common_args: '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
2 changes: 2 additions & 0 deletions hack/robot-e2e/requirements.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Ansible Galaxy requirements for the Robot e2e setup playbook.
collections:
# Provides the community.hrobot.* modules; no version is pinned here.
- name: community.hrobot
2 changes: 1 addition & 1 deletion tests/e2e/cloud_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//go:build e2e
//go:build e2e && !robot

package e2e

Expand Down
93 changes: 93 additions & 0 deletions tests/e2e/robot_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
//go:build e2e && robot

package e2e

import (
"context"
"testing"

"github.com/stretchr/testify/assert"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/hetznercloud/hcloud-cloud-controller-manager/internal/annotation"
"github.com/hetznercloud/hcloud-cloud-controller-manager/internal/providerid"
)

func TestRobotClientIsAvailable(t *testing.T) {
assert.NotNil(t, testCluster.hrobot)
}

// TestNodeSetCorrectNodeLabelsAndIPAddressesRobot cross-checks a Robot node in
// the cluster against the Robot API: well-known labels must carry the expected
// values, the instance-type/region/zone labels must be present, and every
// external IP reported on the Node must equal the server's IP.
//
// Lookups whose result is dereferenced afterwards stop the test on failure, so
// a missing node or a failed API call is reported as a test failure rather
// than as a panic.
func TestNodeSetCorrectNodeLabelsAndIPAddressesRobot(t *testing.T) {
	t.Parallel()
	ctx := context.Background()

	// Use the first Node that is labelled as a Robot (root) server.
	nodes, err := testCluster.k8sClient.CoreV1().Nodes().List(ctx, metav1.ListOptions{
		LabelSelector: "instance.hetzner.cloud/is-root-server=true",
	})
	if !assert.NoError(t, err) {
		return
	}
	if !assert.GreaterOrEqual(t, len(nodes.Items), 1) {
		return
	}
	node := nodes.Items[0]

	// Parse the server number from the ProviderID
	id, isCloudServer, err := providerid.ToServerID(node.Spec.ProviderID)
	if !assert.NoError(t, err) {
		return
	}
	assert.False(t, isCloudServer)

	// Get the server from the Robot API to cross-check Labels
	server, err := testCluster.hrobot.ServerGet(int(id))
	if !assert.NoError(t, err) {
		return
	}

	labels := node.Labels

	// Labels that must be present with an exact value.
	expectedLabels := map[string]string{
		"kubernetes.io/hostname": server.Name,
		"kubernetes.io/os":       "linux",
		"kubernetes.io/arch":     "amd64",
	}
	for expectedLabel, expectedValue := range expectedLabels {
		assert.Equal(t, expectedValue, labels[expectedLabel], "node does not have expected label %s", expectedLabel)
	}

	// Labels that only need to exist; their values are not checked here.
	expectedLabelsSet := []string{
		"node.kubernetes.io/instance-type",
		"topology.kubernetes.io/region",
		"topology.kubernetes.io/zone",
	}
	for _, expectedLabel := range expectedLabelsSet {
		_, ok := labels[expectedLabel]
		assert.True(t, ok, "node is missing expected label %s", expectedLabel)
	}

	for _, address := range node.Status.Addresses {
		if address.Type == corev1.NodeExternalIP {
			expectedIP := server.ServerIP
			assert.Equal(t, expectedIP, address.Address, "node has unexpected external ip")
		}
	}
}

// TestServiceLoadBalancersRobot deploys a test pod behind a Load Balancer
// Service whose node selector only matches Robot servers and waits until the
// Load Balancer's first ingress IP answers over HTTP.
//
// The helper's TearDown is deferred once the pod is deployed, so it also runs
// when a later step fails or aborts the test.
func TestServiceLoadBalancersRobot(t *testing.T) {
	t.Parallel()

	lbTest := lbTestHelper{
		t:         t,
		K8sClient: testCluster.k8sClient,
		podName:   "loadbalancer-robot-only",
	}

	pod := lbTest.DeployTestPod()
	// Wrapped in a closure so TearDown sees the helper's state at the time it
	// runs, exactly like a call at the end of the function would.
	defer func() { lbTest.TearDown() }()

	lbSvc := lbTest.ServiceDefinition(pod, map[string]string{
		string(annotation.LBLocation): "nbg1",
		// Only add the Robot server as a Load Balancer target
		string(annotation.LBNodeSelector): "instance.hetzner.cloud/is-root-server=true",
	})

	lbSvc, err := lbTest.CreateService(lbSvc)
	if !assert.NoError(t, err) {
		return
	}

	// Guard the index below: without an ingress entry there is no IP to probe.
	ingress := lbSvc.Status.LoadBalancer.Ingress
	if !assert.NotEmpty(t, ingress, "load balancer service has no ingress") {
		return
	}

	WaitForHTTPAvailable(t, ingress[0].IP, false)
}

0 comments on commit 2576cd4

Please sign in to comment.