Skip to content

Commit

Permalink
contrib/kind: enable XDP_TX from pod veth
Browse files Browse the repository at this point in the history
Both ends of a veth pair require an XDP program to be installed for XDP_TX
to work. Since the host-side veth created by kind doesn't have
an XDP program attached, we can't run any tests in CI that require
XDP_TX.

The workaround itself is just an ip link set and ethtool away;
the problem is figuring out which interfaces the workaround
needs to be applied to.

Use the approach used by kind-networking-plugins and create our own
docker network with a specific name for the bridge device. We
can then iterate over all children of the bridge and do our fixups.

We tell kind to use our own network by setting the (undocumented?)
KIND_EXPERIMENTAL_DOCKER_NETWORK environment variable.

See https://github.com/aojea/kind-networking-plugins

Signed-off-by: Lorenz Bauer <lmb@isovalent.com>
  • Loading branch information
lmb committed Mar 20, 2023
1 parent 3fcaa50 commit 19d3892
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 9 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/conformance-datapath-v1.13.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -286,15 +286,15 @@ jobs:
provision: 'false'
cmd: |
cd /host/
./contrib/scripts/kind.sh "" 3 "" "" "${{ matrix.kube-proxy }}" "dual"
./contrib/scripts/kind.sh --xdp "" 3 "" "" "${{ matrix.kube-proxy }}" "dual"
./cilium-cli install ${{ steps.vars.outputs.cilium_install_defaults }}
./cilium-cli status --wait
./cilium-cli connectivity test --datapath --collect-sysdump-on-failure \
--sysdump-output-filename "cilium-sysdump-${{ matrix.name }}-<ts>"
./cilium-cli connectivity test --collect-sysdump-on-failure \
--sysdump-output-filename "cilium-sysdump-${{ matrix.name }}-<ts>"
kind delete cluster
./contrib/scripts/kind-down.sh --keep-registry
- name: Run encryption tests
if: ${{ matrix.encryption != 'disabled' }}
Expand All @@ -303,7 +303,7 @@ jobs:
provision: 'false'
cmd: |
cd /host/
./contrib/scripts/kind.sh "" 3 "" "" "${{ matrix.kube-proxy }}" "dual"
./contrib/scripts/kind.sh --xdp "" 3 "" "" "${{ matrix.kube-proxy }}" "dual"
./cilium-cli install ${{ steps.vars.outputs.cilium_install_defaults }} \
--encryption=${{ matrix.encryption }}
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/conformance-datapath.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@ jobs:
provision: 'false'
cmd: |
cd /host/
./contrib/scripts/kind.sh "" 3 "" "" "${{ matrix.kube-proxy }}" "dual"
./contrib/scripts/kind.sh --xdp "" 3 "" "" "${{ matrix.kube-proxy }}" "dual"
./cilium-cli install ${{ steps.vars.outputs.cilium_install_defaults }}
./cilium-cli status --wait
Expand All @@ -389,7 +389,7 @@ jobs:
--sysdump-output-filename "cilium-sysdump-${{ matrix.name }}-<ts>"
./cilium-cli connectivity test --collect-sysdump-on-failure \
--sysdump-output-filename "cilium-sysdump-${{ matrix.name }}-<ts>"
kind delete cluster
./contrib/scripts/kind-down.sh --keep-registry
- name: Fetch artifacts
if: ${{ !success() }}
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -115,3 +115,6 @@ images/*/Dockerfile.dockerignore

# Clangd cache for indexed bpf code
bpf/.cache

# Include dummy bpf object necessary for XDP_TX
!test/l4lb/bpf_xdp_veth_host.o
8 changes: 6 additions & 2 deletions contrib/scripts/kind-down.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ if ! have_kind; then
echo " https://kind.sigs.k8s.io/docs/user/quick-start/#installation"
fi

if [ "${1:-}" != "--keep-registry" ]; then
docker kill kind-registry && \
docker rm kind-registry
fi

kind delete clusters kind && \
docker kill kind-registry && \
docker rm kind-registry
docker network rm kind-cilium
46 changes: 44 additions & 2 deletions contrib/scripts/kind.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,17 @@ default_pod_subnet=""
default_service_subnet=""
default_agent_port_prefix="234"
default_operator_port_prefix="235"
default_network="kind-cilium"

PROG=${0}

# Optional leading --xdp flag: when present, consume it and remember that
# the XDP veth fixups should be applied after the cluster is created.
xdp=false
case "${1:-}" in
  --xdp)
    xdp=true
    shift
    ;;
esac
readonly xdp

controlplanes="${1:-${CONTROLPLANES:=${default_controlplanes}}}"
workers="${2:-${WORKERS:=${default_workers}}}"
cluster_name="${3:-${CLUSTER_NAME:=${default_cluster_name}}}"
Expand All @@ -27,10 +36,13 @@ pod_subnet="${PODSUBNET:=${default_pod_subnet}}"
service_subnet="${SERVICESUBNET:=${default_service_subnet}}"
agent_port_prefix="${AGENTPORTPREFIX:=${default_agent_port_prefix}}"
operator_port_prefix="${OPERATORPORTPREFIX:=${default_operator_port_prefix}}"

bridge_dev="br-${default_network}"
v6_prefix="fc00:c111::/64"
CILIUM_ROOT="$(git rev-parse --show-toplevel)"

usage() {
echo "Usage: ${PROG} [control-plane node count] [worker node count] [cluster-name] [node image] [kube-proxy mode] [ip-family]"
echo "Usage: ${PROG} [--xdp] [control-plane node count] [worker node count] [cluster-name] [node image] [kube-proxy mode] [ip-family]"
}

have_kind() {
Expand Down Expand Up @@ -117,6 +129,16 @@ workers() {
done
}

# Create a custom docker network so we can control the name of the bridge
# device; the XDP fixup loop below needs to locate the bridge in sysfs.
# Inspired by https://github.com/kubernetes-sigs/kind/blob/6b58c9dfcbdb1b3a0d48754d043d59ca7073589b/pkg/cluster/internal/providers/docker/network.go#L149-L161
#
# Only create the network when it does not already exist: "docker network
# create" fails on a duplicate name, which would abort a re-run of this
# script (the later "docker network connect ... || true" shows re-runs are
# expected to work).
if ! docker network inspect "${default_network}" >/dev/null 2>&1; then
  docker network create -d=bridge \
    -o "com.docker.network.bridge.enable_ip_masquerade=true" \
    -o "com.docker.network.bridge.name=${bridge_dev}" \
    --ipv6 --subnet "${v6_prefix}" \
    "${default_network}"
fi

# Tell kind to attach the cluster nodes to our network instead of its
# default "kind" network (undocumented? environment variable).
export KIND_EXPERIMENTAL_DOCKER_NETWORK="${default_network}"

# create a cluster with the local registry enabled in containerd
cat <<EOF | ${kind_cmd} --config=-
kind: Cluster
Expand Down Expand Up @@ -144,7 +166,27 @@ containerdConfigPatches:
endpoint = ["http://${reg_name}:${reg_port}"]
EOF

docker network connect "kind" "${reg_name}" || true
if [ "${xdp}" = true ]; then
if ! [ -f "${CILIUM_ROOT}/test/l4lb/bpf_xdp_veth_host.o" ]; then
pushd "${CILIUM_ROOT}/test/l4lb/" > /dev/null
clang -O2 -Wall -target bpf -c bpf_xdp_veth_host.c -o bpf_xdp_veth_host.o
popd > /dev/null
fi

for ifc in /sys/class/net/"${bridge_dev}"/brif/*; do
ifc="${ifc#"/sys/class/net/${bridge_dev}/brif/"}"

# Attach a dummy XDP prog to the host side of the veth so that XDP_TX in the
# pod side works.
sudo ip link set dev "${ifc}" xdp obj "${CILIUM_ROOT}/test/l4lb/bpf_xdp_veth_host.o"

# Disable TX and RX csum offloading, as veth does not support it. Otherwise,
# the forwarded packets by the LB to the worker node will have invalid csums.
sudo ethtool -K "${ifc}" rx off tx off > /dev/null
done
fi

docker network connect "${default_network}" "${reg_name}" || true

for node in $(kubectl get nodes --no-headers -o custom-columns=:.metadata.name); do
kubectl annotate node "${node}" "kind.x-k8s.io/registry=localhost:${reg_port}";
Expand Down
Binary file added test/l4lb/bpf_xdp_veth_host.o
Binary file not shown.

0 comments on commit 19d3892

Please sign in to comment.