Skip to content

Commit

Permalink
Fix Kind support (Linux hosts only)
Browse files Browse the repository at this point in the history
The following changes were required:
 * Disable TX HW checksum offload in containers. This is done in the
   Antrea CNI server when setting-up Pod networking, using an ioctl
   ethtool system call.
 * Disable TX HW checksum offload in the Linux host for the veth
   interface of each Kind Node. This must be done by invoking an
   additional script (hack/kind_linux.sh) after creating the Kind
   cluster.
 * Create a secondary br-phy bridge on each Node, as required by OVS
   userspace tunneling.
 * Use a new version of start_ovs (start_ovs_netdev) which modifies the
   ovs-ctl script in-place to avoid loading the kernel module.

Refer to #14 for the rationale for all the above bullet points.

A new test "provider" was added to the e2e test framework so that all
the e2e tests can be run on Kind clusters. As part of this, some
changes to the framework had to be performed. For example it is
impractical to run SSH commands on Kind Nodes - as they do not have an
SSH server - so instead we use "docker exec".

Fixes #14
Fixes #13
  • Loading branch information
antoninbas committed Nov 27, 2019
1 parent e34cc0e commit c7c497d
Show file tree
Hide file tree
Showing 22 changed files with 507 additions and 50 deletions.
42 changes: 42 additions & 0 deletions .github/workflows/kind.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
name: Kind
on:
pull_request:
branches:
- master
- release-*
jobs:
test-unit:
name: E2e tests on a Kind cluster on Linux
runs-on: [ubuntu-18.04]
steps:
- uses: actions/checkout@v1
- uses: actions/setup-go@v1
with:
go-version: 1.12
- name: Build Antrea image
run: make
- name: Install Kind
env:
KIND_VERSION: v0.6.0
run: |
curl -Lo ./kind https://github.com/kubernetes-sigs/kind/releases/download/${KIND_VERSION}/kind-$(uname)-amd64
chmod +x ./kind
sudo mv kind /usr/local/bin
- name: Create Kind cluster
run: |
kind create cluster --config ci/kind/config.yml
kind get nodes | xargs ./hack/kind_linux.sh
- name: Deploy Antrea
# kubectl is installed on the Github Ubuntu 18.04 worker
run: |
kind load docker-image antrea/antrea-ubuntu:latest
./hack/generate-manifest.sh --kind | kubectl apply -f -
- name: Printing some debug information
run: |
sleep 30
kubectl get -A all
kubectl -n kube-system logs --all-containers -l app=antrea
- name: Run e2e tests
run: |
./hack/generate-manifest.sh --kind | docker exec -i kind-control-plane dd of=/root/antrea.yml
go test github.com/vmware-tanzu/antrea/test/e2e -provider=kind
8 changes: 8 additions & 0 deletions build/images/ethtool/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
FROM ubuntu:18.04

LABEL maintainer="Antrea <projectantrea-dev@googlegroups.com>"
LABEL description="A Docker image based on Ubuntu 18.04 which includes ethtool and ip tools."

RUN apt-get update && \
apt-get install -y --no-install-recommends ethtool iproute2 && \
rm -rf /var/cache/apt/* /var/lib/apt/lists/*
16 changes: 16 additions & 0 deletions build/images/ethtool/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# images/ethtool

This Docker image is a very lightweight image based on Ubuntu 18.04 which
includes ethtool and the ip tools.

If you need to build a new version of the image and push it to Dockerhub, you
can run the following:

```bash
cd build/images/ethtool
docker build -t antrea/ethtool:latest .
docker push antrea/ethtool:latest
```

The `docker push` command will fail if you do not have permission to push to the
`antrea` Dockerhub repository.
96 changes: 96 additions & 0 deletions build/images/scripts/start_ovs_netdev
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#!/usr/bin/env bash

source logging
source daemon_status

# Passed as the first argument to the log_info / log_warning helpers below.
CONTAINER_NAME="antrea-ovs"
# Database file handed to ovs-ctl through --db-file when OVS is started.
OVS_DB_FILE="/var/run/openvswitch/conf.db"

# Abort on any failing command, unset variable or failing pipeline stage.
set -euo pipefail

# Capture eth0's MAC address, IPv4 address (as printed by "ip addr") and the
# default gateway before anything is changed: add_br_phy moves them onto
# br-phy and del_br_phy uses the saved values to put them back on eth0.
# Because of pipefail, the script exits right here if a grep finds no match
# (eth0 without a link/ether or inet entry, or no default route).
hwaddr=$(ip link show eth0 | grep link/ether | awk '{print $2}')
inet=$(ip addr show eth0 | grep "inet " | awk '{ print $2 }')
gw=$(ip route | grep default | awk '{ print $3 }')

# Modify ovs-ctl so that the kernel module is no longer loaded since it is not
# needed when using OVS in userspace mode. It also enables running OVS with the
# netdev datapath type on platforms which do not have the OVS kernel module.
# This is easier than starting daemons manually...
#
# The sed expression edits the installed ovs-ctl script in place and prefixes
# every occurrence of "insert_mod_if_required || return 1" with "# ", turning
# the call into a comment.
# NOTE(review): the first group uses \w* (word characters), not \s*; it looks
# like it was meant to capture leading whitespace - confirm against ovs-ctl.
function fix_ovs_ctl {
    sed -i 's/\(\w*\)\(insert_mod_if_required || return 1\)/\1# \2/' /usr/share/openvswitch/scripts/ovs-ctl
}

# Create the br-phy bridge and move eth0's connectivity onto it.
# See http://docs.openvswitch.org/en/latest/howto/userspace-tunneling/
#
# Uses the hwaddr, inet and gw values captured from eth0 at script start.
function add_br_phy {
    log_info $CONTAINER_NAME "Creating OVS br-phy bridge for netdev datapath type"
    # Single ovs-vsctl transaction: create the bridge (tolerating an existing
    # one), select the netdev datapath, tag it with a bridge-id external ID,
    # and set standalone fail-mode together with eth0's MAC address.
    ovs-vsctl --may-exist add-br br-phy \
        -- set Bridge br-phy datapath_type=netdev \
        -- br-set-external-id br-phy bridge-id br-phy \
        -- set bridge br-phy fail-mode=standalone \
        other_config:hwaddr="$hwaddr"

    # Attach eth0 to the bridge as a port.
    ovs-vsctl --timeout 10 add-port br-phy eth0
    # Give the saved eth0 address to br-phy and bring the bridge up.
    ip addr add "$inet" dev br-phy
    ip link set br-phy up
    # Remove the addresses from eth0 (error output is discarded) but keep the
    # link itself up.
    ip addr flush dev eth0 2>/dev/null
    ip link set eth0 up
    # Install the default route through the saved gateway, now via br-phy.
    ip route add default via "$gw" dev br-phy
}

# Undo add_br_phy: detach eth0 from br-phy, delete the bridge, then put the
# saved address (inet) and default route (gw) back on eth0.
function del_br_phy {
    log_info $CONTAINER_NAME "Deleting OVS br-phy bridge"
    ovs-vsctl del-port br-phy eth0
    ovs-vsctl del-br br-phy
    # Restore the address and default route captured at script start.
    ip addr add "$inet" dev eth0
    ip link set eth0 up
    ip route add default via "$gw" dev eth0
}

# Start OVS through ovs-ctl with a random system ID, pointing it at the
# database file configured in OVS_DB_FILE.
function start_ovs {
    local ovs_ctl=/usr/share/openvswitch/scripts/ovs-ctl

    log_info "$CONTAINER_NAME" "Starting OVS"
    "$ovs_ctl" --system-id=random start --db-file="$OVS_DB_FILE"
}

# Stop OVS through ovs-ctl.
function stop_ovs {
    local ovs_ctl=/usr/share/openvswitch/scripts/ovs-ctl

    log_info "$CONTAINER_NAME" "Stopping OVS"
    "$ovs_ctl" stop
}

# PID of the background sleep started by the main loop; empty until the loop
# has started one.
SLEEP_PID=

# Signal handler (installed for INT and TERM): tear down br-phy, stop OVS,
# kill the pending background sleep and exit with status 0.
function quit {
    # The script runs under "set -e". Without disabling it here, the first
    # failing cleanup command (for example ovs-vsctl when OVS is already down,
    # or when br-phy was never created) would abort the handler: the remaining
    # cleanup would be skipped and the script would exit non-zero instead of 0.
    # Cleanup is therefore best-effort: every step is attempted.
    set +e

    log_info "$CONTAINER_NAME" "Stopping OVS before quit"
    # delete the bridge and move IP address back to eth0 to restore connectivity
    # when OVS is stopped.
    del_br_phy || log_warning "$CONTAINER_NAME" "Could not fully remove br-phy and restore eth0"
    stop_ovs || log_warning "$CONTAINER_NAME" "Could not stop OVS cleanly"
    # kill background sleep process
    if [ -n "$SLEEP_PID" ]; then kill "$SLEEP_PID" > /dev/null 2>&1 || true; fi
    exit 0
}

# Only INT and TERM are trapped. Trapping EXIT as well would make the script
# ignore the "exit 0" statement in quit and exit with code 128 + SIGNAL.
trap quit INT TERM

# Patch ovs-ctl first, then bring up OVS and the br-phy bridge.
fix_ovs_ctl

start_ovs
add_br_phy

log_info "$CONTAINER_NAME" "Started the loop that checks OVS status every 30 seconds"
while :; do
    # sleep runs in the background and we wait on it, so that the script can
    # exit immediately when it receives SIGINT / SIGTERM
    # see https://stackoverflow.com/questions/32041674/linux-how-to-kill-sleep
    sleep 30 &
    SLEEP_PID=$!
    wait "$SLEEP_PID"

    # Nothing to do while OVS is still running.
    if check_ovs_status; then
        continue
    fi

    # OVS was stopped in the container.
    log_warning "$CONTAINER_NAME" "OVS was stopped. Starting it again"

    start_ovs
done
10 changes: 10 additions & 0 deletions build/yamls/patches/kind/startOvs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: antrea-agent
spec:
template:
spec:
containers:
- name: antrea-ovs
command: ["start_ovs_netdev"]
8 changes: 8 additions & 0 deletions ci/kind/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
kind: Cluster
apiVersion: kind.sigs.k8s.io/v1alpha3
networking:
disableDefaultCNI: true
podSubnet: 10.10.0.0/16
nodes:
- role: control-plane
- role: worker
1 change: 1 addition & 0 deletions cmd/antrea-agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ func run(o *Options) error {
o.config.CNISocket,
o.config.HostProcPathPrefix,
o.config.DefaultMTU,
o.config.OVSDatapathType,
nodeConfig,
ovsBridgeClient,
ofClient,
Expand Down
19 changes: 18 additions & 1 deletion docs/kind.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Deploying Antrea on a Kind cluster

At the moment Kind is only supported on Linux hosts. We are working on
supporting macOS hosts as well.

## Create a Kind cluster and deploy Antrea in a few seconds

### Create a Kind cluster
Expand Down Expand Up @@ -31,6 +34,8 @@ These instructions assume that you have built the `antrea/antrea-ubuntu` Docker
image locally (e.g. by running `make` from the root of the repository).

```bash
# "fix" the host's veth interfaces (for the different Kind Nodes)
kind get nodes | xargs ./hack/kind_linux.sh
# load the Antrea Docker image in the Nodes
kind load docker-image antrea/antrea-ubuntu:latest
# deploy Antrea
Expand All @@ -39,7 +44,7 @@ kind load docker-image antrea/antrea-ubuntu:latest

### Check that everything is working

After a few seconds you sould be able to observe the following when running
After a few seconds you should be able to observe the following when running
`kubectl get -n kube-system pods -l app=antrea`:
```bash
NAME READY STATUS RESTARTS AGE
Expand All @@ -65,3 +70,15 @@ requires some changes to the way Antrea is deployed. Most notably:
(`netdev`) OVS datapath type is used
* the Antrea agent's Init Container no longer needs to load the `openvswitch`
kernel module
* the `start_ovs` script used by the `antrea-ovs` container needs to be
replaced with the `start_ovs_netdev` script, which creates an additional
bridge (`br-phy`) as required for [OVS userspace
tunneling](http://docs.openvswitch.org/en/latest/howto/userspace-tunneling/)

### Why do I need to run the `hack/kind_linux.sh` script on my host?

The script is required for Antrea to work properly in a Kind cluster on
Linux. It takes care of disabling TX hardware checksum offload for the veth
interface (in the host's network namespace) of each Kind Node. This is required
when using OVS in userspace mode. Refer to [Antrea GitHub issue
#14](https://github.com/vmware-tanzu/antrea/issues/14) for more information.
3 changes: 3 additions & 0 deletions hack/generate-manifest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,9 @@ if $KIND; then
$KUSTOMIZE edit add patch tunDevice.yml
# edit antrea Agent configuration to use the netdev datapath
$KUSTOMIZE edit add patch ovsDatapath.yml
# antrea-ovs should use start_ovs_netdev instead of start_ovs to ensure that the br-phy bridge
# is created.
$KUSTOMIZE edit add patch startOvs.yml
# change initContainer script and remove SYS_MODULE capability
$KUSTOMIZE edit add patch installCni.yml
fi
Expand Down
30 changes: 30 additions & 0 deletions hack/kind_linux.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/usr/bin/env bash

# Copyright 2019 Antrea Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script is required for Antrea to work properly in a Kind cluster on Linux. It takes care of
# disabling TX hardware checksum offload for the veth interface (in the host's network namespace) of
# each Kind Node. This is required when using OVS in userspace mode. Refer to
# https://github.com/vmware-tanzu/antrea/issues/14 for more information.

# The script uses the antrea/ethtool Docker image (so that ethtool does not need to be installed on
# the Linux host).

# For every Kind Node name given on the command line: find the host-side veth
# peer of the Node's eth0 and disable TX checksum offload on it.
for node in "$@"; do
    # In one-line mode the Node's eth0 is printed as
    # "<idx>: eth0@if<peerIdx>: <flags> ...". The third field (split on '@'
    # and ':') is "if<peerIdx>"; cut drops the leading "if".
    peerIdx=$(docker exec "$node" ip -o link show eth0 | awk -F'[@:]' '{ print $3 }' | cut -c 3-)
    case "$peerIdx" in
        ''|*[!0-9]*)
            echo "Could not determine the veth peer index of eth0 in node $node" >&2
            exit 1
            ;;
    esac

    # Look up the host interface whose index is peerIdx. The match is anchored
    # at the start of the line so that index 5 does not also match "15:" or a
    # MAC address containing "05:". --rm removes the helper container once the
    # command has run instead of leaving a stopped container behind.
    peerName=$(docker run --rm --net=host antrea/ethtool:latest ip -o link | grep "^$peerIdx:" | awk -F'[:@]' '{ print $2 }' | cut -c 2-)
    if [ -z "$peerName" ]; then
        echo "Could not find host interface with index $peerIdx for node $node" >&2
        exit 1
    fi

    echo "Disabling TX checksum offload for node $node ($peerName)"
    docker run --rm --net=host --privileged antrea/ethtool:latest ethtool -K "$peerName" tx off
done
41 changes: 26 additions & 15 deletions pkg/agent/cniserver/pod_configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
"github.com/vmware-tanzu/antrea/pkg/agent/interfacestore"
"github.com/vmware-tanzu/antrea/pkg/agent/openflow"
"github.com/vmware-tanzu/antrea/pkg/agent/util"
"github.com/vmware-tanzu/antrea/pkg/agent/util/ethtool"
"github.com/vmware-tanzu/antrea/pkg/ovs/ovsconfig"
)

Expand All @@ -57,6 +58,24 @@ const (
ovsExternalIDPodNamespace = "pod-namespace"
)

// podConfigurator groups the clients and settings that are used when setting
// up the network interfaces of a Pod.
type podConfigurator struct {
	// ovsBridgeClient is the OVS bridge client supplied to newPodConfigurator.
	ovsBridgeClient ovsconfig.OVSBridgeClient
	// ofClient is the OpenFlow client supplied to newPodConfigurator.
	ofClient openflow.Client
	// ifaceStore is the interface store supplied to newPodConfigurator.
	ifaceStore interfacestore.InterfaceStore
	// gatewayMAC is the hardware (MAC) address of the gateway.
	gatewayMAC net.HardwareAddr
	// ovsDatapathType is the OVS datapath type in use. setupInterfaces
	// compares it with ovsconfig.OVSDatapathNetdev to decide whether TX
	// checksum offload must be disabled on the container veth.
	ovsDatapathType string
}

// newPodConfigurator returns a podConfigurator that holds the given OVS bridge
// client, OpenFlow client, interface store, gateway MAC address and OVS
// datapath type.
func newPodConfigurator(
	ovsBridgeClient ovsconfig.OVSBridgeClient,
	ofClient openflow.Client,
	ifaceStore interfacestore.InterfaceStore,
	gatewayMAC net.HardwareAddr,
	ovsDatapathType string,
) *podConfigurator {
	return &podConfigurator{
		ovsBridgeClient: ovsBridgeClient,
		ofClient:        ofClient,
		ifaceStore:      ifaceStore,
		gatewayMAC:      gatewayMAC,
		ovsDatapathType: ovsDatapathType,
	}
}

// setupInterfaces creates a veth pair: containerIface is in the container
// network namespace and hostIface is in the host network namespace.
func (pc *podConfigurator) setupInterfaces(
Expand All @@ -78,6 +97,13 @@ func (pc *podConfigurator) setupInterfaces(
containerIface.Sandbox = netns.Path()
hostIface.Name = hostVeth.Name
hostIface.Mac = hostVeth.HardwareAddr.String()
// OVS netdev datapath doesn't support TX checksum offloading, i.e. if packet
// arrives with bad/no checksum it will be sent to the output port with same bad/no checksum.
if pc.ovsDatapathType == ovsconfig.OVSDatapathNetdev {
if err := ethtool.EthtoolTXHWCsumOff(containerVeth.Name); err != nil {
return fmt.Errorf("error when disabling TX checksum offload on container veth: %v", err)
}
}
return nil
}); err != nil {
return nil, nil, err
Expand Down Expand Up @@ -264,21 +290,6 @@ func ParseOVSPortInterfaceConfig(portData *ovsconfig.OVSPortData, portConfig *in
PodNamespace: podNamespace}
}

type podConfigurator struct {
ovsBridgeClient ovsconfig.OVSBridgeClient
ofClient openflow.Client
ifaceStore interfacestore.InterfaceStore
gatewayMAC net.HardwareAddr
}

func newPodConfigurator(
ovsBridgeClient ovsconfig.OVSBridgeClient,
ofClient openflow.Client,
ifaceStore interfacestore.InterfaceStore,
gatewayMAC net.HardwareAddr) *podConfigurator {
return &podConfigurator{ovsBridgeClient, ofClient, ifaceStore, gatewayMAC}
}

func (pc *podConfigurator) configureInterface(
podName string,
podNameSpace string,
Expand Down
4 changes: 3 additions & 1 deletion pkg/agent/cniserver/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,7 @@ func (s *CNIServer) CmdCheck(ctx context.Context, request *cnipb.CniCmdRequest)
func New(
cniSocket, hostProcPathPrefix string,
defaultMTU int,
ovsDatapathType string,
nodeConfig *types.NodeConfig,
ovsBridgeClient ovsconfig.OVSBridgeClient,
ofClient openflow.Client,
Expand All @@ -494,9 +495,10 @@ func New(
nodeConfig: nodeConfig,
hostProcPathPrefix: hostProcPathPrefix,
defaultMTU: defaultMTU,
ovsDapathType: ovsDatapathType,
kubeClient: kubeClient,
containerAccess: newContainerAccessArbitrator(),
podConfigurator: newPodConfigurator(ovsBridgeClient, ofClient, ifaceStore, nodeConfig.GatewayConfig.MAC),
podConfigurator: newPodConfigurator(ovsBridgeClient, ofClient, ifaceStore, nodeConfig.GatewayConfig.MAC, ovsDatapathType),
}
}

Expand Down
Loading

0 comments on commit c7c497d

Please sign in to comment.