Skip to content

Commit

Permalink
backend/vxlan: Add support for "direct routing"
Browse files Browse the repository at this point in the history
This skips vxlan encapsulation if the hosts are on the same subnet
  • Loading branch information
tomdee committed Aug 14, 2017
1 parent ea2cb64 commit 74b4346
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 50 deletions.
1 change: 1 addition & 0 deletions Documentation/backends.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Type and options:
* `VNI` (number): VXLAN Identifier (VNI) to be used. Defaults to 1.
* `Port` (number): UDP port to use for sending encapsulated packets. Defaults to kernel default, currently 8472.
* `GBP` (Boolean): Enable [VXLAN Group Based Policy](https://github.com/torvalds/linux/commit/3511494ce2f3d3b77544c79b87511a4ddb61dc89). Defaults to `false`.
* `DirectRouting` (Boolean): Enable direct routes (like `host-gw`) when the hosts are on the same subnet. VXLAN will only be used to encapsulate packets to hosts on different subnets. Defaults to `false`.

### host-gw

Expand Down
3 changes: 2 additions & 1 deletion backend/vxlan/device.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ type vxlanDeviceAttrs struct {
}

type vxlanDevice struct {
link *netlink.Vxlan
link *netlink.Vxlan
directRouting bool
}

func newVXLANDevice(devAttrs *vxlanDeviceAttrs) (*vxlanDevice, error) {
Expand Down
14 changes: 11 additions & 3 deletions backend/vxlan/vxlan.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,17 @@ package vxlan
// 3) Create an FDB entry with the VTEP MAC and the public IP of the remote flannel daemon.
//
// In this scheme the number of table entries scales linearly with the number of remote hosts - 1 route, 1 ARP entry and 1 FDB entry per host
//
// In this newest scheme, there is also the option of skipping the use of vxlan for hosts that are on the same subnet.
// This is called "directRouting".

import (
"encoding/json"
"fmt"
"net"

log "github.com/golang/glog"

"golang.org/x/net/context"

"github.com/coreos/flannel/backend"
Expand Down Expand Up @@ -98,9 +103,10 @@ func newSubnetAttrs(publicIP net.IP, mac net.HardwareAddr) (*subnet.LeaseAttrs,
func (be *VXLANBackend) RegisterNetwork(ctx context.Context, config *subnet.Config) (backend.Network, error) {
// Parse our configuration
cfg := struct {
VNI int
Port int
GBP bool
VNI int
Port int
GBP bool
DirectRouting bool
}{
VNI: defaultVNI,
}
Expand All @@ -110,6 +116,7 @@ func (be *VXLANBackend) RegisterNetwork(ctx context.Context, config *subnet.Conf
return nil, fmt.Errorf("error decoding VXLAN backend config: %v", err)
}
}
log.Infof("VXLAN config: VNI=%d Port=%d GBP=%v DirectRouting=%v", cfg.VNI, cfg.Port, cfg.GBP, cfg.DirectRouting)

devAttrs := vxlanDeviceAttrs{
vni: uint32(cfg.VNI),
Expand All @@ -124,6 +131,7 @@ func (be *VXLANBackend) RegisterNetwork(ctx context.Context, config *subnet.Conf
if err != nil {
return nil, err
}
dev.directRouting = cfg.DirectRouting

subnetAttrs, err := newSubnetAttrs(be.extIface.ExtAddr, dev.MACAddr())
if err != nil {
Expand Down
122 changes: 78 additions & 44 deletions backend/vxlan/vxlan_network.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,78 +85,112 @@ type vxlanLeaseAttrs struct {

func (nw *network) handleSubnetEvents(batch []subnet.Event) {
for _, event := range batch {
if event.Lease.Attrs.BackendType != "vxlan" {
log.Warningf("ignoring non-vxlan subnet(%s): type=%v", event.Lease.Subnet, event.Lease.Attrs.BackendType)
sn := event.Lease.Subnet
attrs := event.Lease.Attrs
if attrs.BackendType != "vxlan" {
log.Warningf("ignoring non-vxlan subnet(%s): type=%v", sn, attrs.BackendType)
continue
}

var attrs vxlanLeaseAttrs
if err := json.Unmarshal(event.Lease.Attrs.BackendData, &attrs); err != nil {
var vxlanAttrs vxlanLeaseAttrs
if err := json.Unmarshal(attrs.BackendData, &vxlanAttrs); err != nil {
log.Error("error decoding subnet lease JSON: ", err)
continue
}

route := netlink.Route{
// This route is used when traffic should be vxlan encapsulated
vxlanRoute := netlink.Route{
LinkIndex: nw.dev.link.Attrs().Index,
Scope: netlink.SCOPE_UNIVERSE,
Dst: event.Lease.Subnet.ToIPNet(),
Gw: event.Lease.Subnet.IP.ToIP(),
Dst: sn.ToIPNet(),
Gw: sn.IP.ToIP(),
}
route.SetFlag(syscall.RTNH_F_ONLINK)
vxlanRoute.SetFlag(syscall.RTNH_F_ONLINK)

switch event.Type {
case subnet.EventAdded:
log.V(2).Infof("adding subnet: %s PublicIP: %s VtepMAC: %s", event.Lease.Subnet, event.Lease.Attrs.PublicIP, net.HardwareAddr(attrs.VtepMAC))

if err := nw.dev.AddARP(neighbor{IP: event.Lease.Subnet.IP, MAC: net.HardwareAddr(attrs.VtepMAC)}); err != nil {
log.Error("AddARP failed: ", err)
// directRouting is where the remote host is on the same subnet so vxlan isn't required.
directRoute := netlink.Route{
Dst: sn.ToIPNet(),
Gw: attrs.PublicIP.ToIP(),
}
var directRoutingOK = false
if nw.dev.directRouting {
routes, err := netlink.RouteGet(attrs.PublicIP.ToIP())
if err != nil {
log.Errorf("Couldn't lookup route to %v: %v", attrs.PublicIP, err)
continue
}
if len(routes) == 1 && routes[0].Gw == nil {
// There is only a single route and there's no gateway (i.e. it's directly connected)
directRoutingOK = true
}
}

if err := nw.dev.AddFDB(neighbor{IP: event.Lease.Attrs.PublicIP, MAC: net.HardwareAddr(attrs.VtepMAC)}); err != nil {
log.Error("AddFDB failed: ", err)
switch event.Type {
case subnet.EventAdded:
if directRoutingOK {
log.V(2).Infof("Adding direct route to subnet: %s PublicIP: %s", sn, attrs.PublicIP)

// Try to clean up the ARP entry then continue
if err := nw.dev.DelARP(neighbor{IP: event.Lease.Subnet.IP, MAC: net.HardwareAddr(attrs.VtepMAC)}); err != nil {
log.Error("DelARP failed: ", err)
if err := netlink.RouteReplace(&directRoute); err != nil {
log.Errorf("Error adding route to %v via %v: %v", sn, attrs.PublicIP, err)
continue
}
} else {
log.V(2).Infof("adding subnet: %s PublicIP: %s VtepMAC: %s", sn, attrs.PublicIP, net.HardwareAddr(vxlanAttrs.VtepMAC))
if err := nw.dev.AddARP(neighbor{IP: sn.IP, MAC: net.HardwareAddr(vxlanAttrs.VtepMAC)}); err != nil {
log.Error("AddARP failed: ", err)
continue
}

continue
}
if err := nw.dev.AddFDB(neighbor{IP: attrs.PublicIP, MAC: net.HardwareAddr(vxlanAttrs.VtepMAC)}); err != nil {
log.Error("AddFDB failed: ", err)

// Set the route - the kernel would ARP for the Gw IP address if it hadn't already been set above so make sure
// this is done last.
if err := netlink.RouteReplace(&route); err != nil {
log.Errorf("failed to add route (%s -> %s): %v", route.Dst, route.Gw, err)
// Try to clean up the ARP entry then continue
if err := nw.dev.DelARP(neighbor{IP: event.Lease.Subnet.IP, MAC: net.HardwareAddr(vxlanAttrs.VtepMAC)}); err != nil {
log.Error("DelARP failed: ", err)
}

// Try to clean up both the ARP and FDB entries then continue
if err := nw.dev.DelARP(neighbor{IP: event.Lease.Subnet.IP, MAC: net.HardwareAddr(attrs.VtepMAC)}); err != nil {
log.Error("DelARP failed: ", err)
continue
}

if err := nw.dev.DelFDB(neighbor{IP: event.Lease.Attrs.PublicIP, MAC: net.HardwareAddr(attrs.VtepMAC)}); err != nil {
log.Error("DelFDB failed: ", err)
}
// Set the route - the kernel would ARP for the Gw IP address if it hadn't already been set above so make sure
// this is done last.
if err := netlink.RouteReplace(&vxlanRoute); err != nil {
log.Errorf("failed to add vxlanRoute (%s -> %s): %v", vxlanRoute.Dst, vxlanRoute.Gw, err)

continue
}
// Try to clean up both the ARP and FDB entries then continue
if err := nw.dev.DelARP(neighbor{IP: event.Lease.Subnet.IP, MAC: net.HardwareAddr(vxlanAttrs.VtepMAC)}); err != nil {
log.Error("DelARP failed: ", err)
}

case subnet.EventRemoved:
log.V(2).Infof("removing subnet: %s PublicIP: %s VtepMAC: %s", event.Lease.Subnet, event.Lease.Attrs.PublicIP, net.HardwareAddr(attrs.VtepMAC))
if err := nw.dev.DelFDB(neighbor{IP: event.Lease.Attrs.PublicIP, MAC: net.HardwareAddr(vxlanAttrs.VtepMAC)}); err != nil {
log.Error("DelFDB failed: ", err)
}

// Try to remove all entries - don't bail out if one of them fails.
if err := nw.dev.DelARP(neighbor{IP: event.Lease.Subnet.IP, MAC: net.HardwareAddr(attrs.VtepMAC)}); err != nil {
log.Error("DelARP failed: ", err)
continue
}
}
case subnet.EventRemoved:
if directRoutingOK {
log.V(2).Infof("Removing direct route to subnet: %s PublicIP: %s", sn, attrs.PublicIP)
if err := netlink.RouteDel(&directRoute); err != nil {
log.Errorf("Error deleting route to %v via %v: %v", sn, attrs.PublicIP, err)
}
} else {
log.V(2).Infof("removing subnet: %s PublicIP: %s VtepMAC: %s", sn, attrs.PublicIP, net.HardwareAddr(vxlanAttrs.VtepMAC))

if err := nw.dev.DelFDB(neighbor{IP: event.Lease.Attrs.PublicIP, MAC: net.HardwareAddr(attrs.VtepMAC)}); err != nil {
log.Error("DelFDB failed: ", err)
}
// Try to remove all entries - don't bail out if one of them fails.
if err := nw.dev.DelARP(neighbor{IP: sn.IP, MAC: net.HardwareAddr(vxlanAttrs.VtepMAC)}); err != nil {
log.Error("DelARP failed: ", err)
}

if err := netlink.RouteDel(&route); err != nil {
log.Errorf("failed to delete route (%s -> %s): %v", route.Dst, route.Gw, err)
}
if err := nw.dev.DelFDB(neighbor{IP: attrs.PublicIP, MAC: net.HardwareAddr(vxlanAttrs.VtepMAC)}); err != nil {
log.Error("DelFDB failed: ", err)
}

if err := netlink.RouteDel(&vxlanRoute); err != nil {
log.Errorf("failed to delete vxlanRoute (%s -> %s): %v", vxlanRoute.Dst, vxlanRoute.Gw, err)
}
}
default:
log.Error("internal error: unknown event type: ", int(event.Type))
}
Expand Down
4 changes: 2 additions & 2 deletions dist/functional-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,8 @@ multi_test() {
docker run --name=flannel-host$host -d -it --privileged --entrypoint /bin/sh $flannel_img

# Start two flanneld instances
docker exec -d flannel-host$host sh -c "/opt/bin/flanneld -subnet-file /vxlan.env -etcd-prefix=/vxlan/network --etcd-endpoints=$etcd_endpt 2>vxlan.log"
docker exec -d flannel-host$host sh -c "/opt/bin/flanneld -subnet-file /hostgw.env -etcd-prefix=/hostgw/network --etcd-endpoints=$etcd_endpt 2>hostgw.log"
docker exec -d flannel-host$host sh -c "/opt/bin/flanneld -v 10 -subnet-file /vxlan.env -etcd-prefix=/vxlan/network --etcd-endpoints=$etcd_endpt 2>vxlan.log"
docker exec -d flannel-host$host sh -c "/opt/bin/flanneld -v 10 -subnet-file /hostgw.env -etcd-prefix=/hostgw/network --etcd-endpoints=$etcd_endpt 2>hostgw.log"
done

echo flannels running
Expand Down

0 comments on commit 74b4346

Please sign in to comment.