Skip to content

Commit

Permalink
VXLAN Tunnel Endpoint (VTEP) Integration
Browse files Browse the repository at this point in the history
In enterprise on-prem or hybrid cloud datacenters, traditional
load balancer devices such as BIG-IP are used for north-south
load balancing to Kubernetes cluster pods, either in routing
mode or in VXLAN tunnel mode. These VXLAN devices usually have a VNI
key based implementation (see https://datatracker.ietf.org/doc/html/rfc7348).
The Cilium VXLAN implementation does not use a VNI key and VTEP
IP/MAC mapping to direct tunnel traffic. This feature enables
Cilium VXLAN Tunnel Endpoint (VTEP) integration. The Cilium agent
option EnableVTEP enables this feature; it is disabled by
default. This feature supports both Cilium tunnel and route modes.

1. In VXLAN tunnel mode, egress packets from a Cilium-managed pod
use, before encapsulation, the host-namespace-side MAC address as
the destination MAC address. When such an egress packet arrives at
the VTEP device, that MAC address does not match the VTEP
MAC address of the VTEP device and the packet is
dropped. Thus we need to rewrite the inner packet destination
MAC address to the remote VTEP MAC address.

2. Cilium VXLAN uses the pod identity as the VXLAN tunnel key, which
does not match the pre-configured VNI key of the VTEP device.

This patch addresses the above two points.
In the cilium#17106 discussion, we decided to pre-populate the IPCache map
with the VTEP devices' CIDR, VNI, MAC and IP. When packets egress to VTEP
devices, the pre-populated VTEP device entry in the IPCache map is used to
encapsulate the packet.

One issue was observed when using eth_store_daddr() from
bpf/lib/eth.h to rewrite the inner packet destination MAC
address: it failed to pass the BPF verifier check with
"R1 invalid mem access 'inv'", see full detail in issue
kernel, both issues are resolved by initializing vtep_mac to 0

Example to enable this feature in configmap cilium-config:

enable-vtep: "true"
vtep-endpoint: "10.169.72.14    10.169.72.15"
vtep-cidr: "10.1.99.0/24        10.1.88.0/24"
vtep-mac: "52:54:00:3e:3f:c1   52:54:00:4e:01:a6"

VTEP devices must use the Cilium reserved world identity "2" as their VNI.

Suggested-by: Joe Stringer <joe@cilium.io>
Signed-off-by: Vincent Li <v.li@f5.com>
  • Loading branch information
vincentmli committed Oct 25, 2021
1 parent a93ae9f commit a6739a8
Show file tree
Hide file tree
Showing 9 changed files with 104 additions and 7 deletions.
10 changes: 10 additions & 0 deletions bpf/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,11 @@ MAX_OVERLAY_OPTIONS = $(MAX_BASE_OPTIONS) -DENCAP_IFINDEX=1 -DTUNNEL_MODE=1
ifneq ("$(KERNEL)","49")
MAX_OVERLAY_OPTIONS += -DLB_SELECTION=1 -DLB_SELECTION_MAGLEV=1
endif
ifeq ("$(KERNEL)","54")
MAX_OVERLAY_OPTIONS+= -DENABLE_VTEP=1
else ifeq ("$(KERNEL)","netnext")
MAX_OVERLAY_OPTIONS+= -DENABLE_VTEP=1
endif
endif

bpf_overlay.ll: bpf_overlay.c $(LIB)
Expand Down Expand Up @@ -261,6 +266,11 @@ MAX_LXC_OPTIONS = $(MAX_BASE_OPTIONS) -DENCAP_IFINDEX=1 -DTUNNEL_MODE=1
ifneq ("$(KERNEL)","49")
MAX_LXC_OPTIONS += -DENABLE_EGRESS_GATEWAY=1
endif
ifeq ("$(KERNEL)","54")
MAX_LXC_OPTIONS+= -DENABLE_VTEP=1
else ifeq ("$(KERNEL)","netnext")
MAX_LXC_OPTIONS+= -DENABLE_VTEP=1
endif
endif

bpf_lxc.ll: bpf_lxc.c $(LIB)
Expand Down
1 change: 1 addition & 0 deletions bpf/bpf_alignchecker.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
/* Copyright (C) 2018-2020 Authors of Cilium */

/* Ensure declaration of notification event types */
#define ENABLE_VTEP
#define DEBUG
#define TRACE_NOTIFY
#define DROP_NOTIFY
Expand Down
15 changes: 15 additions & 0 deletions bpf/bpf_lxc.c
Original file line number Diff line number Diff line change
Expand Up @@ -523,6 +523,7 @@ static __always_inline int handle_ipv4_from_lxc(struct __ctx_buff *ctx,
struct ct_state ct_state = {};
__be32 orig_dip;
__u32 tunnel_endpoint = 0;
mac_t vtep_mac __maybe_unused = 0;
__u8 encrypt_key = 0;
__u32 monitor = 0;
__u8 reason;
Expand Down Expand Up @@ -622,6 +623,9 @@ static __always_inline int handle_ipv4_from_lxc(struct __ctx_buff *ctx,
*dst_id = info->sec_label;
tunnel_endpoint = info->tunnel_endpoint;
encrypt_key = get_min_encrypt_key(info->key);
#ifdef ENABLE_VTEP
vtep_mac = info->vtep_mac;
#endif
#ifdef ENABLE_WIREGUARD
/* If we detect that the dst is a remote endpoint, we
* need to mark the packet. The ip rule which matches
Expand Down Expand Up @@ -816,6 +820,17 @@ static __always_inline int handle_ipv4_from_lxc(struct __ctx_buff *ctx,
skip_egress_gateway:
#endif

#ifdef ENABLE_VTEP
{
if (vtep_mac && tunnel_endpoint) {
if (eth_store_daddr(ctx, (__u8 *)&vtep_mac, 0) < 0)
return DROP_WRITE_ERROR;
return __encap_and_redirect_with_nodeid(ctx, tunnel_endpoint,
WORLD_ID, monitor);
}
}
#endif

#ifdef TUNNEL_MODE
# ifdef ENABLE_WIREGUARD
/* In the tunnel mode we encapsulate pod2pod traffic only via Wireguard
Expand Down
11 changes: 11 additions & 0 deletions bpf/bpf_overlay.c
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,17 @@ static __always_inline int handle_ipv4(struct __ctx_buff *ctx, __u32 *identity)

if (*identity == HOST_ID)
return DROP_INVALID_IDENTITY;
#ifdef ENABLE_VTEP
{
struct remote_endpoint_info *info;

info = lookup_ip4_remote_endpoint(ip4->saddr);
if (info && info->vtep_mac) {
if (*identity != WORLD_ID)
return DROP_INVALID_VNI;
}
}
#endif
}

cilium_dbg(ctx, DBG_DECAP, key.tunnel_id, key.tunnel_label);
Expand Down
4 changes: 4 additions & 0 deletions bpf/lib/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,9 @@ struct remote_endpoint_info {
__u32 sec_label;
__u32 tunnel_endpoint;
__u8 key;
#ifdef ENABLE_VTEP
mac_t vtep_mac;
#endif
};

struct policy_key {
Expand Down Expand Up @@ -431,6 +434,7 @@ enum {
#define DROP_PROXY_UNKNOWN_PROTO -180
#define DROP_POLICY_DENY -181
#define DROP_VLAN_FILTERED -182
#define DROP_INVALID_VNI -183

#define NAT_PUNT_TO_STACK DROP_NAT_NOT_NEEDED

Expand Down
26 changes: 26 additions & 0 deletions daemon/cmd/datapath.go
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,13 @@ func (d *Daemon) syncEndpointsAndHostIPs() error {
}
}

if option.Config.EnableVTEP {
err := setupIPCacheVTEPMapping()
if err != nil {
return err
}
}

return nil
}

Expand Down Expand Up @@ -458,6 +465,25 @@ func setupIPSec() (int, uint8, error) {
return authKeySize, spi, nil
}

// setupIPCacheVTEPMapping pre-populates the ipcache BPF map with one entry
// per configured VTEP device. Entry i is built from the i-th element of
// option.Config.VtepEndpoints, VtepCIDRs and VtepMACs, so the three lists are
// consumed in lockstep.
//
// Every entry is written with the reserved world identity and an encryption
// key of 0. It returns an error if the CIDR or MAC list has fewer elements
// than the endpoint list, or if any ipcache map update fails.
func setupIPCacheVTEPMapping() error {
	encryptKey := uint8(0)                           // no encrypt support
	vtepID := uint32(identity.ReservedIdentityWorld) // network policy identity for VTEP

	endpoints := option.Config.VtepEndpoints
	cidrs := option.Config.VtepCIDRs
	macs := option.Config.VtepMACs

	// The lists are indexed by endpoint position below; a shorter CIDR or
	// MAC list would otherwise cause an index-out-of-range panic.
	if len(cidrs) < len(endpoints) || len(macs) < len(endpoints) {
		return fmt.Errorf("Unable to set up VTEP ipcache mappings: "+
			"%d VTEP endpoints configured but only %d CIDRs and %d MACs",
			len(endpoints), len(cidrs), len(macs))
	}

	for i, ep := range endpoints {
		log.WithFields(logrus.Fields{
			logfields.IPAddr: ep,
		}).Debug("Updating ipcache map entry for VTEP")

		err := ipcachemap.UpdateIPCacheMapping(cidrs[i], ep, vtepID, macs[i], encryptKey)
		if err != nil {
			return fmt.Errorf("Unable to set up VTEP ipcache mappings: %w", err)
		}
	}

	return nil
}

// Datapath returns a reference to the datapath implementation.
func (d *Daemon) Datapath() datapath.Datapath {
return d.datapath
Expand Down
4 changes: 4 additions & 0 deletions pkg/datapath/linux/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,10 @@ func (h *HeaderfileWriter) WriteNodeConfig(w io.Writer, cfg *datapath.LocalNodeC
cDefinesMap["ENABLE_CUSTOM_CALLS"] = "1"
}

if option.Config.EnableVTEP {
cDefinesMap["ENABLE_VTEP"] = "1"
}

vlanFilter, err := vlanFilterMacros()
if err != nil {
return err
Expand Down
39 changes: 32 additions & 7 deletions pkg/maps/ipcache/ipcache.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@ import (
"unsafe"

"github.com/cilium/cilium/pkg/bpf"
"github.com/cilium/cilium/pkg/cidr"
"github.com/cilium/cilium/pkg/logging"
"github.com/cilium/cilium/pkg/logging/logfields"
"github.com/cilium/cilium/pkg/mac"
"github.com/cilium/cilium/pkg/types"

"golang.org/x/sys/unix"
Expand Down Expand Up @@ -115,18 +117,17 @@ func NewKey(ip net.IP, mask net.IPMask) Key {
return result
}

// RemoteEndpointInfo implements the bpf.MapValue interface. It contains the
// security identity of a remote endpoint.
// +k8s:deepcopy-gen=true
// +k8s:deepcopy-gen:interfaces=github.com/cilium/cilium/pkg/bpf.MapValue
type RemoteEndpointInfo struct {
SecurityIdentity uint32 `align:"sec_label"`
TunnelEndpoint types.IPv4 `align:"tunnel_endpoint"`
Key uint8 `align:"key"`
SecurityIdentity uint32 `align:"sec_label"`
TunnelEndpoint types.IPv4 `align:"tunnel_endpoint"`
Key uint8 `align:"key"`
VtepMAC mac.Uint64MAC `align:"vtep_mac"`
}

func (v *RemoteEndpointInfo) String() string {
return fmt.Sprintf("%d %d %s", v.SecurityIdentity, v.Key, v.TunnelEndpoint)
return fmt.Sprintf("identity=%d encryptkey=%d tunnelendpoint=%s vtepmac=%s",
v.SecurityIdentity, v.Key, v.TunnelEndpoint, v.VtepMAC)
}

// GetValuePtr returns the unsafe pointer to the BPF value.
Expand Down Expand Up @@ -248,3 +249,27 @@ var (
func Reopen() error {
return IPCache.Map.Reopen()
}

// UpdateIPCacheMapping writes a single entry into the ipcache BPF map.
//
// The map key is derived from the IP and mask of newCIDR. The value carries
// securityIdentity, encryptKey, the 64-bit form of vtepMAC and, when
// newTunnelEndpoint is an IPv4 address, that address as the tunnel endpoint.
// A non-IPv4 newTunnelEndpoint leaves the tunnel endpoint zeroed.
//
// It returns an error if newCIDR is nil, if vtepMAC cannot be converted to
// its 64-bit form, or if the map update fails.
func UpdateIPCacheMapping(newCIDR *cidr.CIDR, newTunnelEndpoint net.IP,
	securityIdentity uint32, vtepMAC mac.MAC, encryptKey uint8) error {

	// Guard against dereferencing a nil CIDR when building the key.
	if newCIDR == nil {
		return fmt.Errorf("unable to update ipcache mapping: CIDR is nil")
	}

	key := NewKey(newCIDR.IP, newCIDR.Mask)

	// Named vtepMAC64 rather than "mac" so the imported mac package is not
	// shadowed inside this function.
	vtepMAC64, err := vtepMAC.Uint64()
	if err != nil {
		return err
	}

	value := RemoteEndpointInfo{
		SecurityIdentity: securityIdentity,
		VtepMAC:          vtepMAC64,
		Key:              encryptKey,
	}
	if ip4 := newTunnelEndpoint.To4(); ip4 != nil {
		copy(value.TunnelEndpoint[:], ip4)
	}

	return IPCache.Update(&key, &value)
}
1 change: 1 addition & 0 deletions pkg/monitor/api/drop.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ var errors = map[uint8]string{
180: "Proxy redirection not supported for protocol",
181: "Policy denied by denylist",
182: "VLAN traffic disallowed by VLAN filter",
183: "Incorrect VNI from VTEP",
}

// DropReason prints the drop reason in a human readable string
Expand Down

0 comments on commit a6739a8

Please sign in to comment.