Skip to content

Commit

Permalink
bpf: Encap with cilium_{vxlan,geneve} before passing to WG
Browse files Browse the repository at this point in the history
So that a src security ID can be transferred to a remote node (e.g., for
netpol checks).

This commit changes the packet path when WireGuard + tunneling are enabled
AND the newly introduced --wireguard-encapsulate is set.

Previously, we had the following:

  ┌──────┐     1.       ┌──────┐   4.
  │ lxc0 ├──────────────► eth0 ├──────►
  └──────┘              └─┬───▲┘
                          │   │
                          │   │
                        2.│   │ 3.
                          │   │
┌───────────────┐     ┌───▼───┴────┐
│ cilium_vxlan  │     │cilium_wg0  │
└───────────────┘     └────────────┘

With this change:

  ┌──────┐              ┌──────┐
  │ lxc0 │   ┌──────────► eth0 ├─────►
  └───┬──┘   │          └─┬───▲┘  5.
      │      │            │   │
      │      │            │   │
    1.│    2.│         3. │   │ 4.
      │      │            │   │
┌─────▼──────┴──┐     ┌───▼───┴────┐
│ cilium_vxlan  │     │cilium_wg0  │
└───────────────┘     └────────────┘

A side effect of this change is that host-to-remote-pod traffic is going
to be encrypted (previously it was not).

The change was first made available in v1.14 [1] (controlled w/
--wireguard-encapsulate, which defaults to false). To avoid breaking
connections during an upgrade from v1.14 to v1.15 (due to missing node
IPs within allowed-ips), in v1.14 we populate those IPs regardless of
whether the feature is enabled.

[1]: #28917

Signed-off-by: Martynas Pumputis <m@lambda.lt>
  • Loading branch information
brb committed Nov 6, 2023
1 parent df969b7 commit 96eb25c
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 22 deletions.
15 changes: 0 additions & 15 deletions bpf/lib/encap.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,21 +52,6 @@ __encap_and_redirect_with_nodeid(struct __ctx_buff *ctx, __u32 src_ip __maybe_un
int ifindex;
int ret = 0;

#if defined(ENABLE_WIREGUARD) && __ctx_is == __ctx_skb
/* Redirect the packet to the WireGuard tunnel device for encryption
* if needed.
*
* A packet which previously was a subject to VXLAN/Geneve
* encapsulation (e.g., pod2pod) is going to be encapsulated only once,
* i.e., by the WireGuard tunnel netdev. This is so just to be
* compatible with < the v1.13 behavior in which the pod2pod bypassed
* VXLAN/Geneve encapsulation when the WG feature was on.
*/
ret = wg_maybe_redirect_to_encrypt(ctx);
if (IS_ERR(ret) || ret == CTX_ACT_REDIRECT)
return ret;
#endif /* defined(ENABLE_WIREGUARD) && __ctx_is == __ctx_skb */

ret = __encap_with_nodeid(ctx, src_ip, 0, tunnel_endpoint, seclabel, dstid,
vni, trace->reason, trace->monitor,
&ifindex);
Expand Down
36 changes: 35 additions & 1 deletion bpf/lib/wireguard.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ wg_maybe_redirect_to_encrypt(struct __ctx_buff *ctx)
__u16 proto = 0;
struct ipv6hdr __maybe_unused *ip6;
struct iphdr __maybe_unused *ip4;
__u8 __maybe_unused icmp_type = 0;
bool from_tunnel __maybe_unused = false;

if (!validate_ethertype(ctx, &proto))
return DROP_UNSUPPORTED_L2;
Expand Down Expand Up @@ -60,6 +62,31 @@ wg_maybe_redirect_to_encrypt(struct __ctx_buff *ctx)
case bpf_htons(ETH_P_IP):
if (!revalidate_data(ctx, &data, &data_end, &ip4))
return DROP_INVALID;
# if defined(TUNNEL_MODE)
/* A rudimentary check (inspired by is_encap()) of whether a pkt
* is coming from a tunnel device. In tunneling mode WG needs to
* encrypt such pkts, so that the src sec ID can be transferred.
*
* This also handles IPv6, as IPv6 pkts are encapsulated w/
* IPv4 tunneling.
*/
if (ip4->protocol == IPPROTO_UDP) {
int l4_off = ETH_HLEN + ipv4_hdrlen(ip4);
__be16 dport;

if (l4_load_port(ctx, l4_off + UDP_DPORT_OFF, &dport) < 0) {
/* IP fragmentation is not expected after the
* encap, so this is not a Cilium pkt.
*/
break;
}

if (dport == bpf_htons(TUNNEL_PORT)) {
from_tunnel = true;
break;
}
}
# endif /* TUNNEL_MODE */
dst = lookup_ip4_remote_endpoint(ip4->daddr, 0);
src = lookup_ip4_remote_endpoint(ip4->saddr, 0);
break;
Expand All @@ -81,6 +108,11 @@ wg_maybe_redirect_to_encrypt(struct __ctx_buff *ctx)
if ((ctx->mark & MARK_MAGIC_WG_ENCRYPTED) == MARK_MAGIC_WG_ENCRYPTED)
goto out;

#if defined(TUNNEL_MODE)
if (from_tunnel)
goto encrypt;
#endif /* TUNNEL_MODE */

/* Unless node encryption is enabled, we don't want to encrypt
* traffic from the hostns.
*
Expand All @@ -105,8 +137,10 @@ wg_maybe_redirect_to_encrypt(struct __ctx_buff *ctx)
/* Redirect to the WireGuard tunnel device if the encryption is
* required.
*/
if (dst && dst->key)
if (dst && dst->key) {
encrypt: __maybe_unused
return ctx_redirect(ctx, WG_IFINDEX, 0);
}

out:
return CTX_ACT_OK;
Expand Down
15 changes: 10 additions & 5 deletions pkg/wireguard/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ type Agent struct {

cleanup []func()

nodeToNodeEncryption bool
optOut bool
requireNodesInPeerList bool
}

// NewAgent creates a new WireGuard Agent
Expand Down Expand Up @@ -111,7 +112,11 @@ func NewAgent(privKeyPath string, localNodeStore *node.LocalNodeStore) (*Agent,

cleanup: []func(){},

nodeToNodeEncryption: option.Config.EncryptNode && !optOut,
optOut: optOut,
requireNodesInPeerList: (option.Config.EncryptNode && !optOut) ||
// Encapsulated pkts are encrypted in tunneling mode, so the outer
// src/dst IPs (= node IPs) need to be in the WG peer list.
option.Config.TunnelingEnabled(),
}, nil
}

Expand Down Expand Up @@ -359,7 +364,7 @@ func (a *Agent) UpdatePeer(nodeName, pubKeyHex string, nodeIPv4, nodeIPv6 net.IP
var lookupIPv4, lookupIPv6 net.IP
if option.Config.EnableIPv4 && nodeIPv4 != nil {
lookupIPv4 = nodeIPv4
if a.nodeToNodeEncryption {
if a.requireNodesInPeerList {
allowedIPs = append(allowedIPs, net.IPNet{
IP: nodeIPv4,
Mask: net.CIDRMask(net.IPv4len*8, net.IPv4len*8),
Expand All @@ -368,7 +373,7 @@ func (a *Agent) UpdatePeer(nodeName, pubKeyHex string, nodeIPv4, nodeIPv6 net.IP
}
if option.Config.EnableIPv6 && nodeIPv6 != nil {
lookupIPv6 = nodeIPv6
if a.nodeToNodeEncryption {
if a.requireNodesInPeerList {
allowedIPs = append(allowedIPs, net.IPNet{
IP: nodeIPv6,
Mask: net.CIDRMask(net.IPv6len*8, net.IPv6len*8),
Expand Down Expand Up @@ -616,7 +621,7 @@ func (a *Agent) Status(withPeers bool) (*models.WireguardStatus, error) {

var nodeEncryptionStatus = "Disabled"
if option.Config.EncryptNode {
if !a.nodeToNodeEncryption {
if a.optOut {
nodeEncryptionStatus = "OptedOut"
} else {
nodeEncryptionStatus = "Enabled"
Expand Down
2 changes: 1 addition & 1 deletion pkg/wireguard/agent/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ func (a *AgentSuite) TestAgent_PeerConfig_WithEncryptNode(c *C) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
wgAgent, ipCache := newTestAgent(ctx)
wgAgent.nodeToNodeEncryption = true
wgAgent.requireNodesInPeerList = true
defer ipCache.Shutdown()

ipCache.Upsert(pod1IPv4Str, k8s1NodeIPv4, 0, nil, ipcache.Identity{ID: 1, Source: source.Kubernetes})
Expand Down

0 comments on commit 96eb25c

Please sign in to comment.