From b6989723a7cc0ebc07345d78d28b63bf3825f7ba Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 12 Mar 2019 10:18:40 -0700 Subject: [PATCH] cilium: ipsec, support rolling updates Currently, rolling updates may get stuck due to a time period between when some set of nodes have started with encryption enabled and another set exist without encryption enabled. At this point these two sets of nodes can only communicate from non-encrypted to encrypted set. The set with encryption enabled will encrypt traffic that will in turn be dropped by the set that has yet to enable encryption. To resolve we make encryption a property of the endpoint id. Keeping the key identifier with the endpoint to inform cilium which key should be used during an upgrade. Because we use the mark space to identify keys we limit the key space to two keys currently. After this key secrets will need to be updated to include an id field as follows, keys: "1 hmac(sha256) 0123456789abcdef0123456789abcdef cbc(aes) 0123456789abcdef0123456789abcdef" Where '1' is the id here. IDs are enforced to be less than 16. This is a bit arbitrary we could go as high as 256 without hitting mark bit limits. However, 16 feels sufficient and we cant take bits back later so start low and bump up if needed. The id '0' is a special id and should not be used it represents the absence of keys in the datapath. Signed-off-by: John Fastabend --- bpf/bpf_ipsec.c | 4 +- bpf/bpf_lxc.c | 7 +- bpf/bpf_netdev.c | 2 + bpf/bpf_overlay.c | 3 + bpf/lib/common.h | 42 +++++-- bpf/lib/encap.h | 46 ++++--- daemon/bpf.sha | 2 +- daemon/daemon.go | 7 +- daemon/k8s_watcher.go | 7 +- pkg/bpf/endpoint.go | 6 +- pkg/datapath/ipcache/listener.go | 3 +- pkg/datapath/linux/ipsec/ipsec_linux.go | 118 +++++++++++------- pkg/datapath/linux/ipsec/ipsec_linux_test.go | 25 ++-- .../linux/linux_defaults/linux_defaults.go | 8 +- pkg/datapath/linux/node.go | 39 +++--- pkg/endpoint/policy.go | 3 +- pkg/envoy/resources.go | 4 +- pkg/identity/identity.go | 1 + pkg/ipcache/cidr.go | 2 +- pkg/ipcache/ipcache.go | 38 ++++-- pkg/ipcache/ipcache_test.go | 12 +- pkg/ipcache/kvstore.go | 6 +- pkg/ipcache/listener.go | 2 +- pkg/logging/logfields/logfields.go | 3 + pkg/maps/ipcache/ipcache.go | 3 +- pkg/maps/tunnel/logfields.go | 1 + pkg/maps/tunnel/tunnel.go | 5 +- pkg/node/node_address.go | 13 ++ 28 files changed, 261 insertions(+), 151 deletions(-) diff --git a/bpf/bpf_ipsec.c b/bpf/bpf_ipsec.c index 1c00ea81b636..fa45a5cfe7d1 100644 --- a/bpf/bpf_ipsec.c +++ b/bpf/bpf_ipsec.c @@ -27,8 +27,8 @@ __section("from-netdev") int from_netdev(struct __sk_buff *skb) { - if (skb->cb[0] == MARK_MAGIC_ENCRYPT) { - skb->mark = MARK_MAGIC_ENCRYPT; + if ((skb->cb[0] & MARK_MAGIC_HOST_MASK) == MARK_MAGIC_ENCRYPT) { + skb->mark = skb->cb[0]; set_identity(skb, skb->cb[1]); } return TC_ACT_OK; diff --git a/bpf/bpf_lxc.c b/bpf/bpf_lxc.c index 0d9eba7642b3..a05eecba3524 100644 --- a/bpf/bpf_lxc.c +++ b/bpf/bpf_lxc.c @@ -146,6 +146,7 @@ static inline int ipv6_l3_from_lxc(struct __sk_buff *skb, void *data, *data_end; union v6addr *daddr, orig_dip; __u32 tunnel_endpoint = 0; + __u8 encrypt_key = 0; __u32 monitor = 0; if (unlikely(!is_valid_lxc_src_ip(ip6))) @@ -221,6 +222,7 @@ static inline int ipv6_l3_from_lxc(struct __sk_buff *skb, if (info != NULL && info->sec_label) { *dstID = info->sec_label; tunnel_endpoint = info->tunnel_endpoint; + encrypt_key = info->key; } else { *dstID = WORLD_ID; } @@ -331,7 +333,7 @@ static inline int ipv6_l3_from_lxc(struct __sk_buff *skb, /* The packet goes to a peer not managed by this agent instance */ #ifdef ENCAP_IFINDEX if (tunnel_endpoint) { - ret = encap_and_redirect_with_nodeid_from_lxc(skb, tunnel_endpoint, SECLABEL, monitor); + ret = encap_and_redirect_with_nodeid_from_lxc(skb, tunnel_endpoint, encrypt_key, SECLABEL, monitor); /* If not redirected noteable due to IPSEC then pass up to stack * for further processing. */ @@ -475,6 +477,7 @@ static inline int handle_ipv4_from_lxc(struct __sk_buff *skb, __u32 *dstID) struct ct_state ct_state = {}; __be32 orig_dip; __u32 tunnel_endpoint = 0; + __u8 encrypt_key = 0; __u32 monitor = 0; if (!revalidate_data(skb, &data, &data_end, &ip4)) @@ -537,6 +540,7 @@ static inline int handle_ipv4_from_lxc(struct __sk_buff *skb, __u32 *dstID) if (info != NULL && info->sec_label) { *dstID = info->sec_label; tunnel_endpoint = info->tunnel_endpoint; + encrypt_key = info->key; } else { *dstID = WORLD_ID; } @@ -649,6 +653,7 @@ static inline int handle_ipv4_from_lxc(struct __sk_buff *skb, __u32 *dstID) #ifdef ENCAP_IFINDEX if (tunnel_endpoint) { int ret = encap_and_redirect_with_nodeid_from_lxc(skb, tunnel_endpoint, + encrypt_key, SECLABEL, monitor); /* If not redirected noteably due to IPSEC then pass up to stack * for further processing. diff --git a/bpf/bpf_netdev.c b/bpf/bpf_netdev.c index 93789e9ead3e..99f91a9e244a 100644 --- a/bpf/bpf_netdev.c +++ b/bpf/bpf_netdev.c @@ -242,6 +242,7 @@ static inline int handle_ipv6(struct __sk_buff *skb, __u32 src_identity) info = ipcache_lookup6(&IPCACHE_MAP, dst, V6_CACHE_KEY_LEN); if (info != NULL && info->tunnel_endpoint != 0) { int ret = encap_and_redirect_with_nodeid(skb, info->tunnel_endpoint, + info->key, secctx, TRACE_PAYLOAD_LEN); /* If IPSEC is needed recirc through ingress to use xfrm stack @@ -443,6 +444,7 @@ static inline int handle_ipv4(struct __sk_buff *skb, __u32 src_identity) info = ipcache_lookup4(&IPCACHE_MAP, ip4->daddr, V4_CACHE_KEY_LEN); if (info != NULL && info->tunnel_endpoint != 0) { int ret = encap_and_redirect_with_nodeid(skb, info->tunnel_endpoint, + info->key, secctx, TRACE_PAYLOAD_LEN); if (ret == IPSEC_ENDPOINT) diff --git a/bpf/bpf_overlay.c b/bpf/bpf_overlay.c index 6b6887c50bbd..9673bef06118 100644 --- a/bpf/bpf_overlay.c +++ b/bpf/bpf_overlay.c @@ -64,6 +64,8 @@ static inline int handle_ipv6(struct __sk_buff *skb) */ if (ip6->nexthdr != IPPROTO_ESP) goto not_esp; + + /* Decrypt "key" is determined by SPI */ skb->mark = MARK_MAGIC_DECRYPT; set_identity(skb, key.tunnel_id); /* To IPSec stack on cilium_vxlan we are going to pass @@ -148,6 +150,7 @@ static inline int handle_ipv4(struct __sk_buff *skb) */ if (ip4->protocol != IPPROTO_ESP) goto not_esp; + /* Decrypt "key" is determined by SPI */ skb->mark = MARK_MAGIC_DECRYPT; set_identity(skb, key.tunnel_id); /* To IPSec stack on cilium_vxlan we are going to pass diff --git a/bpf/lib/common.h b/bpf/lib/common.h index 404dc74ad695..1080ef14d3df 100644 --- a/bpf/lib/common.h +++ b/bpf/lib/common.h @@ -162,7 +162,7 @@ struct endpoint_key { union v6addr ip6; }; __u8 family; - __u8 pad4; + __u8 key; __u16 pad5; } __attribute__((packed)); @@ -182,6 +182,7 @@ struct endpoint_info { struct remote_endpoint_info { __u32 sec_label; __u32 tunnel_endpoint; + __u8 key; }; struct policy_key { @@ -298,17 +299,24 @@ enum { * packets security identity. The lower/upper halves are swapped to recover * the identity. * - * The 4 bits at 0XF00 provide + * The 4 bits at 0X0F00 provide * - the magic marker values which indicate whether the packet is coming from * an ingress or egress proxy, a local process and its current encryption * status. + * + * The 4 bits at 0xF000 provide + * - the key index to use for encryption when multiple keys are in-flight. + * In the IPsec case this becomes the SPI on the wire. */ -#define MARK_MAGIC_HOST_MASK 0xF00 -#define MARK_MAGIC_PROXY_INGRESS 0xA00 -#define MARK_MAGIC_PROXY_EGRESS 0xB00 -#define MARK_MAGIC_HOST 0xC00 -#define MARK_MAGIC_DECRYPT 0xD00 -#define MARK_MAGIC_ENCRYPT 0xE00 +#define MARK_MAGIC_HOST_MASK 0x0F00 +#define MARK_MAGIC_PROXY_INGRESS 0x0A00 +#define MARK_MAGIC_PROXY_EGRESS 0x0B00 +#define MARK_MAGIC_HOST 0x0C00 +#define MARK_MAGIC_DECRYPT 0x0D00 +#define MARK_MAGIC_ENCRYPT 0x0E00 + +#define MARK_MAGIC_KEY_ID 0xF000 +#define MARK_MAGIC_KEY_MASK 0xFF00 /** * get_identity - returns source identity from the mark field @@ -323,10 +331,26 @@ static inline int __inline__ get_identity(struct __sk_buff *skb) */ static inline void __inline__ set_identity(struct __sk_buff *skb, __u32 identity) { - skb->mark = skb->mark & MARK_MAGIC_HOST_MASK; + skb->mark = skb->mark & MARK_MAGIC_KEY_MASK; skb->mark |= ((identity & 0xFFFF) << 16) | ((identity & 0xFF0000) >> 16); } +/** + * or_encrypt_key - mask and shift key into encryption format + */ +static inline __u32 __inline__ or_encrypt_key(__u8 key) +{ + return (((__u32)key & 0x0F) << 12) | MARK_MAGIC_ENCRYPT; +} + +/** + * set_encrypt_key - pushes 8 bit key and encryption marker into skb mark value. + */ +static inline void __inline__ set_encrypt_key(struct __sk_buff *skb, __u8 key) +{ + skb->mark = or_encrypt_key(key); +} + /* * skb->tc_index uses * diff --git a/bpf/lib/encap.h b/bpf/lib/encap.h index e3f194c93632..25cf969d893e 100644 --- a/bpf/lib/encap.h +++ b/bpf/lib/encap.h @@ -24,7 +24,7 @@ #ifdef ENCAP_IFINDEX #ifdef ENABLE_IPSEC static inline int __inline__ -enacap_and_redirect_nomark_ipsec(struct __sk_buff *skb, __u32 tunnel_endpoint, +enacap_and_redirect_nomark_ipsec(struct __sk_buff *skb, __u32 tunnel_endpoint, __u8 key, __u32 seclabel, __u32 monitor) { /* Traffic from local host in tunnel mode will be passed to @@ -40,14 +40,14 @@ enacap_and_redirect_nomark_ipsec(struct __sk_buff *skb, __u32 tunnel_endpoint, * in both cases because xfrm layer would overwrite them. We * use cb[4] here so it doesn't need to be reset by bpf_ipsec. */ - skb->cb[0] = MARK_MAGIC_ENCRYPT; + skb->cb[0] = or_encrypt_key(key); skb->cb[1] = seclabel; skb->cb[4] = tunnel_endpoint; return IPSEC_ENDPOINT; } static inline int __inline__ -encap_and_redirect_ipsec(struct __sk_buff *skb, __u32 tunnel_endpoint, +encap_and_redirect_ipsec(struct __sk_buff *skb, __u32 tunnel_endpoint, __u8 key, __u32 seclabel, __u32 monitor) { /* IPSec is performed by the stack on any packets with the @@ -57,7 +57,7 @@ encap_and_redirect_ipsec(struct __sk_buff *skb, __u32 tunnel_endpoint, * label is stashed in the mark and extracted in bpf_netdev * to send skb onto tunnel for encap. */ - skb->mark = MARK_MAGIC_ENCRYPT; + set_encrypt_key(skb, key); set_identity(skb, seclabel); skb->cb[4] = tunnel_endpoint; return IPSEC_ENDPOINT; @@ -96,24 +96,24 @@ __encap_and_redirect_with_nodeid(struct __sk_buff *skb, __u32 tunnel_endpoint, */ static inline int __inline__ encap_and_redirect_with_nodeid(struct __sk_buff *skb, __u32 tunnel_endpoint, - __u32 seclabel, __u32 monitor) + __u8 key, __u32 seclabel, __u32 monitor) { #ifdef ENABLE_IPSEC - return enacap_and_redirect_nomark_ipsec(skb, tunnel_endpoint, seclabel, monitor); -#else - return __encap_and_redirect_with_nodeid(skb, tunnel_endpoint, seclabel, monitor); + if (key) + return enacap_and_redirect_nomark_ipsec(skb, tunnel_endpoint, key, seclabel, monitor); #endif + return __encap_and_redirect_with_nodeid(skb, tunnel_endpoint, seclabel, monitor); } static inline int __inline__ -encap_and_redirect_with_nodeid_from_lxc(struct __sk_buff *skb, __u32 tunnel_endpoint, +encap_and_redirect_with_nodeid_from_lxc(struct __sk_buff *skb, __u32 tunnel_endpoint, __u8 key, __u32 seclabel, __u32 monitor) { #ifdef ENABLE_IPSEC - return encap_and_redirect_ipsec(skb, tunnel_endpoint, seclabel, monitor); -#else - return __encap_and_redirect_with_nodeid(skb, tunnel_endpoint, seclabel, monitor); + if (key) + return encap_and_redirect_ipsec(skb, tunnel_endpoint, key, seclabel, monitor); #endif + return __encap_and_redirect_with_nodeid(skb, tunnel_endpoint, seclabel, monitor); } /* encap_and_redirect based on ENABLE_IPSEC flag and from_host bool will decide @@ -137,14 +137,22 @@ encap_and_redirect(struct __sk_buff *skb, struct endpoint_key *k, } #ifdef ENABLE_IPSEC - if (from_host) - return enacap_and_redirect_nomark_ipsec(skb, tunnel->ip4, - seclabel, monitor); - else - return encap_and_redirect_ipsec(skb, tunnel->ip4, seclabel, monitor); -#else - return __encap_and_redirect_with_nodeid(skb, tunnel->ip4, seclabel, monitor); + if (tunnel->key) { + if (from_host) + return enacap_and_redirect_nomark_ipsec(skb, + tunnel->ip4, + tunnel->key, + seclabel, + monitor); + else + return encap_and_redirect_ipsec(skb, + tunnel->ip4, + tunnel->key, + seclabel, + monitor); + } #endif + return __encap_and_redirect_with_nodeid(skb, tunnel->ip4, seclabel, monitor); } #endif /* ENCAP_IFINDEX */ #endif /* __LIB_ENCAP_H_ */ diff --git a/daemon/bpf.sha b/daemon/bpf.sha index 8f232abf6fb7..27cbd2ede5f7 100644 --- a/daemon/bpf.sha +++ b/daemon/bpf.sha @@ -1,2 +1,2 @@ -GO_BINDATA_SHA1SUM=af1ee3c89109c45ca4e8546e542ed854ce4fcc8f +GO_BINDATA_SHA1SUM=c2043b709a8ebc82155515aa31ac4637162eb3ec BPF_FILES=../bpf/COPYING ../bpf/Makefile ../bpf/Makefile.bpf ../bpf/bpf_alignchecker.c ../bpf/bpf_features.h ../bpf/bpf_ipsec.c ../bpf/bpf_lb.c ../bpf/bpf_lxc.c ../bpf/bpf_netdev.c ../bpf/bpf_overlay.c ../bpf/bpf_xdp.c ../bpf/cilium-map-migrate.c ../bpf/filter_config.h ../bpf/include/bpf/api.h ../bpf/include/elf/elf.h ../bpf/include/elf/gelf.h ../bpf/include/elf/libelf.h ../bpf/include/iproute2/bpf_elf.h ../bpf/include/linux/bpf.h ../bpf/include/linux/bpf_common.h ../bpf/include/linux/byteorder.h ../bpf/include/linux/byteorder/big_endian.h ../bpf/include/linux/byteorder/little_endian.h ../bpf/include/linux/icmp.h ../bpf/include/linux/icmpv6.h ../bpf/include/linux/if_arp.h ../bpf/include/linux/if_ether.h ../bpf/include/linux/if_packet.h ../bpf/include/linux/in.h ../bpf/include/linux/in6.h ../bpf/include/linux/ioctl.h ../bpf/include/linux/ip.h ../bpf/include/linux/ipv6.h ../bpf/include/linux/perf_event.h ../bpf/include/linux/swab.h ../bpf/include/linux/tcp.h ../bpf/include/linux/type_mapper.h ../bpf/include/linux/udp.h ../bpf/init.sh ../bpf/lib/arp.h ../bpf/lib/common.h ../bpf/lib/config.h ../bpf/lib/conntrack.h ../bpf/lib/csum.h ../bpf/lib/dbg.h ../bpf/lib/drop.h ../bpf/lib/encap.h ../bpf/lib/eps.h ../bpf/lib/eth.h ../bpf/lib/events.h ../bpf/lib/icmp6.h ../bpf/lib/ipv4.h ../bpf/lib/ipv6.h ../bpf/lib/l3.h ../bpf/lib/l4.h ../bpf/lib/lb.h ../bpf/lib/lxc.h ../bpf/lib/maps.h ../bpf/lib/metrics.h ../bpf/lib/nat46.h ../bpf/lib/policy.h ../bpf/lib/tailcall.h ../bpf/lib/trace.h ../bpf/lib/utils.h ../bpf/lib/xdp.h ../bpf/lxc_config.h ../bpf/netdev_config.h ../bpf/node_config.h ../bpf/probes/raw_change_tail.t ../bpf/probes/raw_insn.h ../bpf/probes/raw_invalidate_hash.t ../bpf/probes/raw_lpm_map.t ../bpf/probes/raw_lru_map.t ../bpf/probes/raw_main.c ../bpf/probes/raw_map_val_adj.t ../bpf/probes/raw_mark_map_val.t ../bpf/run_probes.sh ../bpf/sockops/Makefile ../bpf/sockops/bpf_redir.c ../bpf/sockops/bpf_sockops.c ../bpf/sockops/bpf_sockops.h ../bpf/sockops/sockops_config.h ../bpf/spawn_netns.sh diff --git a/daemon/daemon.go b/daemon/daemon.go index abd07446fec6..f6f21d779ac9 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -792,6 +792,7 @@ func (d *Daemon) syncLXCMap() error { } for _, ipIDPair := range specialIdentities { + hostKey := node.GetIPsecKeyIdentity() isHost := ipIDPair.ID == identity.ReservedIdentityHost if isHost { added, err := lxcmap.SyncHostEntry(ipIDPair.IP) @@ -807,7 +808,7 @@ func (d *Daemon) syncLXCMap() error { // Upsert will not propagate (reserved:foo->ID) mappings across the cluster, // and we specifically don't want to do so. - ipcache.IPIdentityCache.Upsert(ipIDPair.PrefixString(), nil, ipcache.Identity{ + ipcache.IPIdentityCache.Upsert(ipIDPair.PrefixString(), nil, hostKey, ipcache.Identity{ ID: ipIDPair.ID, Source: ipcache.FromAgentLocal, }) @@ -926,7 +927,8 @@ func NewDaemon(dp datapath.Datapath) (*Daemon, *endpointRestoreState, error) { mtuConfig := mtu.NewConfiguration(option.Config.Tunnel != option.TunnelDisabled, option.Config.MTU) if option.Config.EnableIPSec { - if err := ipsec.LoadIPSecKeysFile(option.Config.IPSecKeyFile); err != nil { + spi, err := ipsec.LoadIPSecKeysFile(option.Config.IPSecKeyFile) + if err != nil { return nil, nil, err } if option.Config.EnableIPv6 { @@ -934,6 +936,7 @@ func NewDaemon(dp datapath.Datapath) (*Daemon, *endpointRestoreState, error) { return nil, nil, err } } + node.SetIPsecKeyIdentity(spi) } nodeMngr, err := nodemanager.NewManager("all", dp.Node()) diff --git a/daemon/k8s_watcher.go b/daemon/k8s_watcher.go index 8008b9d0b15d..fbf74db5daa6 100644 --- a/daemon/k8s_watcher.go +++ b/daemon/k8s_watcher.go @@ -1511,10 +1511,12 @@ func (d *Daemon) updatePodHostIP(pod *types.Pod) (bool, error) { return true, fmt.Errorf("no/invalid PodIP: %s", pod.StatusPodIP) } + hostKey := node.GetIPsecKeyIdentity() + // Initial mapping of podIP <-> hostIP <-> identity. The mapping is // later updated once the allocator has determined the real identity. // If the endpoint remains unmanaged, the identity remains untouched. - selfOwned := ipcache.IPIdentityCache.Upsert(pod.StatusPodIP, hostIP, ipcache.Identity{ + selfOwned := ipcache.IPIdentityCache.Upsert(pod.StatusPodIP, hostIP, hostKey, ipcache.Identity{ ID: identity.ReservedIdentityUnmanaged, Source: ipcache.FromKubernetes, }) @@ -1741,7 +1743,8 @@ func (d *Daemon) updateK8sNodeTunneling(k8sNodeOld, k8sNodeNew *types.Node) erro } } - selfOwned := ipcache.IPIdentityCache.Upsert(ciliumIPStrNew, hostIPNew, ipcache.Identity{ + hostKey := node.GetIPsecKeyIdentity() + selfOwned := ipcache.IPIdentityCache.Upsert(ciliumIPStrNew, hostIPNew, hostKey, ipcache.Identity{ ID: identity.ReservedIdentityHost, Source: ipcache.FromKubernetes, }) diff --git a/pkg/bpf/endpoint.go b/pkg/bpf/endpoint.go index dadb57b4f425..6a6baca1c062 100644 --- a/pkg/bpf/endpoint.go +++ b/pkg/bpf/endpoint.go @@ -15,6 +15,7 @@ package bpf import ( + "fmt" "net" "unsafe" @@ -34,7 +35,7 @@ type EndpointKey struct { // represents both IPv6 and IPv4 (in the lowest four bytes) IP types.IPv6 `align:"$union0"` Family uint8 `align:"family"` - Pad1 uint8 `align:"pad4"` + Key uint8 `align:"key"` Pad2 uint16 `align:"pad5"` } @@ -57,6 +58,7 @@ func NewEndpointKey(ip net.IP) EndpointKey { result.Family = EndpointKeyIPv6 copy(result.IP[:], ip) } + result.Key = 0 return result } @@ -75,7 +77,7 @@ func (k EndpointKey) ToIP() net.IP { // String provides a string representation of the EndpointKey. func (k EndpointKey) String() string { if ip := k.ToIP(); ip != nil { - return ip.String() + return fmt.Sprintf("%s:%d", ip.String(), k.Key) } return "nil" } diff --git a/pkg/datapath/ipcache/listener.go b/pkg/datapath/ipcache/listener.go index d1fe4061c23a..6f310ad08c27 100644 --- a/pkg/datapath/ipcache/listener.go +++ b/pkg/datapath/ipcache/listener.go @@ -77,7 +77,7 @@ func NewListener(d datapath) *BPFListener { // IP->ID mapping will replace any existing contents; knowledge of the old pair // is not required to upsert the new pair. func (l *BPFListener) OnIPIdentityCacheChange(modType ipcache.CacheModification, cidr net.IPNet, - oldHostIP, newHostIP net.IP, oldID *identity.NumericIdentity, newID identity.NumericIdentity) { + oldHostIP, newHostIP net.IP, oldID *identity.NumericIdentity, newID identity.NumericIdentity, encryptKey uint8) { scopedLog := log.WithFields(logrus.Fields{ logfields.IPAddr: cidr, logfields.Identity: newID, @@ -98,6 +98,7 @@ func (l *BPFListener) OnIPIdentityCacheChange(modType ipcache.CacheModification, case ipcache.Upsert: value := ipcacheMap.RemoteEndpointInfo{ SecurityIdentity: uint32(newID), + Key: encryptKey, } if newHostIP != nil { diff --git a/pkg/datapath/linux/ipsec/ipsec_linux.go b/pkg/datapath/linux/ipsec/ipsec_linux.go index 2a2bea5ade32..860a59212f25 100644 --- a/pkg/datapath/linux/ipsec/ipsec_linux.go +++ b/pkg/datapath/linux/ipsec/ipsec_linux.go @@ -25,6 +25,7 @@ import ( "net" "os" "path/filepath" + "strconv" "strings" "github.com/cilium/cilium/pkg/datapath/linux/linux_defaults" @@ -43,7 +44,7 @@ const ( ) type ipSecKey struct { - Spi int + Spi uint8 ReqID int Auth *netlink.XfrmStateAlgo Crypt *netlink.XfrmStateAlgo @@ -79,7 +80,7 @@ func ipSecAttachPolicyTempl(policy *netlink.XfrmPolicy, keys *ipSecKey, srcIP, d tmpl := netlink.XfrmPolicyTmpl{ Proto: netlink.XFRM_PROTO_ESP, Mode: netlink.XFRM_MODE_TUNNEL, - Spi: keys.Spi, + Spi: int(keys.Spi), Reqid: keys.ReqID, Dst: dstIP, Src: srcIP, @@ -91,22 +92,20 @@ func ipSecAttachPolicyTempl(policy *netlink.XfrmPolicy, keys *ipSecKey, srcIP, d func ipSecJoinState(state *netlink.XfrmState, keys *ipSecKey) { state.Auth = keys.Auth state.Crypt = keys.Crypt - state.Spi = keys.Spi + state.Spi = int(keys.Spi) state.Reqid = keys.ReqID } -func ipSecReplaceState(remoteIP, localIP net.IP, spi int) error { - state := ipSecNewState() - +func ipSecReplaceState(remoteIP, localIP net.IP) (uint8, error) { key := getIPSecKeys(localIP) if key == nil { - return fmt.Errorf("IPSec key missing") + return 0, fmt.Errorf("IPSec key missing") } - key.Spi = spi + state := ipSecNewState() ipSecJoinState(state, key) state.Src = localIP state.Dst = remoteIP - return netlink.XfrmStateAdd(state) + return key.Spi, netlink.XfrmStateAdd(state) } func ipSecReplacePolicyIn(src, dst *net.IPNet) error { @@ -119,35 +118,42 @@ func ipSecReplacePolicyIn(src, dst *net.IPNet) error { } func ipSecReplacePolicyInFwd(src, dst *net.IPNet, dir netlink.Dir) error { + var spiWide uint32 + + key := getIPSecKeys(dst.IP) + if key == nil { + return fmt.Errorf("IPSec key missing") + } + spiWide = uint32(key.Spi) + policy := ipSecNewPolicy() policy.Dir = dir policy.Src = src policy.Dst = dst policy.Mark = &netlink.XfrmMark{ - Value: linux_defaults.RouteMarkDecrypt, - Mask: linux_defaults.RouteMarkMask, + Value: ((spiWide << 12) | linux_defaults.RouteMarkDecrypt), + Mask: linux_defaults.IPsecMarkMask, } + ipSecAttachPolicyTempl(policy, key, src.IP, dst.IP) + return netlink.XfrmPolicyUpdate(policy) +} + +func ipSecReplacePolicyOut(src, dst *net.IPNet, dir IPSecDir) error { + var spiWide uint32 key := getIPSecKeys(dst.IP) if key == nil { return fmt.Errorf("IPSec key missing") } - ipSecAttachPolicyTempl(policy, key, src.IP, dst.IP) - return netlink.XfrmPolicyUpdate(policy) -} + spiWide = uint32(key.Spi) -func ipSecReplacePolicyOut(src, dst *net.IPNet) error { policy := ipSecNewPolicy() policy.Dir = netlink.XFRM_DIR_OUT policy.Src = src policy.Dst = dst policy.Mark = &netlink.XfrmMark{ - Value: linux_defaults.RouteMarkEncrypt, - Mask: linux_defaults.RouteMarkMask, - } - key := getIPSecKeys(dst.IP) - if key == nil { - return fmt.Errorf("IPSec key missing") + Value: ((spiWide << 12) | linux_defaults.RouteMarkEncrypt), + Mask: linux_defaults.IPsecMarkMask, } ipSecAttachPolicyTempl(policy, key, src.IP, dst.IP) return netlink.XfrmPolicyUpdate(policy) @@ -175,7 +181,7 @@ func ipSecDeletePolicy(src, local net.IP) error { return nil } -/* UpsertIPSecEndpoint updates the IPSec context for a new endpoint inserted in +/* UpsertIPsecEndpoint updates the IPSec context for a new endpoint inserted in * the ipcache. Currently we support a global crypt/auth keyset that will encrypt * all traffic between endpoints. An IPSec context consists of two pieces a policy * and a state, the security policy database (SPD) and security association @@ -215,7 +221,10 @@ func ipSecDeletePolicy(src, local net.IP) error { * state space. Basic idea would be to reference a state using any key generated * from BPF program allowing for a single state per security ctx. */ -func UpsertIPSecEndpoint(local, remote *net.IPNet, spi int, dir IPSecDir) error { +func UpsertIPsecEndpoint(local, remote *net.IPNet, dir IPSecDir) (uint8, error) { + var spi uint8 + var err error + /* TODO: state reference ID is (dip,spi) which can be duplicated in the current global * mode. The duplication is on _all_ ingress states because dst_ip == host_ip in this * case and only a single spi entry is in use. Currently no check is done to avoid @@ -229,33 +238,33 @@ func UpsertIPSecEndpoint(local, remote *net.IPNet, spi int, dir IPSecDir) error */ if !local.IP.Equal(remote.IP) { if dir == IPSecDirIn || dir == IPSecDirBoth { - if err := ipSecReplaceState(local.IP, remote.IP, spi); err != nil { + if spi, err = ipSecReplaceState(local.IP, remote.IP); err != nil { if !os.IsExist(err) { - return fmt.Errorf("unable to replace local state: %s", err) + return 0, fmt.Errorf("unable to replace local state: %s", err) } } - if err := ipSecReplacePolicyIn(remote, local); err != nil { + if err = ipSecReplacePolicyIn(remote, local); err != nil { if !os.IsExist(err) { - return fmt.Errorf("unable to replace policy in: %s", err) + return 0, fmt.Errorf("unable to replace policy in: %s", err) } } } if dir == IPSecDirOut || dir == IPSecDirBoth { - if err := ipSecReplaceState(remote.IP, local.IP, spi); err != nil { + if spi, err = ipSecReplaceState(remote.IP, local.IP); err != nil { if !os.IsExist(err) { - return fmt.Errorf("unable to replace remote state: %s", err) + return 0, fmt.Errorf("unable to replace remote state: %s", err) } } - if err := ipSecReplacePolicyOut(local, remote); err != nil { + if err = ipSecReplacePolicyOut(local, remote, dir); err != nil { if !os.IsExist(err) { - return fmt.Errorf("unable to replace policy out: %s", err) + return 0, fmt.Errorf("unable to replace policy out: %s", err) } } } } - return nil + return spi, nil } // DeleteIPSecEndpoint deletes the endpoint from IPSec SPD and SAD @@ -289,21 +298,24 @@ func decodeIPSecKey(keyRaw string) ([]byte, error) { // LoadIPSecKeysFile imports IPSec auth and crypt keys from a file. The format // is to put a key per line as follows, (auth-algo auth-key enc-algo enc-key) -func LoadIPSecKeysFile(path string) error { +func LoadIPSecKeysFile(path string) (uint8, error) { file, err := os.Open(path) if err != nil { - return err + return 0, err } defer file.Close() return loadIPSecKeys(file) } -func loadIPSecKeys(r io.Reader) error { +func loadIPSecKeys(r io.Reader) (uint8, error) { + var spi uint8 + scanner := bufio.NewScanner(r) scanner.Split(bufio.ScanLines) for scanner.Scan() { + offset := 0 + ipSecKey := &ipSecKey{ - Spi: 1, ReqID: 1, } @@ -311,20 +323,32 @@ func loadIPSecKeys(r io.Reader) error { // auth-algo auth-key enc-algo enc-key s := strings.Split(scanner.Text(), " ") if len(s) < 4 { - return fmt.Errorf("missing IPSec keys or invalid format") + return 0, fmt.Errorf("missing IPSec keys or invalid format") } - authkey, err := decodeIPSecKey(s[1]) + spiI, err := strconv.Atoi(s[0]) if err != nil { - return fmt.Errorf("unable to decode authkey string %q", s[1]) + // If no version info is provided assume using key format without + // versioning and assign SPI. + spiI = 1 + offset = -1 + } + if spiI > linux_defaults.IPsecMaxKeyVersion { + return 0, fmt.Errorf("encryption Key space exhausted, id must be less than %d. Attempted %q", linux_defaults.IPsecMaxKeyVersion, s[0]) } - authname := s[0] + spi = uint8(spiI) - enckey, err := decodeIPSecKey(s[3]) + authkey, err := decodeIPSecKey(s[2+offset]) if err != nil { - return fmt.Errorf("unable to decode enckey string %q", s[3]) + return 0, fmt.Errorf("unable to decode authkey string %q", s[1+offset]) } - encname := s[2] + authname := s[1+offset] + + enckey, err := decodeIPSecKey(s[4+offset]) + if err != nil { + return 0, fmt.Errorf("unable to decode enckey string %q", s[3+offset]) + } + encname := s[3+offset] ipSecKey.Auth = &netlink.XfrmStateAlgo{ Name: authname, @@ -334,13 +358,15 @@ func loadIPSecKeys(r io.Reader) error { Name: encname, Key: enckey, } - if len(s) == 5 { - ipSecKeysGlobal[s[4]] = ipSecKey + ipSecKey.Spi = spi + + if len(s) == 6+offset { + ipSecKeysGlobal[s[5+offset]] = ipSecKey } else { ipSecKeysGlobal[""] = ipSecKey } } - return nil + return spi, nil } // EnableIPv6Forwarding sets proc file to enable IPv6 forwarding diff --git a/pkg/datapath/linux/ipsec/ipsec_linux_test.go b/pkg/datapath/linux/ipsec/ipsec_linux_test.go index 6f6f14410459..955aaa9d05f1 100644 --- a/pkg/datapath/linux/ipsec/ipsec_linux_test.go +++ b/pkg/datapath/linux/ipsec/ipsec_linux_test.go @@ -36,19 +36,18 @@ var _ = Suite(&IPSecSuitePrivileged{}) var ( path = "ipsec_keys_test" - keysDat = []byte("hmac(sha256) 0123456789abcdef0123456789abcdef cbc(aes) 0123456789abcdef0123456789abcdef\nhmac(sha256) 0123456789abcdef0123456789abcdef cbc(aes) 0123456789abcdef0123456789abcdef foobar\n") - invalidKeysDat = []byte("test abcdefghijklmnopqrstuvwzyzABCDEF test abcdefghijklmnopqrstuvwzyzABCDEF\n") + keysDat = []byte("1 hmac(sha256) 0123456789abcdef0123456789abcdef cbc(aes) 0123456789abcdef0123456789abcdef\n1 hmac(sha256) 0123456789abcdef0123456789abcdef cbc(aes) 0123456789abcdef0123456789abcdef foobar\n") + invalidKeysDat = []byte("1 test abcdefghijklmnopqrstuvwzyzABCDEF test abcdefghijklmnopqrstuvwzyzABCDEF\n") ) func (p *IPSecSuitePrivileged) TestLoadKeysNoFile(c *C) { - err := LoadIPSecKeysFile(path) + _, err := LoadIPSecKeysFile(path) c.Assert(os.IsNotExist(err), Equals, true) } func (p *IPSecSuitePrivileged) TestInvalidLoadKeys(c *C) { keys := bytes.NewReader(invalidKeysDat) - err := loadIPSecKeys(keys) - spi := 1 + _, err := loadIPSecKeys(keys) c.Assert(err, NotNil) _, local, err := net.ParseCIDR("1.1.3.4/16") @@ -56,19 +55,17 @@ func (p *IPSecSuitePrivileged) TestInvalidLoadKeys(c *C) { _, remote, err := net.ParseCIDR("1.2.3.4/16") c.Assert(err, IsNil) - err = UpsertIPSecEndpoint(local, remote, spi, IPSecDirBoth) + _, err = UpsertIPsecEndpoint(local, remote, IPSecDirBoth) c.Assert(err, NotNil) } func (p *IPSecSuitePrivileged) TestLoadKeys(c *C) { keys := bytes.NewReader(keysDat) - err := loadIPSecKeys(keys) + _, err := loadIPSecKeys(keys) c.Assert(err, IsNil) } func (p *IPSecSuitePrivileged) TestUpsertIPSecEquals(c *C) { - spi := 1 - _, local, err := net.ParseCIDR("1.2.3.4/16") c.Assert(err, IsNil) _, remote, err := net.ParseCIDR("1.2.3.4/16") @@ -84,7 +81,7 @@ func (p *IPSecSuitePrivileged) TestUpsertIPSecEquals(c *C) { ipSecKeysGlobal["1.2.3.4"] = key ipSecKeysGlobal[""] = key - err = UpsertIPSecEndpoint(local, remote, spi, IPSecDirBoth) + _, err = UpsertIPsecEndpoint(local, remote, IPSecDirBoth) c.Assert(err, IsNil) err = DeleteIPSecEndpoint(remote.IP, local.IP) @@ -96,8 +93,6 @@ func (p *IPSecSuitePrivileged) TestUpsertIPSecEquals(c *C) { } func (p *IPSecSuitePrivileged) TestUpsertIPSecEndpoint(c *C) { - spi := 1 - _, local, err := net.ParseCIDR("1.1.3.4/16") c.Assert(err, IsNil) _, remote, err := net.ParseCIDR("1.2.3.4/16") @@ -114,7 +109,7 @@ func (p *IPSecSuitePrivileged) TestUpsertIPSecEndpoint(c *C) { ipSecKeysGlobal["1.2.3.4"] = key ipSecKeysGlobal[""] = key - err = UpsertIPSecEndpoint(local, remote, spi, IPSecDirBoth) + _, err = UpsertIPsecEndpoint(local, remote, IPSecDirBoth) c.Assert(err, IsNil) err = DeleteIPSecEndpoint(remote.IP, local.IP) @@ -126,14 +121,12 @@ func (p *IPSecSuitePrivileged) TestUpsertIPSecEndpoint(c *C) { } func (p *IPSecSuitePrivileged) TestUpsertIPSecKeyMissing(c *C) { - spi := 1 - _, local, err := net.ParseCIDR("1.1.3.4/16") c.Assert(err, IsNil) _, remote, err := net.ParseCIDR("1.2.3.4/16") c.Assert(err, IsNil) - err = UpsertIPSecEndpoint(local, remote, spi, IPSecDirBoth) + _, err = UpsertIPsecEndpoint(local, remote, IPSecDirBoth) c.Assert(err, ErrorMatches, "unable to replace local state: IPSec key missing") err = DeleteIPSecEndpoint(remote.IP, local.IP) diff --git a/pkg/datapath/linux/linux_defaults/linux_defaults.go b/pkg/datapath/linux/linux_defaults/linux_defaults.go index c3b985aa516b..3644a37ff85f 100644 --- a/pkg/datapath/linux/linux_defaults/linux_defaults.go +++ b/pkg/datapath/linux/linux_defaults/linux_defaults.go @@ -36,9 +36,9 @@ const ( // TunnelDeviceName the default name of the tunnel device when using vxlan TunnelDeviceName = "cilium_vxlan" - // IPSec SPI value for endpoint rules - IPSecEndpointSPI = 1 + // IPSec offset value for node rules + IPsecMaxKeyVersion = 16 - // IPSec SPI value for node rules - IPSecNodeSPI = 2 + // IPsecMarkMask is the mask required for the IPsec SPI and encrypt/decrypt bits + IPsecMarkMask = 0xFF00 ) diff --git a/pkg/datapath/linux/node.go b/pkg/datapath/linux/node.go index 27e02e3f41c2..e935f9013274 100644 --- a/pkg/datapath/linux/node.go +++ b/pkg/datapath/linux/node.go @@ -60,7 +60,7 @@ func NewNodeHandler(datapathConfig DatapathConfiguration, nodeAddressing datapat // with encapsulation mode enabled. The CIDR and IP of both the old and new // node are provided as context. The caller expects the tunnel mapping in the // datapath to be updated. -func updateTunnelMapping(oldCIDR, newCIDR *cidr.CIDR, oldIP, newIP net.IP, firstAddition, encapEnabled bool) { +func updateTunnelMapping(oldCIDR, newCIDR *cidr.CIDR, oldIP, newIP net.IP, firstAddition, encapEnabled bool, encryptKey uint8) { if !encapEnabled { // When the protocol family is disabled, the initial node addition will // trigger a deletion to clean up leftover entries. The deletion happens @@ -78,7 +78,7 @@ func updateTunnelMapping(oldCIDR, newCIDR *cidr.CIDR, oldIP, newIP net.IP, first "allocCIDR": newCIDR, }).Debug("Updating tunnel map entry") - if err := tunnel.TunnelMap.SetTunnelEndpoint(newCIDR.IP, newIP); err != nil { + if err := tunnel.TunnelMap.SetTunnelEndpoint(encryptKey, newCIDR.IP, newIP); err != nil { log.WithError(err).WithFields(logrus.Fields{ "allocCIDR": newCIDR, }).Error("bpf: Unable to update in tunnel endpoint map") @@ -425,12 +425,15 @@ func (n *linuxNodeHandler) NodeUpdate(oldNode, newNode node.Node) error { return nil } -func (n *linuxNodeHandler) enableIPsec(newNode *node.Node) { - upsertIPsecLog := func(err error, spec string, loc, rem *net.IPNet) { +func (n *linuxNodeHandler) enableIPsec(newNode *node.Node) uint8 { + var spi uint8 + var err error + upsertIPsecLog := func(err error, spec string, loc, rem *net.IPNet, spi uint8) { scopedLog := log.WithFields(logrus.Fields{ logfields.Reason: spec, "local-ip": loc, "remote-ip": rem, + "spi": spi, }) if err != nil { scopedLog.WithError(err).Error("IPsec enable failed") @@ -443,7 +446,7 @@ func (n *linuxNodeHandler) enableIPsec(newNode *node.Node) { n.replaceHostRules() } - if n.nodeConfig.EnableIPv4 { + if n.nodeConfig.EnableIPv4 && newNode.IPv4AllocCIDR != nil { new4Net := &net.IPNet{IP: newNode.IPv4AllocCIDR.IP, Mask: newNode.IPv4AllocCIDR.Mask} if newNode.IsLocal() { n.replaceNodeIPSecInRoute(new4Net) @@ -451,21 +454,21 @@ func (n *linuxNodeHandler) enableIPsec(newNode *node.Node) { if ciliumInternalIPv4 != nil { ipsecLocal := &net.IPNet{IP: n.nodeAddressing.IPv4().Router(), Mask: n.nodeAddressing.IPv4().AllocationCIDR().Mask} ipsecIPv4Wildcard := &net.IPNet{IP: net.ParseIP(wildcardIPv4), Mask: net.IPv4Mask(0, 0, 0, 0)} - err := ipsec.UpsertIPSecEndpoint(ipsecLocal, ipsecIPv4Wildcard, linux_defaults.IPSecEndpointSPI, ipsec.IPSecDirIn) - upsertIPsecLog(err, "local IPv4", ipsecLocal, ipsecIPv4Wildcard) + spi, err = ipsec.UpsertIPsecEndpoint(ipsecLocal, ipsecIPv4Wildcard, ipsec.IPSecDirIn) + upsertIPsecLog(err, "local IPv4", ipsecLocal, ipsecIPv4Wildcard, spi) } } else { if ciliumInternalIPv4 := newNode.GetCiliumInternalIP(false); ciliumInternalIPv4 != nil { ipsecLocal := &net.IPNet{IP: n.nodeAddressing.IPv4().Router(), Mask: n.nodeAddressing.IPv4().AllocationCIDR().Mask} ipsecRemote := &net.IPNet{IP: ciliumInternalIPv4, Mask: newNode.IPv4AllocCIDR.Mask} n.replaceNodeIPSecOutRoute(new4Net) - err := ipsec.UpsertIPSecEndpoint(ipsecLocal, ipsecRemote, linux_defaults.IPSecEndpointSPI, ipsec.IPSecDirOut) - upsertIPsecLog(err, "IPv4", ipsecLocal, ipsecRemote) + spi, err = ipsec.UpsertIPsecEndpoint(ipsecLocal, ipsecRemote, ipsec.IPSecDirOut) + upsertIPsecLog(err, "IPv4", ipsecLocal, ipsecRemote, spi) } } } - if n.nodeConfig.EnableIPv6 { + if n.nodeConfig.EnableIPv6 && newNode.IPv6AllocCIDR != nil { new6Net := &net.IPNet{IP: newNode.IPv6AllocCIDR.IP, Mask: newNode.IPv6AllocCIDR.Mask} if newNode.IsLocal() { n.replaceHostRules() @@ -474,19 +477,20 @@ func (n *linuxNodeHandler) enableIPsec(newNode *node.Node) { if ciliumInternalIPv6 != nil { ipsecLocal := &net.IPNet{IP: n.nodeAddressing.IPv6().Router(), Mask: n.nodeAddressing.IPv6().AllocationCIDR().Mask} ipsecIPv6Wildcard := &net.IPNet{IP: net.ParseIP(wildcardIPv6), Mask: net.CIDRMask(0, 0)} - err := ipsec.UpsertIPSecEndpoint(ipsecLocal, ipsecIPv6Wildcard, linux_defaults.IPSecEndpointSPI, ipsec.IPSecDirIn) - upsertIPsecLog(err, "local IPv6", ipsecLocal, ipsecIPv6Wildcard) + spi, err = ipsec.UpsertIPsecEndpoint(ipsecLocal, ipsecIPv6Wildcard, ipsec.IPSecDirIn) + upsertIPsecLog(err, "local IPv6", ipsecLocal, ipsecIPv6Wildcard, spi) } } else { if ciliumInternalIPv6 := newNode.GetCiliumInternalIP(true); ciliumInternalIPv6 != nil { ipsecLocalWildcard := &net.IPNet{IP: net.ParseIP(wildcardIPv6), Mask: net.CIDRMask(0, 0)} ipsecRemote := &net.IPNet{IP: ciliumInternalIPv6, Mask: newNode.IPv6AllocCIDR.Mask} n.replaceNodeIPSecOutRoute(new6Net) - err := ipsec.UpsertIPSecEndpoint(ipsecLocalWildcard, ipsecRemote, linux_defaults.IPSecEndpointSPI, ipsec.IPSecDirOut) - upsertIPsecLog(err, "IPv6", ipsecLocalWildcard, ipsecRemote) + spi, err := ipsec.UpsertIPsecEndpoint(ipsecLocalWildcard, ipsecRemote, ipsec.IPSecDirOut) + upsertIPsecLog(err, "IPv6", ipsecLocalWildcard, ipsecRemote, spi) } } } + return spi } func (n *linuxNodeHandler) nodeUpdate(oldNode, newNode *node.Node, firstAddition bool) error { @@ -495,6 +499,7 @@ func (n *linuxNodeHandler) nodeUpdate(oldNode, newNode *node.Node, firstAddition oldIP4, oldIP6 net.IP newIP4 = newNode.GetNodeIP(false) newIP6 = newNode.GetNodeIP(true) + encryptKey uint8 ) if oldNode != nil { @@ -505,7 +510,7 @@ func (n *linuxNodeHandler) nodeUpdate(oldNode, newNode *node.Node, firstAddition } if n.nodeConfig.EnableIPSec { - n.enableIPsec(newNode) + encryptKey = n.enableIPsec(newNode) } if newNode.IsLocal() { @@ -526,9 +531,9 @@ func (n *linuxNodeHandler) nodeUpdate(oldNode, newNode *node.Node, firstAddition // Update the tunnel mapping of the node. In case the // node has changed its CIDR range, a new entry in the // map is created and the old entry is removed. - updateTunnelMapping(oldIP4Cidr, newNode.IPv4AllocCIDR, oldIP4, newIP4, firstAddition, n.nodeConfig.EnableIPv4) + updateTunnelMapping(oldIP4Cidr, newNode.IPv4AllocCIDR, oldIP4, newIP4, firstAddition, n.nodeConfig.EnableIPv4, encryptKey) // Not a typo, the IPv4 host IP is used to build the IPv6 overlay - updateTunnelMapping(oldIP6Cidr, newNode.IPv6AllocCIDR, oldIP4, newIP4, firstAddition, n.nodeConfig.EnableIPv6) + updateTunnelMapping(oldIP6Cidr, newNode.IPv6AllocCIDR, oldIP4, newIP4, firstAddition, n.nodeConfig.EnableIPv6, encryptKey) if !n.nodeConfig.UseSingleClusterRoute { n.updateOrRemoveNodeRoutes([]*cidr.CIDR{oldIP4Cidr}, []*cidr.CIDR{newNode.IPv4AllocCIDR}) diff --git a/pkg/endpoint/policy.go b/pkg/endpoint/policy.go index 909c6c82ba2a..8516d8608e24 100644 --- a/pkg/endpoint/policy.go +++ b/pkg/endpoint/policy.go @@ -511,13 +511,14 @@ func (e *Endpoint) runIPIdentitySync(endpointIP addressing.CiliumIP) { IP := endpointIP.IP() ID := e.SecurityIdentity.ID hostIP := node.GetExternalIPv4() + key := node.GetIPsecKeyIdentity() metadata := e.FormatGlobalEndpointID() // Release lock as we do not want to have long-lasting key-value // store operations resulting in lock being held for a long time. e.RUnlock() - if err := ipcache.UpsertIPToKVStore(ctx, IP, hostIP, ID, metadata); err != nil { + if err := ipcache.UpsertIPToKVStore(ctx, IP, hostIP, ID, key, metadata); err != nil { return fmt.Errorf("unable to add endpoint IP mapping '%s'->'%d': %s", IP.String(), ID, err) } return nil diff --git a/pkg/envoy/resources.go b/pkg/envoy/resources.go index 909e10a363b5..d63cfb487bbf 100644 --- a/pkg/envoy/resources.go +++ b/pkg/envoy/resources.go @@ -63,7 +63,7 @@ func (cache *NPHDSCache) OnIPIdentityCacheGC() { // OnIPIdentityCacheChange pushes modifications to the IP<->Identity mapping // into the Network Policy Host Discovery Service (NPHDS). func (cache *NPHDSCache) OnIPIdentityCacheChange(modType ipcache.CacheModification, cidr net.IPNet, - oldHostIP, newHostIP net.IP, oldID *identity.NumericIdentity, newID identity.NumericIdentity) { + oldHostIP, newHostIP net.IP, oldID *identity.NumericIdentity, newID identity.NumericIdentity, encryptKey uint8) { // An upsert where an existing pair exists should translate into a // delete (for the old Identity) followed by an upsert (for the new). if oldID != nil && modType == ipcache.Upsert { @@ -72,7 +72,7 @@ func (cache *NPHDSCache) OnIPIdentityCacheChange(modType ipcache.CacheModificati return } - cache.OnIPIdentityCacheChange(ipcache.Delete, cidr, nil, nil, nil, *oldID) + cache.OnIPIdentityCacheChange(ipcache.Delete, cidr, nil, nil, nil, *oldID, encryptKey) } cidrStr := cidr.String() diff --git a/pkg/identity/identity.go b/pkg/identity/identity.go index 52d0c14ab3da..33551d1ff26e 100644 --- a/pkg/identity/identity.go +++ b/pkg/identity/identity.go @@ -64,6 +64,7 @@ type IPIdentityPair struct { Mask net.IPMask `json:"Mask"` HostIP net.IP `json:"HostIP"` ID NumericIdentity `json:"ID"` + Key uint8 `json:"Key"` Metadata string `json:"Metadata"` } diff --git a/pkg/ipcache/cidr.go b/pkg/ipcache/cidr.go index dd396b654037..5fd2fb58549f 100644 --- a/pkg/ipcache/cidr.go +++ b/pkg/ipcache/cidr.go @@ -61,7 +61,7 @@ func AllocateCIDRs(impl Implementation, prefixes []*net.IPNet) error { } for prefixString, id := range allocatedIdentities { - IPIdentityCache.Upsert(prefixString, nil, Identity{ + IPIdentityCache.Upsert(prefixString, nil, 0, Identity{ ID: id.ID, Source: FromCIDR, }) diff --git a/pkg/ipcache/ipcache.go b/pkg/ipcache/ipcache.go index d0b6eca7877a..452d25ae0603 100644 --- a/pkg/ipcache/ipcache.go +++ b/pkg/ipcache/ipcache.go @@ -63,6 +63,12 @@ type Identity struct { Source Source } +// IPKeyPair is the (IP, key) pair used of the identity +type IPKeyPair struct { + IP net.IP + Key uint8 +} + // IPCache is a collection of mappings: // - mapping of endpoint IP or CIDR to security identities of all endpoints // which are part of the same cluster, and vice-versa @@ -71,7 +77,7 @@ type IPCache struct { mutex lock.RWMutex ipToIdentityCache map[string]Identity identityToIPCache map[identity.NumericIdentity]map[string]struct{} - ipToHostIPCache map[string]net.IP + ipToHostIPCache map[string]IPKeyPair // prefixLengths reference-count the number of CIDRs that use // particular prefix lengths for the mask. @@ -94,7 +100,7 @@ func NewIPCache() *IPCache { return &IPCache{ ipToIdentityCache: map[string]Identity{}, identityToIPCache: map[identity.NumericIdentity]map[string]struct{}{}, - ipToHostIPCache: map[string]net.IP{}, + ipToHostIPCache: map[string]IPKeyPair{}, v4PrefixLengths: map[int]int{}, v6PrefixLengths: map[int]int{}, } @@ -211,6 +217,11 @@ func endpointIPToCIDR(ip net.IP) *net.IPNet { } } +func (ipc *IPCache) getHostIPCache(ip string) (net.IP, uint8) { + ipKeyPair := ipc.ipToHostIPCache[ip] + return ipKeyPair.IP, ipKeyPair.Key +} + // Upsert adds / updates the provided IP (endpoint or CIDR prefix) and identity // into the IPCache. // @@ -219,10 +230,11 @@ func endpointIPToCIDR(ip net.IP) *net.IPNet { // managed by the kvstore layer. See allowOverwrite() for rules on ownership. // hostIP is the location of the given IP. It is optional (may be nil) and is // propagated to the listeners. -func (ipc *IPCache) Upsert(ip string, hostIP net.IP, newIdentity Identity) bool { +func (ipc *IPCache) Upsert(ip string, hostIP net.IP, hostKey uint8, newIdentity Identity) bool { scopedLog := log.WithFields(logrus.Fields{ logfields.IPAddr: ip, logfields.Identity: newIdentity, + logfields.Key: hostKey, }) ipc.mutex.Lock() @@ -232,7 +244,7 @@ func (ipc *IPCache) Upsert(ip string, hostIP net.IP, newIdentity Identity) bool var oldIdentity *identity.NumericIdentity callbackListeners := true - oldHostIP := ipc.ipToHostIPCache[ip] + oldHostIP, oldHostKey := ipc.getHostIPCache(ip) cachedIdentity, found := ipc.ipToIdentityCache[ip] if found { @@ -242,7 +254,7 @@ func (ipc *IPCache) Upsert(ip string, hostIP net.IP, newIdentity Identity) bool // Skip update if IP is already mapped to the given identity // and the host IP hasn't changed. - if cachedIdentity == newIdentity && bytes.Compare(oldHostIP, hostIP) == 0 { + if cachedIdentity == newIdentity && bytes.Compare(oldHostIP, hostIP) == 0 && hostKey == oldHostKey { return true } @@ -280,7 +292,7 @@ func (ipc *IPCache) Upsert(ip string, hostIP net.IP, newIdentity Identity) bool if !found { cidrStr := cidr.String() if cidrIdentity, cidrFound := ipc.ipToIdentityCache[cidrStr]; cidrFound { - oldHostIP = ipc.ipToHostIPCache[cidrStr] + oldHostIP, _ = ipc.getHostIPCache(cidrStr) if cidrIdentity.ID != newIdentity.ID || bytes.Compare(oldHostIP, hostIP) != 0 { scopedLog.Debug("New endpoint IP started shadowing existing CIDR to identity mapping") oldIdentity = &cidrIdentity.ID @@ -316,12 +328,12 @@ func (ipc *IPCache) Upsert(ip string, hostIP net.IP, newIdentity Identity) bool if hostIP == nil { delete(ipc.ipToHostIPCache, ip) } else { - ipc.ipToHostIPCache[ip] = hostIP + ipc.ipToHostIPCache[ip] = IPKeyPair{IP: hostIP, Key: hostKey} } if callbackListeners { for _, listener := range ipc.listeners { - listener.OnIPIdentityCacheChange(Upsert, *cidr, oldHostIP, hostIP, oldIdentity, newIdentity.ID) + listener.OnIPIdentityCacheChange(Upsert, *cidr, oldHostIP, hostIP, oldIdentity, newIdentity.ID, hostKey) } } @@ -332,13 +344,13 @@ func (ipc *IPCache) Upsert(ip string, hostIP net.IP, newIdentity Identity) bool // the listener's "OnIPIdentityCacheChange" method for each entry in the cache. func (ipc *IPCache) DumpToListenerLocked(listener IPIdentityMappingListener) { for ip, identity := range ipc.ipToIdentityCache { - hostIP := ipc.ipToHostIPCache[ip] + hostIP, encryptKey := ipc.getHostIPCache(ip) _, cidr, err := net.ParseCIDR(ip) if err != nil { endpointIP := net.ParseIP(ip) cidr = endpointIPToCIDR(endpointIP) } - listener.OnIPIdentityCacheChange(Upsert, *cidr, nil, hostIP, nil, identity.ID) + listener.OnIPIdentityCacheChange(Upsert, *cidr, nil, hostIP, nil, identity.ID, encryptKey) } } @@ -363,7 +375,7 @@ func (ipc *IPCache) deleteLocked(ip string, source Source) { var cidr *net.IPNet cacheModification := Delete - oldHostIP := ipc.ipToHostIPCache[ip] + oldHostIP, encryptKey := ipc.getHostIPCache(ip) var newHostIP net.IP var oldIdentity *identity.NumericIdentity newIdentity := cachedIdentity @@ -402,7 +414,7 @@ func (ipc *IPCache) deleteLocked(ip string, source Source) { // restore its mapping with the listeners if that was the case. cidrStr := cidr.String() if cidrIdentity, cidrFound := ipc.ipToIdentityCache[cidrStr]; cidrFound { - newHostIP = ipc.ipToHostIPCache[cidrStr] + newHostIP, _ = ipc.getHostIPCache(cidrStr) if cidrIdentity.ID != cachedIdentity.ID || bytes.Compare(oldHostIP, newHostIP) != 0 { scopedLog.Debug("Removal of endpoint IP revives shadowed CIDR to identity mapping") cacheModification = Upsert @@ -431,7 +443,7 @@ func (ipc *IPCache) deleteLocked(ip string, source Source) { if callbackListeners { for _, listener := range ipc.listeners { listener.OnIPIdentityCacheChange(cacheModification, *cidr, oldHostIP, newHostIP, - oldIdentity, newIdentity.ID) + oldIdentity, newIdentity.ID, encryptKey) } } } diff --git a/pkg/ipcache/ipcache_test.go b/pkg/ipcache/ipcache_test.go index 70912159857d..6279dc55f3ec 100644 --- a/pkg/ipcache/ipcache_test.go +++ b/pkg/ipcache/ipcache_test.go @@ -47,7 +47,7 @@ func (s *IPCacheTestSuite) TestIPCache(c *C) { // Deletion of key that doesn't exist doesn't cause panic. IPIdentityCache.Delete(endpointIP, FromKVStore) - IPIdentityCache.Upsert(endpointIP, nil, Identity{ + IPIdentityCache.Upsert(endpointIP, nil, 0, Identity{ ID: identity, Source: FromKVStore, }) @@ -62,13 +62,13 @@ func (s *IPCacheTestSuite) TestIPCache(c *C) { c.Assert(cachedIdentity.Source, Equals, FromKVStore) // kubernetes source cannot update kvstore source - updated := IPIdentityCache.Upsert(endpointIP, nil, Identity{ + updated := IPIdentityCache.Upsert(endpointIP, nil, 0, Identity{ ID: identity, Source: FromKubernetes, }) c.Assert(updated, Equals, false) - IPIdentityCache.Upsert(endpointIP, nil, Identity{ + IPIdentityCache.Upsert(endpointIP, nil, 0, Identity{ ID: identity, Source: FromKVStore, }) @@ -87,13 +87,13 @@ func (s *IPCacheTestSuite) TestIPCache(c *C) { c.Assert(exists, Equals, false) - IPIdentityCache.Upsert(endpointIP, nil, Identity{ + IPIdentityCache.Upsert(endpointIP, nil, 0, Identity{ ID: identity, Source: FromKVStore, }) newIdentity := identityPkg.NumericIdentity(69) - IPIdentityCache.Upsert(endpointIP, nil, Identity{ + IPIdentityCache.Upsert(endpointIP, nil, 0, Identity{ ID: newIdentity, Source: FromKVStore, }) @@ -120,7 +120,7 @@ func (s *IPCacheTestSuite) TestIPCache(c *C) { identities := []identityPkg.NumericIdentity{5, 67, 29, 29, 29} for index := range endpointIPs { - IPIdentityCache.Upsert(endpointIPs[index], nil, Identity{ + IPIdentityCache.Upsert(endpointIPs[index], nil, 0, Identity{ ID: identities[index], Source: FromKVStore, }) diff --git a/pkg/ipcache/kvstore.go b/pkg/ipcache/kvstore.go index 102b4b8ded5b..6dfdcf93967b 100644 --- a/pkg/ipcache/kvstore.go +++ b/pkg/ipcache/kvstore.go @@ -166,13 +166,14 @@ func (r *kvReferenceCounter) release(ctx context.Context, key string) (err error // UpsertIPToKVStore updates / inserts the provided IP->Identity mapping into the // kvstore, which will subsequently trigger an event in NewIPIdentityWatcher(). -func UpsertIPToKVStore(ctx context.Context, IP, hostIP net.IP, ID identity.NumericIdentity, metadata string) error { +func UpsertIPToKVStore(ctx context.Context, IP, hostIP net.IP, ID identity.NumericIdentity, key uint8, metadata string) error { ipKey := path.Join(IPIdentitiesPath, AddressSpace, IP.String()) ipIDPair := identity.IPIdentityPair{ IP: IP, ID: ID, Metadata: metadata, HostIP: hostIP, + Key: key, } marshaledIPIDPair, err := json.Marshal(ipIDPair) @@ -183,6 +184,7 @@ func UpsertIPToKVStore(ctx context.Context, IP, hostIP net.IP, ID identity.Numer log.WithFields(logrus.Fields{ logfields.IPAddr: ipIDPair.IP, logfields.Identity: ipIDPair.ID, + logfields.Key: ipIDPair.Key, logfields.Modification: Upsert, }).Debug("upserting IP->ID mapping to kvstore") @@ -313,7 +315,7 @@ restart: continue } - IPIdentityCache.Upsert(ipIDPair.PrefixString(), ipIDPair.HostIP, Identity{ + IPIdentityCache.Upsert(ipIDPair.PrefixString(), ipIDPair.HostIP, ipIDPair.Key, Identity{ ID: ipIDPair.ID, Source: FromKVStore, }) diff --git a/pkg/ipcache/listener.go b/pkg/ipcache/listener.go index 701151258c34..cd5379956d3c 100644 --- a/pkg/ipcache/listener.go +++ b/pkg/ipcache/listener.go @@ -40,7 +40,7 @@ type IPIdentityMappingListener interface { // hostIP is the IP address of the location of the cidr. // hostIP is optional and may only be non-nil for an Upsert modification. OnIPIdentityCacheChange(modType CacheModification, cidr net.IPNet, oldHostIP, newHostIP net.IP, - oldID *identity.NumericIdentity, newID identity.NumericIdentity) + oldID *identity.NumericIdentity, newID identity.NumericIdentity, encryptKey uint8) // OnIPIdentityCacheGC will be called to sync other components which are // reliant upon the IPIdentityCache with the IPIdentityCache. diff --git a/pkg/logging/logfields/logfields.go b/pkg/logging/logfields/logfields.go index 77df0e7b68e2..57949a7d0a29 100644 --- a/pkg/logging/logfields/logfields.go +++ b/pkg/logging/logfields/logfields.go @@ -350,4 +350,7 @@ const ( // Probe is the name of a status probe. Probe = "probe" + + // Key is the identity of the encryption key + Key = "key" ) diff --git a/pkg/maps/ipcache/ipcache.go b/pkg/maps/ipcache/ipcache.go index 6ac71dac3d36..43cdac7a7593 100644 --- a/pkg/maps/ipcache/ipcache.go +++ b/pkg/maps/ipcache/ipcache.go @@ -128,10 +128,11 @@ func NewKey(ip net.IP, mask net.IPMask) Key { type RemoteEndpointInfo struct { SecurityIdentity uint32 `align:"sec_label"` TunnelEndpoint [4]byte `align:"tunnel_endpoint"` + Key uint8 `align:"key"` } func (v *RemoteEndpointInfo) String() string { - return fmt.Sprintf("%d", v.SecurityIdentity) + return fmt.Sprintf("%d %d", v.SecurityIdentity, v.Key) } // GetValuePtr returns the unsafe pointer to the BPF value. diff --git a/pkg/maps/tunnel/logfields.go b/pkg/maps/tunnel/logfields.go index b5cef269037d..df376a1b4a96 100644 --- a/pkg/maps/tunnel/logfields.go +++ b/pkg/maps/tunnel/logfields.go @@ -22,6 +22,7 @@ import ( const ( fieldEndpoint = "endpoint" fieldPrefix = "prefix" + fieldKey = "key" ) var ( diff --git a/pkg/maps/tunnel/tunnel.go b/pkg/maps/tunnel/tunnel.go index 72eae6556eae..4d0e81f2390e 100644 --- a/pkg/maps/tunnel/tunnel.go +++ b/pkg/maps/tunnel/tunnel.go @@ -77,12 +77,13 @@ func newTunnelEndpoint(ip net.IP) *TunnelEndpoint { func (v TunnelEndpoint) NewValue() bpf.MapValue { return &TunnelEndpoint{} } // SetTunnelEndpoint adds/replaces a prefix => tunnel-endpoint mapping -func (m *Map) SetTunnelEndpoint(prefix net.IP, endpoint net.IP) error { +func (m *Map) SetTunnelEndpoint(encryptKey uint8, prefix, endpoint net.IP) error { key, val := newTunnelEndpoint(prefix), newTunnelEndpoint(endpoint) - + val.EndpointKey.Key = encryptKey log.WithFields(logrus.Fields{ fieldPrefix: prefix, fieldEndpoint: endpoint, + fieldKey: encryptKey, }).Debug("Updating tunnel map entry") return TunnelMap.Update(key, val) diff --git a/pkg/node/node_address.go b/pkg/node/node_address.go index 1a72a67101e8..0e7bc93560bb 100644 --- a/pkg/node/node_address.go +++ b/pkg/node/node_address.go @@ -41,6 +41,8 @@ var ( ipv6RouterAddress net.IP ipv4AllocRange *cidr.CIDR ipv6AllocRange *cidr.CIDR + + ipsecKeyIdentity uint8 ) func makeIPv6HostIP() net.IP { @@ -389,3 +391,14 @@ func getCiliumHostIPs() (ipv4GW, ipv6Router net.IP) { } return getCiliumHostIPsFromNetDev(option.Config.HostDevice) } + +// SetIPsecKeyIdentity sets the IPsec key identity an opaque value used to +// identity encryption keys used on the node. +func SetIPsecKeyIdentity(id uint8) { + ipsecKeyIdentity = id +} + +// GetIPsecKeyIdentity returns the IPsec key identity of the node +func GetIPsecKeyIdentity() uint8 { + return ipsecKeyIdentity +}