diff --git a/bpf/bpf_ipsec.c b/bpf/bpf_ipsec.c index 1c00ea81b636..fa45a5cfe7d1 100644 --- a/bpf/bpf_ipsec.c +++ b/bpf/bpf_ipsec.c @@ -27,8 +27,8 @@ __section("from-netdev") int from_netdev(struct __sk_buff *skb) { - if (skb->cb[0] == MARK_MAGIC_ENCRYPT) { - skb->mark = MARK_MAGIC_ENCRYPT; + if ((skb->cb[0] & MARK_MAGIC_HOST_MASK) == MARK_MAGIC_ENCRYPT) { + skb->mark = skb->cb[0]; set_identity(skb, skb->cb[1]); } return TC_ACT_OK; diff --git a/bpf/bpf_lxc.c b/bpf/bpf_lxc.c index 0d9eba7642b3..a05eecba3524 100644 --- a/bpf/bpf_lxc.c +++ b/bpf/bpf_lxc.c @@ -146,6 +146,7 @@ static inline int ipv6_l3_from_lxc(struct __sk_buff *skb, void *data, *data_end; union v6addr *daddr, orig_dip; __u32 tunnel_endpoint = 0; + __u8 encrypt_key = 0; __u32 monitor = 0; if (unlikely(!is_valid_lxc_src_ip(ip6))) @@ -221,6 +222,7 @@ static inline int ipv6_l3_from_lxc(struct __sk_buff *skb, if (info != NULL && info->sec_label) { *dstID = info->sec_label; tunnel_endpoint = info->tunnel_endpoint; + encrypt_key = info->key; } else { *dstID = WORLD_ID; } @@ -331,7 +333,7 @@ static inline int ipv6_l3_from_lxc(struct __sk_buff *skb, /* The packet goes to a peer not managed by this agent instance */ #ifdef ENCAP_IFINDEX if (tunnel_endpoint) { - ret = encap_and_redirect_with_nodeid_from_lxc(skb, tunnel_endpoint, SECLABEL, monitor); + ret = encap_and_redirect_with_nodeid_from_lxc(skb, tunnel_endpoint, encrypt_key, SECLABEL, monitor); /* If not redirected noteable due to IPSEC then pass up to stack * for further processing. */ @@ -475,6 +477,7 @@ static inline int handle_ipv4_from_lxc(struct __sk_buff *skb, __u32 *dstID) struct ct_state ct_state = {}; __be32 orig_dip; __u32 tunnel_endpoint = 0; + __u8 encrypt_key = 0; __u32 monitor = 0; if (!revalidate_data(skb, &data, &data_end, &ip4)) @@ -537,6 +540,7 @@ static inline int handle_ipv4_from_lxc(struct __sk_buff *skb, __u32 *dstID) if (info != NULL && info->sec_label) { *dstID = info->sec_label; tunnel_endpoint = info->tunnel_endpoint; + encrypt_key = info->key; } else { *dstID = WORLD_ID; } @@ -649,6 +653,7 @@ static inline int handle_ipv4_from_lxc(struct __sk_buff *skb, __u32 *dstID) #ifdef ENCAP_IFINDEX if (tunnel_endpoint) { int ret = encap_and_redirect_with_nodeid_from_lxc(skb, tunnel_endpoint, + encrypt_key, SECLABEL, monitor); /* If not redirected noteably due to IPSEC then pass up to stack * for further processing. diff --git a/bpf/bpf_netdev.c b/bpf/bpf_netdev.c index 93789e9ead3e..99f91a9e244a 100644 --- a/bpf/bpf_netdev.c +++ b/bpf/bpf_netdev.c @@ -242,6 +242,7 @@ static inline int handle_ipv6(struct __sk_buff *skb, __u32 src_identity) info = ipcache_lookup6(&IPCACHE_MAP, dst, V6_CACHE_KEY_LEN); if (info != NULL && info->tunnel_endpoint != 0) { int ret = encap_and_redirect_with_nodeid(skb, info->tunnel_endpoint, + info->key, secctx, TRACE_PAYLOAD_LEN); /* If IPSEC is needed recirc through ingress to use xfrm stack @@ -443,6 +444,7 @@ static inline int handle_ipv4(struct __sk_buff *skb, __u32 src_identity) info = ipcache_lookup4(&IPCACHE_MAP, ip4->daddr, V4_CACHE_KEY_LEN); if (info != NULL && info->tunnel_endpoint != 0) { int ret = encap_and_redirect_with_nodeid(skb, info->tunnel_endpoint, + info->key, secctx, TRACE_PAYLOAD_LEN); if (ret == IPSEC_ENDPOINT) diff --git a/bpf/bpf_overlay.c b/bpf/bpf_overlay.c index 6b6887c50bbd..9673bef06118 100644 --- a/bpf/bpf_overlay.c +++ b/bpf/bpf_overlay.c @@ -64,6 +64,8 @@ static inline int handle_ipv6(struct __sk_buff *skb) */ if (ip6->nexthdr != IPPROTO_ESP) goto not_esp; + + /* Decrypt "key" is determined by SPI */ skb->mark = MARK_MAGIC_DECRYPT; set_identity(skb, key.tunnel_id); /* To IPSec stack on cilium_vxlan we are going to pass @@ -148,6 +150,7 @@ static inline int handle_ipv4(struct __sk_buff *skb) */ if (ip4->protocol != IPPROTO_ESP) goto not_esp; + /* Decrypt "key" is determined by SPI */ skb->mark = MARK_MAGIC_DECRYPT; set_identity(skb, key.tunnel_id); /* To IPSec stack on cilium_vxlan we are going to pass diff --git a/bpf/lib/common.h b/bpf/lib/common.h index 404dc74ad695..1080ef14d3df 100644 --- a/bpf/lib/common.h +++ b/bpf/lib/common.h @@ -162,7 +162,7 @@ struct endpoint_key { union v6addr ip6; }; __u8 family; - __u8 pad4; + __u8 key; __u16 pad5; } __attribute__((packed)); @@ -182,6 +182,7 @@ struct endpoint_info { struct remote_endpoint_info { __u32 sec_label; __u32 tunnel_endpoint; + __u8 key; }; struct policy_key { @@ -298,17 +299,24 @@ enum { * packets security identity. The lower/upper halves are swapped to recover * the identity. * - * The 4 bits at 0XF00 provide + * The 4 bits at 0X0F00 provide * - the magic marker values which indicate whether the packet is coming from * an ingress or egress proxy, a local process and its current encryption * status. + * + * The 4 bits at 0xF000 provide + * - the key index to use for encryption when multiple keys are in-flight. + * In the IPsec case this becomes the SPI on the wire. */ -#define MARK_MAGIC_HOST_MASK 0xF00 -#define MARK_MAGIC_PROXY_INGRESS 0xA00 -#define MARK_MAGIC_PROXY_EGRESS 0xB00 -#define MARK_MAGIC_HOST 0xC00 -#define MARK_MAGIC_DECRYPT 0xD00 -#define MARK_MAGIC_ENCRYPT 0xE00 +#define MARK_MAGIC_HOST_MASK 0x0F00 +#define MARK_MAGIC_PROXY_INGRESS 0x0A00 +#define MARK_MAGIC_PROXY_EGRESS 0x0B00 +#define MARK_MAGIC_HOST 0x0C00 +#define MARK_MAGIC_DECRYPT 0x0D00 +#define MARK_MAGIC_ENCRYPT 0x0E00 + +#define MARK_MAGIC_KEY_ID 0xF000 +#define MARK_MAGIC_KEY_MASK 0xFF00 /** * get_identity - returns source identity from the mark field @@ -323,10 +331,26 @@ static inline int __inline__ get_identity(struct __sk_buff *skb) */ static inline void __inline__ set_identity(struct __sk_buff *skb, __u32 identity) { - skb->mark = skb->mark & MARK_MAGIC_HOST_MASK; + skb->mark = skb->mark & MARK_MAGIC_KEY_MASK; skb->mark |= ((identity & 0xFFFF) << 16) | ((identity & 0xFF0000) >> 16); } +/** + * or_encrypt_key - mask and shift key into encryption format + */ +static inline __u32 __inline__ or_encrypt_key(__u8 key) +{ + return (((__u32)key & 0x0F) << 12) | MARK_MAGIC_ENCRYPT; +} + +/** + * set_encrypt_key - pushes 8 bit key and encryption marker into skb mark value. + */ +static inline void __inline__ set_encrypt_key(struct __sk_buff *skb, __u8 key) +{ + skb->mark = or_encrypt_key(key); +} + /* * skb->tc_index uses * diff --git a/bpf/lib/encap.h b/bpf/lib/encap.h index e3f194c93632..25cf969d893e 100644 --- a/bpf/lib/encap.h +++ b/bpf/lib/encap.h @@ -24,7 +24,7 @@ #ifdef ENCAP_IFINDEX #ifdef ENABLE_IPSEC static inline int __inline__ -enacap_and_redirect_nomark_ipsec(struct __sk_buff *skb, __u32 tunnel_endpoint, +enacap_and_redirect_nomark_ipsec(struct __sk_buff *skb, __u32 tunnel_endpoint, __u8 key, __u32 seclabel, __u32 monitor) { /* Traffic from local host in tunnel mode will be passed to @@ -40,14 +40,14 @@ enacap_and_redirect_nomark_ipsec(struct __sk_buff *skb, __u32 tunnel_endpoint, * in both cases because xfrm layer would overwrite them. We * use cb[4] here so it doesn't need to be reset by bpf_ipsec. */ - skb->cb[0] = MARK_MAGIC_ENCRYPT; + skb->cb[0] = or_encrypt_key(key); skb->cb[1] = seclabel; skb->cb[4] = tunnel_endpoint; return IPSEC_ENDPOINT; } static inline int __inline__ -encap_and_redirect_ipsec(struct __sk_buff *skb, __u32 tunnel_endpoint, +encap_and_redirect_ipsec(struct __sk_buff *skb, __u32 tunnel_endpoint, __u8 key, __u32 seclabel, __u32 monitor) { /* IPSec is performed by the stack on any packets with the @@ -57,7 +57,7 @@ encap_and_redirect_ipsec(struct __sk_buff *skb, __u32 tunnel_endpoint, * label is stashed in the mark and extracted in bpf_netdev * to send skb onto tunnel for encap. */ - skb->mark = MARK_MAGIC_ENCRYPT; + set_encrypt_key(skb, key); set_identity(skb, seclabel); skb->cb[4] = tunnel_endpoint; return IPSEC_ENDPOINT; @@ -96,24 +96,24 @@ __encap_and_redirect_with_nodeid(struct __sk_buff *skb, __u32 tunnel_endpoint, */ static inline int __inline__ encap_and_redirect_with_nodeid(struct __sk_buff *skb, __u32 tunnel_endpoint, - __u32 seclabel, __u32 monitor) + __u8 key, __u32 seclabel, __u32 monitor) { #ifdef ENABLE_IPSEC - return enacap_and_redirect_nomark_ipsec(skb, tunnel_endpoint, seclabel, monitor); -#else - return __encap_and_redirect_with_nodeid(skb, tunnel_endpoint, seclabel, monitor); + if (key) + return enacap_and_redirect_nomark_ipsec(skb, tunnel_endpoint, key, seclabel, monitor); #endif + return __encap_and_redirect_with_nodeid(skb, tunnel_endpoint, seclabel, monitor); } static inline int __inline__ -encap_and_redirect_with_nodeid_from_lxc(struct __sk_buff *skb, __u32 tunnel_endpoint, +encap_and_redirect_with_nodeid_from_lxc(struct __sk_buff *skb, __u32 tunnel_endpoint, __u8 key, __u32 seclabel, __u32 monitor) { #ifdef ENABLE_IPSEC - return encap_and_redirect_ipsec(skb, tunnel_endpoint, seclabel, monitor); -#else - return __encap_and_redirect_with_nodeid(skb, tunnel_endpoint, seclabel, monitor); + if (key) + return encap_and_redirect_ipsec(skb, tunnel_endpoint, key, seclabel, monitor); #endif + return __encap_and_redirect_with_nodeid(skb, tunnel_endpoint, seclabel, monitor); } /* encap_and_redirect based on ENABLE_IPSEC flag and from_host bool will decide @@ -137,14 +137,22 @@ encap_and_redirect(struct __sk_buff *skb, struct endpoint_key *k, } #ifdef ENABLE_IPSEC - if (from_host) - return enacap_and_redirect_nomark_ipsec(skb, tunnel->ip4, - seclabel, monitor); - else - return encap_and_redirect_ipsec(skb, tunnel->ip4, seclabel, monitor); -#else - return __encap_and_redirect_with_nodeid(skb, tunnel->ip4, seclabel, monitor); + if (tunnel->key) { + if (from_host) + return enacap_and_redirect_nomark_ipsec(skb, + tunnel->ip4, + tunnel->key, + seclabel, + monitor); + else + return encap_and_redirect_ipsec(skb, + tunnel->ip4, + tunnel->key, + seclabel, + monitor); + } #endif + return __encap_and_redirect_with_nodeid(skb, tunnel->ip4, seclabel, monitor); } #endif /* ENCAP_IFINDEX */ #endif /* __LIB_ENCAP_H_ */ diff --git a/daemon/bpf.sha b/daemon/bpf.sha index 8f232abf6fb7..27cbd2ede5f7 100644 --- a/daemon/bpf.sha +++ b/daemon/bpf.sha @@ -1,2 +1,2 @@ -GO_BINDATA_SHA1SUM=af1ee3c89109c45ca4e8546e542ed854ce4fcc8f +GO_BINDATA_SHA1SUM=c2043b709a8ebc82155515aa31ac4637162eb3ec BPF_FILES=../bpf/COPYING ../bpf/Makefile ../bpf/Makefile.bpf ../bpf/bpf_alignchecker.c ../bpf/bpf_features.h ../bpf/bpf_ipsec.c ../bpf/bpf_lb.c ../bpf/bpf_lxc.c ../bpf/bpf_netdev.c ../bpf/bpf_overlay.c ../bpf/bpf_xdp.c ../bpf/cilium-map-migrate.c ../bpf/filter_config.h ../bpf/include/bpf/api.h ../bpf/include/elf/elf.h ../bpf/include/elf/gelf.h ../bpf/include/elf/libelf.h ../bpf/include/iproute2/bpf_elf.h ../bpf/include/linux/bpf.h ../bpf/include/linux/bpf_common.h ../bpf/include/linux/byteorder.h ../bpf/include/linux/byteorder/big_endian.h ../bpf/include/linux/byteorder/little_endian.h ../bpf/include/linux/icmp.h ../bpf/include/linux/icmpv6.h ../bpf/include/linux/if_arp.h ../bpf/include/linux/if_ether.h ../bpf/include/linux/if_packet.h ../bpf/include/linux/in.h ../bpf/include/linux/in6.h ../bpf/include/linux/ioctl.h ../bpf/include/linux/ip.h ../bpf/include/linux/ipv6.h ../bpf/include/linux/perf_event.h ../bpf/include/linux/swab.h ../bpf/include/linux/tcp.h ../bpf/include/linux/type_mapper.h ../bpf/include/linux/udp.h ../bpf/init.sh ../bpf/lib/arp.h ../bpf/lib/common.h ../bpf/lib/config.h ../bpf/lib/conntrack.h ../bpf/lib/csum.h ../bpf/lib/dbg.h ../bpf/lib/drop.h ../bpf/lib/encap.h ../bpf/lib/eps.h ../bpf/lib/eth.h ../bpf/lib/events.h ../bpf/lib/icmp6.h ../bpf/lib/ipv4.h ../bpf/lib/ipv6.h ../bpf/lib/l3.h ../bpf/lib/l4.h ../bpf/lib/lb.h ../bpf/lib/lxc.h ../bpf/lib/maps.h ../bpf/lib/metrics.h ../bpf/lib/nat46.h ../bpf/lib/policy.h ../bpf/lib/tailcall.h ../bpf/lib/trace.h ../bpf/lib/utils.h ../bpf/lib/xdp.h ../bpf/lxc_config.h ../bpf/netdev_config.h ../bpf/node_config.h ../bpf/probes/raw_change_tail.t ../bpf/probes/raw_insn.h ../bpf/probes/raw_invalidate_hash.t ../bpf/probes/raw_lpm_map.t ../bpf/probes/raw_lru_map.t ../bpf/probes/raw_main.c ../bpf/probes/raw_map_val_adj.t ../bpf/probes/raw_mark_map_val.t ../bpf/run_probes.sh ../bpf/sockops/Makefile ../bpf/sockops/bpf_redir.c ../bpf/sockops/bpf_sockops.c ../bpf/sockops/bpf_sockops.h ../bpf/sockops/sockops_config.h ../bpf/spawn_netns.sh diff --git a/daemon/daemon.go b/daemon/daemon.go index abd07446fec6..f6f21d779ac9 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -792,6 +792,7 @@ func (d *Daemon) syncLXCMap() error { } for _, ipIDPair := range specialIdentities { + hostKey := node.GetIPsecKeyIdentity() isHost := ipIDPair.ID == identity.ReservedIdentityHost if isHost { added, err := lxcmap.SyncHostEntry(ipIDPair.IP) @@ -807,7 +808,7 @@ func (d *Daemon) syncLXCMap() error { // Upsert will not propagate (reserved:foo->ID) mappings across the cluster, // and we specifically don't want to do so. - ipcache.IPIdentityCache.Upsert(ipIDPair.PrefixString(), nil, ipcache.Identity{ + ipcache.IPIdentityCache.Upsert(ipIDPair.PrefixString(), nil, hostKey, ipcache.Identity{ ID: ipIDPair.ID, Source: ipcache.FromAgentLocal, }) @@ -926,7 +927,8 @@ func NewDaemon(dp datapath.Datapath) (*Daemon, *endpointRestoreState, error) { mtuConfig := mtu.NewConfiguration(option.Config.Tunnel != option.TunnelDisabled, option.Config.MTU) if option.Config.EnableIPSec { - if err := ipsec.LoadIPSecKeysFile(option.Config.IPSecKeyFile); err != nil { + spi, err := ipsec.LoadIPSecKeysFile(option.Config.IPSecKeyFile) + if err != nil { return nil, nil, err } if option.Config.EnableIPv6 { @@ -934,6 +936,7 @@ func NewDaemon(dp datapath.Datapath) (*Daemon, *endpointRestoreState, error) { return nil, nil, err } } + node.SetIPsecKeyIdentity(spi) } nodeMngr, err := nodemanager.NewManager("all", dp.Node()) diff --git a/daemon/k8s_watcher.go b/daemon/k8s_watcher.go index 8008b9d0b15d..fbf74db5daa6 100644 --- a/daemon/k8s_watcher.go +++ b/daemon/k8s_watcher.go @@ -1511,10 +1511,12 @@ func (d *Daemon) updatePodHostIP(pod *types.Pod) (bool, error) { return true, fmt.Errorf("no/invalid PodIP: %s", pod.StatusPodIP) } + hostKey := node.GetIPsecKeyIdentity() + // Initial mapping of podIP <-> hostIP <-> identity. The mapping is // later updated once the allocator has determined the real identity. // If the endpoint remains unmanaged, the identity remains untouched. - selfOwned := ipcache.IPIdentityCache.Upsert(pod.StatusPodIP, hostIP, ipcache.Identity{ + selfOwned := ipcache.IPIdentityCache.Upsert(pod.StatusPodIP, hostIP, hostKey, ipcache.Identity{ ID: identity.ReservedIdentityUnmanaged, Source: ipcache.FromKubernetes, }) @@ -1741,7 +1743,8 @@ func (d *Daemon) updateK8sNodeTunneling(k8sNodeOld, k8sNodeNew *types.Node) erro } } - selfOwned := ipcache.IPIdentityCache.Upsert(ciliumIPStrNew, hostIPNew, ipcache.Identity{ + hostKey := node.GetIPsecKeyIdentity() + selfOwned := ipcache.IPIdentityCache.Upsert(ciliumIPStrNew, hostIPNew, hostKey, ipcache.Identity{ ID: identity.ReservedIdentityHost, Source: ipcache.FromKubernetes, }) diff --git a/pkg/bpf/endpoint.go b/pkg/bpf/endpoint.go index dadb57b4f425..6a6baca1c062 100644 --- a/pkg/bpf/endpoint.go +++ b/pkg/bpf/endpoint.go @@ -15,6 +15,7 @@ package bpf import ( + "fmt" "net" "unsafe" @@ -34,7 +35,7 @@ type EndpointKey struct { // represents both IPv6 and IPv4 (in the lowest four bytes) IP types.IPv6 `align:"$union0"` Family uint8 `align:"family"` - Pad1 uint8 `align:"pad4"` + Key uint8 `align:"key"` Pad2 uint16 `align:"pad5"` } @@ -57,6 +58,7 @@ func NewEndpointKey(ip net.IP) EndpointKey { result.Family = EndpointKeyIPv6 copy(result.IP[:], ip) } + result.Key = 0 return result } @@ -75,7 +77,7 @@ func (k EndpointKey) ToIP() net.IP { // String provides a string representation of the EndpointKey. func (k EndpointKey) String() string { if ip := k.ToIP(); ip != nil { - return ip.String() + return fmt.Sprintf("%s:%d", ip.String(), k.Key) } return "nil" } diff --git a/pkg/datapath/ipcache/listener.go b/pkg/datapath/ipcache/listener.go index d1fe4061c23a..6f310ad08c27 100644 --- a/pkg/datapath/ipcache/listener.go +++ b/pkg/datapath/ipcache/listener.go @@ -77,7 +77,7 @@ func NewListener(d datapath) *BPFListener { // IP->ID mapping will replace any existing contents; knowledge of the old pair // is not required to upsert the new pair. func (l *BPFListener) OnIPIdentityCacheChange(modType ipcache.CacheModification, cidr net.IPNet, - oldHostIP, newHostIP net.IP, oldID *identity.NumericIdentity, newID identity.NumericIdentity) { + oldHostIP, newHostIP net.IP, oldID *identity.NumericIdentity, newID identity.NumericIdentity, encryptKey uint8) { scopedLog := log.WithFields(logrus.Fields{ logfields.IPAddr: cidr, logfields.Identity: newID, @@ -98,6 +98,7 @@ func (l *BPFListener) OnIPIdentityCacheChange(modType ipcache.CacheModification, case ipcache.Upsert: value := ipcacheMap.RemoteEndpointInfo{ SecurityIdentity: uint32(newID), + Key: encryptKey, } if newHostIP != nil { diff --git a/pkg/datapath/linux/ipsec/ipsec_linux.go b/pkg/datapath/linux/ipsec/ipsec_linux.go index 2a2bea5ade32..860a59212f25 100644 --- a/pkg/datapath/linux/ipsec/ipsec_linux.go +++ b/pkg/datapath/linux/ipsec/ipsec_linux.go @@ -25,6 +25,7 @@ import ( "net" "os" "path/filepath" + "strconv" "strings" "github.com/cilium/cilium/pkg/datapath/linux/linux_defaults" @@ -43,7 +44,7 @@ const ( ) type ipSecKey struct { - Spi int + Spi uint8 ReqID int Auth *netlink.XfrmStateAlgo Crypt *netlink.XfrmStateAlgo @@ -79,7 +80,7 @@ func ipSecAttachPolicyTempl(policy *netlink.XfrmPolicy, keys *ipSecKey, srcIP, d tmpl := netlink.XfrmPolicyTmpl{ Proto: netlink.XFRM_PROTO_ESP, Mode: netlink.XFRM_MODE_TUNNEL, - Spi: keys.Spi, + Spi: int(keys.Spi), Reqid: keys.ReqID, Dst: dstIP, Src: srcIP, @@ -91,22 +92,20 @@ func ipSecAttachPolicyTempl(policy *netlink.XfrmPolicy, keys *ipSecKey, srcIP, d func ipSecJoinState(state *netlink.XfrmState, keys *ipSecKey) { state.Auth = keys.Auth state.Crypt = keys.Crypt - state.Spi = keys.Spi + state.Spi = int(keys.Spi) state.Reqid = keys.ReqID } -func ipSecReplaceState(remoteIP, localIP net.IP, spi int) error { - state := ipSecNewState() - +func ipSecReplaceState(remoteIP, localIP net.IP) (uint8, error) { key := getIPSecKeys(localIP) if key == nil { - return fmt.Errorf("IPSec key missing") + return 0, fmt.Errorf("IPSec key missing") } - key.Spi = spi + state := ipSecNewState() ipSecJoinState(state, key) state.Src = localIP state.Dst = remoteIP - return netlink.XfrmStateAdd(state) + return key.Spi, netlink.XfrmStateAdd(state) } func ipSecReplacePolicyIn(src, dst *net.IPNet) error { @@ -119,35 +118,42 @@ func ipSecReplacePolicyIn(src, dst *net.IPNet) error { } func ipSecReplacePolicyInFwd(src, dst *net.IPNet, dir netlink.Dir) error { + var spiWide uint32 + + key := getIPSecKeys(dst.IP) + if key == nil { + return fmt.Errorf("IPSec key missing") + } + spiWide = uint32(key.Spi) + policy := ipSecNewPolicy() policy.Dir = dir policy.Src = src policy.Dst = dst policy.Mark = &netlink.XfrmMark{ - Value: linux_defaults.RouteMarkDecrypt, - Mask: linux_defaults.RouteMarkMask, + Value: ((spiWide << 12) | linux_defaults.RouteMarkDecrypt), + Mask: linux_defaults.IPsecMarkMask, } + ipSecAttachPolicyTempl(policy, key, src.IP, dst.IP) + return netlink.XfrmPolicyUpdate(policy) +} + +func ipSecReplacePolicyOut(src, dst *net.IPNet, dir IPSecDir) error { + var spiWide uint32 key := getIPSecKeys(dst.IP) if key == nil { return fmt.Errorf("IPSec key missing") } - ipSecAttachPolicyTempl(policy, key, src.IP, dst.IP) - return netlink.XfrmPolicyUpdate(policy) -} + spiWide = uint32(key.Spi) -func ipSecReplacePolicyOut(src, dst *net.IPNet) error { policy := ipSecNewPolicy() policy.Dir = netlink.XFRM_DIR_OUT policy.Src = src policy.Dst = dst policy.Mark = &netlink.XfrmMark{ - Value: linux_defaults.RouteMarkEncrypt, - Mask: linux_defaults.RouteMarkMask, - } - key := getIPSecKeys(dst.IP) - if key == nil { - return fmt.Errorf("IPSec key missing") + Value: ((spiWide << 12) | linux_defaults.RouteMarkEncrypt), + Mask: linux_defaults.IPsecMarkMask, } ipSecAttachPolicyTempl(policy, key, src.IP, dst.IP) return netlink.XfrmPolicyUpdate(policy) @@ -175,7 +181,7 @@ func ipSecDeletePolicy(src, local net.IP) error { return nil } -/* UpsertIPSecEndpoint updates the IPSec context for a new endpoint inserted in +/* UpsertIPsecEndpoint updates the IPSec context for a new endpoint inserted in * the ipcache. Currently we support a global crypt/auth keyset that will encrypt * all traffic between endpoints. An IPSec context consists of two pieces a policy * and a state, the security policy database (SPD) and security association @@ -215,7 +221,10 @@ func ipSecDeletePolicy(src, local net.IP) error { * state space. Basic idea would be to reference a state using any key generated * from BPF program allowing for a single state per security ctx. */ -func UpsertIPSecEndpoint(local, remote *net.IPNet, spi int, dir IPSecDir) error { +func UpsertIPsecEndpoint(local, remote *net.IPNet, dir IPSecDir) (uint8, error) { + var spi uint8 + var err error + /* TODO: state reference ID is (dip,spi) which can be duplicated in the current global * mode. The duplication is on _all_ ingress states because dst_ip == host_ip in this * case and only a single spi entry is in use. Currently no check is done to avoid @@ -229,33 +238,33 @@ func UpsertIPSecEndpoint(local, remote *net.IPNet, spi int, dir IPSecDir) error */ if !local.IP.Equal(remote.IP) { if dir == IPSecDirIn || dir == IPSecDirBoth { - if err := ipSecReplaceState(local.IP, remote.IP, spi); err != nil { + if spi, err = ipSecReplaceState(local.IP, remote.IP); err != nil { if !os.IsExist(err) { - return fmt.Errorf("unable to replace local state: %s", err) + return 0, fmt.Errorf("unable to replace local state: %s", err) } } - if err := ipSecReplacePolicyIn(remote, local); err != nil { + if err = ipSecReplacePolicyIn(remote, local); err != nil { if !os.IsExist(err) { - return fmt.Errorf("unable to replace policy in: %s", err) + return 0, fmt.Errorf("unable to replace policy in: %s", err) } } } if dir == IPSecDirOut || dir == IPSecDirBoth { - if err := ipSecReplaceState(remote.IP, local.IP, spi); err != nil { + if spi, err = ipSecReplaceState(remote.IP, local.IP); err != nil { if !os.IsExist(err) { - return fmt.Errorf("unable to replace remote state: %s", err) + return 0, fmt.Errorf("unable to replace remote state: %s", err) } } - if err := ipSecReplacePolicyOut(local, remote); err != nil { + if err = ipSecReplacePolicyOut(local, remote, dir); err != nil { if !os.IsExist(err) { - return fmt.Errorf("unable to replace policy out: %s", err) + return 0, fmt.Errorf("unable to replace policy out: %s", err) } } } } - return nil + return spi, nil } // DeleteIPSecEndpoint deletes the endpoint from IPSec SPD and SAD @@ -289,21 +298,24 @@ func decodeIPSecKey(keyRaw string) ([]byte, error) { // LoadIPSecKeysFile imports IPSec auth and crypt keys from a file. The format // is to put a key per line as follows, (auth-algo auth-key enc-algo enc-key) -func LoadIPSecKeysFile(path string) error { +func LoadIPSecKeysFile(path string) (uint8, error) { file, err := os.Open(path) if err != nil { - return err + return 0, err } defer file.Close() return loadIPSecKeys(file) } -func loadIPSecKeys(r io.Reader) error { +func loadIPSecKeys(r io.Reader) (uint8, error) { + var spi uint8 + scanner := bufio.NewScanner(r) scanner.Split(bufio.ScanLines) for scanner.Scan() { + offset := 0 + ipSecKey := &ipSecKey{ - Spi: 1, ReqID: 1, } @@ -311,20 +323,32 @@ func loadIPSecKeys(r io.Reader) error { // auth-algo auth-key enc-algo enc-key s := strings.Split(scanner.Text(), " ") if len(s) < 4 { - return fmt.Errorf("missing IPSec keys or invalid format") + return 0, fmt.Errorf("missing IPSec keys or invalid format") } - authkey, err := decodeIPSecKey(s[1]) + spiI, err := strconv.Atoi(s[0]) if err != nil { - return fmt.Errorf("unable to decode authkey string %q", s[1]) + // If no version info is provided assume using key format without + // versioning and assign SPI. + spiI = 1 + offset = -1 + } + if spiI > linux_defaults.IPsecMaxKeyVersion { + return 0, fmt.Errorf("encryption Key space exhausted, id must be less than %d. Attempted %q", linux_defaults.IPsecMaxKeyVersion, s[0]) } - authname := s[0] + spi = uint8(spiI) - enckey, err := decodeIPSecKey(s[3]) + authkey, err := decodeIPSecKey(s[2+offset]) if err != nil { - return fmt.Errorf("unable to decode enckey string %q", s[3]) + return 0, fmt.Errorf("unable to decode authkey string %q", s[1+offset]) } - encname := s[2] + authname := s[1+offset] + + enckey, err := decodeIPSecKey(s[4+offset]) + if err != nil { + return 0, fmt.Errorf("unable to decode enckey string %q", s[3+offset]) + } + encname := s[3+offset] ipSecKey.Auth = &netlink.XfrmStateAlgo{ Name: authname, @@ -334,13 +358,15 @@ func loadIPSecKeys(r io.Reader) error { Name: encname, Key: enckey, } - if len(s) == 5 { - ipSecKeysGlobal[s[4]] = ipSecKey + ipSecKey.Spi = spi + + if len(s) == 6+offset { + ipSecKeysGlobal[s[5+offset]] = ipSecKey } else { ipSecKeysGlobal[""] = ipSecKey } } - return nil + return spi, nil } // EnableIPv6Forwarding sets proc file to enable IPv6 forwarding diff --git a/pkg/datapath/linux/ipsec/ipsec_linux_test.go b/pkg/datapath/linux/ipsec/ipsec_linux_test.go index 6f6f14410459..955aaa9d05f1 100644 --- a/pkg/datapath/linux/ipsec/ipsec_linux_test.go +++ b/pkg/datapath/linux/ipsec/ipsec_linux_test.go @@ -36,19 +36,18 @@ var _ = Suite(&IPSecSuitePrivileged{}) var ( path = "ipsec_keys_test" - keysDat = []byte("hmac(sha256) 0123456789abcdef0123456789abcdef cbc(aes) 0123456789abcdef0123456789abcdef\nhmac(sha256) 0123456789abcdef0123456789abcdef cbc(aes) 0123456789abcdef0123456789abcdef foobar\n") - invalidKeysDat = []byte("test abcdefghijklmnopqrstuvwzyzABCDEF test abcdefghijklmnopqrstuvwzyzABCDEF\n") + keysDat = []byte("1 hmac(sha256) 0123456789abcdef0123456789abcdef cbc(aes) 0123456789abcdef0123456789abcdef\n1 hmac(sha256) 0123456789abcdef0123456789abcdef cbc(aes) 0123456789abcdef0123456789abcdef foobar\n") + invalidKeysDat = []byte("1 test abcdefghijklmnopqrstuvwzyzABCDEF test abcdefghijklmnopqrstuvwzyzABCDEF\n") ) func (p *IPSecSuitePrivileged) TestLoadKeysNoFile(c *C) { - err := LoadIPSecKeysFile(path) + _, err := LoadIPSecKeysFile(path) c.Assert(os.IsNotExist(err), Equals, true) } func (p *IPSecSuitePrivileged) TestInvalidLoadKeys(c *C) { keys := bytes.NewReader(invalidKeysDat) - err := loadIPSecKeys(keys) - spi := 1 + _, err := loadIPSecKeys(keys) c.Assert(err, NotNil) _, local, err := net.ParseCIDR("1.1.3.4/16") @@ -56,19 +55,17 @@ func (p *IPSecSuitePrivileged) TestInvalidLoadKeys(c *C) { _, remote, err := net.ParseCIDR("1.2.3.4/16") c.Assert(err, IsNil) - err = UpsertIPSecEndpoint(local, remote, spi, IPSecDirBoth) + _, err = UpsertIPsecEndpoint(local, remote, IPSecDirBoth) c.Assert(err, NotNil) } func (p *IPSecSuitePrivileged) TestLoadKeys(c *C) { keys := bytes.NewReader(keysDat) - err := loadIPSecKeys(keys) + _, err := loadIPSecKeys(keys) c.Assert(err, IsNil) } func (p *IPSecSuitePrivileged) TestUpsertIPSecEquals(c *C) { - spi := 1 - _, local, err := net.ParseCIDR("1.2.3.4/16") c.Assert(err, IsNil) _, remote, err := net.ParseCIDR("1.2.3.4/16") @@ -84,7 +81,7 @@ func (p *IPSecSuitePrivileged) TestUpsertIPSecEquals(c *C) { ipSecKeysGlobal["1.2.3.4"] = key ipSecKeysGlobal[""] = key - err = UpsertIPSecEndpoint(local, remote, spi, IPSecDirBoth) + _, err = UpsertIPsecEndpoint(local, remote, IPSecDirBoth) c.Assert(err, IsNil) err = DeleteIPSecEndpoint(remote.IP, local.IP) @@ -96,8 +93,6 @@ func (p *IPSecSuitePrivileged) TestUpsertIPSecEquals(c *C) { } func (p *IPSecSuitePrivileged) TestUpsertIPSecEndpoint(c *C) { - spi := 1 - _, local, err := net.ParseCIDR("1.1.3.4/16") c.Assert(err, IsNil) _, remote, err := net.ParseCIDR("1.2.3.4/16") @@ -114,7 +109,7 @@ func (p *IPSecSuitePrivileged) TestUpsertIPSecEndpoint(c *C) { ipSecKeysGlobal["1.2.3.4"] = key ipSecKeysGlobal[""] = key - err = UpsertIPSecEndpoint(local, remote, spi, IPSecDirBoth) + _, err = UpsertIPsecEndpoint(local, remote, IPSecDirBoth) c.Assert(err, IsNil) err = DeleteIPSecEndpoint(remote.IP, local.IP) @@ -126,14 +121,12 @@ func (p *IPSecSuitePrivileged) TestUpsertIPSecEndpoint(c *C) { } func (p *IPSecSuitePrivileged) TestUpsertIPSecKeyMissing(c *C) { - spi := 1 - _, local, err := net.ParseCIDR("1.1.3.4/16") c.Assert(err, IsNil) _, remote, err := net.ParseCIDR("1.2.3.4/16") c.Assert(err, IsNil) - err = UpsertIPSecEndpoint(local, remote, spi, IPSecDirBoth) + _, err = UpsertIPsecEndpoint(local, remote, IPSecDirBoth) c.Assert(err, ErrorMatches, "unable to replace local state: IPSec key missing") err = DeleteIPSecEndpoint(remote.IP, local.IP) diff --git a/pkg/datapath/linux/linux_defaults/linux_defaults.go b/pkg/datapath/linux/linux_defaults/linux_defaults.go index c3b985aa516b..3644a37ff85f 100644 --- a/pkg/datapath/linux/linux_defaults/linux_defaults.go +++ b/pkg/datapath/linux/linux_defaults/linux_defaults.go @@ -36,9 +36,9 @@ const ( // TunnelDeviceName the default name of the tunnel device when using vxlan TunnelDeviceName = "cilium_vxlan" - // IPSec SPI value for endpoint rules - IPSecEndpointSPI = 1 + // IPSec offset value for node rules + IPsecMaxKeyVersion = 16 - // IPSec SPI value for node rules - IPSecNodeSPI = 2 + // IPsecMarkMask is the mask required for the IPsec SPI and encrypt/decrypt bits + IPsecMarkMask = 0xFF00 ) diff --git a/pkg/datapath/linux/node.go b/pkg/datapath/linux/node.go index 27e02e3f41c2..e935f9013274 100644 --- a/pkg/datapath/linux/node.go +++ b/pkg/datapath/linux/node.go @@ -60,7 +60,7 @@ func NewNodeHandler(datapathConfig DatapathConfiguration, nodeAddressing datapat // with encapsulation mode enabled. The CIDR and IP of both the old and new // node are provided as context. The caller expects the tunnel mapping in the // datapath to be updated. -func updateTunnelMapping(oldCIDR, newCIDR *cidr.CIDR, oldIP, newIP net.IP, firstAddition, encapEnabled bool) { +func updateTunnelMapping(oldCIDR, newCIDR *cidr.CIDR, oldIP, newIP net.IP, firstAddition, encapEnabled bool, encryptKey uint8) { if !encapEnabled { // When the protocol family is disabled, the initial node addition will // trigger a deletion to clean up leftover entries. The deletion happens @@ -78,7 +78,7 @@ func updateTunnelMapping(oldCIDR, newCIDR *cidr.CIDR, oldIP, newIP net.IP, first "allocCIDR": newCIDR, }).Debug("Updating tunnel map entry") - if err := tunnel.TunnelMap.SetTunnelEndpoint(newCIDR.IP, newIP); err != nil { + if err := tunnel.TunnelMap.SetTunnelEndpoint(encryptKey, newCIDR.IP, newIP); err != nil { log.WithError(err).WithFields(logrus.Fields{ "allocCIDR": newCIDR, }).Error("bpf: Unable to update in tunnel endpoint map") @@ -425,12 +425,15 @@ func (n *linuxNodeHandler) NodeUpdate(oldNode, newNode node.Node) error { return nil } -func (n *linuxNodeHandler) enableIPsec(newNode *node.Node) { - upsertIPsecLog := func(err error, spec string, loc, rem *net.IPNet) { +func (n *linuxNodeHandler) enableIPsec(newNode *node.Node) uint8 { + var spi uint8 + var err error + upsertIPsecLog := func(err error, spec string, loc, rem *net.IPNet, spi uint8) { scopedLog := log.WithFields(logrus.Fields{ logfields.Reason: spec, "local-ip": loc, "remote-ip": rem, + "spi": spi, }) if err != nil { scopedLog.WithError(err).Error("IPsec enable failed") @@ -443,7 +446,7 @@ func (n *linuxNodeHandler) enableIPsec(newNode *node.Node) { n.replaceHostRules() } - if n.nodeConfig.EnableIPv4 { + if n.nodeConfig.EnableIPv4 && newNode.IPv4AllocCIDR != nil { new4Net := &net.IPNet{IP: newNode.IPv4AllocCIDR.IP, Mask: newNode.IPv4AllocCIDR.Mask} if newNode.IsLocal() { n.replaceNodeIPSecInRoute(new4Net) @@ -451,21 +454,21 @@ func (n *linuxNodeHandler) enableIPsec(newNode *node.Node) { if ciliumInternalIPv4 != nil { ipsecLocal := &net.IPNet{IP: n.nodeAddressing.IPv4().Router(), Mask: n.nodeAddressing.IPv4().AllocationCIDR().Mask} ipsecIPv4Wildcard := &net.IPNet{IP: net.ParseIP(wildcardIPv4), Mask: net.IPv4Mask(0, 0, 0, 0)} - err := ipsec.UpsertIPSecEndpoint(ipsecLocal, ipsecIPv4Wildcard, linux_defaults.IPSecEndpointSPI, ipsec.IPSecDirIn) - upsertIPsecLog(err, "local IPv4", ipsecLocal, ipsecIPv4Wildcard) + spi, err = ipsec.UpsertIPsecEndpoint(ipsecLocal, ipsecIPv4Wildcard, ipsec.IPSecDirIn) + upsertIPsecLog(err, "local IPv4", ipsecLocal, ipsecIPv4Wildcard, spi) } } else { if ciliumInternalIPv4 := newNode.GetCiliumInternalIP(false); ciliumInternalIPv4 != nil { ipsecLocal := &net.IPNet{IP: n.nodeAddressing.IPv4().Router(), Mask: n.nodeAddressing.IPv4().AllocationCIDR().Mask} ipsecRemote := &net.IPNet{IP: ciliumInternalIPv4, Mask: newNode.IPv4AllocCIDR.Mask} n.replaceNodeIPSecOutRoute(new4Net) - err := ipsec.UpsertIPSecEndpoint(ipsecLocal, ipsecRemote, linux_defaults.IPSecEndpointSPI, ipsec.IPSecDirOut) - upsertIPsecLog(err, "IPv4", ipsecLocal, ipsecRemote) + spi, err = ipsec.UpsertIPsecEndpoint(ipsecLocal, ipsecRemote, ipsec.IPSecDirOut) + upsertIPsecLog(err, "IPv4", ipsecLocal, ipsecRemote, spi) } } } - if n.nodeConfig.EnableIPv6 { + if n.nodeConfig.EnableIPv6 && newNode.IPv6AllocCIDR != nil { new6Net := &net.IPNet{IP: newNode.IPv6AllocCIDR.IP, Mask: newNode.IPv6AllocCIDR.Mask} if newNode.IsLocal() { n.replaceHostRules() @@ -474,19 +477,20 @@ func (n *linuxNodeHandler) enableIPsec(newNode *node.Node) { if ciliumInternalIPv6 != nil { ipsecLocal := &net.IPNet{IP: n.nodeAddressing.IPv6().Router(), Mask: n.nodeAddressing.IPv6().AllocationCIDR().Mask} ipsecIPv6Wildcard := &net.IPNet{IP: net.ParseIP(wildcardIPv6), Mask: net.CIDRMask(0, 0)} - err := ipsec.UpsertIPSecEndpoint(ipsecLocal, ipsecIPv6Wildcard, linux_defaults.IPSecEndpointSPI, ipsec.IPSecDirIn) - upsertIPsecLog(err, "local IPv6", ipsecLocal, ipsecIPv6Wildcard) + spi, err = ipsec.UpsertIPsecEndpoint(ipsecLocal, ipsecIPv6Wildcard, ipsec.IPSecDirIn) + upsertIPsecLog(err, "local IPv6", ipsecLocal, ipsecIPv6Wildcard, spi) } } else { if ciliumInternalIPv6 := newNode.GetCiliumInternalIP(true); ciliumInternalIPv6 != nil { ipsecLocalWildcard := &net.IPNet{IP: net.ParseIP(wildcardIPv6), Mask: net.CIDRMask(0, 0)} ipsecRemote := &net.IPNet{IP: ciliumInternalIPv6, Mask: newNode.IPv6AllocCIDR.Mask} n.replaceNodeIPSecOutRoute(new6Net) - err := ipsec.UpsertIPSecEndpoint(ipsecLocalWildcard, ipsecRemote, linux_defaults.IPSecEndpointSPI, ipsec.IPSecDirOut) - upsertIPsecLog(err, "IPv6", ipsecLocalWildcard, ipsecRemote) + spi, err := ipsec.UpsertIPsecEndpoint(ipsecLocalWildcard, ipsecRemote, ipsec.IPSecDirOut) + upsertIPsecLog(err, "IPv6", ipsecLocalWildcard, ipsecRemote, spi) } } } + return spi } func (n *linuxNodeHandler) nodeUpdate(oldNode, newNode *node.Node, firstAddition bool) error { @@ -495,6 +499,7 @@ func (n *linuxNodeHandler) nodeUpdate(oldNode, newNode *node.Node, firstAddition oldIP4, oldIP6 net.IP newIP4 = newNode.GetNodeIP(false) newIP6 = newNode.GetNodeIP(true) + encryptKey uint8 ) if oldNode != nil { @@ -505,7 +510,7 @@ func (n *linuxNodeHandler) nodeUpdate(oldNode, newNode *node.Node, firstAddition } if n.nodeConfig.EnableIPSec { - n.enableIPsec(newNode) + encryptKey = n.enableIPsec(newNode) } if newNode.IsLocal() { @@ -526,9 +531,9 @@ func (n *linuxNodeHandler) nodeUpdate(oldNode, newNode *node.Node, firstAddition // Update the tunnel mapping of the node. In case the // node has changed its CIDR range, a new entry in the // map is created and the old entry is removed. - updateTunnelMapping(oldIP4Cidr, newNode.IPv4AllocCIDR, oldIP4, newIP4, firstAddition, n.nodeConfig.EnableIPv4) + updateTunnelMapping(oldIP4Cidr, newNode.IPv4AllocCIDR, oldIP4, newIP4, firstAddition, n.nodeConfig.EnableIPv4, encryptKey) // Not a typo, the IPv4 host IP is used to build the IPv6 overlay - updateTunnelMapping(oldIP6Cidr, newNode.IPv6AllocCIDR, oldIP4, newIP4, firstAddition, n.nodeConfig.EnableIPv6) + updateTunnelMapping(oldIP6Cidr, newNode.IPv6AllocCIDR, oldIP4, newIP4, firstAddition, n.nodeConfig.EnableIPv6, encryptKey) if !n.nodeConfig.UseSingleClusterRoute { n.updateOrRemoveNodeRoutes([]*cidr.CIDR{oldIP4Cidr}, []*cidr.CIDR{newNode.IPv4AllocCIDR}) diff --git a/pkg/endpoint/policy.go b/pkg/endpoint/policy.go index 909c6c82ba2a..8516d8608e24 100644 --- a/pkg/endpoint/policy.go +++ b/pkg/endpoint/policy.go @@ -511,13 +511,14 @@ func (e *Endpoint) runIPIdentitySync(endpointIP addressing.CiliumIP) { IP := endpointIP.IP() ID := e.SecurityIdentity.ID hostIP := node.GetExternalIPv4() + key := node.GetIPsecKeyIdentity() metadata := e.FormatGlobalEndpointID() // Release lock as we do not want to have long-lasting key-value // store operations resulting in lock being held for a long time. e.RUnlock() - if err := ipcache.UpsertIPToKVStore(ctx, IP, hostIP, ID, metadata); err != nil { + if err := ipcache.UpsertIPToKVStore(ctx, IP, hostIP, ID, key, metadata); err != nil { return fmt.Errorf("unable to add endpoint IP mapping '%s'->'%d': %s", IP.String(), ID, err) } return nil diff --git a/pkg/envoy/resources.go b/pkg/envoy/resources.go index 909e10a363b5..d63cfb487bbf 100644 --- a/pkg/envoy/resources.go +++ b/pkg/envoy/resources.go @@ -63,7 +63,7 @@ func (cache *NPHDSCache) OnIPIdentityCacheGC() { // OnIPIdentityCacheChange pushes modifications to the IP<->Identity mapping // into the Network Policy Host Discovery Service (NPHDS). func (cache *NPHDSCache) OnIPIdentityCacheChange(modType ipcache.CacheModification, cidr net.IPNet, - oldHostIP, newHostIP net.IP, oldID *identity.NumericIdentity, newID identity.NumericIdentity) { + oldHostIP, newHostIP net.IP, oldID *identity.NumericIdentity, newID identity.NumericIdentity, encryptKey uint8) { // An upsert where an existing pair exists should translate into a // delete (for the old Identity) followed by an upsert (for the new). if oldID != nil && modType == ipcache.Upsert { @@ -72,7 +72,7 @@ func (cache *NPHDSCache) OnIPIdentityCacheChange(modType ipcache.CacheModificati return } - cache.OnIPIdentityCacheChange(ipcache.Delete, cidr, nil, nil, nil, *oldID) + cache.OnIPIdentityCacheChange(ipcache.Delete, cidr, nil, nil, nil, *oldID, encryptKey) } cidrStr := cidr.String() diff --git a/pkg/identity/identity.go b/pkg/identity/identity.go index 52d0c14ab3da..33551d1ff26e 100644 --- a/pkg/identity/identity.go +++ b/pkg/identity/identity.go @@ -64,6 +64,7 @@ type IPIdentityPair struct { Mask net.IPMask `json:"Mask"` HostIP net.IP `json:"HostIP"` ID NumericIdentity `json:"ID"` + Key uint8 `json:"Key"` Metadata string `json:"Metadata"` } diff --git a/pkg/ipcache/cidr.go b/pkg/ipcache/cidr.go index dd396b654037..5fd2fb58549f 100644 --- a/pkg/ipcache/cidr.go +++ b/pkg/ipcache/cidr.go @@ -61,7 +61,7 @@ func AllocateCIDRs(impl Implementation, prefixes []*net.IPNet) error { } for prefixString, id := range allocatedIdentities { - IPIdentityCache.Upsert(prefixString, nil, Identity{ + IPIdentityCache.Upsert(prefixString, nil, 0, Identity{ ID: id.ID, Source: FromCIDR, }) diff --git a/pkg/ipcache/ipcache.go b/pkg/ipcache/ipcache.go index d0b6eca7877a..452d25ae0603 100644 --- a/pkg/ipcache/ipcache.go +++ b/pkg/ipcache/ipcache.go @@ -63,6 +63,12 @@ type Identity struct { Source Source } +// IPKeyPair is the (IP, key) pair used of the identity +type IPKeyPair struct { + IP net.IP + Key uint8 +} + // IPCache is a collection of mappings: // - mapping of endpoint IP or CIDR to security identities of all endpoints // which are part of the same cluster, and vice-versa @@ -71,7 +77,7 @@ type IPCache struct { mutex lock.RWMutex ipToIdentityCache map[string]Identity identityToIPCache map[identity.NumericIdentity]map[string]struct{} - ipToHostIPCache map[string]net.IP + ipToHostIPCache map[string]IPKeyPair // prefixLengths reference-count the number of CIDRs that use // particular prefix lengths for the mask. @@ -94,7 +100,7 @@ func NewIPCache() *IPCache { return &IPCache{ ipToIdentityCache: map[string]Identity{}, identityToIPCache: map[identity.NumericIdentity]map[string]struct{}{}, - ipToHostIPCache: map[string]net.IP{}, + ipToHostIPCache: map[string]IPKeyPair{}, v4PrefixLengths: map[int]int{}, v6PrefixLengths: map[int]int{}, } @@ -211,6 +217,11 @@ func endpointIPToCIDR(ip net.IP) *net.IPNet { } } +func (ipc *IPCache) getHostIPCache(ip string) (net.IP, uint8) { + ipKeyPair := ipc.ipToHostIPCache[ip] + return ipKeyPair.IP, ipKeyPair.Key +} + // Upsert adds / updates the provided IP (endpoint or CIDR prefix) and identity // into the IPCache. // @@ -219,10 +230,11 @@ func endpointIPToCIDR(ip net.IP) *net.IPNet { // managed by the kvstore layer. See allowOverwrite() for rules on ownership. // hostIP is the location of the given IP. It is optional (may be nil) and is // propagated to the listeners. -func (ipc *IPCache) Upsert(ip string, hostIP net.IP, newIdentity Identity) bool { +func (ipc *IPCache) Upsert(ip string, hostIP net.IP, hostKey uint8, newIdentity Identity) bool { scopedLog := log.WithFields(logrus.Fields{ logfields.IPAddr: ip, logfields.Identity: newIdentity, + logfields.Key: hostKey, }) ipc.mutex.Lock() @@ -232,7 +244,7 @@ func (ipc *IPCache) Upsert(ip string, hostIP net.IP, newIdentity Identity) bool var oldIdentity *identity.NumericIdentity callbackListeners := true - oldHostIP := ipc.ipToHostIPCache[ip] + oldHostIP, oldHostKey := ipc.getHostIPCache(ip) cachedIdentity, found := ipc.ipToIdentityCache[ip] if found { @@ -242,7 +254,7 @@ func (ipc *IPCache) Upsert(ip string, hostIP net.IP, newIdentity Identity) bool // Skip update if IP is already mapped to the given identity // and the host IP hasn't changed. - if cachedIdentity == newIdentity && bytes.Compare(oldHostIP, hostIP) == 0 { + if cachedIdentity == newIdentity && bytes.Compare(oldHostIP, hostIP) == 0 && hostKey == oldHostKey { return true } @@ -280,7 +292,7 @@ func (ipc *IPCache) Upsert(ip string, hostIP net.IP, newIdentity Identity) bool if !found { cidrStr := cidr.String() if cidrIdentity, cidrFound := ipc.ipToIdentityCache[cidrStr]; cidrFound { - oldHostIP = ipc.ipToHostIPCache[cidrStr] + oldHostIP, _ = ipc.getHostIPCache(cidrStr) if cidrIdentity.ID != newIdentity.ID || bytes.Compare(oldHostIP, hostIP) != 0 { scopedLog.Debug("New endpoint IP started shadowing existing CIDR to identity mapping") oldIdentity = &cidrIdentity.ID @@ -316,12 +328,12 @@ func (ipc *IPCache) Upsert(ip string, hostIP net.IP, newIdentity Identity) bool if hostIP == nil { delete(ipc.ipToHostIPCache, ip) } else { - ipc.ipToHostIPCache[ip] = hostIP + ipc.ipToHostIPCache[ip] = IPKeyPair{IP: hostIP, Key: hostKey} } if callbackListeners { for _, listener := range ipc.listeners { - listener.OnIPIdentityCacheChange(Upsert, *cidr, oldHostIP, hostIP, oldIdentity, newIdentity.ID) + listener.OnIPIdentityCacheChange(Upsert, *cidr, oldHostIP, hostIP, oldIdentity, newIdentity.ID, hostKey) } } @@ -332,13 +344,13 @@ func (ipc *IPCache) Upsert(ip string, hostIP net.IP, newIdentity Identity) bool // the listener's "OnIPIdentityCacheChange" method for each entry in the cache. func (ipc *IPCache) DumpToListenerLocked(listener IPIdentityMappingListener) { for ip, identity := range ipc.ipToIdentityCache { - hostIP := ipc.ipToHostIPCache[ip] + hostIP, encryptKey := ipc.getHostIPCache(ip) _, cidr, err := net.ParseCIDR(ip) if err != nil { endpointIP := net.ParseIP(ip) cidr = endpointIPToCIDR(endpointIP) } - listener.OnIPIdentityCacheChange(Upsert, *cidr, nil, hostIP, nil, identity.ID) + listener.OnIPIdentityCacheChange(Upsert, *cidr, nil, hostIP, nil, identity.ID, encryptKey) } } @@ -363,7 +375,7 @@ func (ipc *IPCache) deleteLocked(ip string, source Source) { var cidr *net.IPNet cacheModification := Delete - oldHostIP := ipc.ipToHostIPCache[ip] + oldHostIP, encryptKey := ipc.getHostIPCache(ip) var newHostIP net.IP var oldIdentity *identity.NumericIdentity newIdentity := cachedIdentity @@ -402,7 +414,7 @@ func (ipc *IPCache) deleteLocked(ip string, source Source) { // restore its mapping with the listeners if that was the case. cidrStr := cidr.String() if cidrIdentity, cidrFound := ipc.ipToIdentityCache[cidrStr]; cidrFound { - newHostIP = ipc.ipToHostIPCache[cidrStr] + newHostIP, _ = ipc.getHostIPCache(cidrStr) if cidrIdentity.ID != cachedIdentity.ID || bytes.Compare(oldHostIP, newHostIP) != 0 { scopedLog.Debug("Removal of endpoint IP revives shadowed CIDR to identity mapping") cacheModification = Upsert @@ -431,7 +443,7 @@ func (ipc *IPCache) deleteLocked(ip string, source Source) { if callbackListeners { for _, listener := range ipc.listeners { listener.OnIPIdentityCacheChange(cacheModification, *cidr, oldHostIP, newHostIP, - oldIdentity, newIdentity.ID) + oldIdentity, newIdentity.ID, encryptKey) } } } diff --git a/pkg/ipcache/ipcache_test.go b/pkg/ipcache/ipcache_test.go index 70912159857d..6279dc55f3ec 100644 --- a/pkg/ipcache/ipcache_test.go +++ b/pkg/ipcache/ipcache_test.go @@ -47,7 +47,7 @@ func (s *IPCacheTestSuite) TestIPCache(c *C) { // Deletion of key that doesn't exist doesn't cause panic. IPIdentityCache.Delete(endpointIP, FromKVStore) - IPIdentityCache.Upsert(endpointIP, nil, Identity{ + IPIdentityCache.Upsert(endpointIP, nil, 0, Identity{ ID: identity, Source: FromKVStore, }) @@ -62,13 +62,13 @@ func (s *IPCacheTestSuite) TestIPCache(c *C) { c.Assert(cachedIdentity.Source, Equals, FromKVStore) // kubernetes source cannot update kvstore source - updated := IPIdentityCache.Upsert(endpointIP, nil, Identity{ + updated := IPIdentityCache.Upsert(endpointIP, nil, 0, Identity{ ID: identity, Source: FromKubernetes, }) c.Assert(updated, Equals, false) - IPIdentityCache.Upsert(endpointIP, nil, Identity{ + IPIdentityCache.Upsert(endpointIP, nil, 0, Identity{ ID: identity, Source: FromKVStore, }) @@ -87,13 +87,13 @@ func (s *IPCacheTestSuite) TestIPCache(c *C) { c.Assert(exists, Equals, false) - IPIdentityCache.Upsert(endpointIP, nil, Identity{ + IPIdentityCache.Upsert(endpointIP, nil, 0, Identity{ ID: identity, Source: FromKVStore, }) newIdentity := identityPkg.NumericIdentity(69) - IPIdentityCache.Upsert(endpointIP, nil, Identity{ + IPIdentityCache.Upsert(endpointIP, nil, 0, Identity{ ID: newIdentity, Source: FromKVStore, }) @@ -120,7 +120,7 @@ func (s *IPCacheTestSuite) TestIPCache(c *C) { identities := []identityPkg.NumericIdentity{5, 67, 29, 29, 29} for index := range endpointIPs { - IPIdentityCache.Upsert(endpointIPs[index], nil, Identity{ + IPIdentityCache.Upsert(endpointIPs[index], nil, 0, Identity{ ID: identities[index], Source: FromKVStore, }) diff --git a/pkg/ipcache/kvstore.go b/pkg/ipcache/kvstore.go index 102b4b8ded5b..6dfdcf93967b 100644 --- a/pkg/ipcache/kvstore.go +++ b/pkg/ipcache/kvstore.go @@ -166,13 +166,14 @@ func (r *kvReferenceCounter) release(ctx context.Context, key string) (err error // UpsertIPToKVStore updates / inserts the provided IP->Identity mapping into the // kvstore, which will subsequently trigger an event in NewIPIdentityWatcher(). -func UpsertIPToKVStore(ctx context.Context, IP, hostIP net.IP, ID identity.NumericIdentity, metadata string) error { +func UpsertIPToKVStore(ctx context.Context, IP, hostIP net.IP, ID identity.NumericIdentity, key uint8, metadata string) error { ipKey := path.Join(IPIdentitiesPath, AddressSpace, IP.String()) ipIDPair := identity.IPIdentityPair{ IP: IP, ID: ID, Metadata: metadata, HostIP: hostIP, + Key: key, } marshaledIPIDPair, err := json.Marshal(ipIDPair) @@ -183,6 +184,7 @@ func UpsertIPToKVStore(ctx context.Context, IP, hostIP net.IP, ID identity.Numer log.WithFields(logrus.Fields{ logfields.IPAddr: ipIDPair.IP, logfields.Identity: ipIDPair.ID, + logfields.Key: ipIDPair.Key, logfields.Modification: Upsert, }).Debug("upserting IP->ID mapping to kvstore") @@ -313,7 +315,7 @@ restart: continue } - IPIdentityCache.Upsert(ipIDPair.PrefixString(), ipIDPair.HostIP, Identity{ + IPIdentityCache.Upsert(ipIDPair.PrefixString(), ipIDPair.HostIP, ipIDPair.Key, Identity{ ID: ipIDPair.ID, Source: FromKVStore, }) diff --git a/pkg/ipcache/listener.go b/pkg/ipcache/listener.go index 701151258c34..cd5379956d3c 100644 --- a/pkg/ipcache/listener.go +++ b/pkg/ipcache/listener.go @@ -40,7 +40,7 @@ type IPIdentityMappingListener interface { // hostIP is the IP address of the location of the cidr. // hostIP is optional and may only be non-nil for an Upsert modification. OnIPIdentityCacheChange(modType CacheModification, cidr net.IPNet, oldHostIP, newHostIP net.IP, - oldID *identity.NumericIdentity, newID identity.NumericIdentity) + oldID *identity.NumericIdentity, newID identity.NumericIdentity, encryptKey uint8) // OnIPIdentityCacheGC will be called to sync other components which are // reliant upon the IPIdentityCache with the IPIdentityCache. diff --git a/pkg/logging/logfields/logfields.go b/pkg/logging/logfields/logfields.go index 77df0e7b68e2..57949a7d0a29 100644 --- a/pkg/logging/logfields/logfields.go +++ b/pkg/logging/logfields/logfields.go @@ -350,4 +350,7 @@ const ( // Probe is the name of a status probe. Probe = "probe" + + // Key is the identity of the encryption key + Key = "key" ) diff --git a/pkg/maps/ipcache/ipcache.go b/pkg/maps/ipcache/ipcache.go index 6ac71dac3d36..43cdac7a7593 100644 --- a/pkg/maps/ipcache/ipcache.go +++ b/pkg/maps/ipcache/ipcache.go @@ -128,10 +128,11 @@ func NewKey(ip net.IP, mask net.IPMask) Key { type RemoteEndpointInfo struct { SecurityIdentity uint32 `align:"sec_label"` TunnelEndpoint [4]byte `align:"tunnel_endpoint"` + Key uint8 `align:"key"` } func (v *RemoteEndpointInfo) String() string { - return fmt.Sprintf("%d", v.SecurityIdentity) + return fmt.Sprintf("%d %d", v.SecurityIdentity, v.Key) } // GetValuePtr returns the unsafe pointer to the BPF value. diff --git a/pkg/maps/tunnel/logfields.go b/pkg/maps/tunnel/logfields.go index b5cef269037d..df376a1b4a96 100644 --- a/pkg/maps/tunnel/logfields.go +++ b/pkg/maps/tunnel/logfields.go @@ -22,6 +22,7 @@ import ( const ( fieldEndpoint = "endpoint" fieldPrefix = "prefix" + fieldKey = "key" ) var ( diff --git a/pkg/maps/tunnel/tunnel.go b/pkg/maps/tunnel/tunnel.go index 72eae6556eae..4d0e81f2390e 100644 --- a/pkg/maps/tunnel/tunnel.go +++ b/pkg/maps/tunnel/tunnel.go @@ -77,12 +77,13 @@ func newTunnelEndpoint(ip net.IP) *TunnelEndpoint { func (v TunnelEndpoint) NewValue() bpf.MapValue { return &TunnelEndpoint{} } // SetTunnelEndpoint adds/replaces a prefix => tunnel-endpoint mapping -func (m *Map) SetTunnelEndpoint(prefix net.IP, endpoint net.IP) error { +func (m *Map) SetTunnelEndpoint(encryptKey uint8, prefix, endpoint net.IP) error { key, val := newTunnelEndpoint(prefix), newTunnelEndpoint(endpoint) - + val.EndpointKey.Key = encryptKey log.WithFields(logrus.Fields{ fieldPrefix: prefix, fieldEndpoint: endpoint, + fieldKey: encryptKey, }).Debug("Updating tunnel map entry") return TunnelMap.Update(key, val) diff --git a/pkg/node/node_address.go b/pkg/node/node_address.go index 1a72a67101e8..0e7bc93560bb 100644 --- a/pkg/node/node_address.go +++ b/pkg/node/node_address.go @@ -41,6 +41,8 @@ var ( ipv6RouterAddress net.IP ipv4AllocRange *cidr.CIDR ipv6AllocRange *cidr.CIDR + + ipsecKeyIdentity uint8 ) func makeIPv6HostIP() net.IP { @@ -389,3 +391,14 @@ func getCiliumHostIPs() (ipv4GW, ipv6Router net.IP) { } return getCiliumHostIPsFromNetDev(option.Config.HostDevice) } + +// SetIPsecKeyIdentity sets the IPsec key identity an opaque value used to +// identity encryption keys used on the node. +func SetIPsecKeyIdentity(id uint8) { + ipsecKeyIdentity = id +} + +// GetIPsecKeyIdentity returns the IPsec key identity of the node +func GetIPsecKeyIdentity() uint8 { + return ipsecKeyIdentity +}