Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

High-Scale IPcache: Chapter 3 #25438

Merged
merged 8 commits into from
May 22, 2023
Merged
7 changes: 5 additions & 2 deletions bpf/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ LB_OPTIONS = \
-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DENABLE_IPSEC:-DENABLE_NODEPORT:-DENABLE_NODEPORT_ACCELERATION:-DENABLE_SESSION_AFFINITY:-DENABLE_BANDWIDTH_MANAGER:-DENABLE_SRC_RANGE_CHECK:-DLB_SELECTION:-DLB_SELECTION_MAGLEV:-DENABLE_SOCKET_LB_HOST_ONLY: \
-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DENABLE_IPSEC:-DENABLE_NODEPORT:-DENABLE_NODEPORT_ACCELERATION:-DENABLE_SESSION_AFFINITY:-DENABLE_BANDWIDTH_MANAGER:-DENABLE_SRC_RANGE_CHECK:-DLB_SELECTION:-DLB_SELECTION_MAGLEV:-DENABLE_SOCKET_LB_HOST_ONLY:-DENABLE_L7_LB:-DENABLE_SCTP: \
-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DENABLE_IPSEC:-DENABLE_NODEPORT:-DENABLE_NODEPORT_ACCELERATION:-DENABLE_SESSION_AFFINITY:-DENABLE_BANDWIDTH_MANAGER:-DENABLE_SRC_RANGE_CHECK:-DLB_SELECTION:-DLB_SELECTION_MAGLEV:-DENABLE_SOCKET_LB_HOST_ONLY:-DENABLE_L7_LB:-DENABLE_SCTP:-DENABLE_VTEP: \
-DENABLE_IPV4:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DENABLE_NODEPORT:-DENABLE_NODEPORT_ACCELERATION:-DENABLE_SESSION_AFFINITY:-DENABLE_BANDWIDTH_MANAGER:-DENABLE_SRC_RANGE_CHECK:-DLB_SELECTION:-DLB_SELECTION_MAGLEV:-DENABLE_SOCKET_LB_HOST_ONLY:-DENABLE_L7_LB:-DENABLE_SCTP:-DENABLE_HIGH_SCALE_IPCACHE: \
-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DENABLE_IPSEC:-DENABLE_NODEPORT:-DENABLE_NODEPORT_ACCELERATION:-DENABLE_SESSION_AFFINITY:-DENABLE_BANDWIDTH_MANAGER:-DENABLE_SRC_RANGE_CHECK:-DLB_SELECTION:-DLB_SELECTION_MAGLEV:-DENABLE_SOCKET_LB_HOST_ONLY:-DENABLE_L7_LB:-DENABLE_SCTP:-DENABLE_VTEP:-DENABLE_CLUSTER_AWARE_ADDRESSING:-DENABLE_INTER_CLUSTER_SNAT:

# These options are intended to max out the BPF program complexity. It is load
Expand Down Expand Up @@ -159,7 +160,8 @@ HOST_OPTIONS = $(LXC_OPTIONS) \
-DDISABLE_LOOPBACK_LB:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DPOLICY_VERDICT_NOTIFY:-DENABLE_NODEPORT:-DENABLE_NODEPORT_ACCELERATION:-DENABLE_DSR:-DENABLE_DSR_HYBRID:-DENABLE_PREFILTER:-DENABLE_SESSION_AFFINITY:-DENABLE_HOST_FIREWALL:-DENABLE_ICMP_RULE:-DENABLE_SRV6: \
-DDISABLE_LOOPBACK_LB:-DENABLE_IPV4:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DPOLICY_VERDICT_NOTIFY:-DENABLE_NODEPORT:-DENABLE_NODEPORT_ACCELERATION:-DENABLE_DSR:-DENABLE_DSR_HYBRID:-DENABLE_PREFILTER:-DENABLE_SESSION_AFFINITY:-DENABLE_HOST_FIREWALL:-DENABLE_ICMP_RULE:-DENABLE_SRV6: \
-DDISABLE_LOOPBACK_LB:-DENABLE_IPV4:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DPOLICY_VERDICT_NOTIFY:-DENABLE_NODEPORT:-DENABLE_NODEPORT_ACCELERATION:-DENABLE_DSR:-DENABLE_DSR_HYBRID:-DENABLE_PREFILTER:-DENABLE_SESSION_AFFINITY:-DENABLE_HOST_FIREWALL:-DENABLE_ICMP_RULE:-DENABLE_SRV6:-DENABLE_SRV6_SRH_ENCAP:-DENABLE_SCTP: \
-DDISABLE_LOOPBACK_LB:-DENABLE_IPV4:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DPOLICY_VERDICT_NOTIFY:-DENABLE_NODEPORT:-DENABLE_NODEPORT_ACCELERATION:-DENABLE_DSR:-DENABLE_DSR_HYBRID:-DENABLE_PREFILTER:-DENABLE_SESSION_AFFINITY:-DENABLE_HOST_FIREWALL:-DENABLE_ICMP_RULE:-DENABLE_SRV6:-DENABLE_SRV6_SRH_ENCAP:-DENABLE_SCTP:-DENABLE_VTEP:
-DDISABLE_LOOPBACK_LB:-DENABLE_IPV4:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DPOLICY_VERDICT_NOTIFY:-DENABLE_NODEPORT:-DENABLE_NODEPORT_ACCELERATION:-DENABLE_DSR:-DENABLE_DSR_HYBRID:-DENABLE_PREFILTER:-DENABLE_SESSION_AFFINITY:-DENABLE_HOST_FIREWALL:-DENABLE_ICMP_RULE:-DENABLE_SRV6:-DENABLE_SRV6_SRH_ENCAP:-DENABLE_SCTP:-DENABLE_VTEP: \
-DDISABLE_LOOPBACK_LB:-DENABLE_IPV4:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DPOLICY_VERDICT_NOTIFY:-DENABLE_NODEPORT:-DENABLE_NODEPORT_ACCELERATION:-DENABLE_DSR:-DENABLE_DSR_HYBRID:-DENABLE_PREFILTER:-DENABLE_SESSION_AFFINITY:-DENABLE_HOST_FIREWALL:-DENABLE_ICMP_RULE:-DENABLE_SRV6:-DENABLE_SRV6_SRH_ENCAP:-DENABLE_SCTP:-DENABLE_VTEP:-DENABLE_HIGH_SCALE_IPCACHE:

ifndef MAX_HOST_OPTIONS
MAX_HOST_OPTIONS = $(MAX_BASE_OPTIONS) -DENCAP_IFINDEX=1 -DTUNNEL_MODE=1
Expand Down Expand Up @@ -246,7 +248,8 @@ LXC_OPTIONS = \
-DENABLE_IPV4:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DENABLE_TPROXY:-DENABLE_HOST_ROUTING: \
-DENABLE_IPV4:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DENABLE_TPROXY:-DENABLE_HOST_ROUTING:-DENABLE_SKIP_FIB: \
-DENABLE_IPV4:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DENABLE_TPROXY:-DENABLE_HOST_ROUTING:-DENABLE_SKIP_FIB:-DENABLE_ICMP_RULE:-DENABLE_SCTP: \
-DENABLE_IPV4:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DENABLE_TPROXY:-DENABLE_HOST_ROUTING:-DENABLE_SKIP_FIB:-DENABLE_ICMP_RULE:-DENABLE_SCTP:-DENABLE_VTEP:
-DENABLE_IPV4:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DENABLE_TPROXY:-DENABLE_HOST_ROUTING:-DENABLE_SKIP_FIB:-DENABLE_ICMP_RULE:-DENABLE_SCTP:-DENABLE_VTEP: \
-DENABLE_IPV4:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DENABLE_TPROXY:-DENABLE_HOST_ROUTING:-DENABLE_SKIP_FIB:-DENABLE_ICMP_RULE:-DENABLE_SCTP:-DENABLE_HIGH_SCALE_IPCACHE:

# These options are intended to max out the BPF program complexity. It is load
# tested as well.
Expand Down
22 changes: 18 additions & 4 deletions bpf/bpf_host.c
Original file line number Diff line number Diff line change
Expand Up @@ -743,7 +743,7 @@ handle_ipv4_cont(struct __ctx_buff *ctx, __u32 secctx, const bool from_host,
if (vtep->vtep_mac && vtep->tunnel_endpoint) {
if (eth_store_daddr(ctx, (__u8 *)&vtep->vtep_mac, 0) < 0)
return DROP_WRITE_ERROR;
return __encap_and_redirect_with_nodeid(ctx, vtep->tunnel_endpoint,
return __encap_and_redirect_with_nodeid(ctx, 0, vtep->tunnel_endpoint,
secctx, WORLD_ID, WORLD_ID, &trace);
}
}
Expand Down Expand Up @@ -936,8 +936,9 @@ static __always_inline int do_netdev_encrypt_encap(struct __ctx_buff *ctx, __u32

ctx->mark = 0;
bpf_clear_meta(ctx);
return __encap_and_redirect_with_nodeid(ctx, ep->tunnel_endpoint, src_id,
0, NOT_VTEP_DST, &trace);
return __encap_and_redirect_with_nodeid(ctx, 0, ep->tunnel_endpoint,
src_id, 0, NOT_VTEP_DST,
&trace);
}

static __always_inline int do_netdev_encrypt(struct __ctx_buff *ctx,
Expand Down Expand Up @@ -1274,7 +1275,8 @@ int cil_from_netdev(struct __ctx_buff *ctx)
}
}
#endif
ret = __encap_and_redirect_with_nodeid(ctx, ctx_get_xfer(ctx, XFER_ENCAP_NODEID),
ret = __encap_and_redirect_with_nodeid(ctx, 0,
ctx_get_xfer(ctx, XFER_ENCAP_NODEID),
ctx_get_xfer(ctx, XFER_ENCAP_SECLABEL),
ctx_get_xfer(ctx, XFER_ENCAP_DSTID),
NOT_VTEP_DST, &trace);
Expand All @@ -1299,6 +1301,18 @@ int cil_from_netdev(struct __ctx_buff *ctx)
__section("from-host")
int cil_from_host(struct __ctx_buff *ctx)
{
#ifdef ENABLE_HIGH_SCALE_IPCACHE
__u32 src_id = 0;
int ret;

ret = decapsulate_overlay(ctx, &src_id);
if (IS_ERR(ret))
send_drop_notify_error(ctx, src_id, ret, CTX_ACT_DROP,
METRIC_INGRESS);
if (ret == CTX_ACT_REDIRECT)
return ret;
#endif /* ENABLE_HIGH_SCALE_IPCACHE */

/* Traffic from the host ns going through cilium_host device must
* not be subject to EDT rate-limiting.
*/
Expand Down
16 changes: 8 additions & 8 deletions bpf/bpf_lxc.c
Original file line number Diff line number Diff line change
Expand Up @@ -627,7 +627,7 @@ static __always_inline int handle_ipv6_from_lxc(struct __ctx_buff *ctx, __u32 *d
* the packet needs IPSec encap so push ctx to stack for encap, or
* (c) packet was redirected to tunnel device so return.
*/
ret = encap_and_redirect_lxc(ctx, tunnel_endpoint, encrypt_key,
ret = encap_and_redirect_lxc(ctx, tunnel_endpoint, 0, 0, encrypt_key,
&key, node_id, SECLABEL, *dst_sec_identity,
&trace);
if (ret == CTX_ACT_OK)
Expand Down Expand Up @@ -1116,15 +1116,15 @@ static __always_inline int handle_ipv4_from_lxc(struct __ctx_buff *ctx, __u32 *d
if (vtep->vtep_mac && vtep->tunnel_endpoint) {
if (eth_store_daddr(ctx, (__u8 *)&vtep->vtep_mac, 0) < 0)
return DROP_WRITE_ERROR;
return __encap_and_redirect_with_nodeid(ctx, vtep->tunnel_endpoint,
return __encap_and_redirect_with_nodeid(ctx, 0, vtep->tunnel_endpoint,
SECLABEL, WORLD_ID,
WORLD_ID, &trace);
}
}
skip_vtep:
#endif

#ifdef TUNNEL_MODE
#if defined(TUNNEL_MODE) || defined(ENABLE_HIGH_SCALE_IPCACHE)
{
struct tunnel_key key = {};

Expand All @@ -1147,9 +1147,9 @@ static __always_inline int handle_ipv4_from_lxc(struct __ctx_buff *ctx, __u32 *d
}
#endif

ret = encap_and_redirect_lxc(ctx, tunnel_endpoint, encrypt_key,
&key, node_id, SECLABEL, *dst_sec_identity,
&trace);
ret = encap_and_redirect_lxc(ctx, tunnel_endpoint, ip4->saddr,
ip4->daddr, encrypt_key, &key, node_id,
SECLABEL, *dst_sec_identity, &trace);
if (ret == DROP_NO_TUNNEL_ENDPOINT)
goto pass_to_stack;
/* If not redirected notably due to IPSEC then pass up to stack
Expand All @@ -1170,7 +1170,7 @@ static __always_inline int handle_ipv4_from_lxc(struct __ctx_buff *ctx, __u32 *d
else
return ret;
}
#endif /* TUNNEL_MODE */
#endif /* TUNNEL_MODE || ENABLE_HIGH_SCALE_IPCACHE */
if (is_defined(ENABLE_HOST_ROUTING)) {
int oif = 0;

Expand Down Expand Up @@ -1225,7 +1225,7 @@ static __always_inline int handle_ipv4_from_lxc(struct __ctx_buff *ctx, __u32 *d
#endif
}

#if defined(TUNNEL_MODE) || defined(ENABLE_EGRESS_GATEWAY)
#if defined(TUNNEL_MODE) || defined(ENABLE_EGRESS_GATEWAY) || defined(ENABLE_HIGH_SCALE_IPCACHE)
encrypt_to_stack:
#endif
send_trace_notify(ctx, TRACE_TO_STACK, SECLABEL, *dst_sec_identity, 0, 0,
Expand Down
19 changes: 14 additions & 5 deletions bpf/bpf_overlay.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ static __always_inline int handle_ipv6(struct __ctx_buff *ctx,
struct bpf_tunnel_key key = {};
struct endpoint_info *ep;
bool decrypted;
__u32 key_size;

/* verifier workaround (dereference of modified ctx ptr) */
if (!revalidate_data_pull(ctx, &data, &data_end, &ip6))
Expand All @@ -76,7 +77,8 @@ static __always_inline int handle_ipv6(struct __ctx_buff *ctx,
if (info)
*identity = key.tunnel_id = info->sec_identity;
} else {
if (unlikely(ctx_get_tunnel_key(ctx, &key, sizeof(key), 0) < 0))
key_size = TUNNEL_KEY_WITHOUT_SRC_IP;
if (unlikely(ctx_get_tunnel_key(ctx, &key, key_size, 0) < 0))
return DROP_NO_TUNNEL_KEY;
*identity = key.tunnel_id;

Expand Down Expand Up @@ -286,6 +288,7 @@ static __always_inline int handle_ipv4(struct __ctx_buff *ctx,
__s8 *ext_err __maybe_unused)
{
struct remote_endpoint_info *info;
__u32 key_size __maybe_unused;
void *data_end, *data;
struct iphdr *ip4;
struct endpoint_info *ep;
Expand Down Expand Up @@ -327,9 +330,14 @@ static __always_inline int handle_ipv4(struct __ctx_buff *ctx,
if (info)
*identity = key.tunnel_id = info->sec_identity;
} else {
if (unlikely(ctx_get_tunnel_key(ctx, &key, sizeof(key), 0) < 0))
#ifdef ENABLE_HIGH_SCALE_IPCACHE
*identity = key.tunnel_id = ctx_load_meta(ctx, CB_SRC_LABEL);
#else
key_size = TUNNEL_KEY_WITHOUT_SRC_IP;
if (unlikely(ctx_get_tunnel_key(ctx, &key, key_size, 0) < 0))
return DROP_NO_TUNNEL_KEY;
*identity = key.tunnel_id;
#endif /* ENABLE_HIGH_SCALE_IPCACHE */

if (*identity == HOST_ID)
return DROP_INVALID_IDENTITY;
Expand Down Expand Up @@ -462,8 +470,10 @@ int tail_handle_arp(struct __ctx_buff *ctx)
struct bpf_tunnel_key key = {};
struct vtep_key vkey = {};
struct vtep_value *info;
__u32 key_size;

if (unlikely(ctx_get_tunnel_key(ctx, &key, sizeof(key), 0) < 0))
key_size = TUNNEL_KEY_WITHOUT_SRC_IP;
if (unlikely(ctx_get_tunnel_key(ctx, &key, key_size, 0) < 0))
return send_drop_notify_error(ctx, 0, DROP_NO_TUNNEL_KEY, CTX_ACT_DROP,
METRIC_INGRESS);

Expand All @@ -478,7 +488,7 @@ int tail_handle_arp(struct __ctx_buff *ctx)
if (unlikely(ret != 0))
return send_drop_notify_error(ctx, 0, ret, CTX_ACT_DROP, METRIC_EGRESS);
if (info->tunnel_endpoint) {
ret = __encap_and_redirect_with_nodeid(ctx, info->tunnel_endpoint,
ret = __encap_and_redirect_with_nodeid(ctx, 0, info->tunnel_endpoint,
LOCAL_NODE_ID, WORLD_ID,
WORLD_ID, &trace);
if (IS_ERR(ret))
Expand Down Expand Up @@ -540,7 +550,6 @@ int cil_from_overlay(struct __ctx_buff *ctx)
__u16 proto;
int ret;

bpf_clear_meta(ctx);
ctx_skip_nodeport_clear(ctx);

if (!validate_ethertype(ctx, &proto)) {
Expand Down
4 changes: 4 additions & 0 deletions bpf/include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -5593,6 +5593,10 @@ struct bpf_tunnel_key {
__u8 tunnel_ttl;
__u16 tunnel_ext; /* Padding, future use. */
__u32 tunnel_label;
union {
__u32 local_ipv4;
__u32 local_ipv6[4];
};
};

/* user accessible mirror of in-kernel xfrm_state.
Expand Down
10 changes: 10 additions & 0 deletions bpf/lib/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -1232,6 +1232,16 @@ struct genevehdr {
__u8 reserved;
};

/* Header made of two big-endian 32-bit words: a flags word followed by a
 * VNI word.
 * NOTE(review): presumably mirrors the on-wire VXLAN header (RFC 7348), where
 * each word also carries reserved bits -- confirm against the code that
 * parses it before masking or shifting either field.
 */
struct vxlanhdr {
__be32 vx_flags;
__be32 vx_vni;
};

/* Older kernels don't support the larger tunnel key structure and we don't
* need it since we only want to retrieve the tunnel ID anyway.
*/
#define TUNNEL_KEY_WITHOUT_SRC_IP offsetof(struct bpf_tunnel_key, local_ipv4)

#include "overloadable.h"

#endif /* __LIB_COMMON_H_ */
61 changes: 42 additions & 19 deletions bpf/lib/encap.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "trace.h"
#include "l3.h"
#include "lib/wireguard.h"
#include "high_scale_ipcache.h"

#ifdef HAVE_ENCAP
#ifdef ENABLE_IPSEC
Expand All @@ -30,7 +31,7 @@ encap_and_redirect_ipsec(struct __ctx_buff *ctx, __u8 key, __u16 node_id,
#endif /* ENABLE_IPSEC */

static __always_inline int
__encap_with_nodeid(struct __ctx_buff *ctx, __be32 tunnel_endpoint,
__encap_with_nodeid(struct __ctx_buff *ctx, __u32 src_ip, __be32 tunnel_endpoint,
__u32 seclabel, __u32 dstid, __u32 vni __maybe_unused,
enum trace_reason ct_reason, __u32 monitor, int *ifindex)
{
Expand All @@ -48,7 +49,8 @@ __encap_with_nodeid(struct __ctx_buff *ctx, __be32 tunnel_endpoint,

cilium_dbg(ctx, DBG_ENCAP, node_id, seclabel);

ret = ctx_set_encap_info(ctx, node_id, seclabel, dstid, vni, NULL, 0, false, ifindex);
ret = ctx_set_encap_info(ctx, src_ip, node_id, seclabel, dstid, vni,
NULL, 0, false, ifindex);
if (ret == CTX_ACT_REDIRECT)
send_trace_notify(ctx, TRACE_TO_OVERLAY, seclabel, dstid, 0, *ifindex,
ct_reason, monitor);
Expand All @@ -57,7 +59,8 @@ __encap_with_nodeid(struct __ctx_buff *ctx, __be32 tunnel_endpoint,
}

static __always_inline int
__encap_and_redirect_with_nodeid(struct __ctx_buff *ctx, __be32 tunnel_endpoint,
__encap_and_redirect_with_nodeid(struct __ctx_buff *ctx, __u32 src_ip __maybe_unused,
__be32 tunnel_endpoint,
__u32 seclabel, __u32 dstid, __u32 vni,
const struct trace_ctx *trace)
{
Expand All @@ -79,7 +82,7 @@ __encap_and_redirect_with_nodeid(struct __ctx_buff *ctx, __be32 tunnel_endpoint,
return ret;
#endif /* ENABLE_WIREGUARD */

ret = __encap_with_nodeid(ctx, tunnel_endpoint, seclabel, dstid,
ret = __encap_with_nodeid(ctx, src_ip, tunnel_endpoint, seclabel, dstid,
vni, trace->reason, trace->monitor,
&ifindex);
if (ret != CTX_ACT_REDIRECT)
Expand All @@ -99,7 +102,8 @@ encap_and_redirect_with_nodeid(struct __ctx_buff *ctx, __be32 tunnel_endpoint,
__u32 seclabel, __u32 dstid,
const struct trace_ctx *trace)
{
return __encap_and_redirect_with_nodeid(ctx, tunnel_endpoint, seclabel, dstid, NOT_VTEP_DST,
return __encap_and_redirect_with_nodeid(ctx, 0, tunnel_endpoint,
seclabel, dstid, NOT_VTEP_DST,
trace);
}

Expand Down Expand Up @@ -129,20 +133,21 @@ __encap_and_redirect_lxc(struct __ctx_buff *ctx, __be32 tunnel_endpoint,
* the tunnel, to apply the correct reverse DNAT.
* See #14674 for details.
*/
ret = __encap_with_nodeid(ctx, tunnel_endpoint, seclabel, dstid, NOT_VTEP_DST,
trace->reason, trace->monitor, &ifindex);
ret = __encap_with_nodeid(ctx, 0, tunnel_endpoint, seclabel, dstid,
NOT_VTEP_DST, trace->reason, trace->monitor,
&ifindex);
if (ret != CTX_ACT_REDIRECT)
return ret;

/* tell caller that this packet needs to go through the stack: */
return CTX_ACT_OK;
#else
return __encap_and_redirect_with_nodeid(ctx, tunnel_endpoint,
return __encap_and_redirect_with_nodeid(ctx, 0, tunnel_endpoint,
seclabel, dstid, NOT_VTEP_DST, trace);
#endif /* !ENABLE_NODEPORT && (ENABLE_IPSEC || ENABLE_HOST_FIREWALL) */
}

#ifdef TUNNEL_MODE
#if defined(TUNNEL_MODE) || defined(ENABLE_HIGH_SCALE_IPCACHE)
/* encap_and_redirect_lxc adds IPSec metadata (if enabled) and returns the packet
* so that it can be passed to the IP stack. Without IPSec the packet is
* typically redirected to the output tunnel device and ctx will not be seen by
Expand All @@ -153,13 +158,30 @@ __encap_and_redirect_lxc(struct __ctx_buff *ctx, __be32 tunnel_endpoint,
* and finally on successful redirect returns CTX_ACT_REDIRECT.
*/
static __always_inline int
encap_and_redirect_lxc(struct __ctx_buff *ctx, __be32 tunnel_endpoint,
__u8 encrypt_key, struct tunnel_key *key,
__u16 node_id, __u32 seclabel, __u32 dstid,
encap_and_redirect_lxc(struct __ctx_buff *ctx,
__be32 tunnel_endpoint __maybe_unused,
__u32 src_ip __maybe_unused,
__u32 dst_ip __maybe_unused,
__u8 encrypt_key __maybe_unused,
struct tunnel_key *key __maybe_unused,
__u16 node_id __maybe_unused,
__u32 seclabel, __u32 dstid,
const struct trace_ctx *trace)
{
struct tunnel_value *tunnel;
struct tunnel_value *tunnel __maybe_unused;

#ifdef ENABLE_HIGH_SCALE_IPCACHE
/* If the destination doesn't match one of the world CIDRs, we assume
* it's destined to a remote pod. In that case, since the high-scale
* ipcache is enabled, we want to encapsulate with the remote pod's IP
* itself.
*/
if (!world_cidrs_lookup4(dst_ip))
return __encap_and_redirect_with_nodeid(ctx, src_ip, dst_ip,
seclabel, dstid,
NOT_VTEP_DST, trace);
return DROP_NO_TUNNEL_ENDPOINT;
#else /* ENABLE_HIGH_SCALE_IPCACHE */
if (tunnel_endpoint)
return __encap_and_redirect_lxc(ctx, tunnel_endpoint,
encrypt_key, node_id, seclabel,
Expand All @@ -169,16 +191,17 @@ encap_and_redirect_lxc(struct __ctx_buff *ctx, __be32 tunnel_endpoint,
if (!tunnel)
return DROP_NO_TUNNEL_ENDPOINT;

#ifdef ENABLE_IPSEC
# ifdef ENABLE_IPSEC
if (tunnel->key) {
__u8 min_encrypt_key = get_min_encrypt_key(tunnel->key);

return encap_and_redirect_ipsec(ctx, min_encrypt_key, node_id,
seclabel);
}
#endif
return __encap_and_redirect_with_nodeid(ctx, tunnel->ip4, seclabel,
# endif
return __encap_and_redirect_with_nodeid(ctx, 0, tunnel->ip4, seclabel,
dstid, NOT_VTEP_DST, trace);
#endif /* ENABLE_HIGH_SCALE_IPCACHE */
}

static __always_inline int
Expand All @@ -191,10 +214,10 @@ encap_and_redirect_netdev(struct __ctx_buff *ctx, struct tunnel_key *k,
if (!tunnel)
return DROP_NO_TUNNEL_ENDPOINT;

return __encap_and_redirect_with_nodeid(ctx, tunnel->ip4, seclabel,
return __encap_and_redirect_with_nodeid(ctx, 0, tunnel->ip4, seclabel,
0, NOT_VTEP_DST, trace);
}
#endif /* TUNNEL_MODE */
#endif /* TUNNEL_MODE || ENABLE_HIGH_SCALE_IPCACHE */

#if defined(ENABLE_DSR) && DSR_ENCAP_MODE == DSR_ENCAP_GENEVE
static __always_inline int
Expand All @@ -218,7 +241,7 @@ __encap_with_nodeid_opt(struct __ctx_buff *ctx, __u32 tunnel_endpoint,

cilium_dbg(ctx, DBG_ENCAP, node_id, seclabel);

ret = ctx_set_encap_info(ctx, node_id, seclabel, dstid, vni,
ret = ctx_set_encap_info(ctx, 0, node_id, seclabel, dstid, vni,
opt, opt_len, is_ipv6, ifindex);
if (ret == CTX_ACT_REDIRECT)
send_trace_notify(ctx, TRACE_TO_OVERLAY, seclabel, dstid, 0, *ifindex,
Expand Down