Skip to content

Commit

Permalink
bpf: nodeport: SNAT before adding tunnel info in NAT egress path
Browse files Browse the repository at this point in the history
When forwarding to a remote backend via tunnel, we currently first add
the encap info and only then apply SNAT to the packet. This means that
the TRACE_TO_OVERLAY in __encap_with_nodeid() doesn't report the final
header content.

This is in contrast to the non-tunnel path (where the TRACE_TO_NETWORK
in to-netdev will report the post-SNAT header content), and the reply
path (where from-overlay raises the TRACE_FROM_OVERLAY long before checking
for revSNAT).

So re-order the NAT egress path to first apply SNAT, and then add the
encap info afterwards.

For now this just helps to make the TRACE_TO_OVERLAY entry consistent. In
the future it will also be needed to enable in-XDP encap (as we want to SNAT
the inner packet before adding the encap headers).

Signed-off-by: Julian Wiedmann <jwi@isovalent.com>
  • Loading branch information
julianwiedmann committed May 8, 2023
1 parent daa85a0 commit 096ca44
Showing 1 changed file with 35 additions and 33 deletions.
68 changes: 35 additions & 33 deletions bpf/lib/nodeport.h
Expand Up @@ -820,8 +820,8 @@ int tail_nodeport_nat_egress_ipv6(struct __ctx_buff *ctx)
__s8 ext_err = 0;
#ifdef TUNNEL_MODE
struct remote_endpoint_info *info;
int verdict = CTX_ACT_REDIRECT;
bool use_tunnel = false;
__be32 tunnel_endpoint = 0;
__u32 dst_sec_identity = 0;
union v6addr *dst;
#endif

Expand All @@ -837,18 +837,10 @@ int tail_nodeport_nat_egress_ipv6(struct __ctx_buff *ctx)
dst = (union v6addr *)&ip6->daddr;
info = ipcache_lookup6(&IPCACHE_MAP, dst, V6_CACHE_KEY_LEN, 0);
if (info && info->tunnel_endpoint != 0) {
ret = __encap_with_nodeid(ctx, info->tunnel_endpoint,
WORLD_ID,
info->sec_identity,
NOT_VTEP_DST,
(enum trace_reason)CT_NEW,
TRACE_PAYLOAD_LEN, &oif);
if (IS_ERR(ret))
goto drop_err;
tunnel_endpoint = info->tunnel_endpoint;
dst_sec_identity = info->sec_identity;

BPF_V6(target.addr, ROUTER_IP);
use_tunnel = true;
verdict = ret;
}
#endif
ret = snat_v6_nat(ctx, &target, &ext_err);
Expand All @@ -857,12 +849,21 @@ int tail_nodeport_nat_egress_ipv6(struct __ctx_buff *ctx)

ctx_snat_done_set(ctx);
#ifdef TUNNEL_MODE
if (use_tunnel) {
if (tunnel_endpoint) {
ret = __encap_with_nodeid(ctx, tunnel_endpoint,
WORLD_ID,
dst_sec_identity,
NOT_VTEP_DST,
(enum trace_reason)CT_NEW,
TRACE_PAYLOAD_LEN, &oif);
if (IS_ERR(ret))
goto drop_err;

cilium_capture_out(ctx);
if (verdict == CTX_ACT_REDIRECT)
if (ret == CTX_ACT_REDIRECT)
return ctx_redirect(ctx, oif, 0);
ctx_move_xfer(ctx);
return verdict;
return ret;
}
#endif
if (!revalidate_data(ctx, &data, &data_end, &ip6)) {
Expand Down Expand Up @@ -2033,8 +2034,8 @@ int tail_nodeport_nat_egress_ipv4(struct __ctx_buff *ctx)
__s8 ext_err = 0;
#ifdef TUNNEL_MODE
struct remote_endpoint_info *info;
int verdict = CTX_ACT_REDIRECT;
bool use_tunnel = false;
__be32 tunnel_endpoint = 0;
__u32 dst_sec_identity = 0;

if (!revalidate_data(ctx, &data, &data_end, &ip4)) {
ret = DROP_INVALID;
Expand All @@ -2043,39 +2044,40 @@ int tail_nodeport_nat_egress_ipv4(struct __ctx_buff *ctx)

info = ipcache_lookup4(&IPCACHE_MAP, ip4->daddr, V4_CACHE_KEY_LEN, 0);
if (info && info->tunnel_endpoint != 0) {
tunnel_endpoint = info->tunnel_endpoint;
dst_sec_identity = info->sec_identity;

target.addr = IPV4_GATEWAY;
}
#endif
ret = snat_v4_nat(ctx, &target, &ext_err);
if (IS_ERR(ret) && ret != NAT_PUNT_TO_STACK)
goto drop_err;

ctx_snat_done_set(ctx);
#ifdef TUNNEL_MODE
if (tunnel_endpoint) {
/* The request came from outside, so we need to
* set the security id in the tunnel header to WORLD_ID.
* Otherwise, the remote node will assume, that the
* request originated from a cluster node which will
* bypass any netpol which disallows LB requests from
* outside.
*/
ret = __encap_with_nodeid(ctx, info->tunnel_endpoint,
ret = __encap_with_nodeid(ctx, tunnel_endpoint,
WORLD_ID,
info->sec_identity,
dst_sec_identity,
NOT_VTEP_DST,
(enum trace_reason)CT_NEW,
TRACE_PAYLOAD_LEN, &oif);
if (IS_ERR(ret))
goto drop_err;

target.addr = IPV4_GATEWAY;
use_tunnel = true;
verdict = ret;
}
#endif
ret = snat_v4_nat(ctx, &target, &ext_err);
if (IS_ERR(ret) && ret != NAT_PUNT_TO_STACK)
goto drop_err;

ctx_snat_done_set(ctx);
#ifdef TUNNEL_MODE
if (use_tunnel) {
cilium_capture_out(ctx);
if (verdict == CTX_ACT_REDIRECT)
if (ret == CTX_ACT_REDIRECT)
return ctx_redirect(ctx, oif, 0);
ctx_move_xfer(ctx);
return verdict;
return ret;
}
#endif
if (!revalidate_data(ctx, &data, &data_end, &ip4)) {
Expand Down

0 comments on commit 096ca44

Please sign in to comment.