Skip to content

Commit

Permalink
bpf: nodeport: only set outer src IP for tunnel encap in XDP
Browse files Browse the repository at this point in the history
As part of introducing native tunnel support for XDP, we started passing
a `src_ip` parameter to `__encap_with_nodeid()`. This was needed to
manually build the packet's outer headers.

But in TC context we actually don't want to specify the outer src IP; instead,
we defer its selection to the kernel stack. Worse, specifying the
outer src IP means that the skb implementation for `ctx_set_encap_info()`
starts to use the `local_ipv4` field in the `bpf_tunnel_key`. And that's
not supported on older kernels, resulting in drops.

Fix this by only setting the `src_ip` parameter in XDP context.

Reported-by: Yusuke Suzuki <yusuke-suzuki@cybozu.co.jp>
Fixes: 43dffb2 ("bpf: encap: manually set src IP and port from nodeport XDP paths")
Signed-off-by: Julian Wiedmann <jwi@isovalent.com>
  • Loading branch information
julianwiedmann committed Jul 10, 2023
1 parent 3b979b7 commit 5e1139d
Showing 1 changed file with 97 additions and 70 deletions.
167 changes: 97 additions & 70 deletions bpf/lib/nodeport.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,39 @@ bpf_skip_recirculation(const struct __ctx_buff *ctx __maybe_unused)
#endif
}

#ifdef HAVE_ENCAP
/* Nodeport wrapper around __encap_with_nodeid().
 *
 * The caller-provided src_ip is only used as outer source IP when the
 * program does not run on an skb context. On skb, 0 is passed instead so
 * that the kernel selects the outer source IP.
 *
 * All other arguments are forwarded unchanged, the destination is always
 * flagged as NOT_VTEP_DST, and the result of __encap_with_nodeid() is
 * returned as-is.
 */
static __always_inline int
nodeport_add_tunnel_encap(struct __ctx_buff *ctx, __u32 src_ip, __be16 src_port,
			  __be32 dst_ip, __u32 src_sec_identity, __u32 dst_sec_identity,
			  enum trace_reason ct_reason, __u32 monitor, int *ifindex)
{
	/* skb context: leave the outer source IP unset (0), kernel picks it. */
	__u32 outer_src_ip = ctx_is_skb() ? 0 : src_ip;

	return __encap_with_nodeid(ctx, outer_src_ip, src_port, dst_ip,
				   src_sec_identity, dst_sec_identity,
				   NOT_VTEP_DST, ct_reason, monitor, ifindex);
}

# if defined(ENABLE_DSR) && DSR_ENCAP_MODE == DSR_ENCAP_GENEVE
/* Nodeport wrapper around __encap_with_nodeid_opt().
 *
 * The caller-provided src_ip is only used as outer source IP when the
 * program does not run on an skb context. On skb, 0 is passed instead so
 * that the kernel selects the outer source IP.
 *
 * opt / opt_len and all remaining arguments are forwarded unchanged, the
 * destination is always flagged as NOT_VTEP_DST, and the result of
 * __encap_with_nodeid_opt() is returned as-is.
 */
static __always_inline int
nodeport_add_tunnel_encap_opt(struct __ctx_buff *ctx, __u32 src_ip, __be16 src_port,
			      __be32 dst_ip, __u32 src_sec_identity, __u32 dst_sec_identity,
			      void *opt, __u32 opt_len, enum trace_reason ct_reason,
			      __u32 monitor, int *ifindex)
{
	/* skb context: leave the outer source IP unset (0), kernel picks it. */
	__u32 outer_src_ip = ctx_is_skb() ? 0 : src_ip;

	return __encap_with_nodeid_opt(ctx, outer_src_ip, src_port, dst_ip,
				       src_sec_identity, dst_sec_identity,
				       NOT_VTEP_DST, opt, opt_len,
				       ct_reason, monitor, ifindex);
}
# endif
#endif /* HAVE_ENCAP */

static __always_inline bool dsr_fail_needs_reply(int code __maybe_unused)
{
#ifdef ENABLE_DSR_ICMP_ERRORS
Expand Down Expand Up @@ -302,29 +335,27 @@ static __always_inline int encap_geneve_dsr_opt6(struct __ctx_buff *ctx,
}

if (need_opt)
return __encap_with_nodeid_opt(ctx,
IPV4_DIRECT_ROUTING,
src_port,
tunnel_endpoint,
WORLD_ID,
dst_sec_identity,
NOT_VTEP_DST,
&gopt,
sizeof(gopt),
(enum trace_reason)CT_NEW,
TRACE_PAYLOAD_LEN,
ifindex);

return __encap_with_nodeid(ctx,
IPV4_DIRECT_ROUTING,
src_port,
tunnel_endpoint,
WORLD_ID,
dst_sec_identity,
NOT_VTEP_DST,
(enum trace_reason)CT_NEW,
TRACE_PAYLOAD_LEN,
ifindex);
return nodeport_add_tunnel_encap_opt(ctx,
IPV4_DIRECT_ROUTING,
src_port,
tunnel_endpoint,
WORLD_ID,
dst_sec_identity,
&gopt,
sizeof(gopt),
(enum trace_reason)CT_NEW,
TRACE_PAYLOAD_LEN,
ifindex);

return nodeport_add_tunnel_encap(ctx,
IPV4_DIRECT_ROUTING,
src_port,
tunnel_endpoint,
WORLD_ID,
dst_sec_identity,
(enum trace_reason)CT_NEW,
TRACE_PAYLOAD_LEN,
ifindex);
}
#endif /* DSR_ENCAP_MODE */

Expand Down Expand Up @@ -900,15 +931,14 @@ int tail_nodeport_nat_egress_ipv6(struct __ctx_buff *ctx)

src_port = tunnel_gen_src_port_v6(&tuple);

ret = __encap_with_nodeid(ctx,
IPV4_DIRECT_ROUTING,
src_port,
tunnel_endpoint,
WORLD_ID,
dst_sec_identity,
NOT_VTEP_DST,
(enum trace_reason)CT_NEW,
TRACE_PAYLOAD_LEN, &oif);
ret = nodeport_add_tunnel_encap(ctx,
IPV4_DIRECT_ROUTING,
src_port,
tunnel_endpoint,
WORLD_ID,
dst_sec_identity,
(enum trace_reason)CT_NEW,
TRACE_PAYLOAD_LEN, &oif);
if (IS_ERR(ret))
goto drop_err;

Expand Down Expand Up @@ -1263,9 +1293,9 @@ static __always_inline int rev_nodeport_lb6(struct __ctx_buff *ctx, __s8 *ext_er
encap_redirect:
src_port = tunnel_gen_src_port_v6(&tuple);

ret = __encap_with_nodeid(ctx, IPV4_DIRECT_ROUTING, src_port,
tunnel_endpoint, SECLABEL, dst_sec_identity,
NOT_VTEP_DST, reason, monitor, &ifindex);
ret = nodeport_add_tunnel_encap(ctx, IPV4_DIRECT_ROUTING, src_port,
tunnel_endpoint, SECLABEL, dst_sec_identity,
reason, monitor, &ifindex);
if (IS_ERR(ret))
return ret;

Expand Down Expand Up @@ -1723,29 +1753,27 @@ static __always_inline int encap_geneve_dsr_opt4(struct __ctx_buff *ctx, int l3_
#endif

if (need_opt)
return __encap_with_nodeid_opt(ctx,
IPV4_DIRECT_ROUTING,
src_port,
tunnel_endpoint,
src_sec_identity,
dst_sec_identity,
NOT_VTEP_DST,
&gopt,
sizeof(gopt),
(enum trace_reason)CT_NEW,
TRACE_PAYLOAD_LEN,
ifindex);

return __encap_with_nodeid(ctx,
IPV4_DIRECT_ROUTING,
src_port,
tunnel_endpoint,
src_sec_identity,
dst_sec_identity,
NOT_VTEP_DST,
(enum trace_reason)CT_NEW,
TRACE_PAYLOAD_LEN,
ifindex);
return nodeport_add_tunnel_encap_opt(ctx,
IPV4_DIRECT_ROUTING,
src_port,
tunnel_endpoint,
src_sec_identity,
dst_sec_identity,
&gopt,
sizeof(gopt),
(enum trace_reason)CT_NEW,
TRACE_PAYLOAD_LEN,
ifindex);

return nodeport_add_tunnel_encap(ctx,
IPV4_DIRECT_ROUTING,
src_port,
tunnel_endpoint,
src_sec_identity,
dst_sec_identity,
(enum trace_reason)CT_NEW,
TRACE_PAYLOAD_LEN,
ifindex);
}
#endif /* DSR_ENCAP_MODE */

Expand Down Expand Up @@ -2244,15 +2272,14 @@ int tail_nodeport_nat_egress_ipv4(struct __ctx_buff *ctx)
* bypass any netpol which disallows LB requests from
* outside.
*/
ret = __encap_with_nodeid(ctx,
IPV4_DIRECT_ROUTING,
src_port,
tunnel_endpoint,
WORLD_ID,
dst_sec_identity,
NOT_VTEP_DST,
(enum trace_reason)CT_NEW,
TRACE_PAYLOAD_LEN, &oif);
ret = nodeport_add_tunnel_encap(ctx,
IPV4_DIRECT_ROUTING,
src_port,
tunnel_endpoint,
WORLD_ID,
dst_sec_identity,
(enum trace_reason)CT_NEW,
TRACE_PAYLOAD_LEN, &oif);
if (IS_ERR(ret))
goto drop_err;

Expand Down Expand Up @@ -2664,9 +2691,9 @@ static __always_inline int rev_nodeport_lb4(struct __ctx_buff *ctx, __s8 *ext_er
encap_redirect:
src_port = tunnel_gen_src_port_v4(&tuple);

ret = __encap_with_nodeid(ctx, IPV4_DIRECT_ROUTING, src_port,
tunnel_endpoint, SECLABEL, dst_sec_identity,
NOT_VTEP_DST, reason, monitor, &ifindex);
ret = nodeport_add_tunnel_encap(ctx, IPV4_DIRECT_ROUTING, src_port,
tunnel_endpoint, SECLABEL, dst_sec_identity,
reason, monitor, &ifindex);
if (IS_ERR(ret))
return ret;

Expand Down

0 comments on commit 5e1139d

Please sign in to comment.