Skip to content

Commit

Permalink
bpf: Re-introduce ICMPv6 NS responder on from-netdev
Browse files Browse the repository at this point in the history
This reverts commit 6580714, to fix
the breakage of "IPv6 NS responder for pod" introduced by
#12086 (bpf: Reply NA when recv ND for local IPv6 endpoints).

6580714 was merged to solve #14509.
To not revive #14509, this commit also passes through ICMPv6 NS if the
target is native node IP (eth0's addr). By letting stack take care of
those NS-for-node-IP packets, we managed to:

1. Solve #14509 again, but in a way keeping NS responder. The cause of
   #14509 was NS responder always generates ND whose source IP is
   "router_ip" (cilium_internal_ip) rather than "node_ip". Once we pass
   those NS-for-node-IP packets to stack, the ND response would
   naturally have "node_ip" as source.

2. Avoid the fib_lookup failure mentioned at #30837 (comment).

icmp6_host_handle() also has a new parameter `handle_ns` to control if
we want NS responder to be active. If it is called from `to-netdev` code
path, handle_ns is set to false. This is suggested by julianwiedmann.

Signed-off-by: Zhichuan Liang <gray.liang@isovalent.com>
  • Loading branch information
jschwinger233 authored and julianwiedmann committed Mar 1, 2024
1 parent 60c5e76 commit dc9dfd7
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 12 deletions.
13 changes: 6 additions & 7 deletions bpf/bpf_host.c
Original file line number Diff line number Diff line change
Expand Up @@ -144,31 +144,28 @@ handle_ipv6(struct __ctx_buff *ctx, __u32 secctx __maybe_unused,
#ifdef ENABLE_HOST_FIREWALL
struct ct_buffer6 ct_buffer = {};
bool need_hostfw = false;
__u8 nexthdr;
int hdrlen;
bool is_host_id = false;
#endif /* ENABLE_HOST_FIREWALL */
void *data, *data_end;
struct ipv6hdr *ip6;
int __maybe_unused ret;
int ret, hdrlen;
__u8 nexthdr;

if (!revalidate_data(ctx, &data, &data_end, &ip6))
return DROP_INVALID;

#ifdef ENABLE_HOST_FIREWALL
nexthdr = ip6->nexthdr;
hdrlen = ipv6_hdrlen(ctx, &nexthdr);
if (hdrlen < 0)
return hdrlen;

if (likely(nexthdr == IPPROTO_ICMPV6)) {
ret = icmp6_host_handle(ctx, ETH_HLEN + hdrlen);
ret = icmp6_host_handle(ctx, ETH_HLEN + hdrlen, ext_err, !from_host);
if (ret == SKIP_HOST_FIREWALL)
goto skip_host_firewall;
if (IS_ERR(ret))
return ret;
}
#endif /* ENABLE_HOST_FIREWALL */

#ifdef ENABLE_NODEPORT
if (!from_host) {
Expand Down Expand Up @@ -213,8 +210,10 @@ handle_ipv6(struct __ctx_buff *ctx, __u32 secctx __maybe_unused,
if (map_update_elem(&CT_TAIL_CALL_BUFFER6, &zero, &ct_buffer, 0) < 0)
return DROP_INVALID_TC_BUFFER;
}
#endif /* ENABLE_HOST_FIREWALL */

skip_host_firewall:
#ifdef ENABLE_HOST_FIREWALL
ctx_store_meta(ctx, CB_FROM_HOST,
(need_hostfw ? FROM_HOST_FLAG_NEED_HOSTFW : 0) |
(is_host_id ? FROM_HOST_FLAG_HOST_ID : 0));
Expand Down Expand Up @@ -497,7 +496,7 @@ handle_to_netdev_ipv6(struct __ctx_buff *ctx, struct trace_ctx *trace, __s8 *ext
return hdrlen;

if (likely(nexthdr == IPPROTO_ICMPV6)) {
ret = icmp6_host_handle(ctx, ETH_HLEN + hdrlen);
ret = icmp6_host_handle(ctx, ETH_HLEN + hdrlen, ext_err, false);
if (ret == SKIP_HOST_FIREWALL)
return CTX_ACT_OK;
if (IS_ERR(ret))
Expand Down
30 changes: 25 additions & 5 deletions bpf/lib/icmp6.h
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,7 @@ static __always_inline int __icmp6_handle_ns(struct __ctx_buff *ctx, int nh_off)
{
union v6addr target, router;
struct endpoint_info *ep;
union macaddr router_mac = NODE_MAC;

if (ctx_load_bytes(ctx, nh_off + ICMP6_ND_TARGET_OFFSET, target.addr,
sizeof(((struct ipv6hdr *)NULL)->saddr)) < 0)
Expand All @@ -309,15 +310,27 @@ static __always_inline int __icmp6_handle_ns(struct __ctx_buff *ctx, int nh_off)
BPF_V6(router, ROUTER_IP);

if (ipv6_addr_equals(&target, &router)) {
union macaddr router_mac = NODE_MAC;

return send_icmp6_ndisc_adv(ctx, nh_off, &router_mac, true);
}

ep = __lookup_ip6_endpoint(&target);
if (ep) {
union macaddr router_mac = NODE_MAC;

if (ep->flags & ENDPOINT_F_HOST) {
/* Target must be a node_ip, because of ENDPOINT_F_HOST flag
* and target != router_ip.
*
* We pass these packets to stack to make sure:
*
* 1. The response NA has node IP as source address instead of
* router IP, to address https://github.com/cilium/cilium/issues/14509.
*
* 2. Kernel stack can record a neighbor entry for the
* source IP, to avoid bpf_fib_lookup failure as mentioned at
* https://github.com/cilium/cilium/pull/30837#issuecomment-1960897445.
*/
return CTX_ACT_OK;
}
return send_icmp6_ndisc_adv(ctx, nh_off, &router_mac, false);
}

Expand Down Expand Up @@ -392,13 +405,17 @@ static __always_inline int icmp6_ndp_handle(struct __ctx_buff *ctx, int nh_off,
}

static __always_inline int
icmp6_host_handle(struct __ctx_buff *ctx, int l4_off)
icmp6_host_handle(struct __ctx_buff *ctx, int l4_off, __s8 *ext_err, bool handle_ns)
{
__u8 type;

if (icmp6_load_type(ctx, l4_off, &type) < 0)
return DROP_INVALID;

if (type == ICMP6_NS_MSG_TYPE && handle_ns)
return icmp6_handle_ns(ctx, ETH_HLEN, METRIC_INGRESS, ext_err);

#ifdef ENABLE_HOST_FIREWALL
/* When the host firewall is enabled, we drop and allow ICMPv6 messages
* according to RFC4890, except for echo request and reply messages which
* are handled by host policies and can be dropped.
Expand All @@ -418,7 +435,7 @@ icmp6_host_handle(struct __ctx_buff *ctx, int l4_off)
* | ICMPv6-mult-list-done | CTX_ACT_OK | 132 |
* | ICMPv6-router-solici | CTX_ACT_OK | 133 |
* | ICMPv6-router-advert | CTX_ACT_OK | 134 |
* | ICMPv6-neighbor-solicit | CTX_ACT_OK | 135 |
* | ICMPv6-neighbor-solicit | icmp6_handle_ns | 135 |
* | ICMPv6-neighbor-advert | CTX_ACT_OK | 136 |
* | ICMPv6-redirect-message | CTX_ACT_DROP | 137 |
* | ICMPv6-router-renumber | CTX_ACT_OK | 138 |
Expand Down Expand Up @@ -460,6 +477,9 @@ icmp6_host_handle(struct __ctx_buff *ctx, int l4_off)
(ICMP6_MULT_RA_MSG_TYPE <= type && type <= ICMP6_MULT_RT_MSG_TYPE))
return SKIP_HOST_FIREWALL;
return DROP_FORBIDDEN_ICMP6;
#else
return CTX_ACT_OK;
#endif /* ENABLE_HOST_FIREWALL */
}

#endif

0 comments on commit dc9dfd7

Please sign in to comment.