Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[v1.14] re-introduce ICMPv6 NS responder on from-netdev #31186

Merged
merged 7 commits into from
Mar 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/lint-bpf-checks.yaml
Expand Up @@ -167,7 +167,7 @@ jobs:
env:
# Disable coverage report for these test cases since they are hitting
# https://github.com/cilium/coverbee/issues/7
NOCOVER_PATTERN: "inter_cluster_snat_clusterip.*|l4lb_ipip_health_check_host.o|nodeport_geneve_dsr_*|session_affinity_test.o|tc_egressgw_redirect.o|tc_egressgw_snat.o|tc_nodeport_lb4_dsr_backend.o|tc_nodeport_lb4_dsr_lb.o|tc_nodeport_lb4_nat_backend.o|tc_nodeport_lb4_nat_lb.o|tc_nodeport_lb6_dsr_backend.o|tc_nodeport_lb6_dsr_lb.o|xdp_egressgw_reply.o|xdp_nodeport_lb4_dsr_lb.o|xdp_nodeport_lb4_nat_backend.o|xdp_nodeport_lb4_nat_lb.o|xdp_nodeport_lb4_test.o|xdp_nodeport_lb6_dsr_lb.o|bpf_nat_tests.o"
NOCOVER_PATTERN: "inter_cluster_snat_clusterip.*|l4lb_ipip_health_check_host.o|nodeport_geneve_dsr_*|session_affinity_test.o|tc_egressgw_redirect.o|tc_egressgw_snat.o|tc_nodeport_lb4_dsr_backend.o|tc_nodeport_lb4_dsr_lb.o|tc_nodeport_lb4_nat_backend.o|tc_nodeport_lb4_nat_lb.o|tc_nodeport_lb6_dsr_backend.o|tc_nodeport_lb6_dsr_lb.o|xdp_egressgw_reply.o|xdp_nodeport_lb4_dsr_lb.o|xdp_nodeport_lb4_nat_backend.o|xdp_nodeport_lb4_nat_lb.o|xdp_nodeport_lb4_test.o|xdp_nodeport_lb6_dsr_lb.o|bpf_nat_tests.o|tc_nodeport_l3_dev.o"
run: |
make -C test run_bpf_tests COVER=1 NOCOVER="$NOCOVER_PATTERN" || (echo "Run 'make -C test run_bpf_tests COVER=1 NOCOVER=\"$NOCOVER_PATTERN\"' locally to investigate failures"; exit 1)
- name: Archive code coverage results
Expand Down
34 changes: 18 additions & 16 deletions bpf/bpf_host.c
Expand Up @@ -149,27 +149,27 @@ handle_ipv6(struct __ctx_buff *ctx, __u32 secctx __maybe_unused,
#endif /* ENABLE_HOST_FIREWALL */
void *data, *data_end;
struct ipv6hdr *ip6;
int __maybe_unused ret;
int hdrlen;
__u8 nexthdr;
int ret;

if (!revalidate_data(ctx, &data, &data_end, &ip6))
return DROP_INVALID;

nexthdr = ip6->nexthdr;
hdrlen = ipv6_hdrlen(ctx, &nexthdr);
if (hdrlen < 0)
return hdrlen;
if (is_defined(ENABLE_HOST_FIREWALL) || !from_host) {
__u8 nexthdr = ip6->nexthdr;
int hdrlen;

#ifdef ENABLE_HOST_FIREWALL
if (likely(nexthdr == IPPROTO_ICMPV6)) {
ret = icmp6_host_handle(ctx);
if (ret == SKIP_HOST_FIREWALL)
goto skip_host_firewall;
if (IS_ERR(ret))
return ret;
hdrlen = ipv6_hdrlen(ctx, &nexthdr);
if (hdrlen < 0)
return hdrlen;

if (likely(nexthdr == IPPROTO_ICMPV6)) {
ret = icmp6_host_handle(ctx, ETH_HLEN + hdrlen, !from_host);
if (ret == SKIP_HOST_FIREWALL)
goto skip_host_firewall;
if (IS_ERR(ret))
return ret;
}
}
#endif /* ENABLE_HOST_FIREWALL */

#ifdef ENABLE_NODEPORT
if (!from_host) {
Expand Down Expand Up @@ -214,8 +214,10 @@ handle_ipv6(struct __ctx_buff *ctx, __u32 secctx __maybe_unused,
if (map_update_elem(&CT_TAIL_CALL_BUFFER6, &zero, &ct_buffer, 0) < 0)
return DROP_INVALID_TC_BUFFER;
}
#endif /* ENABLE_HOST_FIREWALL */

skip_host_firewall:
#ifdef ENABLE_HOST_FIREWALL
ctx_store_meta(ctx, CB_FROM_HOST,
(need_hostfw ? FROM_HOST_FLAG_NEED_HOSTFW : 0) |
(is_host_id ? FROM_HOST_FLAG_HOST_ID : 0));
Expand Down Expand Up @@ -473,7 +475,7 @@ handle_to_netdev_ipv6(struct __ctx_buff *ctx, struct trace_ctx *trace, __s8 *ext
return hdrlen;

if (likely(nexthdr == IPPROTO_ICMPV6)) {
ret = icmp6_host_handle(ctx);
ret = icmp6_host_handle(ctx, ETH_HLEN + hdrlen, false);
if (ret == SKIP_HOST_FIREWALL)
return CTX_ACT_OK;
if (IS_ERR(ret))
Expand Down
56 changes: 40 additions & 16 deletions bpf/lib/icmp6.h
Expand Up @@ -11,7 +11,7 @@
#include "drop.h"
#include "eps.h"

#define ICMP6_TYPE_OFFSET (sizeof(struct ipv6hdr) + offsetof(struct icmp6hdr, icmp6_type))
#define ICMP6_TYPE_OFFSET offsetof(struct icmp6hdr, icmp6_type)
#define ICMP6_CSUM_OFFSET (sizeof(struct ipv6hdr) + offsetof(struct icmp6hdr, icmp6_cksum))
#define ICMP6_ND_TARGET_OFFSET (sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr))
#define ICMP6_ND_OPTS (sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) + sizeof(struct in6_addr))
Expand Down Expand Up @@ -40,12 +40,9 @@
#define ACTION_UNKNOWN_ICMP6_NS DROP_UNKNOWN_TARGET
#endif

static __always_inline __u8 icmp6_load_type(struct __ctx_buff *ctx, int nh_off)
static __always_inline int icmp6_load_type(struct __ctx_buff *ctx, int l4_off, __u8 *type)
{
__u8 type;

ctx_load_bytes(ctx, nh_off + ICMP6_TYPE_OFFSET, &type, sizeof(type));
return type;
return ctx_load_bytes(ctx, l4_off + ICMP6_TYPE_OFFSET, type, sizeof(*type));
}

static __always_inline int icmp6_send_reply(struct __ctx_buff *ctx, int nh_off)
Expand Down Expand Up @@ -305,6 +302,7 @@ static __always_inline int __icmp6_handle_ns(struct __ctx_buff *ctx, int nh_off)
{
union v6addr target, router;
struct endpoint_info *ep;
union macaddr router_mac = NODE_MAC;

if (ctx_load_bytes(ctx, nh_off + ICMP6_ND_TARGET_OFFSET, target.addr,
sizeof(((struct ipv6hdr *)NULL)->saddr)) < 0)
Expand All @@ -315,15 +313,27 @@ static __always_inline int __icmp6_handle_ns(struct __ctx_buff *ctx, int nh_off)
BPF_V6(router, ROUTER_IP);

if (ipv6_addr_equals(&target, &router)) {
union macaddr router_mac = NODE_MAC;

return send_icmp6_ndisc_adv(ctx, nh_off, &router_mac, true);
}

ep = __lookup_ip6_endpoint(&target);
if (ep) {
union macaddr router_mac = NODE_MAC;

if (ep->flags & ENDPOINT_F_HOST) {
/* Target must be a node_ip, because of ENDPOINT_F_HOST flag
* and target != router_ip.
*
* We pass these packets to stack to make sure:
*
* 1. The response NA has node IP as source address instead of
* router IP, to address https://github.com/cilium/cilium/issues/14509.
*
* 2. Kernel stack can record a neighbor entry for the
* source IP, to avoid bpf_fib_lookup failure as mentioned at
* https://github.com/cilium/cilium/pull/30837#issuecomment-1960897445.
*/
return CTX_ACT_OK;
}
return send_icmp6_ndisc_adv(ctx, nh_off, &router_mac, false);
}

Expand Down Expand Up @@ -370,7 +380,10 @@ static __always_inline int icmp6_handle_ns(struct __ctx_buff *ctx, int nh_off,
static __always_inline bool
is_icmp6_ndp(struct __ctx_buff *ctx, const struct ipv6hdr *ip6, int nh_off)
{
__u8 type = icmp6_load_type(ctx, nh_off);
__u8 type;

if (icmp6_load_type(ctx, nh_off + sizeof(struct ipv6hdr), &type) < 0)
return false;

return ip6->nexthdr == IPPROTO_ICMPV6 &&
(type == ICMP6_NS_MSG_TYPE || type == ICMP6_NA_MSG_TYPE);
Expand All @@ -379,9 +392,12 @@ is_icmp6_ndp(struct __ctx_buff *ctx, const struct ipv6hdr *ip6, int nh_off)
static __always_inline int icmp6_ndp_handle(struct __ctx_buff *ctx, int nh_off,
enum metric_dir direction)
{
__u8 type = icmp6_load_type(ctx, nh_off);
cilium_dbg(ctx, DBG_ICMP6_HANDLE, type, 0);
__u8 type;

if (icmp6_load_type(ctx, nh_off + sizeof(struct ipv6hdr), &type) < 0)
return DROP_INVALID;

cilium_dbg(ctx, DBG_ICMP6_HANDLE, type, 0);
if (type == ICMP6_NS_MSG_TYPE)
return icmp6_handle_ns(ctx, nh_off, direction);

Expand All @@ -392,12 +408,17 @@ static __always_inline int icmp6_ndp_handle(struct __ctx_buff *ctx, int nh_off,
}

static __always_inline int
icmp6_host_handle(struct __ctx_buff *ctx __maybe_unused)
icmp6_host_handle(struct __ctx_buff *ctx, int l4_off, bool handle_ns)
{
__u8 type __maybe_unused;
__u8 type;

if (icmp6_load_type(ctx, l4_off, &type) < 0)
return DROP_INVALID;

type = icmp6_load_type(ctx, ETH_HLEN);
if (type == ICMP6_NS_MSG_TYPE && handle_ns)
return icmp6_handle_ns(ctx, ETH_HLEN, METRIC_INGRESS);

#ifdef ENABLE_HOST_FIREWALL
/* When the host firewall is enabled, we drop and allow ICMPv6 messages
* according to RFC4890, except for echo request and reply messages which
* are handled by host policies and can be dropped.
Expand All @@ -417,7 +438,7 @@ icmp6_host_handle(struct __ctx_buff *ctx __maybe_unused)
* | ICMPv6-mult-list-done | CTX_ACT_OK | 132 |
* | ICMPv6-router-solici | CTX_ACT_OK | 133 |
* | ICMPv6-router-advert | CTX_ACT_OK | 134 |
* | ICMPv6-neighbor-solicit | CTX_ACT_OK | 135 |
* | ICMPv6-neighbor-solicit | icmp6_handle_ns | 135 |
* | ICMPv6-neighbor-advert | CTX_ACT_OK | 136 |
* | ICMPv6-redirect-message | CTX_ACT_DROP | 137 |
* | ICMPv6-router-renumber | CTX_ACT_OK | 138 |
Expand Down Expand Up @@ -459,6 +480,9 @@ icmp6_host_handle(struct __ctx_buff *ctx __maybe_unused)
(ICMP6_MULT_RA_MSG_TYPE <= type && type <= ICMP6_MULT_RT_MSG_TYPE))
return SKIP_HOST_FIREWALL;
return DROP_FORBIDDEN_ICMP6;
#else
return CTX_ACT_OK;
#endif /* ENABLE_HOST_FIREWALL */
}

#endif
4 changes: 2 additions & 2 deletions bpf/lib/nat.h
Expand Up @@ -1590,7 +1590,7 @@ static __always_inline int snat_v6_rewrite_ingress(struct __ctx_buff *ctx,
__u8 type = 0;
__be32 from, to;

if (ctx_load_bytes(ctx, off, &type, 1) < 0)
if (icmp6_load_type(ctx, off, &type) < 0)
return DROP_INVALID;
if (type == ICMPV6_ECHO_REQUEST || type == ICMPV6_ECHO_REPLY) {
if (ctx_store_bytes(ctx, off +
Expand Down Expand Up @@ -1955,7 +1955,7 @@ snat_v6_rev_nat_handle_icmp_pkt_toobig(struct __ctx_buff *ctx, __u32 off)
/* No reasons to see a packet different than
* ICMPV6_ECHO_REQUEST.
*/
if (ctx_load_bytes(ctx, icmpoff, &type, sizeof(type)) < 0 ||
if (icmp6_load_type(ctx, icmpoff, &type) < 0 ||
type != ICMPV6_ECHO_REQUEST)
return DROP_INVALID;
if (ctx_load_bytes(ctx, icmpoff +
Expand Down
9 changes: 7 additions & 2 deletions bpf/lib/wireguard.h
Expand Up @@ -22,7 +22,6 @@ wg_maybe_redirect_to_encrypt(struct __ctx_buff *ctx)
__u16 proto = 0;
struct ipv6hdr __maybe_unused *ip6;
struct iphdr __maybe_unused *ip4;
__u8 __maybe_unused icmp_type = 0;
bool from_tunnel __maybe_unused = false;

if (!validate_ethertype(ctx, &proto))
Expand All @@ -40,10 +39,16 @@ wg_maybe_redirect_to_encrypt(struct __ctx_buff *ctx)
* NA should not be sent over WG.
*/
if (ip6->nexthdr == IPPROTO_ICMPV6) {
__u8 icmp_type;
julianwiedmann marked this conversation as resolved.
Show resolved Hide resolved

if (data + sizeof(*ip6) + ETH_HLEN +
sizeof(struct icmp6hdr) > data_end)
return DROP_INVALID;
icmp_type = icmp6_load_type(ctx, ETH_HLEN);

if (icmp6_load_type(ctx, ETH_HLEN + sizeof(struct ipv6hdr),
&icmp_type) < 0)
return DROP_INVALID;

if (icmp_type == ICMP6_NA_MSG_TYPE)
goto out;
}
Expand Down