From a4755fb34999625e28b84c12ec79f2b1cb0c0bd9 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Fri, 12 Jun 2020 15:12:14 +0200 Subject: [PATCH] bpf: Handle icmpv6 in host firewall In IPv6 mode, when the host firewall is enabled and rules are enforced, we start dropping ICMPv6 packets that are required to route packets. In particular, we can notice the following drops in cilium monitor: xx drop (Policy denied) flow 0x1fc0ef0 to endpoint 0, identity 1->0: fd00::c -> f00d::a0f:0:0:dfa1 DestinationUnreachable(NoRouteToDst) xx drop (Policy denied) flow 0x0 to endpoint 0, identity 0->0: fd01::c -> fd01::b NeighborAdvertisement The nodes need to be able to exchange ICMPv6 NS and NA messages to establish routes. We already handle the response to NS messages on ingress, but when the egress policies are enforced, we start dropping outgoing NS and NA messages. This commit fixes that by allowing and rejecting ICMPv6 messages according to RFC4890 Section 4.4. Apart from the message type, no verification is performed on the messages' correctness or their source IP addresses. Such messages from the pods are already handled on their egress, so we're not at risk of spoofing from pods here. Handling of echo request and reply messages does not conform to RFC4890 as they can be filtered by the host firewall. That is to be consistent with our handling of ICMPv4 messages. With this commit, we also stop answering NS and echo request messages from the BPF program on ingress to the host. This behavior had been broken by a695f53 ("Endpoint for host"), but we will now explicitly stop replying to those messages and pass them up the stack instead. 
Fixes: a695f53 ("Endpoint for host") Fixes: 489dbef ("bpf: Enforce host policies for IPv6") Signed-off-by: Paul Chaignon --- bpf/bpf_host.c | 59 ++++++++++++++++++++-------- bpf/init.sh | 1 - bpf/lib/common.h | 1 + bpf/lib/icmp6.h | 85 ++++++++++++++++++++++++++++++++++++++++- bpf/lib/nat.h | 4 +- bpf/netdev_config.h | 1 - pkg/monitor/api/drop.go | 1 + 7 files changed, 132 insertions(+), 20 deletions(-) diff --git a/bpf/bpf_host.c b/bpf/bpf_host.c index 67f0efe068ab..a42d16a3c494 100644 --- a/bpf/bpf_host.c +++ b/bpf/bpf_host.c @@ -329,11 +329,10 @@ handle_ipv6(struct __ctx_buff *ctx, __u32 secctx, const bool from_host) void *data, *data_end; struct ipv6hdr *ip6; union v6addr *dst; - int ret, l3_off = ETH_HLEN, hdrlen; __u32 __maybe_unused remoteID = WORLD_ID; + int ret, l3_off = ETH_HLEN, hdrlen; bool skip_redirect = false; struct endpoint_info *ep; - __u8 nexthdr; if (!revalidate_data(ctx, &data, &data_end, &ip6)) return DROP_INVALID; @@ -360,18 +359,18 @@ handle_ipv6(struct __ctx_buff *ctx, __u32 secctx, const bool from_host) } #endif /* ENABLE_NODEPORT */ - nexthdr = ip6->nexthdr; - hdrlen = ipv6_hdrlen(ctx, l3_off, &nexthdr); - if (hdrlen < 0) - return hdrlen; + if (!skip_redirect) { + __u8 nexthdr = ip6->nexthdr; + hdrlen = ipv6_hdrlen(ctx, ETH_HLEN, &nexthdr); + if (hdrlen < 0) + return hdrlen; -#ifdef HANDLE_NS - if (!skip_redirect && unlikely(nexthdr == IPPROTO_ICMPV6)) { - ret = icmp6_handle(ctx, ETH_HLEN, ip6, METRIC_INGRESS); - if (IS_ERR(ret)) - return ret; + if (likely(nexthdr == IPPROTO_ICMPV6)) { + ret = icmp6_host_handle(ctx); + if (IS_ERR(ret)) + return ret; + } } -#endif if (from_host && !skip_redirect) { /* If we are attached to cilium_host at egress, this will @@ -491,6 +490,37 @@ int tail_handle_ipv6_from_netdev(struct __ctx_buff *ctx) { return tail_handle_ipv6(ctx, false); } + +# ifdef ENABLE_HOST_FIREWALL +static __always_inline int +handle_to_netdev_ipv6(struct __ctx_buff *ctx) +{ + void *data, *data_end; + struct ipv6hdr *ip6; + int 
hdrlen, ret; + __u32 srcID = 0; + __u8 nexthdr; + + if (!revalidate_data(ctx, &data, &data_end, &ip6)) + return DROP_INVALID; + + nexthdr = ip6->nexthdr; + hdrlen = ipv6_hdrlen(ctx, ETH_HLEN, &nexthdr); + if (hdrlen < 0) + return hdrlen; + + if (likely(nexthdr == IPPROTO_ICMPV6)) { + ret = icmp6_host_handle(ctx); + if (IS_ERR(ret)) + return ret; + } + + /* to-netdev is attached to the egress path of the native + * device. */ + srcID = ipcache_lookup_srcid6(ctx); + return ipv6_host_policy_egress(ctx, srcID); +} +# endif #endif /* ENABLE_IPV6 */ #ifdef ENABLE_IPV4 @@ -1223,10 +1253,7 @@ int to_netdev(struct __ctx_buff *ctx __maybe_unused) # endif # ifdef ENABLE_IPV6 case bpf_htons(ETH_P_IPV6): - /* to-netdev is attached to the egress path of the native - * device. */ - srcID = ipcache_lookup_srcid6(ctx); - ret = ipv6_host_policy_egress(ctx, srcID); + ret = handle_to_netdev_ipv6(ctx); break; # endif # ifdef ENABLE_IPV4 diff --git a/bpf/init.sh b/bpf/init.sh index eef645f7a8b9..59bce580c172 100755 --- a/bpf/init.sh +++ b/bpf/init.sh @@ -281,7 +281,6 @@ function bpf_compile() -I. -I$DIR -I$LIB -I$LIB/include \ -D__NR_CPUS__=$NR_CPUS \ -DENABLE_ARP_RESPONDER=1 \ - -DHANDLE_NS=1 \ $EXTRA_OPTS \ -c $LIB/$IN -o - | \ llc -march=bpf -mcpu=$MCPU -mattr=dwarfris -filetype=$TYPE -o $OUT diff --git a/bpf/lib/common.h b/bpf/lib/common.h index 61b3171d9664..5e2646842006 100644 --- a/bpf/lib/common.h +++ b/bpf/lib/common.h @@ -349,6 +349,7 @@ enum { #define DROP_NAT_NOT_NEEDED -173 /* Mapped as drop code, though drop not necessary. 
*/ #define DROP_IS_CLUSTER_IP -174 #define DROP_FRAG_NOT_FOUND -175 +#define DROP_FORBIDDEN_ICMP6 -176 #define NAT_PUNT_TO_STACK DROP_NAT_NOT_NEEDED diff --git a/bpf/lib/icmp6.h b/bpf/lib/icmp6.h index 0e7f7fa646d0..3446b727ebb6 100644 --- a/bpf/lib/icmp6.h +++ b/bpf/lib/icmp6.h @@ -15,6 +15,21 @@ #define ICMP6_ND_TARGET_OFFSET (sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr)) #define ICMP6_ND_OPTS (sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) + sizeof(struct in6_addr)) +#define ICMP6_UNREACH_MSG_TYPE 1 +#define ICMP6_PARAM_ERR_MSG_TYPE 4 +#define ICMP6_ECHO_REQUEST_MSG_TYPE 128 +#define ICMP6_ECHO_REPLY_MSG_TYPE 129 +#define ICMP6_MULT_LIST_QUERY_TYPE 130 +#define ICMP6_NS_MSG_TYPE 135 +#define ICMP6_NA_MSG_TYPE 136 +#define ICMP6_RR_MSG_TYPE 138 +#define ICMP6_INV_NS_MSG_TYPE 141 +#define ICMP6_MULT_LIST_REPORT_V2_TYPE 143 +#define ICMP6_SEND_NS_MSG_TYPE 148 +#define ICMP6_SEND_NA_MSG_TYPE 149 +#define ICMP6_MULT_RA_MSG_TYPE 151 +#define ICMP6_MULT_RT_MSG_TYPE 153 + /* If not specific action is specified, drop unknown neighbour solication * messages */ #ifndef ACTION_UNKNOWN_ICMP6_NS @@ -392,7 +407,7 @@ static __always_inline int icmp6_handle(struct __ctx_buff *ctx, int nh_off, BPF_V6(router_ip, ROUTER_IP); switch(type) { - case 135: + case ICMP6_NS_MSG_TYPE: return icmp6_handle_ns(ctx, nh_off, direction); case ICMPV6_ECHO_REQUEST: if (!ipv6_addrcmp((union v6addr *) &ip6->daddr, &router_ip)) @@ -406,4 +421,72 @@ static __always_inline int icmp6_handle(struct __ctx_buff *ctx, int nh_off, return 0; } +static __always_inline int +icmp6_host_handle(struct __ctx_buff *ctx __maybe_unused) +{ + __u8 type __maybe_unused; + +#ifdef ENABLE_HOST_FIREWALL + /* When the host firewall is enabled, we drop and allow ICMPv6 messages + * according to RFC4890, except for echo request and reply messages which + * are handled by host policies and can be dropped. 
+ * | ICMPv6 Message | Action | Type | + * |---------------------------------|--------------|------| + * | ICMPv6-unreach | CTX_ACT_OK | 1 | + * | ICMPv6-too-big | CTX_ACT_OK | 2 | + * | ICMPv6-timed | CTX_ACT_OK | 3 | + * | ICMPv6-parameter | CTX_ACT_OK | 4 | + * | ICMPv6-err-private-exp-100 | CTX_ACT_DROP | 100 | + * | ICMPv6-err-private-exp-101 | CTX_ACT_DROP | 101 | + * | ICMPv6-err-expansion | CTX_ACT_DROP | 127 | + * | ICMPv6-echo-message | Firewall | 128 | + * | ICMPv6-echo-reply | Firewall | 129 | + * | ICMPv6-mult-list-query | CTX_ACT_OK | 130 | + * | ICMPv6-mult-list-report | CTX_ACT_OK | 131 | + * | ICMPv6-mult-list-done | CTX_ACT_OK | 132 | + * | ICMPv6-router-solici | CTX_ACT_OK | 133 | + * | ICMPv6-router-advert | CTX_ACT_OK | 134 | + * | ICMPv6-neighbor-solicit | CTX_ACT_OK | 135 | + * | ICMPv6-neighbor-advert | CTX_ACT_OK | 136 | + * | ICMPv6-redirect-message | CTX_ACT_DROP | 137 | + * | ICMPv6-router-renumber | CTX_ACT_OK | 138 | + * | ICMPv6-node-info-query | CTX_ACT_DROP | 139 | + * | ICMPv6-node-info-response | CTX_ACT_DROP | 140 | + * | ICMPv6-inv-neighbor-solicit | CTX_ACT_OK | 141 | + * | ICMPv6-inv-neighbor-advert | CTX_ACT_OK | 142 | + * | ICMPv6-mult-list-report-v2 | CTX_ACT_OK | 143 | + * | ICMPv6-home-agent-disco-request | CTX_ACT_DROP | 144 | + * | ICMPv6-home-agent-disco-reply | CTX_ACT_DROP | 145 | + * | ICMPv6-mobile-solicit | CTX_ACT_DROP | 146 | + * | ICMPv6-mobile-advert | CTX_ACT_DROP | 147 | + * | ICMPv6-send-solicit | CTX_ACT_OK | 148 | + * | ICMPv6-send-advert | CTX_ACT_OK | 149 | + * | ICMPv6-mobile-exp | CTX_ACT_DROP | 150 | + * | ICMPv6-mult-router-advert | CTX_ACT_OK | 151 | + * | ICMPv6-mult-router-solicit | CTX_ACT_OK | 152 | + * | ICMPv6-mult-router-term | CTX_ACT_OK | 153 | + * | ICMPv6-FMIPv6 | CTX_ACT_DROP | 154 | + * | ICMPv6-rpl-control | CTX_ACT_DROP | 155 | + * | ICMPv6-info-private-exp-200 | CTX_ACT_DROP | 200 | + * | ICMPv6-info-private-exp-201 | CTX_ACT_DROP | 201 | + * | ICMPv6-info-expansion | CTX_ACT_DROP | 
255 | + * | ICMPv6-unallocated | CTX_ACT_DROP | | + * | ICMPv6-unassigned | CTX_ACT_DROP | | + */ + type = icmp6_load_type(ctx, ETH_HLEN); + if (type == ICMP6_ECHO_REQUEST_MSG_TYPE || type == ICMP6_ECHO_REPLY_MSG_TYPE) + return CTX_ACT_OK; + + if ((ICMP6_UNREACH_MSG_TYPE <= type && type <= ICMP6_PARAM_ERR_MSG_TYPE) || + (ICMP6_MULT_LIST_QUERY_TYPE <= type && type <= ICMP6_NA_MSG_TYPE) || + (ICMP6_INV_NS_MSG_TYPE <= type && type <= ICMP6_MULT_LIST_REPORT_V2_TYPE) || + (ICMP6_SEND_NS_MSG_TYPE <= type && type <= ICMP6_SEND_NA_MSG_TYPE) || + (ICMP6_MULT_RA_MSG_TYPE <= type && type <= ICMP6_MULT_RT_MSG_TYPE)) + return CTX_ACT_OK; + return DROP_FORBIDDEN_ICMP6; +#else + return CTX_ACT_OK; +#endif /* ENABLE_HOST_FIREWALL */ +} + #endif diff --git a/bpf/lib/nat.h b/bpf/lib/nat.h index 86e3afe9f4f0..6c1c23f959fd 100644 --- a/bpf/lib/nat.h +++ b/bpf/lib/nat.h @@ -17,6 +17,7 @@ #include "signal.h" #include "conntrack.h" #include "conntrack_map.h" +#include "icmp6.h" enum { NAT_DIR_EGRESS = TUPLE_F_OUT, @@ -1000,7 +1001,8 @@ static __always_inline __maybe_unused int snat_v6_process(struct __ctx_buff *ctx if (ctx_load_bytes(ctx, off, &icmp6hdr, sizeof(icmp6hdr)) < 0) return DROP_INVALID; /* Letting neighbor solicitation / advertisement pass through. 
*/ - if (icmp6hdr.icmp6_type == 135 || icmp6hdr.icmp6_type == 136) + if (icmp6hdr.icmp6_type == ICMP6_NS_MSG_TYPE || + icmp6hdr.icmp6_type == ICMP6_NA_MSG_TYPE) return CTX_ACT_OK; if (icmp6hdr.icmp6_type != ICMPV6_ECHO_REQUEST && icmp6hdr.icmp6_type != ICMPV6_ECHO_REPLY) diff --git a/bpf/netdev_config.h b/bpf/netdev_config.h index cec5ddb90a9d..58216a6d0790 100644 --- a/bpf/netdev_config.h +++ b/bpf/netdev_config.h @@ -9,7 +9,6 @@ #ifndef SKIP_DEBUG #define DEBUG #endif -#define HANDLE_NS #define ENCAP_IFINDEX 1 #define SECLABEL 2 #define SECLABEL_NB 0xfffff diff --git a/pkg/monitor/api/drop.go b/pkg/monitor/api/drop.go index 4165f93d51ec..ea676c0c4875 100644 --- a/pkg/monitor/api/drop.go +++ b/pkg/monitor/api/drop.go @@ -77,6 +77,7 @@ var errors = map[uint8]string{ 173: "NAT not needed", 174: "Is a ClusterIP", 175: "First logical datagram fragment not found", + 176: "Forbidden ICMPv6 message", } // DropReason prints the drop reason in a human readable string