From f9957825c5ab272f0f445bd50f03b30739bc1f9e Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 17 Apr 2020 15:41:28 +0200 Subject: [PATCH] bpf: Pass packets to host via stack instead of redirecting [ upstream commit 5f50d8230bf65e0da3f7289da701d6487d4ed03c ] Packets to a host IP are currently redirected via cilium_host/cilium_net. The reason for this is mostly historical. For other packets where routing by the kernel routing tables is desired, packets are already passed on via TC_ACT_OK to the stack directly. The two cases where this redirection is needed are: * For proxy redirection due to a kernel limitation on passing the routing tables multiple times. This case is left untouched. * For the HOST_REDIRECT_TO_INGRESS case, e.g. flannel integration. This case is left untouched. The IPv4 and IPv6 cases are brought in line so that this logic is not accidentally lost later on. A side effect of the redirect is that the skb gets scrubbed, including the skb->mark. However, a follow-up fix relies on the identity being present in the skb->mark. Therefore, pass packets via the stack using TC_ACT_OK. This is faster, simpler, and allows for the identity to be carried in the mark. 
Fixes: #9784 Signed-off-by: Thomas Graf Signed-off-by: Quentin Monnet --- bpf/bpf_lxc.c | 10 +++------- daemon/bpf.sha | 2 +- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/bpf/bpf_lxc.c b/bpf/bpf_lxc.c index 135fb2c5f847..78a096905ce8 100644 --- a/bpf/bpf_lxc.c +++ b/bpf/bpf_lxc.c @@ -348,7 +348,7 @@ static inline int ipv6_l3_from_lxc(struct __sk_buff *skb, #ifdef ENABLE_ROUTING to_host: - if (is_defined(ENABLE_HOST_REDIRECT)) { + if (is_defined(HOST_REDIRECT_TO_INGRESS)) { union macaddr host_mac = HOST_IFINDEX_MAC; ret = ipv6_l3(skb, l3_off, (__u8 *) &router_mac.addr, (__u8 *) &host_mac.addr, METRIC_EGRESS); @@ -359,7 +359,7 @@ static inline int ipv6_l3_from_lxc(struct __sk_buff *skb, HOST_IFINDEX, reason, monitor); cilium_dbg_capture(skb, DBG_CAPTURE_DELIVERY, HOST_IFINDEX); - return redirect(HOST_IFINDEX, 0); + return redirect(HOST_IFINDEX, BPF_F_INGRESS); } #endif @@ -675,7 +675,7 @@ static inline int handle_ipv4_from_lxc(struct __sk_buff *skb, __u32 *dstID) #ifdef ENABLE_ROUTING to_host: - if (is_defined(ENABLE_HOST_REDIRECT)) { + if (is_defined(HOST_REDIRECT_TO_INGRESS)) { union macaddr host_mac = HOST_IFINDEX_MAC; ret = ipv4_l3(skb, l3_off, (__u8 *) &router_mac.addr, (__u8 *) &host_mac.addr, ip4); @@ -686,11 +686,7 @@ static inline int handle_ipv4_from_lxc(struct __sk_buff *skb, __u32 *dstID) reason, monitor); cilium_dbg_capture(skb, DBG_CAPTURE_DELIVERY, HOST_IFINDEX); -#ifdef HOST_REDIRECT_TO_INGRESS return redirect(HOST_IFINDEX, BPF_F_INGRESS); -#else - return redirect(HOST_IFINDEX, 0); -#endif } #endif diff --git a/daemon/bpf.sha b/daemon/bpf.sha index db0e6d6d06b5..e1c23bf4acf8 100644 --- a/daemon/bpf.sha +++ b/daemon/bpf.sha @@ -1,2 +1,2 @@ -GO_BINDATA_SHA1SUM=d03d8900221291eb9f1080707e28776960944277 +GO_BINDATA_SHA1SUM=09a05065f3000f3852a0fe6ee12071c8ee53b68d BPF_FILES=../bpf/COPYING ../bpf/Makefile ../bpf/Makefile.bpf ../bpf/bpf_alignchecker.c ../bpf/bpf_features.h ../bpf/bpf_hostdev_ingress.c ../bpf/bpf_ipsec.c ../bpf/bpf_lb.c 
../bpf/bpf_lxc.c ../bpf/bpf_netdev.c ../bpf/bpf_network.c ../bpf/bpf_overlay.c ../bpf/bpf_sock.c ../bpf/bpf_xdp.c ../bpf/cilium-map-migrate.c ../bpf/filter_config.h ../bpf/include/bpf/api.h ../bpf/include/elf/elf.h ../bpf/include/elf/gelf.h ../bpf/include/elf/libelf.h ../bpf/include/iproute2/bpf_elf.h ../bpf/include/linux/bpf.h ../bpf/include/linux/bpf_common.h ../bpf/include/linux/byteorder.h ../bpf/include/linux/byteorder/big_endian.h ../bpf/include/linux/byteorder/little_endian.h ../bpf/include/linux/icmp.h ../bpf/include/linux/icmpv6.h ../bpf/include/linux/if_arp.h ../bpf/include/linux/if_ether.h ../bpf/include/linux/if_packet.h ../bpf/include/linux/in.h ../bpf/include/linux/in6.h ../bpf/include/linux/ioctl.h ../bpf/include/linux/ip.h ../bpf/include/linux/ipv6.h ../bpf/include/linux/perf_event.h ../bpf/include/linux/swab.h ../bpf/include/linux/tcp.h ../bpf/include/linux/type_mapper.h ../bpf/include/linux/udp.h ../bpf/init.sh ../bpf/lib/arp.h ../bpf/lib/common.h ../bpf/lib/config.h ../bpf/lib/conntrack.h ../bpf/lib/conntrack_map.h ../bpf/lib/csum.h ../bpf/lib/dbg.h ../bpf/lib/drop.h ../bpf/lib/encap.h ../bpf/lib/eps.h ../bpf/lib/eth.h ../bpf/lib/events.h ../bpf/lib/icmp6.h ../bpf/lib/ipv4.h ../bpf/lib/ipv6.h ../bpf/lib/l3.h ../bpf/lib/l4.h ../bpf/lib/lb.h ../bpf/lib/lxc.h ../bpf/lib/maps.h ../bpf/lib/metrics.h ../bpf/lib/nat.h ../bpf/lib/nat46.h ../bpf/lib/nodeport.h ../bpf/lib/policy.h ../bpf/lib/signal.h ../bpf/lib/tailcall.h ../bpf/lib/trace.h ../bpf/lib/utils.h ../bpf/lib/xdp.h ../bpf/lxc_config.h ../bpf/netdev_config.h ../bpf/node_config.h ../bpf/probes/raw_change_tail.t ../bpf/probes/raw_fib_lookup.t ../bpf/probes/raw_insn.h ../bpf/probes/raw_invalidate_hash.t ../bpf/probes/raw_lpm_map.t ../bpf/probes/raw_lru_map.t ../bpf/probes/raw_main.c ../bpf/probes/raw_map_val_adj.t ../bpf/probes/raw_mark_map_val.t ../bpf/probes/raw_max_insn.t ../bpf/probes/raw_sock_cookie.t ../bpf/run_probes.sh ../bpf/sockops/Makefile ../bpf/sockops/bpf_redir.c 
../bpf/sockops/bpf_sockops.c ../bpf/sockops/bpf_sockops.h ../bpf/sockops/sockops_config.h