Skip to content

Commit

Permalink
bpf: lift multi dev restriction on fast redirect
Browse files Browse the repository at this point in the history
Refactor current redirect_neigh() code into redirect_direct_{v4,v6}() and
add multi-device support. The latter performs a route lookup and only calls
into redirect_neigh() if L2 addresses must be resolved. It also passes the
GW information from the fib_lookup() to redirect_neigh() to avoid a second
lookup for the latter. This now enables the use of the fast redirect in the
more general case.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
  • Loading branch information
borkmann committed Nov 17, 2020
1 parent 8b0c892 commit 92edccf
Show file tree
Hide file tree
Showing 5 changed files with 142 additions and 16 deletions.
4 changes: 3 additions & 1 deletion bpf/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,9 @@ LXC_OPTIONS = \
-DENABLE_HOST_REDIRECT:-DENABLE_IPV4:-DENABLE_IPV6:-DPOLICY_VERDICT_NOTIFY:-DENABLE_NAT46 \
-DENABLE_NODEPORT:-DENABLE_IPV4:-DENABLE_IPV6:-DPOLICY_VERDICT_NOTIFY \
-DENABLE_NODEPORT:-DENABLE_DSR:-DENABLE_IPV4:-DENABLE_IPV6:-DPOLICY_VERDICT_NOTIFY \
-DENABLE_IPV4:-DENABLE_IPV4:-DPOLICY_VERDICT_NOTIFY:-DUSE_BPF_PROG_FOR_INGRESS_POLICY
-DENABLE_IPV4:-DENABLE_IPV6:-DPOLICY_VERDICT_NOTIFY:-DUSE_BPF_PROG_FOR_INGRESS_POLICY \
-DENABLE_IPV4:-DENABLE_IPV6:-DHAVE_LPM_TRIE_MAP_TYPE:-DHAVE_LRU_HASH_MAP_TYPE:-DENABLE_TPROXY:-DENABLE_REDIRECT_FAST \
-DENABLE_IPV4:-DENABLE_IPV6:-DHAVE_LPM_TRIE_MAP_TYPE:-DHAVE_LRU_HASH_MAP_TYPE:-DENABLE_TPROXY:-DENABLE_REDIRECT_FAST:-DENABLE_SKIP_FIB

# These options are intended to max out the BPF program complexity. It is
# load-tested as well.
Expand Down
19 changes: 7 additions & 12 deletions bpf/bpf_lxc.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include "lib/encap.h"
#include "lib/eps.h"
#include "lib/nat.h"
#include "lib/fib.h"
#include "lib/nodeport.h"
#include "lib/policy_log.h"

Expand Down Expand Up @@ -327,12 +328,9 @@ static __always_inline int ipv6_l3_from_lxc(struct __ctx_buff *ctx,
return DROP_MISSED_TAIL_CALL;
}
#endif
if (is_defined(ENABLE_REDIRECT_FAST)) {
ret = ipv6_l3(ctx, l3_off, NULL, NULL, METRIC_EGRESS);
if (unlikely(ret != CTX_ACT_OK))
return ret;
return redirect_neigh(DIRECT_ROUTING_DEV_IFINDEX, NULL, 0, 0);
}
if (is_defined(ENABLE_REDIRECT_FAST))
return redirect_direct_v6(ctx, l3_off, ip6);

goto pass_to_stack;

#ifdef ENABLE_ROUTING
Expand Down Expand Up @@ -703,12 +701,9 @@ static __always_inline int handle_ipv4_from_lxc(struct __ctx_buff *ctx,
return ret;
}
#endif
if (is_defined(ENABLE_REDIRECT_FAST)) {
ret = ipv4_l3(ctx, l3_off, NULL, NULL, ip4);
if (unlikely(ret != CTX_ACT_OK))
return ret;
return redirect_neigh(DIRECT_ROUTING_DEV_IFINDEX, NULL, 0, 0);
}
if (is_defined(ENABLE_REDIRECT_FAST))
return redirect_direct_v4(ctx, l3_off, ip4);

goto pass_to_stack;

#ifdef ENABLE_ROUTING
Expand Down
129 changes: 129 additions & 0 deletions bpf/lib/fib.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2020 Authors of Cilium */

#ifndef __LIB_FIB_H_
#define __LIB_FIB_H_

#include <bpf/ctx/ctx.h>
#include <bpf/api.h>

#include "common.h"
#include "l3.h"

#ifdef ENABLE_IPV6
static __always_inline int
redirect_direct_v6(struct __ctx_buff *ctx __maybe_unused,
		   int l3_off __maybe_unused,
		   struct ipv6hdr *ip6 __maybe_unused)
{
	/* Fast redirect of an IPv6 packet out of a native device.
	 *
	 * With ENABLE_SKIP_FIB, no FIB lookup is performed here: the packet is
	 * handed to redirect_neigh() on DIRECT_ROUTING_DEV_IFINDEX without any
	 * nexthop parameters.
	 *
	 * Without ENABLE_SKIP_FIB, a FIB lookup selects the egress ifindex. If
	 * the lookup succeeds, the MAC addresses it returned are written into
	 * the Ethernet header and the packet is sent with redirect(). If the
	 * lookup reports a missing neighbour entry, the nexthop taken from the
	 * lookup result is passed on to redirect_neigh() so that it does not
	 * need a second lookup. Any other lookup result drops the packet.
	 *
	 * Returns the redirect()/redirect_neigh() result, the non-OK result of
	 * ipv6_l3(), or CTX_ACT_DROP.
	 */
	struct bpf_redir_neigh *nh = NULL;
	bool no_neigh = is_defined(ENABLE_SKIP_FIB);
	int oif = DIRECT_ROUTING_DEV_IFINDEX;
	int ret;
# ifndef ENABLE_SKIP_FIB
	struct bpf_redir_neigh nh_params;
	struct bpf_fib_lookup fib_params = {
		.family		= AF_INET6,
		.ifindex	= ctx->ingress_ifindex,
	};

	ipv6_addr_copy((union v6addr *)&fib_params.ipv6_src,
		       (union v6addr *)&ip6->saddr);
	ipv6_addr_copy((union v6addr *)&fib_params.ipv6_dst,
		       (union v6addr *)&ip6->daddr);

	ret = fib_lookup(ctx, &fib_params, sizeof(fib_params),
			 BPF_FIB_LOOKUP_DIRECT);
	if (ret == BPF_FIB_LKUP_RET_NO_NEIGH) {
		/* L2 address not resolved yet: forward the nexthop from the
		 * lookup result (the whole address union is copied) to
		 * redirect_neigh().
		 */
		__bpf_memcpy_builtin(&nh_params.ipv6_nh, &fib_params.ipv6_dst,
				     sizeof(nh_params.ipv6_nh));
		nh_params.nh_family = fib_params.family;
		nh = &nh_params;
		no_neigh = true;
	} else if (ret != BPF_FIB_LKUP_RET_SUCCESS) {
		return CTX_ACT_DROP;
	}

	/* Egress device as selected by the FIB lookup. */
	oif = fib_params.ifindex;
# endif /* ENABLE_SKIP_FIB */

	ret = ipv6_l3(ctx, l3_off, NULL, NULL, METRIC_EGRESS);
	if (unlikely(ret != CTX_ACT_OK))
		return ret;

	if (no_neigh)
		return redirect_neigh(oif, nh, nh ? sizeof(*nh) : 0, 0);

# ifndef ENABLE_SKIP_FIB
	/* Neighbour is known: fill in destination, then source MAC. */
	if (eth_store_daddr(ctx, fib_params.dmac, 0) < 0 ||
	    eth_store_saddr(ctx, fib_params.smac, 0) < 0)
		return CTX_ACT_DROP;

	return redirect(oif, 0);
# else
	return CTX_ACT_DROP;
# endif /* ENABLE_SKIP_FIB */
}
#endif /* ENABLE_IPV6 */

#ifdef ENABLE_IPV4
static __always_inline int
redirect_direct_v4(struct __ctx_buff *ctx __maybe_unused,
		   int l3_off __maybe_unused,
		   struct iphdr *ip4 __maybe_unused)
{
	/* Fast redirect of an IPv4 packet out of a native device.
	 *
	 * Single external device (ENABLE_SKIP_FIB): redirect_neigh() on
	 * DIRECT_ROUTING_DEV_IFINDEX resolves the GW and performs the L2
	 * resolution on its own, so no FIB lookup is done here.
	 *
	 * Multiple devices: a FIB lookup is performed first and selects the
	 * egress ifindex. If the neighbour entry is already resolved, the MAC
	 * addresses from the lookup are written and redirect() is called
	 * directly. If it is not, redirect_neigh() is asked to resolve it and
	 * receives the nexthop from the lookup result. Any other lookup
	 * result drops the packet.
	 *
	 * Returns the redirect()/redirect_neigh() result, the non-OK result of
	 * ipv4_l3(), or CTX_ACT_DROP.
	 */
	struct bpf_redir_neigh *nh = NULL;
	bool no_neigh = is_defined(ENABLE_SKIP_FIB);
	int oif = DIRECT_ROUTING_DEV_IFINDEX;
	int ret;
# ifndef ENABLE_SKIP_FIB
	struct bpf_redir_neigh nh_params;
	struct bpf_fib_lookup fib_params = {
		.family		= AF_INET,
		.ifindex	= ctx->ingress_ifindex,
		.ipv4_src	= ip4->saddr,
		.ipv4_dst	= ip4->daddr,
	};

	ret = fib_lookup(ctx, &fib_params, sizeof(fib_params),
			 BPF_FIB_LOOKUP_DIRECT);
	if (ret == BPF_FIB_LKUP_RET_NO_NEIGH) {
		/* The GW may be a v6 address as well, hence the full address
		 * union is copied rather than only the v4 member.
		 */
		__bpf_memcpy_builtin(&nh_params.ipv6_nh, &fib_params.ipv6_dst,
				     sizeof(nh_params.ipv6_nh));
		nh_params.nh_family = fib_params.family;
		nh = &nh_params;
		no_neigh = true;
	} else if (ret != BPF_FIB_LKUP_RET_SUCCESS) {
		return CTX_ACT_DROP;
	}

	/* Egress device as selected by the FIB lookup. */
	oif = fib_params.ifindex;
# endif /* ENABLE_SKIP_FIB */

	ret = ipv4_l3(ctx, l3_off, NULL, NULL, ip4);
	if (unlikely(ret != CTX_ACT_OK))
		return ret;

	if (no_neigh)
		return redirect_neigh(oif, nh, nh ? sizeof(*nh) : 0, 0);

# ifndef ENABLE_SKIP_FIB
	/* Neighbour is known: fill in destination, then source MAC. */
	if (eth_store_daddr(ctx, fib_params.dmac, 0) < 0 ||
	    eth_store_saddr(ctx, fib_params.smac, 0) < 0)
		return CTX_ACT_DROP;

	return redirect(oif, 0);
# else
	return CTX_ACT_DROP;
# endif /* ENABLE_SKIP_FIB */
}
#endif /* ENABLE_IPV4 */

#endif /* __LIB_FIB_H_ */
3 changes: 0 additions & 3 deletions daemon/cmd/kube_proxy_replacement.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,9 +236,6 @@ func initKubeProxyReplacementOptions() (strict bool) {
fallthrough
case option.Config.Tunnel != option.TunnelDisabled:
fallthrough
// TODO: add multi-dev support
case len(option.Config.Devices) > 1:
fallthrough
// Non-BPF masquerade requires netfilter and hence CT.
case option.Config.Masquerade && !option.Config.EnableBPFMasquerade:
option.Config.EnableHostLegacyRouting = true
Expand Down
3 changes: 3 additions & 0 deletions pkg/datapath/linux/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,9 @@ func (h *HeaderfileWriter) writeTemplateConfig(fw *bufio.Writer, e datapath.Endp
return err
}
fmt.Fprintf(fw, "#define DIRECT_ROUTING_DEV_IFINDEX %d\n", directRoutingIfIndex)
if len(option.Config.Devices) == 1 {
fmt.Fprintf(fw, "#define ENABLE_SKIP_FIB 1\n")
}
}

if e.IsHost() {
Expand Down

0 comments on commit 92edccf

Please sign in to comment.