Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bpf: SRv6 fib on encap #26136

Merged
merged 4 commits into from
Jun 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
71 changes: 69 additions & 2 deletions bpf/lib/egress_policies.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#ifndef __LIB_EGRESS_POLICIES_H_
#define __LIB_EGRESS_POLICIES_H_

#include "lib/fib.h"
#include "lib/identity.h"

#include "maps.h"
Expand Down Expand Up @@ -579,25 +580,91 @@ srv6_store_meta_sid(struct __ctx_buff *ctx, const union v6addr *sid)
ctx_store_meta(ctx, CB_SRV6_SID_4, sid->p4);
}

#ifdef ENABLE_IPV6
/* SRv6 encapsulation occurs at the native-dev currently.
* Its possible that after encapsulation a fib entry exists which would actually
* route the IPv6 destination somewhere else.
*
* Therefore, this function performs an additional fib lookup on the encap'd
* packet to ensure we transmit it via the correct link and with the correct
* l2 addresses.
*/
static __always_inline int
srv6_refib(struct __ctx_buff *ctx, int *ext_err)
{
struct bpf_fib_lookup_padded params = {0};
__u32 old_oif = ctx_get_ifindex(ctx);
void *data, *data_end;
struct ipv6hdr *ip6;

if (!revalidate_data(ctx, &data, &data_end, &ip6))
return DROP_INVALID;

*ext_err = (__s8)fib_lookup_v6(ctx,
&params,
&ip6->saddr,
&ip6->daddr,
BPF_FIB_LOOKUP_OUTPUT);

switch (*ext_err) {
case BPF_FIB_LKUP_RET_SUCCESS:
/* We found an oif and ARP was successful.
* We may need to redirect to the appropriate oif, if not
* rewrite the layer 2 and continue processing.
*/
if (old_oif != params.l.ifindex)
return fib_do_redirect(ctx, true, &params,
(__s8 *)ext_err, (int *)&old_oif);

if (eth_store_daddr(ctx, params.l.dmac, 0) < 0)
return DROP_WRITE_ERROR;

julianwiedmann marked this conversation as resolved.
Show resolved Hide resolved
break;
case BPF_FIB_LKUP_RET_NO_NEIGH:
/* In this case, we found an oif, but ARP failed.
* We can't rule out that oif is a veth, in which ARP is not
* strictly necessary to deliver the packet, since the kernel
* can fill in the veth pair's dmac without it, we lets deliver
* or redirect.
*/
if (old_oif != params.l.ifindex)
return fib_do_redirect(ctx, true, &params,
(__s8 *)ext_err, (int *)&old_oif);
break;
default:
return DROP_NO_FIB;
};
return CTX_ACT_OK;
}
#endif /* ENABLE_IPV6 */

/* Tail call performing SRv6 encapsulation.
 *
 * Loads the destination SID and VRF id stashed in the ctx metadata, hands the
 * packet to srv6_handling() and, when IPv6 is enabled, re-runs the fib lookup
 * on the encapsulated packet through srv6_refib().
 *
 * Returns the drop-notify result when either step reports an error (ret < 0).
 * Otherwise a trace notification is emitted and the verdict is returned:
 * the value produced by srv6_refib() (which may be a redirect verdict coming
 * from fib_do_redirect()) on IPv6 builds, CTX_ACT_OK on all other builds.
 */
__section_tail(CILIUM_MAP_CALLS, CILIUM_CALL_SRV6_ENCAP)
int tail_srv6_encap(struct __ctx_buff *ctx)
{
	struct in6_addr dst_sid;
	__u32 vrf_id;
	int ret = 0;
	int __maybe_unused ext_err = 0;

	srv6_load_meta_sid(ctx, &dst_sid);
	vrf_id = ctx_load_meta(ctx, CB_SRV6_VRF_ID);

	ret = srv6_handling(ctx, vrf_id, &dst_sid);
	if (ret < 0)
		return send_drop_notify_error(ctx, SECLABEL, ret, CTX_ACT_DROP,
					      METRIC_EGRESS);

	/* Encapsulation succeeded; this stays the verdict unless the re-fib
	 * below decides otherwise.
	 */
	ret = CTX_ACT_OK;

#ifdef ENABLE_IPV6
	ret = srv6_refib(ctx, &ext_err);
	if (ret < 0)
		return send_drop_notify_ext(ctx, SECLABEL, 0, 0, ret, ext_err,
					    CTX_ACT_DROP, METRIC_EGRESS);
#endif

	send_trace_notify(ctx, TRACE_TO_STACK, SECLABEL, 0, 0, 0,
			  TRACE_REASON_UNKNOWN, 0);

	/* Propagate the verdict instead of unconditionally returning
	 * CTX_ACT_OK, otherwise a redirect issued by srv6_refib() is lost.
	 */
	return ret;
}

__section_tail(CILIUM_MAP_CALLS, CILIUM_CALL_SRV6_DECAP)
Expand Down
175 changes: 167 additions & 8 deletions bpf/lib/fib.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,134 @@ static __always_inline bool fib_ok(int ret)
return likely(ret == CTX_ACT_TX || ret == CTX_ACT_REDIRECT);
}

/* fib_redirect() is common helper code which performs fib lookup, populates
* the corresponding hardware addresses and pushes the packet to a target
* device for the next hop. Calling fib_redirect_v{4,6} is preferred unless
* due to NAT46x64 struct bpf_fib_lookup_padded needs to be prepared at the
* callsite. oif must be 0 if otherwise not passed in from the BPF CT. The
* needs_l2_check must be true if the packet could transition between L2->L3
* or L3->L2 device.
*/
/* fib_do_redirect will redirect the ctx to a particular output interface.
 *
 * The redirect can occur with or without a previous call to fib_lookup.
 *
 * If a previous fib_lookup was performed, this function will attempt to
 * redirect to the output interface in the provided 'fib_params', as long as
 * 'fib_ret' is set to 'BPF_FIB_LKUP_RET_SUCCESS'.
 *
 * If a previous fib_lookup was performed and the return was
 * 'BPF_FIB_LKUP_RET_NO_NEIGH', this function will then attempt to copy the
 * address family and destination address out of 'fib_params' and into a
 * 'bpf_redir_neigh' struct, then perform a 'redirect_neigh'.
 *
 * If no previous fib_lookup was performed, and the desire is to simply use
 * 'redirect_neigh', then set 'fib_params' to NULL and 'fib_ret' to
 * 'BPF_FIB_LKUP_RET_NO_NEIGH'.
 * In this case, the 'oif' value will be used for the 'redirect_neigh' call.
 *
 * In a special case, if a previous fib_lookup was performed, and the return
 * was 'BPF_FIB_LKUP_RET_NO_NEIGH', and we are on a kernel version where
 * the target interface for the fib lookup is not returned
 * (due to ARP failing, see Kernel commit d1c362e1dd68), the provided 'oif'
 * will be used as output interface for redirect.
 *
 * Parameters:
 *  ctx            - packet context to redirect.
 *  needs_l2_check - when true, maybe_add_l2_hdr() is consulted for the
 *                   chosen oif before any MAC address is written.
 *  fib_params     - result of a previous fib lookup, or NULL (see above).
 *  fib_ret        - in/out: return code of the previous fib lookup. It is
 *                   overwritten with BPF_FIB_MAP_NO_NEIGH when no neighbor
 *                   entry is found in the BPF neighbor map.
 *  oif            - in/out: caller-provided output ifindex. It is replaced
 *                   by fib_params->l.ifindex whenever fib_params is given,
 *                   except in the special case described above.
 *
 * Returns DROP_NO_FIB for an unexpected 'fib_ret' or an unresolved neighbor,
 * a non-zero return of maybe_add_l2_hdr(), DROP_WRITE_ERROR when a MAC
 * address cannot be stored, and otherwise the result of redirect_neigh() or
 * ctx_redirect().
 */
static __always_inline int
fib_do_redirect(struct __ctx_buff *ctx, const bool needs_l2_check,
const struct bpf_fib_lookup_padded *fib_params, __s8 *fib_ret,
int *oif)
{
struct bpf_redir_neigh nh_params;
struct bpf_redir_neigh *nh = NULL;
/* NOTE(review): smac is derived from the caller-provided *oif, i.e. before
 * *oif may be replaced by fib_params->l.ifindex further down. It is only
 * used on the BPF neighbor map path. Confirm this is intended.
 */
union macaddr smac = NATIVE_DEV_MAC_BY_IFINDEX(*oif);
union macaddr *dmac = 0;
int ret;

/* Sanity check: we only enter this function with these two fib lookup
 * return codes. Any non-zero code other than NO_NEIGH is rejected.
 */
if (*fib_ret && (*fib_ret != BPF_FIB_LKUP_RET_NO_NEIGH))
return DROP_NO_FIB;

/* Determine which oif to use before needs_l2_check determines if a layer 2
 * header needs to be pushed.
 */
if (fib_params) {
if (*fib_ret == BPF_FIB_LKUP_RET_NO_NEIGH &&
!is_defined(HAVE_FIB_IFINDEX) && *oif) {
/* For kernels without d1c362e1dd68 ("bpf: Always
 * return target ifindex in bpf_fib_lookup") we
 * fall back to use the caller-provided oif when
 * necessary.
 * no-op
 */
} else {
*oif = fib_params->l.ifindex;
}
}

/* Determine if we need to append a layer 2 header. */
if (needs_l2_check) {
bool l2_hdr_required = true;

ret = maybe_add_l2_hdr(ctx, *oif, &l2_hdr_required);
if (ret != 0)
return ret;
/* No L2 header required for this oif: skip the MAC rewrite (and
 * the neighbor handling) below and redirect right away.
 */
if (!l2_hdr_required)
goto out_send;
}

/* Determine if we are performing redirect or redirect_neigh. */
switch (*fib_ret) {
case BPF_FIB_LKUP_RET_SUCCESS:
/* The fib lookup resolved both MAC addresses: write them and fall
 * through to the plain redirect at out_send. fib_params is
 * dereferenced unconditionally here, so it must not be NULL.
 */
if (eth_store_daddr(ctx, fib_params->l.dmac, 0) < 0)
return DROP_WRITE_ERROR;
if (eth_store_saddr(ctx, fib_params->l.smac, 0) < 0)
return DROP_WRITE_ERROR;
break;
case BPF_FIB_LKUP_RET_NO_NEIGH:
/* A previous fib lookup was performed, so we can fill out both
 * a bpf_redir_neigh and a dmac.
 *
 * The former is used if we have access to redirect_neigh;
 * the latter is used if we don't and have to use the eBPF
 * neighbor map.
 */
if (fib_params) {
nh_params.nh_family = fib_params->l.family;
/* Copies sizeof(ipv6_nh) bytes regardless of the address family. */
__bpf_memcpy_builtin(&nh_params.ipv6_nh,
&fib_params->l.ipv6_dst,
sizeof(nh_params.ipv6_nh));
nh = &nh_params;

if (!neigh_resolver_available()) {
/* The neigh_record_ip{4,6} locations are mainly from
 * inbound client traffic on the load-balancer where we
 * know that replies need to go back to them.
 */
dmac = fib_params->l.family == AF_INET ?
neigh_lookup_ip4(&fib_params->l.ipv4_dst) :
neigh_lookup_ip6((void *)&fib_params->l.ipv6_dst);
}
}

/* If we are able to resolve neighbors on demand, always
 * prefer that over the BPF neighbor map since the latter
 * might be less accurate in some asymmetric corner cases.
 */
if (neigh_resolver_available()) {
/* nh is only set when fib_params supplied a next hop. */
if (nh)
return redirect_neigh(*oif, &nh_params,
sizeof(nh_params), 0);
else
return redirect_neigh(*oif, NULL, 0, 0);
} else {
/* No neighbor map entry (or no fib_params to look one up
 * with): report it through fib_ret and drop.
 */
if (!dmac) {
*fib_ret = BPF_FIB_MAP_NO_NEIGH;
return DROP_NO_FIB;
}
if (eth_store_daddr_aligned(ctx, dmac->addr, 0) < 0)
return DROP_WRITE_ERROR;
if (eth_store_saddr_aligned(ctx, smac.addr, 0) < 0)
return DROP_WRITE_ERROR;
}
};
out_send:
return ctx_redirect(ctx, *oif, 0);
}

static __always_inline int
fib_redirect(struct __ctx_buff *ctx, const bool needs_l2_check,
struct bpf_fib_lookup_padded *fib_params, __s8 *fib_err, int *oif)
Expand Down Expand Up @@ -134,6 +254,28 @@ fib_redirect(struct __ctx_buff *ctx, const bool needs_l2_check,
}

#ifdef ENABLE_IPV6
/* fib_lookup_v6 performs a fib lookup with the provided IPv6 source and
 * destination addresses.
 *
 * @ctx:        packet context; its ifindex (ctx_get_ifindex()) is used as the
 *              ifindex of the lookup request.
 * @fib_params: lookup request/result storage. Family, ifindex and both
 *              addresses are filled in here before the lookup; after the
 *              function returns it holds the results of the fib lookup, if
 *              successful.
 * @ipv6_src:   source address to look up with.
 * @ipv6_dst:   destination address to look up with.
 * @flags:      flags passed through unchanged to fib_lookup().
 *
 * Returns the return code of fib_lookup().
 */
static __always_inline int
fib_lookup_v6(struct __ctx_buff *ctx, struct bpf_fib_lookup_padded *fib_params,
	      const struct in6_addr *ipv6_src, const struct in6_addr *ipv6_dst,
	      int flags)
{
	fib_params->l.family = AF_INET6;
	fib_params->l.ifindex = ctx_get_ifindex(ctx);

	ipv6_addr_copy((union v6addr *)&fib_params->l.ipv6_src,
		       (union v6addr *)ipv6_src);
	ipv6_addr_copy((union v6addr *)&fib_params->l.ipv6_dst,
		       (union v6addr *)ipv6_dst);

	return fib_lookup(ctx, &fib_params->l, sizeof(fib_params->l), flags);
}

static __always_inline int
fib_redirect_v6(struct __ctx_buff *ctx, int l3_off,
struct ipv6hdr *ip6, const bool needs_l2_check,
Expand Down Expand Up @@ -161,6 +303,23 @@ fib_redirect_v6(struct __ctx_buff *ctx, int l3_off,
#endif /* ENABLE_IPV6 */

#ifdef ENABLE_IPV4
/* fib_lookup_v4 - run a fib lookup for an IPv4 source/destination pair.
 *
 * The request in 'fib_params' is populated with the AF_INET family, the
 * ifindex reported by ctx_get_ifindex() and the two addresses, then handed
 * to fib_lookup() together with 'flags'.
 *
 * On return 'fib_params' carries the results of the fib lookup, if it was
 * successful. The return value is the one of fib_lookup().
 */
static __always_inline int
fib_lookup_v4(struct __ctx_buff *ctx, struct bpf_fib_lookup_padded *fib_params,
	      __be32 ipv4_src, __be32 ipv4_dst, int flags)
{
	/* Addresses to resolve. */
	fib_params->l.ipv4_dst = ipv4_dst;
	fib_params->l.ipv4_src = ipv4_src;

	/* Lookup scope: address family and the ctx's ifindex. */
	fib_params->l.ifindex = ctx_get_ifindex(ctx);
	fib_params->l.family = AF_INET;

	return fib_lookup(ctx, &fib_params->l, sizeof(fib_params->l), flags);
}

static __always_inline int
fib_redirect_v4(struct __ctx_buff *ctx, int l3_off,
struct iphdr *ip4, const bool needs_l2_check,
Expand Down