Skip to content

Commit 4c79579

Browse files
dsahern authored and borkmann committed
bpf: Change bpf_fib_lookup to return lookup status
For ACLs implemented using either FIB rules or FIB entries, the BPF program needs the FIB lookup status to be able to drop the packet. Since the bpf_fib_lookup API has not reached a released kernel yet, change the return code to contain an encoding of the FIB lookup result and return the nexthop device index in the params struct.

In addition, inform the BPF program of any post FIB lookup reason as to why the packet needs to go up the stack.

The fib result for unicast routes must have an egress device, so remove the check that it is non-NULL.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
1 parent 3203c90 commit 4c79579

File tree

3 files changed

+81
-41
lines changed

3 files changed

+81
-41
lines changed

include/uapi/linux/bpf.h

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1857,7 +1857,8 @@ union bpf_attr {
18571857
* is resolved), the nexthop address is returned in ipv4_dst
18581858
* or ipv6_dst based on family, smac is set to mac address of
18591859
* egress device, dmac is set to nexthop mac address, rt_metric
1860-
* is set to metric from route (IPv4/IPv6 only).
1860+
* is set to metric from route (IPv4/IPv6 only), and ifindex
1861+
* is set to the device index of the nexthop from the FIB lookup.
18611862
*
18621863
* *plen* argument is the size of the passed in struct.
18631864
* *flags* argument can be a combination of one or more of the
@@ -1873,9 +1874,10 @@ union bpf_attr {
18731874
* *ctx* is either **struct xdp_md** for XDP programs or
18741875
* **struct sk_buff** tc cls_act programs.
18751876
* Return
1876-
* Egress device index on success, 0 if packet needs to continue
1877-
* up the stack for further processing or a negative error in case
1878-
* of failure.
1877+
* * < 0 if any input argument is invalid
1878+
* * 0 on success (packet is forwarded, nexthop neighbor exists)
1879+
* * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
1880+
* * packet is not forwarded or needs assist from full stack
18791881
*
18801882
* int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
18811883
* Description
@@ -2612,6 +2614,18 @@ struct bpf_raw_tracepoint_args {
26122614
#define BPF_FIB_LOOKUP_DIRECT BIT(0)
26132615
#define BPF_FIB_LOOKUP_OUTPUT BIT(1)
26142616

2617+
enum {
2618+
BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */
2619+
BPF_FIB_LKUP_RET_BLACKHOLE, /* dest is blackholed; can be dropped */
2620+
BPF_FIB_LKUP_RET_UNREACHABLE, /* dest is unreachable; can be dropped */
2621+
BPF_FIB_LKUP_RET_PROHIBIT, /* dest not allowed; can be dropped */
2622+
BPF_FIB_LKUP_RET_NOT_FWDED, /* packet is not forwarded */
2623+
BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */
2624+
BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */
2625+
BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */
2626+
BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */
2627+
};
2628+
26152629
struct bpf_fib_lookup {
26162630
/* input: network family for lookup (AF_INET, AF_INET6)
26172631
* output: network family of egress nexthop
@@ -2625,7 +2639,11 @@ struct bpf_fib_lookup {
26252639

26262640
/* total length of packet from network header - used for MTU check */
26272641
__u16 tot_len;
2628-
__u32 ifindex; /* L3 device index for lookup */
2642+
2643+
/* input: L3 device index for lookup
2644+
* output: device index from FIB lookup
2645+
*/
2646+
__u32 ifindex;
26292647

26302648
union {
26312649
/* inputs to lookup */

net/core/filter.c

Lines changed: 54 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -4073,8 +4073,9 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
40734073
memcpy(params->smac, dev->dev_addr, ETH_ALEN);
40744074
params->h_vlan_TCI = 0;
40754075
params->h_vlan_proto = 0;
4076+
params->ifindex = dev->ifindex;
40764077

4077-
return dev->ifindex;
4078+
return 0;
40784079
}
40794080
#endif
40804081

@@ -4098,7 +4099,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
40984099
/* verify forwarding is enabled on this interface */
40994100
in_dev = __in_dev_get_rcu(dev);
41004101
if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev)))
4101-
return 0;
4102+
return BPF_FIB_LKUP_RET_FWD_DISABLED;
41024103

41034104
if (flags & BPF_FIB_LOOKUP_OUTPUT) {
41044105
fl4.flowi4_iif = 1;
@@ -4123,7 +4124,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
41234124

41244125
tb = fib_get_table(net, tbid);
41254126
if (unlikely(!tb))
4126-
return 0;
4127+
return BPF_FIB_LKUP_RET_NOT_FWDED;
41274128

41284129
err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
41294130
} else {
@@ -4135,28 +4136,37 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
41354136
err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF);
41364137
}
41374138

4138-
if (err || res.type != RTN_UNICAST)
4139-
return 0;
4139+
if (err) {
4140+
/* map fib lookup errors to RTN_ type */
4141+
if (err == -EINVAL)
4142+
return BPF_FIB_LKUP_RET_BLACKHOLE;
4143+
if (err == -EHOSTUNREACH)
4144+
return BPF_FIB_LKUP_RET_UNREACHABLE;
4145+
if (err == -EACCES)
4146+
return BPF_FIB_LKUP_RET_PROHIBIT;
4147+
4148+
return BPF_FIB_LKUP_RET_NOT_FWDED;
4149+
}
4150+
4151+
if (res.type != RTN_UNICAST)
4152+
return BPF_FIB_LKUP_RET_NOT_FWDED;
41404153

41414154
if (res.fi->fib_nhs > 1)
41424155
fib_select_path(net, &res, &fl4, NULL);
41434156

41444157
if (check_mtu) {
41454158
mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);
41464159
if (params->tot_len > mtu)
4147-
return 0;
4160+
return BPF_FIB_LKUP_RET_FRAG_NEEDED;
41484161
}
41494162

41504163
nh = &res.fi->fib_nh[res.nh_sel];
41514164

41524165
/* do not handle lwt encaps right now */
41534166
if (nh->nh_lwtstate)
4154-
return 0;
4167+
return BPF_FIB_LKUP_RET_UNSUPP_LWT;
41554168

41564169
dev = nh->nh_dev;
4157-
if (unlikely(!dev))
4158-
return 0;
4159-
41604170
if (nh->nh_gw)
41614171
params->ipv4_dst = nh->nh_gw;
41624172

@@ -4166,10 +4176,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
41664176
* rcu_read_lock_bh is not needed here
41674177
*/
41684178
neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst);
4169-
if (neigh)
4170-
return bpf_fib_set_fwd_params(params, neigh, dev);
4179+
if (!neigh)
4180+
return BPF_FIB_LKUP_RET_NO_NEIGH;
41714181

4172-
return 0;
4182+
return bpf_fib_set_fwd_params(params, neigh, dev);
41734183
}
41744184
#endif
41754185

@@ -4190,15 +4200,15 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
41904200

41914201
/* link local addresses are never forwarded */
41924202
if (rt6_need_strict(dst) || rt6_need_strict(src))
4193-
return 0;
4203+
return BPF_FIB_LKUP_RET_NOT_FWDED;
41944204

41954205
dev = dev_get_by_index_rcu(net, params->ifindex);
41964206
if (unlikely(!dev))
41974207
return -ENODEV;
41984208

41994209
idev = __in6_dev_get_safely(dev);
42004210
if (unlikely(!idev || !net->ipv6.devconf_all->forwarding))
4201-
return 0;
4211+
return BPF_FIB_LKUP_RET_FWD_DISABLED;
42024212

42034213
if (flags & BPF_FIB_LOOKUP_OUTPUT) {
42044214
fl6.flowi6_iif = 1;
@@ -4225,7 +4235,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
42254235

42264236
tb = ipv6_stub->fib6_get_table(net, tbid);
42274237
if (unlikely(!tb))
4228-
return 0;
4238+
return BPF_FIB_LKUP_RET_NOT_FWDED;
42294239

42304240
f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict);
42314241
} else {
@@ -4238,11 +4248,23 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
42384248
}
42394249

42404250
if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry))
4241-
return 0;
4251+
return BPF_FIB_LKUP_RET_NOT_FWDED;
4252+
4253+
if (unlikely(f6i->fib6_flags & RTF_REJECT)) {
4254+
switch (f6i->fib6_type) {
4255+
case RTN_BLACKHOLE:
4256+
return BPF_FIB_LKUP_RET_BLACKHOLE;
4257+
case RTN_UNREACHABLE:
4258+
return BPF_FIB_LKUP_RET_UNREACHABLE;
4259+
case RTN_PROHIBIT:
4260+
return BPF_FIB_LKUP_RET_PROHIBIT;
4261+
default:
4262+
return BPF_FIB_LKUP_RET_NOT_FWDED;
4263+
}
4264+
}
42424265

4243-
if (unlikely(f6i->fib6_flags & RTF_REJECT ||
4244-
f6i->fib6_type != RTN_UNICAST))
4245-
return 0;
4266+
if (f6i->fib6_type != RTN_UNICAST)
4267+
return BPF_FIB_LKUP_RET_NOT_FWDED;
42464268

42474269
if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0)
42484270
f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6,
@@ -4252,11 +4274,11 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
42524274
if (check_mtu) {
42534275
mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src);
42544276
if (params->tot_len > mtu)
4255-
return 0;
4277+
return BPF_FIB_LKUP_RET_FRAG_NEEDED;
42564278
}
42574279

42584280
if (f6i->fib6_nh.nh_lwtstate)
4259-
return 0;
4281+
return BPF_FIB_LKUP_RET_UNSUPP_LWT;
42604282

42614283
if (f6i->fib6_flags & RTF_GATEWAY)
42624284
*dst = f6i->fib6_nh.nh_gw;
@@ -4270,10 +4292,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
42704292
*/
42714293
neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128,
42724294
ndisc_hashfn, dst, dev);
4273-
if (neigh)
4274-
return bpf_fib_set_fwd_params(params, neigh, dev);
4295+
if (!neigh)
4296+
return BPF_FIB_LKUP_RET_NO_NEIGH;
42754297

4276-
return 0;
4298+
return bpf_fib_set_fwd_params(params, neigh, dev);
42774299
}
42784300
#endif
42794301

@@ -4315,7 +4337,7 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
43154337
struct bpf_fib_lookup *, params, int, plen, u32, flags)
43164338
{
43174339
struct net *net = dev_net(skb->dev);
4318-
int index = -EAFNOSUPPORT;
4340+
int rc = -EAFNOSUPPORT;
43194341

43204342
if (plen < sizeof(*params))
43214343
return -EINVAL;
@@ -4326,25 +4348,25 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
43264348
switch (params->family) {
43274349
#if IS_ENABLED(CONFIG_INET)
43284350
case AF_INET:
4329-
index = bpf_ipv4_fib_lookup(net, params, flags, false);
4351+
rc = bpf_ipv4_fib_lookup(net, params, flags, false);
43304352
break;
43314353
#endif
43324354
#if IS_ENABLED(CONFIG_IPV6)
43334355
case AF_INET6:
4334-
index = bpf_ipv6_fib_lookup(net, params, flags, false);
4356+
rc = bpf_ipv6_fib_lookup(net, params, flags, false);
43354357
break;
43364358
#endif
43374359
}
43384360

4339-
if (index > 0) {
4361+
if (!rc) {
43404362
struct net_device *dev;
43414363

4342-
dev = dev_get_by_index_rcu(net, index);
4364+
dev = dev_get_by_index_rcu(net, params->ifindex);
43434365
if (!is_skb_forwardable(dev, skb))
4344-
index = 0;
4366+
rc = BPF_FIB_LKUP_RET_FRAG_NEEDED;
43454367
}
43464368

4347-
return index;
4369+
return rc;
43484370
}
43494371

43504372
static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {

samples/bpf/xdp_fwd_kern.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,9 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
4848
struct ethhdr *eth = data;
4949
struct ipv6hdr *ip6h;
5050
struct iphdr *iph;
51-
int out_index;
5251
u16 h_proto;
5352
u64 nh_off;
53+
int rc;
5454

5555
nh_off = sizeof(*eth);
5656
if (data + nh_off > data_end)
@@ -101,23 +101,23 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
101101

102102
fib_params.ifindex = ctx->ingress_ifindex;
103103

104-
out_index = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags);
104+
rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags);
105105

106106
/* verify egress index has xdp support
107107
* TO-DO bpf_map_lookup_elem(&tx_port, &key) fails with
108108
* cannot pass map_type 14 into func bpf_map_lookup_elem#1:
109109
* NOTE: without verification that egress index supports XDP
110110
* forwarding packets are dropped.
111111
*/
112-
if (out_index > 0) {
112+
if (rc == 0) {
113113
if (h_proto == htons(ETH_P_IP))
114114
ip_decrease_ttl(iph);
115115
else if (h_proto == htons(ETH_P_IPV6))
116116
ip6h->hop_limit--;
117117

118118
memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
119119
memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
120-
return bpf_redirect_map(&tx_port, out_index, 0);
120+
return bpf_redirect_map(&tx_port, fib_params.ifindex, 0);
121121
}
122122

123123
return XDP_PASS;

0 commit comments

Comments
 (0)