Skip to content

Commit d7d7807

Browse files
committed
net, neigh: Add NTF_MANAGED flag for managed neighbor entries
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2193175 commit 7482e38 Author: Daniel Borkmann <daniel@iogearbox.net> Date: Mon Oct 11 14:12:38 2021 +0200 net, neigh: Add NTF_MANAGED flag for managed neighbor entries Allow a user space control plane to insert entries with a new NTF_EXT_MANAGED flag. The flag then indicates to the kernel that the neighbor entry should be periodically probed for keeping the entry in NUD_REACHABLE state iff possible. The use case for this is targeting XDP or tc BPF load-balancers which use the bpf_fib_lookup() BPF helper in order to piggyback on neighbor resolution for their backends. Given they cannot be resolved in fast-path, a control plane inserts the L3 (without L2) entries manually into the neighbor table and lets the kernel do the neighbor resolution either on the gateway or on the backend directly in case the latter resides in the same L2. This avoids having to deal with L2 in the control plane and having to rebuild what the kernel already does best anyway. NTF_EXT_MANAGED can be combined with NTF_EXT_LEARNED in order to avoid GC eviction. The kernel then adds NTF_MANAGED flagged entries to a per-neighbor-table list which gets walked from the system work queue to periodically call neigh_event_send() for performing the resolution. The implementation allows migration from/to NTF_MANAGED neighbor entries, so that already existing entries can be converted by the control plane if needed. Potentially, we could make the interval for periodically calling neigh_event_send() configurable; right now it's set to DELAY_PROBE_TIME which is also in line with mlxsw which has similar driver-internal infrastructure c723c73 ("mlxsw: spectrum_router: Periodically update the kernel's neigh table"). In the future, the latter could possibly reuse the NTF_MANAGED neighbors as well. Example: # ./ip/ip n replace 192.168.178.30 dev enp5s0 managed extern_learn # ./ip/ip n 192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a managed extern_learn REACHABLE [...] 
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Roopa Prabhu <roopa@nvidia.com> Link: https://linuxplumbersconf.org/event/11/contributions/953/ Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Ivan Vecera <ivecera@redhat.com>
1 parent 047b64d commit d7d7807

File tree

3 files changed

+120
-48
lines changed

3 files changed

+120
-48
lines changed

include/net/neighbour.h

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ struct neighbour {
156156
int (*output)(struct neighbour *, struct sk_buff *);
157157
const struct neigh_ops *ops;
158158
struct list_head gc_list;
159+
struct list_head managed_list;
159160
struct rcu_head rcu;
160161
struct net_device *dev;
161162
netdevice_tracker dev_tracker;
@@ -219,11 +220,13 @@ struct neigh_table {
219220
int gc_thresh3;
220221
unsigned long last_flush;
221222
struct delayed_work gc_work;
223+
struct delayed_work managed_work;
222224
struct timer_list proxy_timer;
223225
struct sk_buff_head proxy_queue;
224226
atomic_t entries;
225227
atomic_t gc_entries;
226228
struct list_head gc_list;
229+
struct list_head managed_list;
227230
rwlock_t lock;
228231
unsigned long last_rand;
229232
struct neigh_statistics __percpu *stats;
@@ -253,17 +256,21 @@ static inline void *neighbour_priv(const struct neighbour *n)
253256
}
254257

255258
/* flags for neigh_update() */
256-
#define NEIGH_UPDATE_F_OVERRIDE 0x00000001
257-
#define NEIGH_UPDATE_F_WEAK_OVERRIDE 0x00000002
258-
#define NEIGH_UPDATE_F_OVERRIDE_ISROUTER 0x00000004
259-
#define NEIGH_UPDATE_F_USE 0x10000000
260-
#define NEIGH_UPDATE_F_EXT_LEARNED 0x20000000
261-
#define NEIGH_UPDATE_F_ISROUTER 0x40000000
262-
#define NEIGH_UPDATE_F_ADMIN 0x80000000
259+
#define NEIGH_UPDATE_F_OVERRIDE BIT(0)
260+
#define NEIGH_UPDATE_F_WEAK_OVERRIDE BIT(1)
261+
#define NEIGH_UPDATE_F_OVERRIDE_ISROUTER BIT(2)
262+
#define NEIGH_UPDATE_F_USE BIT(3)
263+
#define NEIGH_UPDATE_F_MANAGED BIT(4)
264+
#define NEIGH_UPDATE_F_EXT_LEARNED BIT(5)
265+
#define NEIGH_UPDATE_F_ISROUTER BIT(6)
266+
#define NEIGH_UPDATE_F_ADMIN BIT(7)
263267

264268
/* In-kernel representation for NDA_FLAGS_EXT flags: */
265269
#define NTF_OLD_MASK 0xff
266270
#define NTF_EXT_SHIFT 8
271+
#define NTF_EXT_MASK (NTF_EXT_MANAGED)
272+
273+
#define NTF_MANAGED (NTF_EXT_MANAGED << NTF_EXT_SHIFT)
267274

268275
extern const struct nla_policy nda_policy[];
269276

include/uapi/linux/neighbour.h

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -41,14 +41,16 @@ enum {
4141
* Neighbor Cache Entry Flags
4242
*/
4343

44-
#define NTF_USE 0x01
45-
#define NTF_SELF 0x02
46-
#define NTF_MASTER 0x04
47-
#define NTF_PROXY 0x08 /* == ATF_PUBL */
48-
#define NTF_EXT_LEARNED 0x10
49-
#define NTF_OFFLOADED 0x20
50-
#define NTF_STICKY 0x40
51-
#define NTF_ROUTER 0x80
44+
#define NTF_USE (1 << 0)
45+
#define NTF_SELF (1 << 1)
46+
#define NTF_MASTER (1 << 2)
47+
#define NTF_PROXY (1 << 3) /* == ATF_PUBL */
48+
#define NTF_EXT_LEARNED (1 << 4)
49+
#define NTF_OFFLOADED (1 << 5)
50+
#define NTF_STICKY (1 << 6)
51+
#define NTF_ROUTER (1 << 7)
52+
/* Extended flags under NDA_FLAGS_EXT: */
53+
#define NTF_EXT_MANAGED (1 << 0)
5254

5355
/*
5456
* Neighbor Cache Entry States.
@@ -66,12 +68,22 @@ enum {
6668
#define NUD_PERMANENT 0x80
6769
#define NUD_NONE 0x00
6870

69-
/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change
70-
* and make no address resolution or NUD.
71-
* NUD_PERMANENT also cannot be deleted by garbage collectors.
71+
/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change and make no
72+
* address resolution or NUD.
73+
*
74+
* NUD_PERMANENT also cannot be deleted by garbage collectors. This holds true
75+
* for dynamic entries with NTF_EXT_LEARNED flag as well. However, upon carrier
76+
* down event, NUD_PERMANENT entries are not flushed whereas NTF_EXT_LEARNED
77+
* flagged entries explicitly are (which is also consistent with the routing
78+
* subsystem).
79+
*
7280
* When NTF_EXT_LEARNED is set for a bridge fdb entry the different cache entry
7381
* states don't make sense and thus are ignored. Such entries don't age and
7482
* can roam.
83+
*
84+
* NTF_EXT_MANAGED flagged neighbor entries are managed by the kernel on behalf
85+
* of a user space control plane, and automatically refreshed so that (if
86+
* possible) they remain in NUD_REACHABLE state.
7587
*/
7688

7789
struct nda_cacheinfo {

net/core/neighbour.c

Lines changed: 83 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,8 @@ static void neigh_mark_dead(struct neighbour *n)
122122
list_del_init(&n->gc_list);
123123
atomic_dec(&n->tbl->gc_entries);
124124
}
125+
if (!list_empty(&n->managed_list))
126+
list_del_init(&n->managed_list);
125127
}
126128

127129
static void neigh_update_gc_list(struct neighbour *n)
@@ -130,7 +132,6 @@ static void neigh_update_gc_list(struct neighbour *n)
130132

131133
write_lock_bh(&n->tbl->lock);
132134
write_lock(&n->lock);
133-
134135
if (n->dead)
135136
goto out;
136137

@@ -149,32 +150,59 @@ static void neigh_update_gc_list(struct neighbour *n)
149150
list_add_tail(&n->gc_list, &n->tbl->gc_list);
150151
atomic_inc(&n->tbl->gc_entries);
151152
}
153+
out:
154+
write_unlock(&n->lock);
155+
write_unlock_bh(&n->tbl->lock);
156+
}
157+
158+
static void neigh_update_managed_list(struct neighbour *n)
159+
{
160+
bool on_managed_list, add_to_managed;
161+
162+
write_lock_bh(&n->tbl->lock);
163+
write_lock(&n->lock);
164+
if (n->dead)
165+
goto out;
166+
167+
add_to_managed = n->flags & NTF_MANAGED;
168+
on_managed_list = !list_empty(&n->managed_list);
152169

170+
if (!add_to_managed && on_managed_list)
171+
list_del_init(&n->managed_list);
172+
else if (add_to_managed && !on_managed_list)
173+
list_add_tail(&n->managed_list, &n->tbl->managed_list);
153174
out:
154175
write_unlock(&n->lock);
155176
write_unlock_bh(&n->tbl->lock);
156177
}
157178

158-
static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
159-
int *notify)
179+
static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
180+
bool *gc_update, bool *managed_update)
160181
{
161-
bool rc = false;
162-
u32 ndm_flags;
182+
u32 ndm_flags, old_flags = neigh->flags;
163183

164184
if (!(flags & NEIGH_UPDATE_F_ADMIN))
165-
return rc;
185+
return;
186+
187+
ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
188+
ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0;
166189

167-
ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
168-
if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
190+
if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) {
169191
if (ndm_flags & NTF_EXT_LEARNED)
170192
neigh->flags |= NTF_EXT_LEARNED;
171193
else
172194
neigh->flags &= ~NTF_EXT_LEARNED;
173-
rc = true;
174195
*notify = 1;
196+
*gc_update = true;
197+
}
198+
if ((old_flags ^ ndm_flags) & NTF_MANAGED) {
199+
if (ndm_flags & NTF_MANAGED)
200+
neigh->flags |= NTF_MANAGED;
201+
else
202+
neigh->flags &= ~NTF_MANAGED;
203+
*notify = 1;
204+
*managed_update = true;
175205
}
176-
177-
return rc;
178206
}
179207

180208
static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
@@ -422,6 +450,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl,
422450
refcount_set(&n->refcnt, 1);
423451
n->dead = 1;
424452
INIT_LIST_HEAD(&n->gc_list);
453+
INIT_LIST_HEAD(&n->managed_list);
425454

426455
atomic_inc(&tbl->entries);
427456
out:
@@ -650,7 +679,8 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
650679
n->dead = 0;
651680
if (!exempt_from_gc)
652681
list_add_tail(&n->gc_list, &n->tbl->gc_list);
653-
682+
if (n->flags & NTF_MANAGED)
683+
list_add_tail(&n->managed_list, &n->tbl->managed_list);
654684
if (want_ref)
655685
neigh_hold(n);
656686
rcu_assign_pointer(n->next,
@@ -1204,8 +1234,6 @@ static void neigh_update_hhs(struct neighbour *neigh)
12041234
}
12051235
}
12061236

1207-
1208-
12091237
/* Generic update routine.
12101238
-- lladdr is new lladdr or NULL, if it is not supplied.
12111239
-- new is new state.
@@ -1217,24 +1245,23 @@ static void neigh_update_hhs(struct neighbour *neigh)
12171245
if it is different.
12181246
NEIGH_UPDATE_F_ADMIN means that the change is administrative.
12191247
NEIGH_UPDATE_F_USE means that the entry is user triggered.
1248+
NEIGH_UPDATE_F_MANAGED means that the entry will be auto-refreshed.
12201249
NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
12211250
NTF_ROUTER flag.
12221251
NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
12231252
a router.
12241253
12251254
Caller MUST hold reference count on the entry.
12261255
*/
1227-
12281256
static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
12291257
u8 new, u32 flags, u32 nlmsg_pid,
12301258
struct netlink_ext_ack *extack)
12311259
{
1232-
bool ext_learn_change = false;
1233-
u8 old;
1234-
int err;
1235-
int notify = 0;
1236-
struct net_device *dev;
1260+
bool gc_update = false, managed_update = false;
12371261
int update_isrouter = 0;
1262+
struct net_device *dev;
1263+
int err, notify = 0;
1264+
u8 old;
12381265

12391266
trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);
12401267

@@ -1253,8 +1280,8 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
12531280
(old & (NUD_NOARP | NUD_PERMANENT)))
12541281
goto out;
12551282

1256-
ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);
1257-
if (flags & NEIGH_UPDATE_F_USE) {
1283+
neigh_update_flags(neigh, flags, &notify, &gc_update, &managed_update);
1284+
if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
12581285
new = old & ~NUD_PERMANENT;
12591286
neigh->nud_state = new;
12601287
err = 0;
@@ -1404,15 +1431,13 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
14041431
if (update_isrouter)
14051432
neigh_update_is_router(neigh, flags, &notify);
14061433
write_unlock_bh(&neigh->lock);
1407-
1408-
if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
1434+
if (((new ^ old) & NUD_PERMANENT) || gc_update)
14091435
neigh_update_gc_list(neigh);
1410-
1436+
if (managed_update)
1437+
neigh_update_managed_list(neigh);
14111438
if (notify)
14121439
neigh_update_notify(neigh, nlmsg_pid);
1413-
14141440
trace_neigh_update_done(neigh, err);
1415-
14161441
return err;
14171442
}
14181443

@@ -1538,6 +1563,20 @@ int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
15381563
}
15391564
EXPORT_SYMBOL(neigh_direct_output);
15401565

1566+
static void neigh_managed_work(struct work_struct *work)
1567+
{
1568+
struct neigh_table *tbl = container_of(work, struct neigh_table,
1569+
managed_work.work);
1570+
struct neighbour *neigh;
1571+
1572+
write_lock_bh(&tbl->lock);
1573+
list_for_each_entry(neigh, &tbl->managed_list, managed_list)
1574+
neigh_event_send(neigh, NULL);
1575+
queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
1576+
NEIGH_VAR(&tbl->parms, DELAY_PROBE_TIME));
1577+
write_unlock_bh(&tbl->lock);
1578+
}
1579+
15411580
static void neigh_proxy_process(struct timer_list *t)
15421581
{
15431582
struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
@@ -1684,6 +1723,8 @@ void neigh_table_init(int index, struct neigh_table *tbl)
16841723

16851724
INIT_LIST_HEAD(&tbl->parms_list);
16861725
INIT_LIST_HEAD(&tbl->gc_list);
1726+
INIT_LIST_HEAD(&tbl->managed_list);
1727+
16871728
list_add(&tbl->parms.list, &tbl->parms_list);
16881729
write_pnet(&tbl->parms.net, &init_net);
16891730
refcount_set(&tbl->parms.refcnt, 1);
@@ -1715,9 +1756,13 @@ void neigh_table_init(int index, struct neigh_table *tbl)
17151756
WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
17161757

17171758
rwlock_init(&tbl->lock);
1759+
17181760
INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
17191761
queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
17201762
tbl->parms.reachable_time);
1763+
INIT_DEFERRABLE_WORK(&tbl->managed_work, neigh_managed_work);
1764+
queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, 0);
1765+
17211766
timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
17221767
skb_queue_head_init_class(&tbl->proxy_queue,
17231768
&neigh_table_proxy_queue_class);
@@ -1890,7 +1935,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
18901935
if (tb[NDA_FLAGS_EXT]) {
18911936
u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]);
18921937

1893-
if (ext & ~0) {
1938+
if (ext & ~NTF_EXT_MASK) {
18941939
NL_SET_ERR_MSG(extack, "Invalid extended flags");
18951940
goto out;
18961941
}
@@ -1926,6 +1971,11 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
19261971
if (ndm_flags & NTF_PROXY) {
19271972
struct pneigh_entry *pn;
19281973

1974+
if (ndm_flags & NTF_MANAGED) {
1975+
NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination");
1976+
goto out;
1977+
}
1978+
19291979
err = -ENOBUFS;
19301980
pn = pneigh_lookup(tbl, net, dst, dev, 1);
19311981
if (pn) {
@@ -1959,7 +2009,8 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
19592009
exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
19602010
ndm_flags & NTF_EXT_LEARNED;
19612011
neigh = ___neigh_create(tbl, dst, dev,
1962-
ndm_flags & NTF_EXT_LEARNED,
2012+
ndm_flags &
2013+
(NTF_EXT_LEARNED | NTF_MANAGED),
19632014
exempt_from_gc, true);
19642015
if (IS_ERR(neigh)) {
19652016
err = PTR_ERR(neigh);
@@ -1983,12 +2034,14 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
19832034
flags |= NEIGH_UPDATE_F_EXT_LEARNED;
19842035
if (ndm_flags & NTF_ROUTER)
19852036
flags |= NEIGH_UPDATE_F_ISROUTER;
2037+
if (ndm_flags & NTF_MANAGED)
2038+
flags |= NEIGH_UPDATE_F_MANAGED;
19862039
if (ndm_flags & NTF_USE)
19872040
flags |= NEIGH_UPDATE_F_USE;
19882041

19892042
err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
19902043
NETLINK_CB(skb).portid, extack);
1991-
if (!err && ndm_flags & NTF_USE) {
2044+
if (!err && ndm_flags & (NTF_USE | NTF_MANAGED)) {
19922045
neigh_event_send(neigh, NULL);
19932046
err = 0;
19942047
}

0 commit comments

Comments
 (0)