Skip to content

Commit 7482e38

Browse files
borkmann authored and davem330 committed
net, neigh: Add NTF_MANAGED flag for managed neighbor entries
Allow a user space control plane to insert entries with a new NTF_EXT_MANAGED flag. The flag then indicates to the kernel that the neighbor entry should be periodically probed for keeping the entry in NUD_REACHABLE state iff possible.

The use case for this is targeting XDP or tc BPF load-balancers which use the bpf_fib_lookup() BPF helper in order to piggyback on neighbor resolution for their backends. Given they cannot be resolved in fast-path, a control plane inserts the L3 (without L2) entries manually into the neighbor table and lets the kernel do the neighbor resolution either on the gateway or on the backend directly in case the latter resides in the same L2. This avoids having to deal with L2 in the control plane and having to rebuild what the kernel already does best anyway.

NTF_EXT_MANAGED can be combined with NTF_EXT_LEARNED in order to avoid GC eviction. The kernel then adds NTF_MANAGED flagged entries to a per-neighbor-table list which gets triggered by the system work queue to periodically call neigh_event_send() for performing the resolution. The implementation allows migration from/to NTF_MANAGED neighbor entries, so that already existing entries can be converted by the control plane if needed. Potentially, we could make the interval for periodically calling neigh_event_send() configurable; right now it's set to DELAY_PROBE_TIME which is also in line with mlxsw which has similar driver-internal infrastructure c723c73 ("mlxsw: spectrum_router: Periodically update the kernel's neigh table"). In future, the latter could possibly reuse the NTF_MANAGED neighbors as well.

Example:

  # ./ip/ip n replace 192.168.178.30 dev enp5s0 managed extern_learn
  # ./ip/ip n
  192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a managed extern_learn REACHABLE
  [...]

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Roopa Prabhu <roopa@nvidia.com>
Link: https://linuxplumbersconf.org/event/11/contributions/953/
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 2c611ad commit 7482e38

File tree

3 files changed

+120
-48
lines changed

3 files changed

+120
-48
lines changed

include/net/neighbour.h

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ struct neighbour {
155155
int (*output)(struct neighbour *, struct sk_buff *);
156156
const struct neigh_ops *ops;
157157
struct list_head gc_list;
158+
struct list_head managed_list;
158159
struct rcu_head rcu;
159160
struct net_device *dev;
160161
u8 primary_key[0];
@@ -216,11 +217,13 @@ struct neigh_table {
216217
int gc_thresh3;
217218
unsigned long last_flush;
218219
struct delayed_work gc_work;
220+
struct delayed_work managed_work;
219221
struct timer_list proxy_timer;
220222
struct sk_buff_head proxy_queue;
221223
atomic_t entries;
222224
atomic_t gc_entries;
223225
struct list_head gc_list;
226+
struct list_head managed_list;
224227
rwlock_t lock;
225228
unsigned long last_rand;
226229
struct neigh_statistics __percpu *stats;
@@ -250,17 +253,21 @@ static inline void *neighbour_priv(const struct neighbour *n)
250253
}
251254

252255
/* flags for neigh_update() */
253-
#define NEIGH_UPDATE_F_OVERRIDE 0x00000001
254-
#define NEIGH_UPDATE_F_WEAK_OVERRIDE 0x00000002
255-
#define NEIGH_UPDATE_F_OVERRIDE_ISROUTER 0x00000004
256-
#define NEIGH_UPDATE_F_USE 0x10000000
257-
#define NEIGH_UPDATE_F_EXT_LEARNED 0x20000000
258-
#define NEIGH_UPDATE_F_ISROUTER 0x40000000
259-
#define NEIGH_UPDATE_F_ADMIN 0x80000000
256+
#define NEIGH_UPDATE_F_OVERRIDE BIT(0)
257+
#define NEIGH_UPDATE_F_WEAK_OVERRIDE BIT(1)
258+
#define NEIGH_UPDATE_F_OVERRIDE_ISROUTER BIT(2)
259+
#define NEIGH_UPDATE_F_USE BIT(3)
260+
#define NEIGH_UPDATE_F_MANAGED BIT(4)
261+
#define NEIGH_UPDATE_F_EXT_LEARNED BIT(5)
262+
#define NEIGH_UPDATE_F_ISROUTER BIT(6)
263+
#define NEIGH_UPDATE_F_ADMIN BIT(7)
260264

261265
/* In-kernel representation for NDA_FLAGS_EXT flags: */
262266
#define NTF_OLD_MASK 0xff
263267
#define NTF_EXT_SHIFT 8
268+
#define NTF_EXT_MASK (NTF_EXT_MANAGED)
269+
270+
#define NTF_MANAGED (NTF_EXT_MANAGED << NTF_EXT_SHIFT)
264271

265272
extern const struct nla_policy nda_policy[];
266273

include/uapi/linux/neighbour.h

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -41,14 +41,16 @@ enum {
4141
* Neighbor Cache Entry Flags
4242
*/
4343

44-
#define NTF_USE 0x01
45-
#define NTF_SELF 0x02
46-
#define NTF_MASTER 0x04
47-
#define NTF_PROXY 0x08 /* == ATF_PUBL */
48-
#define NTF_EXT_LEARNED 0x10
49-
#define NTF_OFFLOADED 0x20
50-
#define NTF_STICKY 0x40
51-
#define NTF_ROUTER 0x80
44+
#define NTF_USE (1 << 0)
45+
#define NTF_SELF (1 << 1)
46+
#define NTF_MASTER (1 << 2)
47+
#define NTF_PROXY (1 << 3) /* == ATF_PUBL */
48+
#define NTF_EXT_LEARNED (1 << 4)
49+
#define NTF_OFFLOADED (1 << 5)
50+
#define NTF_STICKY (1 << 6)
51+
#define NTF_ROUTER (1 << 7)
52+
/* Extended flags under NDA_FLAGS_EXT: */
53+
#define NTF_EXT_MANAGED (1 << 0)
5254

5355
/*
5456
* Neighbor Cache Entry States.
@@ -66,12 +68,22 @@ enum {
6668
#define NUD_PERMANENT 0x80
6769
#define NUD_NONE 0x00
6870

69-
/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change
70-
* and make no address resolution or NUD.
71-
* NUD_PERMANENT also cannot be deleted by garbage collectors.
71+
/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change and make no
72+
* address resolution or NUD.
73+
*
74+
* NUD_PERMANENT also cannot be deleted by garbage collectors. This holds true
75+
* for dynamic entries with NTF_EXT_LEARNED flag as well. However, upon carrier
76+
* down event, NUD_PERMANENT entries are not flushed whereas NTF_EXT_LEARNED
77+
* flagged entries explicitly are (which is also consistent with the routing
78+
* subsystem).
79+
*
7280
* When NTF_EXT_LEARNED is set for a bridge fdb entry the different cache entry
7381
* states don't make sense and thus are ignored. Such entries don't age and
7482
* can roam.
83+
*
84+
* NTF_EXT_MANAGED flagged neighbor entries are managed by the kernel on behalf
85+
* of a user space control plane, and automatically refreshed so that (if
86+
* possible) they remain in NUD_REACHABLE state.
7587
*/
7688

7789
struct nda_cacheinfo {

net/core/neighbour.c

Lines changed: 83 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,8 @@ static void neigh_mark_dead(struct neighbour *n)
122122
list_del_init(&n->gc_list);
123123
atomic_dec(&n->tbl->gc_entries);
124124
}
125+
if (!list_empty(&n->managed_list))
126+
list_del_init(&n->managed_list);
125127
}
126128

127129
static void neigh_update_gc_list(struct neighbour *n)
@@ -130,7 +132,6 @@ static void neigh_update_gc_list(struct neighbour *n)
130132

131133
write_lock_bh(&n->tbl->lock);
132134
write_lock(&n->lock);
133-
134135
if (n->dead)
135136
goto out;
136137

@@ -149,32 +150,59 @@ static void neigh_update_gc_list(struct neighbour *n)
149150
list_add_tail(&n->gc_list, &n->tbl->gc_list);
150151
atomic_inc(&n->tbl->gc_entries);
151152
}
153+
out:
154+
write_unlock(&n->lock);
155+
write_unlock_bh(&n->tbl->lock);
156+
}
157+
158+
static void neigh_update_managed_list(struct neighbour *n)
159+
{
160+
bool on_managed_list, add_to_managed;
161+
162+
write_lock_bh(&n->tbl->lock);
163+
write_lock(&n->lock);
164+
if (n->dead)
165+
goto out;
166+
167+
add_to_managed = n->flags & NTF_MANAGED;
168+
on_managed_list = !list_empty(&n->managed_list);
152169

170+
if (!add_to_managed && on_managed_list)
171+
list_del_init(&n->managed_list);
172+
else if (add_to_managed && !on_managed_list)
173+
list_add_tail(&n->managed_list, &n->tbl->managed_list);
153174
out:
154175
write_unlock(&n->lock);
155176
write_unlock_bh(&n->tbl->lock);
156177
}
157178

158-
static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
159-
int *notify)
179+
static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
180+
bool *gc_update, bool *managed_update)
160181
{
161-
bool rc = false;
162-
u32 ndm_flags;
182+
u32 ndm_flags, old_flags = neigh->flags;
163183

164184
if (!(flags & NEIGH_UPDATE_F_ADMIN))
165-
return rc;
185+
return;
186+
187+
ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
188+
ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0;
166189

167-
ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
168-
if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
190+
if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) {
169191
if (ndm_flags & NTF_EXT_LEARNED)
170192
neigh->flags |= NTF_EXT_LEARNED;
171193
else
172194
neigh->flags &= ~NTF_EXT_LEARNED;
173-
rc = true;
174195
*notify = 1;
196+
*gc_update = true;
197+
}
198+
if ((old_flags ^ ndm_flags) & NTF_MANAGED) {
199+
if (ndm_flags & NTF_MANAGED)
200+
neigh->flags |= NTF_MANAGED;
201+
else
202+
neigh->flags &= ~NTF_MANAGED;
203+
*notify = 1;
204+
*managed_update = true;
175205
}
176-
177-
return rc;
178206
}
179207

180208
static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
@@ -422,6 +450,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl,
422450
refcount_set(&n->refcnt, 1);
423451
n->dead = 1;
424452
INIT_LIST_HEAD(&n->gc_list);
453+
INIT_LIST_HEAD(&n->managed_list);
425454

426455
atomic_inc(&tbl->entries);
427456
out:
@@ -650,7 +679,8 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
650679
n->dead = 0;
651680
if (!exempt_from_gc)
652681
list_add_tail(&n->gc_list, &n->tbl->gc_list);
653-
682+
if (n->flags & NTF_MANAGED)
683+
list_add_tail(&n->managed_list, &n->tbl->managed_list);
654684
if (want_ref)
655685
neigh_hold(n);
656686
rcu_assign_pointer(n->next,
@@ -1205,8 +1235,6 @@ static void neigh_update_hhs(struct neighbour *neigh)
12051235
}
12061236
}
12071237

1208-
1209-
12101238
/* Generic update routine.
12111239
-- lladdr is new lladdr or NULL, if it is not supplied.
12121240
-- new is new state.
@@ -1218,24 +1246,23 @@ static void neigh_update_hhs(struct neighbour *neigh)
12181246
if it is different.
12191247
NEIGH_UPDATE_F_ADMIN means that the change is administrative.
12201248
NEIGH_UPDATE_F_USE means that the entry is user triggered.
1249+
NEIGH_UPDATE_F_MANAGED means that the entry will be auto-refreshed.
12211250
NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
12221251
NTF_ROUTER flag.
12231252
NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
12241253
a router.
12251254
12261255
Caller MUST hold reference count on the entry.
12271256
*/
1228-
12291257
static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
12301258
u8 new, u32 flags, u32 nlmsg_pid,
12311259
struct netlink_ext_ack *extack)
12321260
{
1233-
bool ext_learn_change = false;
1234-
u8 old;
1235-
int err;
1236-
int notify = 0;
1237-
struct net_device *dev;
1261+
bool gc_update = false, managed_update = false;
12381262
int update_isrouter = 0;
1263+
struct net_device *dev;
1264+
int err, notify = 0;
1265+
u8 old;
12391266

12401267
trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);
12411268

@@ -1254,8 +1281,8 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
12541281
(old & (NUD_NOARP | NUD_PERMANENT)))
12551282
goto out;
12561283

1257-
ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);
1258-
if (flags & NEIGH_UPDATE_F_USE) {
1284+
neigh_update_flags(neigh, flags, &notify, &gc_update, &managed_update);
1285+
if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
12591286
new = old & ~NUD_PERMANENT;
12601287
neigh->nud_state = new;
12611288
err = 0;
@@ -1405,15 +1432,13 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
14051432
if (update_isrouter)
14061433
neigh_update_is_router(neigh, flags, &notify);
14071434
write_unlock_bh(&neigh->lock);
1408-
1409-
if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
1435+
if (((new ^ old) & NUD_PERMANENT) || gc_update)
14101436
neigh_update_gc_list(neigh);
1411-
1437+
if (managed_update)
1438+
neigh_update_managed_list(neigh);
14121439
if (notify)
14131440
neigh_update_notify(neigh, nlmsg_pid);
1414-
14151441
trace_neigh_update_done(neigh, err);
1416-
14171442
return err;
14181443
}
14191444

@@ -1539,6 +1564,20 @@ int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
15391564
}
15401565
EXPORT_SYMBOL(neigh_direct_output);
15411566

1567+
static void neigh_managed_work(struct work_struct *work)
1568+
{
1569+
struct neigh_table *tbl = container_of(work, struct neigh_table,
1570+
managed_work.work);
1571+
struct neighbour *neigh;
1572+
1573+
write_lock_bh(&tbl->lock);
1574+
list_for_each_entry(neigh, &tbl->managed_list, managed_list)
1575+
neigh_event_send(neigh, NULL);
1576+
queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
1577+
NEIGH_VAR(&tbl->parms, DELAY_PROBE_TIME));
1578+
write_unlock_bh(&tbl->lock);
1579+
}
1580+
15421581
static void neigh_proxy_process(struct timer_list *t)
15431582
{
15441583
struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
@@ -1685,6 +1724,8 @@ void neigh_table_init(int index, struct neigh_table *tbl)
16851724

16861725
INIT_LIST_HEAD(&tbl->parms_list);
16871726
INIT_LIST_HEAD(&tbl->gc_list);
1727+
INIT_LIST_HEAD(&tbl->managed_list);
1728+
16881729
list_add(&tbl->parms.list, &tbl->parms_list);
16891730
write_pnet(&tbl->parms.net, &init_net);
16901731
refcount_set(&tbl->parms.refcnt, 1);
@@ -1716,9 +1757,13 @@ void neigh_table_init(int index, struct neigh_table *tbl)
17161757
WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
17171758

17181759
rwlock_init(&tbl->lock);
1760+
17191761
INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
17201762
queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
17211763
tbl->parms.reachable_time);
1764+
INIT_DEFERRABLE_WORK(&tbl->managed_work, neigh_managed_work);
1765+
queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, 0);
1766+
17221767
timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
17231768
skb_queue_head_init_class(&tbl->proxy_queue,
17241769
&neigh_table_proxy_queue_class);
@@ -1891,7 +1936,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
18911936
if (tb[NDA_FLAGS_EXT]) {
18921937
u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]);
18931938

1894-
if (ext & ~0) {
1939+
if (ext & ~NTF_EXT_MASK) {
18951940
NL_SET_ERR_MSG(extack, "Invalid extended flags");
18961941
goto out;
18971942
}
@@ -1927,6 +1972,11 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
19271972
if (ndm_flags & NTF_PROXY) {
19281973
struct pneigh_entry *pn;
19291974

1975+
if (ndm_flags & NTF_MANAGED) {
1976+
NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination");
1977+
goto out;
1978+
}
1979+
19301980
err = -ENOBUFS;
19311981
pn = pneigh_lookup(tbl, net, dst, dev, 1);
19321982
if (pn) {
@@ -1960,7 +2010,8 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
19602010
exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
19612011
ndm_flags & NTF_EXT_LEARNED;
19622012
neigh = ___neigh_create(tbl, dst, dev,
1963-
ndm_flags & NTF_EXT_LEARNED,
2013+
ndm_flags &
2014+
(NTF_EXT_LEARNED | NTF_MANAGED),
19642015
exempt_from_gc, true);
19652016
if (IS_ERR(neigh)) {
19662017
err = PTR_ERR(neigh);
@@ -1984,12 +2035,14 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
19842035
flags |= NEIGH_UPDATE_F_EXT_LEARNED;
19852036
if (ndm_flags & NTF_ROUTER)
19862037
flags |= NEIGH_UPDATE_F_ISROUTER;
2038+
if (ndm_flags & NTF_MANAGED)
2039+
flags |= NEIGH_UPDATE_F_MANAGED;
19872040
if (ndm_flags & NTF_USE)
19882041
flags |= NEIGH_UPDATE_F_USE;
19892042

19902043
err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
19912044
NETLINK_CB(skb).portid, extack);
1992-
if (!err && ndm_flags & NTF_USE) {
2045+
if (!err && ndm_flags & (NTF_USE | NTF_MANAGED)) {
19932046
neigh_event_send(neigh, NULL);
19942047
err = 0;
19952048
}

0 commit comments

Comments
 (0)