
Commit c91c46d

net: provide macros for commonly copied lockless queue stop/wake code
A lot of drivers follow the same scheme to stop / start queues
without introducing locks between xmit and NAPI tx completions.
I'm guessing they all copy'n'paste each other's code.
The original code dates back all the way to e1000 and Linux 2.6.19.

Smaller drivers shy away from the scheme and introduce a lock
which may cause deadlocks in netpoll.

Provide macros which encapsulate the necessary logic.
The macros do not prevent false wake ups, the extra barrier
required to close that race is not worth it. See discussion in:
https://lore.kernel.org/all/c39312a2-4537-14b4-270c-9fe1fbb91e89@gmail.com/

Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
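
For context, the scheme being consolidated looks roughly like the following
in the drivers that open-code it today (a simplified sketch; ring,
free_descs(), stop_thrs and start_thrs are illustrative names, not taken
from any one driver):

        /* xmit handler: stop when a worst-case packet may no longer fit */
        if (unlikely(free_descs(ring) < stop_thrs)) {
                netif_tx_stop_queue(txq);
                smp_mb();       /* order the stop bit vs. the re-check */
                /* completion may have freed descriptors in the meantime */
                if (unlikely(free_descs(ring) >= start_thrs))
                        netif_tx_start_queue(txq);
        }

        /* NAPI Tx completion: wake after publishing the consumer index */
        smp_mb();       /* order the index update vs. the stopped check */
        if (netif_tx_queue_stopped(txq) && free_descs(ring) >= start_thrs)
                netif_tx_wake_queue(txq);

The macros added below encapsulate exactly this stop / re-check and
publish / wake pairing.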

3 files changed, +151 -0 lines changed

Documentation/networking/driver.rst

Lines changed: 6 additions & 0 deletions
@@ -104,6 +104,12 @@ and:
 	       TX_BUFFS_AVAIL(dp) > 0)
 		netif_wake_queue(dp->dev);
 
+Lockless queue stop / wake helper macros
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: include/net/netdev_queues.h
+   :doc: Lockless queue stopping / waking helpers.
+
 No exclusive ownership
 ----------------------

include/linux/netdevice.h

Lines changed: 1 addition & 0 deletions
@@ -3335,6 +3335,7 @@ static inline void netif_tx_wake_all_queues(struct net_device *dev)
 
 static __always_inline void netif_tx_stop_queue(struct netdev_queue *dev_queue)
 {
+	/* Must be an atomic op, see netif_txq_try_stop() */
 	set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
 }
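
Why the comment insists on an atomic op: smp_mb__after_atomic(), used by
netif_txq_try_stop() below, is only guaranteed to act as a full barrier when
it follows an atomic read-modify-write such as set_bit(). A rough sketch of
the pairing (ring, done_idx and ring_free_descs() are hypothetical names):

        /* producer side (xmit), as in netif_txq_try_stop() */
        set_bit(__QUEUE_STATE_DRV_XOFF, &txq->state);   /* atomic RMW */
        smp_mb__after_atomic();  /* full barrier because set_bit() is atomic */
        if (ring_free_descs(ring) >= start_thrs)        /* re-check */
                netif_tx_start_queue(txq);

        /* consumer side (NAPI), as in __netif_txq_maybe_wake() */
        ring->next_to_clean = done_idx; /* publish reclaimed space */
        smp_mb();                       /* pairs with the barrier above */
        if (netif_tx_queue_stopped(txq))
                netif_tx_wake_queue(txq);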

include/net/netdev_queues.h

Lines changed: 144 additions & 0 deletions
@@ -0,0 +1,144 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_NET_QUEUES_H
#define _LINUX_NET_QUEUES_H

#include <linux/netdevice.h>

/**
 * DOC: Lockless queue stopping / waking helpers.
 *
 * The netif_txq_maybe_stop() and __netif_txq_maybe_wake()
 * macros are designed to safely implement stopping
 * and waking netdev queues without full lock protection.
 *
 * We assume that there can be no concurrent stop attempts and no concurrent
 * wake attempts. The try-stop should happen from the xmit handler,
 * while wake up should be triggered from NAPI poll context.
 * The two may run concurrently (single producer, single consumer).
 *
 * The try-stop side is expected to run from the xmit handler and therefore
 * it does not reschedule Tx (netif_tx_start_queue() instead of
 * netif_tx_wake_queue()). Uses of the ``stop`` macros outside of the xmit
 * handler may lead to the xmit queue being enabled but not run.
 * The waking side does not have similar context restrictions.
 *
 * The macros guarantee that rings will not remain stopped if there's
 * space available, but they do *not* prevent false wake ups when
 * the ring is full! Drivers should check for ring full at the start
 * of the xmit handler.
 *
 * All descriptor ring indexes (and other relevant shared state) must
 * be updated before invoking the macros.
 */

#define netif_txq_try_stop(txq, get_desc, start_thrs)                  \
        ({                                                              \
                int _res;                                               \
                                                                        \
                netif_tx_stop_queue(txq);                               \
                /* Producer index and stop bit must be visible          \
                 * to consumer before we recheck.                       \
                 * Pairs with a barrier in __netif_txq_maybe_wake().    \
                 */                                                     \
                smp_mb__after_atomic();                                 \
                                                                        \
                /* We need to check again in case another               \
                 * CPU has just made room available.                    \
                 */                                                     \
                _res = 0;                                               \
                if (unlikely(get_desc >= start_thrs)) {                 \
                        netif_tx_start_queue(txq);                      \
                        _res = -1;                                      \
                }                                                       \
                _res;                                                   \
        })

/**
 * netif_txq_maybe_stop() - locklessly stop a Tx queue, if needed
 * @txq:        struct netdev_queue to stop/start
 * @get_desc:   get current number of free descriptors (see requirements below!)
 * @stop_thrs:  minimal number of available descriptors for queue to be left
 *              enabled
 * @start_thrs: minimal number of descriptors to re-enable the queue, can be
 *              equal to @stop_thrs or higher to avoid frequent waking
 *
 * All arguments may be evaluated multiple times, beware of side effects.
 * @get_desc must be a formula or a function call, it must always
 * return up-to-date information when evaluated!
 * Expected to be used from ndo_start_xmit, see the comment on top of the file.
 *
 * Returns:
 *       0 if the queue was stopped
 *       1 if the queue was left enabled
 *      -1 if the queue was re-enabled (raced with waking)
 */
#define netif_txq_maybe_stop(txq, get_desc, stop_thrs, start_thrs)     \
        ({                                                              \
                int _res;                                               \
                                                                        \
                _res = 1;                                               \
                if (unlikely(get_desc < stop_thrs))                     \
                        _res = netif_txq_try_stop(txq, get_desc, start_thrs); \
                _res;                                                   \
        })

/**
 * __netif_txq_maybe_wake() - locklessly wake a Tx queue, if needed
 * @txq:        struct netdev_queue to stop/start
 * @get_desc:   get current number of free descriptors (see requirements below!)
 * @start_thrs: minimal number of descriptors to re-enable the queue
 * @down_cond:  down condition, predicate indicating that the queue should
 *              not be woken up even if descriptors are available
 *
 * All arguments may be evaluated multiple times.
 * @get_desc must be a formula or a function call, it must always
 * return up-to-date information when evaluated!
 *
 * Returns:
 *       0 if the queue was woken up
 *       1 if the queue was already enabled (or disabled but @down_cond is true)
 *      -1 if the queue was left unchanged (@start_thrs not reached)
 */
#define __netif_txq_maybe_wake(txq, get_desc, start_thrs, down_cond)   \
        ({                                                              \
                int _res;                                               \
                                                                        \
                _res = -1;                                              \
                if (likely(get_desc > start_thrs)) {                    \
                        /* Make sure that anybody stopping the queue after \
                         * this sees the new next_to_clean.             \
                         */                                             \
                        smp_mb();                                       \
                        _res = 1;                                       \
                        if (unlikely(netif_tx_queue_stopped(txq)) &&    \
                            !(down_cond)) {                             \
                                netif_tx_wake_queue(txq);               \
                                _res = 0;                               \
                        }                                               \
                }                                                       \
                _res;                                                   \
        })

#define netif_txq_maybe_wake(txq, get_desc, start_thrs)                \
        __netif_txq_maybe_wake(txq, get_desc, start_thrs, false)

/* subqueue variants follow */

#define netif_subqueue_try_stop(dev, idx, get_desc, start_thrs)        \
        ({                                                              \
                struct netdev_queue *txq;                               \
                                                                        \
                txq = netdev_get_tx_queue(dev, idx);                    \
                netif_txq_try_stop(txq, get_desc, start_thrs);          \
        })

#define netif_subqueue_maybe_stop(dev, idx, get_desc, stop_thrs, start_thrs) \
        ({                                                              \
                struct netdev_queue *txq;                               \
                                                                        \
                txq = netdev_get_tx_queue(dev, idx);                    \
                netif_txq_maybe_stop(txq, get_desc, stop_thrs, start_thrs); \
        })

#endif
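
As a usage illustration (not part of this commit), here is a minimal sketch
of driver glue built on the new macros; struct my_priv, my_free_descs(),
my_start_xmit(), my_clean_tx() and the MY_* thresholds are all hypothetical
names:

        #include <linux/netdevice.h>
        #include <net/netdev_queues.h>

        /* Everything named my_* / MY_* is hypothetical driver glue;
         * only the netif_txq_*() macros come from this patch.
         */
        struct my_priv {
                u16 next_to_use;          /* producer index, xmit only */
                u16 next_to_clean;        /* consumer index, NAPI only */
                u16 ring_size;
                struct netdev_queue *txq; /* netdev_get_tx_queue(dev, 0) */
        };

        #define MY_STOP_THRS   (MAX_SKB_FRAGS + 2) /* worst case per skb */
        #define MY_START_THRS  (MY_STOP_THRS * 2)  /* hysteresis on wake */

        /* Must return up-to-date data, the macros re-evaluate it. */
        static u16 my_free_descs(const struct my_priv *p)
        {
                return p->ring_size -
                       (u16)(p->next_to_use - p->next_to_clean) - 1;
        }

        static netdev_tx_t my_start_xmit(struct sk_buff *skb,
                                         struct net_device *dev)
        {
                struct my_priv *p = netdev_priv(dev);

                /* The macros do not prevent false wake ups: the queue may
                 * be running even though a worst-case packet does not fit,
                 * so check (and stop again) before writing descriptors.
                 */
                if (netif_txq_maybe_stop(p->txq, my_free_descs(p),
                                         MY_STOP_THRS, MY_START_THRS) == 0)
                        return NETDEV_TX_BUSY;

                /* ... fill descriptors, then publish the producer index ... */
                p->next_to_use++;

                /* Stop if the next worst-case packet may no longer fit. */
                netif_txq_maybe_stop(p->txq, my_free_descs(p),
                                     MY_STOP_THRS, MY_START_THRS);
                return NETDEV_TX_OK;
        }

        /* Called from NAPI poll after reclaiming completed descriptors. */
        static void my_clean_tx(struct my_priv *p, u16 done_idx)
        {
                p->next_to_clean = done_idx; /* publish reclaimed space */

                /* Contains the smp_mb() pairing with netif_txq_try_stop(). */
                netif_txq_maybe_wake(p->txq, my_free_descs(p), MY_START_THRS);
        }

The hysteresis between MY_STOP_THRS and MY_START_THRS avoids stopping and
waking the queue on every packet when the ring hovers around the threshold,
matching the kernel-doc note that @start_thrs may be higher than @stop_thrs.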
