Skip to content

Commit 3d48b53

Browse files
cherusk authored and davem330 committed
net: dev_weight: TX/RX orthogonality
Often, introducing side effects on packet processing on the other half of the stack by adjusting one of TX/RX via sysctl is not desirable. There are cases that demand asymmetric, orthogonal configurability. This holds true especially for nodes where RPS (with RFS usage on top) is configured and which therefore use the 'old dev_weight'. This is quite a common base configuration setup nowadays, even with NICs of superior processing support (e.g. aRFS). A good example use case is nodes acting as NoSQL databases with a large number of tiny requests and fewer but larger packets as responses. It's affordable to have a large budget and large rx dev_weights for the requests. But as a side effect, having this large a number processed on TX in one run can overwhelm drivers. This patch therefore introduces independent configurability via sysctl to userland. Signed-off-by: Matthias Tafelmeier <matthias.tafelmeier@gmx.net> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent afbb167 commit 3d48b53

File tree

5 files changed

+62
-4
lines changed

5 files changed

+62
-4
lines changed

Documentation/sysctl/net.txt

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,27 @@ The maximum number of packets that kernel can handle on a NAPI interrupt,
6161
it's a Per-CPU variable.
6262
Default: 64
6363

64+
dev_weight_rx_bias
65+
--------------
66+
67+
RPS (e.g. RFS, aRFS) processing is competing with the registered NAPI poll function
68+
of the driver for the per softirq cycle netdev_budget. This parameter influences
69+
the proportion of the configured netdev_budget that is spent on RPS based packet
70+
processing during RX softirq cycles. It is further meant for making current
71+
dev_weight adaptable for asymmetric CPU needs on RX/TX side of the network stack.
72+
(See dev_weight_tx_bias.) It is effective on a per-CPU basis. Determination is based
73+
on dev_weight and is calculated multiplicatively (dev_weight * dev_weight_rx_bias).
74+
Default: 1
75+
76+
dev_weight_tx_bias
77+
--------------
78+
79+
Scales the maximum number of packets that can be processed during a TX softirq cycle.
80+
Effective on a per CPU basis. Allows scaling of current dev_weight for asymmetric
81+
net stack processing needs. Be careful to avoid making TX softirq processing a CPU hog.
82+
Calculation is based on dev_weight (dev_weight * dev_weight_tx_bias).
83+
Default: 1
84+
6485
default_qdisc
6586
--------------
6687

include/linux/netdevice.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3795,6 +3795,10 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
37953795
extern int netdev_max_backlog;
37963796
extern int netdev_tstamp_prequeue;
37973797
extern int weight_p;
3798+
extern int dev_weight_rx_bias;
3799+
extern int dev_weight_tx_bias;
3800+
extern int dev_rx_weight;
3801+
extern int dev_tx_weight;
37983802

37993803
bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev);
38003804
struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,

net/core/dev.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3427,7 +3427,11 @@ EXPORT_SYMBOL(netdev_max_backlog);
34273427

34283428
int netdev_tstamp_prequeue __read_mostly = 1;
34293429
int netdev_budget __read_mostly = 300;
3430-
int weight_p __read_mostly = 64; /* old backlog weight */
3430+
int weight_p __read_mostly = 64; /* old backlog weight */
3431+
int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */
3432+
int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */
3433+
int dev_rx_weight __read_mostly = 64;
3434+
int dev_tx_weight __read_mostly = 64;
34313435

34323436
/* Called with irq disabled */
34333437
static inline void ____napi_schedule(struct softnet_data *sd,
@@ -4833,7 +4837,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
48334837
net_rps_action_and_irq_enable(sd);
48344838
}
48354839

4836-
napi->weight = weight_p;
4840+
napi->weight = dev_rx_weight;
48374841
while (again) {
48384842
struct sk_buff *skb;
48394843

net/core/sysctl_net_core.c

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,21 @@ static int set_default_qdisc(struct ctl_table *table, int write,
222222
}
223223
#endif
224224

225+
static int proc_do_dev_weight(struct ctl_table *table, int write,
226+
void __user *buffer, size_t *lenp, loff_t *ppos)
227+
{
228+
int ret;
229+
230+
ret = proc_dointvec(table, write, buffer, lenp, ppos);
231+
if (ret != 0)
232+
return ret;
233+
234+
dev_rx_weight = weight_p * dev_weight_rx_bias;
235+
dev_tx_weight = weight_p * dev_weight_tx_bias;
236+
237+
return ret;
238+
}
239+
225240
static int proc_do_rss_key(struct ctl_table *table, int write,
226241
void __user *buffer, size_t *lenp, loff_t *ppos)
227242
{
@@ -273,7 +288,21 @@ static struct ctl_table net_core_table[] = {
273288
.data = &weight_p,
274289
.maxlen = sizeof(int),
275290
.mode = 0644,
276-
.proc_handler = proc_dointvec
291+
.proc_handler = proc_do_dev_weight,
292+
},
293+
{
294+
.procname = "dev_weight_rx_bias",
295+
.data = &dev_weight_rx_bias,
296+
.maxlen = sizeof(int),
297+
.mode = 0644,
298+
.proc_handler = proc_do_dev_weight,
299+
},
300+
{
301+
.procname = "dev_weight_tx_bias",
302+
.data = &dev_weight_tx_bias,
303+
.maxlen = sizeof(int),
304+
.mode = 0644,
305+
.proc_handler = proc_do_dev_weight,
277306
},
278307
{
279308
.procname = "netdev_max_backlog",

net/sched/sch_generic.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ static inline int qdisc_restart(struct Qdisc *q, int *packets)
247247

248248
void __qdisc_run(struct Qdisc *q)
249249
{
250-
int quota = weight_p;
250+
int quota = dev_tx_weight;
251251
int packets;
252252

253253
while (qdisc_restart(q, &packets)) {

0 commit comments

Comments
 (0)