Skip to content

Commit 1759335

Browse files
committed
Merge branch 'net-sched-skip_sw'
Asbjørn Sloth Tønnesen says: ==================== make skip_sw actually skip software During development of flower-route[1], which I recently presented at FOSDEM[2], I noticed that CPU usage would increase the more rules I installed into the hardware for IP forwarding offloading. We use TC flower offload for the hottest prefixes, and leave the long tail to the normal (non-TC) Linux network stack for slow-path IP forwarding; we therefore need both the hardware and software datapaths to perform well. I found that skip_sw rules are quite expensive in the kernel datapath, since they must be evaluated and matched before the kernel checks the skip_sw flag. This patchset optimizes the case where all rules are skip_sw by implementing a TC bypass for these cases, where TC is only used as a control plane for the hardware path. v4: - Rebased onto net-next, now that net-next is open again v3: https://lore.kernel.org/netdev/20240306165813.656931-1-ast@fiberby.net/ - Patch 3: - Fix source_inline - Fix build failure, when CONFIG_NET_CLS without CONFIG_NET_CLS_ACT. v2: https://lore.kernel.org/netdev/20240305144404.569632-1-ast@fiberby.net/ - Patch 1: - Add Reviewed-By from Jiri Pirko - Patch 2: - Move code, to avoid forward declaration (Jiri). - Patch 3 - Refactor to use a static key. - Add performance data for trapping, or sending a packet to a non-existent chain (as suggested by Marcelo). v1: https://lore.kernel.org/netdev/20240215160458.1727237-1-ast@fiberby.net/ [1] flower-route https://github.com/fiberby-dk/flower-route [2] FOSDEM talk https://fosdem.org/2024/schedule/event/fosdem-2024-3337-flying-higher-hardware-offloading-with-bird/ ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents fd2162a + 047f340 commit 1759335

File tree

4 files changed

+64
-0
lines changed

4 files changed

+64
-0
lines changed

include/net/pkt_cls.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,15 @@ static inline bool tcf_block_non_null_shared(struct tcf_block *block)
7474
return block && block->index;
7575
}
7676

77+
#ifdef CONFIG_NET_CLS_ACT
78+
/* Static key defined in net/core/dev.c; tc_run() tests it before it
 * consults tcf_block_bypass_sw().
 */
DECLARE_STATIC_KEY_FALSE(tcf_bypass_check_needed_key);
79+
80+
/* tcf_block_bypass_sw - tell whether @block asks to bypass software TC.
 *
 * Returns true only when @block is non-NULL and its bypass_wanted flag is
 * set; a NULL @block yields false.
 */
static inline bool tcf_block_bypass_sw(struct tcf_block *block)
81+
{
82+
return block && block->bypass_wanted;
83+
}
84+
#endif
85+
7786
static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
7887
{
7988
WARN_ON(tcf_block_shared(block));

include/net/sch_generic.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,7 @@ struct tcf_proto {
422422
*/
423423
spinlock_t lock;
424424
bool deleting;
425+
bool counted;
425426
refcount_t refcnt;
426427
struct rcu_head rcu;
427428
struct hlist_node destroy_ht_node;
@@ -471,6 +472,9 @@ struct tcf_block {
471472
struct flow_block flow_block;
472473
struct list_head owner_list;
473474
bool keep_dst;
475+
bool bypass_wanted;
476+
atomic_t filtercnt; /* Number of filters */
477+
atomic_t skipswcnt; /* Number of skip_sw filters */
474478
atomic_t offloadcnt; /* Number of offloaded filters */
475479
unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */
476480
unsigned int lockeddevcnt; /* Number of devs that require rtnl lock. */

net/core/dev.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2083,6 +2083,11 @@ void net_dec_egress_queue(void)
20832083
EXPORT_SYMBOL_GPL(net_dec_egress_queue);
20842084
#endif
20852085

2086+
#ifdef CONFIG_NET_CLS_ACT
2087+
/* Gates the per-block bypass test in tc_run(). It is incremented and
 * decremented by tcf_maintain_bypass() in net/sched/cls_api.c whenever a
 * block's bypass_wanted state flips.
 */
DEFINE_STATIC_KEY_FALSE(tcf_bypass_check_needed_key);
2088+
EXPORT_SYMBOL(tcf_bypass_check_needed_key);
2089+
#endif
2090+
20862091
DEFINE_STATIC_KEY_FALSE(netstamp_needed_key);
20872092
EXPORT_SYMBOL(netstamp_needed_key);
20882093
#ifdef CONFIG_JUMP_LABEL
@@ -3937,6 +3942,11 @@ static int tc_run(struct tcx_entry *entry, struct sk_buff *skb,
39373942
if (!miniq)
39383943
return ret;
39393944

3945+
if (static_branch_unlikely(&tcf_bypass_check_needed_key)) {
3946+
if (tcf_block_bypass_sw(miniq->block))
3947+
return ret;
3948+
}
3949+
39403950
tc_skb_cb(skb)->mru = 0;
39413951
tc_skb_cb(skb)->post_ct = false;
39423952
tcf_set_drop_reason(skb, *drop_reason);

net/sched/cls_api.c

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,12 +410,48 @@ static void tcf_proto_get(struct tcf_proto *tp)
410410
refcount_inc(&tp->refcnt);
411411
}
412412

413+
/* tcf_maintain_bypass - recompute whether @block wants the software bypass.
 *
 * Bypass is wanted when the block has at least one counted filter and
 * filtercnt equals skipswcnt. When the result differs from the stored
 * block->bypass_wanted, the global static key is adjusted (only under
 * CONFIG_NET_CLS_ACT) and the new value is stored in the block.
 *
 * The only caller visible here, tcf_block_filter_cnt_update(), invokes this
 * with block->cb_lock held for writing.
 *
 * NOTE(review): filtercnt is bumped at most once per tcf_proto (guarded by
 * tp->counted), whereas skipswcnt is bumped per flags word passed to
 * tcf_block_offload_inc(). Confirm both counters use the same unit;
 * otherwise the equality test below can give the wrong answer.
 */
static void tcf_maintain_bypass(struct tcf_block *block)
414+
{
415+
int filtercnt = atomic_read(&block->filtercnt);
416+
int skipswcnt = atomic_read(&block->skipswcnt);
417+
/* An empty block (filtercnt == 0) never requests bypass. */
bool bypass_wanted = filtercnt > 0 && filtercnt == skipswcnt;
418+
419+
/* Touch the static key only on a state transition, so each block
 * contributes at most one increment to it.
 */
if (bypass_wanted != block->bypass_wanted) {
420+
#ifdef CONFIG_NET_CLS_ACT
421+
if (bypass_wanted)
422+
static_branch_inc(&tcf_bypass_check_needed_key);
423+
else
424+
static_branch_dec(&tcf_bypass_check_needed_key);
425+
#endif
426+
block->bypass_wanted = bypass_wanted;
427+
}
428+
}
429+
430+
/* tcf_block_filter_cnt_update - account one owner in @block's filtercnt.
 * @block:   block whose filtercnt and bypass state are updated
 * @counted: caller-owned flag recording whether this owner is currently
 *           counted; updated in place
 * @add:     true to count the owner, false to uncount it
 *
 * filtercnt changes only when *@counted differs from @add, so repeated calls
 * with the same @add do not count twice. The bypass state is re-evaluated on
 * every call, even when the count did not change.
 *
 * Takes block->cb_lock for writing; the caller must not already hold it.
 */
static void tcf_block_filter_cnt_update(struct tcf_block *block, bool *counted, bool add)
431+
{
432+
lockdep_assert_not_held(&block->cb_lock);
433+
434+
down_write(&block->cb_lock);
435+
if (*counted != add) {
436+
if (add) {
437+
atomic_inc(&block->filtercnt);
438+
*counted = true;
439+
} else {
440+
atomic_dec(&block->filtercnt);
441+
*counted = false;
442+
}
443+
}
444+
/* Runs unconditionally, so it also picks up skipswcnt changes made elsewhere. */
tcf_maintain_bypass(block);
445+
up_write(&block->cb_lock);
446+
}
447+
413448
static void tcf_chain_put(struct tcf_chain *chain);
414449

415450
static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
416451
bool sig_destroy, struct netlink_ext_ack *extack)
417452
{
418453
tp->ops->destroy(tp, rtnl_held, extack);
454+
tcf_block_filter_cnt_update(tp->chain->block, &tp->counted, false);
419455
if (sig_destroy)
420456
tcf_proto_signal_destroyed(tp->chain, tp);
421457
tcf_chain_put(tp->chain);
@@ -2364,6 +2400,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
23642400
err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
23652401
flags, extack);
23662402
if (err == 0) {
2403+
tcf_block_filter_cnt_update(block, &tp->counted, true);
23672404
tfilter_notify(net, skb, n, tp, block, q, parent, fh,
23682405
RTM_NEWTFILTER, false, rtnl_held, extack);
23692406
tfilter_put(tp, fh);
@@ -3483,6 +3520,8 @@ static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
34833520
if (*flags & TCA_CLS_FLAGS_IN_HW)
34843521
return;
34853522
*flags |= TCA_CLS_FLAGS_IN_HW;
3523+
if (tc_skip_sw(*flags))
3524+
atomic_inc(&block->skipswcnt);
34863525
atomic_inc(&block->offloadcnt);
34873526
}
34883527

@@ -3491,6 +3530,8 @@ static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
34913530
if (!(*flags & TCA_CLS_FLAGS_IN_HW))
34923531
return;
34933532
*flags &= ~TCA_CLS_FLAGS_IN_HW;
3533+
if (tc_skip_sw(*flags))
3534+
atomic_dec(&block->skipswcnt);
34943535
atomic_dec(&block->offloadcnt);
34953536
}
34963537

0 commit comments

Comments
 (0)