Skip to content

Commit 43a71cd

Browse files
xli98 authored and kuba-moo committed
net-device: reorganize net_device fast path variables
Reorganize fast path variables on tx-txrx-rx order.
Fastpath variables end after npinfo.

Below data generated with pahole on x86 architecture.

Fast path variables span cache lines before change: 12
Fast path variables span cache lines after change: 4

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Coco Li <lixiaoyan@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/20231204201232.520025-2-lixiaoyan@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
1 parent 5aa00e9 commit 43a71cd

File tree

2 files changed

+120
-53
lines changed

2 files changed

+120
-53
lines changed

include/linux/netdevice.h

Lines changed: 64 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -2097,6 +2097,70 @@ enum netdev_stat_type {
20972097
*/
20982098

20992099
struct net_device {
2100+
/* Cacheline organization can be found documented in
2101+
* Documentation/networking/net_cachelines/net_device.rst.
2102+
* Please update the document when adding new fields.
2103+
*/
2104+
2105+
/* TX read-mostly hotpath */
2106+
__cacheline_group_begin(net_device_read_tx);
2107+
unsigned long long priv_flags;
2108+
const struct net_device_ops *netdev_ops;
2109+
const struct header_ops *header_ops;
2110+
struct netdev_queue *_tx;
2111+
unsigned int real_num_tx_queues;
2112+
unsigned int gso_max_size;
2113+
unsigned int gso_ipv4_max_size;
2114+
u16 gso_max_segs;
2115+
s16 num_tc;
2116+
/* Note : dev->mtu is often read without holding a lock.
2117+
* Writers usually hold RTNL.
2118+
* It is recommended to use READ_ONCE() to annotate the reads,
2119+
* and to use WRITE_ONCE() to annotate the writes.
2120+
*/
2121+
unsigned int mtu;
2122+
unsigned short needed_headroom;
2123+
struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];
2124+
#ifdef CONFIG_XPS
2125+
struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX];
2126+
#endif
2127+
#ifdef CONFIG_NETFILTER_EGRESS
2128+
struct nf_hook_entries __rcu *nf_hooks_egress;
2129+
#endif
2130+
#ifdef CONFIG_NET_XGRESS
2131+
struct bpf_mprog_entry __rcu *tcx_egress;
2132+
#endif
2133+
__cacheline_group_end(net_device_read_tx);
2134+
2135+
/* TXRX read-mostly hotpath */
2136+
__cacheline_group_begin(net_device_read_txrx);
2137+
unsigned int flags;
2138+
unsigned short hard_header_len;
2139+
netdev_features_t features;
2140+
struct inet6_dev __rcu *ip6_ptr;
2141+
__cacheline_group_end(net_device_read_txrx);
2142+
2143+
/* RX read-mostly hotpath */
2144+
__cacheline_group_begin(net_device_read_rx);
2145+
struct list_head ptype_specific;
2146+
int ifindex;
2147+
unsigned int real_num_rx_queues;
2148+
struct netdev_rx_queue *_rx;
2149+
unsigned long gro_flush_timeout;
2150+
int napi_defer_hard_irqs;
2151+
unsigned int gro_max_size;
2152+
unsigned int gro_ipv4_max_size;
2153+
rx_handler_func_t __rcu *rx_handler;
2154+
void __rcu *rx_handler_data;
2155+
possible_net_t nd_net;
2156+
#ifdef CONFIG_NETPOLL
2157+
struct netpoll_info __rcu *npinfo;
2158+
#endif
2159+
#ifdef CONFIG_NET_XGRESS
2160+
struct bpf_mprog_entry __rcu *tcx_ingress;
2161+
#endif
2162+
__cacheline_group_end(net_device_read_rx);
2163+
21002164
char name[IFNAMSIZ];
21012165
struct netdev_name_node *name_node;
21022166
struct dev_ifalias __rcu *ifalias;
@@ -2121,34 +2185,20 @@ struct net_device {
21212185
struct list_head unreg_list;
21222186
struct list_head close_list;
21232187
struct list_head ptype_all;
2124-
struct list_head ptype_specific;
21252188

21262189
struct {
21272190
struct list_head upper;
21282191
struct list_head lower;
21292192
} adj_list;
21302193

21312194
/* Read-mostly cache-line for fast-path access */
2132-
unsigned int flags;
21332195
xdp_features_t xdp_features;
2134-
unsigned long long priv_flags;
2135-
const struct net_device_ops *netdev_ops;
21362196
const struct xdp_metadata_ops *xdp_metadata_ops;
21372197
const struct xsk_tx_metadata_ops *xsk_tx_metadata_ops;
2138-
int ifindex;
21392198
unsigned short gflags;
2140-
unsigned short hard_header_len;
21412199

2142-
/* Note : dev->mtu is often read without holding a lock.
2143-
* Writers usually hold RTNL.
2144-
* It is recommended to use READ_ONCE() to annotate the reads,
2145-
* and to use WRITE_ONCE() to annotate the writes.
2146-
*/
2147-
unsigned int mtu;
2148-
unsigned short needed_headroom;
21492200
unsigned short needed_tailroom;
21502201

2151-
netdev_features_t features;
21522202
netdev_features_t hw_features;
21532203
netdev_features_t wanted_features;
21542204
netdev_features_t vlan_features;
@@ -2192,8 +2242,6 @@ struct net_device {
21922242
const struct tlsdev_ops *tlsdev_ops;
21932243
#endif
21942244

2195-
const struct header_ops *header_ops;
2196-
21972245
unsigned char operstate;
21982246
unsigned char link_mode;
21992247

@@ -2234,9 +2282,7 @@ struct net_device {
22342282

22352283

22362284
/* Protocol-specific pointers */
2237-
22382285
struct in_device __rcu *ip_ptr;
2239-
struct inet6_dev __rcu *ip6_ptr;
22402286
#if IS_ENABLED(CONFIG_VLAN_8021Q)
22412287
struct vlan_info __rcu *vlan_info;
22422288
#endif
@@ -2271,26 +2317,14 @@ struct net_device {
22712317
/* Interface address info used in eth_type_trans() */
22722318
const unsigned char *dev_addr;
22732319

2274-
struct netdev_rx_queue *_rx;
22752320
unsigned int num_rx_queues;
2276-
unsigned int real_num_rx_queues;
2277-
22782321
struct bpf_prog __rcu *xdp_prog;
2279-
unsigned long gro_flush_timeout;
2280-
int napi_defer_hard_irqs;
22812322
#define GRO_LEGACY_MAX_SIZE 65536u
22822323
/* TCP minimal MSS is 8 (TCP_MIN_GSO_SIZE),
22832324
* and shinfo->gso_segs is a 16bit field.
22842325
*/
22852326
#define GRO_MAX_SIZE (8 * 65535u)
2286-
unsigned int gro_max_size;
2287-
unsigned int gro_ipv4_max_size;
22882327
unsigned int xdp_zc_max_segs;
2289-
rx_handler_func_t __rcu *rx_handler;
2290-
void __rcu *rx_handler_data;
2291-
#ifdef CONFIG_NET_XGRESS
2292-
struct bpf_mprog_entry __rcu *tcx_ingress;
2293-
#endif
22942328
struct netdev_queue __rcu *ingress_queue;
22952329
#ifdef CONFIG_NETFILTER_INGRESS
22962330
struct nf_hook_entries __rcu *nf_hooks_ingress;
@@ -2305,25 +2339,13 @@ struct net_device {
23052339
/*
23062340
* Cache lines mostly used on transmit path
23072341
*/
2308-
struct netdev_queue *_tx ____cacheline_aligned_in_smp;
23092342
unsigned int num_tx_queues;
2310-
unsigned int real_num_tx_queues;
23112343
struct Qdisc __rcu *qdisc;
23122344
unsigned int tx_queue_len;
23132345
spinlock_t tx_global_lock;
23142346

23152347
struct xdp_dev_bulk_queue __percpu *xdp_bulkq;
23162348

2317-
#ifdef CONFIG_XPS
2318-
struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX];
2319-
#endif
2320-
#ifdef CONFIG_NET_XGRESS
2321-
struct bpf_mprog_entry __rcu *tcx_egress;
2322-
#endif
2323-
#ifdef CONFIG_NETFILTER_EGRESS
2324-
struct nf_hook_entries __rcu *nf_hooks_egress;
2325-
#endif
2326-
23272349
#ifdef CONFIG_NET_SCHED
23282350
DECLARE_HASHTABLE (qdisc_hash, 4);
23292351
#endif
@@ -2362,12 +2384,6 @@ struct net_device {
23622384
bool needs_free_netdev;
23632385
void (*priv_destructor)(struct net_device *dev);
23642386

2365-
#ifdef CONFIG_NETPOLL
2366-
struct netpoll_info __rcu *npinfo;
2367-
#endif
2368-
2369-
possible_net_t nd_net;
2370-
23712387
/* mid-layer private */
23722388
void *ml_priv;
23732389
enum netdev_ml_priv_type ml_priv_type;
@@ -2402,20 +2418,15 @@ struct net_device {
24022418
*/
24032419
#define GSO_MAX_SIZE (8 * GSO_MAX_SEGS)
24042420

2405-
unsigned int gso_max_size;
24062421
#define TSO_LEGACY_MAX_SIZE 65536
24072422
#define TSO_MAX_SIZE UINT_MAX
24082423
unsigned int tso_max_size;
2409-
u16 gso_max_segs;
24102424
#define TSO_MAX_SEGS U16_MAX
24112425
u16 tso_max_segs;
2412-
unsigned int gso_ipv4_max_size;
24132426

24142427
#ifdef CONFIG_DCB
24152428
const struct dcbnl_rtnl_ops *dcbnl_ops;
24162429
#endif
2417-
s16 num_tc;
2418-
struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];
24192430
u8 prio_tc_map[TC_BITMASK + 1];
24202431

24212432
#if IS_ENABLED(CONFIG_FCOE)

net/core/dev.c

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11609,6 +11609,60 @@ static struct pernet_operations __net_initdata default_device_ops = {
1160911609
.exit_batch = default_device_exit_batch,
1161011610
};
1161111611

11612+
static void __init net_dev_struct_check(void)
11613+
{
11614+
/* TX read-mostly hotpath */
11615+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, priv_flags);
11616+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, netdev_ops);
11617+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, header_ops);
11618+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, _tx);
11619+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, real_num_tx_queues);
11620+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_max_size);
11621+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_ipv4_max_size);
11622+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_max_segs);
11623+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, num_tc);
11624+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, mtu);
11625+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, needed_headroom);
11626+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, tc_to_txq);
11627+
#ifdef CONFIG_XPS
11628+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, xps_maps);
11629+
#endif
11630+
#ifdef CONFIG_NETFILTER_EGRESS
11631+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, nf_hooks_egress);
11632+
#endif
11633+
#ifdef CONFIG_NET_XGRESS
11634+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, tcx_egress);
11635+
#endif
11636+
CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_tx, 152);
11637+
11638+
/* TXRX read-mostly hotpath */
11639+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, flags);
11640+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, hard_header_len);
11641+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, features);
11642+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, ip6_ptr);
11643+
CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_txrx, 30);
11644+
11645+
/* RX read-mostly hotpath */
11646+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ptype_specific);
11647+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ifindex);
11648+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, real_num_rx_queues);
11649+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, _rx);
11650+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_flush_timeout);
11651+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, napi_defer_hard_irqs);
11652+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_max_size);
11653+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_ipv4_max_size);
11654+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler);
11655+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler_data);
11656+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, nd_net);
11657+
#ifdef CONFIG_NETPOLL
11658+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, npinfo);
11659+
#endif
11660+
#ifdef CONFIG_NET_XGRESS
11661+
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, tcx_ingress);
11662+
#endif
11663+
CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 96);
11664+
}
11665+
1161211666
/*
1161311667
* Initialize the DEV module. At boot time this walks the device list and
1161411668
* unhooks any devices that fail to initialise (normally hardware not
@@ -11626,6 +11680,8 @@ static int __init net_dev_init(void)
1162611680

1162711681
BUG_ON(!dev_boot_phase);
1162811682

11683+
net_dev_struct_check();
11684+
1162911685
if (dev_proc_init())
1163011686
goto out;
1163111687

0 commit comments

Comments
 (0)