Skip to content

Commit 983f507

Browse files
committed
Merge branch 'net-support-ipv4-big-tcp'
Xin Long says: ==================== net: support ipv4 big tcp This is similar to the BIG TCP patchset added by Eric for IPv6: https://lwn.net/Articles/895398/ Unlike IPv6, the IPv4 tot_len field is only 16 bits long, and the IPv4 header has no exthdrs (options) that could carry the BIG TCP packets' length. To keep it simple, as David and Paolo suggested, we set IPv4 tot_len to 0 to indicate that this might be a BIG TCP packet and use skb->len as the real IPv4 total length. This works safely, as all BIG TCP packets are GSO/GRO packets and are processed on the same host where they were created; there is no padding in GSO/GRO packets, and skb->len - network_offset is exactly the IPv4 packet total length. Also, before implementing the feature, all the places that may read iph tot_len from BIG TCP packets are taken care of with some new APIs: Patch 1 adds some APIs for setting and getting iph tot_len, which are used in Patch 2-7 in all the places that IPv4 BIG TCP packets may reach, Patch 8 adds a GSO_TCP tp_status for af_packet users, Patch 9 adds new netlink attributes to make IPv4 BIG TCP configuration independent from IPv6 BIG TCP, and Patch 10 implements this feature. Note that changes similar to those in Patch 2-6 are also needed for IPv6 BIG TCP packets, and will be addressed in another patchset. 
A similar performance test was done for IPv4 BIG TCP with a 25Gbit NIC and 1.5K MTU: No BIG TCP: for i in {1..10}; do netperf -t TCP_RR -H 192.168.100.1 -- -r80000,80000 -O MIN_LATENCY,P90_LATENCY,P99_LATENCY,THROUGHPUT|tail -1; done 168 322 337 3776.49 143 236 277 4654.67 128 258 288 4772.83 171 229 278 4645.77 175 228 243 4678.93 149 239 279 4599.86 164 234 268 4606.94 155 276 289 4235.82 180 255 268 4418.95 168 241 249 4417.82 Enable BIG TCP: ip link set dev ens1f0np0 gro_ipv4_max_size 128000 gso_ipv4_max_size 128000 for i in {1..10}; do netperf -t TCP_RR -H 192.168.100.1 -- -r80000,80000 -O MIN_LATENCY,P90_LATENCY,P99_LATENCY,THROUGHPUT|tail -1; done 161 241 252 4821.73 174 205 217 5098.28 167 208 220 5001.43 164 228 249 4883.98 150 233 249 4914.90 180 233 244 4819.66 154 208 219 5004.92 157 209 247 4999.78 160 218 246 4842.31 174 206 217 5080.99 Thanks for the feedback from Eric and David Ahern. ==================== Link: https://lore.kernel.org/r/cover.1674921359.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2 parents d8673af + b1a78b9 commit 983f507

File tree

25 files changed

+130
-38
lines changed

25 files changed

+130
-38
lines changed

drivers/net/ipvlan/ipvlan_core.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type)
157157
return NULL;
158158

159159
ip4h = ip_hdr(skb);
160-
pktlen = ntohs(ip4h->tot_len);
160+
pktlen = skb_ip_totlen(skb);
161161
if (ip4h->ihl < 5 || ip4h->version != 4)
162162
return NULL;
163163
if (skb->len < pktlen || pktlen < (ip4h->ihl * 4))

include/linux/ip.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,4 +35,25 @@ static inline unsigned int ip_transport_len(const struct sk_buff *skb)
3535
{
3636
return ntohs(ip_hdr(skb)->tot_len) - skb_network_header_len(skb);
3737
}
38+
39+
static inline unsigned int iph_totlen(const struct sk_buff *skb, const struct iphdr *iph)
40+
{
41+
u32 len = ntohs(iph->tot_len);
42+
43+
return (len || !skb_is_gso(skb) || !skb_is_gso_tcp(skb)) ?
44+
len : skb->len - skb_network_offset(skb);
45+
}
46+
47+
static inline unsigned int skb_ip_totlen(const struct sk_buff *skb)
48+
{
49+
return iph_totlen(skb, ip_hdr(skb));
50+
}
51+
52+
/* IPv4 datagram length is stored into 16bit field (tot_len) */
53+
#define IP_MAX_MTU 0xFFFFU
54+
55+
static inline void iph_set_totlen(struct iphdr *iph, unsigned int len)
56+
{
57+
iph->tot_len = len <= IP_MAX_MTU ? htons(len) : 0;
58+
}
3859
#endif /* _LINUX_IP_H */

include/linux/netdevice.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1964,6 +1964,8 @@ enum netdev_ml_priv_type {
19641964
* @gso_max_segs: Maximum number of segments that can be passed to the
19651965
* NIC for GSO
19661966
* @tso_max_segs: Device (as in HW) limit on the max TSO segment count
1967+
* @gso_ipv4_max_size: Maximum size of generic segmentation offload,
1968+
* for IPv4.
19671969
*
19681970
* @dcbnl_ops: Data Center Bridging netlink ops
19691971
* @num_tc: Number of traffic classes in the net device
@@ -2004,6 +2006,8 @@ enum netdev_ml_priv_type {
20042006
* keep a list of interfaces to be deleted.
20052007
* @gro_max_size: Maximum size of aggregated packet in generic
20062008
* receive offload (GRO)
2009+
* @gro_ipv4_max_size: Maximum size of aggregated packet in generic
2010+
* receive offload (GRO), for IPv4.
20072011
*
20082012
* @dev_addr_shadow: Copy of @dev_addr to catch direct writes.
20092013
* @linkwatch_dev_tracker: refcount tracker used by linkwatch.
@@ -2207,6 +2211,7 @@ struct net_device {
22072211
*/
22082212
#define GRO_MAX_SIZE (8 * 65535u)
22092213
unsigned int gro_max_size;
2214+
unsigned int gro_ipv4_max_size;
22102215
rx_handler_func_t __rcu *rx_handler;
22112216
void __rcu *rx_handler_data;
22122217

@@ -2330,6 +2335,7 @@ struct net_device {
23302335
u16 gso_max_segs;
23312336
#define TSO_MAX_SEGS U16_MAX
23322337
u16 tso_max_segs;
2338+
unsigned int gso_ipv4_max_size;
23332339

23342340
#ifdef CONFIG_DCB
23352341
const struct dcbnl_rtnl_ops *dcbnl_ops;

include/net/netfilter/nf_tables_ipv4.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt)
2929
if (iph->ihl < 5 || iph->version != 4)
3030
return -1;
3131

32-
len = ntohs(iph->tot_len);
32+
len = iph_totlen(pkt->skb, iph);
3333
thoff = iph->ihl * 4;
3434
if (pkt->skb->len < len)
3535
return -1;
@@ -64,7 +64,7 @@ static inline int nft_set_pktinfo_ipv4_ingress(struct nft_pktinfo *pkt)
6464
if (iph->ihl < 5 || iph->version != 4)
6565
goto inhdr_error;
6666

67-
len = ntohs(iph->tot_len);
67+
len = iph_totlen(pkt->skb, iph);
6868
thoff = iph->ihl * 4;
6969
if (pkt->skb->len < len) {
7070
__IP_INC_STATS(nft_net(pkt), IPSTATS_MIB_INTRUNCATEDPKTS);

include/net/route.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,6 @@
3535
#include <linux/cache.h>
3636
#include <linux/security.h>
3737

38-
/* IPv4 datagram length is stored into 16bit field (tot_len) */
39-
#define IP_MAX_MTU 0xFFFFU
40-
4138
#define RTO_ONLINK 0x01
4239

4340
#define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE))

include/uapi/linux/if_link.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,9 @@ enum {
374374

375375
IFLA_DEVLINK_PORT,
376376

377+
IFLA_GSO_IPV4_MAX_SIZE,
378+
IFLA_GRO_IPV4_MAX_SIZE,
379+
377380
__IFLA_MAX
378381
};
379382

include/uapi/linux/if_packet.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ struct tpacket_auxdata {
115115
#define TP_STATUS_BLK_TMO (1 << 5)
116116
#define TP_STATUS_VLAN_TPID_VALID (1 << 6) /* auxdata has valid tp_vlan_tpid */
117117
#define TP_STATUS_CSUM_VALID (1 << 7)
118+
#define TP_STATUS_GSO_TCP (1 << 8)
118119

119120
/* Tx ring - header status */
120121
#define TP_STATUS_AVAILABLE 0

net/bridge/br_netfilter_hooks.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ static int br_validate_ipv4(struct net *net, struct sk_buff *skb)
214214
if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
215215
goto csum_error;
216216

217-
len = ntohs(iph->tot_len);
217+
len = skb_ip_totlen(skb);
218218
if (skb->len < len) {
219219
__IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
220220
goto drop;

net/bridge/netfilter/nf_conntrack_bridge.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ static int nf_ct_br_ip_check(const struct sk_buff *skb)
212212
iph->version != 4)
213213
return -1;
214214

215-
len = ntohs(iph->tot_len);
215+
len = skb_ip_totlen(skb);
216216
if (skb->len < nhoff + len ||
217217
len < (iph->ihl * 4))
218218
return -1;
@@ -256,7 +256,7 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
256256
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
257257
return NF_ACCEPT;
258258

259-
len = ntohs(ip_hdr(skb)->tot_len);
259+
len = skb_ip_totlen(skb);
260260
if (pskb_trim_rcsum(skb, len))
261261
return NF_ACCEPT;
262262

net/core/dev.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3001,6 +3001,8 @@ void netif_set_tso_max_size(struct net_device *dev, unsigned int size)
30013001
dev->tso_max_size = min(GSO_MAX_SIZE, size);
30023002
if (size < READ_ONCE(dev->gso_max_size))
30033003
netif_set_gso_max_size(dev, size);
3004+
if (size < READ_ONCE(dev->gso_ipv4_max_size))
3005+
netif_set_gso_ipv4_max_size(dev, size);
30043006
}
30053007
EXPORT_SYMBOL(netif_set_tso_max_size);
30063008

@@ -10614,6 +10616,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
1061410616
dev->gso_max_size = GSO_LEGACY_MAX_SIZE;
1061510617
dev->gso_max_segs = GSO_MAX_SEGS;
1061610618
dev->gro_max_size = GRO_LEGACY_MAX_SIZE;
10619+
dev->gso_ipv4_max_size = GSO_LEGACY_MAX_SIZE;
10620+
dev->gro_ipv4_max_size = GRO_LEGACY_MAX_SIZE;
1061710621
dev->tso_max_size = TSO_LEGACY_MAX_SIZE;
1061810622
dev->tso_max_segs = TSO_MAX_SEGS;
1061910623
dev->upper_level = 1;

0 commit comments

Comments
 (0)