Skip to content

Commit d927db9

Browse files
Florian Westphal authored and
gregkh committed
netfilter: nft_flowtable.sh: re-run with random mtu sizes
[ Upstream commit d6a367e ]

Jakub says: nft_flowtable.sh is one of the most flake-atious test for netdev CI currently :(

The root cause is two-fold:
1. The failing part of the test is supposed to make sure that ip fragments are forwarded for offloaded flows (flowtable has to pass them to classic forward path). Path mtu discovery for these subtests is disabled.
2. nft_flowtable.sh has two passes. One with fixed mtus/file size and one where link mtus and file sizes are random.

The CI failures all have the same pattern:

re-run with random mtus and file size: -o 27663 -l 4117 -r 10089 -s 54384840
[..]
PASS: dscp_egress: dscp packet counters match
FAIL: file mismatch for ns1 -> ns2

In some cases this error triggers a bit earlier, sometimes in a later subtest:

re-run with random mtus and file size: -o 20201 -l 4555 -r 12657 -s 9405856
[..]
PASS: dscp_egress: dscp packet counters match
PASS: dscp_fwd: dscp packet counters match
2025/08/17 20:37:52 socat[18954] E write(7, 0x560716b96000, 8192): Broken pipe
FAIL: file mismatch for ns1 -> ns2
-rw------- 1 root root 9405856 Aug 17 20:36 /tmp/tmp.2n63vlTrQe

But all logs I saw show the same scenario:
1. Failing tests have pmtu discovery off (i.e., ip fragmentation)
2. The test file is much larger than first-pass default (2M Byte)
3. Peers have much larger MTUs compared to the 'network'.

These errors are very reproducible when re-running the test with the same commandline arguments.

The timeout became much more prominent with 1d2fbaa ("tcp: stronger sk_rcvbuf checks"): reassembled packets typically have a skb->truesize more than double the skb length.

As that commit is intentional and pmtud-off with large-tcp-packets-as-fragments is not normal, adjust the test to use a smaller file for the pmtu-off subtests.

While at it, add more information to pass/fail messages and also run the dscp alteration subtest with pmtu discovery enabled.
Link: https://netdev.bots.linux.dev/contest.html?test=nft-flowtable-sh
Fixes: f84ab63 ("selftests: netfilter: nft_flowtable.sh: re-run with random mtu sizes")
Reported-by: Jakub Kicinski <kuba@kernel.org>
Closes: https://lore.kernel.org/netdev/20250822071330.4168f0db@kernel.org/
Signed-off-by: Florian Westphal <fw@strlen.de>
Link: https://patch.msgid.link/20250828214918.3385-1-fw@strlen.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
1 parent 3dff390 commit d927db9

File tree

1 file changed

+76
-37
lines changed

1 file changed

+76
-37
lines changed

tools/testing/selftests/net/netfilter/nft_flowtable.sh

Lines changed: 76 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ ret=0
2020
SOCAT_TIMEOUT=60
2121

2222
nsin=""
23+
nsin_small=""
2324
ns1out=""
2425
ns2out=""
2526

@@ -36,7 +37,7 @@ cleanup() {
3637

3738
cleanup_all_ns
3839

39-
rm -f "$nsin" "$ns1out" "$ns2out"
40+
rm -f "$nsin" "$nsin_small" "$ns1out" "$ns2out"
4041

4142
[ "$log_netns" -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns="$log_netns"
4243
}
@@ -72,6 +73,7 @@ lmtu=1500
7273
rmtu=2000
7374

7475
filesize=$((2 * 1024 * 1024))
76+
filesize_small=$((filesize / 16))
7577

7678
usage(){
7779
echo "nft_flowtable.sh [OPTIONS]"
@@ -89,7 +91,10 @@ do
8991
o) omtu=$OPTARG;;
9092
l) lmtu=$OPTARG;;
9193
r) rmtu=$OPTARG;;
92-
s) filesize=$OPTARG;;
94+
s)
95+
filesize=$OPTARG
96+
filesize_small=$((OPTARG / 16))
97+
;;
9398
*) usage;;
9499
esac
95100
done
@@ -215,6 +220,7 @@ if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then
215220
fi
216221

217222
nsin=$(mktemp)
223+
nsin_small=$(mktemp)
218224
ns1out=$(mktemp)
219225
ns2out=$(mktemp)
220226

@@ -265,6 +271,7 @@ check_counters()
265271
check_dscp()
266272
{
267273
local what=$1
274+
local pmtud="$2"
268275
local ok=1
269276

270277
local counter
@@ -277,37 +284,39 @@ check_dscp()
277284
local pc4z=${counter%*bytes*}
278285
local pc4z=${pc4z#*packets}
279286

287+
local failmsg="FAIL: pmtu $pmtu: $what counters do not match, expected"
288+
280289
case "$what" in
281290
"dscp_none")
282291
if [ "$pc4" -gt 0 ] || [ "$pc4z" -eq 0 ]; then
283-
echo "FAIL: dscp counters do not match, expected dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2
292+
echo "$failmsg dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2
284293
ret=1
285294
ok=0
286295
fi
287296
;;
288297
"dscp_fwd")
289298
if [ "$pc4" -eq 0 ] || [ "$pc4z" -eq 0 ]; then
290-
echo "FAIL: dscp counters do not match, expected dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2
299+
echo "$failmsg dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2
291300
ret=1
292301
ok=0
293302
fi
294303
;;
295304
"dscp_ingress")
296305
if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then
297-
echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
306+
echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
298307
ret=1
299308
ok=0
300309
fi
301310
;;
302311
"dscp_egress")
303312
if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then
304-
echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
313+
echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
305314
ret=1
306315
ok=0
307316
fi
308317
;;
309318
*)
310-
echo "FAIL: Unknown DSCP check" 1>&2
319+
echo "$failmsg: Unknown DSCP check" 1>&2
311320
ret=1
312321
ok=0
313322
esac
@@ -319,9 +328,9 @@ check_dscp()
319328

320329
check_transfer()
321330
{
322-
in=$1
323-
out=$2
324-
what=$3
331+
local in=$1
332+
local out=$2
333+
local what=$3
325334

326335
if ! cmp "$in" "$out" > /dev/null 2>&1; then
327336
echo "FAIL: file mismatch for $what" 1>&2
@@ -342,25 +351,39 @@ test_tcp_forwarding_ip()
342351
{
343352
local nsa=$1
344353
local nsb=$2
345-
local dstip=$3
346-
local dstport=$4
354+
local pmtu=$3
355+
local dstip=$4
356+
local dstport=$5
347357
local lret=0
358+
local socatc
359+
local socatl
360+
local infile="$nsin"
361+
362+
if [ $pmtu -eq 0 ]; then
363+
infile="$nsin_small"
364+
fi
348365

349-
timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$nsin" > "$ns2out" &
366+
timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$infile" > "$ns2out" &
350367
lpid=$!
351368

352369
busywait 1000 listener_ready
353370

354-
timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$nsin" > "$ns1out"
371+
timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$infile" > "$ns1out"
372+
socatc=$?
355373

356374
wait $lpid
375+
socatl=$?
357376

358-
if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then
377+
if [ $socatl -ne 0 ] || [ $socatc -ne 0 ];then
378+
rc=1
379+
fi
380+
381+
if ! check_transfer "$infile" "$ns2out" "ns1 -> ns2"; then
359382
lret=1
360383
ret=1
361384
fi
362385

363-
if ! check_transfer "$nsin" "$ns1out" "ns1 <- ns2"; then
386+
if ! check_transfer "$infile" "$ns1out" "ns1 <- ns2"; then
364387
lret=1
365388
ret=1
366389
fi
@@ -370,14 +393,16 @@ test_tcp_forwarding_ip()
370393

371394
test_tcp_forwarding()
372395
{
373-
test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
396+
local pmtu="$3"
397+
398+
test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345
374399

375400
return $?
376401
}
377402

378403
test_tcp_forwarding_set_dscp()
379404
{
380-
check_dscp "dscp_none"
405+
local pmtu="$3"
381406

382407
ip netns exec "$nsr1" nft -f - <<EOF
383408
table netdev dscpmangle {
@@ -388,8 +413,8 @@ table netdev dscpmangle {
388413
}
389414
EOF
390415
if [ $? -eq 0 ]; then
391-
test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
392-
check_dscp "dscp_ingress"
416+
test_tcp_forwarding_ip "$1" "$2" "$3" 10.0.2.99 12345
417+
check_dscp "dscp_ingress" "$pmtu"
393418

394419
ip netns exec "$nsr1" nft delete table netdev dscpmangle
395420
else
@@ -405,59 +430,64 @@ table netdev dscpmangle {
405430
}
406431
EOF
407432
if [ $? -eq 0 ]; then
408-
test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
409-
check_dscp "dscp_egress"
433+
test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345
434+
check_dscp "dscp_egress" "$pmtu"
410435

411-
ip netns exec "$nsr1" nft flush table netdev dscpmangle
436+
ip netns exec "$nsr1" nft delete table netdev dscpmangle
412437
else
413438
echo "SKIP: Could not load netdev:egress for veth1"
414439
fi
415440

416441
# partial. If flowtable really works, then both dscp-is-0 and dscp-is-cs3
417442
# counters should have seen packets (before and after ft offload kicks in).
418443
ip netns exec "$nsr1" nft -a insert rule inet filter forward ip dscp set cs3
419-
test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
420-
check_dscp "dscp_fwd"
444+
test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345
445+
check_dscp "dscp_fwd" "$pmtu"
421446
}
422447

423448
test_tcp_forwarding_nat()
424449
{
450+
local nsa="$1"
451+
local nsb="$2"
452+
local pmtu="$3"
453+
local what="$4"
425454
local lret
426-
local pmtu
427455

428-
test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
429-
lret=$?
456+
[ "$pmtu" -eq 0 ] && what="$what (pmtu disabled)"
430457

431-
pmtu=$3
432-
what=$4
458+
test_tcp_forwarding_ip "$nsa" "$nsb" "$pmtu" 10.0.2.99 12345
459+
lret=$?
433460

434461
if [ "$lret" -eq 0 ] ; then
435462
if [ "$pmtu" -eq 1 ] ;then
436-
check_counters "flow offload for ns1/ns2 with masquerade and pmtu discovery $what"
463+
check_counters "flow offload for ns1/ns2 with masquerade $what"
437464
else
438465
echo "PASS: flow offload for ns1/ns2 with masquerade $what"
439466
fi
440467

441-
test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
468+
test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.6.6.6 1666
442469
lret=$?
443470
if [ "$pmtu" -eq 1 ] ;then
444-
check_counters "flow offload for ns1/ns2 with dnat and pmtu discovery $what"
471+
check_counters "flow offload for ns1/ns2 with dnat $what"
445472
elif [ "$lret" -eq 0 ] ; then
446473
echo "PASS: flow offload for ns1/ns2 with dnat $what"
447474
fi
475+
else
476+
echo "FAIL: flow offload for ns1/ns2 with dnat $what"
448477
fi
449478

450479
return $lret
451480
}
452481

453482
make_file "$nsin" "$filesize"
483+
make_file "$nsin_small" "$filesize_small"
454484

455485
# First test:
456486
# No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
457487
# Due to MTU mismatch in both directions, all packets (except small packets like pure
458488
# acks) have to be handled by normal forwarding path. Therefore, packet counters
459489
# are not checked.
460-
if test_tcp_forwarding "$ns1" "$ns2"; then
490+
if test_tcp_forwarding "$ns1" "$ns2" 0; then
461491
echo "PASS: flow offloaded for ns1/ns2"
462492
else
463493
echo "FAIL: flow offload for ns1/ns2:" 1>&2
@@ -489,8 +519,9 @@ table ip nat {
489519
}
490520
EOF
491521

522+
check_dscp "dscp_none" "0"
492523
if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 ""; then
493-
echo "FAIL: flow offload for ns1/ns2 with dscp update" 1>&2
524+
echo "FAIL: flow offload for ns1/ns2 with dscp update and no pmtu discovery" 1>&2
494525
exit 0
495526
fi
496527

@@ -512,6 +543,14 @@ ip netns exec "$ns2" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
512543
# are lower than file size and packets were forwarded via flowtable layer.
513544
# For earlier tests (large mtus), packets cannot be handled via flowtable
514545
# (except pure acks and other small packets).
546+
ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null
547+
ip netns exec "$ns2" nft reset counters table inet filter >/dev/null
548+
549+
if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 1 ""; then
550+
echo "FAIL: flow offload for ns1/ns2 with dscp update and pmtu discovery" 1>&2
551+
exit 0
552+
fi
553+
515554
ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null
516555

517556
if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 ""; then
@@ -644,7 +683,7 @@ ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1
644683
ip -net "$ns2" route add default via 10.0.2.1
645684
ip -net "$ns2" route add default via dead:2::1
646685

647-
if test_tcp_forwarding "$ns1" "$ns2"; then
686+
if test_tcp_forwarding "$ns1" "$ns2" 1; then
648687
check_counters "ipsec tunnel mode for ns1/ns2"
649688
else
650689
echo "FAIL: ipsec tunnel mode for ns1/ns2"
@@ -668,7 +707,7 @@ if [ "$1" = "" ]; then
668707
fi
669708

670709
echo "re-run with random mtus and file size: -o $o -l $l -r $r -s $filesize"
671-
$0 -o "$o" -l "$l" -r "$r" -s "$filesize"
710+
$0 -o "$o" -l "$l" -r "$r" -s "$filesize" || ret=1
672711
fi
673712

674713
exit $ret

0 commit comments

Comments
 (0)