Skip to content

Commit d6821c5

Browse files
committed
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf
Pablo Neira Ayuso says: ==================== Netfilter fixes for net The following patchset contains Netfilter/IPVS fixes for net: 1) Add selftest for vrf+conntrack, from Florian Westphal. 2) Extend nfqueue selftest to cover nfqueue, also from Florian. 3) Remove duplicated include in nft_payload, from Wan Jiabing. 4) Several improvements to the nat port shadowing selftest, from Phil Sutter. 5) Fix filtering of reply tuple in ctnetlink, from Florent Fourcot. 6) Do not override error with -EINVAL in filter setup path, also from Florent. 7) Honor sysctl_expire_nodest_conn regardless of conn_reuse_mode for reused connections, from yangxingwu. 8) Replace snprintf() with sysfs_emit() in xt_IDLETIMER as reported by Coccinelle, from Jing Yao. 9) Fix incorrect IPv6 tunnel match in flowtable offload, from Will Mortensen. 10) Switch port shadow selftest to use socat, from Florian Westphal. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents 8d0112a + a2acf0c commit d6821c5

File tree

10 files changed

+309
-26
lines changed

10 files changed

+309
-26
lines changed

Documentation/networking/ipvs-sysctl.rst

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,7 @@ conn_reuse_mode - INTEGER
3737

3838
0: disable any special handling on port reuse. The new
3939
connection will be delivered to the same real server that was
40-
servicing the previous connection. This will effectively
41-
disable expire_nodest_conn.
40+
servicing the previous connection.
4241

4342
bit 1: enable rescheduling of new connections when it is safe.
4443
That is, whenever expire_nodest_conn and for TCP sockets, when

net/netfilter/ipvs/ip_vs_core.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1919,7 +1919,6 @@ ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state
19191919
struct ip_vs_proto_data *pd;
19201920
struct ip_vs_conn *cp;
19211921
int ret, pkts;
1922-
int conn_reuse_mode;
19231922
struct sock *sk;
19241923
int af = state->pf;
19251924

@@ -1997,15 +1996,16 @@ ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state
19971996
cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto,
19981997
ipvs, af, skb, &iph);
19991998

2000-
conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
2001-
if (conn_reuse_mode && !iph.fragoffs && is_new_conn(skb, &iph) && cp) {
1999+
if (!iph.fragoffs && is_new_conn(skb, &iph) && cp) {
2000+
int conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
20022001
bool old_ct = false, resched = false;
20032002

20042003
if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
20052004
unlikely(!atomic_read(&cp->dest->weight))) {
20062005
resched = true;
20072006
old_ct = ip_vs_conn_uses_old_conntrack(cp, skb);
2008-
} else if (is_new_conn_expected(cp, conn_reuse_mode)) {
2007+
} else if (conn_reuse_mode &&
2008+
is_new_conn_expected(cp, conn_reuse_mode)) {
20092009
old_ct = ip_vs_conn_uses_old_conntrack(cp, skb);
20102010
if (!atomic_read(&cp->n_control)) {
20112011
resched = true;

net/netfilter/nf_conntrack_netlink.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1011,11 +1011,9 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
10111011
CTA_TUPLE_REPLY,
10121012
filter->family,
10131013
&filter->zone,
1014-
filter->orig_flags);
1015-
if (err < 0) {
1016-
err = -EINVAL;
1014+
filter->reply_flags);
1015+
if (err < 0)
10171016
goto err_filter;
1018-
}
10191017
}
10201018

10211019
return filter;

net/netfilter/nf_flow_table_offload.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,11 +65,11 @@ static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
6565
sizeof(struct in6_addr));
6666
if (memcmp(&key->enc_ipv6.src, &in6addr_any,
6767
sizeof(struct in6_addr)))
68-
memset(&key->enc_ipv6.src, 0xff,
68+
memset(&mask->enc_ipv6.src, 0xff,
6969
sizeof(struct in6_addr));
7070
if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
7171
sizeof(struct in6_addr)))
72-
memset(&key->enc_ipv6.dst, 0xff,
72+
memset(&mask->enc_ipv6.dst, 0xff,
7373
sizeof(struct in6_addr));
7474
enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
7575
key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;

net/netfilter/nft_payload.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
#include <linux/icmpv6.h>
2323
#include <linux/ip.h>
2424
#include <linux/ipv6.h>
25-
#include <linux/ip.h>
2625
#include <net/sctp/checksum.h>
2726

2827
static bool nft_payload_rebuild_vlan_hdr(const struct sk_buff *skb, int mac_off,

net/netfilter/xt_IDLETIMER.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,9 +85,9 @@ static ssize_t idletimer_tg_show(struct device *dev,
8585
mutex_unlock(&list_mutex);
8686

8787
if (time_after(expires, jiffies) || ktimespec.tv_sec > 0)
88-
return snprintf(buf, PAGE_SIZE, "%ld\n", time_diff);
88+
return sysfs_emit(buf, "%ld\n", time_diff);
8989

90-
return snprintf(buf, PAGE_SIZE, "0\n");
90+
return sysfs_emit(buf, "0\n");
9191
}
9292

9393
static void idletimer_tg_work(struct work_struct *work)

tools/testing/selftests/netfilter/Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
55
conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
66
nft_concat_range.sh nft_conntrack_helper.sh \
77
nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
8-
ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh
8+
ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \
9+
conntrack_vrf.sh
910

1011
LDLIBS = -lmnl
1112
TEST_GEN_FILES = nf-queue
Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
#!/bin/sh
2+
3+
# This script demonstrates interaction of conntrack and vrf.
4+
# The vrf driver calls the netfilter hooks again, with oif/iif
5+
# pointing at the VRF device.
6+
#
7+
# For ingress, this means first iteration has iifname of lower/real
8+
# device. In this script, that's veth0.
9+
# Second iteration is iifname set to vrf device, tvrf in this script.
10+
#
11+
# For egress, this is reversed: first iteration has the vrf device,
12+
# second iteration is done with the lower/real/veth0 device.
13+
#
14+
# test_ct_zone_in demonstrates unexpected change of nftables
15+
# behavior caused by commit 09e856d54bda5f28 "vrf: Reset skb conntrack
16+
# connection on VRF rcv"
17+
#
18+
# It was possible to assign conntrack zone to a packet (or mark it for
19+
# `notracking`) in the prerouting chain before conntrack, based on real iif.
20+
#
21+
# After the change, the zone assignment is lost and the zone is assigned based
22+
# on the VRF master interface (in case such a rule exists).
23+
# In other words, an assignment based on matching the real `iif` has no effect.
24+
# Thus it is impossible to distinguish packets based on the original
25+
# interface.
26+
#
27+
# test_masquerade_vrf and test_masquerade_veth demonstrate the problem
28+
# that was supposed to be fixed by the commit mentioned above to make sure
29+
# that any fix to test case 1 won't break masquerade again.
30+
31+
ksft_skip=4
32+
33+
IP0=172.30.30.1
34+
IP1=172.30.30.2
35+
PFXL=30
36+
ret=0
37+
38+
sfx=$(mktemp -u "XXXXXXXX")
39+
ns0="ns0-$sfx"
40+
ns1="ns1-$sfx"
41+
42+
cleanup()
43+
{
44+
ip netns pids $ns0 | xargs kill 2>/dev/null
45+
ip netns pids $ns1 | xargs kill 2>/dev/null
46+
47+
ip netns del $ns0 $ns1
48+
}
49+
50+
nft --version > /dev/null 2>&1
51+
if [ $? -ne 0 ];then
52+
echo "SKIP: Could not run test without nft tool"
53+
exit $ksft_skip
54+
fi
55+
56+
ip -Version > /dev/null 2>&1
57+
if [ $? -ne 0 ];then
58+
echo "SKIP: Could not run test without ip tool"
59+
exit $ksft_skip
60+
fi
61+
62+
ip netns add "$ns0"
63+
if [ $? -ne 0 ];then
64+
echo "SKIP: Could not create net namespace $ns0"
65+
exit $ksft_skip
66+
fi
67+
ip netns add "$ns1"
68+
69+
trap cleanup EXIT
70+
71+
ip netns exec $ns0 sysctl -q -w net.ipv4.conf.default.rp_filter=0
72+
ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0
73+
ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0
74+
75+
ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1
76+
if [ $? -ne 0 ];then
77+
echo "SKIP: Could not add veth device"
78+
exit $ksft_skip
79+
fi
80+
81+
ip -net $ns0 li add tvrf type vrf table 9876
82+
if [ $? -ne 0 ];then
83+
echo "SKIP: Could not add vrf device"
84+
exit $ksft_skip
85+
fi
86+
87+
ip -net $ns0 li set lo up
88+
89+
ip -net $ns0 li set veth0 master tvrf
90+
ip -net $ns0 li set tvrf up
91+
ip -net $ns0 li set veth0 up
92+
ip -net $ns1 li set veth0 up
93+
94+
ip -net $ns0 addr add $IP0/$PFXL dev veth0
95+
ip -net $ns1 addr add $IP1/$PFXL dev veth0
96+
97+
ip netns exec $ns1 iperf3 -s > /dev/null 2>&1&
98+
if [ $? -ne 0 ];then
99+
echo "SKIP: Could not start iperf3"
100+
exit $ksft_skip
101+
fi
102+
103+
# test vrf ingress handling.
104+
# The incoming connection should be placed in conntrack zone 1,
105+
# as decided by the first iteration of the ruleset.
106+
test_ct_zone_in()
107+
{
108+
ip netns exec $ns0 nft -f - <<EOF
109+
table testct {
110+
chain rawpre {
111+
type filter hook prerouting priority raw;
112+
113+
iif { veth0, tvrf } counter meta nftrace set 1
114+
iif veth0 counter ct zone set 1 counter return
115+
iif tvrf counter ct zone set 2 counter return
116+
ip protocol icmp counter
117+
notrack counter
118+
}
119+
120+
chain rawout {
121+
type filter hook output priority raw;
122+
123+
oif veth0 counter ct zone set 1 counter return
124+
oif tvrf counter ct zone set 2 counter return
125+
notrack counter
126+
}
127+
}
128+
EOF
129+
ip netns exec $ns1 ping -W 1 -c 1 -I veth0 $IP0 > /dev/null
130+
131+
# should be in zone 1, not zone 2
132+
count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l)
133+
if [ $count -eq 1 ]; then
134+
echo "PASS: entry found in conntrack zone 1"
135+
else
136+
echo "FAIL: entry not found in conntrack zone 1"
137+
count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l)
138+
if [ $count -eq 1 ]; then
139+
echo "FAIL: entry found in zone 2 instead"
140+
else
141+
echo "FAIL: entry not in zone 1 or 2, dumping table"
142+
ip netns exec $ns0 conntrack -L
143+
ip netns exec $ns0 nft list ruleset
144+
fi
145+
fi
146+
}
147+
148+
# add masq rule that gets evaluated w. outif set to vrf device.
149+
# This tests the first iteration of the packet through conntrack,
150+
# oifname is the vrf device.
151+
test_masquerade_vrf()
152+
{
153+
ip netns exec $ns0 conntrack -F 2>/dev/null
154+
155+
ip netns exec $ns0 nft -f - <<EOF
156+
flush ruleset
157+
table ip nat {
158+
chain postrouting {
159+
type nat hook postrouting priority 0;
160+
# NB: masquerade should always be combined with 'oif(name) bla',
161+
# lack of this is intentional here, we want to exercise double-snat.
162+
ip saddr 172.30.30.0/30 counter masquerade random
163+
}
164+
}
165+
EOF
166+
ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 >/dev/null
167+
if [ $? -ne 0 ]; then
168+
echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on vrf device"
169+
ret=1
170+
return
171+
fi
172+
173+
# must also check that nat table was evaluated on second (lower device) iteration.
174+
ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2'
175+
if [ $? -eq 0 ]; then
176+
echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device"
177+
else
178+
echo "FAIL: vrf masq rule has unexpected counter value"
179+
ret=1
180+
fi
181+
}
182+
183+
# add masq rule that gets evaluated w. outif set to veth device.
184+
# This tests the 2nd iteration of the packet through conntrack,
185+
# oifname is the lower device (veth0 in this case).
186+
test_masquerade_veth()
187+
{
188+
ip netns exec $ns0 conntrack -F 2>/dev/null
189+
ip netns exec $ns0 nft -f - <<EOF
190+
flush ruleset
191+
table ip nat {
192+
chain postrouting {
193+
type nat hook postrouting priority 0;
194+
meta oif veth0 ip saddr 172.30.30.0/30 counter masquerade random
195+
}
196+
}
197+
EOF
198+
ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 > /dev/null
199+
if [ $? -ne 0 ]; then
200+
echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on veth device"
201+
ret=1
202+
return
203+
fi
204+
205+
# must also check that nat table was evaluated on second (lower device) iteration.
206+
ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2'
207+
if [ $? -eq 0 ]; then
208+
echo "PASS: iperf3 connect with masquerade + sport rewrite on veth device"
209+
else
210+
echo "FAIL: vrf masq rule has unexpected counter value"
211+
ret=1
212+
fi
213+
}
214+
215+
test_ct_zone_in
216+
test_masquerade_vrf
217+
test_masquerade_veth
218+
219+
exit $ret

tools/testing/selftests/netfilter/nft_nat.sh

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -759,19 +759,21 @@ test_port_shadow()
759759
local result=""
760760
local logmsg=""
761761

762-
echo ROUTER | ip netns exec "$ns0" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 &
763-
nc_r=$!
762+
# make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405.
763+
echo "fake-entry" | ip netns exec "$ns2" timeout 1 socat -u STDIN UDP:"$daddrc":41404,sourceport=1405
764764

765-
echo CLIENT | ip netns exec "$ns2" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 &
766-
nc_c=$!
765+
echo ROUTER | ip netns exec "$ns0" timeout 5 socat -u STDIN UDP4-LISTEN:1405 &
766+
sc_r=$!
767767

768-
# make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405.
769-
echo "fake-entry" | ip netns exec "$ns2" nc -w 1 -p 1405 -u "$daddrc" 41404 > /dev/null
768+
echo CLIENT | ip netns exec "$ns2" timeout 5 socat -u STDIN UDP4-LISTEN:1405,reuseport &
769+
sc_c=$!
770+
771+
sleep 0.3
770772

771773
# ns1 tries to connect to ns0:1405. With default settings this should connect
772774
# to client, it matches the conntrack entry created above.
773775

774-
result=$(echo "" | ip netns exec "$ns1" nc -w 1 -p 41404 -u "$daddrs" 1405)
776+
result=$(echo "data" | ip netns exec "$ns1" timeout 1 socat - UDP:"$daddrs":1405,sourceport=41404)
775777

776778
if [ "$result" = "$expect" ] ;then
777779
echo "PASS: portshadow test $test: got reply from ${expect}${logmsg}"
@@ -780,7 +782,7 @@ test_port_shadow()
780782
ret=1
781783
fi
782784

783-
kill $nc_r $nc_c 2>/dev/null
785+
kill $sc_r $sc_c 2>/dev/null
784786

785787
# flush udp entries for next test round, if any
786788
ip netns exec "$ns0" conntrack -F >/dev/null 2>&1
@@ -816,11 +818,10 @@ table $family raw {
816818
chain prerouting {
817819
type filter hook prerouting priority -300; policy accept;
818820
meta iif veth0 udp dport 1405 notrack
819-
udp dport 1405 notrack
820821
}
821822
chain output {
822823
type filter hook output priority -300; policy accept;
823-
udp sport 1405 notrack
824+
meta oif veth0 udp sport 1405 notrack
824825
}
825826
}
826827
EOF
@@ -851,6 +852,18 @@ test_port_shadowing()
851852
{
852853
local family="ip"
853854

855+
conntrack -h >/dev/null 2>&1
856+
if [ $? -ne 0 ];then
857+
echo "SKIP: Could not run nat port shadowing test without conntrack tool"
858+
return
859+
fi
860+
861+
socat -h > /dev/null 2>&1
862+
if [ $? -ne 0 ];then
863+
echo "SKIP: Could not run nat port shadowing test without socat tool"
864+
return
865+
fi
866+
854867
ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
855868
ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
856869

0 commit comments

Comments
 (0)