Skip to content

Commit

Permalink
conntrack: document all-zero IP SNAT behavior and add a test case
Browse files Browse the repository at this point in the history
Currently, conntrack in the kernel has an undocumented feature referred
to as all-zero IP address SNAT. Basically, when a source port
collision is detected during the commit, the source port will be
translated to an ephemeral port. If there is no collision, no SNAT is
performed.

This patchset documents this behavior and adds a self-test to verify
it's not changing. In addition, a datapath feature flag is added for
the all-zero IP SNAT case. This will help applications on top of OVS,
like OVN, to determine this feature can be used.

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
  • Loading branch information
chaudron authored and dceara committed Jun 3, 2021
1 parent a4b0427 commit 9d0ec02
Show file tree
Hide file tree
Showing 12 changed files with 138 additions and 2 deletions.
8 changes: 8 additions & 0 deletions lib/ct-dpif.c
Expand Up @@ -889,3 +889,11 @@ ct_dpif_get_timeout_policy_name(struct dpif *dpif, uint32_t tp_id,
dpif, tp_id, dl_type, nw_proto, tp_name, is_generic)
: EOPNOTSUPP);
}

int
ct_dpif_get_features(struct dpif *dpif, enum ct_features *features)
{
return (dpif->dpif_class->ct_get_features
? dpif->dpif_class->ct_get_features(dpif, features)
: EOPNOTSUPP);
}
6 changes: 6 additions & 0 deletions lib/ct-dpif.h
Expand Up @@ -271,6 +271,11 @@ struct ct_dpif_timeout_policy {
* timeout attribute values */
};

/* Conntrack Features. */
enum ct_features {
CONNTRACK_F_ZERO_SNAT = 1 << 0, /* All-zero SNAT support. */
};

int ct_dpif_dump_start(struct dpif *, struct ct_dpif_dump_state **,
const uint16_t *zone, int *);
int ct_dpif_dump_next(struct ct_dpif_dump_state *, struct ct_dpif_entry *);
Expand Down Expand Up @@ -325,5 +330,6 @@ int ct_dpif_timeout_policy_dump_done(struct dpif *dpif, void *state);
int ct_dpif_get_timeout_policy_name(struct dpif *dpif, uint32_t tp_id,
uint16_t dl_type, uint8_t nw_proto,
char **tp_name, bool *is_generic);
int ct_dpif_get_features(struct dpif *dpif, enum ct_features *features);

#endif /* CT_DPIF_H */
1 change: 1 addition & 0 deletions lib/dpif-netdev.c
Expand Up @@ -8516,6 +8516,7 @@ const struct dpif_class dpif_netdev_class = {
NULL, /* ct_timeout_policy_dump_next */
NULL, /* ct_timeout_policy_dump_done */
dpif_netdev_ct_get_timeout_policy_name,
NULL, /* ct_get_features */
dpif_netdev_ipf_set_enabled,
dpif_netdev_ipf_set_min_frag,
dpif_netdev_ipf_set_max_nfrags,
Expand Down
11 changes: 11 additions & 0 deletions lib/dpif-netlink.c
Expand Up @@ -3165,6 +3165,16 @@ dpif_netlink_ct_get_timeout_policy_name(struct dpif *dpif OVS_UNUSED,
return 0;
}

static int
dpif_netlink_ct_get_features(struct dpif *dpif OVS_UNUSED,
enum ct_features *features)
{
if (features != NULL) {
*features = CONNTRACK_F_ZERO_SNAT;
}
return 0;
}

#define CT_DPIF_NL_TP_TCP_MAPPINGS \
CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_SENT, SYN_SENT) \
CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_RECV, SYN_RECV) \
Expand Down Expand Up @@ -4007,6 +4017,7 @@ const struct dpif_class dpif_netlink_class = {
dpif_netlink_ct_timeout_policy_dump_next,
dpif_netlink_ct_timeout_policy_dump_done,
dpif_netlink_ct_get_timeout_policy_name,
dpif_netlink_ct_get_features,
NULL, /* ipf_set_enabled */
NULL, /* ipf_set_min_frag */
NULL, /* ipf_set_max_nfrags */
Expand Down
5 changes: 5 additions & 0 deletions lib/dpif-provider.h
Expand Up @@ -81,6 +81,7 @@ struct ct_dpif_dump_state;
struct ct_dpif_entry;
struct ct_dpif_tuple;
struct ct_dpif_timeout_policy;
enum ct_features;

/* 'dpif_ipf_proto_status' and 'dpif_ipf_status' are presently in
* sync with 'ipf_proto_status' and 'ipf_status', but more
Expand Down Expand Up @@ -562,6 +563,10 @@ struct dpif_class {
uint16_t dl_type, uint8_t nw_proto,
char **tp_name, bool *is_generic);

/* Stores the conntrack features supported by 'dpif' into features.
* The value is a bitmap of CONNTRACK_F_* bits. */
int (*ct_get_features)(struct dpif *, enum ct_features *features);

/* IP Fragmentation. */

/* Disables or enables conntrack fragment reassembly. The default
Expand Down
10 changes: 10 additions & 0 deletions lib/ovs-actions.xml
Expand Up @@ -2132,6 +2132,16 @@ for <var>i</var> in [1,<var>n_members</var>]:
connection, will behave the same as a bare <code>nat</code>.
</p>

<p>
For SNAT, there is a special case when the <code>src</code> IP
address is configured as all 0's, i.e.,
<code>nat(src=0.0.0.0)</code>. In this case, when a source port
collision is detected during the commit, the source port will be
translated to an ephemeral port. If there is no collision, no SNAT
is performed. Note that this is currently only implemented in the
Linux kernel datapath.
</p>

<p>
Open vSwitch 2.6 introduced <code>nat</code>. Linux 4.6 was the
earliest upstream kernel that implemented <code>ct</code> support for
Expand Down
22 changes: 21 additions & 1 deletion ofproto/ofproto-dpif.c
Expand Up @@ -1389,6 +1389,24 @@ check_ct_timeout_policy(struct dpif_backer *backer)
return !error;
}

/* Tests whether 'backer''s datapath supports the all-zero SNAT case. */
static bool
dpif_supports_ct_zero_snat(struct dpif_backer *backer)
{
enum ct_features features;
bool supported = false;

if (!ct_dpif_get_features(backer->dpif, &features)) {
if (features & CONNTRACK_F_ZERO_SNAT) {
supported = true;
}
}
VLOG_INFO("%s: Datapath %s ct_zero_snat",
dpif_name(backer->dpif), (supported) ? "supports"
: "does not support");
return supported;
}

/* Tests whether 'backer''s datapath supports the
* OVS_ACTION_ATTR_CHECK_PKT_LEN action. */
static bool
Expand Down Expand Up @@ -1588,8 +1606,9 @@ check_support(struct dpif_backer *backer)
backer->rt_support.ct_timeout = check_ct_timeout_policy(backer);
backer->rt_support.explicit_drop_action =
dpif_supports_explicit_drop_action(backer->dpif);
backer->rt_support.lb_output_action=
backer->rt_support.lb_output_action =
dpif_supports_lb_output_action(backer->dpif);
backer->rt_support.ct_zero_snat = dpif_supports_ct_zero_snat(backer);

/* Flow fields. */
backer->rt_support.odp.ct_state = check_ct_state(backer);
Expand Down Expand Up @@ -5605,6 +5624,7 @@ get_datapath_cap(const char *datapath_type, struct smap *cap)
smap_add(cap, "explicit_drop_action",
s.explicit_drop_action ? "true" :"false");
smap_add(cap, "lb_output_action", s.lb_output_action ? "true" : "false");
smap_add(cap, "ct_zero_snat", s.ct_zero_snat ? "true" : "false");
}

/* Gets timeout policy name in 'backer' based on 'zone', 'dl_type' and
Expand Down
5 changes: 4 additions & 1 deletion ofproto/ofproto-dpif.h
Expand Up @@ -204,7 +204,10 @@ struct group_dpif *group_dpif_lookup(struct ofproto_dpif *,
DPIF_SUPPORT_FIELD(bool, explicit_drop_action, "Explicit Drop action") \
\
/* True if the datapath supports balance_tcp optimization */ \
DPIF_SUPPORT_FIELD(bool, lb_output_action, "Optimized Balance TCP mode")
DPIF_SUPPORT_FIELD(bool, lb_output_action, "Optimized Balance TCP mode")\
\
/* True if the datapath supports all-zero IP SNAT. */ \
DPIF_SUPPORT_FIELD(bool, ct_zero_snat, "Conntrack all-zero IP SNAT")


/* Stores the various features which the corresponding backer supports. */
Expand Down
7 changes: 7 additions & 0 deletions tests/system-kmod-macros.at
Expand Up @@ -99,6 +99,13 @@ m4_define([CHECK_CONNTRACK_FRAG_OVERLAP],
#
m4_define([CHECK_CONNTRACK_NAT])

# CHECK_CONNTRACK_ZEROIP_SNAT()
#
# Perform requirements checks for running conntrack all-zero IP SNAT tests.
# The kernel always supports all-zero IP SNAT, so no check is needed.
#
m4_define([CHECK_CONNTRACK_ZEROIP_SNAT])

# CHECK_CONNTRACK_TIMEOUT()
#
# Perform requirements checks for running conntrack customized timeout tests.
Expand Down
46 changes: 46 additions & 0 deletions tests/system-traffic.at
Expand Up @@ -4433,6 +4433,52 @@ tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=
OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP


AT_SETUP([conntrack - all-zero IP SNAT])
AT_SKIP_IF([test $HAVE_NC = no])
CHECK_CONNTRACK()
CHECK_CONNTRACK_ZEROIP_SNAT()
OVS_TRAFFIC_VSWITCHD_START()

ADD_NAMESPACES(at_ns0, at_ns1)
ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
NS_CHECK_EXEC([at_ns0], [ip route add 172.1.1.0/24 via 10.1.1.2])

OVS_START_L7([at_ns1], [http])

AT_DATA([flows.txt], [dnl
table=0,priority=30,ct_state=-trk,ip,action=ct(table=0)
table=0,priority=20,ct_state=-rpl,ip,nw_dst=10.1.1.0/24,actions=ct(commit,nat(src=0.0.0.0),table=10)
table=0,priority=20,ct_state=+rpl,ip,nw_dst=10.1.1.0/24,actions=resubmit(,10)
table=0,priority=20,ip,nw_dst=172.1.1.2,actions=ct(commit,nat(dst=10.1.1.2),table=10)
table=0,priority=10,arp,action=normal
table=0,priority=1,action=drop
table=10,priority=20,ct_state=+rpl,ip,nw_dst=10.1.1.0/24 actions=ct(table=20,nat)
table=10,priority=10,ip,nw_dst=10.1.1.0/24 actions=resubmit(,20)
table=20,priority=10,ip,nw_dst=10.1.1.1,action=1
table=20,priority=10,ip,nw_dst=10.1.1.2,action=2
])
AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])

dnl - Test to make sure src nat is NOT done when not needed
NS_CHECK_EXEC([at_ns0], [echo "TEST" | nc -p 30000 10.1.1.2 80 > nc-1.log])
AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=10\.1\.1\.1,"], [0], [dnl
tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=30000,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=30000),protoinfo=(state=TIME_WAIT)
])

dnl - Test to make sure src nat is done when needed
NS_CHECK_EXEC([at_ns0], [echo "TEST2" | nc -p 30001 172.1.1.2 80 > nc-2.log])
NS_CHECK_EXEC([at_ns0], [echo "TEST3" | nc -p 30001 10.1.1.2 80 > nc-3.log])
AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 30001 | grep "orig=.src=10\.1\.1\.1," | sed -e 's/port=30001/port=<clnt_s_port>/g' -e 's/sport=80,dport=[[0-9]]\+/sport=80,dport=<rnd_port>/g' | sort], [0], [dnl
tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<clnt_s_port>,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=<rnd_port>),protoinfo=(state=TIME_WAIT)
tcp,orig=(src=10.1.1.1,dst=172.1.1.2,sport=<clnt_s_port>,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=<clnt_s_port>),protoinfo=(state=TIME_WAIT)
])

OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP


AT_SETUP([conntrack - simple DNAT])
CHECK_CONNTRACK()
CHECK_CONNTRACK_NAT()
Expand Down
10 changes: 10 additions & 0 deletions tests/system-userspace-macros.at
Expand Up @@ -96,6 +96,16 @@ m4_define([CHECK_CONNTRACK_FRAG_OVERLAP])
#
m4_define([CHECK_CONNTRACK_NAT])

# CHECK_CONNTRACK_ZEROIP_SNAT()
#
# Perform requirements checks for running conntrack all-zero IP SNAT tests.
# The userspace datapath does not support all-zero IP SNAT.
#
m4_define([CHECK_CONNTRACK_ZEROIP_SNAT],
[
AT_SKIP_IF([:])
])

# CHECK_CONNTRACK_TIMEOUT()
#
# Perform requirements checks for running conntrack customized timeout tests.
Expand Down
9 changes: 9 additions & 0 deletions vswitchd/vswitch.xml
Expand Up @@ -6126,6 +6126,15 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \
True if the datapath supports OVS_ACTION_ATTR_DROP. If false,
explicit drop action will not be sent to the datapath.
</column>
<column name="capabilities" key="ct_zero_snat"
type='{"type": "boolean"}'>
True if the datapath supports all-zero SNAT. This is a special case
if the <code>src</code> IP address is configured as all 0's, i.e.,
<code>nat(src=0.0.0.0)</code>. In this case, when a source port
collision is detected during the commit, the source port will be
translated to an ephemeral port. If there is no collision, no SNAT
is performed.
</column>
</group>

<group title="Common Columns">
Expand Down

0 comments on commit 9d0ec02

Please sign in to comment.