Skip to content

Commit

Permalink
ofproto-dpif-xlate: Fix tun_metadata match after recirc
Browse files Browse the repository at this point in the history
Consider the following OpenFlow rules that match on tun_metadata0 after
recirculation.  If we start ICMP flow with tun_metadata0=0x1 follow by
a flow with tun_metadata0=0x3, OVS will incorrectly match the second
flow with the tun_metadata0=0x1 rule.

table=0, in_port=gnv1, icmp, action=ct(table=1)
table=0, in_port=gnv0, icmp  action=ct(table=1)
table=1, in_port=gnv1, icmp, action=output:gnv0
table=1, in_port=gnv0, icmp, ct_state=+trk+new, action=ct(commit),output:gnv1
table=1, in_port=gnv0, icmp, ct_state=+trk+est, tun_metadata0=0x1 action=output:gnv1
table=1, in_port=gnv0, icmp, ct_state=+trk+est, tun_metadata0=0x3 action=output:gnv1

The root cause of this issue is because during recirculation, OVS will overwrite
the tun_metadata0 with the one stored in the frozen state.  This will result in
erroneous behavior if tun_metadata0 is wildcarded in the megaflow as shown below.
Notice that both the second and the third megaflow both carry 0x1 in
tun_metadata0.

recirc_id(0),tunnel(tun_id=0x2,src=172.31.1.1,dst=172.31.1.100,geneve({}{}{}),
flags(-df-csum+key)),in_port(5),eth(),eth_type(0x0800),ipv4(proto=1,frag=no),
packets:213, bytes:20774, used:0.672s, actions:ct,recirc(0x4)

recirc_id(0x4),tunnel(tun_id=0x2,src=172.31.1.1,dst=172.31.1.100,
geneve({class=0x102,type=0x7,len=4,0x3}{class=0,type=0,len=0}),
flags(-df-csum+key)),in_port(5),ct_state(-new+est+trk),eth(),eth_type(0x0800),
ipv4(proto=1,tos=0/0x3,frag=no), packets:98, bytes:9506, used:0.992s,
actions:set(tunnel(tun_id=0x2,dst=172.31.1.2,ttl=64,tp_src=64725,tp_dst=6081,
geneve({class=0x102,type=0x7,len=4,0x1}),flags(df|key))),5

recirc_id(0x4),tunnel(tun_id=0x2,src=172.31.1.1,dst=172.31.1.100,
geneve({class=0x102,type=0x7,len=4,0x1}{class=0,type=0,len=0}),
flags(-df-csum+key)),in_port(5),ct_state(-new+est+trk),eth(),eth_type(0x0800),
ipv4(proto=1,tos=0/0x3,frag=no), packets:112, bytes:10976, used:0.672s,
actions:set(tunnel(tun_id=0x2,dst=172.31.1.2,ttl=64,tp_src=64725,tp_dst=6081,
geneve({class=0x102,type=0x7,len=4,0x1}),flags(df|key))),5

It seems to be two ways to fix this issue, one is to unwildcarded all the
tunnel metadata when OVS generates megaflow with recirculation.  The other
one is to add a bool flag in the frozen state, and ask OVS to honor the tunnel
metadata from the packet instead of the from the frozen state. Since the
first approach will increase the number of megaflows and may incur performance
impact, this patch takes the second approach.  With this patch, the megaflows
are as following:

recirc_id(0),tunnel(tun_id=0x2,src=172.31.1.1,dst=172.31.1.100,
geneve({}{}{}),flags(-df-csum+key)),in_port(5),eth(),eth_type(0x0800),
ipv4(proto=1,frag=no), packets:60, bytes:5850, used:0.004s,
actions:ct,recirc(0x8)

recirc_id(0x8),tunnel(tun_id=0x2,src=172.31.1.1,dst=172.31.1.100,
geneve({class=0x102,type=0x7,len=4,0x1}{class=0,type=0,len=0}),
flags(-df-csum+key)),in_port(5),ct_state(-new+est+trk),eth(),eth_type(0x0800),
ipv4(proto=1,tos=0/0x3,frag=no), packets:29, bytes:2842, used:0.356s,
actions:set(tunnel(tun_id=0x2,dst=172.31.1.2,ttl=64,tp_src=64725,tp_dst=6081,
geneve({class=0x102,type=0x7,len=4,0x1}),flags(df|key))),5

recirc_id(0x8),tunnel(tun_id=0x2,src=172.31.1.1,dst=172.31.1.100,
geneve({class=0x102,type=0x7,len=4,0x3}{class=0,type=0,len=0}),
flags(-df-csum+key)),in_port(5),ct_state(-new+est+trk), eth(),eth_type(0x0800),
ipv4(proto=1,tos=0/0x3,frag=no), packets:28, bytes:2716, used:0.004s,
actions:set(tunnel(tun_id=0x2,dst=172.31.1.2,ttl=64,tp_src=64725,tp_dst=6081,
geneve({class=0x102,type=0x7,len=4,0x3}),flags(df|key))),5

VMWare-BZ: 2617696
Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
  • Loading branch information
YiHungWei committed Aug 19, 2020
1 parent 5601e86 commit 2c5d0f3
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 4 deletions.
8 changes: 6 additions & 2 deletions ofproto/ofproto-dpif-rid.h
Expand Up @@ -131,9 +131,12 @@ frozen_metadata_from_flow(struct frozen_metadata *md,
static inline void
frozen_metadata_to_flow(struct ofproto *ofproto,
const struct frozen_metadata *md,
struct flow *flow)
struct flow *flow,
bool from_tunnel)
{
flow->tunnel = md->tunnel;
if (!from_tunnel) {
flow->tunnel = md->tunnel;
}
flow->tunnel.metadata.tab = ofproto_get_tun_tab(ofproto);
flow->metadata = md->metadata;
memcpy(flow->regs, md->regs, sizeof flow->regs);
Expand All @@ -153,6 +156,7 @@ struct frozen_state {
size_t stack_size;
mirror_mask_t mirrors; /* Mirrors already output. */
bool conntracked; /* Conntrack occurred prior to freeze. */
bool from_tunnel; /* Flow is from a tunnel port. */
bool was_mpls; /* MPLS packet */
struct uuid xport_uuid; /* UUID of 1st port packet received on. */

Expand Down
16 changes: 15 additions & 1 deletion ofproto/ofproto-dpif-upcall.c
Expand Up @@ -1536,8 +1536,22 @@ process_upcall(struct udpif *udpif, struct upcall *upcall,
flow_clear_conntrack(&frozen_flow);
}

/* Packet-in message expects tunnel metadata in non-udpif
* format. */
if (flow->tunnel.flags & FLOW_TNL_F_UDPIF) {
const struct tun_table *tun_tab =
ofproto_get_tun_tab(&upcall->ofproto->up);
int err =
tun_metadata_from_geneve_udpif(tun_tab, &flow->tunnel,
&flow->tunnel,
&frozen_flow.tunnel);
if (err) {
return err;
}
}

frozen_metadata_to_flow(&upcall->ofproto->up, &state->metadata,
&frozen_flow);
&frozen_flow, state->from_tunnel);
flow_get_metadata(&frozen_flow, &am->pin.up.base.flow_metadata);

ofproto_dpif_send_async_msg(upcall->ofproto, am);
Expand Down
16 changes: 15 additions & 1 deletion ofproto/ofproto-dpif-xlate.c
Expand Up @@ -396,6 +396,11 @@ struct xlate_ctx {
* state from the datapath should be honored after thawing. */
bool conntracked;

/* True if a packet is form a tunnel port. This is used to determine
* whether tunnel information from the datapath should be honored after
* thawing. */
bool from_tunnel;

/* Pointer to an embedded NAT action in a conntrack action, or NULL. */
struct ofpact_nat *ct_nat_action;

Expand Down Expand Up @@ -4858,6 +4863,7 @@ xlate_controller_action(struct xlate_ctx *ctx, int len,
.stack_size = ctx->stack.size,
.mirrors = ctx->mirrors,
.conntracked = ctx->conntracked,
.from_tunnel = ctx->from_tunnel,
.was_mpls = ctx->was_mpls,
.ofpacts = NULL,
.ofpacts_len = 0,
Expand Down Expand Up @@ -4933,6 +4939,7 @@ finish_freezing__(struct xlate_ctx *ctx, uint8_t table)
.stack_size = ctx->stack.size,
.mirrors = ctx->mirrors,
.conntracked = ctx->conntracked,
.from_tunnel = ctx->from_tunnel,
.was_mpls = ctx->was_mpls,
.xport_uuid = ctx->xin->xport_uuid,
.ofpacts = ctx->frozen_actions.data,
Expand Down Expand Up @@ -7513,6 +7520,7 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)

.was_mpls = false,
.conntracked = false,
.from_tunnel = false,

.ct_nat_action = NULL,

Expand Down Expand Up @@ -7580,7 +7588,8 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
/* Restore pipeline metadata. May change flow's in_port and other
* metadata to the values that existed when freezing was triggered. */
frozen_metadata_to_flow(&ctx.xbridge->ofproto->up,
&state->metadata, flow);
&state->metadata, flow,
state->from_tunnel);

/* Restore stack, if any. */
if (state->stack) {
Expand Down Expand Up @@ -7649,6 +7658,10 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
ctx.xin->xport_uuid = in_port->uuid;
}

if (in_port && in_port->is_tunnel) {
ctx.from_tunnel = true;
}

if (flow->packet_type != htonl(PT_ETH) && in_port &&
in_port->pt_mode == NETDEV_PT_LEGACY_L3 && ctx.table_id == 0) {
/* Add dummy Ethernet header to non-L2 packet if it's coming from a
Expand Down Expand Up @@ -7917,6 +7930,7 @@ xlate_resume(struct ofproto_dpif *ofproto,
.mirrors = pin->mirrors,
.conntracked = pin->conntracked,
.xport_uuid = UUID_ZERO,
.from_tunnel = false,

/* When there are no actions, xlate_actions() will search the flow
* table. We don't want it to do that (we want it to resume), so
Expand Down
31 changes: 31 additions & 0 deletions tests/tunnel.at
Expand Up @@ -949,6 +949,37 @@ Datapath actions: set(tunnel(dst=1.1.1.1,ttl=64,tp_dst=6081,geneve({class=0xffff
OVS_VSWITCHD_STOP
AT_CLEANUP

AT_SETUP([tunnel - Match tun_metadata after recirc])
OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=geneve \
options:remote_ip=1.1.1.1 ofport_request=1 \
-- add-port br0 p2 -- set Interface p2 type=dummy \
ofport_request=2 ofport_request=2])
OVS_VSWITCHD_DISABLE_TUNNEL_PUSH_POP

AT_CHECK([ovs-ofctl add-tlv-map br0 "{class=0xffff,type=0,len=4}->tun_metadata0"])

AT_DATA([flows.txt], [dnl
table=0,udp,actions=ct(table=1)
table=1,udp,tun_metadata0=0x11,actions=IN_PORT
table=1,udp,tun_metadata0=0x22,actions=drop
])
AT_CHECK([ovs-ofctl add-flows br0 flows.txt])

AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'recirc_id(0),tunnel(tun_id=0x0,src=1.1.1.1,dst=1.1.1.2,ttl=64,geneve({class=0xffff,type=0,len=4,0x11}),flags(df|key)),in_port(6081),skb_mark(0),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.1.1,dst=192.168.1.2,proto=17,tos=0,ttl=64,frag=no),udp(src=1234,dst=5678)' -generate], [0], [stdout])
AT_CHECK([tail -2 stdout], [0],
[Megaflow: recirc_id=0x1,eth,udp,tun_id=0,tun_src=1.1.1.1,tun_dst=1.1.1.2,tun_tos=0,tun_flags=+df-csum+key,tun_metadata0=0x11,in_port=1,nw_ecn=0,nw_frag=no
Datapath actions: set(tunnel(dst=1.1.1.1,ttl=64,tp_dst=6081,geneve({class=0xffff,type=0,len=4,0x11}),flags(df))),6081
])

AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'recirc_id(0x1),tunnel(tun_id=0x0,src=1.1.1.1,dst=1.1.1.2,ttl=64,geneve({class=0xffff,type=0,len=4,0x22}),flags(df|key)),in_port(6081),skb_mark(0),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.1.1,dst=192.168.1.2,proto=17,tos=0,ttl=64,frag=no),udp(src=1234,dst=5678)'], [0], [stdout])
AT_CHECK([tail -2 stdout], [0],
[Megaflow: recirc_id=0x1,eth,udp,tun_id=0,tun_src=1.1.1.1,tun_dst=1.1.1.2,tun_tos=0,tun_flags=+df-csum+key,tun_metadata0=0x22,in_port=1,nw_frag=no
Datapath actions: drop
])

OVS_VSWITCHD_STOP
AT_CLEANUP

AT_SETUP([tunnel - concomitant IPv6 and IPv4 tunnels])
OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=vxlan \
options:remote_ip=1.1.1.1 ofport_request=1 \
Expand Down

0 comments on commit 2c5d0f3

Please sign in to comment.