Skip to content

Commit

Permalink
routing: Fix route collisions in AWS ENI
Browse files Browse the repository at this point in the history
This commit fixes a potential route collision in AWS ENI IPAM modes,
where the ifindex could equal the main routing table ID (from 253-255)
[1], causing traffic to be subject to these routes incorrectly. This is
admittedly rare, but we've seen this from a user report. The impact is
that most traffic on the node is suddenly blackholed.

To fix this, we say that each device or interface (ENI) will have their
own dedicated routing table. The table ID will start with an offset of
10 because it is highly unlikely to collide with the main routing table
ID (from 253-255). We grab the number associated with the ENI device
(`Number`) and add the offset. For example, if we have an ENI device
"eni-0" which has a `Number` of 5, then the table ID will be 10 + 5 =
15.

Another important piece to note is that only the egress rule will reside
inside the per-device tables, whereas the ingress rule will stay in the
main routing table. This is because we want the main routing table to
hold the routes to the endpoint.

Moving forward, the ENI datapath will now create rules under a new
egress priority value (RulePriorityEgressv2), as long as the
egress-multi-home-ip-rule-compat flag is false. If it's true, then the
datapath will create rules under the original egress priority value
(RulePriorityEgress). This helps disambiguate when running with the
older or newer ENI datapath.

See cilium#14336.

[1]: See ip-route(8)

Reported-by: Vlad Ungureanu <vladu@palantir.com>
Suggested-by: Joe Stringer <joe@cilium.io>
Suggested-by: Thomas Graf <thomas@cilium.io>
Signed-off-by: Chris Tarazi <chris@isovalent.com>
  • Loading branch information
christarazi authored and joestringer committed Jan 6, 2021
1 parent 1b9ed44 commit 332a3fd
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 27 deletions.
9 changes: 6 additions & 3 deletions cilium-health/launch/endpoint.go
Expand Up @@ -345,9 +345,12 @@ func LaunchAsEndpoint(baseCtx context.Context,
}

if option.Config.IPAM == option.IPAMENI {
if err := routingConfig.Configure(healthIP,
if err := routingConfig.Configure(
healthIP,
mtuConfig.GetDeviceMTU(),
option.Config.Masquerade); err != nil {
option.Config.Masquerade,
option.Config.EgressMultiHomeIPRuleCompat,
); err != nil {

return nil, fmt.Errorf("Error while configuring health endpoint rules and routes: %s", err)
}
Expand All @@ -374,5 +377,5 @@ func LaunchAsEndpoint(baseCtx context.Context,
}

type routingConfigurer interface {
Configure(ip net.IP, mtu int, masq bool) error
Configure(ip net.IP, mtu int, masq, compat bool) error
}
69 changes: 51 additions & 18 deletions pkg/datapath/linux/routing/routing.go
Expand Up @@ -35,19 +35,22 @@ var (

// Configure sets up the rules and routes needed when running in ENI mode.
// These rules and routes direct egress traffic out of the ENI device and
// ingress traffic back to the endpoint (`ip`).
// ingress traffic back to the endpoint (`ip`). The compat flag controls which
// egress priority to consider when deleting the egress rules (see
// option.Config.EgressMultiHomeIPRuleCompat).
//
// ip: The endpoint IP address to direct traffic out / from ENI device.
// info: The ENI device routing info used to create rules and routes.
// mtu: The ENI device MTU.
// masq: Whether masquerading is enabled.
func (info *RoutingInfo) Configure(ip net.IP, mtu int, masq bool) error {
func (info *RoutingInfo) Configure(ip net.IP, mtu int, masq, compat bool) error {
if ip.To4() == nil {
log.WithFields(logrus.Fields{
"endpointIP": ip,
}).Warning("Unable to configure rules and routes because IP is not an IPv4 address")
return errors.New("IP not compatible")
}

ifindex, err := retrieveIfIndexFromMAC(info.MasterIfMAC, mtu)
if err != nil {
return fmt.Errorf("unable to find ifindex for interface MAC: %s", err)
Expand All @@ -58,7 +61,8 @@ func (info *RoutingInfo) Configure(ip net.IP, mtu int, masq bool) error {
Mask: net.CIDRMask(32, 32),
}

// Route all traffic to the ENI address via the main routing table
// On ingress, route all traffic to the endpoint IP via the main routing
// table. Egress rules are created in a per-ENI routing table.
if err := route.ReplaceRule(route.Rule{
Priority: linux_defaults.RulePriorityIngress,
To: &ipWithMask,
Expand All @@ -67,25 +71,34 @@ func (info *RoutingInfo) Configure(ip net.IP, mtu int, masq bool) error {
return fmt.Errorf("unable to install ip rule: %s", err)
}

var egressPriority, tableID int
if compat {
egressPriority = linux_defaults.RulePriorityEgress
tableID = ifindex
} else {
egressPriority = linux_defaults.RulePriorityEgressv2
tableID = computeTableIDFromIfaceNumber(info.InterfaceNumber)
}

if masq {
// Lookup a VPC specific table for all traffic from an endpoint to the
// CIDR configured for the VPC on which the endpoint has the IP on.
for _, cidr := range info.IPv4CIDRs {
if err := route.ReplaceRule(route.Rule{
Priority: linux_defaults.RulePriorityEgress,
Priority: egressPriority,
From: &ipWithMask,
To: &cidr,
Table: ifindex,
Table: tableID,
}); err != nil {
return fmt.Errorf("unable to install ip rule: %s", err)
}
}
} else {
// Lookup a VPC specific table for all traffic from an endpoint.
if err := route.ReplaceRule(route.Rule{
Priority: linux_defaults.RulePriorityEgress,
Priority: egressPriority,
From: &ipWithMask,
Table: ifindex,
Table: tableID,
}); err != nil {
return fmt.Errorf("unable to install ip rule: %s", err)
}
Expand All @@ -99,15 +112,15 @@ func (info *RoutingInfo) Configure(ip net.IP, mtu int, masq bool) error {
LinkIndex: ifindex,
Dst: &net.IPNet{IP: info.IPv4Gateway, Mask: net.CIDRMask(32, 32)},
Scope: netlink.SCOPE_LINK,
Table: ifindex,
Table: tableID,
}); err != nil {
return fmt.Errorf("unable to add L2 nexthop route: %s", err)
}

// Default route to the VPC or subnet gateway
if err := netlink.RouteReplace(&netlink.Route{
Dst: &net.IPNet{IP: net.IPv4zero, Mask: net.CIDRMask(0, 32)},
Table: ifindex,
Table: tableID,
Gw: info.IPv4Gateway,
}); err != nil {
return fmt.Errorf("unable to add L2 nexthop route: %s", err)
Expand All @@ -117,14 +130,25 @@ func (info *RoutingInfo) Configure(ip net.IP, mtu int, masq bool) error {
}

// Delete removes the ingress and egress rules that control traffic for
// endpoints. Note that the routes within these rules are not deleted as they
// can be reused when another endpoint is created on the same node. The reason
// for this is that ENI devices under-the-hood are simply network interfaces
// and all network interfaces have an ifindex. This index is then used as the
// table ID when these rules are created. The routes are created inside a table
// with this ID, and because this table ID equals the ENI ifindex, it's stable
// to rely on and therefore can be reused.
func Delete(ip net.IP) error {
// endpoints. Note that the routes referenced by the rules are not deleted as
// they can be reused when another endpoint is created on the same node. The
// compat flag controls which egress priority to consider when deleting the
// egress rules (see option.Config.EgressMultiHomeIPRuleCompat).
//
// Note that one or more IPs may share the same route table, as identified by
// the interface number of the corresponding device. This function only removes
// the ingress and egress rules to disconnect the per-ENI egress routes from a
// specific local IP, and does not remove the corresponding route table as
// other IPs may still be using that table.
//
// The search for both the ingress & egress rule corresponding to this IP is a
// best-effort based on the respective priority that Cilium uses, which we
// assume full control over. The search for the ingress rule is more likely to
// succeed (albeit very rarely that egress deletion fails) because we are able
// to perform a narrower search on the rule because we know it references the
// main routing table. Deletion of both rules only proceeds if one rule matches
// the IP & priority. If more than one rule match, then deletion is skipped.
func Delete(ip net.IP, compat bool) error {
if ip.To4() == nil {
log.WithFields(logrus.Fields{
"endpointIP": ip,
Expand Down Expand Up @@ -152,9 +176,14 @@ func Delete(ip net.IP) error {

scopedLog.WithField("rule", ingress).Debug("Deleted ingress rule")

priority := linux_defaults.RulePriorityEgressv2
if compat {
priority = linux_defaults.RulePriorityEgress
}

// Egress rules
egress := route.Rule{
Priority: linux_defaults.RulePriorityEgress,
Priority: priority,
From: &ipWithMask,
}
if err := deleteRule(egress); err != nil {
Expand Down Expand Up @@ -225,3 +254,7 @@ func retrieveIfIndexFromMAC(mac mac.MAC, mtu int) (index int, err error) {
err = fmt.Errorf("interface with MAC %s not found", mac)
return
}

func computeTableIDFromIfaceNumber(num int) int {
return linux_defaults.RouteTableInterfacesOffset + num
}
10 changes: 5 additions & 5 deletions pkg/datapath/linux/routing/routing_test.go
Expand Up @@ -60,15 +60,15 @@ func (e *LinuxRoutingSuite) TestConfigureRoutewithIncompatibleIP(c *C) {
_, ri := getFakes(c)
ipv6 := net.ParseIP("fd00::2").To16()
c.Assert(ipv6, NotNil)
err := ri.Configure(ipv6, 1500, true)
err := ri.Configure(ipv6, 1500, true, false)
c.Assert(err, NotNil)
c.Assert(err, ErrorMatches, "IP not compatible")
}

func (e *LinuxRoutingSuite) TestDeleteRoutewithIncompatibleIP(c *C) {
ipv6 := net.ParseIP("fd00::2").To16()
c.Assert(ipv6, NotNil)
err := Delete(ipv6)
err := Delete(ipv6, false)
c.Assert(err, NotNil)
c.Assert(err, ErrorMatches, "IP not compatible")
}
Expand Down Expand Up @@ -137,7 +137,7 @@ func (e *LinuxRoutingSuite) TestDelete(c *C) {
defer ifaceCleanup()

ip := tt.preRun()
err := Delete(ip)
err := Delete(ip, false)
c.Assert((err != nil), Equals, tt.wantErr)
})
}
Expand Down Expand Up @@ -183,12 +183,12 @@ func runConfigureThenDelete(c *C, ri RoutingInfo, ip net.IP, mtu int, masq bool)
}

func runConfigure(c *C, ri RoutingInfo, ip net.IP, mtu int, masq bool) {
err := ri.Configure(ip, mtu, masq)
err := ri.Configure(ip, mtu, masq, false)
c.Assert(err, IsNil)
}

func runDelete(c *C, ip net.IP) {
err := Delete(ip)
err := Delete(ip, false)
c.Assert(err, IsNil)
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/endpoint/endpoint.go
Expand Up @@ -2159,7 +2159,7 @@ func (e *Endpoint) Delete(monitor monitorOwner, ipam ipReleaser, manager endpoin
// ingress rule and one egress rule. If we find more than one rule in
// either case, then the rules will be left as-is because there was
// likely manual intervention.
if err := linuxrouting.Delete(e.IPv4.IP()); err != nil {
if err := linuxrouting.Delete(e.IPv4.IP(), option.Config.EgressMultiHomeIPRuleCompat); err != nil {
errs = append(errs, fmt.Errorf("unable to delete endpoint ENI rules: %s", err))
}
}
Expand Down
1 change: 1 addition & 0 deletions plugins/cilium-cni/eni.go
Expand Up @@ -48,6 +48,7 @@ func eniAdd(ipConfig *current.IPConfig, ipam *models.IPAMAddressResponse, conf m
ipConfig.Address.IP,
int(conf.DeviceMTU),
conf.Masquerade,
conf.EgressMultiHomeIPRuleCompat,
); err != nil {
return fmt.Errorf("unable to install ip rules and routes: %s", err)
}
Expand Down

0 comments on commit 332a3fd

Please sign in to comment.