daemon, node: Fix faulty router IP restoration logic
[ upstream commit ff63b07 ]

When running in ENI or Alibaba IPAM mode, or any other CRD-backed IPAM mode
("crd"), it was very likely that `cilium_host` was assigned an additional IP
upon each Cilium restart. Below is a case where Cilium was restarted 3 times,
hence getting 3 additional router IPs:

```
4: cilium_host@cilium_net: <BROADCAST,MULTICAST,NOARP,UP,LOWER_UP> mtu 9001 qdisc noqueue state UP group default qlen 1000
    link/ether 66:03:3c:07:8c:47 brd ff:ff:ff:ff:ff:ff
    inet 192.168.35.9/32 scope link cilium_host
       valid_lft forever preferred_lft forever
    inet 192.168.34.37/32 scope link cilium_host
       valid_lft forever preferred_lft forever
    inet 192.168.57.107/32 scope link cilium_host
       valid_lft forever preferred_lft forever
    inet6 fe80::6403:3cff:fe07:8c47/64 scope link
       valid_lft forever preferred_lft forever
```

This was because in CRD-backed IPAM modes, we wait until we have fully synced
with K8s in order to derive the VPC CIDR, which becomes the pod CIDR on the
node. Since the router IP restoration logic ran before this sync and was
therefore validating against a different pod CIDR, it erroneously discarded
the previous router IP. This was observed with:

```
2021-06-25T13:59:47.816069937Z level=info msg="The router IP (192.168.135.3) considered for restoration does not belong in the Pod CIDR of the node. Discarding old router IP." cidr=10.8.0.0/16 subsys=node
```

This is problematic because the extraneous router IPs could also be assigned
to pods, which would break pod connectivity.
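
As an illustration of the faulty check, here is a minimal standalone Go sketch. The 192.168.135.3 router IP and the 10.8.0.0/16 CIDR come from the log above; the 192.168.0.0/16 VPC CIDR is an assumed value for the example:

```go
package main

import (
	"fmt"
	"net"
)

func main() {
	// Router IP previously written to node_config.h by the last Cilium run.
	routerIP := net.ParseIP("192.168.135.3")

	// Before the fix, the validation ran before the CRD-backed IPAM had
	// synced with K8s, so it used a stale pod CIDR (the 10.8.0.0/16 seen in
	// the log above) instead of the VPC-derived pod CIDR (assumed here to be
	// 192.168.0.0/16 for illustration).
	_, staleCIDR, _ := net.ParseCIDR("10.8.0.0/16")
	_, vpcCIDR, _ := net.ParseCIDR("192.168.0.0/16")

	fmt.Println(staleCIDR.Contains(routerIP)) // false -> old router IP discarded
	fmt.Println(vpcCIDR.Contains(routerIP))   // true  -> IP would have been kept
}
```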

The fix is to break the router IP restoration process up into 2 parts. The
first is to attempt to restore the IP from the filesystem (`node_config.h`).
We also fetch the router IPs from the Kubernetes resources, since they were
already retrieved earlier inside k8s.WaitForNodeInformation().

Then, after the CRD-backed IPAM is initialized and started, i.e. once
(*Daemon).startIPAM() has been called, we attempt the second part: evaluating
which IPs (either from the filesystem or from K8s) should be set as the
router IPs. The IPs from the filesystem take precedence. If the node was
rebooted, the filesystem will have been wiped, so we then rely on the IPs
from the K8s resources. At this point in the daemon initialization, we have
the correct pod CIDR range against which to validate the chosen IP.
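
The following is a minimal standalone sketch of that selection order, not the actual RestoreHostIPs/chooseHostIPsToRestore code shown in the diff below (which additionally reports a mismatch when the two sources disagree); the CIDR and IPs are assumed values:

```go
package main

import (
	"fmt"
	"net"
)

// chooseRouterIP sketches the selection order: the filesystem IP wins when
// both sources are present, the K8s IP is the fallback (e.g. after a reboot
// wiped the filesystem), and the chosen IP must fall inside the pod CIDR
// known at this point of daemon initialization.
func chooseRouterIP(fromFS, fromK8s net.IP, podCIDR *net.IPNet) (net.IP, error) {
	var ip net.IP
	switch {
	case fromFS != nil:
		ip = fromFS
	case fromK8s != nil:
		ip = fromK8s
	default:
		return nil, nil // nothing to restore
	}
	if !podCIDR.Contains(ip) {
		return nil, fmt.Errorf("router IP %s is not in pod CIDR %s; discarding", ip, podCIDR)
	}
	return ip, nil
}

func main() {
	_, podCIDR, _ := net.ParseCIDR("192.168.0.0/16")
	ip, err := chooseRouterIP(net.ParseIP("192.168.135.3"), nil, podCIDR)
	fmt.Println(ip, err) // 192.168.135.3 <nil>
}
```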

Fixes: beb8bde ("k8s, node: Restore router IPs (`cilium_host`) from
K8s resource")

Signed-off-by: Chris Tarazi <chris@isovalent.com>
christarazi authored and aanm committed Jun 29, 2021
1 parent 5fa82da commit c349779
Showing 3 changed files with 122 additions and 82 deletions.
47 changes: 47 additions & 0 deletions daemon/cmd/daemon.go
@@ -30,6 +30,7 @@ import (
"github.com/cilium/cilium/pkg/bgp/speaker"
"github.com/cilium/cilium/pkg/bpf"
"github.com/cilium/cilium/pkg/cgroups"
"github.com/cilium/cilium/pkg/cidr"
"github.com/cilium/cilium/pkg/clustermesh"
"github.com/cilium/cilium/pkg/controller"
"github.com/cilium/cilium/pkg/counter"
@@ -260,6 +261,33 @@ func createPrefixLengthCounter() *counter.PrefixLengthCounter {
return counter.DefaultPrefixLengthCounter(max6, max4)
}

// restoreCiliumHostIPs completes the `cilium_host` IP restoration process
// (part 2/2). This function is called after fully syncing with K8s to ensure
// that the most up-to-date information has been retrieved. At this point, the
// daemon is aware of all the necessary information to restore the appropriate
// IP.
func restoreCiliumHostIPs(ipv6 bool, fromK8s net.IP) {
var (
cidr *cidr.CIDR
fromFS net.IP
)

if ipv6 {
cidr = node.GetIPv6AllocRange()
fromFS = node.GetIPv6Router()
} else {
if ipam := option.Config.IPAMMode(); ipam == ipamOption.IPAMCRD || ipam == ipamOption.IPAMENI || ipam == ipamOption.IPAMAlibabaCloud {
// The native routing CIDR is the pod CIDR in these IPAM modes.
cidr = option.Config.IPv4NativeRoutingCIDR()
} else {
cidr = node.GetIPv4AllocRange()
}
fromFS = node.GetInternalIPv4Router()
}

node.RestoreHostIPs(ipv6, fromK8s, fromFS, cidr)
}

// NewDaemon creates and returns a new Daemon with the parameters set in c.
func NewDaemon(ctx context.Context, cancel context.CancelFunc, epMgr *endpointmanager.EndpointManager, dp datapath.Datapath) (*Daemon, *endpointRestoreState, error) {

@@ -673,6 +701,13 @@ func NewDaemon(ctx context.Context, cancel context.CancelFunc, epMgr *endpointma
bootstrapStats.kvstore.End(true)
}

// Fetch the router (`cilium_host`) IPs in case they were set a priori from
// the Kubernetes or CiliumNode resource in the K8s subsystem via the call to
// k8s.WaitForNodeInformation(). These will be used later, after IPAM
// initialization has started, to finish off the `cilium_host` IP restoration
// (part 2/2).
router4FromK8s, router6FromK8s := node.GetInternalIPv4Router(), node.GetIPv6Router()

// Configure IPAM without using the configuration yet.
d.configureIPAM()

@@ -703,6 +738,18 @@ func NewDaemon(ctx context.Context, cancel context.CancelFunc, epMgr *endpointma
// Start IPAM
d.startIPAM()

// After IPAM is started, in particular in the IPAM modes (CRD, ENI, Alibaba)
// which use the VPC CIDR as the pod CIDR, we must attempt to restore the
// router IPs from the K8s resources if we weren't able to restore them from
// the fs. We must do this after IPAM because we must wait until the K8s
// resources have been synced. This is part 2/2 of the restoration.
if option.Config.EnableIPv4 {
restoreCiliumHostIPs(false, router4FromK8s)
}
if option.Config.EnableIPv6 {
restoreCiliumHostIPs(true, router6FromK8s)
}

// restore endpoints before any IPs are allocated to avoid eventual IP
// conflicts later on, otherwise any IP conflict will result in the
// endpoint not being able to be restored.
61 changes: 28 additions & 33 deletions pkg/node/address.go
@@ -350,19 +350,9 @@ func SetIPv6NodeRange(net *cidr.CIDR) {
// AutoComplete completes the parts of addressing that can be auto derived
func AutoComplete() error {
if option.Config.EnableHostIPRestore {
// Fetch the router (`cilium_host`) IPs in case they were set a priori
// from the Kubernetes or CiliumNode resource in the K8s subsystem.
router4FromK8s, router6FromK8s := GetInternalIPv4Router(), GetIPv6Router()
// At the same time, read the previous cilium_host IPs from
// node_config.h for backward compatibility.
router4FromFS, router6FromFS := getCiliumHostIPs()

if option.Config.EnableIPv4 {
restoreHostIPs(false, router4FromK8s, router4FromFS)
}
if option.Config.EnableIPv6 {
restoreHostIPs(true, router6FromK8s, router6FromFS)
}
// At this point, only attempt to restore the `cilium_host` IPs from
// the filesystem because we haven't fully synced with K8s yet.
restoreCiliumHostIPsFromFS()
}

InitDefaultPrefix(option.Config.DirectRoutingDevice)
@@ -378,30 +368,31 @@ func AutoComplete() error {
return nil
}

// restoreHostIPs restores the router IPs (`cilium_host`) from a previous
// RestoreHostIPs restores the router IPs (`cilium_host`) from a previous
// Cilium run. Router IPs from the filesystem are preferred over the IPs found
// in the Kubernetes resource (Node or CiliumNode), because we consider the
// filesystem to be the most up-to-date source of truth. The chosen router IP
// is then checked for containment inside the node CIDR (pod CIDR) range. If
// it is not, the router IP is discarded and not restored.
func restoreHostIPs(ipv6 bool, fromK8s, fromFS net.IP) {
func RestoreHostIPs(ipv6 bool, fromK8s, fromFS net.IP, cidr *cidr.CIDR) {
if !option.Config.EnableHostIPRestore {
return
}

var (
getter func() *cidr.CIDR
setter func(net.IP)
)
if ipv6 {
getter = GetIPv6AllocRange
setter = SetIPv6Router
} else {
getter = GetIPv4AllocRange
setter = SetInternalIPv4Router
}

ip, err := chooseHostIPsToRestore(ipv6, fromK8s, fromFS)
ip, err := chooseHostIPsToRestore(ipv6, fromK8s, fromFS, cidr)
switch {
case err != nil && errors.Is(err, errDoesNotBelong):
log.WithFields(logrus.Fields{
logfields.CIDR: getter(),
logfields.CIDR: cidr,
}).Infof(
"The router IP (%s) considered for restoration does not belong in the Pod CIDR of the node. Discarding old router IP.",
ip,
@@ -418,7 +409,7 @@ func restoreHostIPs(ipv6 bool, fromK8s, fromFS net.IP) {
}
}

func chooseHostIPsToRestore(ipv6 bool, fromK8s, fromFS net.IP) (ip net.IP, err error) {
func chooseHostIPsToRestore(ipv6 bool, fromK8s, fromFS net.IP, cidr *cidr.CIDR) (ip net.IP, err error) {
switch {
case fromK8s != nil && fromFS != nil:
if fromK8s.Equal(fromFS) {
@@ -437,25 +428,29 @@ func chooseHostIPsToRestore(ipv6 bool, fromK8s, fromFS net.IP) (ip net.IP, err e
return
}

var getter func() *cidr.CIDR
if ipv6 {
getter = GetIPv6AllocRange
} else {
getter = GetIPv4AllocRange
}

// We can assume that the node / pod CIDR has been set already since the
// call path to this function (chooseHostIPsToRestore()) comes through
// AutoComplete(). In other words, the daemon sets the CIDR before calling
// AutoComplete().
if cidr := getter(); cidr != nil && cidr.Contains(ip) {
if cidr != nil && cidr.Contains(ip) {
return
}

err = errDoesNotBelong
return
}

// restoreCiliumHostIPsFromFS restores the router IPs (`cilium_host`) from a
// previous Cilium run. The IPs are restored from the filesystem. This is part
// 1/2 of the restoration.
func restoreCiliumHostIPsFromFS() {
// Read the previous cilium_host IPs from node_config.h for backward
// compatibility.
router4, router6 := getCiliumHostIPs()
if option.Config.EnableIPv4 {
SetInternalIPv4Router(router4)
}
if option.Config.EnableIPv6 {
SetIPv6Router(router6)
}
}

var (
errMismatch = errors.New("mismatched IPs")
errDoesNotBelong = errors.New("IP does not belong to CIDR")
96 changes: 47 additions & 49 deletions pkg/node/address_test.go
@@ -54,77 +54,78 @@ func (s *NodeSuite) TestMaskCheck(c *C) {
c.Assert(IsHostIPv6(GetIPv6()), Equals, true)
}

// This also provides coverage for RestoreHostIPs.
func (s *NodeSuite) Test_chooseHostIPsToRestore(c *C) {
tests := []struct {
name string
ipv6 bool
fromK8s, fromFS net.IP
setNodeCIDR func()
cidr *cidr.CIDR
expect net.IP
err error
}{
{
name: "restore IP from fs (both provided)",
ipv6: false,
fromK8s: net.ParseIP("192.0.2.127"),
fromFS: net.ParseIP("192.0.2.255"),
setNodeCIDR: func() { SetIPv4AllocRange(cidr.MustParseCIDR("192.0.2.0/24")) },
expect: net.ParseIP("192.0.2.255"),
err: errMismatch,
name: "restore IP from fs (both provided)",
ipv6: false,
fromK8s: net.ParseIP("192.0.2.127"),
fromFS: net.ParseIP("192.0.2.255"),
cidr: cidr.MustParseCIDR("192.0.2.0/24"),
expect: net.ParseIP("192.0.2.255"),
err: errMismatch,
},
{
name: "restore IP from fs",
ipv6: false,
fromFS: net.ParseIP("192.0.2.255"),
setNodeCIDR: func() { SetIPv4AllocRange(cidr.MustParseCIDR("192.0.2.0/24")) },
expect: net.ParseIP("192.0.2.255"),
err: nil,
name: "restore IP from fs",
ipv6: false,
fromFS: net.ParseIP("192.0.2.255"),
cidr: cidr.MustParseCIDR("192.0.2.0/24"),
expect: net.ParseIP("192.0.2.255"),
err: nil,
},
{
name: "restore IP from k8s",
ipv6: false,
fromK8s: net.ParseIP("192.0.2.127"),
setNodeCIDR: func() { SetIPv4AllocRange(cidr.MustParseCIDR("192.0.2.0/24")) },
expect: net.ParseIP("192.0.2.127"),
err: nil,
name: "restore IP from k8s",
ipv6: false,
fromK8s: net.ParseIP("192.0.2.127"),
cidr: cidr.MustParseCIDR("192.0.2.0/24"),
expect: net.ParseIP("192.0.2.127"),
err: nil,
},
{
name: "IP not part of CIDR",
ipv6: false,
fromK8s: net.ParseIP("192.0.2.127"),
setNodeCIDR: func() { SetIPv4AllocRange(cidr.MustParseCIDR("192.1.2.0/24")) },
expect: net.ParseIP("192.0.2.127"),
err: errDoesNotBelong,
name: "IP not part of CIDR",
ipv6: false,
fromK8s: net.ParseIP("192.0.2.127"),
cidr: cidr.MustParseCIDR("192.1.2.0/24"),
expect: net.ParseIP("192.0.2.127"),
err: errDoesNotBelong,
},
{
name: "no IPs to restore",
ipv6: false,
err: nil,
},
{
name: "restore IP from fs (both provided)",
ipv6: true,
fromK8s: net.ParseIP("ff02::127"),
fromFS: net.ParseIP("ff02::255"),
setNodeCIDR: func() { SetIPv6NodeRange(cidr.MustParseCIDR("ff02::/64")) },
expect: net.ParseIP("ff02::255"),
err: errMismatch,
name: "restore IP from fs (both provided)",
ipv6: true,
fromK8s: net.ParseIP("ff02::127"),
fromFS: net.ParseIP("ff02::255"),
cidr: cidr.MustParseCIDR("ff02::/64"),
expect: net.ParseIP("ff02::255"),
err: errMismatch,
},
{
name: "restore IP from fs",
ipv6: true,
fromFS: net.ParseIP("ff02::255"),
setNodeCIDR: func() { SetIPv6NodeRange(cidr.MustParseCIDR("ff02::/64")) },
expect: net.ParseIP("ff02::255"),
err: nil,
name: "restore IP from fs",
ipv6: true,
fromFS: net.ParseIP("ff02::255"),
cidr: cidr.MustParseCIDR("ff02::/64"),
expect: net.ParseIP("ff02::255"),
err: nil,
},
{
name: "restore IP from k8s",
ipv6: true,
fromK8s: net.ParseIP("ff02::127"),
setNodeCIDR: func() { SetIPv6NodeRange(cidr.MustParseCIDR("ff02::/64")) },
expect: net.ParseIP("ff02::127"),
err: nil,
name: "restore IP from k8s",
ipv6: true,
fromK8s: net.ParseIP("ff02::127"),
cidr: cidr.MustParseCIDR("ff02::/64"),
expect: net.ParseIP("ff02::127"),
err: nil,
},
{
name: "no IPs to restore",
@@ -134,10 +135,7 @@ func (s *NodeSuite) Test_chooseHostIPsToRestore(c *C) {
}
for _, tt := range tests {
c.Log("Test: " + tt.name)
if tt.setNodeCIDR != nil {
tt.setNodeCIDR()
}
got, err := chooseHostIPsToRestore(tt.ipv6, tt.fromK8s, tt.fromFS)
got, err := chooseHostIPsToRestore(tt.ipv6, tt.fromK8s, tt.fromFS, tt.cidr)
if tt.expect == nil {
// If we don't expect to change it, set it to what's currently the
// router IP.
