k8s, node: Restore router IPs (cilium_host) from K8s resource
Previously, after a node reboot, Cilium would allocate a new router IP
and append it to the slice of node IPs. Because the node IPs from the
previous Cilium instance had already been synced to the K8s resource,
the slice was not empty, so the new router IP ended up at the end of
the slice rather than replacing the old one. Other parts of Cilium
assume that the router IP is the first node IP (the first element of
the slice), so the newly allocated router IP was no longer where it was
expected. This caused a mismatch over which router IP is to be used;
there should only ever be one router IP (one IPv4 or one IPv6).
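
To make the broken assumption concrete, here is a minimal, standalone Go
sketch; the slice handling and the addresses are illustrative, not Cilium's
actual node-IP bookkeeping code:

package main

import (
    "fmt"
    "net"
)

// routerIP mirrors the assumption made elsewhere in Cilium: the router
// (cilium_host) IP is the first element of the node-IP slice.
func routerIP(nodeIPs []net.IP) net.IP {
    if len(nodeIPs) == 0 {
        return nil
    }
    return nodeIPs[0]
}

func main() {
    // IPs already synced to the K8s resource by the previous Cilium
    // instance; 10.0.0.2 (hypothetical) was the old router IP.
    nodeIPs := []net.IP{net.ParseIP("10.0.0.2")}

    // After a reboot the state dir is gone, so a fresh router IP is
    // allocated and appended instead of replacing the old one.
    nodeIPs = append(nodeIPs, net.ParseIP("10.0.0.9"))

    // Still prints 10.0.0.2: the stale IP, not the newly allocated one.
    fmt.Println(routerIP(nodeIPs))
}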

After a node reboot, the router IPs cannot be restored from disk
because they have been wiped away: the Cilium state directory lives on
a tmpfs mount [1]. This commit fixes the problem by restoring the
router IPs from the K8s resource (Node or CiliumNode) if they are
present in its annotations, which prevents more than one router IP from
existing, as described above. Note that router IPs from the K8s
resource are only restored if no router IP was found on the filesystem;
the filesystem is considered the source of truth and takes precedence
over the K8s resource. The user is warned if the two sources disagree.
We also check that the IP to be restored is within the pod / node CIDR
range, otherwise it is excluded from restoration. A simplified sketch
of these precedence rules follows the footnote below.

[1]: Most Linux distributions mount /run as a tmpfs, and Cilium's
     default state directory is created under /run. (It is also common
     for /var/run to be a symlink to /run.)
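
For reference, the precedence rules implemented below in
chooseHostIPsToRestore (pkg/node/address.go) amount to roughly the following.
This is a simplified, standalone sketch with hypothetical addresses, not the
committed code:

package main

import (
    "errors"
    "fmt"
    "net"
)

var (
    errMismatch      = errors.New("mismatched IPs")
    errDoesNotBelong = errors.New("IP does not belong to CIDR")
)

// chooseRouterIP picks which router IP to restore: the filesystem wins over
// the K8s resource, a disagreement between the two is reported, and an IP
// outside the node's pod CIDR is rejected.
func chooseRouterIP(fromK8s, fromFS net.IP, podCIDR *net.IPNet) (net.IP, error) {
    var ip net.IP
    var err error
    switch {
    case fromK8s != nil && fromFS != nil && !fromK8s.Equal(fromFS):
        ip, err = fromFS, errMismatch // warn, but still prefer the filesystem IP
    case fromFS != nil:
        ip = fromFS
    case fromK8s != nil:
        ip = fromK8s
    default:
        return nil, nil // nothing to restore
    }
    if !podCIDR.Contains(ip) {
        return nil, errDoesNotBelong // discard an IP outside the pod CIDR
    }
    return ip, err
}

func main() {
    _, podCIDR, _ := net.ParseCIDR("10.0.0.0/24")
    ip, err := chooseRouterIP(net.ParseIP("10.0.0.9"), net.ParseIP("10.0.0.2"), podCIDR)
    fmt.Println(ip, err) // 10.0.0.2 mismatched IPs
}

One difference from the committed code: the real chooseHostIPsToRestore
returns the offending IP alongside errDoesNotBelong so the caller can log it
before discarding it, whereas the sketch simply drops it.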

Fixes: #16279

Signed-off-by: Chris Tarazi <chris@isovalent.com>
christarazi committed Jun 1, 2021
1 parent 12a9fef commit 68cae4c
Showing 3 changed files with 231 additions and 9 deletions.
28 changes: 28 additions & 0 deletions pkg/k8s/init.go
@@ -255,6 +255,8 @@ func WaitForNodeInformation() error {

// K8s Node IP is used by BPF NodePort devices auto-detection
node.SetK8sNodeIP(k8sNodeIP)

restoreRouterHostIPs(n)
} else {
// if node resource could not be received, fail if
// PodCIDR requirement has been requested
@@ -267,3 +269,29 @@ func WaitForNodeInformation() error {
// want to specify them manually
return nil
}

// restoreRouterHostIPs restores (sets) the router IPs found from the
// Kubernetes resource.
//
// Note that it does not validate the correctness of the IPs, as that is done
// later in the daemon initialization when node.AutoComplete() is called.
func restoreRouterHostIPs(n *nodeTypes.Node) {
if !option.Config.EnableHostIPRestore {
return
}

router4 := n.GetCiliumInternalIP(false)
router6 := n.GetCiliumInternalIP(true)
if router4 != nil {
node.SetInternalIPv4Router(router4)
}
if router6 != nil {
node.SetIPv6Router(router6)
}
if router4 != nil || router6 != nil {
log.WithFields(logrus.Fields{
logfields.IPv4: router4,
logfields.IPv6: router6,
}).Info("Restored router IPs from node information")
}
}
106 changes: 97 additions & 9 deletions pkg/node/address.go
@@ -16,6 +16,7 @@ package node

import (
"bufio"
"errors"
"fmt"
"net"
"os"
@@ -349,16 +350,18 @@ func SetIPv6NodeRange(net *cidr.CIDR) {
// AutoComplete completes the parts of addressing that can be auto derived
func AutoComplete() error {
if option.Config.EnableHostIPRestore {
// Read the previous cilium host IPs from node_config.h for backward
// compatibility
ipv4GW, ipv6Router := getCiliumHostIPs()

if ipv4GW != nil && option.Config.EnableIPv4 {
SetInternalIPv4Router(ipv4GW)
// Fetch the router (`cilium_host`) IPs in case they were set a priori
// from the Kubernetes or CiliumNode resource in the K8s subsystem.
router4FromK8s, router6FromK8s := GetInternalIPv4Router(), GetIPv6Router()
// At the same time, read the previous cilium_host IPs from
// node_config.h for backward compatibility.
router4FromFS, router6FromFS := getCiliumHostIPs()

if option.Config.EnableIPv4 {
restoreHostIPs(false, router4FromK8s, router4FromFS)
}

if ipv6Router != nil && option.Config.EnableIPv6 {
SetIPv6Router(ipv6Router)
if option.Config.EnableIPv6 {
restoreHostIPs(true, router6FromK8s, router6FromFS)
}
}

@@ -375,6 +378,91 @@ func AutoComplete() error {
return nil
}

// restoreHostIPs restores the router IPs (`cilium_host`) from a previous
// Cilium run. Router IPs from the filesystem are preferred over the IPs found
// in the Kubernetes resource (Node or CiliumNode), because we consider the
// filesystem to be the most up-to-date source of truth. The chosen router IP
// is then checked whether it is contained inside node CIDR (pod CIDR) range.
// If not, then the router IP is discarded and not restored.
func restoreHostIPs(ipv6 bool, fromK8s, fromFS net.IP) {
var (
getter func() *cidr.CIDR
setter func(net.IP)
)
if ipv6 {
getter = GetIPv6AllocRange
setter = SetIPv6Router
} else {
getter = GetIPv4AllocRange
setter = SetInternalIPv4Router
}

ip, err := chooseHostIPsToRestore(ipv6, fromK8s, fromFS)
switch {
case err != nil && errors.Is(err, errDoesNotBelong):
log.WithFields(logrus.Fields{
logfields.CIDR: getter(),
}).Infof(
"The router IP (%s) considered for restoration does not belong in the Pod CIDR of the node. Discarding old router IP.",
ip,
)
setter(nil)
case err != nil && errors.Is(err, errMismatch):
log.Warnf(
mismatchRouterIPsMsg,
fromK8s, fromFS, option.LocalRouterIPv4, option.LocalRouterIPv6,
)
fallthrough // Above is just a warning; we still want to set the router IP regardless.
case err == nil:
setter(ip)
}
}

func chooseHostIPsToRestore(ipv6 bool, fromK8s, fromFS net.IP) (ip net.IP, err error) {
switch {
case fromK8s != nil && fromFS != nil:
if fromK8s.Equal(fromFS) {
ip = fromK8s
} else {
ip = fromFS
err = errMismatch
}
case fromK8s == nil && fromFS != nil:
ip = fromFS
case fromK8s != nil && fromFS == nil:
ip = fromK8s
case fromK8s == nil && fromFS == nil:
// We do nothing in this case because there are no router IPs to
// restore.
return
}

var getter func() *cidr.CIDR
if ipv6 {
getter = GetIPv6AllocRange
} else {
getter = GetIPv4AllocRange
}

// We can assume that the node / pod CIDR has been set already since the
// call path to this function (chooseHostIPsToRestore()) comes through
// AutoComplete(). In other words, the daemon sets the CIDR before calling
// AutoComplete().
if cidr := getter(); cidr != nil && cidr.Contains(ip) {
return
}

err = errDoesNotBelong
return
}

var (
errMismatch = errors.New("mismatched IPs")
errDoesNotBelong = errors.New("IP does not belong to CIDR")
)

const mismatchRouterIPsMsg = "Mismatch of router IPs found during restoration. The Kubernetes resource contained %s, while the filesystem contained %s. Using the router IP from the filesystem. To change the router IP, specify --%s and/or --%s."

// ValidatePostInit validates the entire addressing setup and completes it as
// required
func ValidatePostInit() error {
106 changes: 106 additions & 0 deletions pkg/node/address_test.go
@@ -39,6 +39,10 @@ type NodeSuite struct{}

var _ = Suite(&NodeSuite{})

func (s *NodeSuite) TearDownTest(c *C) {
Uninitialize()
}

func (s *NodeSuite) TestMaskCheck(c *C) {
InitDefaultPrefix("")

@@ -50,6 +54,108 @@ func (s *NodeSuite) TestMaskCheck(c *C) {
c.Assert(IsHostIPv6(GetIPv6()), Equals, true)
}

func (s *NodeSuite) Test_chooseHostIPsToRestore(c *C) {
tests := []struct {
name string
ipv6 bool
fromK8s, fromFS net.IP
setNodeCIDR func()
expect net.IP
err error
}{
{
name: "restore IP from fs (both provided)",
ipv6: false,
fromK8s: net.ParseIP("192.0.2.127"),
fromFS: net.ParseIP("192.0.2.255"),
setNodeCIDR: func() { SetIPv4AllocRange(cidr.MustParseCIDR("192.0.2.0/24")) },
expect: net.ParseIP("192.0.2.255"),
err: errMismatch,
},
{
name: "restore IP from fs",
ipv6: false,
fromFS: net.ParseIP("192.0.2.255"),
setNodeCIDR: func() { SetIPv4AllocRange(cidr.MustParseCIDR("192.0.2.0/24")) },
expect: net.ParseIP("192.0.2.255"),
err: nil,
},
{
name: "restore IP from k8s",
ipv6: false,
fromK8s: net.ParseIP("192.0.2.127"),
setNodeCIDR: func() { SetIPv4AllocRange(cidr.MustParseCIDR("192.0.2.0/24")) },
expect: net.ParseIP("192.0.2.127"),
err: nil,
},
{
name: "IP not part of CIDR",
ipv6: false,
fromK8s: net.ParseIP("192.0.2.127"),
setNodeCIDR: func() { SetIPv4AllocRange(cidr.MustParseCIDR("192.1.2.0/24")) },
expect: net.ParseIP("192.0.2.127"),
err: errDoesNotBelong,
},
{
name: "no IPs to restore",
ipv6: false,
err: nil,
},
{
name: "restore IP from fs (both provided)",
ipv6: true,
fromK8s: net.ParseIP("ff02::127"),
fromFS: net.ParseIP("ff02::255"),
setNodeCIDR: func() { SetIPv6NodeRange(cidr.MustParseCIDR("ff02::/64")) },
expect: net.ParseIP("ff02::255"),
err: errMismatch,
},
{
name: "restore IP from fs",
ipv6: true,
fromFS: net.ParseIP("ff02::255"),
setNodeCIDR: func() { SetIPv6NodeRange(cidr.MustParseCIDR("ff02::/64")) },
expect: net.ParseIP("ff02::255"),
err: nil,
},
{
name: "restore IP from k8s",
ipv6: true,
fromK8s: net.ParseIP("ff02::127"),
setNodeCIDR: func() { SetIPv6NodeRange(cidr.MustParseCIDR("ff02::/64")) },
expect: net.ParseIP("ff02::127"),
err: nil,
},
{
name: "no IPs to restore",
ipv6: true,
err: nil,
},
}
for _, tt := range tests {
c.Log("Test: " + tt.name)
if tt.setNodeCIDR != nil {
tt.setNodeCIDR()
}
got, err := chooseHostIPsToRestore(tt.ipv6, tt.fromK8s, tt.fromFS)
if tt.expect == nil {
// If we don't expect to change it, set it to what's currently the
// router IP.
if tt.ipv6 {
tt.expect = GetIPv6Router()
} else {
tt.expect = GetInternalIPv4Router()
}
}
c.Assert(err, checker.DeepEquals, tt.err)
c.Assert(got, checker.DeepEquals, tt.expect)
}
}

func (s *NodeSuite) Test_getCiliumHostIPsFromFile(c *C) {
tmpDir := c.MkDir()
allIPsCorrect := filepath.Join(tmpDir, "node_config.h")
