Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

daemon: Enable device auto detection for host-fw when BPF NodePort is disabled #12050

Merged
merged 5 commits into from Jun 16, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 3 additions & 1 deletion daemon/cmd/daemon.go
Expand Up @@ -429,7 +429,9 @@ func NewDaemon(ctx context.Context, dp datapath.Datapath) (*Daemon, *endpointRes
// retrieving Node object for self is needed by BPF NodePort device selection,
// and the k8s watcher depends on option.Config.EnableNodePort flag which
// can be modified by the initialization routine.
initKubeProxyReplacementOptions()
strict := initKubeProxyReplacementOptions()
detectDevicesForNodePortAndHostFirewall(strict)
finishKubeProxyReplacementInit()
if option.Config.EnableNodePort {
if err := node.InitNodePortAddrs(option.Config.Devices); err != nil {
log.WithError(err).Fatal("Failed to initialize NodePort addrs")
Expand Down
267 changes: 141 additions & 126 deletions daemon/cmd/daemon_main.go
Expand Up @@ -1550,7 +1550,7 @@ func initClockSourceOption() {
}
}

func initKubeProxyReplacementOptions() {
func initKubeProxyReplacementOptions() (strict bool) {
if option.Config.KubeProxyReplacement != option.KubeProxyReplacementStrict &&
option.Config.KubeProxyReplacement != option.KubeProxyReplacementPartial &&
option.Config.KubeProxyReplacement != option.KubeProxyReplacementProbe &&
Expand All @@ -1572,9 +1572,7 @@ func initKubeProxyReplacementOptions() {
option.EnableExternalIPs, option.EnableHostReachableServices,
option.EnableHostPort)

option.Config.EnableHostPort = false
option.Config.EnableNodePort = false
option.Config.EnableExternalIPs = false
disableNodePort()
option.Config.EnableHostReachableServices = false
option.Config.EnableHostServicesTCP = false
option.Config.EnableHostServicesUDP = false
Expand All @@ -1586,7 +1584,7 @@ func initKubeProxyReplacementOptions() {
probesManager := probes.NewProbeManager()

// strict denotes to panic if any to-be enabled feature cannot be enabled
strict := option.Config.KubeProxyReplacement != option.KubeProxyReplacementProbe
strict = option.Config.KubeProxyReplacement != option.KubeProxyReplacementProbe

if option.Config.KubeProxyReplacement == option.KubeProxyReplacementProbe ||
option.Config.KubeProxyReplacement == option.KubeProxyReplacementStrict {
Expand All @@ -1607,14 +1605,12 @@ func initKubeProxyReplacementOptions() {

if option.Config.EnableNodePort {
if option.Config.EnableIPSec {
msg := "IPSec cannot be used with NodePort BPF."
msg := "IPSec cannot be used with BPF NodePort."
if strict {
log.Fatal(msg)
} else {
option.Config.EnableHostPort = false
option.Config.EnableNodePort = false
option.Config.EnableExternalIPs = false
log.Warn(msg + " Disabling the feature.")
disableNodePort()
log.Warn(msg + " Disabling BPF NodePort feature.")
pchaigno marked this conversation as resolved.
Show resolved Hide resolved
}
}

Expand Down Expand Up @@ -1647,110 +1643,17 @@ func initKubeProxyReplacementOptions() {
if strict {
log.Fatal(msg)
} else {
log.Warn(msg + " Disabling the feature.")
option.Config.EnableHostPort = false
option.Config.EnableNodePort = false
option.Config.EnableExternalIPs = false
disableNodePort()
log.Warn(msg + " Disabling BPF NodePort.")
}
}

if err := checkNodePortAndEphemeralPortRanges(); err != nil {
if strict {
log.Fatal(err)
} else {
log.Warn(fmt.Sprintf("%s Disabling the feature.", err))
option.Config.EnableHostPort = false
option.Config.EnableNodePort = false
option.Config.EnableExternalIPs = false
}
}
}

detectNodePortDevs := option.Config.EnableNodePort && len(option.Config.Devices) == 0
detectDirectRoutingDev := option.Config.EnableNodePort &&
option.Config.DirectRoutingDevice == ""
if detectNodePortDevs || detectDirectRoutingDev {
if err := detectDevices(detectNodePortDevs, detectDirectRoutingDev); err != nil {
msg := "Unable to detect devices for BPF NodePort"
if strict {
log.WithError(err).Fatal(msg)
} else {
log.WithError(err).Warn(msg + " Disabling BPF NodePort feature.")
option.Config.EnableHostPort = false
option.Config.EnableNodePort = false
option.Config.EnableExternalIPs = false
}
} else {
l := log
if detectNodePortDevs {
l = l.WithField(logfields.Devices, option.Config.Devices)
}
if detectDirectRoutingDev {
l = l.WithField(logfields.DirectRoutingDevice, option.Config.DirectRoutingDevice)
}
l.Info("Using auto-derived devices for BPF node port")
}
}

if option.Config.EnableNodePort &&
option.Config.NodePortAcceleration != option.NodePortAccelerationDisabled {
if option.Config.Tunnel != option.TunnelDisabled {
log.Fatalf("Cannot use NodePort acceleration with tunneling. Either run cilium-agent with --%s=%s or --%s=%s",
option.NodePortAcceleration, option.NodePortAccelerationDisabled, option.TunnelName, option.TunnelDisabled)
}

if option.Config.XDPDevice != "undefined" &&
(option.Config.DirectRoutingDevice == "" ||
option.Config.XDPDevice != option.Config.DirectRoutingDevice) {
log.Fatalf("Cannot set NodePort acceleration device: mismatch between Prefilter device %s and NodePort device %s",
option.Config.XDPDevice, option.Config.DirectRoutingDevice)
}
option.Config.XDPDevice = option.Config.DirectRoutingDevice
if err := loader.SetXDPMode(option.Config.NodePortAcceleration); err != nil {
log.WithError(err).Fatal("Cannot set NodePort acceleration")
}
}
if option.Config.EnableNodePort {
for _, iface := range option.Config.Devices {
link, err := netlink.LinkByName(iface)
if err != nil {
log.WithError(err).Fatalf("Cannot retrieve %s link", iface)
}
if strings.ContainsAny(iface, "=;") {
// Because we pass IPV{4,6}_NODEPORT addresses to bpf/init.sh
// in a form "$IFACE_NAME1=$IPV{4,6}_ADDR1;$IFACE_NAME2=...",
// we need to restrict the iface names. Otherwise, bpf/init.sh
// won't properly parse the mappings.
log.Fatalf("%s link name contains '=' or ';' character which is not allowed",
iface)
}
if idx := link.Attrs().Index; idx > math.MaxUint16 {
log.Fatalf("%s link ifindex %d exceeds max(uint16)", iface, idx)
}
}

if option.Config.EnableIPv4 &&
option.Config.Tunnel == option.TunnelDisabled &&
option.Config.NodePortMode != option.NodePortModeSNAT &&
len(option.Config.Devices) > 1 {

// In the case of the multi-dev NodePort DSR, if a request from an
// external client was sent to a device which is not used for direct
// routing, such request might be dropped by the destination node
// if the destination node's direct routing device's rp_filter = 1
// and the client IP is reachable via other device than the direct
// routing one.

iface := option.Config.Devices[0] // direct routing interface
if val, err := sysctl.Read(fmt.Sprintf("net.ipv4.conf.%s.rp_filter", iface)); err != nil {
log.Warnf("Unable to read net.ipv4.conf.%s.rp_filter: %s. Ignoring the check",
iface, err)
} else {
if val == "1" {
log.Warnf(`DSR might not work for requests sent to other than %s device. `+
`Run 'sysctl -w net.ipv4.conf.%s.rp_filter=2' (or set to '0') on each node to fix`,
iface, iface)
}
disableNodePort()
log.Warn(fmt.Sprintf("%s. Disabling BPF NodePort.", err))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider using log.WithError(err) here?

Suggested change
log.Warn(fmt.Sprintf("%s. Disabling BPF NodePort.", err))
log.WithError(err).Warn("Disabling BPF NodePort.")

}
}
}
Expand Down Expand Up @@ -1796,23 +1699,6 @@ func initKubeProxyReplacementOptions() {
option.Config.EnableHostServicesUDP = false
}

if !option.Config.EnableNodePort {
option.Config.EnableHostPort = false
option.Config.EnableExternalIPs = false
} else {
if option.Config.Tunnel != option.TunnelDisabled &&
option.Config.NodePortMode != option.NodePortModeSNAT {

log.Warnf("Disabling NodePort's %q mode feature due to tunneling mode being enabled",
option.Config.NodePortMode)
option.Config.NodePortMode = option.NodePortModeSNAT
}

option.Config.NodePortHairpin =
option.Config.NodePortAcceleration != option.NodePortAccelerationDisabled ||
len(option.Config.Devices) == 1
}

if option.Config.EnableSessionAffinity {
if !probesManager.GetMapTypes().HaveLruHashMapType {
msg := "SessionAffinity feature requires BPF LRU maps"
Expand All @@ -1825,7 +1711,6 @@ func initKubeProxyReplacementOptions() {

}
}

if option.Config.EnableSessionAffinity && option.Config.EnableHostReachableServices {
found1, found2 := false, false
if h := probesManager.GetHelpers("cgroup_sock"); h != nil {
Expand All @@ -1840,6 +1725,134 @@ func initKubeProxyReplacementOptions() {
"will be selected from all network namespaces on the host.")
}
}

if option.Config.EnableNodePort {
if option.Config.Tunnel != option.TunnelDisabled &&
option.Config.NodePortMode != option.NodePortModeSNAT {

log.Warnf("Disabling NodePort's %q mode feature due to tunneling mode being enabled",
option.Config.NodePortMode)
option.Config.NodePortMode = option.NodePortModeSNAT
}

if option.Config.NodePortAcceleration != option.NodePortAccelerationDisabled {
if option.Config.Tunnel != option.TunnelDisabled {
log.Fatalf("Cannot use NodePort acceleration with tunneling. Either run cilium-agent with --%s=%s or --%s=%s",
option.NodePortAcceleration, option.NodePortAccelerationDisabled, option.TunnelName, option.TunnelDisabled)
}
}
}

return
}

// detectDevicesForNodePortAndHostFirewall tries to detect bpf_host devices
// (if needed).
func detectDevicesForNodePortAndHostFirewall(strict bool) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is probably out of scope of this PR, but do you think it would be possible for us to call all these init functions with config as an argument instead of using global one explicitely? This would make it possible to unit test them to make sure that we don't mess things up, or is there anything aside from config manipulation that makes it unfeasible?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea! Added to my TODO list.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree, would be nice to get some test coverage on this entire part of the code (i.e. dependent option handling)

detectNodePortDevs := len(option.Config.Devices) == 0 &&
(option.Config.EnableNodePort || option.Config.EnableHostFirewall)
detectDirectRoutingDev := option.Config.EnableNodePort &&
option.Config.DirectRoutingDevice == ""
if detectNodePortDevs || detectDirectRoutingDev {
if err := detectDevices(detectNodePortDevs, detectDirectRoutingDev); err != nil {
msg := "Unable to detect devices for BPF NodePort."
if strict {
log.WithError(err).Fatal(msg)
} else {
disableNodePort()
log.WithError(err).Warn(msg + " Disabling BPF NodePort.")
}
} else {
l := log
if detectNodePortDevs {
l = l.WithField(logfields.Devices, option.Config.Devices)
}
if detectDirectRoutingDev {
l = l.WithField(logfields.DirectRoutingDevice, option.Config.DirectRoutingDevice)
}
l.Info("Using auto-derived devices for BPF node port")
}
}
}

// finishKubeProxyReplacementInit finishes initialization of kube-proxy
// replacement after all devices are known.
func finishKubeProxyReplacementInit() {
if !option.Config.EnableNodePort {
// Make sure that NodePort dependencies are disabled
disableNodePort()
return
}

// After this point, BPF NodePort should not be disabled
brb marked this conversation as resolved.
Show resolved Hide resolved

if option.Config.NodePortAcceleration != option.NodePortAccelerationDisabled {
if option.Config.XDPDevice != "undefined" &&
(option.Config.DirectRoutingDevice == "" ||
option.Config.XDPDevice != option.Config.DirectRoutingDevice) {
log.Fatalf("Cannot set NodePort acceleration device: mismatch between Prefilter device %s and NodePort device %s",
option.Config.XDPDevice, option.Config.DirectRoutingDevice)
}
option.Config.XDPDevice = option.Config.DirectRoutingDevice
if err := loader.SetXDPMode(option.Config.NodePortAcceleration); err != nil {
log.WithError(err).Fatal("Cannot set NodePort acceleration")
}
}

for _, iface := range option.Config.Devices {
link, err := netlink.LinkByName(iface)
if err != nil {
log.WithError(err).Fatalf("Cannot retrieve %s link", iface)
}
if strings.ContainsAny(iface, "=;") {
// Because we pass IPV{4,6}_NODEPORT addresses to bpf/init.sh
// in a form "$IFACE_NAME1=$IPV{4,6}_ADDR1;$IFACE_NAME2=...",
// we need to restrict the iface names. Otherwise, bpf/init.sh
// won't properly parse the mappings.
log.Fatalf("%s link name contains '=' or ';' character which is not allowed",
iface)
}
if idx := link.Attrs().Index; idx > math.MaxUint16 {
log.Fatalf("%s link ifindex %d exceeds max(uint16)", iface, idx)
}
}

if option.Config.EnableIPv4 &&
option.Config.Tunnel == option.TunnelDisabled &&
option.Config.NodePortMode != option.NodePortModeSNAT &&
len(option.Config.Devices) > 1 {

// In the case of the multi-dev NodePort DSR, if a request from an
// external client was sent to a device which is not used for direct
// routing, such request might be dropped by the destination node
// if the destination node's direct routing device's rp_filter = 1
// and the client IP is reachable via other device than the direct
// routing one.

iface := option.Config.DirectRoutingDevice
if val, err := sysctl.Read(fmt.Sprintf("net.ipv4.conf.%s.rp_filter", iface)); err != nil {
log.Warnf("Unable to read net.ipv4.conf.%s.rp_filter: %s. Ignoring the check",
iface, err)
} else {
if val == "1" {
log.Warnf(`DSR might not work for requests sent to other than %s device. `+
`Run 'sysctl -w net.ipv4.conf.%s.rp_filter=2' (or set to '0') on each node to fix`,
iface, iface)
}
}
}

option.Config.NodePortHairpin =
option.Config.NodePortAcceleration != option.NodePortAccelerationDisabled ||
len(option.Config.Devices) == 1
}

// disableNodePort disables BPF NodePort and friends who are dependent from
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit:

Suggested change
// disableNodePort disables BPF NodePort and friends who are dependent from
// disableNodePort disables BPF NodePort and friends which are dependent on

// the latter.
func disableNodePort() {
option.Config.EnableNodePort = false
option.Config.EnableHostPort = false
option.Config.EnableExternalIPs = false
}

// detectDevices tries to detect device names which are going to be used for
Expand Down Expand Up @@ -1889,7 +1902,9 @@ func detectDevices(detectNodePortDevs, detectDirectRoutingDev bool) error {
}
}
}
devSet[option.Config.DirectRoutingDevice] = struct{}{}
if option.Config.DirectRoutingDevice != "" {
devSet[option.Config.DirectRoutingDevice] = struct{}{}
}

option.Config.Devices = make([]string, 0, len(devSet))
for dev := range devSet {
Expand Down
5 changes: 3 additions & 2 deletions pkg/option/config.go
Expand Up @@ -2914,6 +2914,7 @@ func EndpointStatusValuesMap() (values map[string]struct{}) {
// MightAutoDetectDevices returns true if the device auto-detection might take
// place.
func MightAutoDetectDevices() bool {
return Config.KubeProxyReplacement != KubeProxyReplacementDisabled &&
(len(Config.Devices) == 0 || Config.DirectRoutingDevice == "")
return (Config.EnableHostFirewall && len(Config.Devices) == 0) ||
(Config.KubeProxyReplacement != KubeProxyReplacementDisabled &&
(len(Config.Devices) == 0 || Config.DirectRoutingDevice == ""))
}