Skip to content

Commit

Permalink
datapath: Create sysctl rp_filter overwrite config on agent init
Browse files Browse the repository at this point in the history
SystemD versions 245 and newer will create sysctl config which sets
the `rp_filter` value for all network interfaces to 1. This conflicts
with cilium which requires `rp_filter` to be 0 on interfaces it uses.

This commit adds a small utility/tool: `sysctlfix` which will insert
a config file into the `/etc/sysctl.d` dir with the highest priority
containing directives to disable `rp_filter` and perhaps to contain
other sysctl config in future.

This utility is called as an init container before the cilium agent
starts. Because the sysctl config is in place before the agent starts,
all interfaces created by the agent and matching the pattern in the
config file will have `rp_filter` disabled, even when SystemD >=245 is
installed.

Fixes: #10645
Fixes: #19909
Signed-off-by: Dylan Reimerink <dylan.reimerink@isovalent.com>
  • Loading branch information
dylandreimerink authored and joestringer committed Jul 15, 2022
1 parent 3f822c2 commit cabc658
Show file tree
Hide file tree
Showing 60 changed files with 8,704 additions and 1 deletion.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ debug: all

include Makefile.defs

SUBDIRS_CILIUM_CONTAINER := proxylib envoy bpf cilium daemon cilium-health bugtool tools/mount
SUBDIRS_CILIUM_CONTAINER := proxylib envoy bpf cilium daemon cilium-health bugtool tools/mount tools/sysctlfix
SUBDIRS := $(SUBDIRS_CILIUM_CONTAINER) operator plugins tools hubble-relay

SUBDIRS_CILIUM_CONTAINER += plugins/cilium-cni
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ require (
github.com/cncf/udpa/go v0.0.0-20201211205326-cc1b757b3edd // indirect
github.com/containernetworking/cni v0.8.1
github.com/containernetworking/plugins v0.9.0
github.com/coreos/go-systemd/v22 v22.3.2
github.com/davecgh/go-spew v1.1.1
github.com/docker/distribution v2.7.1+incompatible // indirect
github.com/docker/docker v0.7.3-0.20190327010347-be7ac8be2ae0
Expand Down
5 changes: 5 additions & 0 deletions go.sum

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions install/kubernetes/cilium/templates/cilium-agent-daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,31 @@ spec:
securityContext:
privileged: true
{{- end }}
- name: apply-sysctl-overwrites
image: "{{ if .Values.image.override }}{{ .Values.image.override }}{{ else }}{{ .Values.image.repository }}:{{ .Values.image.tag }}{{ if .Values.image.useDigest }}@{{ .Values.image.digest }}{{ end }}{{ end }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
env:
- name: BIN_PATH
value: {{ .Values.cni.binPath }}
command:
- sh
- -ec
# The statically linked Go program binary is invoked to avoid any
# dependency on utilities like sh that can be missing on certain
# distros installed on the underlying host. Copy the binary to the
# same directory where we install cilium cni plugin so that exec permissions
# are available.
- |
cp /usr/bin/cilium-sysctlfix /hostbin/cilium-sysctlfix;
nsenter --mount=/hostproc/1/ns/mnt "${BIN_PATH}/cilium-sysctlfix";
rm /hostbin/cilium-sysctlfix
volumeMounts:
- name: hostproc
mountPath: /hostproc
- name: cni-path
mountPath: /hostbin
securityContext:
privileged: true
{{- if and .Values.nodeinit.enabled (not (eq .Values.nodeinit.bootstrapFile "")) }}
- name: wait-for-node-init
command: ['sh', '-c', 'until test -s {{ (print "/tmp/cilium-bootstrap.d/" (.Values.nodeinit.bootstrapFile | base)) | quote }}; do echo "Waiting on node-init to run..."; sleep 1; done']
Expand Down
1 change: 1 addition & 0 deletions tools/sysctlfix/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
cilium-sysctlfix
27 changes: 27 additions & 0 deletions tools/sysctlfix/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Copyright Authors of Cilium
# SPDX-License-Identifier: Apache-2.0

include ../../Makefile.defs

# Name of the binary built and installed from this directory.
TARGET := cilium-sysctlfix

# Every target below is a command, not a file to be checked for freshness.
# $(TARGET) is listed too, so the build recipe runs on every invocation.
.PHONY: all $(TARGET) $(SUBDIRS) clean install install-binary install-bash-completion

all: $(TARGET)

$(TARGET):
	@$(ECHO_GO)
	$(QUIET)$(GO_BUILD) -o $@

clean:
	@$(ECHO_CLEAN)
	-$(QUIET)rm -f $(TARGET)
	$(QUIET)$(GO_CLEAN)

# Create the destination directory, then copy the binary into it.
install:
	$(QUIET)$(INSTALL) -m 0755 -d $(DESTDIR)$(BINDIR)
	$(QUIET)$(INSTALL) -m 0755 $(TARGET) $(DESTDIR)$(BINDIR)

install-binary: install

# No-op: this target has no recipe and no prerequisites.
install-bash-completion:
139 changes: 139 additions & 0 deletions tools/sysctlfix/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package main

import (
"context"
"fmt"
"io"
"os"
"path"
"time"

"github.com/coreos/go-systemd/v22/dbus"
"github.com/spf13/pflag"
)

// This tool attempts to write a sysctl config file to the sysctl config directory with the highest precedence so
// we can overwrite any other config and ensure correct sysctl options for Cilium to function.

// flagSet holds the command line flags of this tool. Parse errors are returned
// to the caller (ContinueOnError) instead of terminating the process.
var flagSet = pflag.NewFlagSet(os.Args[0], pflag.ContinueOnError)

// sysctlD is the directory the override file is written into.
var sysctlD = flagSet.String(
	"sysctl-conf-dir",
	"/etc/sysctl.d/",
	"Path to the sysctl config directory",
)

// ciliumOverwrites is the file name of the override config inside sysctlD.
// The 99-zzz prefix ensures our config file gets precedence over most if not
// all other files.
var ciliumOverwrites = flagSet.String(
	"sysctl-config-file",
	"99-zzz-override_cilium.conf",
	"Filename of the cilium sysctl overwrites config file",
)

// sysctlUnit is the name of the systemd-sysctl unit that gets restarted after
// the override file has been changed.
var sysctlUnit = flagSet.String(
	"systemd-sysctl-unit",
	"systemd-sysctl.service",
	"Name of the systemd sysctl unit to reload",
)

// sysctlConfig is the full content of the sysctl override file. main compares
// the file on disk against this value byte-for-byte, so every character here
// (including the leading and trailing newline) is significant.
var sysctlConfig = "\n" +
	"# Disable rp_filter on Cilium interfaces since it may cause mangled packets to be dropped\n" +
	"net.ipv4.conf.lxc*.rp_filter = 0\n" +
	"net.ipv4.conf.cilium_*.rp_filter = 0\n" +
	"# The kernel uses max(conf.all, conf.{dev}) as its value, so we need to set .all. to 0 as well.\n" +
	"# Otherwise it will overrule the device specific settings.\n" +
	"net.ipv4.conf.all.rp_filter = 0\n"

// This program is executed by an init container so we purposely don't
// exit with any error codes. In case of errors, the function will print warnings,
// but we don't block cilium agent pod from running.
// main writes the Cilium sysctl override file into the sysctl.d directory and,
// if the file had to be created or changed, restarts the systemd-sysctl unit
// over D-Bus so the new settings are applied.
//
// This program is executed by an init container so we purposely don't
// exit with any error codes. In case of errors, the function will print warnings,
// but we don't block cilium agent pod from running.
func main() {
	err := flagSet.Parse(os.Args[1:])
	if err != nil {
		fmt.Printf("parse flags: %s\n", err)
		return
	}

	// The target directory must already exist; we never create it ourselves.
	info, err := os.Stat(*sysctlD)
	if err != nil {
		fmt.Printf("can't stat sysctl.d dir '%s': %s\n", *sysctlD, err)
		return
	}

	if !info.IsDir() {
		fmt.Printf("'%s' is not a directory\n", *sysctlD)
		return
	}

	// Open read-write (creating the file if needed) so the current contents
	// can be compared before deciding whether to rewrite.
	overwritesPath := path.Join(*sysctlD, *ciliumOverwrites)
	f, err := os.OpenFile(overwritesPath, os.O_RDWR|os.O_CREATE, 0644)
	if err != nil {
		fmt.Printf("unable to create cilium sysctl overwrites config: %s\n", err)
		return
	}
	defer f.Close()

	currentContents, err := io.ReadAll(f)
	if err != nil {
		fmt.Printf("read config: %s\n", err)
		return
	}

	// Nothing changed: skip both the rewrite and the unit restart.
	if string(currentContents) == sysctlConfig {
		fmt.Println("sysctl config up-to-date, nothing to do")
		return
	}

	// ReadAll left the offset at EOF; rewind before replacing the contents.
	_, err = f.Seek(0, io.SeekStart)
	if err != nil {
		fmt.Printf("error while seeking to start of sysctl config: %s\n", err)
		return
	}

	// Truncate the whole file
	err = f.Truncate(0)
	if err != nil {
		fmt.Printf("error while truncating sysctl config: %s\n", err)
		return
	}

	_, err = fmt.Fprint(f, sysctlConfig)
	if err != nil {
		fmt.Printf("error while writing to sysctl config: %s\n", err)
		return
	}

	fmt.Println("sysctl config created/updated")

	// Bound all D-Bus interaction so a stuck systemd can't hang the init container.
	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
	defer cancel()

	conn, err := dbus.NewSystemdConnectionContext(ctx)
	if err != nil {
		fmt.Printf("error while creating SystemD D-Bus connection: %s\n", err)
		return
	}
	// Release the D-Bus connection once we are done talking to systemd.
	defer conn.Close()

	_, err = conn.GetUnitPropertiesContext(ctx, *sysctlUnit)
	if err != nil {
		fmt.Printf("can't verify unit '%s' exists: %s\n", *sysctlUnit, err)
		return
	}

	// https://www.freedesktop.org/wiki/Software/systemd/dbus/
	// "The mode needs to be one of replace, fail, isolate, ignore-dependencies, ignore-requirements.
	// If "replace" the call will start the unit and its dependencies, possibly replacing already queued jobs that
	// conflict with this."
	const mode = "replace"

	// Restart the systemd-sysctl unit, this will trigger SystemD to apply the new config to all existing interfaces
	// which is required for host-interfaces and reloads on existing cilium deployments.
	_, err = conn.RestartUnitContext(ctx, *sysctlUnit, mode, nil)
	if err != nil {
		fmt.Printf("error while restarting unit '%s': %s\n", *sysctlUnit, err)
		return
	}

	fmt.Printf("systemd unit '%s' restarted\n", *sysctlUnit)
}
Loading

0 comments on commit cabc658

Please sign in to comment.