From 9d75d560f5325b92c056b16414f649cda37c33de Mon Sep 17 00:00:00 2001 From: Shailend Chand Date: Fri, 5 Dec 2025 11:19:33 -0800 Subject: [PATCH] Make --allow-packet-socket-write a public flag. This flag is now required for Docker versions 28 and beyond to function inside gVisor, as dockerd sends unsolicited ARP/NA requests using AF_PACKET sockets. The flag is no longer test-only. The docker behavior to send the ARP/NA pkts was introduced in eaa84bc [1]. Failure to send was made a hard error in 422e056 [2], and in bc130f36 [3], docker evolved to rely on these packets being sent. [1] https://github.com/moby/moby/commit/eaa84bc8f4 [2] https://github.com/moby/moby/commit/422e056b0f [3] https://github.com/moby/moby/commit/bc130f367d PiperOrigin-RevId: 840805091 --- Makefile | 10 +++++----- .../user_guide/tutorials/docker-in-gvisor.md | 19 +++++++++++++++---- images/basic/docker/Dockerfile | 2 +- runsc/config/config.go | 2 +- runsc/config/flags.go | 2 +- test/runner/main.go | 2 +- 6 files changed, 24 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index 5e3d1c9afa..0cd9b1835f 100644 --- a/Makefile +++ b/Makefile @@ -369,7 +369,7 @@ INTEGRATION_TARGETS := //test/image:image_test //test/e2e:integration_test docker-tests: load-basic $(RUNTIME_BIN) @$(call install_runtime,$(RUNTIME),) # Clear flags. - @$(call install_runtime,$(RUNTIME)-docker,--net-raw) # Used by TestDocker*. + @$(call install_runtime,$(RUNTIME)-docker,--net-raw --allow-packet-socket-write) # Used by TestDocker*. @$(call install_runtime,$(RUNTIME)-fdlimit,--fdlimit=2000) # Used by TestRlimitNoFile. @$(call install_runtime,$(RUNTIME)-dcache,--fdlimit=2000 --dcache=100) # Used by TestDentryCacheLimit. @$(call install_runtime,$(RUNTIME)-host-uds,--host-uds=all) # Used by TestHostSocketConnect. 
@@ -386,13 +386,13 @@ plugin-network-tests: RUNSC_TARGET=--config plugin-tldk //runsc:runsc-plugin-sta overlay-tests: load-basic $(RUNTIME_BIN) @$(call install_runtime,$(RUNTIME)-overlay,--overlay2=all:dir=/tmp) - @$(call install_runtime,$(RUNTIME)-overlay-docker,--net-raw --overlay2=all:dir=/tmp) + @$(call install_runtime,$(RUNTIME)-overlay-docker,--net-raw --allow-packet-socket-write --overlay2=all:dir=/tmp) @$(call test_runtime_cached,$(RUNTIME)-overlay,--test_env=TEST_OVERLAY=true $(INTEGRATION_TARGETS)) .PHONY: overlay-tests swgso-tests: load-basic $(RUNTIME_BIN) @$(call install_runtime,$(RUNTIME)-swgso,--software-gso=true --gso=false) - @$(call install_runtime,$(RUNTIME)-swgso-docker,--net-raw --software-gso=true --gso=false) + @$(call install_runtime,$(RUNTIME)-swgso-docker,--net-raw --allow-packet-socket-write --software-gso=true --gso=false) @$(call test_runtime_cached,$(RUNTIME)-swgso,$(INTEGRATION_TARGETS)) .PHONY: swgso-tests @@ -406,13 +406,13 @@ kvm-tests: load-basic $(RUNTIME_BIN) @if ! 
test -w /dev/kvm; then sudo chmod a+rw /dev/kvm; fi @$(call test,//pkg/sentry/platform/kvm:kvm_test) @$(call install_runtime,$(RUNTIME)-kvm,--platform=kvm) - @$(call install_runtime,$(RUNTIME)-kvm-docker,--net-raw --platform=kvm) + @$(call install_runtime,$(RUNTIME)-kvm-docker,--net-raw --allow-packet-socket-write --platform=kvm) @$(call test_runtime_cached,$(RUNTIME)-kvm,$(INTEGRATION_TARGETS)) .PHONY: kvm-tests systrap-tests: load-basic $(RUNTIME_BIN) @$(call install_runtime,$(RUNTIME)-systrap,--platform=systrap) - @$(call install_runtime,$(RUNTIME)-systrap-docker,--net-raw --platform=systrap) + @$(call install_runtime,$(RUNTIME)-systrap-docker,--net-raw --allow-packet-socket-write --platform=systrap) @$(call test_runtime_cached,$(RUNTIME)-systrap,$(INTEGRATION_TARGETS)) .PHONY: systrap-tests diff --git a/g3doc/user_guide/tutorials/docker-in-gvisor.md b/g3doc/user_guide/tutorials/docker-in-gvisor.md index e9b52c0f67..a7794561f3 100644 --- a/g3doc/user_guide/tutorials/docker-in-gvisor.md +++ b/g3doc/user_guide/tutorials/docker-in-gvisor.md @@ -9,9 +9,19 @@ network driver and the bridge network driver are tested and supported. ### NOTE on runsc setup To run docker within gvisor, runsc must be enabled to allow raw sockets. This is -not the default, `--net-raw` must be passed to runsc. To use the following -tutorial, that means having the following runtimes configuration in -`/etc/docker/daemon.json`: +not the default, `--net-raw` must be passed to runsc. + +In addition, Docker versions 28 and beyond need the ability to write to +AF_PACKET sockets. This is because dockerd sends unsolicited ARP/NA requests +when bringing up interfaces. To allow this, the `--allow-packet-socket-write` +flag must also be supplied (the default behavior is to disallow writes to +AF_PACKET sockets). 
+ +To use the following tutorial, that means having the following runtimes +configuration in `/etc/docker/daemon.json`: + +> **Note:** `--allow-packet-socket-write` allows sandboxed code to craft +> arbitrary packets. It is only needed for Docker versions 28 and beyond. ```json { @@ -19,7 +29,8 @@ tutorial, that means having the following runtimes configuration in "runsc": { "path": "/usr/local/bin/runsc", "runtimeArgs": [ - "--net-raw" + "--net-raw", + "--allow-packet-socket-write" ] } } diff --git a/images/basic/docker/Dockerfile b/images/basic/docker/Dockerfile index fa30056dc6..ef73a66679 100644 --- a/images/basic/docker/Dockerfile +++ b/images/basic/docker/Dockerfile @@ -8,7 +8,7 @@ RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ $(. /etc/os-release && echo "$VERSION_CODENAME") stable" > /etc/apt/sources.list.d/docker.list && \ apt-get update && \ - apt-get install -qqy docker-ce=5:27.5.1-1~ubuntu.24.04~noble docker-ce-cli=5:27.5.1-1~ubuntu.24.04~noble containerd.io docker-buildx-plugin docker-compose-plugin iproute2 + apt-get install -qqy docker-ce=5:28.5.2-1~ubuntu.24.04~noble docker-ce-cli=5:28.5.2-1~ubuntu.24.04~noble containerd.io docker-buildx-plugin docker-compose-plugin iproute2 COPY start-dockerd.sh . CMD /start-dockerd.sh diff --git a/runsc/config/config.go b/runsc/config/config.go index a4e28e1588..ca38c87163 100644 --- a/runsc/config/config.go +++ b/runsc/config/config.go @@ -120,7 +120,7 @@ type Config struct { EnableRaw bool `flag:"net-raw"` // AllowPacketEndpointWrite enables write operations on packet endpoints. - AllowPacketEndpointWrite bool `flag:"TESTONLY-allow-packet-endpoint-write"` + AllowPacketEndpointWrite bool `flag:"allow-packet-socket-write"` // HostGSO indicates that host segmentation offload is enabled. 
HostGSO bool `flag:"gso"` diff --git a/runsc/config/flags.go b/runsc/config/flags.go index c80eec4cde..c61606edaf 100644 --- a/runsc/config/flags.go +++ b/runsc/config/flags.go @@ -145,6 +145,7 @@ func RegisterFlags(flagSet *flag.FlagSet) { // Flags that control sandbox runtime behavior: network related. flagSet.Var(networkTypePtr(NetworkSandbox), "network", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.") flagSet.Bool("net-raw", false, "enable raw sockets. When false, raw sockets are disabled by removing CAP_NET_RAW from containers (`runsc exec` will still be able to utilize raw sockets). Raw sockets allow malicious containers to craft packets and potentially attack the network.") + flagSet.Bool("allow-packet-socket-write", false, "allow writes on AF_PACKET sockets. When false, writes on AF_PACKET sockets will fail. When turned on, untrusted workloads may potentially attack the network because of the ability to craft arbitrary packets.") flagSet.Bool("gso", true, "enable host segmentation offload if it is supported by a network device.") flagSet.Bool("software-gso", true, "enable gVisor segmentation offload when host offload can't be enabled.") flagSet.Bool("gvisor-gro", false, "enable gVisor generic receive offload") @@ -171,7 +172,6 @@ func RegisterFlags(flagSet *flag.FlagSet) { // Test flags, not to be used outside tests, ever. flagSet.Bool("TESTONLY-unsafe-nonroot", false, "TEST ONLY; do not ever use! This skips many security measures that isolate the host from the sandbox.") flagSet.String("TESTONLY-test-name-env", "", "TEST ONLY; do not ever use! Used for automated tests to improve logging.") - flagSet.Bool("TESTONLY-allow-packet-endpoint-write", false, "TEST ONLY; do not ever use! Used for tests to allow writes on packet sockets.") flagSet.Bool("TESTONLY-afs-syscall-panic", false, "TEST ONLY; do not ever use! 
Used for tests exercising gVisor panic reporting.") flagSet.String("TESTONLY-autosave-image-path", "", "TEST ONLY; enable auto save for syscall tests and set path for state file.") flagSet.Bool("TESTONLY-autosave-resume", false, "TEST ONLY; enable auto save and resume for syscall tests and set path for state file.") diff --git a/test/runner/main.go b/test/runner/main.go index 7483bbb05a..20d4de41bf 100644 --- a/test/runner/main.go +++ b/test/runner/main.go @@ -320,7 +320,7 @@ func runRunsc(tc *gtest.TestCase, spec *specs.Spec) error { "-network", *network, "-log-format=text", "-TESTONLY-unsafe-nonroot=true", - "-TESTONLY-allow-packet-endpoint-write=true", + "-allow-packet-socket-write=true", fmt.Sprintf("-panic-signal=%d", unix.SIGTERM), fmt.Sprintf("-iouring=%t", *ioUring), "-watchdog-action=panic",