Skip to content

Commit

Permalink
runsc: add flag --EXPERIMENTAL-reproduce-nat
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 578393763
  • Loading branch information
kevinGC authored and gvisor-bot committed Nov 1, 2023
1 parent 7f08016 commit b119cc3
Show file tree
Hide file tree
Showing 6 changed files with 129 additions and 4 deletions.
5 changes: 4 additions & 1 deletion runsc/boot/controller.go
Expand Up @@ -188,7 +188,10 @@ func newController(fd int, l *Loader) (*controller, error) {
ctrl.srv.Register(&debug{})

if eps, ok := l.k.RootNetworkNamespace().Stack().(*netstack.Stack); ok {
ctrl.srv.Register(&Network{Stack: eps.Stack})
ctrl.srv.Register(&Network{
Stack: eps.Stack,
Kernel: l.k,
})
}
if l.root.conf.ProfileEnable {
ctrl.srv.Register(control.NewProfile(l.k))
Expand Down
33 changes: 30 additions & 3 deletions runsc/boot/network.go
Expand Up @@ -16,6 +16,7 @@ package boot

import (
"fmt"
"io"
"net"
"os"
"runtime"
Expand All @@ -25,6 +26,8 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/hostos"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/socket/netfilter"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/link/ethernet"
"gvisor.dev/gvisor/pkg/tcpip/link/fdbased"
Expand Down Expand Up @@ -68,7 +71,8 @@ var (

// Network exposes methods that can be used to configure a network stack.
type Network struct {
// Stack is the network stack that this type's methods configure (for
// example, CreateLinksAndRoutes installs the route table on it).
Stack *stack.Stack
Stack *stack.Stack
// Kernel is the sandbox kernel. Its root user namespace is passed to
// netfilter.SetEntries when a NAT ruleset is installed.
Kernel *kernel.Kernel
}

// Route represents a route in the network stack.
Expand Down Expand Up @@ -152,6 +156,10 @@ type CreateLinksAndRoutesArgs struct {

// PCAP indicates that FilePayload also contains a PCAP log file.
PCAP bool

// NATBlob indicates whether FilePayload also contains an iptables NAT
// ruleset.
NATBlob bool
}

// IPWithPrefix is an address with its subnet prefix length.
Expand Down Expand Up @@ -200,6 +208,9 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
if args.PCAP {
wantFDs++
}
if args.NATBlob {
wantFDs++
}
if got := len(args.FilePayload.Files); got != wantFDs {
return fmt.Errorf("args.FilePayload.Files has %d FDs but we need %d entries based on FDBasedLinks, XDPLinks, and PCAP", got, wantFDs)
}
Expand Down Expand Up @@ -237,6 +248,7 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
}

// Setup fdbased or XDP links.
fdOffset := 0
if len(args.FDBasedLinks) > 0 {
// Choose a dispatch mode.
dispatchMode := fdbased.RecvMMsg
Expand All @@ -250,7 +262,6 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
log.Infof("Host kernel version < 5.6, falling back to RecvMMsg dispatch")
}

fdOffset := 0
for _, link := range args.FDBasedLinks {
nicID++
nicids[link.Name] = nicID
Expand Down Expand Up @@ -329,7 +340,6 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
nicids[link.Name] = nicID

// Get the AF_XDP socket.
fdOffset := 0
oldFD := args.FilePayload.Files[fdOffset].Fd()
fd, err := unix.Dup(int(oldFD))
if err != nil {
Expand Down Expand Up @@ -437,6 +447,23 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct

log.Infof("Setting routes %+v", routes)
n.Stack.SetRouteTable(routes)

// Set NAT table rules if necessary.
if args.NATBlob {
log.Infof("Replacing NAT table")
if _, err := unix.Seek(int(args.FilePayload.Files[fdOffset].Fd()), 0, unix.SEEK_SET); err != nil {
return fmt.Errorf("failed to seek: %v", err)
}
iptReplaceBlob, err := io.ReadAll(args.FilePayload.Files[fdOffset])
if err != nil {
return fmt.Errorf("failed to read iptables blob: %v", err)
}
fdOffset++
if err := netfilter.SetEntries(n.Kernel.RootUserNamespace(), n.Stack, iptReplaceBlob, false); err != nil {
return fmt.Errorf("failed to SetEntries: %v", err)
}
}

return nil
}

Expand Down
4 changes: 4 additions & 0 deletions runsc/config/config.go
Expand Up @@ -330,6 +330,10 @@ type Config struct {
// explicitlySet contains whether a flag was explicitly set on the command-line from which this
// Config was constructed. Nil when the Config was not initialized from a FlagSet.
explicitlySet map[string]struct{}

// ReproduceNAT, when true, tells runsc to scrape the host network
// namespace's NAT iptables and reproduce it inside the sandbox.
ReproduceNAT bool `flag:"EXPERIMENTAL-reproduce-nat"`
}

func (c *Config) validate() error {
Expand Down
1 change: 1 addition & 0 deletions runsc/config/flags.go
Expand Up @@ -120,6 +120,7 @@ func RegisterFlags(flagSet *flag.FlagSet) {
flagSet.Int("num-network-channels", 1, "number of underlying channels(FDs) to use for network link endpoints.")
flagSet.Bool("buffer-pooling", true, "enable allocation of buffers from a shared pool instead of the heap.")
flagSet.Bool("EXPERIMENTAL-afxdp", false, "EXPERIMENTAL. Use an AF_XDP socket to receive packets.")
flagSet.Bool("EXPERIMENTAL-reproduce-nat", false, "EXPERIMENTAL. Scrape the host netns NAT table and reproduce it in the sandbox.")

// Flags that control sandbox runtime behavior: accelerator related.
flagSet.Bool("nvproxy", false, "EXPERIMENTAL: enable support for Nvidia GPUs")
Expand Down
11 changes: 11 additions & 0 deletions runsc/sandbox/network.go
Expand Up @@ -319,6 +319,17 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, conf *con
args.FilePayload.Files = append(args.FilePayload.Files, pcap)
}

// Pass the host's NAT table if requested.
if conf.ReproduceNAT {
args.NATBlob = true
f, cleanup, err := writeNATBlob()
if err != nil {
return fmt.Errorf("failed to write NAT blob: %v", err)
}
defer cleanup()
args.FilePayload.Files = append(args.FilePayload.Files, f)
}

log.Debugf("Setting up network, config: %+v", args)
if err := conn.Call(boot.NetworkCreateLinksAndRoutes, &args, nil); err != nil {
return fmt.Errorf("creating links and routes: %w", err)
Expand Down
79 changes: 79 additions & 0 deletions runsc/sandbox/network_unsafe.go
Expand Up @@ -15,9 +15,12 @@
package sandbox

import (
"fmt"
"os"
"unsafe"

"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
)

type ethtoolValue struct {
Expand Down Expand Up @@ -53,3 +56,79 @@ func isGSOEnabled(fd int, intf string) (bool, error) {

return val.val != 0, nil
}

// writeNATBlob queries the kernel for the iptables "nat" table and writes it
// to a temporary file as a marshalled linux.IPTReplace header immediately
// followed by the raw entry table.
//
// On success it returns the open temporary file and a cleanup function that
// closes and removes that file. The file offset is left at the end of the
// written data, so a reader must seek back to the start before reading. On
// failure the returned file and cleanup function are nil and any temporary
// file that was created has already been removed.
func writeNATBlob() (*os.File, func(), error) {
	// Open a socket to use with iptables.
	iptSock, err := unix.Socket(unix.AF_INET, unix.SOCK_RAW, unix.IPPROTO_ICMP)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to open socket for iptables: %w", err)
	}
	defer unix.Close(iptSock)

	// Build the NUL-padded table name. A direct slice-to-array conversion
	// would panic at run time because the slice is shorter than the array,
	// so copy into a zeroed array instead.
	var natName [linux.XT_TABLE_MAXNAMELEN]byte
	copy(natName[:], "nat")

	// Get the iptables info.
	natInfo := linux.IPTGetinfo{Name: natName}
	natInfoLen := int32(unsafe.Sizeof(linux.IPTGetinfo{}))
	if _, _, errno := unix.Syscall6(unix.SYS_GETSOCKOPT,
		uintptr(iptSock),
		unix.SOL_IP,
		linux.IPT_SO_GET_INFO,
		uintptr(unsafe.Pointer(&natInfo)),
		uintptr(unsafe.Pointer(&natInfoLen)),
		0); errno != 0 {
		return nil, nil, fmt.Errorf("failed to call IPT_SO_GET_INFO: %w", errno)
	}

	// Get the iptables entries. The buffer holds the request header followed
	// by room for natInfo.Size bytes of entry table.
	entries := linux.IPTGetEntries{Name: natName, Size: natInfo.Size}
	entriesHdrLen := unsafe.Sizeof(entries)
	entriesBufLen := uint32(entriesHdrLen) + natInfo.Size
	entriesBuf := make([]byte, entriesBufLen)
	entries.MarshalUnsafe(entriesBuf[:entriesHdrLen])
	if _, _, errno := unix.Syscall6(unix.SYS_GETSOCKOPT,
		uintptr(iptSock),
		unix.SOL_IP,
		linux.IPT_SO_GET_ENTRIES,
		uintptr(unsafe.Pointer(&entriesBuf[0])),
		uintptr(unsafe.Pointer(&entriesBufLen)),
		0); errno != 0 {
		return nil, nil, fmt.Errorf("failed to call IPT_SO_GET_ENTRIES: %w", errno)
	}

	// Construct an IPTReplace that can be used to set rules.
	replace := linux.IPTReplace{
		Name:       natName,
		ValidHooks: natInfo.ValidHooks,
		NumEntries: natInfo.NumEntries,
		Size:       natInfo.Size,
		HookEntry:  natInfo.HookEntry,
		Underflow:  natInfo.Underflow,
		// We don't implement counters yet.
		NumCounters: 0,
		Counters:    0,
	}

	// Marshal into a blob: the IPTReplace header, then the entry table that
	// follows the IPTGetEntries header in entriesBuf.
	replaceHdrLen := unsafe.Sizeof(replace)
	replaceBuf := make([]byte, replaceHdrLen+uintptr(natInfo.Size))
	replace.MarshalUnsafe(replaceBuf[:replaceHdrLen])
	if n := copy(replaceBuf[replaceHdrLen:], entriesBuf[entriesHdrLen:]); uint32(n) != natInfo.Size {
		return nil, nil, fmt.Errorf("failed to populate entry table: copied %d bytes, but wanted to copy %d", n, natInfo.Size)
	}

	// Write blob to file.
	blobFile, err := os.CreateTemp("", "iptables-blob-")
	if err != nil {
		return nil, nil, fmt.Errorf("failed to create iptables blob file: %w", err)
	}
	cleanup := func() {
		// Best effort: nothing useful can be done if closing or removing
		// a temporary file fails.
		_ = blobFile.Close()
		_ = os.Remove(blobFile.Name())
	}
	if n, err := blobFile.Write(replaceBuf); n != len(replaceBuf) || err != nil {
		cleanup()
		return nil, nil, fmt.Errorf("failed to write iptables blob: wrote %d bytes (%d expected) and got error: %v", n, len(replaceBuf), err)
	}
	return blobFile, cleanup, nil
}

0 comments on commit b119cc3

Please sign in to comment.