From 1fd676a3623200bff38503cb2d0c4941170af14b Mon Sep 17 00:00:00 2001 From: Konstantin Bogomolov Date: Mon, 6 May 2024 16:10:06 -0700 Subject: [PATCH] Set limit on how big MemoryFile.Allocate calls can be. Either TotalHostMem or TotalMem is a good candidate for the limit because, when either of these is set, we should not be going over it. The motivation for this is to help catch syscalls causing allocations with size values that are blatantly bad. PiperOrigin-RevId: 631215732 --- pkg/sentry/pgalloc/pgalloc.go | 29 ++++++++++++++++++++++++++ pkg/sentry/platform/systrap/systrap.go | 4 +++- pkg/sentry/usage/memory.go | 5 +++++ runsc/boot/loader.go | 9 +++++++- 4 files changed, 45 insertions(+), 2 deletions(-) diff --git a/pkg/sentry/pgalloc/pgalloc.go b/pkg/sentry/pgalloc/pgalloc.go index 8ce1c9f9d5..4ae06cbbef 100644 --- a/pkg/sentry/pgalloc/pgalloc.go +++ b/pkg/sentry/pgalloc/pgalloc.go @@ -222,6 +222,11 @@ type MemoryFileOpts struct { // RestoreID is an opaque string used to reassociate the MemoryFile with its // replacement during restore. RestoreID string + + // EnforceMaximumAllocatable is a flag that governs whether the MemoryFile + // will be limited in size of total allocations by + // usage.MaximumAllocatableBytes. + EnforceMaximumAllocatable bool } // DelayedEvictionType is the type of MemoryFileOpts.DelayedEviction. @@ -539,6 +544,11 @@ func (f *MemoryFile) allocate(length uint64, opts *AllocOpts) (memmap.FileRange, f.mu.Lock() defer f.mu.Unlock() + if !f.hasSpaceToAllocate(length) { + log.Debugf("Enforcing memory limit on allocation of size %d, max is %d, already have %d", length, usage.MaximumAllocatableBytes, f.usageExpected) + return memmap.FileRange{}, linuxerr.ENOMEM + } + // Align hugepage-and-larger allocations on hugepage boundaries to try // to take advantage of hugetmpfs. 
alignment := uint64(hostarch.PageSize) @@ -583,6 +593,25 @@ func (f *MemoryFile) allocate(length uint64, opts *AllocOpts) (memmap.FileRange, return fr, nil } +func (f *MemoryFile) hasSpaceToAllocate(length uint64) bool { + if f.opts.EnforceMaximumAllocatable && usage.MaximumAllocatableBytes != 0 && ((f.usageExpected+length) > usage.MaximumAllocatableBytes || (f.usageExpected+length) < f.usageExpected) { + // f.usageExpected is not guaranteed to be correct because it is + // updated only when f.UpdateUsage is called periodically. + // To eliminate false-positives double check against the exact + // measure; we don't care as much about false-negatives, which + // helps avoid a host-syscall via f.TotalUsage in the happy-path. + exactUsage, err := f.TotalUsage() + if err != nil { + log.Warningf("Failed to fetch total usage for memory file: %v", err) + return false + } + if (exactUsage+length) > usage.MaximumAllocatableBytes || (exactUsage+length) < exactUsage { + return false + } + } + return true +} + // findAvailableRange returns an available range in the usageSet. 
// // Note that scanning for available slots takes place from end first backwards, diff --git a/pkg/sentry/platform/systrap/systrap.go b/pkg/sentry/platform/systrap/systrap.go index 5da730e762..10bf91f89c 100644 --- a/pkg/sentry/platform/systrap/systrap.go +++ b/pkg/sentry/platform/systrap/systrap.go @@ -427,7 +427,9 @@ func createMemoryFile() (*pgalloc.MemoryFile, error) { return nil, fmt.Errorf("error creating memfd: %v", err) } memfile := os.NewFile(uintptr(fd), memfileName) - mf, err := pgalloc.NewMemoryFile(memfile, pgalloc.MemoryFileOpts{}) + mf, err := pgalloc.NewMemoryFile(memfile, pgalloc.MemoryFileOpts{ + EnforceMaximumAllocatable: true, + }) if err != nil { memfile.Close() return nil, fmt.Errorf("error creating pgalloc.MemoryFile: %v", err) diff --git a/pkg/sentry/usage/memory.go b/pkg/sentry/usage/memory.go index d03274057e..675a47be1d 100644 --- a/pkg/sentry/usage/memory.go +++ b/pkg/sentry/usage/memory.go @@ -374,6 +374,11 @@ var ( // MaximumTotalMemoryBytes is the maximum reported total system memory. // The 0 value indicates no maximum. MaximumTotalMemoryBytes uint64 + + // MaximumAllocatableBytes is the maximum allowed to be allocated from a + // single memory file. Usually this is the same as + // MaximumTotalMemoryBytes. + MaximumAllocatableBytes uint64 ) // TotalMemory returns the "total usable memory" available. diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 34193e7aaf..d18081e2ba 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -513,6 +513,9 @@ func New(args Args) (*Loader, error) { // As per tmpfs(5), the default size limit is 50% of total physical RAM. // See mm/shmem.c:shmem_default_max_blocks(). tmpfs.SetDefaultSizeLimit(args.TotalHostMem / 2) + // Set a generous but sane limit on the maximum allowable size for + // memory file allocations. 
+ usage.MaximumAllocatableBytes = args.TotalHostMem } if args.TotalMem > 0 { @@ -520,6 +523,8 @@ func New(args Args) (*Loader, error) { // use /proc/meminfo can make allocations based on this limit. usage.MinimumTotalMemoryBytes = args.TotalMem usage.MaximumTotalMemoryBytes = args.TotalMem + // Reset max allocatable to TotalMem because it's smaller than TotalHostMem. + usage.MaximumAllocatableBytes = args.TotalMem log.Infof("Setting total memory to %.2f GB", float64(args.TotalMem)/(1<<30)) } @@ -733,7 +738,9 @@ func createMemoryFile() (*pgalloc.MemoryFile, error) { // We can't enable pgalloc.MemoryFileOpts.UseHostMemcgPressure even if // there are memory cgroups specified, because at this point we're already // in a mount namespace in which the relevant cgroupfs is not visible. - mf, err := pgalloc.NewMemoryFile(memfile, pgalloc.MemoryFileOpts{}) + mf, err := pgalloc.NewMemoryFile(memfile, pgalloc.MemoryFileOpts{ + EnforceMaximumAllocatable: true, + }) if err != nil { _ = memfile.Close() return nil, fmt.Errorf("error creating pgalloc.MemoryFile: %w", err)