diff --git a/pkg/ring0/entry_amd64.s b/pkg/ring0/entry_amd64.s index 0acf3a96c2..a804b5d694 100644 --- a/pkg/ring0/entry_amd64.s +++ b/pkg/ring0/entry_amd64.s @@ -88,13 +88,8 @@ #define PTRACE_FS_BASE 168 // +checkoffset linux PtraceRegs.Fs_base #define PTRACE_GS_BASE 176 // +checkoffset linux PtraceRegs.Gs_base -// The value for XCR0 is defined to xsave/xrstor everything except for PKRU and -// AMX regions. -// TODO(gvisor.dev/issues/9896): Implement AMX support. -// TODO(gvisor.dev/issues/10087): Implement PKRU support. -#define XCR0_DISABLED_MASK ((1 << 9) | (1 << 17) | (1 << 18)) -#define XCR0_EAX (0xffffffff ^ XCR0_DISABLED_MASK) -#define XCR0_EDX 0xffffffff +#define XCR0_EAX 231 // +checkconst fpu SupportedXFeatureStates +#define XCR0_EDX 0 // Saves a register set. // diff --git a/pkg/sentry/arch/fpu/fpu_amd64.go b/pkg/sentry/arch/fpu/fpu_amd64.go index 7c6a10f51a..a85e37dd08 100644 --- a/pkg/sentry/arch/fpu/fpu_amd64.go +++ b/pkg/sentry/arch/fpu/fpu_amd64.go @@ -18,6 +18,7 @@ package fpu import ( + "errors" "fmt" "io" @@ -60,12 +61,37 @@ const ( const ( // XFEATURE_MASK_FPSSE is xsave features that are always enabled in // signal frame fpstate. - XFEATURE_MASK_FPSSE = 0x3 + XFEATURE_MASK_FPSSE = cpuid.XSAVEFeatureX87 | cpuid.XSAVEFeatureSSE // FXSAVE_AREA_SIZE is the size of the FXSAVE area. FXSAVE_AREA_SIZE = 512 ) +// LINT.IfChange +const ( + // SupportedXFeatureStates contains all xfeatures supported right now. + // + // TODO(gvisor.dev/issues/9896): Implement AMX support. + // TODO(gvisor.dev/issues/10087): Implement PKRU support. + SupportedXFeatureStates = cpuid.XSAVEFeatureX87 | + cpuid.XSAVEFeatureSSE | + cpuid.XSAVEFeatureAVX | + cpuid.XSAVEFeatureAVX512op | + cpuid.XSAVEFeatureAVX512zmm0 | + cpuid.XSAVEFeatureAVX512zmm16 + + // ignoredXFeatureStates contains those xfeatures that may be + // present in a buffer but are safe to skip during restore. + // + // Intel MPX deprecated in the Linux kernel. + // PKRU states could be leaked. This issue was fixed in the 6.6 kernel. + ignoredXFeatureStates = cpuid.XSAVEFeatureBNDREGS | + cpuid.XSAVEFeatureBNDCSR | + cpuid.XSAVEFeaturePKRU +) + +// LINT.ThenChange(../../platform/systrap/sysmsg/sysmsg_offsets.h) + // initX86FPState (defined in asm files) sets up initial state. func initX86FPState(data *byte, useXsave bool) @@ -261,8 +287,13 @@ func (s *State) PtraceSetXstateRegs(src io.Reader, maxlen int, featureSet cpuid. return n, nil } +// ErrUnsupportedStateCleared is reported if an userspace state contains any +// unsupported features. +var ErrUnsupportedStateCleared = errors.New("contains unsupported states") + // SanitizeUser mutates s to ensure that restoring it is safe. -func (s *State) SanitizeUser(featureSet cpuid.FeatureSet) { +func (s *State) SanitizeUser(featureSet cpuid.FeatureSet) error { + var err error f := *s // Force reserved bits in MXCSR to 0. This is consistent with Linux. @@ -271,12 +302,17 @@ func (s *State) SanitizeUser(featureSet cpuid.FeatureSet) { if len(f) >= minXstateBytes { // Users can't enable *more* XCR0 bits than what we, and the CPU, support. xstateBV := hostarch.ByteOrder.Uint64(f[xstateBVOffset:]) - xstateBV &= featureSet.ValidXCR0Mask() + xcr0Mask := featureSet.ValidXCR0Mask() & (SupportedXFeatureStates | ignoredXFeatureStates) + if xstateBV&xcr0Mask != xstateBV { + err = ErrUnsupportedStateCleared + } + xstateBV &= featureSet.ValidXCR0Mask() & SupportedXFeatureStates hostarch.ByteOrder.PutUint64(f[xstateBVOffset:], xstateBV) // Force XCOMP_BV and reserved bytes in the XSAVE header to 0. reserved := f[xsaveHeaderZeroedOffset : xsaveHeaderZeroedOffset+xsaveHeaderZeroedBytes] clear(reserved) } + return err } var ( @@ -357,7 +393,7 @@ func (s *State) AfterLoad() { supportedBV := fxsaveBV hostFeatureSet := cpuid.HostFeatureSet() if hostFeatureSet.UseXsave() { - supportedBV = hostFeatureSet.ValidXCR0Mask() + supportedBV = hostFeatureSet.ValidXCR0Mask() & SupportedXFeatureStates } // What was in use? @@ -367,7 +403,7 @@ func (s *State) AfterLoad() { } // Supported features must be a superset of saved features. - if savedBV&^supportedBV != 0 { + if savedBV&^(supportedBV|ignoredXFeatureStates) != 0 { panic(ErrLoadingState{supportedFeatures: supportedBV, savedFeatures: savedBV}) } diff --git a/pkg/sentry/arch/signal_amd64.go b/pkg/sentry/arch/signal_amd64.go index 4dd7e13329..f8c6580f69 100644 --- a/pkg/sentry/arch/signal_amd64.go +++ b/pkg/sentry/arch/signal_amd64.go @@ -299,7 +299,10 @@ func (c *Context64) SignalRestore(st *Stack, rt bool, featureSet cpuid.FeatureSe c.fpState.Reset() return 0, linux.SignalStack{}, err } - c.fpState.SanitizeUser(featureSet) + if err := c.fpState.SanitizeUser(featureSet); err != nil { + c.fpState.Reset() + return 0, linux.SignalStack{}, linuxerr.EFAULT + } } return uc.Sigset, uc.Stack, nil diff --git a/pkg/sentry/platform/systrap/sysmsg/sysmsg_offsets.h b/pkg/sentry/platform/systrap/sysmsg/sysmsg_offsets.h index 1e74f89fd9..e433656952 100644 --- a/pkg/sentry/platform/systrap/sysmsg/sysmsg_offsets.h +++ b/pkg/sentry/platform/systrap/sysmsg/sysmsg_offsets.h @@ -20,13 +20,11 @@ // for the pkg/sentry/platform/systrap/usertrap package. #define FAULT_OPCODE 0x06 -// The value for XCR0 is defined to xsave/xrstor everything except for PKRU and -// AMX regions. -// TODO(gvisor.dev/issues/9896): Implement AMX support. -// TODO(gvisor.dev/issues/10087): Implement PKRU support. -#define XCR0_DISABLED_MASK ((1 << 9) | (1 << 17) | (1 << 18)) -#define XCR0_EAX (0xffffffff ^ XCR0_DISABLED_MASK) -#define XCR0_EDX 0xffffffff +// LINT.IfChange +#define XCR0_EAX 0xe7UL +#define XCR0_EDX 0 +#define XCR0_DISABLED_MASK (~XCR0_EAX) +// LINT.ThenChange(../../../arch/fpu/fpu_amd64.go) // LINT.IfChange #define MAX_FPSTATE_LEN 3584 diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index dd53cc79b5..603cccfd79 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -1001,6 +1001,7 @@ cc_binary( "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", + "@com_google_absl//absl/log", ], ) diff --git a/test/syscalls/linux/fpsig_mut_amd64.cc b/test/syscalls/linux/fpsig_mut_amd64.cc index f76ce8a9b0..21e8be46fd 100644 --- a/test/syscalls/linux/fpsig_mut_amd64.cc +++ b/test/syscalls/linux/fpsig_mut_amd64.cc @@ -15,10 +15,16 @@ // This program verifies that application floating point state is visible in // signal frames, and that changes to said state is visible after the signal // handler returns. +#include +#include #include #include +#include + +#include #include "gtest/gtest.h" +#include "absl/log/log.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" @@ -56,6 +62,19 @@ void sigusr1(int s, siginfo_t* siginfo, void* _uc) { uc_xmm0->element[0] = static_cast(kNewFPRegValue); } +volatile uint64_t testXsaveFeature; +void sigusr2(int s, siginfo_t* siginfo, void* _uc) { + ucontext_t* uc = reinterpret_cast(_uc); + *(uint64_t*)((uint8_t*)uc->uc_mcontext.fpregs + 512) |= 1 << testXsaveFeature; +} + +ucontext_t safe_ctx; +volatile bool segvTriggered; +void sigsegv(int s, siginfo_t* siginfo, void* _uc) { + segvTriggered = true; + setcontext(&safe_ctx); +} + TEST(FPSigTest, StateInFrame) { pid = getpid(); tid = gettid(); @@ -105,6 +124,50 @@ TEST(FPSigTest, StateInFrame) { EXPECT_EQ(got, kNewFPRegValue); } +TEST(FPSigTest, XFeaturesInFrame) { + pid = getpid(); + tid = gettid(); + + struct sigaction sa = {}; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + sa.sa_sigaction = sigusr2; + ASSERT_THAT(sigaction(SIGUSR2, &sa, nullptr), SyscallSucceeds()); + sa.sa_sigaction = sigsegv; + ASSERT_THAT(sigaction(SIGSEGV, &sa, nullptr), SyscallSucceeds()); + + for (testXsaveFeature = 0; testXsaveFeature < 64; testXsaveFeature++) { + getcontext(&safe_ctx); + if (segvTriggered) { + LOG(INFO) << testXsaveFeature << " triggered SEGV"; + segvTriggered = false; + } else { + asm volatile( + "movl %[pid], %%edi;" + "movl %[tid], %%esi;" + "movl %[sig], %%edx;" + "movl %[killnr], %%eax;" + "syscall;" + : + : [killnr] "i"(__NR_tgkill), [pid] "rm"(pid), [tid] "rm"(tid), + [sig] "i"(SIGUSR2) + : "rax", "rdi", "rsi", "rdx", + // Clobbered by syscall. + "rcx", "r11"); + switch (testXsaveFeature) { + case 8: // PT (Processor Trace) + case 13: // HDC (Hardware Debug Controls) + case 17: // LBR (Last Branch Record) + case 18: // CET (Control-flow Enforcement Technology) + case 19: // HRESET (Host Reset) + FAIL() << testXsaveFeature << " didn't trigger a fault"; + default: + LOG(INFO) << testXsaveFeature << " has been accepted"; + } + } + } +} + } // namespace } // namespace testing