From ce3204e967fc283c6513d5d717e1167fd86c42d0 Mon Sep 17 00:00:00 2001 From: Jan Vorlicek Date: Tue, 30 Jan 2018 08:48:09 -0800 Subject: [PATCH] Fix detection of YMM registers presence It was found that we incorrectly try to restore YMM registers in RtlRestoreContext when the processor supports xstate, but doesn't have YMM registers. This change fixes that by testing the YMM presence flag too. --- src/pal/src/include/pal/context.h | 20 ++++++++++++++++---- src/pal/src/thread/context.cpp | 4 ++-- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/pal/src/include/pal/context.h b/src/pal/src/include/pal/context.h index bd6797760d37..a488e0af0928 100644 --- a/src/pal/src/include/pal/context.h +++ b/src/pal/src/include/pal/context.h @@ -157,6 +157,13 @@ using asm_sigcontext::_xstate; #define FPSTATE_RESERVED padding #endif +// The mask for YMM registers presence flag stored in the xstate_bv. On current Linuxes, this definition is +// only in internal headers, so we define it here. The xstate_bv is extracted from the processor xstate bit +// vector register, so the value is OS independent. +#ifndef XSTATE_YMM +#define XSTATE_YMM 4 +#endif + inline _fpx_sw_bytes *FPREG_FpxSwBytes(const ucontext_t *uc) { // Bytes 464..511 in the FXSAVE format are available for software to use for any purpose. 
In this case, they are used to @@ -174,7 +181,7 @@ inline UINT32 FPREG_ExtendedSize(const ucontext_t *uc) return FPREG_FpxSwBytes(uc)->extended_size; } -inline bool FPREG_HasExtendedState(const ucontext_t *uc) +inline bool FPREG_HasYmmRegisters(const ucontext_t *uc) { // See comments in /usr/include/x86_64-linux-gnu/asm/sigcontext.h for info on how to detect if extended state is present static_assert_no_msg(FP_XSTATE_MAGIC2_SIZE == sizeof(UINT32)); @@ -191,14 +198,19 @@ inline bool FPREG_HasExtendedState(const ucontext_t *uc) } _ASSERTE(extendedSize >= FP_XSTATE_MAGIC2_SIZE); - return *reinterpret_cast<UINT32*>(reinterpret_cast<UINT8*>(FPREG_Fpstate(uc)) + (extendedSize - FP_XSTATE_MAGIC2_SIZE)) - == FP_XSTATE_MAGIC2; + if (*reinterpret_cast<UINT32*>(reinterpret_cast<UINT8*>(FPREG_Fpstate(uc)) + (extendedSize - FP_XSTATE_MAGIC2_SIZE)) + != FP_XSTATE_MAGIC2) + { + return false; + } + + return (FPREG_FpxSwBytes(uc)->xstate_bv & XSTATE_YMM) != 0; } inline void *FPREG_Xstate_Ymmh(const ucontext_t *uc) { static_assert_no_msg(sizeof(reinterpret_cast<_xstate *>(FPREG_Fpstate(uc))->ymmh.ymmh_space) == 16 * 16); - _ASSERTE(FPREG_HasExtendedState(uc)); + _ASSERTE(FPREG_HasYmmRegisters(uc)); return reinterpret_cast<_xstate *>(FPREG_Fpstate(uc))->ymmh.ymmh_space; } diff --git a/src/pal/src/thread/context.cpp b/src/pal/src/thread/context.cpp index cc794d5f6974..0707f4cc6b96 100644 --- a/src/pal/src/thread/context.cpp +++ b/src/pal/src/thread/context.cpp @@ -470,7 +470,7 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) #if defined(_AMD64_) && defined(XSTATE_SUPPORTED) if ((lpContext->ContextFlags & CONTEXT_XSTATE) == CONTEXT_XSTATE) { - _ASSERTE(FPREG_HasExtendedState(native)); + _ASSERTE(FPREG_HasYmmRegisters(native)); memcpy_s(FPREG_Xstate_Ymmh(native), sizeof(M128A) * 16, lpContext->VectorRegister, sizeof(M128A) * 16); } #endif //_AMD64_ && XSTATE_SUPPORTED @@ -569,7 +569,7 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex { // TODO: Enable 
for all Unix systems #if XSTATE_SUPPORTED - if (FPREG_HasExtendedState(native)) + if (FPREG_HasYmmRegisters(native)) { memcpy_s(lpContext->VectorRegister, sizeof(M128A) * 16, FPREG_Xstate_Ymmh(native), sizeof(M128A) * 16); }