Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

x86 Processor Flags Refinement #4183

Merged
merged 3 commits into from
Aug 27, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions compiler/env/ProcessorInfo.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,16 @@ enum TR_X86ProcessorFeatures
TR_X86ProcessorInfoInitialized = 0x80000000 // FIXME: Using a reserved bit for our purposes.
};

/**
 * @brief getFeatureFlagsMask Builds the bit mask of first-word processor
 *        features: TR_BuiltInFPU, TR_CMPXCHG8BInstruction,
 *        TR_CMOVInstructions, TR_MMXInstructions, TR_SSE and TR_SSE2.
 *
 * @return The bitwise OR of the feature bits listed above
 */
inline uint32_t getFeatureFlagsMask()
   {
   uint32_t mask = 0;

   mask |= TR_BuiltInFPU;
   mask |= TR_CMPXCHG8BInstruction;
   mask |= TR_CMOVInstructions;
   mask |= TR_MMXInstructions;
   mask |= TR_SSE;
   mask |= TR_SSE2;

   return mask;
   }

enum TR_X86ProcessorFeatures2
{
TR_SSE3 = 0x00000001,
Expand Down Expand Up @@ -124,6 +134,16 @@ enum TR_X86ProcessorFeatures2
// Not used by Intel = 0x80000000,
};

/**
 * @brief getFeatureFlags2Mask Builds the bit mask of second-word processor
 *        features: TR_SSSE3, TR_SSE4_1, TR_POPCNT, TR_AESNI, TR_OSXSAVE
 *        and TR_AVX.
 *
 * @return The bitwise OR of the feature bits listed above
 */
inline uint32_t getFeatureFlags2Mask()
   {
   uint32_t mask = 0;

   mask |= TR_SSSE3;
   mask |= TR_SSE4_1;
   mask |= TR_POPCNT;
   mask |= TR_AESNI;
   mask |= TR_OSXSAVE;
   mask |= TR_AVX;

   return mask;
   }

enum TR_X86ProcessorFeatures8
{
TR_FSGSBASE = 0x00000001,
Expand Down Expand Up @@ -160,6 +180,12 @@ enum TR_X86ProcessorFeatures8
// Reserved by Intel = 0x80000000,
};

/**
 * @brief getFeatureFlags8Mask Builds the bit mask of eighth-word processor
 *        features: TR_HLE and TR_RTM.
 *
 * @return The bitwise OR of the feature bits listed above
 */
inline uint32_t getFeatureFlags8Mask()
   {
   uint32_t mask = 0;

   mask |= TR_HLE;
   mask |= TR_RTM;

   return mask;
   }

enum TR_ProcessorDescription
{
TR_ProcessorUnknown = 0x00000000,
Expand Down
159 changes: 111 additions & 48 deletions compiler/x/codegen/OMRCodeGenerator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ namespace OMR { typedef OMR::X86::CodeGenerator CodeGeneratorConnector; }
#include "il/symbol/ResolvedMethodSymbol.hpp"
#include "infra/BitVector.hpp"
#include "infra/TRlist.hpp"
#include "infra/Assert.hpp"
#include "x/codegen/X86Ops.hpp"
#include "x/codegen/X86Register.hpp"
#include "env/CompilerEnv.hpp"
Expand Down Expand Up @@ -127,54 +128,54 @@ struct TR_X86ProcessorInfo
TR_UnknownVendor = 0x04
};

// Processor feature queries. Each accessor calls testAny()/testAll() directly
// on one of the three feature words (_featureFlags, _featureFlags2,
// _featureFlags8) with the enum bit(s) for the feature named by the accessor.
bool enabledXSAVE() {return _featureFlags2.testAny(TR_OSXSAVE);}
bool hasBuiltInFPU() {return _featureFlags.testAny(TR_BuiltInFPU);}
bool supportsVirtualModeExtension() {return _featureFlags.testAny(TR_VirtualModeExtension);}
bool supportsDebuggingExtension() {return _featureFlags.testAny(TR_DebuggingExtension);}
bool supportsPageSizeExtension() {return _featureFlags.testAny(TR_PageSizeExtension);}
bool supportsRDTSCInstruction() {return _featureFlags.testAny(TR_RDTSCInstruction);}
bool hasModelSpecificRegisters() {return _featureFlags.testAny(TR_ModelSpecificRegisters);}
bool supportsPhysicalAddressExtension() {return _featureFlags.testAny(TR_PhysicalAddressExtension);}
bool supportsMachineCheckException() {return _featureFlags.testAny(TR_MachineCheckException);}
bool supportsCMPXCHG8BInstruction() {return _featureFlags.testAny(TR_CMPXCHG8BInstruction);}
bool supportsCMPXCHG16BInstruction() {return _featureFlags2.testAny(TR_CMPXCHG16BInstruction);}
bool hasAPICHardware() {return _featureFlags.testAny(TR_APICHardware);}
bool hasMemoryTypeRangeRegisters() {return _featureFlags.testAny(TR_MemoryTypeRangeRegisters);}
bool supportsPageGlobalFlag() {return _featureFlags.testAny(TR_PageGlobalFlag);}
bool hasMachineCheckArchitecture() {return _featureFlags.testAny(TR_MachineCheckArchitecture);}
bool supportsCMOVInstructions() {return _featureFlags.testAny(TR_CMOVInstructions);}
// The only accessor here that uses testAll() with a two-bit argument;
// presumably both TR_BuiltInFPU and TR_CMOVInstructions must be set
// (flags32_t is not visible here -- TODO confirm).
bool supportsFCOMIInstructions() {return _featureFlags.testAll(TR_BuiltInFPU | TR_CMOVInstructions);}
bool hasPageAttributeTable() {return _featureFlags.testAny(TR_PageAttributeTable);}
bool has36BitPageSizeExtension() {return _featureFlags.testAny(TR_36BitPageSizeExtension);}
bool hasProcessorSerialNumber() {return _featureFlags.testAny(TR_ProcessorSerialNumber);}
bool supportsCLFLUSHInstruction() {return _featureFlags.testAny(TR_CLFLUSHInstruction);}
bool supportsDebugTraceStore() {return _featureFlags.testAny(TR_DebugTraceStore);}
bool hasACPIRegisters() {return _featureFlags.testAny(TR_ACPIRegisters);}
bool supportsMMXInstructions() {return _featureFlags.testAny(TR_MMXInstructions);}
bool supportsFastFPSavesRestores() {return _featureFlags.testAny(TR_FastFPSavesRestores);}
bool supportsSSE() {return _featureFlags.testAny(TR_SSE);}
bool supportsSSE2() {return _featureFlags.testAny(TR_SSE2);}
bool supportsSSE3() {return _featureFlags2.testAny(TR_SSE3);}
bool supportsSSSE3() {return _featureFlags2.testAny(TR_SSSE3);}
bool supportsSSE4_1() {return _featureFlags2.testAny(TR_SSE4_1);}
bool supportsSSE4_2() {return _featureFlags2.testAny(TR_SSE4_2);}
// AVX, AVX2, BMI1, BMI2 and FMA additionally require enabledXSAVE(),
// i.e. the TR_OSXSAVE bit must also be set in _featureFlags2.
bool supportsAVX() {return _featureFlags2.testAny(TR_AVX) && enabledXSAVE();}
bool supportsAVX2() {return _featureFlags8.testAny(TR_AVX2) && enabledXSAVE();}
bool supportsBMI1() {return _featureFlags8.testAny(TR_BMI1) && enabledXSAVE();}
bool supportsBMI2() {return _featureFlags8.testAny(TR_BMI2) && enabledXSAVE();}
bool supportsFMA() {return _featureFlags2.testAny(TR_FMA) && enabledXSAVE();}
bool supportsCLMUL() {return _featureFlags2.testAny(TR_CLMUL);}
bool supportsAESNI() {return _featureFlags2.testAny(TR_AESNI);}
bool supportsPOPCNT() {return _featureFlags2.testAny(TR_POPCNT);}
bool supportsSelfSnoop() {return _featureFlags.testAny(TR_SelfSnoop);}
// supportsTM() is keyed on the TR_RTM bit.
bool supportsTM() {return _featureFlags8.testAny(TR_RTM);}
bool supportsHyperThreading() {return _featureFlags.testAny(TR_HyperThreading);}
bool supportsHLE() {return _featureFlags8.testAny(TR_HLE);}
bool hasThermalMonitor() {return _featureFlags.testAny(TR_ThermalMonitor);}

// Fence availability is derived from other feature bits: MFENCE and LFENCE
// from TR_SSE2; SFENCE from testAny() over TR_SSE | TR_MMXInstructions.
bool supportsMFence() {return _featureFlags.testAny(TR_SSE2);}
bool supportsLFence() {return _featureFlags.testAny(TR_SSE2);}
bool supportsSFence() {return _featureFlags.testAny(TR_SSE | TR_MMXInstructions);}
// Processor feature queries. Each accessor forwards the enum bit for the
// feature it names to testFeatureFlags() (for _featureFlags) or
// testFeatureFlags2() (for _featureFlags2). Those wrappers call testFlag(),
// which fatally asserts that the bit is covered by the matching
// getFeatureFlags*Mask() before testing it.
bool enabledXSAVE() {return testFeatureFlags2(TR_OSXSAVE);}
bool hasBuiltInFPU() {return testFeatureFlags(TR_BuiltInFPU);}
bool supportsVirtualModeExtension() {return testFeatureFlags(TR_VirtualModeExtension);}
bool supportsDebuggingExtension() {return testFeatureFlags(TR_DebuggingExtension);}
bool supportsPageSizeExtension() {return testFeatureFlags(TR_PageSizeExtension);}
bool supportsRDTSCInstruction() {return testFeatureFlags(TR_RDTSCInstruction);}
bool hasModelSpecificRegisters() {return testFeatureFlags(TR_ModelSpecificRegisters);}
bool supportsPhysicalAddressExtension() {return testFeatureFlags(TR_PhysicalAddressExtension);}
bool supportsMachineCheckException() {return testFeatureFlags(TR_MachineCheckException);}
bool supportsCMPXCHG8BInstruction() {return testFeatureFlags(TR_CMPXCHG8BInstruction);}
bool supportsCMPXCHG16BInstruction() {return testFeatureFlags2(TR_CMPXCHG16BInstruction);}
bool hasAPICHardware() {return testFeatureFlags(TR_APICHardware);}
bool hasMemoryTypeRangeRegisters() {return testFeatureFlags(TR_MemoryTypeRangeRegisters);}
bool supportsPageGlobalFlag() {return testFeatureFlags(TR_PageGlobalFlag);}
bool hasMachineCheckArchitecture() {return testFeatureFlags(TR_MachineCheckArchitecture);}
bool supportsCMOVInstructions() {return testFeatureFlags(TR_CMOVInstructions);}
// FCOMI requires BOTH the built-in FPU and the CMOV feature bits.
// testFeatureFlags() ends in flag.testAny(), so passing the OR of the two bits
// would report support when only one of them is present. Query each bit on its
// own and require both, which restores the all-bits semantics this check needs.
bool supportsFCOMIInstructions() {return testFeatureFlags(TR_BuiltInFPU) && testFeatureFlags(TR_CMOVInstructions);}
// Processor feature queries (continued). Each accessor forwards the enum bit
// for the feature it names to testFeatureFlags(), testFeatureFlags2() or
// testFeatureFlags8(), selecting _featureFlags, _featureFlags2 or
// _featureFlags8 respectively.
bool hasPageAttributeTable() {return testFeatureFlags(TR_PageAttributeTable);}
bool has36BitPageSizeExtension() {return testFeatureFlags(TR_36BitPageSizeExtension);}
bool hasProcessorSerialNumber() {return testFeatureFlags(TR_ProcessorSerialNumber);}
bool supportsCLFLUSHInstruction() {return testFeatureFlags(TR_CLFLUSHInstruction);}
bool supportsDebugTraceStore() {return testFeatureFlags(TR_DebugTraceStore);}
bool hasACPIRegisters() {return testFeatureFlags(TR_ACPIRegisters);}
bool supportsMMXInstructions() {return testFeatureFlags(TR_MMXInstructions);}
bool supportsFastFPSavesRestores() {return testFeatureFlags(TR_FastFPSavesRestores);}
bool supportsSSE() {return testFeatureFlags(TR_SSE);}
bool supportsSSE2() {return testFeatureFlags(TR_SSE2);}
bool supportsSSE3() {return testFeatureFlags2(TR_SSE3);}
bool supportsSSSE3() {return testFeatureFlags2(TR_SSSE3);}
bool supportsSSE4_1() {return testFeatureFlags2(TR_SSE4_1);}
bool supportsSSE4_2() {return testFeatureFlags2(TR_SSE4_2);}
// AVX, AVX2, BMI1, BMI2 and FMA additionally require enabledXSAVE(),
// i.e. the TR_OSXSAVE bit must also be set in _featureFlags2.
bool supportsAVX() {return testFeatureFlags2(TR_AVX) && enabledXSAVE();}
bool supportsAVX2() {return testFeatureFlags8(TR_AVX2) && enabledXSAVE();}
bool supportsBMI1() {return testFeatureFlags8(TR_BMI1) && enabledXSAVE();}
bool supportsBMI2() {return testFeatureFlags8(TR_BMI2) && enabledXSAVE();}
bool supportsFMA() {return testFeatureFlags2(TR_FMA) && enabledXSAVE();}
bool supportsCLMUL() {return testFeatureFlags2(TR_CLMUL);}
bool supportsAESNI() {return testFeatureFlags2(TR_AESNI);}
bool supportsPOPCNT() {return testFeatureFlags2(TR_POPCNT);}
bool supportsSelfSnoop() {return testFeatureFlags(TR_SelfSnoop);}
// supportsTM() is keyed on the TR_RTM bit.
bool supportsTM() {return testFeatureFlags8(TR_RTM);}
bool supportsHyperThreading() {return testFeatureFlags(TR_HyperThreading);}
bool supportsHLE() {return testFeatureFlags8(TR_HLE);}
bool hasThermalMonitor() {return testFeatureFlags(TR_ThermalMonitor);}

// Fence availability is derived from other feature bits: MFENCE and LFENCE
// from TR_SSE2; SFENCE from a two-bit query (TR_SSE | TR_MMXInstructions),
// which testFlag() evaluates with testAny().
bool supportsMFence() {return testFeatureFlags(TR_SSE2);}
bool supportsLFence() {return testFeatureFlags(TR_SSE2);}
bool supportsSFence() {return testFeatureFlags(TR_SSE | TR_MMXInstructions);}
// True only when getX86Architecture() is non-zero, isGenuineIntel() holds and
// isIntelPentium() does not.
bool prefersMultiByteNOP() {return getX86Architecture() && isGenuineIntel() && !isIntelPentium();}

uint32_t getCPUStepping(uint32_t signature)
   {
   // Keep only the bits selected by CPUID_SIGNATURE_STEPPING_MASK.
   const uint32_t steppingBits = signature & CPUID_SIGNATURE_STEPPING_MASK;
   return steppingBits;
   }
Expand Down Expand Up @@ -223,6 +224,68 @@ struct TR_X86ProcessorInfo
friend class OMR::X86::CodeGenerator;

void initialize();

/**
* @brief testFlag Asserts that the feature being tested for is covered by the
* given mask and then checks whether the feature is set in the flag.
* The assertion exists to facilitate correctness checks for
* relocatable compilations: in order for the compiler to use a
* processor feature, the feature bit must be added to the mask so
* that the processor validation code also accounts for the use of
* said feature.
*
* NOTE(review): the assertion only requires (feature & mask) to be
* non-zero, and the result is flag.testAny(feature). When feature
* has several bits set, one bit in the mask satisfies the assertion
* and one bit set in the flag yields true -- confirm this is what
* multi-bit callers expect.
*
* @param flag Either _featureFlags, _featureFlags2, or _featureFlags8
* @param feature The feature bit(s) being tested for
* @param mask The mask returned by either getFeatureFlagsMask(),
* getFeatureFlags2Mask(), or getFeatureFlags8Mask()
*
* @return The result of flag.testAny(feature)
*/
bool testFlag(flags32_t &flag, uint32_t feature, uint32_t mask)
{
// Fatal if no bit of the requested feature is present in the mask.
TR_ASSERT_FATAL(feature & mask, "The %x feature needs to be added to the "
"getFeatureFlagsMask (or variant) function "
"for correctness in relocatable compiles!\n",
feature);

return flag.testAny(feature);
}

/**
 * @brief testFeatureFlags Tests a feature against _featureFlags, validating
 *        it against getFeatureFlagsMask() via testFlag
 *
 * @param feature The feature bit(s) being tested for
 *
 * @return Whatever testFlag returns for _featureFlags and this feature
 */
bool testFeatureFlags(uint32_t feature)
   {
   const uint32_t allowedFeatures = getFeatureFlagsMask();
   return testFlag(_featureFlags, feature, allowedFeatures);
   }

/**
 * @brief testFeatureFlags2 Tests a feature against _featureFlags2, validating
 *        it against getFeatureFlags2Mask() via testFlag
 *
 * @param feature The feature bit(s) being tested for
 *
 * @return Whatever testFlag returns for _featureFlags2 and this feature
 */
bool testFeatureFlags2(uint32_t feature)
   {
   const uint32_t allowedFeatures = getFeatureFlags2Mask();
   return testFlag(_featureFlags2, feature, allowedFeatures);
   }

/**
 * @brief testFeatureFlags8 Tests a feature against _featureFlags8, validating
 *        it against getFeatureFlags8Mask() via testFlag
 *
 * @param feature The feature bit(s) being tested for
 *
 * @return Whatever testFlag returns for _featureFlags8 and this feature
 */
bool testFeatureFlags8(uint32_t feature)
   {
   const uint32_t allowedFeatures = getFeatureFlags8Mask();
   return testFlag(_featureFlags8, feature, allowedFeatures);
   }
};

enum TR_PaddingProperties
Expand Down
24 changes: 22 additions & 2 deletions compiler/x/runtime/X86Runtime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,20 @@ inline unsigned long long _xgetbv(unsigned int ecx)
}
#endif /* defined(OMR_OS_WINDOWS) */

/**
 * @brief maskProcessorFlags Clears, in place, every bit of the three feature
 *        words that is not present in the corresponding mask
 *        (getFeatureFlagsMask(), getFeatureFlags2Mask(),
 *        getFeatureFlags8Mask()), i.e. the processor features the
 *        compiler does not care about.
 *
 * @param pBuffer The CPUID buffer whose _featureFlags, _featureFlags2 and
 *        _featureFlags8 fields are masked
 */
inline void maskProcessorFlags(TR_X86CPUIDBuffer* pBuffer)
   {
   const uint32_t keepFlags  = getFeatureFlagsMask();
   const uint32_t keepFlags2 = getFeatureFlags2Mask();
   const uint32_t keepFlags8 = getFeatureFlags8Mask();

   pBuffer->_featureFlags  &= keepFlags;
   pBuffer->_featureFlags2 &= keepFlags2;
   pBuffer->_featureFlags8 &= keepFlags8;
   }

char* feGetEnv(const char*);
inline bool jitGetCPUID(TR_X86CPUIDBuffer* pBuffer)
{
Expand Down Expand Up @@ -73,14 +87,20 @@ inline bool jitGetCPUID(TR_X86CPUIDBuffer* pBuffer)
pBuffer->_featureFlags8 = CPUInfo[EBX];

// Check for XSAVE
if(pBuffer->_featureFlags2 & 0x08000000) // OSXSAVE
if(pBuffer->_featureFlags2 & TR_OSXSAVE)
{
if(((6 & _xgetbv(0)) != 6) || feGetEnv("TR_DisableAVX")) // '6' = mask for XCR0[2:1]='11b' (XMM state and YMM state are enabled)
{
// Unset OSXSAVE if not enabled via CR0
pBuffer->_featureFlags2 &= ~0x08000000; // OSXSAVE
pBuffer->_featureFlags2 &= ~TR_OSXSAVE;
}
}

/* Mask out the bits the compiler does not care about.
* This is necessary for relocatable compilations; without
* this step, validations might fail because of mismatches
* in unused hardware features */
maskProcessorFlags(pBuffer);
return true;
}
else
Expand Down