Skip to content

Commit

Permalink
Disable x87 floating point for 32-bit code generation
Browse files Browse the repository at this point in the history
The minimum target processor level for OMR code generation is Pentium 4,
which has support for SSE2.  All modern operating systems are assumed
to support preserving the SSE state.  Disable x87 code generation in
preparation for removal.

Issue: #946

Signed-off-by: Daryl Maier <maier@ca.ibm.com>
  • Loading branch information
0xdaryl committed Apr 27, 2021
1 parent 8b6089f commit 576745e
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 48 deletions.
19 changes: 13 additions & 6 deletions compiler/x/codegen/FPTreeEvaluator.cpp
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 2000, 2020 IBM Corp. and others
* Copyright (c) 2000, 2021 IBM Corp. and others
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
Expand Down Expand Up @@ -513,8 +513,19 @@ TR::Register *OMR::X86::TreeEvaluator::fpReturnEvaluator(TR::Node *node, TR::Cod
TR_ASSERT(returnRegister, "Return node's child should evaluate to a register");
TR::Compilation *comp = cg->comp();

const TR::X86LinkageProperties &linkageProperties = cg->getProperties();
TR::RealRegister::RegNum machineReturnRegister =
(returnRegister->isSinglePrecision())? linkageProperties.getFloatReturnRegister() : linkageProperties.getDoubleReturnRegister();

/**
* On 32-bit targets, regardless of whether the target processor
* supports SSE or not, some linkages may still require a floating
* point value to be returned on the x87 stack (in ST0, for
* example). If so, the value in an XMM register needs to be
* coerced into the appropriate x87 register.
*/
if (cg->comp()->target().is32Bit() &&
!cg->useSSEForDoublePrecision() &&
(machineReturnRegister >= TR::RealRegister::FirstFPR && machineReturnRegister <= TR::RealRegister::LastFPR) &&
returnRegister->getKind() == TR_FPR)
{
// TODO: Modify linkage to allow the returned value to remain in an XMMR.
Expand All @@ -532,10 +543,6 @@ TR::Register *OMR::X86::TreeEvaluator::fpReturnEvaluator(TR::Node *node, TR::Cod
generateMemInstruction(LDCWMem, node, generateX86MemoryReference(cg->findOrCreate2ByteConstant(node, DOUBLE_PRECISION_ROUND_TO_NEAREST), cg), cg);
}

const TR::X86LinkageProperties &linkageProperties = cg->getProperties();
TR::RealRegister::RegNum machineReturnRegister =
(returnRegister->isSinglePrecision())? linkageProperties.getFloatReturnRegister() : linkageProperties.getDoubleReturnRegister();

TR::RegisterDependencyConditions *dependencies = NULL;
if (machineReturnRegister != TR::RealRegister::NoReg)
{
Expand Down
60 changes: 21 additions & 39 deletions compiler/x/codegen/OMRCodeGenerator.cpp
Expand Up @@ -228,12 +228,17 @@ OMR::X86::CodeGenerator::initializeX86(TR::Compilation *comp)
// Determine whether or not x87 or SSE should be used for floating point.
//

#if defined(TR_TARGET_X86) && !defined(J9HAMMER)
#if defined(TR_TARGET_X86)
#if !defined(J9HAMMER)
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE2) == _targetProcessorInfo.supportsSSE2(), "supportsSSE2() failed\n");

if (comp->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE2) && comp->target().cpu.testOSForSSESupport())
supportsSSE2 = true;
#endif // defined(TR_TARGET_X86) && !defined(J9HAMMER)
#else
// 64-bit targets all support SSE2
supportsSSE2 = true;
#endif // !defined(J9HAMMER)
#endif // defined(TR_TARGET_X86)

TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->target().cpu.supportsFeature(OMR_FEATURE_X86_RTM) == _targetProcessorInfo.supportsTM(), "supportsTM() failed\n");

Expand All @@ -255,51 +260,28 @@ OMR::X86::CodeGenerator::initializeX86(TR::Compilation *comp)
}
}

if (comp->target().is64Bit()
#if defined(TR_TARGET_X86) && !defined(J9HAMMER)
|| supportsSSE2
#endif
)
{
self()->setUseSSEForSinglePrecision();
self()->setUseSSEForDoublePrecision();
self()->setSupportsAutoSIMD();
self()->setSupportsJavaFloatSemantics();
}
else
{
self()->setDisableFloatingPointGRA();
}
TR_ASSERT_FATAL(supportsSSE2, "Target processor/OS must support SSE2");

self()->setUseSSEForSinglePrecision();
self()->setUseSSEForDoublePrecision();
self()->setSupportsAutoSIMD();
self()->setSupportsJavaFloatSemantics();

// Choose the best XMM double precision load instruction for the target architecture.
//
if (self()->useSSEForDoublePrecision())
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->target().cpu.isAuthenticAMD() == _targetProcessorInfo.isAuthenticAMD(), "isAuthenticAMD() failed\n");
static char *forceMOVLPD = feGetEnv("TR_forceMOVLPDforDoubleLoads");
if (comp->target().cpu.isAuthenticAMD() || forceMOVLPD)
{
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->target().cpu.isAuthenticAMD() == _targetProcessorInfo.isAuthenticAMD(), "isAuthenticAMD() failed\n");
static char *forceMOVLPD = feGetEnv("TR_forceMOVLPDforDoubleLoads");
if (comp->target().cpu.isAuthenticAMD() || forceMOVLPD)
{
self()->setXMMDoubleLoadOpCode(MOVLPDRegMem);
}
else
{
self()->setXMMDoubleLoadOpCode(MOVSDRegMem);
}
self()->setXMMDoubleLoadOpCode(MOVLPDRegMem);
}

#if defined(TR_TARGET_X86) && !defined(J9HAMMER)
// Determine if software prefetches are supported.
//
// 32-bit platforms must check the processor and OS.
// 64-bit platforms unconditionally support prefetching.
//
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE) == _targetProcessorInfo.supportsSSE(), "supportsSSE() failed\n");
if (comp->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE) && comp->target().cpu.testOSForSSESupport())
#endif // defined(TR_TARGET_X86) && !defined(J9HAMMER)
else
{
self()->setTargetSupportsSoftwarePrefetches();
self()->setXMMDoubleLoadOpCode(MOVSDRegMem);
}

self()->setTargetSupportsSoftwarePrefetches();

// Enable software prefetch of the TLH and configure the TLH prefetching
// geometry.
//
Expand Down
14 changes: 11 additions & 3 deletions compiler/x/env/OMRCPU.hpp
Expand Up @@ -93,16 +93,24 @@ class OMR_EXTENSIBLE CPU : public OMR::CPU
}
bool isGenuineIntel();
bool isAuthenticAMD();

bool requiresLFence();
bool supportsFCOMIInstructions();
bool supportsMFence();
bool supportsLFence();
bool supportsSFence();
bool prefersMultiByteNOP();
bool supportsAVX();
bool testOSForSSESupport() { return false; }


/**
* @brief Reports whether the operating system supports preserving the SSE state
*
* It is generally safe to assume that all modern operating systems
* support preserving the SSE state. However, to be strictly
* correct, this support should be verified. No verification is
* performed here: the answer is hard-coded.
*
* See issue #5964.
*
* @return true unconditionally
*/
bool testOSForSSESupport() { return true; }

/**
* @brief Determines whether 32bit integer rotate is available
*
Expand Down

0 comments on commit 576745e

Please sign in to comment.