Skip to content

Commit

Permalink
Remove the concept of flipped CCRs from the Power codegen
Browse files Browse the repository at this point in the history
In order to avoid FXU rejects on POWER6 chips, the Power codegen
contains logic to flip the order of operands when performing a
comparison when determined to be necessary.

Unfortunately, the implementation of this optimization left much to be
desired. The generateTrg1Src2Instruction helper would set a special flag
on the target register and silently flip the compare operand order if it
was deemed to be necessary. The generateConditionalBranch helper (and
related helpers) would then check this flag to determine whether they
needed to flip the branch. This is obviously very surprising and also
doesn't correctly handle CR logical instructions.

Since the evaluation of comparisons to a condition register has now been
centralized in evaluateIntCompareToConditionRegister, the logic can now
be moved there, with the condition returned being flipped rather than
using hacky flags. This should prevent any unfortunate mistakes in the
future.

Signed-off-by: Ben Thomas <ben@benthomas.ca>
  • Loading branch information
aviansie-ben committed Sep 2, 2020
1 parent 5ed32ed commit 8ed69bc
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 112 deletions.
93 changes: 88 additions & 5 deletions compiler/p/codegen/ControlFlowEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,36 @@ CompareCondition reverseCondition(CompareCondition cond)
}
}

/**
* \brief
* Returns a condition that should be used if the order of operands to the compare instruction
* is flipped.
*
* \param cond
* The original condition.
*
* \return
* The condition that should be used if the compare operands are flipped.
*/
CompareCondition flipConditionOrder(CompareCondition cond)
{
switch (cond)
{
case CompareCondition::eq:
return CompareCondition::eq;
case CompareCondition::ne:
return CompareCondition::ne;
case CompareCondition::lt:
return CompareCondition::gt;
case CompareCondition::ge:
return CompareCondition::le;
case CompareCondition::gt:
return CompareCondition::lt;
case CompareCondition::le:
return CompareCondition::ge;
}
}

/**
* \brief
* Gets the opcode of a conditional branch instruction that branches when the provided condition
Expand Down Expand Up @@ -546,10 +576,33 @@ CompareCondition evaluateDualIntCompareToConditionRegister(
return CompareCondition::eq;
}

static bool registerRecentlyWritten(TR::Register *reg, uint32_t windowSize, TR::CodeGenerator *cg)
{
uint32_t i = 0;
TR::Instruction *cursor = cg->getAppendInstruction();

while (cursor && i < windowSize)
{
if (cursor->getOpCode().getMaxBinaryLength() != 0)
{
if (cursor->getTargetRegister(0) == reg)
return true;
i++;
}

cursor = cursor->getPrev();
}

return false;
}

/**
* \brief
* Evaluates an integral comparison to all bits of a CR field using a cmp or cmpi instruction.
*
* Under certain conditions, this function may actually evaluate the comparison by reversing its
* operands, in which case this routine will return true to indicate that this has been done.
*
* This function is not capable of handling 64-bit integral comparisons on 32-bit machines, as
* such comparisons cannot be performed in a single instruction and would require many extra CR
* logical instructions, rendering it too inefficient for general use.
Expand All @@ -572,8 +625,11 @@ CompareCondition evaluateDualIntCompareToConditionRegister(
*
* \param cg
* The code generator.
*
* \return
* true if the order of operands to the compare instruction was reversed; false otherwise.
*/
void evaluateThreeWayIntCompareToConditionRegister(
bool evaluateThreeWayIntCompareToConditionRegister(
TR::Register *condReg,
TR::Node *node,
TR::Node *firstChild,
Expand Down Expand Up @@ -638,10 +694,31 @@ void evaluateThreeWayIntCompareToConditionRegister(
? is16BitUnsignedImmediate(secondChild->get64bitIntegralValueAsUnsigned())
: is16BitSignedImmediate(secondChild->get64bitIntegralValue()));

static bool disableFlipCompare = feGetEnv("TR_DisableFlipCompare") != NULL;
CompareCondition cond = compareInfo.cond;
bool wasFlipped = false;

if (canUseCmpi)
{
generateTrg1Src1ImmInstruction(cg, cmpiOp, node, condReg, firstReg, secondChild->get64bitIntegralValue());
}
else if (
(firstReg->containsInternalPointer() || registerRecentlyWritten(firstReg, 4, cg)) &&
performTransformation(
cg->comp(),
"O^O evaluateIntCompareToConditionRegister: flipping order of compare operands (n%dn, n%dn) while evaluating n%dn to avoid P6 FXU reject",
firstChild->getGlobalIndex(),
secondChild->getGlobalIndex(),
node->getGlobalIndex()
)
)
{
TR::Register *secondReg = evaluateAndExtend(secondChild, compareInfo.isUnsigned, false, cg);
generateTrg1Src2Instruction(cg, cmpOp, node, condReg, secondReg, firstReg);
stopUsingExtendedRegister(secondReg, secondChild, cg);

wasFlipped = true;
}
else
{
TR::Register *secondReg = evaluateAndExtend(secondChild, compareInfo.isUnsigned, false, cg);
Expand All @@ -650,6 +727,7 @@ void evaluateThreeWayIntCompareToConditionRegister(
}

stopUsingExtendedRegister(firstReg, firstChild, cg);
return wasFlipped;
}

/**
Expand Down Expand Up @@ -690,8 +768,10 @@ CompareCondition evaluateIntCompareToConditionRegister(
if (compareInfo.type == TR::Int64 && !cg->comp()->target().is64Bit())
return evaluateDualIntCompareToConditionRegister(condReg, node, firstChild, secondChild, compareInfo, cg);

evaluateThreeWayIntCompareToConditionRegister(condReg, node, firstChild, secondChild, compareInfo, cg);
return compareInfo.cond;
if (evaluateThreeWayIntCompareToConditionRegister(condReg, node, firstChild, secondChild, compareInfo, cg))
return flipConditionOrder(compareInfo.cond);
else
return compareInfo.cond;
}

/**
Expand Down Expand Up @@ -2846,10 +2926,13 @@ TR::Register *intOrderEvaluator(TR::Node *node, const CompareInfo& compareInfo,
}
else if (cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_PPC_P9))
{
CRCompareCondition crCond = compareConditionInCR(compareInfo.cond);
CompareCondition cond = compareInfo.cond;
TR::Register *condReg = cg->allocateRegister(TR_CCR);

evaluateThreeWayIntCompareToConditionRegister(condReg, node, firstChild, secondChild, compareInfo, cg);
if (evaluateThreeWayIntCompareToConditionRegister(condReg, node, firstChild, secondChild, compareInfo, cg))
cond = flipConditionOrder(compareInfo.cond);

CRCompareCondition crCond = compareConditionInCR(cond);

if (crCond.crcc == TR::RealRegister::CRCC_LT)
{
Expand Down
93 changes: 0 additions & 93 deletions compiler/p/codegen/GenerateInstructions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ namespace TR { class LabelSymbol; }
namespace TR { class RegisterDependencyConditions; }
namespace TR { class SymbolReference; }

TR::InstOpCode::Mnemonic flipBranch(TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic op);
int estimateLikeliness(TR::CodeGenerator *cg, TR::Node *n);

TR::Instruction *generateMvFprGprInstructions(TR::CodeGenerator *cg, TR::Node *node, MvFprGprMode mode, bool nonops, TR::Register *reg0, TR::Register *reg1, TR::Register *reg2, TR::Register * reg3, TR::Instruction *cursor)
Expand Down Expand Up @@ -318,9 +317,6 @@ TR::Instruction *generateConditionalBranchInstruction(TR::CodeGenerator *cg, TR:
if (!cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_PPC_GP))
return generateConditionalBranchInstruction(cg, op, n, sym, cr, preced);

if (cr->isFlippedCCR())
op = flipBranch(cg, op);

if (preced)
return new (cg->trHeapMemory()) TR::PPCConditionalBranchInstruction(op, n, sym, cr, preced, cg, likeliness);
return new (cg->trHeapMemory()) TR::PPCConditionalBranchInstruction(op, n, sym, cr, cg, likeliness);
Expand All @@ -332,9 +328,6 @@ TR::Instruction *generateDepConditionalBranchInstruction(TR::CodeGenerator *cg,
// if processor does not support branch hints
if (!cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_PPC_GP))
return generateDepConditionalBranchInstruction(cg, op, n, sym, cr, cond, preced);

if (cr->isFlippedCCR())
op = flipBranch(cg, op);

if (preced)
return new (cg->trHeapMemory()) TR::PPCDepConditionalBranchInstruction(op, n, sym, cr, cond, preced, cg, likeliness);
Expand All @@ -344,9 +337,6 @@ TR::Instruction *generateDepConditionalBranchInstruction(TR::CodeGenerator *cg,
TR::Instruction *generateConditionalBranchInstruction(TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic op, TR::Node * n,
TR::LabelSymbol *sym, TR::Register *cr, TR::Instruction *preced)
{
if (cr->isFlippedCCR())
op = flipBranch(cg, op);

int prediction = estimateLikeliness(cg, n);
bool likeliness;
if(prediction < 0)
Expand All @@ -368,9 +358,6 @@ TR::Instruction *generateConditionalBranchInstruction(TR::CodeGenerator *cg, TR:
TR::Instruction *generateDepConditionalBranchInstruction(TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic op, TR::Node * n,
TR::LabelSymbol *sym, TR::Register *cr, TR::RegisterDependencyConditions *cond, TR::Instruction *preced)
{
if (cr->isFlippedCCR())
op = flipBranch(cg, op);

int prediction = estimateLikeliness(cg, n);
bool likeliness;
if(prediction < 0)
Expand All @@ -393,8 +380,6 @@ TR::Instruction *generateDepConditionalBranchInstruction(TR::CodeGenerator *cg,
TR::Instruction *generateTrg1Src1ImmInstruction(TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic op, TR::Node * n,
TR::Register *treg, TR::Register *s1reg, intptr_t imm, TR::Instruction *preced)
{
if (cg->comp()->target().cpu.is(OMR_PROCESSOR_PPC_P6) && TR::InstOpCode(op).isCompare())
treg->resetFlippedCCR();
if (preced)
return new (cg->trHeapMemory()) TR::PPCTrg1Src1ImmInstruction(op, n, treg, s1reg, imm, preced, cg);
return new (cg->trHeapMemory()) TR::PPCTrg1Src1ImmInstruction(op, n,treg, s1reg, imm, cg);
Expand All @@ -403,8 +388,6 @@ TR::Instruction *generateTrg1Src1ImmInstruction(TR::CodeGenerator *cg, TR::InstO
TR::Instruction *generateTrg1Src1ImmInstruction(TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic op, TR::Node * n,
TR::Register *treg, TR::Register *s1reg, TR::Register *cr0reg, int32_t imm, TR::Instruction *preced)
{
if (cg->comp()->target().cpu.is(OMR_PROCESSOR_PPC_P6))
cr0reg->resetFlippedCCR();
if (preced)
return new (cg->trHeapMemory()) TR::PPCTrg1Src1ImmInstruction(op, n,treg, s1reg, cr0reg, imm, preced, cg);
return new (cg->trHeapMemory()) TR::PPCTrg1Src1ImmInstruction(op, n,treg, s1reg, cr0reg, imm, cg);
Expand Down Expand Up @@ -442,48 +425,9 @@ TR::Instruction *generateTrg1Src1Instruction(TR::CodeGenerator *cg, TR::InstOpCo
return new (cg->trHeapMemory()) TR::PPCTrg1Src1Instruction(op, n, treg, s1reg, cg);
}

static bool registerRecentlyWritten(TR::Register *reg, TR::Instruction *cursor)
{
const uint32_t windowSize = 4;
uint32_t window = 0;

while (cursor && window <= windowSize)
{
// Walk past pseudo instructions (which have binary encoding == 0)
if (cursor->getOpCode().getOpCodeBinaryEncoding())
{
if (cursor->getTargetRegister(0) == reg)
return true;
++window;
}
cursor = cursor->getPrev();
}
return false;
}

TR::Instruction *generateTrg1Src2Instruction(TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic op, TR::Node * n,
TR::Register *treg, TR::Register *s1reg, TR::Register *s2reg, TR::Instruction *preced)
{
TR::Compilation * comp = cg->comp();
static bool disableFlipCompare = feGetEnv("TR_DisableFlipCompare") != NULL;
if (!disableFlipCompare && cg->comp()->target().cpu.is(OMR_PROCESSOR_PPC_P6) &&
TR::InstOpCode(op).isCompare() &&
n->getOpCode().isBranch() && n->getOpCode().isBooleanCompare())
{
treg->resetFlippedCCR();
if (s1reg->containsInternalPointer() ||
(!TR::InstOpCode(op).isFloat() && registerRecentlyWritten(s1reg, preced ? preced : cg->getAppendInstruction())))
{
//Swap registers to avoid p6 fxu reject.
TR::Register *temp = s1reg;
s1reg = s2reg;
s2reg = temp;
treg->setFlippedCCR();
if (cg->getDebug())
traceMsg(comp, "Flipping CCR operands for compare generated for node [%p]\n", n);
}
}

if (preced)
return new (cg->trHeapMemory()) TR::PPCTrg1Src2Instruction(op, n, treg, s1reg, s2reg, preced, cg);
return new (cg->trHeapMemory()) TR::PPCTrg1Src2Instruction(op, n, treg, s1reg, s2reg, cg);
Expand All @@ -492,8 +436,6 @@ TR::Instruction *generateTrg1Src2Instruction(TR::CodeGenerator *cg, TR::InstOpCo
TR::Instruction *generateTrg1Src2Instruction(TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic op, TR::Node * n,
TR::Register *treg, TR::Register *s1reg, TR::Register *s2reg, TR::Register *cr0Reg, TR::Instruction *preced)
{
if (cg->comp()->target().cpu.is(OMR_PROCESSOR_PPC_P6))
cr0Reg->resetFlippedCCR();
return new (cg->trHeapMemory()) TR::PPCTrg1Src2Instruction(op, n, treg, s1reg, s2reg, cr0Reg, preced, cg);
}

Expand Down Expand Up @@ -524,8 +466,6 @@ TR::Instruction *generateTrg1Src1Imm2Instruction(TR::CodeGenerator *cg, TR::Inst
TR::Instruction *generateTrg1Src1Imm2Instruction(TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic op, TR::Node * n,
TR::Register *trgReg, TR::Register *srcReg, TR::Register *cr0reg, int32_t imm1, int64_t imm2, TR::Instruction *preced)
{
if (cg->comp()->target().cpu.is(OMR_PROCESSOR_PPC_P6))
cr0reg->resetFlippedCCR();
if (preced)
return new (cg->trHeapMemory()) TR::PPCTrg1Src1Imm2Instruction(op, n, trgReg, srcReg, cr0reg, imm1, imm2, preced, cg);
return new (cg->trHeapMemory()) TR::PPCTrg1Src1Imm2Instruction(op, n, trgReg, srcReg, cr0reg, imm1, imm2, cg);
Expand Down Expand Up @@ -616,39 +556,6 @@ TR::Instruction *generateTrg1Instruction(TR::CodeGenerator *cg, TR::InstOpCode::
return new (cg->trHeapMemory()) TR::PPCTrg1Instruction(op, n, trg, cg);
}

TR::InstOpCode::Mnemonic flipBranch(TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic op)
{
switch(op)
{
case TR::InstOpCode::bge:
return TR::InstOpCode::ble;
case TR::InstOpCode::bgel:
return TR::InstOpCode::blel;
case TR::InstOpCode::bgt:
return TR::InstOpCode::blt;
case TR::InstOpCode::bgtl:
return TR::InstOpCode::bltl;
case TR::InstOpCode::ble:
return TR::InstOpCode::bge;
case TR::InstOpCode::blel:
return TR::InstOpCode::bgel;
case TR::InstOpCode::blt:
return TR::InstOpCode::bgt;
case TR::InstOpCode::bltl:
return TR::InstOpCode::bgtl;
case TR::InstOpCode::beq:
case TR::InstOpCode::beql:
case TR::InstOpCode::bne:
case TR::InstOpCode::bnel:
case TR::InstOpCode::bdz:
case TR::InstOpCode::bdnz:
return op;
default:
TR_ASSERT(0, "Cannot use this branch opcode to branch off the comparison of an internal pointer : %d\n", (int32_t)op);
return TR::InstOpCode::bad;
}
}

/*
*returns 0 if the branch is not biased and a hint shouldn't be used.
*returns 1 if a branch likely hint should be used.
Expand Down
14 changes: 0 additions & 14 deletions compiler/p/codegen/OMRRegister.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,22 +62,8 @@ class OMR_EXTENSIBLE Register: public OMR::Register
uint64_t getInterference() {return _liveRegisterInfo._interference;}
uint64_t setInterference(uint64_t i) {return (_liveRegisterInfo._interference = i);}


/*
* Method for manipulating flags
*/
bool isFlippedCCR() {return _flags.testAny(FlipBranchOffThisCCR);}
void setFlippedCCR() {_flags.set(FlipBranchOffThisCCR);}
void resetFlippedCCR() {_flags.reset(FlipBranchOffThisCCR);}


private:

enum
{
FlipBranchOffThisCCR = 0x4000, // PPC may have swapped register positions in compare
};

// Both x and z also have this union but ppc uses uint64_t instead of uint32_t
union
{
Expand Down

0 comments on commit 8ed69bc

Please sign in to comment.