Skip to content

Commit

Permalink
Merge pull request #6599 from BradleyWood/uvops
Browse files Browse the repository at this point in the history
Implement batch of vector opcodes (128, 256, 512-bit) on x86
  • Loading branch information
0xdaryl committed Aug 5, 2022
2 parents c1d7801 + dd3391d commit 16e7fa8
Show file tree
Hide file tree
Showing 12 changed files with 959 additions and 1,399 deletions.
82 changes: 63 additions & 19 deletions compiler/il/OMRILOps.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -883,6 +883,22 @@ class ILOpCode
return TR::BadILOp;
}

/**
 * \brief Maps a scalar data type to the matching indirect store opcode.
 *
 * \param type the data type of the value being stored
 *
 * \return bstorei / sstorei / istorei / lstorei / astorei / fstorei / dstorei
 *         for Int8 / Int16 / Int32 / Int64 / Address / Float / Double
 *         respectively. For any other type TR_ASSERT is raised and
 *         TR::BadILOp is returned.
 */
static TR::ILOpCodes indirectStoreOpCode(TR::DataType type)
   {
   switch(type)
      {
      case TR::Int8:    return TR::bstorei;
      case TR::Int16:   return TR::sstorei;
      case TR::Int32:   return TR::istorei;
      case TR::Int64:   return TR::lstorei;
      case TR::Address: return TR::astorei;
      case TR::Float:   return TR::fstorei;
      case TR::Double:  return TR::dstorei;
      // The diagnostic must name the store family: this helper never deals with loads.
      default: TR_ASSERT(0, "no store opcode for this datatype");
      }
   return TR::BadILOp;
   }

static TR::ILOpCodes absOpCode(TR::DataType type)
{
if (type.isVector()) return createVectorOpCode(TR::vabs, type);
Expand Down Expand Up @@ -1453,110 +1469,138 @@ class ILOpCode
if (!elementType.isVectorElement()) return TR::BadILOp;

TR::DataTypes vectorType = TR::DataType::createVectorType(elementType.getDataType(), vectorLength);
TR::VectorOperation vectorOperation;

switch (op)
{
case TR::fsqrt:
case TR::dsqrt:
vectorOperation = TR::vsqrt;
break;
case TR::imin:
case TR::lmin:
case TR::fmin:
case TR::dmin:
vectorOperation = TR::vmin;
break;
case TR::imax:
case TR::lmax:
case TR::fmax:
case TR::dmax:
vectorOperation = TR::vmax;
break;
case TR::bload:
case TR::sload:
case TR::iload:
case TR::lload:
case TR::fload:
case TR::dload:
return ILOpCode::createVectorOpCode(TR::vload, vectorType);
vectorOperation = TR::vload;
break;
case TR::bloadi:
case TR::sloadi:
case TR::iloadi:
case TR::lloadi:
case TR::floadi:
case TR::dloadi:
return ILOpCode::createVectorOpCode(TR::vloadi, vectorType);
vectorOperation = TR::vloadi;
break;
case TR::bstore:
case TR::sstore:
case TR::istore:
case TR::lstore:
case TR::fstore:
case TR::dstore:
return ILOpCode::createVectorOpCode(TR::vstore, vectorType);
vectorOperation = TR::vstore;
break;
case TR::bstorei:
case TR::sstorei:
case TR::istorei:
case TR::lstorei:
case TR::fstorei:
case TR::dstorei:
return ILOpCode::createVectorOpCode(TR::vstorei, vectorType);

vectorOperation = TR::vstorei;
break;
case TR::badd:
case TR::sadd:
case TR::iadd:
case TR::ladd:
case TR::fadd:
case TR::dadd:
return ILOpCode::createVectorOpCode(TR::vadd, vectorType);
vectorOperation = TR::vadd;
break;
case TR::bsub:
case TR::ssub:
case TR::isub:
case TR::lsub:
case TR::fsub:
case TR::dsub:
return ILOpCode::createVectorOpCode(TR::vsub, vectorType);
vectorOperation = TR::vsub;
break;
case TR::bmul:
case TR::smul:
case TR::imul:
case TR::lmul:
case TR::fmul:
case TR::dmul:
return ILOpCode::createVectorOpCode(TR::vmul, vectorType);
vectorOperation = TR::vmul;
break;
case TR::bdiv:
case TR::sdiv:
case TR::idiv:
case TR::ldiv:
case TR::fdiv:
case TR::ddiv:
return ILOpCode::createVectorOpCode(TR::vdiv, vectorType);
vectorOperation = TR::vdiv;
break;
case TR::bconst:
case TR::sconst:
case TR::iconst:
case TR::lconst:
case TR::fconst:
case TR::dconst:
return ILOpCode::createVectorOpCode(TR::vsplats, vectorType);
vectorOperation = TR::vsplats;
break;
case TR::bneg:
case TR::sneg:
case TR::ineg:
case TR::lneg:
case TR::fneg:
case TR::dneg:
return ILOpCode::createVectorOpCode(TR::vneg, vectorType);

vectorOperation = TR::vneg;
break;
case TR::iabs:
case TR::labs:
case TR::fabs:
case TR::dabs:
return ILOpCode::createVectorOpCode(TR::vabs, vectorType);

vectorOperation = TR::vabs;
break;
case TR::bor:
case TR::sor:
case TR::ior:
case TR::lor:
return ILOpCode::createVectorOpCode(TR::vor, vectorType);
vectorOperation = TR::vor;
break;
case TR::band:
case TR::sand:
case TR::iand:
case TR::land:
return ILOpCode::createVectorOpCode(TR::vand, vectorType);
vectorOperation = TR::vand;
break;
case TR::bxor:
case TR::sxor:
case TR::ixor:
case TR::lxor:
return ILOpCode::createVectorOpCode(TR::vxor, vectorType);
vectorOperation = TR::vxor;
break;
case TR::l2d:
return ILOpCode::createVectorOpCode(TR::vconv, TR::DataType::createVectorType(TR::Int64, vectorLength),
TR::DataType::createVectorType(TR::Double, vectorLength));
default:
return TR::BadILOp;

}
return TR::BadILOp;

return ILOpCode::createVectorOpCode(vectorOperation, vectorType);
}

static TR::ILOpCodes getRotateOpCodeFromDt(TR::DataType type)
Expand Down
6 changes: 3 additions & 3 deletions compiler/x/amd64/codegen/OMRTreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1472,19 +1472,19 @@ OMR::X86::AMD64::TreeEvaluator::vfmaEvaluator(TR::Node *node, TR::CodeGenerator
/**
 * \brief Evaluates a vabs node on AMD64.
 *
 * Delegates to the shared unary vector arithmetic evaluator. The earlier
 * unconditional return of unImpOpEvaluator has been removed; it made this
 * call unreachable.
 *
 * \param node the node being evaluated
 * \param cg   the code generator
 *
 * \return the register returned by unaryVectorArithmeticEvaluator
 */
TR::Register*
OMR::X86::AMD64::TreeEvaluator::vabsEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::unaryVectorArithmeticEvaluator(node, cg);
   }

/**
 * \brief Evaluates a vmin node on AMD64.
 *
 * Delegates to the shared binary vector arithmetic evaluator. The earlier
 * unconditional return of unImpOpEvaluator has been removed; it made this
 * call unreachable.
 *
 * \param node the node being evaluated
 * \param cg   the code generator
 *
 * \return the register returned by vectorBinaryArithmeticEvaluator
 */
TR::Register*
OMR::X86::AMD64::TreeEvaluator::vminEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::vectorBinaryArithmeticEvaluator(node, cg);
   }

/**
 * \brief Evaluates a vmax node on AMD64.
 *
 * Delegates to the shared binary vector arithmetic evaluator. The earlier
 * unconditional return of unImpOpEvaluator has been removed; it made this
 * call unreachable.
 *
 * \param node the node being evaluated
 * \param cg   the code generator
 *
 * \return the register returned by vectorBinaryArithmeticEvaluator
 */
TR::Register*
OMR::X86::AMD64::TreeEvaluator::vmaxEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::vectorBinaryArithmeticEvaluator(node, cg);
   }

TR::Register*
Expand Down
2 changes: 1 addition & 1 deletion compiler/x/codegen/FPTreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,7 @@ TR::Register *OMR::X86::TreeEvaluator::dsqrtEvaluator(TR::Node *node, TR::CodeGe

/**
 * \brief Evaluates a vsqrt node on x86.
 *
 * Fatally asserts that the vector element type reports isFloatingPoint(),
 * then delegates to the shared unary vector arithmetic evaluator.
 *
 * \param node the vsqrt node being evaluated
 * \param cg   the code generator
 *
 * \return the register returned by unaryVectorArithmeticEvaluator
 */
TR::Register* OMR::X86::TreeEvaluator::vsqrtEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Only this floating-point check is kept. The former Double-only fatal assert
   // ran first and rejected every non-Double element type, so relaxing the
   // check had no effect while it remained.
   TR_ASSERT_FATAL(node->getDataType().getVectorElementType().isFloatingPoint(), "Unsupported datatype for vsqrt opcode");
   return TR::TreeEvaluator::unaryVectorArithmeticEvaluator(node, cg);
   }

Expand Down
14 changes: 10 additions & 4 deletions compiler/x/codegen/OMRCodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1010,8 +1010,16 @@ bool OMR::X86::CodeGenerator::getSupportsOpCodeForAutoSIMD(TR::CPU *cpu, TR::ILO
switch (opcode.getVectorOperation())
{
case TR::vneg:
if (ot.getVectorLength() == TR::VectorLength128)
return true;
switch (ot.getVectorLength()) {
case TR::VectorLength128:
return true;
case TR::VectorLength256:
return cpu->supportsFeature(OMR_FEATURE_X86_AVX2);
case TR::VectorLength512:
return cpu->supportsFeature(OMR_FEATURE_X86_AVX512F);
default:
return false;
}
case TR::vload:
case TR::vloadi:
case TR::vstore:
Expand All @@ -1036,8 +1044,6 @@ bool OMR::X86::CodeGenerator::getSupportsOpCodeForAutoSIMD(TR::CPU *cpu, TR::ILO
return ot.getVectorLength() == TR::VectorLength128;
else
return false;
case TR::vsqrt:
return (et == TR::Double);

/*
* GRA does not work with vector registers on 32 bit due to a bug where xmm registers are not being assigned.
Expand Down
40 changes: 40 additions & 0 deletions compiler/x/codegen/OMRInstOpCode.enum
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,43 @@
// Expand every INSTRUCTION(...) entry pulled in from X86Ops.ins to just its
// `name` argument; the macro is undefined again right after the include.
#define INSTRUCTION(name, mnemonic, binary, property0, property1, features) name
#include "codegen/X86Ops.ins"
#undef INSTRUCTION

// Alias RegMem & RegReg form instructions
// Each <op>RegMem name below is given the same value as its <op>RegReg
// counterpart, so both spellings denote one and the same opcode value.
// NOTE(review): presumably these are instructions whose source operand may be
// either a register or a memory reference -- confirm against X86Ops.ins.
PMULLWRegMem = PMULLWRegReg,
PMULLDRegMem = PMULLDRegReg,
PADDBRegMem = PADDBRegReg,
PADDWRegMem = PADDWRegReg,
PADDDRegMem = PADDDRegReg,
PADDQRegMem = PADDQRegReg,
PSUBBRegMem = PSUBBRegReg,
PSUBWRegMem = PSUBWRegReg,
PSUBDRegMem = PSUBDRegReg,
PSUBQRegMem = PSUBQRegReg,
PANDRegMem = PANDRegReg,
PORRegMem = PORRegReg,
PXORRegMem = PXORRegReg,
ADDPSRegMem = ADDPSRegReg,
ADDPDRegMem = ADDPDRegReg,
DIVPSRegMem = DIVPSRegReg,
DIVPDRegMem = DIVPDRegReg,
MULPSRegMem = MULPSRegReg,
MULPDRegMem = MULPDRegReg,
MOVDQURegMem = MOVDQURegReg,
PABSBRegMem = PABSBRegReg,
PABSWRegMem = PABSWRegReg,
PABSDRegMem = PABSDRegReg,
PABSQRegMem = PABSQRegReg,
PMINSBRegMem = PMINSBRegReg,
PMINSWRegMem = PMINSWRegReg,
PMINSDRegMem = PMINSDRegReg,
PMINSQRegMem = PMINSQRegReg,
MINPSRegMem = MINPSRegReg,
MINPDRegMem = MINPDRegReg,
PMAXSBRegMem = PMAXSBRegReg,
PMAXSWRegMem = PMAXSWRegReg,
PMAXSDRegMem = PMAXSDRegReg,
PMAXSQRegMem = PMAXSQRegReg,
MAXPSRegMem = MAXPSRegReg,
MAXPDRegMem = MAXPDRegReg,
SUBPSRegMem = SUBPSRegReg,
SUBPDRegMem = SUBPDRegReg,
23 changes: 15 additions & 8 deletions compiler/x/codegen/OMRInstOpCode.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ namespace TR { class Register; }
// IA32OpProp_* flags: each value below is a distinct single-bit mask
// (bits 27 through 31 of a 32-bit word).
#define IA32OpProp_TargetRegisterInModRM 0x08000000
#define IA32OpProp_TargetRegisterIgnored 0x10000000
#define IA32OpProp_SourceRegisterInModRM 0x20000000

#define IA32OpProp_SourceRegisterIgnored 0x40000000
#define IA32OpProp_BranchOp 0x80000000

Expand Down Expand Up @@ -102,6 +103,9 @@ namespace TR { class Register; }
// IA32OpProp1_* flags: single-bit masks.
#define IA32OpProp1_NeedsLockPrefix 0x00004000
#define IA32OpProp1_CallOp 0x00010000
#define IA32OpProp1_SourceIsMemRef 0x00020000

// For cases when source operand can be a register or mem-ref
// Note: this has the same value (0x00020000) as IA32OpProp1_SourceIsMemRef
// above, so testing either name checks the same bit.
#define IA32OpProp1_SourceCanBeMemRef 0x00020000
#define IA32OpProp1_SourceRegIsImplicit 0x00040000
#define IA32OpProp1_TargetRegIsImplicit 0x00080000
#define IA32OpProp1_FusableCompare 0x00100000
Expand Down Expand Up @@ -479,11 +483,11 @@ class InstOpCode: public OMR::InstOpCode
{
supported = target->supportsFeature(OMR_FEATURE_X86_AVX512F);

if (flags & X86FeatureProp_EVEX128RequiresAVX512VL)
if (supported && flags & X86FeatureProp_EVEX128RequiresAVX512VL)
supported = target->supportsFeature(OMR_FEATURE_X86_AVX512VL);
if (flags & X86FeatureProp_EVEX128RequiresAVX512BW)
if (supported && flags & X86FeatureProp_EVEX128RequiresAVX512BW)
supported = target->supportsFeature(OMR_FEATURE_X86_AVX512BW);
if (flags & X86FeatureProp_EVEX128RequiresAVX512DQ)
if (supported && flags & X86FeatureProp_EVEX128RequiresAVX512DQ)
supported = target->supportsFeature(OMR_FEATURE_X86_AVX512DQ);

if (supported)
Expand All @@ -492,7 +496,10 @@ class InstOpCode: public OMR::InstOpCode

if (flags & X86FeatureProp_VEX128Supported)
{
if (flags & X86FeatureProp_VEX128RequiresAVX )
if (flags & X86FeatureProp_VEX128RequiresAVX && target->supportsFeature(OMR_FEATURE_X86_AVX))
return OMR::X86::VEX_L128;

if (flags & X86FeatureProp_VEX128RequiresAVX2 && target->supportsFeature(OMR_FEATURE_X86_AVX2))
return OMR::X86::VEX_L128;
}

Expand All @@ -511,11 +518,11 @@ class InstOpCode: public OMR::InstOpCode
{
supported = target->supportsFeature(OMR_FEATURE_X86_AVX512F);

if (flags & X86FeatureProp_EVEX256RequiresAVX512VL)
if (supported && flags & X86FeatureProp_EVEX256RequiresAVX512VL)
supported = target->supportsFeature(OMR_FEATURE_X86_AVX512VL);
if (flags & X86FeatureProp_EVEX256RequiresAVX512BW)
if (supported && flags & X86FeatureProp_EVEX256RequiresAVX512BW)
supported = target->supportsFeature(OMR_FEATURE_X86_AVX512BW);
if (flags & X86FeatureProp_EVEX256RequiresAVX512DQ)
if (supported && flags & X86FeatureProp_EVEX256RequiresAVX512DQ)
supported = target->supportsFeature(OMR_FEATURE_X86_AVX512DQ);

if (supported)
Expand All @@ -526,7 +533,7 @@ class InstOpCode: public OMR::InstOpCode
{
supported = target->supportsFeature(OMR_FEATURE_X86_AVX);

if (flags & X86FeatureProp_VEX256RequiresAVX2)
if (supported && flags & X86FeatureProp_VEX256RequiresAVX2)
supported = target->supportsFeature(OMR_FEATURE_X86_AVX2);

if (supported)
Expand Down

0 comments on commit 16e7fa8

Please sign in to comment.