Skip to content

Commit

Permalink
Merge pull request #6521 from pshipton/revert-6509-vops
Browse files Browse the repository at this point in the history
Revert "Implement 256 and 512-bit vload/vstore on x86"
  • Loading branch information
jdmpapin committed May 17, 2022
2 parents 5d15977 + f8b81aa commit 974dee8
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 140 deletions.
4 changes: 0 additions & 4 deletions compiler/il/OMRDataTypes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -225,11 +225,7 @@ enum VectorLength
VectorLength64,
// TODO: Redefine, preferably per platform, once any platform supports vector lengths other than 128-bit.
// Defining this per platform is not required for functional correctness; it only serves to reduce NumAllTypes.
#if defined(TR_TARGET_X86)
NumVectorLengths = VectorLength512
#else
NumVectorLengths = VectorLength128
#endif
};

/**
Expand Down
33 changes: 12 additions & 21 deletions compiler/x/codegen/OMRCodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1028,58 +1028,49 @@ OMR::X86::CodeGenerator::getSupportsOpCodeForAutoSIMD(TR::ILOpCode opcode, TR::D
}

TR::DataType ot = opcode.getVectorResultDataType();

if (ot.getVectorLength() != TR::VectorLength128) return false;

TR::DataType et = ot.getVectorElementType();

switch (opcode.getVectorOperation())
{
case OMR::vadd:
case OMR::vsub:
if (et == TR::Int8 || et == TR::Int16 || et == TR::Int32 || et == TR::Int64 || et == TR::Float || et == TR::Double)
return ot.getVectorLength() == TR::VectorLength128;
return true;
else
return false;
case OMR::vmul:
TR_ASSERT_FATAL(self()->comp()->compileRelocatableCode() || self()->comp()->isOutOfProcessCompilation() || self()->comp()->compilePortableCode() || self()->getX86ProcessorInfo().supportsSSE4_1() == self()->comp()->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE4_1), "supportsSSE4_1() failed\n");
if (et == TR::Float || et == TR::Double || (et == TR::Int32 && self()->comp()->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE4_1)))
return ot.getVectorLength() == TR::VectorLength128;
return true;
else
return false;
case OMR::vdiv:
if (et == TR::Float || et == TR::Double)
return ot.getVectorLength() == TR::VectorLength128;
return true;
else
return false;
case OMR::vneg:
return ot.getVectorLength() == TR::VectorLength128;
if (et == TR::Int8 || et == TR::Int16 || et == TR::Int32 || et == TR::Int64 || et == TR::Float || et == TR::Double)
return true;
else
return false;
case OMR::vxor:
case OMR::vor:
case OMR::vand:
if (et == TR::Int32 || et == TR::Int64)
return ot.getVectorLength() == TR::VectorLength128;
return true;
else
return false;
case OMR::vload:
case OMR::vloadi:
case OMR::vstore:
case OMR::vstorei:
switch (ot.getVectorLength())
{
case TR::VectorLength512:
if (!self()->comp()->target().cpu.supportsFeature(OMR_FEATURE_X86_AVX512F))
return false;
return true;
case TR::VectorLength256:
if (!self()->comp()->target().cpu.supportsAVX())
return false;
return true;
case TR::VectorLength128:
return true;
default:
return false;
}
case OMR::vsplats:
if (et == TR::Int32 || et == TR::Int64 || et == TR::Float || et == TR::Double)
return ot.getVectorLength() == TR::VectorLength128;
return true;
else
return false;
default:
Expand Down
38 changes: 8 additions & 30 deletions compiler/x/codegen/SIMDTreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,31 +75,20 @@ TR::Register* OMR::X86::TreeEvaluator::SIMDloadEvaluator(TR::Node* node, TR::Cod
tempMR = ConvertToPatchableMemoryReference(tempMR, node, cg);
TR::Register* resultReg = cg->allocateRegister(TR_VRF);

TR::InstOpCode::Mnemonic opCode = TR::InstOpCode::MOVDQURegMem;
OMR::X86::Encoding encoding = Legacy;

TR::InstOpCode::Mnemonic opCode = TR::InstOpCode::bad;
switch (node->getSize())
{
case 16:
if (cg->comp()->target().cpu.supportsAVX())
encoding = VEX_L128;
break;
case 32:
TR_ASSERT_FATAL(cg->comp()->target().cpu.supportsAVX(), "256-bit vload requires AVX");
encoding = VEX_L256;
break;
case 64:
TR_ASSERT_FATAL(cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_X86_AVX512F), "512-bit vload requires AVX-512");
encoding = EVEX_L512;
opCode = TR::InstOpCode::MOVDQURegMem;
break;
default:
if (cg->comp()->getOption(TR_TraceCG))
traceMsg(cg->comp(), "Unsupported fill size: Node = %p\n", node);
TR_ASSERT_FATAL(false, "Unsupported fill size");
TR_ASSERT(false, "Unsupported fill size");
break;
}

TR::Instruction* instr = generateRegMemInstruction(opCode, node, resultReg, tempMR, cg, encoding);
TR::Instruction* instr = generateRegMemInstruction(opCode, node, resultReg, tempMR, cg);
if (node->getOpCode().isIndirect())
cg->setImplicitExceptionPoint(instr);
node->setRegister(resultReg);
Expand All @@ -114,31 +103,20 @@ TR::Register* OMR::X86::TreeEvaluator::SIMDstoreEvaluator(TR::Node* node, TR::Co
tempMR = ConvertToPatchableMemoryReference(tempMR, node, cg);
TR::Register* valueReg = cg->evaluate(valueNode);

TR::InstOpCode::Mnemonic opCode = TR::InstOpCode::MOVDQUMemReg;
OMR::X86::Encoding encoding = Legacy;

TR::InstOpCode::Mnemonic opCode = TR::InstOpCode::bad;
switch (node->getSize())
{
case 16:
if (cg->comp()->target().cpu.supportsAVX())
encoding = VEX_L128;
break;
case 32:
TR_ASSERT_FATAL(cg->comp()->target().cpu.supportsAVX(), "256-bit vstore requires AVX");
encoding = VEX_L256;
break;
case 64:
TR_ASSERT_FATAL(cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_X86_AVX512F), "512-bit vstore requires AVX-512");
encoding = EVEX_L512;
opCode = TR::InstOpCode::MOVDQUMemReg;
break;
default:
if (cg->comp()->getOption(TR_TraceCG))
traceMsg(cg->comp(), "Unsupported fill size: Node = %p\n", node);
TR_ASSERT_FATAL(false, "Unsupported fill size");
TR_ASSERT(false, "Unsupported fill size");
break;
}

TR::Instruction* instr = generateMemRegInstruction(opCode, node, tempMR, valueReg, cg, encoding);
TR::Instruction* instr = generateMemRegInstruction(opCode, node, tempMR, valueReg, cg);

cg->decReferenceCount(valueNode);
tempMR->decNodeReferenceCounts(cg);
Expand Down
85 changes: 0 additions & 85 deletions fvtest/compilertriltest/VectorTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,94 +21,9 @@

#include "JitTest.hpp"
#include "default_compiler.hpp"
#include "compilerunittest/CompilerUnitTest.hpp"

// Fixture for the vector tests in this file; it adds nothing on top of TRTest::JitTest.
class VectorTest : public TRTest::JitTest {};

// Value-parameterized variant of VectorTest. Each parameter is a
// (vector length, element type) tuple that a test reads through GetParam().
class ParameterizedVectorTest : public VectorTest, public ::testing::WithParamInterface<std::tuple<TR::VectorLength, TR::DataTypes>> {};

/**
 * Compiles a Tril method that loads one vector from its second argument
 * (vloadi) and stores it through its first argument (vstorei), runs it, and
 * checks that exactly one vector's worth of bytes was copied.
 *
 * The test parameter supplies the vector length (tuple element 0) and the
 * vector element type (tuple element 1).
 */
TEST_P(ParameterizedVectorTest, VLoadStore) {
    const TR::VectorLength vl = std::get<0>(GetParam());
    const TR::DataTypes et = std::get<1>(GetParam());

    //TODO: Re-enable this test on S390 after issue #1843 is resolved.
    SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)";
    SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)";
    SKIP_ON_RISCV(MissingImplementation);

    // Lengths other than 128-bit are skipped on Power and ARM.
    if (vl != TR::VectorLength128) {
        SKIP_ON_POWER(MissingImplementation);
        SKIP_ON_ARM(MissingImplementation);
    }

#if defined(TR_TARGET_X86)
    // Query the processor running the test: the wider vector lengths are only
    // exercised when the matching AVX / AVX-512F feature is reported.
    OMRProcessorDesc desc;
    omrsysinfo_get_processor_description(&desc);
    bool supportsAVX = omrsysinfo_processor_has_feature(&desc, OMR_FEATURE_X86_AVX);
    bool supportsAVX512 = omrsysinfo_processor_has_feature(&desc, OMR_FEATURE_X86_AVX512F);

    if (!supportsAVX && vl == TR::VectorLength256) {
        SKIP_ON_X86(UnsupportedFeature) << "Cannot execute 256-bit vector test without AVX hardware";
        SKIP_ON_HAMMER(UnsupportedFeature) << "Cannot execute 256-bit vector test without AVX hardware";
    } else if (!supportsAVX512 && vl == TR::VectorLength512) {
        SKIP_ON_X86(UnsupportedFeature) << "Cannot execute 512-bit vector test without AVX-512 hardware";
        SKIP_ON_HAMMER(UnsupportedFeature) << "Cannot execute 512-bit vector test without AVX-512 hardware";
    }
#endif

    char inputTrees[1024];
    // A string literal may only be bound to a pointer-to-const in C++11 and later.
    const char *formatStr = "(method return= NoType args=[Address,Address] "
                            " (block "
                            " (vstorei%s offset=0 "
                            " (aload parm=0) "
                            " (vloadi%s (aload parm=1))) "
                            " (return))) ";
    TR::DataType vt = TR::DataType::createVectorType(et, vl);

    // Bounded formatting: fail the test instead of overrunning the buffer if
    // the generated Tril text ever stops fitting.
    const int written = snprintf(inputTrees, sizeof(inputTrees), formatStr, vt.toString(), vt.toString());
    ASSERT_TRUE(written > 0 && static_cast<size_t>(written) < sizeof(inputTrees)) << "Tril input does not fit in the buffer";

    auto trees = parseString(inputTrees);
    ASSERT_NOTNULL(trees);

    Tril::DefaultCompiler compiler(trees);
    ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees;

    auto entry_point = compiler.getEntryPoint<void (*)(void *,void *)>();

    // Buffers are sized for the widest vector under test (512 bits = 64 bytes).
    const size_t maxVectorLength = 64;
    char output[maxVectorLength] = {0};
    char input[maxVectorLength] = {0};
    char zero[maxVectorLength] = {0};

    // The vector must fit in the buffers; otherwise the tail length computed
    // below would wrap around.
    const size_t vectorSize = TR::DataType::getSize(vt);
    ASSERT_LE(vectorSize, maxVectorLength);

    for (size_t i = 0; i < maxVectorLength; i++) {
        input[i] = static_cast<char>(i);
    }

    entry_point(output, input);

    // The first vectorSize bytes must match the input...
    EXPECT_EQ(0, memcmp(input, output, vectorSize));
    // ...and nothing past the end of the vector may have been written.
    EXPECT_EQ(0, memcmp(output + vectorSize, zero, maxVectorLength - vectorSize));
}

// Instantiates the ParameterizedVectorTest cases for every listed combination:
// vector lengths 128, 256 and 512, each paired with the element types
// Int8, Int16, Int32, Int64, Float and Double.
INSTANTIATE_TEST_CASE_P(VLoadStoreVectorTest, ParameterizedVectorTest, ::testing::ValuesIn(*TRTest::MakeVector<std::tuple<TR::VectorLength, TR::DataTypes>>(
std::make_tuple(TR::VectorLength128, TR::Int8),
std::make_tuple(TR::VectorLength128, TR::Int16),
std::make_tuple(TR::VectorLength128, TR::Int32),
std::make_tuple(TR::VectorLength128, TR::Int64),
std::make_tuple(TR::VectorLength128, TR::Float),
std::make_tuple(TR::VectorLength128, TR::Double),
std::make_tuple(TR::VectorLength256, TR::Int8),
std::make_tuple(TR::VectorLength256, TR::Int16),
std::make_tuple(TR::VectorLength256, TR::Int32),
std::make_tuple(TR::VectorLength256, TR::Int64),
std::make_tuple(TR::VectorLength256, TR::Float),
std::make_tuple(TR::VectorLength256, TR::Double),
std::make_tuple(TR::VectorLength512, TR::Int8),
std::make_tuple(TR::VectorLength512, TR::Int16),
std::make_tuple(TR::VectorLength512, TR::Int32),
std::make_tuple(TR::VectorLength512, TR::Int64),
std::make_tuple(TR::VectorLength512, TR::Float),
std::make_tuple(TR::VectorLength512, TR::Double)
)));

TEST_F(VectorTest, VDoubleAdd) {

Expand Down

0 comments on commit 974dee8

Please sign in to comment.