Skip to content

Commit

Permalink
Merge pull request #6604 from Akira1Saitoh/aarch64VectorMulByElement
Browse files Browse the repository at this point in the history
AArch64: Add floating point multiplication by element instructions
  • Loading branch information
knn-k committed Jul 13, 2022
2 parents 779ff43 + bffea78 commit f70a5f6
Show file tree
Hide file tree
Showing 10 changed files with 246 additions and 0 deletions.
37 changes: 37 additions & 0 deletions compiler/aarch64/codegen/ARM64BinaryEncoding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,43 @@ uint8_t *TR::ARM64Trg1Src2ExtendedInstruction::generateBinaryEncoding()
return cursor;
}

void TR::ARM64Trg1Src2IndexedElementInstruction::insertIndex(uint32_t *instruction)
{
TR::InstOpCode::Mnemonic mnemonic = getOpCodeValue();
if ((mnemonic >= TR::InstOpCode::fmulelem_4s) && (mnemonic <= TR::InstOpCode::vfmulelem_2d))
{
uint8_t h = 0, l = 0;
if ((mnemonic == TR::InstOpCode::fmulelem_4s) || (mnemonic == TR::InstOpCode::vfmulelem_4s))
{
h = (getIndex() >> 1) & 1;
l = getIndex() & 1;
}
else
{
h = getIndex() & 1;
}
*instruction |= (h << 11) | (l << 21);
}
else
{
TR_ASSERT_FATAL(false, "unsupported opcode: %d", mnemonic);
}
}

/**
 * Emits the binary encoding of this instruction at the code generator's
 * current buffer cursor: the opcode template is copied first, then the
 * target register, both source registers and the element index are merged
 * into the same instruction word.
 *
 * @return cursor advanced past the emitted instruction
 */
uint8_t *TR::ARM64Trg1Src2IndexedElementInstruction::generateBinaryEncoding()
   {
   uint8_t * const start = cg()->getBinaryBufferCursor();
   uint8_t * const encoded = getOpCode().copyBinaryToBuffer(start);
   uint32_t * const word = toARM64Cursor(encoded);

   insertTargetRegister(word);
   insertSource1Register(word);
   insertSource2Register(word);
   insertIndex(word);

   setBinaryLength(ARM64_INSTRUCTION_LENGTH);
   setBinaryEncoding(start);
   return encoded + ARM64_INSTRUCTION_LENGTH;
   }

uint8_t *TR::ARM64Trg1Src2ZeroInstruction::generateBinaryEncoding()
{
uint8_t *instructionStart = cg()->getBinaryBufferCursor();
Expand Down
22 changes: 22 additions & 0 deletions compiler/aarch64/codegen/ARM64Debug.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -947,6 +947,10 @@ static const char *opCodeToNameMap[] =
"vinseh",
"vinses",
"vinsed",
"fmulelem_4s",
"fmulelem_2d",
"vfmulelem_4s",
"vfmulelem_2d",
"vumlal_8h",
"vumlal_4s",
"vumlal_2d",
Expand Down Expand Up @@ -1107,6 +1111,9 @@ TR_Debug::print(TR::FILE *pOutFile, TR::Instruction *instr)
case OMR::Instruction::IsTrg1Src2Extended:
print(pOutFile, (TR::ARM64Trg1Src2ExtendedInstruction *)instr);
break;
case OMR::Instruction::IsTrg1Src2IndexedElement:
print(pOutFile, (TR::ARM64Trg1Src2IndexedElementInstruction *)instr);
break;
case OMR::Instruction::IsTrg1Src2Zero:
print(pOutFile, (TR::ARM64Trg1Src2ZeroInstruction *)instr);
break;
Expand Down Expand Up @@ -2123,6 +2130,21 @@ TR_Debug::print(TR::FILE *pOutFile, TR::ARM64Trg1Src2ExtendedInstruction *instr)
trfflush(_comp->getOutFile());
}

/**
 * Prints an indexed-element instruction as
 * "<opcode> \t<target>, <source1>, <source2>.[<index>]".
 *
 * @param[in] pOutFile : file to print to
 * @param[in] instr    : instruction to print
 */
void
TR_Debug::print(TR::FILE *pOutFile, TR::ARM64Trg1Src2IndexedElementInstruction *instr)
   {
   printPrefix(pOutFile, instr);
   trfprintf(pOutFile, "%s \t", getOpCodeName(&instr->getOpCode()));

   print(pOutFile, instr->getTargetRegister(), TR_WordReg); trfprintf(pOutFile, ", ");
   print(pOutFile, instr->getSource1Register(), TR_WordReg); trfprintf(pOutFile, ", ");
   print(pOutFile, instr->getSource2Register(), TR_WordReg);

   /* The element index is appended directly after the second source register. */
   trfprintf(pOutFile, ".[%d]", instr->getIndex());
   /* Flushes the compilation's output file, not pOutFile. */
   trfflush(_comp->getOutFile());
   }

void
TR_Debug::print(TR::FILE *pOutFile, TR::ARM64Trg1Src2ZeroInstruction *instr)
{
Expand Down
74 changes: 74 additions & 0 deletions compiler/aarch64/codegen/ARM64Instruction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2866,6 +2866,80 @@ class ARM64Trg1Src2ExtendedInstruction : public ARM64Trg1Src2Instruction
virtual uint8_t *generateBinaryEncoding();
};

/**
 * Two-source, one-target instruction whose second source operand is a single
 * element of s2reg, selected by an index that is encoded into the instruction.
 */
class ARM64Trg1Src2IndexedElementInstruction : public ARM64Trg1Src2Instruction
   {
   public:

   /**
    * @brief Constructor
    * @param[in] op    : instruction opcode
    * @param[in] node  : node
    * @param[in] treg  : target register
    * @param[in] s1reg : source register 1
    * @param[in] s2reg : source register 2
    * @param[in] index : index of element in s2reg
    * @param[in] cg    : CodeGenerator
    */
   ARM64Trg1Src2IndexedElementInstruction(
         TR::InstOpCode::Mnemonic op,
         TR::Node *node,
         TR::Register *treg,
         TR::Register *s1reg,
         TR::Register *s2reg,
         uint32_t index,
         TR::CodeGenerator *cg)
      : ARM64Trg1Src2Instruction(op, node, treg, s1reg, s2reg, cg),
        _index(index)
      {
      }

   /**
    * @brief Constructor
    * @param[in] op                   : instruction opcode
    * @param[in] node                 : node
    * @param[in] treg                 : target register
    * @param[in] s1reg                : source register 1
    * @param[in] s2reg                : source register 2
    * @param[in] index                : index of element in s2reg
    * @param[in] precedingInstruction : preceding instruction
    * @param[in] cg                   : CodeGenerator
    */
   ARM64Trg1Src2IndexedElementInstruction(
         TR::InstOpCode::Mnemonic op,
         TR::Node *node,
         TR::Register *treg,
         TR::Register *s1reg,
         TR::Register *s2reg,
         uint32_t index,
         TR::Instruction *precedingInstruction,
         TR::CodeGenerator *cg)
      : ARM64Trg1Src2Instruction(op, node, treg, s1reg, s2reg, precedingInstruction, cg),
        _index(index)
      {
      }

   /**
    * @brief Gets instruction kind
    * @return IsTrg1Src2IndexedElement
    */
   virtual Kind getKind() { return IsTrg1Src2IndexedElement; }

   /**
    * @brief Gets the element index
    * @return index of element in s2reg
    */
   uint32_t getIndex() { return _index; }

   /**
    * @brief Sets the element index in the binary encoding
    * @param[in] instruction : instruction cursor
    */
   void insertIndex(uint32_t *instruction);

   /**
    * @brief Generates binary encoding of the instruction
    * @return instruction cursor
    */
   virtual uint8_t *generateBinaryEncoding();

   private:

   uint32_t _index; /* index of the element in source register 2 */
   };

/*
* This class is designated to be used for alias instruction such as mulw, mulx
*/
Expand Down
24 changes: 24 additions & 0 deletions compiler/aarch64/codegen/GenerateInstructions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,30 @@ TR::Instruction *generateTrg1Src2ExtendedInstruction(TR::CodeGenerator *cg, TR::
return new (cg->trHeapMemory()) TR::ARM64Trg1Src2ExtendedInstruction(op, node, treg, s1reg, s2reg, extendType, shiftAmount, cg);
}

/**
 * Creates an ARM64Trg1Src2IndexedElementInstruction after validating its
 * opcode and element index.
 *
 * The opcode must lie in the range fmulelem_4s..vfmulelem_2d. The index must
 * be at most 3 for the *_4s opcodes and at most 1 for the *_2d opcodes. A
 * violation of either rule is a fatal assertion.
 *
 * @param[in] cg     : CodeGenerator
 * @param[in] op     : instruction opcode
 * @param[in] node   : node
 * @param[in] treg   : target register
 * @param[in] s1reg  : source register 1
 * @param[in] s2reg  : source register 2
 * @param[in] index  : index of element in s2reg
 * @param[in] preced : preceding instruction; when NULL the constructor without
 *                     a preceding instruction is used
 * @return generated instruction
 */
TR::Instruction *generateTrg1Src2IndexedElementInstruction(TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic op, TR::Node *node,
                                                           TR::Register *treg, TR::Register *s1reg, TR::Register *s2reg,
                                                           uint32_t index, TR::Instruction *preced)
   {
   const bool isSupportedOpCode = (op >= TR::InstOpCode::fmulelem_4s) && (op <= TR::InstOpCode::vfmulelem_2d);
   TR_ASSERT_FATAL_WITH_NODE(node, isSupportedOpCode, "unsupported opcode: %d", op);

   /* The *_4s opcodes take a 2-bit index; the *_2d opcodes take a 1-bit index. */
   const bool hasTwoBitIndex = (op == TR::InstOpCode::fmulelem_4s) || (op == TR::InstOpCode::vfmulelem_4s);
   const uint32_t maxIndex = hasTwoBitIndex ? 3 : 1;

   /* index is unsigned, so print it with %u to avoid showing large values as negative. */
   TR_ASSERT_FATAL_WITH_NODE(node, index <= maxIndex, "index is out of range: %u", index);

   if (preced)
      return new (cg->trHeapMemory()) TR::ARM64Trg1Src2IndexedElementInstruction(op, node, treg, s1reg, s2reg, index, preced, cg);
   return new (cg->trHeapMemory()) TR::ARM64Trg1Src2IndexedElementInstruction(op, node, treg, s1reg, s2reg, index, cg);
   }

TR::Instruction *generateTrg1Src3Instruction(TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic op, TR::Node *node,
TR::Register *treg, TR::Register *s1reg, TR::Register *s2reg, TR::Register *s3reg, TR::Instruction *preced)
{
Expand Down
22 changes: 22 additions & 0 deletions compiler/aarch64/codegen/GenerateInstructions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,28 @@ TR::Instruction *generateTrg1Src2ExtendedInstruction(
uint32_t shiftAmount,
TR::Instruction *preced = NULL);

/**
 * @brief Generates src2-to-trg indexed element instruction
 *
 * The definition accepts only opcodes in the range fmulelem_4s..vfmulelem_2d
 * and fatally asserts on any other opcode. It also fatally asserts when the
 * index exceeds 3 for the *_4s opcodes or 1 for the *_2d opcodes.
 *
 * @param[in] cg : CodeGenerator
 * @param[in] op : instruction opcode
 * @param[in] node : node
 * @param[in] treg : target register
 * @param[in] s1reg : source register 1
 * @param[in] s2reg : source register 2
 * @param[in] index : index of element in s2reg
 * @param[in] preced : preceding instruction (defaults to NULL)
 * @return generated instruction
 */
TR::Instruction *generateTrg1Src2IndexedElementInstruction(
TR::CodeGenerator *cg,
TR::InstOpCode::Mnemonic op,
TR::Node *node,
TR::Register *treg,
TR::Register *s1reg,
TR::Register *s2reg,
uint32_t index,
TR::Instruction *preced = NULL);

/*
* @brief Generates src3-to-trg instruction
* @param[in] cg : CodeGenerator
Expand Down
5 changes: 5 additions & 0 deletions compiler/aarch64/codegen/OMRInstOpCode.enum
Original file line number Diff line number Diff line change
Expand Up @@ -939,6 +939,11 @@
vinseh, /* 0x6E020400 INS */
vinses, /* 0x6E040400 INS */
vinsed, /* 0x6E080400 INS */
/* Vector by element arithmetics */
fmulelem_4s, /* 0x5F809000 FMUL (scalar)*/
fmulelem_2d, /* 0x5FC09000 FMUL (scalar)*/
vfmulelem_4s, /* 0x4F809000 FMUL */
vfmulelem_2d, /* 0x4FC09000 FMUL */
/* Vector widening and narrowing arithmetics */
vumlal_8h, /* 0x2E208000 UMLAL */
vumlal_4s, /* 0x2E608000 UMLAL */
Expand Down
1 change: 1 addition & 0 deletions compiler/aarch64/codegen/OMRInstructionKindEnum.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
IsCondTrg1Src2,
IsTrg1Src2Shifted,
IsTrg1Src2Extended,
IsTrg1Src2IndexedElement,
IsTrg1Src2Zero,
IsTrg1Src3,
IsTrg1Mem,
Expand Down
5 changes: 5 additions & 0 deletions compiler/aarch64/codegen/OpBinary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -940,6 +940,11 @@ const OMR::ARM64::InstOpCode::OpCodeBinaryEntry OMR::ARM64::InstOpCode::binaryEn
0x6E020400, /* INS vinseh */
0x6E040400, /* INS vinses */
0x6E080400, /* INS vinsed */
/* Vector by element arithmetics */
0x5F809000, /* FMUL (scalar) fmulelem_4s */
0x5FC09000, /* FMUL (scalar) fmulelem_2d */
0x4F809000, /* FMUL vfmulelem_4s */
0x4FC09000, /* FMUL vfmulelem_2d */
/* Vector widening and narrowing arithmetics */
0x2E208000, /* UMLAL vumlal_8h */
0x2E608000, /* UMLAL vumlal_4s */
Expand Down
2 changes: 2 additions & 0 deletions compiler/ras/Debug.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,7 @@ namespace TR { class ARM64Trg1Src2Instruction; }
namespace TR { class ARM64CondTrg1Src2Instruction; }
namespace TR { class ARM64Trg1Src2ShiftedInstruction; }
namespace TR { class ARM64Trg1Src2ExtendedInstruction; }
namespace TR { class ARM64Trg1Src2IndexedElementInstruction; }
namespace TR { class ARM64Trg1Src2ZeroInstruction; }
namespace TR { class ARM64Trg1Src3Instruction; }
namespace TR { class ARM64Trg1MemInstruction; }
Expand Down Expand Up @@ -1135,6 +1136,7 @@ class TR_Debug
void print(TR::FILE *, TR::ARM64CondTrg1Src2Instruction *);
void print(TR::FILE *, TR::ARM64Trg1Src2ShiftedInstruction *);
void print(TR::FILE *, TR::ARM64Trg1Src2ExtendedInstruction *);
void print(TR::FILE *, TR::ARM64Trg1Src2IndexedElementInstruction *);
void print(TR::FILE *, TR::ARM64Trg1Src2ZeroInstruction *);
void print(TR::FILE *, TR::ARM64Trg1Src3Instruction *);
void print(TR::FILE *, TR::ARM64Trg1MemInstruction *);
Expand Down
54 changes: 54 additions & 0 deletions fvtest/compilerunittest/aarch64/BinaryEncoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,18 @@ TEST_P(ARM64Trg1Src2EncodingTest, encode) {
ASSERT_EQ(std::get<4>(GetParam()), encodeInstruction(instr));
}

// Fixture for encoding tests of indexed-element instructions.
// Parameter tuple: (opcode, target register, source register 1,
// source register 2, element index, expected binary encoding).
class ARM64Trg1Src2IndexedElementEncodingTest
    : public TRTest::BinaryEncoderTest<ARM64_INSTRUCTION_ALIGNMENT>,
      public ::testing::WithParamInterface<
          std::tuple<TR::InstOpCode::Mnemonic,
                     TR::RealRegister::RegNum,
                     TR::RealRegister::RegNum,
                     TR::RealRegister::RegNum,
                     uint32_t,
                     ARM64BinaryInstruction>> {};

// Builds the instruction described by the parameter tuple and checks that its
// encoding equals the expected one.
TEST_P(ARM64Trg1Src2IndexedElementEncodingTest, encode) {
    const auto &param = GetParam();

    const auto opcode = std::get<0>(param);
    auto target = cg()->machine()->getRealRegister(std::get<1>(param));
    auto source1 = cg()->machine()->getRealRegister(std::get<2>(param));
    auto source2 = cg()->machine()->getRealRegister(std::get<3>(param));
    const uint32_t elementIndex = std::get<4>(param);
    const auto &expected = std::get<5>(param);

    auto instr = generateTrg1Src2IndexedElementInstruction(cg(), opcode, fakeNode, target, source1, source2, elementIndex);

    ASSERT_EQ(expected, encodeInstruction(instr));
}

class ARM64VectorShiftImmediateEncodingTest : public TRTest::BinaryEncoderTest<ARM64_INSTRUCTION_ALIGNMENT>, public ::testing::WithParamInterface<std::tuple<TR::InstOpCode::Mnemonic, TR::RealRegister::RegNum, TR::RealRegister::RegNum, uint32_t, ARM64BinaryInstruction>> {};

TEST_P(ARM64VectorShiftImmediateEncodingTest, encode) {
Expand Down Expand Up @@ -2108,3 +2120,45 @@ INSTANTIATE_TEST_CASE_P(ScalarFAddp, ARM64Trg1Src1EncodingTest, ::testing::Value
std::make_tuple(TR::InstOpCode::faddp2d, TR::RealRegister::v15, TR::RealRegister::v0, "7e70d80f"),
std::make_tuple(TR::InstOpCode::faddp2d, TR::RealRegister::v31, TR::RealRegister::v0, "7e70d81f")
));

// Encoding cases for the scalar by-element opcodes fmulelem_4s and fmulelem_2d.
// Each tuple is (opcode, target register, source register 1, source register 2,
// element index, expected encoding as a hex string).
// The cases vary one register operand at a time (v15 or v31, the others v0)
// and cover indices 0-3 for fmulelem_4s and 0-1 for fmulelem_2d.
INSTANTIATE_TEST_CASE_P(ScalarFmulElem, ARM64Trg1Src2IndexedElementEncodingTest, ::testing::Values(
std::make_tuple(TR::InstOpCode::fmulelem_4s, TR::RealRegister::v15, TR::RealRegister::v0, TR::RealRegister::v0, 0, "5f80900f"),
std::make_tuple(TR::InstOpCode::fmulelem_4s, TR::RealRegister::v15, TR::RealRegister::v0, TR::RealRegister::v0, 1, "5fa0900f"),
std::make_tuple(TR::InstOpCode::fmulelem_4s, TR::RealRegister::v31, TR::RealRegister::v0, TR::RealRegister::v0, 2, "5f80981f"),
std::make_tuple(TR::InstOpCode::fmulelem_4s, TR::RealRegister::v31, TR::RealRegister::v0, TR::RealRegister::v0, 3, "5fa0981f"),
std::make_tuple(TR::InstOpCode::fmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v15, TR::RealRegister::v0, 0, "5f8091e0"),
std::make_tuple(TR::InstOpCode::fmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v15, TR::RealRegister::v0, 1, "5fa091e0"),
std::make_tuple(TR::InstOpCode::fmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v31, TR::RealRegister::v0, 2, "5f809be0"),
std::make_tuple(TR::InstOpCode::fmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v31, TR::RealRegister::v0, 3, "5fa09be0"),
std::make_tuple(TR::InstOpCode::fmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, 0, "5f8f9000"),
std::make_tuple(TR::InstOpCode::fmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, 1, "5faf9000"),
std::make_tuple(TR::InstOpCode::fmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, 2, "5f9f9800"),
std::make_tuple(TR::InstOpCode::fmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, 3, "5fbf9800"),
std::make_tuple(TR::InstOpCode::fmulelem_2d, TR::RealRegister::v15, TR::RealRegister::v0, TR::RealRegister::v0, 0, "5fc0900f"),
std::make_tuple(TR::InstOpCode::fmulelem_2d, TR::RealRegister::v31, TR::RealRegister::v0, TR::RealRegister::v0, 1, "5fc0981f"),
std::make_tuple(TR::InstOpCode::fmulelem_2d, TR::RealRegister::v0, TR::RealRegister::v15, TR::RealRegister::v0, 0, "5fc091e0"),
std::make_tuple(TR::InstOpCode::fmulelem_2d, TR::RealRegister::v0, TR::RealRegister::v31, TR::RealRegister::v0, 1, "5fc09be0"),
std::make_tuple(TR::InstOpCode::fmulelem_2d, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, 0, "5fcf9000"),
std::make_tuple(TR::InstOpCode::fmulelem_2d, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, 1, "5fdf9800")
));

// Encoding cases for the vector by-element opcodes vfmulelem_4s and vfmulelem_2d.
// Each tuple is (opcode, target register, source register 1, source register 2,
// element index, expected encoding as a hex string).
// The cases vary one register operand at a time (v15 or v31, the others v0)
// and cover indices 0-3 for vfmulelem_4s and 0-1 for vfmulelem_2d.
INSTANTIATE_TEST_CASE_P(VectorFmulElem, ARM64Trg1Src2IndexedElementEncodingTest, ::testing::Values(
std::make_tuple(TR::InstOpCode::vfmulelem_4s, TR::RealRegister::v15, TR::RealRegister::v0, TR::RealRegister::v0, 0, "4f80900f"),
std::make_tuple(TR::InstOpCode::vfmulelem_4s, TR::RealRegister::v15, TR::RealRegister::v0, TR::RealRegister::v0, 1, "4fa0900f"),
std::make_tuple(TR::InstOpCode::vfmulelem_4s, TR::RealRegister::v31, TR::RealRegister::v0, TR::RealRegister::v0, 2, "4f80981f"),
std::make_tuple(TR::InstOpCode::vfmulelem_4s, TR::RealRegister::v31, TR::RealRegister::v0, TR::RealRegister::v0, 3, "4fa0981f"),
std::make_tuple(TR::InstOpCode::vfmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v15, TR::RealRegister::v0, 0, "4f8091e0"),
std::make_tuple(TR::InstOpCode::vfmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v15, TR::RealRegister::v0, 1, "4fa091e0"),
std::make_tuple(TR::InstOpCode::vfmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v31, TR::RealRegister::v0, 2, "4f809be0"),
std::make_tuple(TR::InstOpCode::vfmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v31, TR::RealRegister::v0, 3, "4fa09be0"),
std::make_tuple(TR::InstOpCode::vfmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, 0, "4f8f9000"),
std::make_tuple(TR::InstOpCode::vfmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, 1, "4faf9000"),
std::make_tuple(TR::InstOpCode::vfmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, 2, "4f9f9800"),
std::make_tuple(TR::InstOpCode::vfmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, 3, "4fbf9800"),
std::make_tuple(TR::InstOpCode::vfmulelem_2d, TR::RealRegister::v15, TR::RealRegister::v0, TR::RealRegister::v0, 0, "4fc0900f"),
std::make_tuple(TR::InstOpCode::vfmulelem_2d, TR::RealRegister::v31, TR::RealRegister::v0, TR::RealRegister::v0, 1, "4fc0981f"),
std::make_tuple(TR::InstOpCode::vfmulelem_2d, TR::RealRegister::v0, TR::RealRegister::v15, TR::RealRegister::v0, 0, "4fc091e0"),
std::make_tuple(TR::InstOpCode::vfmulelem_2d, TR::RealRegister::v0, TR::RealRegister::v31, TR::RealRegister::v0, 1, "4fc09be0"),
std::make_tuple(TR::InstOpCode::vfmulelem_2d, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, 0, "4fcf9000"),
std::make_tuple(TR::InstOpCode::vfmulelem_2d, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, 1, "4fdf9800")
));

0 comments on commit f70a5f6

Please sign in to comment.