Skip to content

Commit

Permalink
AArch64: Add floating point multiplication by element instructions
Browse files Browse the repository at this point in the history
This commit adds vector and scalar floating point multiplication
by element instructions and binary encoding unit tests.

Signed-off-by: Akira Saitoh <saiaki@jp.ibm.com>
  • Loading branch information
Akira Saitoh committed Jul 13, 2022
1 parent 4c52ff1 commit bffea78
Show file tree
Hide file tree
Showing 10 changed files with 246 additions and 0 deletions.
37 changes: 37 additions & 0 deletions compiler/aarch64/codegen/ARM64BinaryEncoding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,43 @@ uint8_t *TR::ARM64Trg1Src2ExtendedInstruction::generateBinaryEncoding()
return cursor;
}

/*
 * Merges the element index of this instruction into the instruction word.
 *
 * For the 4s opcodes the index is split across two bits of the instruction:
 * bit 1 of the index is placed at bit 11 and bit 0 of the index at bit 21.
 * For the remaining supported (2d) opcodes only bit 0 of the index is used,
 * and it is placed at bit 11.
 *
 * Any opcode outside the fmulelem_4s..vfmulelem_2d range triggers a fatal assertion.
 */
void TR::ARM64Trg1Src2IndexedElementInstruction::insertIndex(uint32_t *instruction)
   {
   const TR::InstOpCode::Mnemonic opValue = getOpCodeValue();
   const bool isByElementFmul =
      (opValue >= TR::InstOpCode::fmulelem_4s) && (opValue <= TR::InstOpCode::vfmulelem_2d);

   if (!isByElementFmul)
      {
      TR_ASSERT_FATAL(false, "unsupported opcode: %d", opValue);
      return;
      }

   const bool isSingleLane =
      (opValue == TR::InstOpCode::fmulelem_4s) || (opValue == TR::InstOpCode::vfmulelem_4s);

   // highBit lands at instruction bit 11, lowBit at instruction bit 21
   uint32_t highBit;
   uint32_t lowBit;
   if (isSingleLane)
      {
      highBit = (getIndex() >> 1) & 1;
      lowBit = getIndex() & 1;
      }
   else
      {
      highBit = getIndex() & 1;
      lowBit = 0;
      }

   *instruction |= (highBit << 11) | (lowBit << 21);
   }

/*
 * Emits the binary encoding of this instruction at the code generator's
 * current buffer cursor: the opcode template is copied first, then the
 * target register, both source registers and the element index are merged in.
 *
 * Returns the cursor advanced by one instruction length.
 */
uint8_t *TR::ARM64Trg1Src2IndexedElementInstruction::generateBinaryEncoding()
   {
   uint8_t *start = cg()->getBinaryBufferCursor();
   uint8_t *current = getOpCode().copyBinaryToBuffer(start);

   // All operand fields are merged into the same instruction word
   uint32_t *encodingWord = toARM64Cursor(current);
   insertTargetRegister(encodingWord);
   insertSource1Register(encodingWord);
   insertSource2Register(encodingWord);
   insertIndex(encodingWord);

   current += ARM64_INSTRUCTION_LENGTH;
   setBinaryLength(ARM64_INSTRUCTION_LENGTH);
   setBinaryEncoding(start);
   return current;
   }

uint8_t *TR::ARM64Trg1Src2ZeroInstruction::generateBinaryEncoding()
{
uint8_t *instructionStart = cg()->getBinaryBufferCursor();
Expand Down
22 changes: 22 additions & 0 deletions compiler/aarch64/codegen/ARM64Debug.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -947,6 +947,10 @@ static const char *opCodeToNameMap[] =
"vinseh",
"vinses",
"vinsed",
"fmulelem_4s",
"fmulelem_2d",
"vfmulelem_4s",
"vfmulelem_2d",
"vumlal_8h",
"vumlal_4s",
"vumlal_2d",
Expand Down Expand Up @@ -1107,6 +1111,9 @@ TR_Debug::print(TR::FILE *pOutFile, TR::Instruction *instr)
case OMR::Instruction::IsTrg1Src2Extended:
print(pOutFile, (TR::ARM64Trg1Src2ExtendedInstruction *)instr);
break;
case OMR::Instruction::IsTrg1Src2IndexedElement:
print(pOutFile, (TR::ARM64Trg1Src2IndexedElementInstruction *)instr);
break;
case OMR::Instruction::IsTrg1Src2Zero:
print(pOutFile, (TR::ARM64Trg1Src2ZeroInstruction *)instr);
break;
Expand Down Expand Up @@ -2123,6 +2130,21 @@ TR_Debug::print(TR::FILE *pOutFile, TR::ARM64Trg1Src2ExtendedInstruction *instr)
trfflush(_comp->getOutFile());
}

/*
 * Prints an indexed-element instruction: the prefix and opcode name, then the
 * target, source 1 and source 2 registers, with the element index appended to
 * source 2 in the form ".[index]".
 *
 * pOutFile : file the text is written to
 * instr    : instruction to print
 */
void
TR_Debug::print(TR::FILE *pOutFile, TR::ARM64Trg1Src2IndexedElementInstruction *instr)
   {
   printPrefix(pOutFile, instr);
   trfprintf(pOutFile, "%s \t", getOpCodeName(&instr->getOpCode()));

   print(pOutFile, instr->getTargetRegister(), TR_WordReg); trfprintf(pOutFile, ", ");
   print(pOutFile, instr->getSource1Register(), TR_WordReg); trfprintf(pOutFile, ", ");
   print(pOutFile, instr->getSource2Register(), TR_WordReg);

   // The index selects the element of source register 2 used by the instruction
   trfprintf(pOutFile, ".[%d]", instr->getIndex());
   trfflush(_comp->getOutFile());
   }

void
TR_Debug::print(TR::FILE *pOutFile, TR::ARM64Trg1Src2ZeroInstruction *instr)
{
Expand Down
74 changes: 74 additions & 0 deletions compiler/aarch64/codegen/ARM64Instruction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2866,6 +2866,80 @@ class ARM64Trg1Src2ExtendedInstruction : public ARM64Trg1Src2Instruction
virtual uint8_t *generateBinaryEncoding();
};

/*
 * Instruction with one target register and two source registers, where an
 * additional index identifies an element in source register 2.
 */
class ARM64Trg1Src2IndexedElementInstruction : public ARM64Trg1Src2Instruction
   {
   private:

   uint32_t _index; // index of the element in source register 2

   public:

   /*
    * @brief Constructor
    * @param[in] op : instruction opcode
    * @param[in] node : node
    * @param[in] treg : target register
    * @param[in] s1reg : source register 1
    * @param[in] s2reg : source register 2
    * @param[in] index : index of element in s2reg
    * @param[in] cg : CodeGenerator
    */
   ARM64Trg1Src2IndexedElementInstruction(
         TR::InstOpCode::Mnemonic op,
         TR::Node *node,
         TR::Register *treg,
         TR::Register *s1reg,
         TR::Register *s2reg,
         uint32_t index,
         TR::CodeGenerator *cg)
      : ARM64Trg1Src2Instruction(op, node, treg, s1reg, s2reg, cg),
        _index(index)
      {}

   /*
    * @brief Constructor
    * @param[in] op : instruction opcode
    * @param[in] node : node
    * @param[in] treg : target register
    * @param[in] s1reg : source register 1
    * @param[in] s2reg : source register 2
    * @param[in] index : index of element in s2reg
    * @param[in] precedingInstruction : preceding instruction
    * @param[in] cg : CodeGenerator
    */
   ARM64Trg1Src2IndexedElementInstruction(
         TR::InstOpCode::Mnemonic op,
         TR::Node *node,
         TR::Register *treg,
         TR::Register *s1reg,
         TR::Register *s2reg,
         uint32_t index,
         TR::Instruction *precedingInstruction,
         TR::CodeGenerator *cg)
      : ARM64Trg1Src2Instruction(op, node, treg, s1reg, s2reg, precedingInstruction, cg),
        _index(index)
      {}

   /**
    * @brief Gets instruction kind
    * @return IsTrg1Src2IndexedElement
    */
   virtual Kind getKind()
      {
      return IsTrg1Src2IndexedElement;
      }

   /**
    * @brief Gets the element index given at construction
    * @return index of element in s2reg
    */
   uint32_t getIndex()
      {
      return _index;
      }

   /**
    * @brief Sets the element index in the binary encoding
    * @param[in] instruction : instruction cursor
    */
   void insertIndex(uint32_t *instruction);

   /**
    * @brief Generates binary encoding of the instruction
    * @return instruction cursor
    */
   virtual uint8_t *generateBinaryEncoding();
   };

/*
* This class is designated to be used for alias instruction such as mulw, mulx
*/
Expand Down
24 changes: 24 additions & 0 deletions compiler/aarch64/codegen/GenerateInstructions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,30 @@ TR::Instruction *generateTrg1Src2ExtendedInstruction(TR::CodeGenerator *cg, TR::
return new (cg->trHeapMemory()) TR::ARM64Trg1Src2ExtendedInstruction(op, node, treg, s1reg, s2reg, extendType, shiftAmount, cg);
}

/*
 * Creates an indexed-element instruction after validating the opcode and index.
 *
 * Only opcodes in the fmulelem_4s..vfmulelem_2d range are accepted. The index
 * must be at most 3 for the 4s opcodes and at most 1 for the others. A
 * violation of either rule triggers a fatal assertion.
 *
 * When preced is non-NULL it is handed to the constructor that takes a
 * preceding instruction; otherwise the constructor without it is used.
 */
TR::Instruction *generateTrg1Src2IndexedElementInstruction(TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic op, TR::Node *node,
   TR::Register *treg, TR::Register *s1reg, TR::Register *s2reg,
   uint32_t index, TR::Instruction *preced)
   {
   const bool opIsSupported = (op >= TR::InstOpCode::fmulelem_4s) && (op <= TR::InstOpCode::vfmulelem_2d);
   const bool opIsSingleLane = (op == TR::InstOpCode::fmulelem_4s) || (op == TR::InstOpCode::vfmulelem_4s);

   if (!opIsSupported)
      {
      TR_ASSERT_FATAL_WITH_NODE(node, false, "unsupported opcode: %d", op);
      }
   else if (opIsSingleLane)
      {
      TR_ASSERT_FATAL_WITH_NODE(node, index <= 3, "index is out of range: %d", index);
      }
   else
      {
      TR_ASSERT_FATAL_WITH_NODE(node, index <= 1, "index is out of range: %d", index);
      }

   if (!preced)
      {
      return new (cg->trHeapMemory()) TR::ARM64Trg1Src2IndexedElementInstruction(op, node, treg, s1reg, s2reg, index, cg);
      }

   return new (cg->trHeapMemory()) TR::ARM64Trg1Src2IndexedElementInstruction(op, node, treg, s1reg, s2reg, index, preced, cg);
   }

TR::Instruction *generateTrg1Src3Instruction(TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic op, TR::Node *node,
TR::Register *treg, TR::Register *s1reg, TR::Register *s2reg, TR::Register *s3reg, TR::Instruction *preced)
{
Expand Down
22 changes: 22 additions & 0 deletions compiler/aarch64/codegen/GenerateInstructions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,28 @@ TR::Instruction *generateTrg1Src2ExtendedInstruction(
uint32_t shiftAmount,
TR::Instruction *preced = NULL);

/*
 * @brief Generates src2-to-trg indexed element instruction
 *
 * The instruction takes two source registers and one target register, plus an
 * index that selects an element of source register 2.
 *
 * @param[in] cg : CodeGenerator
 * @param[in] op : instruction opcode
 * @param[in] node : node
 * @param[in] treg : target register
 * @param[in] s1reg : source register 1
 * @param[in] s2reg : source register 2
 * @param[in] index : index of element in s2reg
 * @param[in] preced : preceding instruction (optional, defaults to NULL)
 * @return generated instruction
 */
TR::Instruction *generateTrg1Src2IndexedElementInstruction(
TR::CodeGenerator *cg,
TR::InstOpCode::Mnemonic op,
TR::Node *node,
TR::Register *treg,
TR::Register *s1reg,
TR::Register *s2reg,
uint32_t index,
TR::Instruction *preced = NULL);

/*
* @brief Generates src3-to-trg instruction
* @param[in] cg : CodeGenerator
Expand Down
5 changes: 5 additions & 0 deletions compiler/aarch64/codegen/OMRInstOpCode.enum
Original file line number Diff line number Diff line change
Expand Up @@ -939,6 +939,11 @@
vinseh, /* 0x6E020400 INS */
vinses, /* 0x6E040400 INS */
vinsed, /* 0x6E080400 INS */
/* Vector by element arithmetics */
fmulelem_4s, /* 0x5F809000 FMUL (scalar)*/
fmulelem_2d, /* 0x5FC09000 FMUL (scalar)*/
vfmulelem_4s, /* 0x4F809000 FMUL */
vfmulelem_2d, /* 0x4FC09000 FMUL */
/* Vector widening and narrowing arithmetics */
vumlal_8h, /* 0x2E208000 UMLAL */
vumlal_4s, /* 0x2E608000 UMLAL */
Expand Down
1 change: 1 addition & 0 deletions compiler/aarch64/codegen/OMRInstructionKindEnum.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
IsCondTrg1Src2,
IsTrg1Src2Shifted,
IsTrg1Src2Extended,
IsTrg1Src2IndexedElement,
IsTrg1Src2Zero,
IsTrg1Src3,
IsTrg1Mem,
Expand Down
5 changes: 5 additions & 0 deletions compiler/aarch64/codegen/OpBinary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -940,6 +940,11 @@ const OMR::ARM64::InstOpCode::OpCodeBinaryEntry OMR::ARM64::InstOpCode::binaryEn
0x6E020400, /* INS vinseh */
0x6E040400, /* INS vinses */
0x6E080400, /* INS vinsed */
/* Vector by element arithmetics */
0x5F809000, /* FMUL (scalar) fmulelem_4s */
0x5FC09000, /* FMUL (scalar) fmulelem_2d */
0x4F809000, /* FMUL vfmulelem_4s */
0x4FC09000, /* FMUL vfmulelem_2d */
/* Vector widening and narrowing arithmetics */
0x2E208000, /* UMLAL vumlal_8h */
0x2E608000, /* UMLAL vumlal_4s */
Expand Down
2 changes: 2 additions & 0 deletions compiler/ras/Debug.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,7 @@ namespace TR { class ARM64Trg1Src2Instruction; }
namespace TR { class ARM64CondTrg1Src2Instruction; }
namespace TR { class ARM64Trg1Src2ShiftedInstruction; }
namespace TR { class ARM64Trg1Src2ExtendedInstruction; }
namespace TR { class ARM64Trg1Src2IndexedElementInstruction; }
namespace TR { class ARM64Trg1Src2ZeroInstruction; }
namespace TR { class ARM64Trg1Src3Instruction; }
namespace TR { class ARM64Trg1MemInstruction; }
Expand Down Expand Up @@ -1135,6 +1136,7 @@ class TR_Debug
void print(TR::FILE *, TR::ARM64CondTrg1Src2Instruction *);
void print(TR::FILE *, TR::ARM64Trg1Src2ShiftedInstruction *);
void print(TR::FILE *, TR::ARM64Trg1Src2ExtendedInstruction *);
void print(TR::FILE *, TR::ARM64Trg1Src2IndexedElementInstruction *);
void print(TR::FILE *, TR::ARM64Trg1Src2ZeroInstruction *);
void print(TR::FILE *, TR::ARM64Trg1Src3Instruction *);
void print(TR::FILE *, TR::ARM64Trg1MemInstruction *);
Expand Down
54 changes: 54 additions & 0 deletions fvtest/compilerunittest/aarch64/BinaryEncoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,18 @@ TEST_P(ARM64Trg1Src2EncodingTest, encode) {
ASSERT_EQ(std::get<4>(GetParam()), encodeInstruction(instr));
}

// Fixture for indexed-element instructions. Each parameter tuple holds:
// opcode, target register, source register 1, source register 2,
// element index, expected binary encoding.
class ARM64Trg1Src2IndexedElementEncodingTest
   : public TRTest::BinaryEncoderTest<ARM64_INSTRUCTION_ALIGNMENT>,
     public ::testing::WithParamInterface<
        std::tuple<TR::InstOpCode::Mnemonic,
                   TR::RealRegister::RegNum,
                   TR::RealRegister::RegNum,
                   TR::RealRegister::RegNum,
                   uint32_t,
                   ARM64BinaryInstruction>>
   {
   };

TEST_P(ARM64Trg1Src2IndexedElementEncodingTest, encode) {
   const auto &param = GetParam();

   // Resolve the three register numbers to real registers of the test machine
   auto target = cg()->machine()->getRealRegister(std::get<1>(param));
   auto source1 = cg()->machine()->getRealRegister(std::get<2>(param));
   auto source2 = cg()->machine()->getRealRegister(std::get<3>(param));

   auto instr = generateTrg1Src2IndexedElementInstruction(
      cg(), std::get<0>(param), fakeNode, target, source1, source2, std::get<4>(param));

   ASSERT_EQ(std::get<5>(param), encodeInstruction(instr));
}

class ARM64VectorShiftImmediateEncodingTest : public TRTest::BinaryEncoderTest<ARM64_INSTRUCTION_ALIGNMENT>, public ::testing::WithParamInterface<std::tuple<TR::InstOpCode::Mnemonic, TR::RealRegister::RegNum, TR::RealRegister::RegNum, uint32_t, ARM64BinaryInstruction>> {};

TEST_P(ARM64VectorShiftImmediateEncodingTest, encode) {
Expand Down Expand Up @@ -2108,3 +2120,45 @@ INSTANTIATE_TEST_CASE_P(ScalarFAddp, ARM64Trg1Src1EncodingTest, ::testing::Value
std::make_tuple(TR::InstOpCode::faddp2d, TR::RealRegister::v15, TR::RealRegister::v0, "7e70d80f"),
std::make_tuple(TR::InstOpCode::faddp2d, TR::RealRegister::v31, TR::RealRegister::v0, "7e70d81f")
));

// Encoding vectors for the scalar by-element opcodes fmulelem_4s and fmulelem_2d.
// Each tuple is: opcode, target register, source register 1, source register 2,
// element index, expected encoding as a hexadecimal string.
// Every register position is exercised with v15 and v31 while the other two stay
// at v0. fmulelem_4s is covered for indices 0-3 and fmulelem_2d for indices 0-1.
INSTANTIATE_TEST_CASE_P(ScalarFmulElem, ARM64Trg1Src2IndexedElementEncodingTest, ::testing::Values(
std::make_tuple(TR::InstOpCode::fmulelem_4s, TR::RealRegister::v15, TR::RealRegister::v0, TR::RealRegister::v0, 0, "5f80900f"),
std::make_tuple(TR::InstOpCode::fmulelem_4s, TR::RealRegister::v15, TR::RealRegister::v0, TR::RealRegister::v0, 1, "5fa0900f"),
std::make_tuple(TR::InstOpCode::fmulelem_4s, TR::RealRegister::v31, TR::RealRegister::v0, TR::RealRegister::v0, 2, "5f80981f"),
std::make_tuple(TR::InstOpCode::fmulelem_4s, TR::RealRegister::v31, TR::RealRegister::v0, TR::RealRegister::v0, 3, "5fa0981f"),
std::make_tuple(TR::InstOpCode::fmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v15, TR::RealRegister::v0, 0, "5f8091e0"),
std::make_tuple(TR::InstOpCode::fmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v15, TR::RealRegister::v0, 1, "5fa091e0"),
std::make_tuple(TR::InstOpCode::fmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v31, TR::RealRegister::v0, 2, "5f809be0"),
std::make_tuple(TR::InstOpCode::fmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v31, TR::RealRegister::v0, 3, "5fa09be0"),
std::make_tuple(TR::InstOpCode::fmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, 0, "5f8f9000"),
std::make_tuple(TR::InstOpCode::fmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, 1, "5faf9000"),
std::make_tuple(TR::InstOpCode::fmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, 2, "5f9f9800"),
std::make_tuple(TR::InstOpCode::fmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, 3, "5fbf9800"),
std::make_tuple(TR::InstOpCode::fmulelem_2d, TR::RealRegister::v15, TR::RealRegister::v0, TR::RealRegister::v0, 0, "5fc0900f"),
std::make_tuple(TR::InstOpCode::fmulelem_2d, TR::RealRegister::v31, TR::RealRegister::v0, TR::RealRegister::v0, 1, "5fc0981f"),
std::make_tuple(TR::InstOpCode::fmulelem_2d, TR::RealRegister::v0, TR::RealRegister::v15, TR::RealRegister::v0, 0, "5fc091e0"),
std::make_tuple(TR::InstOpCode::fmulelem_2d, TR::RealRegister::v0, TR::RealRegister::v31, TR::RealRegister::v0, 1, "5fc09be0"),
std::make_tuple(TR::InstOpCode::fmulelem_2d, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, 0, "5fcf9000"),
std::make_tuple(TR::InstOpCode::fmulelem_2d, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, 1, "5fdf9800")
));

// Encoding vectors for the vector by-element opcodes vfmulelem_4s and vfmulelem_2d.
// Each tuple is: opcode, target register, source register 1, source register 2,
// element index, expected encoding as a hexadecimal string.
// Every register position is exercised with v15 and v31 while the other two stay
// at v0. vfmulelem_4s is covered for indices 0-3 and vfmulelem_2d for indices 0-1.
INSTANTIATE_TEST_CASE_P(VectorFmulElem, ARM64Trg1Src2IndexedElementEncodingTest, ::testing::Values(
std::make_tuple(TR::InstOpCode::vfmulelem_4s, TR::RealRegister::v15, TR::RealRegister::v0, TR::RealRegister::v0, 0, "4f80900f"),
std::make_tuple(TR::InstOpCode::vfmulelem_4s, TR::RealRegister::v15, TR::RealRegister::v0, TR::RealRegister::v0, 1, "4fa0900f"),
std::make_tuple(TR::InstOpCode::vfmulelem_4s, TR::RealRegister::v31, TR::RealRegister::v0, TR::RealRegister::v0, 2, "4f80981f"),
std::make_tuple(TR::InstOpCode::vfmulelem_4s, TR::RealRegister::v31, TR::RealRegister::v0, TR::RealRegister::v0, 3, "4fa0981f"),
std::make_tuple(TR::InstOpCode::vfmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v15, TR::RealRegister::v0, 0, "4f8091e0"),
std::make_tuple(TR::InstOpCode::vfmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v15, TR::RealRegister::v0, 1, "4fa091e0"),
std::make_tuple(TR::InstOpCode::vfmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v31, TR::RealRegister::v0, 2, "4f809be0"),
std::make_tuple(TR::InstOpCode::vfmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v31, TR::RealRegister::v0, 3, "4fa09be0"),
std::make_tuple(TR::InstOpCode::vfmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, 0, "4f8f9000"),
std::make_tuple(TR::InstOpCode::vfmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, 1, "4faf9000"),
std::make_tuple(TR::InstOpCode::vfmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, 2, "4f9f9800"),
std::make_tuple(TR::InstOpCode::vfmulelem_4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, 3, "4fbf9800"),
std::make_tuple(TR::InstOpCode::vfmulelem_2d, TR::RealRegister::v15, TR::RealRegister::v0, TR::RealRegister::v0, 0, "4fc0900f"),
std::make_tuple(TR::InstOpCode::vfmulelem_2d, TR::RealRegister::v31, TR::RealRegister::v0, TR::RealRegister::v0, 1, "4fc0981f"),
std::make_tuple(TR::InstOpCode::vfmulelem_2d, TR::RealRegister::v0, TR::RealRegister::v15, TR::RealRegister::v0, 0, "4fc091e0"),
std::make_tuple(TR::InstOpCode::vfmulelem_2d, TR::RealRegister::v0, TR::RealRegister::v31, TR::RealRegister::v0, 1, "4fc09be0"),
std::make_tuple(TR::InstOpCode::vfmulelem_2d, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, 0, "4fcf9000"),
std::make_tuple(TR::InstOpCode::vfmulelem_2d, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, 1, "4fdf9800")
));

0 comments on commit bffea78

Please sign in to comment.