Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

AArch64: Add floating point min/max pairwise instructions #6609

Merged
merged 1 commit into from
Jul 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions compiler/aarch64/codegen/ARM64Debug.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1003,6 +1003,22 @@ static const char *opCodeToNameMap[] =
"vfaddp2d",
"faddp2s",
"faddp2d",
"vfmaxnmp4s",
"vfmaxnmp2d",
"fmaxnmp2s",
"fmaxnmp2d",
"vfmaxp4s",
"vfmaxp2d",
"fmaxp2s",
"fmaxp2d",
"vfminnmp4s",
"vfminnmp2d",
"fminnmp2s",
"fminnmp2d",
"vfminp4s",
"vfminp2d",
"fminp2s",
"fminp2d",
"nop",
};

Expand Down
16 changes: 16 additions & 0 deletions compiler/aarch64/codegen/OMRInstOpCode.enum
Original file line number Diff line number Diff line change
Expand Up @@ -999,6 +999,22 @@
vfaddp2d, /* 0x6E60D400 FADDP */
faddp2s, /* 0x7E30D800 FADDP(scalar) */
faddp2d, /* 0x7E70D800 FADDP(scalar) */
vfmaxnmp4s, /* 0x6E20C400 FMAXNMP */
vfmaxnmp2d, /* 0x6E60C400 FMAXNMP */
fmaxnmp2s, /* 0x7E30C800 FMAXNMP(scalar) */
fmaxnmp2d, /* 0x7E70C800 FMAXNMP(scalar) */
vfmaxp4s, /* 0x6E20F400 FMAXP */
vfmaxp2d, /* 0x6E60F400 FMAXP */
fmaxp2s, /* 0x7E30F800 FMAXP(scalar) */
fmaxp2d, /* 0x7E70F800 FMAXP(scalar) */
vfminnmp4s, /* 0x6EA0C400 FMINNMP */
vfminnmp2d, /* 0x6EE0C400 FMINNMP */
fminnmp2s, /* 0x7EB0C800 FMINNMP(scalar) */
fminnmp2d, /* 0x7EF0C800 FMINNMP(scalar) */
vfminp4s, /* 0x6EA0F400 FMINP */
vfminp2d, /* 0x6EE0F400 FMINP */
fminp2s, /* 0x7EB0F800 FMINP(scalar) */
fminp2d, /* 0x7EF0F800 FMINP(scalar) */
/* Hint instructions */
nop, /* 0xD503201F NOP */
/* Internal OpCodes */
Expand Down
16 changes: 16 additions & 0 deletions compiler/aarch64/codegen/OpBinary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1000,6 +1000,22 @@ const OMR::ARM64::InstOpCode::OpCodeBinaryEntry OMR::ARM64::InstOpCode::binaryEn
0x6E60D400, /* FADDP vfaddp2d */
0x7E30D800, /* FADDP(scalar) faddp2s */
0x7E70D800, /* FADDP(scalar) faddp2d */
0x6E20C400, /* FMAXNMP vfmaxnmp4s */
0x6E60C400, /* FMAXNMP vfmaxnmp2d */
0x7E30C800, /* FMAXNMP(scalar) fmaxnmp2s */
0x7E70C800, /* FMAXNMP(scalar) fmaxnmp2d */
0x6E20F400, /* FMAXP vfmaxp4s */
0x6E60F400, /* FMAXP vfmaxp2d */
0x7E30F800, /* FMAXP(scalar) fmaxp2s */
0x7E70F800, /* FMAXP(scalar) fmaxp2d */
0x6EA0C400, /* FMINNMP vfminnmp4s */
0x6EE0C400, /* FMINNMP vfminnmp2d */
0x7EB0C800, /* FMINNMP(scalar) fminnmp2s */
0x7EF0C800, /* FMINNMP(scalar) fminnmp2d */
0x6EA0F400, /* FMINP vfminp4s */
0x6EE0F400, /* FMINP vfminp2d */
0x7EB0F800, /* FMINP(scalar) fminp2s */
0x7EF0F800, /* FMINP(scalar) fminp2d */
/* Hint instructions */
0xD503201F, /* NOP nop */
};
86 changes: 86 additions & 0 deletions fvtest/compilerunittest/aarch64/BinaryEncoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2162,3 +2162,89 @@ INSTANTIATE_TEST_CASE_P(VectorFmulElem, ARM64Trg1Src2IndexedElementEncodingTest,
std::make_tuple(TR::InstOpCode::vfmulelem_2d, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, 0, "4fcf9000"),
std::make_tuple(TR::InstOpCode::vfmulelem_2d, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, 1, "4fdf9800")
));

/*
 * Encoding cases for the vector floating point min/max pairwise opcodes
 * (vfmaxnmp, vfmaxp, vfminnmp, vfminp; 4s and 2d variants of each).
 *
 * Each tuple is (opcode, register, register, register, expected hex string).
 * For every opcode, one register operand at a time is set to v15 and then v31
 * while the other two stay at v0, so each register position is exercised on
 * its own with a mid-range and a maximum register number.
 *
 * NOTE(review): going by the fixture name (Trg1Src2) the three registers are
 * presumably target, source 1 and source 2, and the string is presumably the
 * expected 32-bit instruction word in hex -- confirm against the definition of
 * ARM64Trg1Src2EncodingTest.
 */
INSTANTIATE_TEST_CASE_P(VectorFloatMinMaxPairwise, ARM64Trg1Src2EncodingTest, ::testing::Values(
/* vfmaxnmp4s (FMAXNMP, binary entry 0x6E20C400) */
std::make_tuple(TR::InstOpCode::vfmaxnmp4s, TR::RealRegister::v15, TR::RealRegister::v0, TR::RealRegister::v0, "6e20c40f"),
std::make_tuple(TR::InstOpCode::vfmaxnmp4s, TR::RealRegister::v31, TR::RealRegister::v0, TR::RealRegister::v0, "6e20c41f"),
std::make_tuple(TR::InstOpCode::vfmaxnmp4s, TR::RealRegister::v0, TR::RealRegister::v15, TR::RealRegister::v0, "6e20c5e0"),
std::make_tuple(TR::InstOpCode::vfmaxnmp4s, TR::RealRegister::v0, TR::RealRegister::v31, TR::RealRegister::v0, "6e20c7e0"),
std::make_tuple(TR::InstOpCode::vfmaxnmp4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, "6e2fc400"),
std::make_tuple(TR::InstOpCode::vfmaxnmp4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, "6e3fc400"),
/* vfmaxnmp2d (FMAXNMP, binary entry 0x6E60C400) */
std::make_tuple(TR::InstOpCode::vfmaxnmp2d, TR::RealRegister::v15, TR::RealRegister::v0, TR::RealRegister::v0, "6e60c40f"),
std::make_tuple(TR::InstOpCode::vfmaxnmp2d, TR::RealRegister::v31, TR::RealRegister::v0, TR::RealRegister::v0, "6e60c41f"),
std::make_tuple(TR::InstOpCode::vfmaxnmp2d, TR::RealRegister::v0, TR::RealRegister::v15, TR::RealRegister::v0, "6e60c5e0"),
std::make_tuple(TR::InstOpCode::vfmaxnmp2d, TR::RealRegister::v0, TR::RealRegister::v31, TR::RealRegister::v0, "6e60c7e0"),
std::make_tuple(TR::InstOpCode::vfmaxnmp2d, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, "6e6fc400"),
std::make_tuple(TR::InstOpCode::vfmaxnmp2d, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, "6e7fc400"),
/* vfmaxp4s (FMAXP, binary entry 0x6E20F400) */
std::make_tuple(TR::InstOpCode::vfmaxp4s, TR::RealRegister::v15, TR::RealRegister::v0, TR::RealRegister::v0, "6e20f40f"),
std::make_tuple(TR::InstOpCode::vfmaxp4s, TR::RealRegister::v31, TR::RealRegister::v0, TR::RealRegister::v0, "6e20f41f"),
std::make_tuple(TR::InstOpCode::vfmaxp4s, TR::RealRegister::v0, TR::RealRegister::v15, TR::RealRegister::v0, "6e20f5e0"),
std::make_tuple(TR::InstOpCode::vfmaxp4s, TR::RealRegister::v0, TR::RealRegister::v31, TR::RealRegister::v0, "6e20f7e0"),
std::make_tuple(TR::InstOpCode::vfmaxp4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, "6e2ff400"),
std::make_tuple(TR::InstOpCode::vfmaxp4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, "6e3ff400"),
/* vfmaxp2d (FMAXP, binary entry 0x6E60F400) */
std::make_tuple(TR::InstOpCode::vfmaxp2d, TR::RealRegister::v15, TR::RealRegister::v0, TR::RealRegister::v0, "6e60f40f"),
std::make_tuple(TR::InstOpCode::vfmaxp2d, TR::RealRegister::v31, TR::RealRegister::v0, TR::RealRegister::v0, "6e60f41f"),
std::make_tuple(TR::InstOpCode::vfmaxp2d, TR::RealRegister::v0, TR::RealRegister::v15, TR::RealRegister::v0, "6e60f5e0"),
std::make_tuple(TR::InstOpCode::vfmaxp2d, TR::RealRegister::v0, TR::RealRegister::v31, TR::RealRegister::v0, "6e60f7e0"),
std::make_tuple(TR::InstOpCode::vfmaxp2d, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, "6e6ff400"),
std::make_tuple(TR::InstOpCode::vfmaxp2d, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, "6e7ff400"),
/* vfminnmp4s (FMINNMP, binary entry 0x6EA0C400) */
std::make_tuple(TR::InstOpCode::vfminnmp4s, TR::RealRegister::v15, TR::RealRegister::v0, TR::RealRegister::v0, "6ea0c40f"),
std::make_tuple(TR::InstOpCode::vfminnmp4s, TR::RealRegister::v31, TR::RealRegister::v0, TR::RealRegister::v0, "6ea0c41f"),
std::make_tuple(TR::InstOpCode::vfminnmp4s, TR::RealRegister::v0, TR::RealRegister::v15, TR::RealRegister::v0, "6ea0c5e0"),
std::make_tuple(TR::InstOpCode::vfminnmp4s, TR::RealRegister::v0, TR::RealRegister::v31, TR::RealRegister::v0, "6ea0c7e0"),
std::make_tuple(TR::InstOpCode::vfminnmp4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, "6eafc400"),
std::make_tuple(TR::InstOpCode::vfminnmp4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, "6ebfc400"),
/* vfminnmp2d (FMINNMP, binary entry 0x6EE0C400) */
std::make_tuple(TR::InstOpCode::vfminnmp2d, TR::RealRegister::v15, TR::RealRegister::v0, TR::RealRegister::v0, "6ee0c40f"),
std::make_tuple(TR::InstOpCode::vfminnmp2d, TR::RealRegister::v31, TR::RealRegister::v0, TR::RealRegister::v0, "6ee0c41f"),
std::make_tuple(TR::InstOpCode::vfminnmp2d, TR::RealRegister::v0, TR::RealRegister::v15, TR::RealRegister::v0, "6ee0c5e0"),
std::make_tuple(TR::InstOpCode::vfminnmp2d, TR::RealRegister::v0, TR::RealRegister::v31, TR::RealRegister::v0, "6ee0c7e0"),
std::make_tuple(TR::InstOpCode::vfminnmp2d, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, "6eefc400"),
std::make_tuple(TR::InstOpCode::vfminnmp2d, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, "6effc400"),
/* vfminp4s (FMINP, binary entry 0x6EA0F400) */
std::make_tuple(TR::InstOpCode::vfminp4s, TR::RealRegister::v15, TR::RealRegister::v0, TR::RealRegister::v0, "6ea0f40f"),
std::make_tuple(TR::InstOpCode::vfminp4s, TR::RealRegister::v31, TR::RealRegister::v0, TR::RealRegister::v0, "6ea0f41f"),
std::make_tuple(TR::InstOpCode::vfminp4s, TR::RealRegister::v0, TR::RealRegister::v15, TR::RealRegister::v0, "6ea0f5e0"),
std::make_tuple(TR::InstOpCode::vfminp4s, TR::RealRegister::v0, TR::RealRegister::v31, TR::RealRegister::v0, "6ea0f7e0"),
std::make_tuple(TR::InstOpCode::vfminp4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, "6eaff400"),
std::make_tuple(TR::InstOpCode::vfminp4s, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, "6ebff400"),
/* vfminp2d (FMINP, binary entry 0x6EE0F400) */
std::make_tuple(TR::InstOpCode::vfminp2d, TR::RealRegister::v15, TR::RealRegister::v0, TR::RealRegister::v0, "6ee0f40f"),
std::make_tuple(TR::InstOpCode::vfminp2d, TR::RealRegister::v31, TR::RealRegister::v0, TR::RealRegister::v0, "6ee0f41f"),
std::make_tuple(TR::InstOpCode::vfminp2d, TR::RealRegister::v0, TR::RealRegister::v15, TR::RealRegister::v0, "6ee0f5e0"),
std::make_tuple(TR::InstOpCode::vfminp2d, TR::RealRegister::v0, TR::RealRegister::v31, TR::RealRegister::v0, "6ee0f7e0"),
std::make_tuple(TR::InstOpCode::vfminp2d, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v15, "6eeff400"),
std::make_tuple(TR::InstOpCode::vfminp2d, TR::RealRegister::v0, TR::RealRegister::v0, TR::RealRegister::v31, "6efff400")
));

/*
 * Encoding cases for the scalar floating point min/max pairwise opcodes
 * (fmaxnmp, fmaxp, fminnmp, fminp; 2s and 2d variants of each).
 *
 * Each tuple is (opcode, register, register, expected hex string). For every
 * opcode, one register operand at a time is set to v15 and then v31 while the
 * other stays at v0, so each register position is exercised on its own with a
 * mid-range and a maximum register number.
 *
 * NOTE(review): going by the fixture name (Trg1Src1) the two registers are
 * presumably target and source, and the string is presumably the expected
 * 32-bit instruction word in hex -- confirm against the definition of
 * ARM64Trg1Src1EncodingTest.
 */
INSTANTIATE_TEST_CASE_P(ScalarFloatMinMaxPairwise, ARM64Trg1Src1EncodingTest, ::testing::Values(
/* fmaxnmp2s (FMAXNMP scalar, binary entry 0x7E30C800) */
std::make_tuple(TR::InstOpCode::fmaxnmp2s, TR::RealRegister::v15, TR::RealRegister::v0, "7e30c80f"),
std::make_tuple(TR::InstOpCode::fmaxnmp2s, TR::RealRegister::v31, TR::RealRegister::v0, "7e30c81f"),
std::make_tuple(TR::InstOpCode::fmaxnmp2s, TR::RealRegister::v0, TR::RealRegister::v15, "7e30c9e0"),
std::make_tuple(TR::InstOpCode::fmaxnmp2s, TR::RealRegister::v0, TR::RealRegister::v31, "7e30cbe0"),
/* fmaxnmp2d (FMAXNMP scalar, binary entry 0x7E70C800) */
std::make_tuple(TR::InstOpCode::fmaxnmp2d, TR::RealRegister::v15, TR::RealRegister::v0, "7e70c80f"),
std::make_tuple(TR::InstOpCode::fmaxnmp2d, TR::RealRegister::v31, TR::RealRegister::v0, "7e70c81f"),
std::make_tuple(TR::InstOpCode::fmaxnmp2d, TR::RealRegister::v0, TR::RealRegister::v15, "7e70c9e0"),
std::make_tuple(TR::InstOpCode::fmaxnmp2d, TR::RealRegister::v0, TR::RealRegister::v31, "7e70cbe0"),
/* fmaxp2s (FMAXP scalar, binary entry 0x7E30F800) */
std::make_tuple(TR::InstOpCode::fmaxp2s, TR::RealRegister::v15, TR::RealRegister::v0, "7e30f80f"),
std::make_tuple(TR::InstOpCode::fmaxp2s, TR::RealRegister::v31, TR::RealRegister::v0, "7e30f81f"),
std::make_tuple(TR::InstOpCode::fmaxp2s, TR::RealRegister::v0, TR::RealRegister::v15, "7e30f9e0"),
std::make_tuple(TR::InstOpCode::fmaxp2s, TR::RealRegister::v0, TR::RealRegister::v31, "7e30fbe0"),
/* fmaxp2d (FMAXP scalar, binary entry 0x7E70F800) */
std::make_tuple(TR::InstOpCode::fmaxp2d, TR::RealRegister::v15, TR::RealRegister::v0, "7e70f80f"),
std::make_tuple(TR::InstOpCode::fmaxp2d, TR::RealRegister::v31, TR::RealRegister::v0, "7e70f81f"),
std::make_tuple(TR::InstOpCode::fmaxp2d, TR::RealRegister::v0, TR::RealRegister::v15, "7e70f9e0"),
std::make_tuple(TR::InstOpCode::fmaxp2d, TR::RealRegister::v0, TR::RealRegister::v31, "7e70fbe0"),
/* fminnmp2s (FMINNMP scalar, binary entry 0x7EB0C800) */
std::make_tuple(TR::InstOpCode::fminnmp2s, TR::RealRegister::v15, TR::RealRegister::v0, "7eb0c80f"),
std::make_tuple(TR::InstOpCode::fminnmp2s, TR::RealRegister::v31, TR::RealRegister::v0, "7eb0c81f"),
std::make_tuple(TR::InstOpCode::fminnmp2s, TR::RealRegister::v0, TR::RealRegister::v15, "7eb0c9e0"),
std::make_tuple(TR::InstOpCode::fminnmp2s, TR::RealRegister::v0, TR::RealRegister::v31, "7eb0cbe0"),
/* fminnmp2d (FMINNMP scalar, binary entry 0x7EF0C800) */
std::make_tuple(TR::InstOpCode::fminnmp2d, TR::RealRegister::v15, TR::RealRegister::v0, "7ef0c80f"),
std::make_tuple(TR::InstOpCode::fminnmp2d, TR::RealRegister::v31, TR::RealRegister::v0, "7ef0c81f"),
std::make_tuple(TR::InstOpCode::fminnmp2d, TR::RealRegister::v0, TR::RealRegister::v15, "7ef0c9e0"),
std::make_tuple(TR::InstOpCode::fminnmp2d, TR::RealRegister::v0, TR::RealRegister::v31, "7ef0cbe0"),
/* fminp2s (FMINP scalar, binary entry 0x7EB0F800) */
std::make_tuple(TR::InstOpCode::fminp2s, TR::RealRegister::v15, TR::RealRegister::v0, "7eb0f80f"),
std::make_tuple(TR::InstOpCode::fminp2s, TR::RealRegister::v31, TR::RealRegister::v0, "7eb0f81f"),
std::make_tuple(TR::InstOpCode::fminp2s, TR::RealRegister::v0, TR::RealRegister::v15, "7eb0f9e0"),
std::make_tuple(TR::InstOpCode::fminp2s, TR::RealRegister::v0, TR::RealRegister::v31, "7eb0fbe0"),
/* fminp2d (FMINP scalar, binary entry 0x7EF0F800) */
std::make_tuple(TR::InstOpCode::fminp2d, TR::RealRegister::v15, TR::RealRegister::v0, "7ef0f80f"),
std::make_tuple(TR::InstOpCode::fminp2d, TR::RealRegister::v31, TR::RealRegister::v0, "7ef0f81f"),
std::make_tuple(TR::InstOpCode::fminp2d, TR::RealRegister::v0, TR::RealRegister::v15, "7ef0f9e0"),
std::make_tuple(TR::InstOpCode::fminp2d, TR::RealRegister::v0, TR::RealRegister::v31, "7ef0fbe0")
));